Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summary   ·   refs   ·   log   ·   tree   ·   commit   ·   diff   ·   stats
diff options
context:
space:
mode:
author Tomeu Vizoso <tomeu@tomeuvizoso.net> 2008-09-19 11:48:10 (GMT)
committer Tomeu Vizoso <tomeu@tomeuvizoso.net> 2008-09-19 11:48:10 (GMT)
commit 650ab8203ea5ec247ea60740201d2a0e76f346ac (patch)
tree 967ec6c43ba9ad6bdec57765f474965a5e35066c
parent 76cb0e431266135d418f60b608c286988b3a563e (diff)
Rebuild index if needed
-rw-r--r-- src/olpc/datastore/datastore.py 65
-rw-r--r-- src/olpc/datastore/indexstore.py 26
-rw-r--r-- src/olpc/datastore/layoutmanager.py 27
3 files changed, 109 insertions, 9 deletions
diff --git a/src/olpc/datastore/datastore.py b/src/olpc/datastore/datastore.py
index 4ae9a09..95ea4da 100644
--- a/src/olpc/datastore/datastore.py
+++ b/src/olpc/datastore/datastore.py
@@ -16,8 +16,10 @@ import time
import os
import dbus
+import gobject
from olpc.datastore import layoutmanager
+from olpc.datastore.layoutmanager import MAX_QUERY_LIMIT
from olpc.datastore.metadatastore import MetadataStore
from olpc.datastore.indexstore import IndexStore
from olpc.datastore.filestore import FileStore
@@ -41,9 +43,43 @@ class DataStore(dbus.service.Object):
dbus.service.Object.__init__(self, bus_name, DS_OBJECT_PATH)
self._metadata_store = MetadataStore()
+
self._index_store = IndexStore()
+ try:
+ self._index_store.open_index()
+ except Exception, e:
+ logging.error('Failed to open index, will rebuild: %r', e)
+ layoutmanager.get_instance().index_updated = False
+ self._index_store.remove_index()
+ self._index_store.open_index()
+
self._file_store = FileStore()
+ if not layoutmanager.get_instance().index_updated:
+ logging.debug('Index is not up-to-date, will update')
+ self._rebuild_index()
+
+    def _rebuild_index(self):
+        """Queue a low-priority idle job that re-indexes the stored entries.
+
+        The uids reported by the layout manager are collected once and the
+        same list is handed to __rebuild_index_cb on every idle invocation.
+        """
+        pending_uids = layoutmanager.get_instance().find_all()
+
+        def rebuild_step():
+            return self.__rebuild_index_cb(pending_uids)
+
+        gobject.idle_add(rebuild_step, priority=gobject.PRIORITY_LOW)
+
+    def __rebuild_index_cb(self, uids):
+        """Index one pending entry; idle callback scheduled by _rebuild_index.
+
+        uids -- list of entry uids still to be processed. One uid is popped
+                from it on each call, so the caller's list shrinks in place.
+
+        Returns True while uids remain, so the idle source is called again,
+        and False once the list is exhausted. At that point the layout
+        manager is told that the index is up-to-date.
+        """
+        # With no entries on disk the list starts out empty; pop() would
+        # raise IndexError and the index would never be flagged as updated.
+        if not uids:
+            layoutmanager.get_instance().index_updated = True
+            return False
+
+        uid = uids.pop()
+
+        logging.debug('Updating entry %r in index. %d to go.',
+                      uid, len(uids))
+
+        # Entries already present in the index are left untouched.
+        if not self._index_store.contains(uid):
+            props = self._metadata_store.retrieve(uid)
+            self._index_store.store(uid, props)
+
+        if uids:
+            return True
+
+        layoutmanager.get_instance().index_updated = True
+        return False
+
def _create_completion_cb(self, async_cb, async_err_cb, uid, exc=None):
logger.debug("_create_completion_cb(%r, %r, %r, %r)" % \
(async_cb, async_err_cb, uid, exc))
@@ -117,7 +153,27 @@ class DataStore(dbus.service.Object):
out_signature='aa{sv}u')
def find(self, query, properties):
t = time.time()
- uids, count = self._index_store.find(query)
+
+ if not layoutmanager.get_instance().index_updated:
+ logging.warning('Index updating, returning all entries')
+
+ uids = layoutmanager.get_instance().find_all()
+ count = len(uids)
+
+ offset = query.get('offset', 0)
+ limit = query.get('limit', MAX_QUERY_LIMIT)
+ uids = uids[offset, offset + limit]
+ else:
+ try:
+ uids, count = self._index_store.find(query)
+ except Exception, e:
+ logging.error('Failed to query index, will rebuild: %r', e)
+ layoutmanager.get_instance().index_updated = False
+ self._index_store.close_index()
+ self._index_store.remove_index()
+ self._index_store.open_index()
+ self._rebuild_index()
+
entries = []
for uid in uids:
metadata = self._metadata_store.retrieve(uid, properties)
@@ -154,7 +210,11 @@ class DataStore(dbus.service.Object):
raise ValueError('Only ''activity'' is a supported property name')
if query:
raise ValueError('The query parameter is not supported')
- return self._index_store.get_activities()
+ if layoutmanager.get_instance().index_updated:
+ return self._index_store.get_activities()
+ else:
+ logging.warning('Index updating, returning an empty list')
+ return []
@dbus.service.method(DS_DBUS_INTERFACE,
in_signature='s',
@@ -176,6 +236,7 @@ class DataStore(dbus.service.Object):
def stop(self):
"""shutdown the service"""
+ self._index_store.close_index()
self.Stopped()
@dbus.service.signal(DS_DBUS_INTERFACE)
diff --git a/src/olpc/datastore/indexstore.py b/src/olpc/datastore/indexstore.py
index e4f36fd..0ce91ee 100644
--- a/src/olpc/datastore/indexstore.py
+++ b/src/olpc/datastore/indexstore.py
@@ -1,14 +1,14 @@
import logging
import time
import sys
+import os
import gobject
import xapian
from xapian import WritableDatabase, Document, Enquire, Query, QueryParser
from olpc.datastore import layoutmanager
-
-_MAX_LIMIT = 4096
+from olpc.datastore.layoutmanager import MAX_QUERY_LIMIT
_VALUE_UID = 0
_VALUE_TIMESTAMP = 1
@@ -31,12 +31,24 @@ _PROPERTIES_NOT_TO_INDEX = ['timestamp', 'activity_id', 'keep', 'preview']
class IndexStore(object):
def __init__(self):
- index_path = layoutmanager.get_instance().get_index_path()
- self._database = WritableDatabase(index_path, xapian.DB_CREATE_OR_OPEN)
+ self._database = None
self._flush_timeout = None
self._pending_writes = 0
- def _document_exists(self, uid):
+    def open_index(self):
+        """Open the database at the layout manager's index path.
+
+        The handle is opened with xapian.DB_CREATE_OR_OPEN and kept in
+        self._database.
+        """
+        layout = layoutmanager.get_instance()
+        self._database = WritableDatabase(layout.get_index_path(),
+                                          xapian.DB_CREATE_OR_OPEN)
+
+    def close_index(self):
+        """Flush the database and drop the handle.
+
+        Does nothing when no database is open, so it is safe to call after
+        a failed open_index() or more than once. The handle is dropped even
+        if flush() raises; the exception still propagates to the caller.
+        """
+        if self._database is None:
+            return
+        try:
+            self._database.flush()
+        finally:
+            # Never keep a handle to a database that failed to flush.
+            self._database = None
+
+    def remove_index(self):
+        """Delete every file found directly inside the index directory."""
+        index_dir = layoutmanager.get_instance().get_index_path()
+        for name in os.listdir(index_dir):
+            victim = os.path.join(index_dir, name)
+            os.remove(victim)
+
+ def contains(self, uid):
postings = self._database.postlist(_PREFIX_UID + uid)
try:
postlist_item = postings.next()
@@ -73,7 +85,7 @@ class IndexStore(object):
term_generator.index_text_without_positions(
self._extract_text(properties), 1, '')
- if not self._document_exists(uid):
+ if not self.contains(uid):
self._database.add_document(document)
else:
self._database.replace_document(_PREFIX_UID + uid, document)
@@ -93,7 +105,7 @@ class IndexStore(object):
enquire.set_query(self._parse_query(query))
offset = query.get('offset', 0)
- limit = query.get('limit', _MAX_LIMIT)
+ limit = query.get('limit', MAX_QUERY_LIMIT)
# This will assure that the results count is exact.
check_at_least = offset + limit + 1
diff --git a/src/olpc/datastore/layoutmanager.py b/src/olpc/datastore/layoutmanager.py
index e9c435e..db91690 100644
--- a/src/olpc/datastore/layoutmanager.py
+++ b/src/olpc/datastore/layoutmanager.py
@@ -1,5 +1,7 @@
import os
+MAX_QUERY_LIMIT = 40960
+
class LayoutManager(object):
def __init__(self):
profile = os.environ.get('SUGAR_PROFILE', 'default')
@@ -11,6 +13,9 @@ class LayoutManager(object):
self._create_if_needed(self.get_checksums_dir())
self._create_if_needed(self.get_queue_path())
+ index_updated_path = os.path.join(self._root_path, 'index_updated')
+ self._index_updated = os.path.exists(index_updated_path)
+
def _create_if_needed(self, path):
if not os.path.exists(path):
os.makedirs(path)
@@ -27,6 +32,28 @@ class LayoutManager(object):
def get_queue_path(self):
return os.path.join(self.get_checksums_dir(), 'queue')
+    def _is_index_updated(self):
+        """Return whether the index is flagged as up-to-date."""
+        return self._index_updated
+
+    def _set_index_updated(self, index_updated):
+        """Record the flag and mirror it in the 'index_updated' marker file.
+
+        The marker file exists under the root path while the flag is true.
+        Its state is derived from the requested value instead of being
+        toggled, so a marker created or removed behind our back cannot
+        invert the persisted flag.
+        """
+        if index_updated == self._index_updated:
+            return
+        self._index_updated = index_updated
+
+        index_updated_path = os.path.join(self._root_path, 'index_updated')
+        if index_updated:
+            open(index_updated_path, 'w').close()
+        elif os.path.exists(index_updated_path):
+            os.remove(index_updated_path)
+
+    index_updated = property(_is_index_updated, _set_index_updated)
+
+    def find_all(self):
+        """Return the names found inside every entry directory of the store.
+
+        Each directory directly under the root path, except the known
+        bookkeeping names, is listed and its children are collected; the
+        callers treat those names as entry uids.
+        """
+        uids = []
+        for f in os.listdir(self._root_path):
+            if f in ['index', 'checksums', 'queue', 'index_updated']:
+                continue
+            bucket = os.path.join(self._root_path, f)
+            # A stray regular file in the root would make listdir() raise.
+            if os.path.isdir(bucket):
+                uids.extend(os.listdir(bucket))
+        return uids
+
_instance = None
def get_instance():
global _instance