diff options
author | Tomeu Vizoso <tomeu@tomeuvizoso.net> | 2008-09-19 11:48:10 (GMT) |
---|---|---|
committer | Tomeu Vizoso <tomeu@tomeuvizoso.net> | 2008-09-19 11:48:10 (GMT) |
commit | 650ab8203ea5ec247ea60740201d2a0e76f346ac (patch) | |
tree | 967ec6c43ba9ad6bdec57765f474965a5e35066c | |
parent | 76cb0e431266135d418f60b608c286988b3a563e (diff) |
Rebuild index if needed
-rw-r--r-- | src/olpc/datastore/datastore.py | 65 | ||||
-rw-r--r-- | src/olpc/datastore/indexstore.py | 26 | ||||
-rw-r--r-- | src/olpc/datastore/layoutmanager.py | 27 |
3 files changed, 109 insertions, 9 deletions
diff --git a/src/olpc/datastore/datastore.py b/src/olpc/datastore/datastore.py index 4ae9a09..95ea4da 100644 --- a/src/olpc/datastore/datastore.py +++ b/src/olpc/datastore/datastore.py @@ -16,8 +16,10 @@ import time import os import dbus +import gobject from olpc.datastore import layoutmanager +from olpc.datastore.layoutmanager import MAX_QUERY_LIMIT from olpc.datastore.metadatastore import MetadataStore from olpc.datastore.indexstore import IndexStore from olpc.datastore.filestore import FileStore @@ -41,9 +43,43 @@ class DataStore(dbus.service.Object): dbus.service.Object.__init__(self, bus_name, DS_OBJECT_PATH) self._metadata_store = MetadataStore() + self._index_store = IndexStore() + try: + self._index_store.open_index() + except Exception, e: + logging.error('Failed to open index, will rebuild: %r', e) + layoutmanager.get_instance().index_updated = False + self._index_store.remove_index() + self._index_store.open_index() + self._file_store = FileStore() + if not layoutmanager.get_instance().index_updated: + logging.debug('Index is not up-to-date, will update') + self._rebuild_index() + + def _rebuild_index(self): + uids = layoutmanager.get_instance().find_all() + gobject.idle_add(lambda: self.__rebuild_index_cb(uids), + priority=gobject.PRIORITY_LOW) + + def __rebuild_index_cb(self, uids): + uid = uids.pop() + + logging.debug('Updating entry %r in index. %d to go.' % \ + (uid, len(uids))) + + if not self._index_store.contains(uid): + props = self._metadata_store.retrieve(uid) + self._index_store.store(uid, props) + + if not uids: + layoutmanager.get_instance().index_updated = True + return False + else: + return True + def _create_completion_cb(self, async_cb, async_err_cb, uid, exc=None): logger.debug("_create_completion_cb(%r, %r, %r, %r)" % \ (async_cb, async_err_cb, uid, exc)) @@ -117,7 +153,27 @@ class DataStore(dbus.service.Object): out_signature='aa{sv}u') def find(self, query, properties): t = time.time() - uids, count = self._index_store.find(query) + + if not layoutmanager.get_instance().index_updated: + logging.warning('Index updating, returning all entries') + + uids = layoutmanager.get_instance().find_all() + count = len(uids) + + offset = query.get('offset', 0) + limit = query.get('limit', MAX_QUERY_LIMIT) + uids = uids[offset, offset + limit] + else: + try: + uids, count = self._index_store.find(query) + except Exception, e: + logging.error('Failed to query index, will rebuild: %r', e) + layoutmanager.get_instance().index_updated = False + self._index_store.close_index() + self._index_store.remove_index() + self._index_store.open_index() + self._rebuild_index() + entries = [] for uid in uids: metadata = self._metadata_store.retrieve(uid, properties) @@ -154,7 +210,11 @@ class DataStore(dbus.service.Object): raise ValueError('Only ''activity'' is a supported property name') if query: raise ValueError('The query parameter is not supported') - return self._index_store.get_activities() + if layoutmanager.get_instance().index_updated: + return self._index_store.get_activities() + else: + logging.warning('Index updating, returning an empty list') + return [] @dbus.service.method(DS_DBUS_INTERFACE, in_signature='s', @@ -176,6 +236,7 @@ class DataStore(dbus.service.Object): def stop(self): """shutdown the service""" + self._index_store.close_index() self.Stopped() @dbus.service.signal(DS_DBUS_INTERFACE) diff --git a/src/olpc/datastore/indexstore.py b/src/olpc/datastore/indexstore.py index e4f36fd..0ce91ee 100644 --- a/src/olpc/datastore/indexstore.py +++ b/src/olpc/datastore/indexstore.py @@ -1,14 +1,14 @@ import logging import time import sys +import os import gobject import xapian from xapian import WritableDatabase, Document, Enquire, Query, QueryParser from olpc.datastore import layoutmanager - -_MAX_LIMIT = 4096 +from olpc.datastore.layoutmanager import MAX_QUERY_LIMIT _VALUE_UID = 0 _VALUE_TIMESTAMP = 1 @@ -31,12 +31,24 @@ _PROPERTIES_NOT_TO_INDEX = ['timestamp', 'activity_id', 'keep', 'preview'] class IndexStore(object): def __init__(self): - index_path = layoutmanager.get_instance().get_index_path() - self._database = WritableDatabase(index_path, xapian.DB_CREATE_OR_OPEN) + self._database = None self._flush_timeout = None self._pending_writes = 0 - def _document_exists(self, uid): + def open_index(self): + index_path = layoutmanager.get_instance().get_index_path() + self._database = WritableDatabase(index_path, xapian.DB_CREATE_OR_OPEN) + + def close_index(self): + self._database.flush() + self._database = None + + def remove_index(self): + index_path = layoutmanager.get_instance().get_index_path() + for f in os.listdir(index_path): + os.remove(os.path.join(index_path, f)) + + def contains(self, uid): postings = self._database.postlist(_PREFIX_UID + uid) try: postlist_item = postings.next() @@ -73,7 +85,7 @@ class IndexStore(object): term_generator.index_text_without_positions( self._extract_text(properties), 1, '') - if not self._document_exists(uid): + if not self.contains(uid): self._database.add_document(document) else: self._database.replace_document(_PREFIX_UID + uid, document) @@ -93,7 +105,7 @@ class IndexStore(object): enquire.set_query(self._parse_query(query)) offset = query.get('offset', 0) - limit = query.get('limit', _MAX_LIMIT) + limit = query.get('limit', MAX_QUERY_LIMIT) # This will assure that the results count is exact. check_at_least = offset + limit + 1 diff --git a/src/olpc/datastore/layoutmanager.py b/src/olpc/datastore/layoutmanager.py index e9c435e..db91690 100644 --- a/src/olpc/datastore/layoutmanager.py +++ b/src/olpc/datastore/layoutmanager.py @@ -1,5 +1,7 @@ import os +MAX_QUERY_LIMIT = 40960 + class LayoutManager(object): def __init__(self): profile = os.environ.get('SUGAR_PROFILE', 'default') @@ -11,6 +13,9 @@ class LayoutManager(object): self._create_if_needed(self.get_checksums_dir()) self._create_if_needed(self.get_queue_path()) + index_updated_path = os.path.join(self._root_path, 'index_updated') + self._index_updated = os.path.exists(index_updated_path) + def _create_if_needed(self, path): if not os.path.exists(path): os.makedirs(path) @@ -27,6 +32,28 @@ class LayoutManager(object): def get_queue_path(self): return os.path.join(self.get_checksums_dir(), 'queue') + def _is_index_updated(self): + return self._index_updated + + def _set_index_updated(self, index_updated): + if index_updated != self._index_updated: + self._index_updated = index_updated + + index_updated_path = os.path.join(self._root_path, 'index_updated') + if os.path.exists(index_updated_path): + os.remove(index_updated_path) + else: + open(index_updated_path, 'w').close() + + index_updated = property(_is_index_updated, _set_index_updated) + + def find_all(self): + uids = [] + for f in os.listdir(self._root_path): + if f not in ['index', 'checksums', 'queue', 'index_updated']: + uids.extend(os.listdir(os.path.join(self._root_path, f))) + return uids + _instance = None def get_instance(): global _instance |