Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summary · refs · log · tree · commit · diff · stats
path: root/src/carquinyol/indexstore.py
diff options
context:
space:
mode:
author    Sascha Silbe <silbe@activitycentral.com>    2011-03-04 14:02:13 (GMT)
committer Sascha Silbe <silbe@activitycentral.com>    2011-03-04 14:02:13 (GMT)
commit  ad3e6fafb99eca267fc2f470ea7b6cb5b43eec3d (patch)
tree    3fff9bb06fd8ad7fbaf47d1ab17f2f765daaf128 /src/carquinyol/indexstore.py
parent  83d9f81b5e442cd8e9006ec9a474cf6d0913578f (diff)
parent  4babd564825dbcad358f8992abcaeefde78943cd (diff)
Merge remote branch 'refs/remotes/origin/t/versions' into HEAD
* refs/remotes/origin/t/versions: (53 commits)
  - Add gconf setting /desktop/sugar/datastore/max_versions
  - Allow specifying the version_id of a new version.
  - add missing pieces for last commit
  - add compatibility with the old (unversioned) API
  - New TopGit dependency: t/rainbow-0.8
  - fix 0.82 migration typos
  - fix typos
  - New TopGit dependency: t/migration-rebuild-index
  - add metadata to Saved signal
  - adjust wording to new API
  - test suite: expect/filter out parent_id
  - save(): ensure parent_id is set correctly in metadata
  - index store: replace document if already in database (for change_metadata)
  - change_metadata(): make sure timestamp is set, like we do for save()
  - fix test suite failure
  - fix migration of checksum entries
  - work around Xapian returning incorrect match counts if offset/limit are used
  - fix sort order in ambiguous cases, fix obscure test suite breakage due to overlapping timestamp values
  - fix FileStore.retrieve() broken by last merge
  - test_massops.py: test ordering of find() results (for all supported orders) and offset/limit (for default order)
  - ...
Diffstat (limited to 'src/carquinyol/indexstore.py')
-rw-r--r--  src/carquinyol/indexstore.py | 143
1 file changed, 107 insertions(+), 36 deletions(-)
diff --git a/src/carquinyol/indexstore.py b/src/carquinyol/indexstore.py
index 80a1ade..c9cd052 100644
--- a/src/carquinyol/indexstore.py
+++ b/src/carquinyol/indexstore.py
@@ -25,16 +25,18 @@ from xapian import WritableDatabase, Document, Enquire, Query
from carquinyol import layoutmanager
from carquinyol.layoutmanager import MAX_QUERY_LIMIT
-_VALUE_UID = 0
+_VALUE_TREE_ID = 0
_VALUE_TIMESTAMP = 1
_VALUE_TITLE = 2
-# 3 reserved for version support
+_VALUE_VERSION_ID = 3
_VALUE_FILESIZE = 4
_VALUE_CREATION_TIME = 5
_PREFIX_NONE = 'N'
_PREFIX_FULL_VALUE = 'F'
-_PREFIX_UID = 'Q'
+_PREFIX_OBJECT_ID = 'O'
+_PREFIX_TREE_ID = 'Q'
+_PREFIX_VERSION_ID = 'V'
_PREFIX_ACTIVITY = 'A'
_PREFIX_ACTIVITY_ID = 'I'
_PREFIX_MIME_TYPE = 'M'
@@ -51,7 +53,8 @@ _PROPERTIES_NOT_TO_INDEX = ['timestamp', 'preview']
_MAX_RESULTS = int(2 ** 31 - 1)
_QUERY_TERM_MAP = {
- 'uid': _PREFIX_UID,
+ 'tree_id': _PREFIX_TREE_ID,
+ 'version_id': _PREFIX_VERSION_ID,
'activity': _PREFIX_ACTIVITY,
'activity_id': _PREFIX_ACTIVITY_ID,
'mime_type': _PREFIX_MIME_TYPE,
@@ -257,34 +260,36 @@ class IndexStore(object):
for f in os.listdir(index_path):
os.remove(os.path.join(index_path, f))
- def contains(self, uid):
- postings = self._database.postlist(_PREFIX_FULL_VALUE + \
- _PREFIX_UID + uid)
+ def contains(self, object_id):
+ postings = self._database.postlist(self._object_id_term(object_id))
try:
__ = postings.next()
except StopIteration:
return False
return True
- def store(self, uid, properties):
+ def store(self, object_id, properties):
+ tree_id, version_id = object_id
+ id_term = self._object_id_term(object_id)
document = Document()
- document.add_value(_VALUE_UID, uid)
+ document.add_value(_VALUE_TREE_ID, tree_id)
+ document.add_value(_VALUE_VERSION_ID, version_id)
+ document.add_term(id_term)
term_generator = TermGenerator()
term_generator.index_document(document, properties)
- if not self.contains(uid):
- self._database.add_document(document)
+ if self.contains(object_id):
+ self._database.replace_document(id_term, document)
else:
- self._database.replace_document(_PREFIX_FULL_VALUE + \
- _PREFIX_UID + uid, document)
+ self._database.add_document(document)
self._flush()
- def find(self, query):
- offset = query.pop('offset', 0)
- limit = query.pop('limit', MAX_QUERY_LIMIT)
- order_by = query.pop('order_by', [])
- query_string = query.pop('query', None)
+ def find(self, query, query_string, options):
+ offset = options.pop('offset', 0)
+ limit = options.pop('limit', MAX_QUERY_LIMIT)
+ order_by = options.pop('order_by', [])
+ all_versions = options.pop('all_versions', False)
query_parser = QueryParser()
query_parser.set_database(self._database)
@@ -300,38 +305,101 @@ class IndexStore(object):
order_by = order_by[0]
if order_by == '+timestamp':
- enquire.set_sort_by_value(_VALUE_TIMESTAMP, True)
- enquire.set_docid_order(False)
+ order_by_value = _VALUE_TIMESTAMP
+ order_by_direction = True
elif order_by == '-timestamp':
- enquire.set_sort_by_value(_VALUE_TIMESTAMP, False)
- enquire.set_docid_order(True)
+ order_by_value = _VALUE_TIMESTAMP
+ order_by_direction = False
elif order_by == '+title':
- enquire.set_sort_by_value(_VALUE_TITLE, True)
+ order_by_value = _VALUE_TITLE
+ order_by_direction = True
elif order_by == '-title':
- enquire.set_sort_by_value(_VALUE_TITLE, False)
+ order_by_value = _VALUE_TITLE
+ order_by_direction = False
elif order_by == '+filesize':
- enquire.set_sort_by_value(_VALUE_FILESIZE, True)
+ order_by_value = _VALUE_FILESIZE
+ order_by_direction = True
elif order_by == '-filesize':
- enquire.set_sort_by_value(_VALUE_FILESIZE, False)
+ order_by_value = _VALUE_FILESIZE
+ order_by_direction = False
elif order_by == '+creation_time':
- enquire.set_sort_by_value(_VALUE_CREATION_TIME, True)
+ order_by_value = _VALUE_CREATION_TIME
+ order_by_direction = True
elif order_by == '-creation_time':
- enquire.set_sort_by_value(_VALUE_CREATION_TIME, False)
+ order_by_value = _VALUE_CREATION_TIME
+ order_by_direction = False
else:
+ order_by_value = _VALUE_TIMESTAMP
+ order_by_direction = True
logging.warning('Unsupported property for sorting: %s', order_by)
order_by = '+timestamp'
- query_result = enquire.get_mset(offset, limit, check_at_least)
- total_count = query_result.get_matches_estimated()
+ logging.debug('order_by=%r, order_by_value=%r, order_by_direction=%r',
+ order_by, order_by_value, order_by_direction)
+ enquire.set_sort_by_value(order_by_value, reverse=order_by_direction)
+ enquire.set_docid_order({True: enquire.DESCENDING,
+ False: enquire.ASCENDING}[order_by_direction])
+
+ if not all_versions:
+ enquire.set_collapse_key(_VALUE_TREE_ID)
+
+ if all_versions or (order_by == '+timestamp'):
+ logging.debug('using Xapian for sorting')
+# query_result = enquire.get_mset(offset, limit, check_at_least)
+ # FIXME: work around Xapian returning incorrect match counts
+ query_result = enquire.get_mset(0, MAX_QUERY_LIMIT, MAX_QUERY_LIMIT)
+ else:
+ # Xapian doesn't support using a different sort order while
+ # collapsing (which needs to be timestamp in our case), so
+ # we need to query everything and sort+limit ourselves.
+ logging.debug('using Xapian for collapsing only')
+ enquire.set_sort_by_value(_VALUE_TIMESTAMP, True)
+ enquire.set_docid_order(enquire.ASCENDING)
+ query_result = enquire.get_mset(0, MAX_QUERY_LIMIT, MAX_QUERY_LIMIT)
+
+ total_count = query_result.get_matches_lower_bound()
+ documents = [hit.document for hit in query_result]
+
+ if (not all_versions) and (order_by != '+timestamp'):
+ logging.debug('sorting in Python')
+ def _cmp(a, b):
+ value_a = a.get_value(order_by_value)
+ value_b = b.get_value(order_by_value)
+ if value_a < value_b:
+ return -1
+ elif value_a > value_b:
+ return 1
+ elif a.get_docid() < b.get_docid():
+ return -1
+ elif a.get_docid() > b.get_docid():
+ return 1
+ return 0
+
+ documents.sort(cmp=_cmp, reverse=order_by_direction)
+ documents = documents[offset:offset+limit]
+ else:
+ # FIXME: work around Xapian returning incorrect match counts
+ logging.debug('doing offset/limit in Python (%r results, offset %r, limit %r)',
+ len(documents), offset, limit)
+ documents = documents[offset:offset+limit]
+
+ object_ids = []
+ for document in documents:
+ object_ids.append((document.get_value(_VALUE_TREE_ID),
+ document.get_value(_VALUE_VERSION_ID)))
- uids = []
- for hit in query_result:
- uids.append(hit.document.get_value(_VALUE_UID))
+ return (object_ids, total_count)
- return (uids, total_count)
+ def delete(self, object_id):
+ object_id_term = self._object_id_term(object_id)
- def delete(self, uid):
- self._database.delete_document(_PREFIX_FULL_VALUE + _PREFIX_UID + uid)
+ enquire = Enquire(self._database)
+ enquire.set_query(Query(object_id_term))
+ query_results = enquire.get_mset(0, 2, 2)
+ documents = [hit.document for hit in query_results]
+ assert len(documents) == 1
+
+ self._database.delete_document(object_id_term)
self._flush()
def get_activities(self):
@@ -341,6 +409,9 @@ class IndexStore(object):
activities.append(term.term[len(prefix):])
return activities
+ def _object_id_term(self, object_id):
+ return _PREFIX_FULL_VALUE + _PREFIX_OBJECT_ID + '%s-%s' % object_id
+
def flush(self):
self._flush(True)