diff options
author | Sascha Silbe <silbe@activitycentral.com> | 2011-03-04 14:02:13 (GMT) |
---|---|---|
committer | Sascha Silbe <silbe@activitycentral.com> | 2011-03-04 14:02:13 (GMT) |
commit | ad3e6fafb99eca267fc2f470ea7b6cb5b43eec3d (patch) | |
tree | 3fff9bb06fd8ad7fbaf47d1ab17f2f765daaf128 /src/carquinyol/indexstore.py | |
parent | 83d9f81b5e442cd8e9006ec9a474cf6d0913578f (diff) | |
parent | 4babd564825dbcad358f8992abcaeefde78943cd (diff) |
Merge remote branch 'refs/remotes/origin/t/versions' into HEAD
* refs/remotes/origin/t/versions: (53 commits)
Add gconf setting /desktop/sugar/datastore/max_versions
Allow specifying the version_id of a new version.
add missing pieces for last commit
add compatibility with the old (unversioned) API
New TopGit dependency: t/rainbow-0.8
fix 0.82 migration typos
fix typos
New TopGit dependency: t/migration-rebuild-index
add metadata to Saved signal
adjust wording to new API
test suite: expect/filter out parent_id
save(): ensure parent_id is set correctly in metadata
index store: replace document if already in database (for change_metadata)
change_metadata(): make sure timestamp is set, like we do for save()
fix test suite failure
fix migration of checksum entries
work around Xapian returning incorrect match counts if offset/limit are used
fix sort order in ambiguous cases, fix obscure test suite breakage due to overlapping timestamp values
fix FileStore.retrieve() broken by last merge
test_massops.py: test ordering of find() results (for all supported orders) and offset/limit (for default order)
...
Diffstat (limited to 'src/carquinyol/indexstore.py')
-rw-r--r-- | src/carquinyol/indexstore.py | 143 |
1 file changed, 107 insertions, 36 deletions
diff --git a/src/carquinyol/indexstore.py b/src/carquinyol/indexstore.py
index 80a1ade..c9cd052 100644
--- a/src/carquinyol/indexstore.py
+++ b/src/carquinyol/indexstore.py
@@ -25,16 +25,18 @@ from xapian import WritableDatabase, Document, Enquire, Query
 from carquinyol import layoutmanager
 from carquinyol.layoutmanager import MAX_QUERY_LIMIT
 
-_VALUE_UID = 0
+_VALUE_TREE_ID = 0
 _VALUE_TIMESTAMP = 1
 _VALUE_TITLE = 2
-# 3 reserved for version support
+_VALUE_VERSION_ID = 3
 _VALUE_FILESIZE = 4
 _VALUE_CREATION_TIME = 5
 
 _PREFIX_NONE = 'N'
 _PREFIX_FULL_VALUE = 'F'
-_PREFIX_UID = 'Q'
+_PREFIX_OBJECT_ID = 'O'
+_PREFIX_TREE_ID = 'Q'
+_PREFIX_VERSION_ID = 'V'
 _PREFIX_ACTIVITY = 'A'
 _PREFIX_ACTIVITY_ID = 'I'
 _PREFIX_MIME_TYPE = 'M'
@@ -51,7 +53,8 @@ _PROPERTIES_NOT_TO_INDEX = ['timestamp', 'preview']
 _MAX_RESULTS = int(2 ** 31 - 1)
 
 _QUERY_TERM_MAP = {
-    'uid': _PREFIX_UID,
+    'tree_id': _PREFIX_TREE_ID,
+    'version_id': _PREFIX_VERSION_ID,
     'activity': _PREFIX_ACTIVITY,
     'activity_id': _PREFIX_ACTIVITY_ID,
     'mime_type': _PREFIX_MIME_TYPE,
@@ -257,34 +260,36 @@ class IndexStore(object):
         for f in os.listdir(index_path):
             os.remove(os.path.join(index_path, f))
 
-    def contains(self, uid):
-        postings = self._database.postlist(_PREFIX_FULL_VALUE + \
-            _PREFIX_UID + uid)
+    def contains(self, object_id):
+        postings = self._database.postlist(self._object_id_term(object_id))
         try:
             __ = postings.next()
         except StopIteration:
             return False
         return True
 
-    def store(self, uid, properties):
+    def store(self, object_id, properties):
+        tree_id, version_id = object_id
+        id_term = self._object_id_term(object_id)
         document = Document()
-        document.add_value(_VALUE_UID, uid)
+        document.add_value(_VALUE_TREE_ID, tree_id)
+        document.add_value(_VALUE_VERSION_ID, version_id)
+        document.add_term(id_term)
 
         term_generator = TermGenerator()
         term_generator.index_document(document, properties)
 
-        if not self.contains(uid):
-            self._database.add_document(document)
+        if self.contains(object_id):
+            self._database.replace_document(id_term, document)
         else:
-            self._database.replace_document(_PREFIX_FULL_VALUE + \
-                _PREFIX_UID + uid, document)
+            self._database.add_document(document)
 
         self._flush()
 
-    def find(self, query):
-        offset = query.pop('offset', 0)
-        limit = query.pop('limit', MAX_QUERY_LIMIT)
-        order_by = query.pop('order_by', [])
-        query_string = query.pop('query', None)
+    def find(self, query, query_string, options):
+        offset = options.pop('offset', 0)
+        limit = options.pop('limit', MAX_QUERY_LIMIT)
+        order_by = options.pop('order_by', [])
+        all_versions = options.pop('all_versions', False)
 
         query_parser = QueryParser()
         query_parser.set_database(self._database)
@@ -300,38 +305,101 @@ class IndexStore(object):
         order_by = order_by[0]
 
         if order_by == '+timestamp':
-            enquire.set_sort_by_value(_VALUE_TIMESTAMP, True)
-            enquire.set_docid_order(False)
+            order_by_value = _VALUE_TIMESTAMP
+            order_by_direction = True
         elif order_by == '-timestamp':
-            enquire.set_sort_by_value(_VALUE_TIMESTAMP, False)
-            enquire.set_docid_order(True)
+            order_by_value = _VALUE_TIMESTAMP
+            order_by_direction = False
         elif order_by == '+title':
-            enquire.set_sort_by_value(_VALUE_TITLE, True)
+            order_by_value = _VALUE_TITLE
+            order_by_direction = True
         elif order_by == '-title':
-            enquire.set_sort_by_value(_VALUE_TITLE, False)
+            order_by_value = _VALUE_TITLE
+            order_by_direction = False
        elif order_by == '+filesize':
-            enquire.set_sort_by_value(_VALUE_FILESIZE, True)
+            order_by_value = _VALUE_FILESIZE
+            order_by_direction = True
        elif order_by == '-filesize':
-            enquire.set_sort_by_value(_VALUE_FILESIZE, False)
+            order_by_value = _VALUE_FILESIZE
+            order_by_direction = False
        elif order_by == '+creation_time':
-            enquire.set_sort_by_value(_VALUE_CREATION_TIME, True)
+            order_by_value = _VALUE_CREATION_TIME
+            order_by_direction = True
        elif order_by == '-creation_time':
-            enquire.set_sort_by_value(_VALUE_CREATION_TIME, False)
+            order_by_value = _VALUE_CREATION_TIME
+            order_by_direction = False
        else:
+            order_by_value = _VALUE_TIMESTAMP
+            order_by_direction = True
             logging.warning('Unsupported property for sorting: %s', order_by)
             order_by = '+timestamp'
 
-        query_result = enquire.get_mset(offset, limit, check_at_least)
-        total_count = query_result.get_matches_estimated()
+        logging.debug('order_by=%r, order_by_value=%r, order_by_direction=%r',
+            order_by, order_by_value, order_by_direction)
+        enquire.set_sort_by_value(order_by_value, reverse=order_by_direction)
+        enquire.set_docid_order({True: enquire.DESCENDING,
+            False: enquire.ASCENDING}[order_by_direction])
+
+        if not all_versions:
+            enquire.set_collapse_key(_VALUE_TREE_ID)
+
+        if all_versions or (order_by == '+timestamp'):
+            logging.debug('using Xapian for sorting')
+#            query_result = enquire.get_mset(offset, limit, check_at_least)
+            # FIXME: work around Xapian returning incorrect match counts
+            query_result = enquire.get_mset(0, MAX_QUERY_LIMIT, MAX_QUERY_LIMIT)
+        else:
+            # Xapian doesn't support using a different sort order while
+            # collapsing (which needs to be timestamp in our case), so
+            # we need to query everything and sort+limit ourselves.
+            logging.debug('using Xapian for collapsing only')
+            enquire.set_sort_by_value(_VALUE_TIMESTAMP, True)
+            enquire.set_docid_order(enquire.ASCENDING)
+            query_result = enquire.get_mset(0, MAX_QUERY_LIMIT, MAX_QUERY_LIMIT)
+
+        total_count = query_result.get_matches_lower_bound()
+        documents = [hit.document for hit in query_result]
+
+        if (not all_versions) and (order_by != '+timestamp'):
+            logging.debug('sorting in Python')
+            def _cmp(a, b):
+                value_a = a.get_value(order_by_value)
+                value_b = b.get_value(order_by_value)
+                if value_a < value_b:
+                    return -1
+                elif value_a > value_b:
+                    return 1
+                elif a.get_docid() < b.get_docid():
+                    return -1
+                elif a.get_docid() > b.get_docid():
+                    return 1
+                return 0
+
+            documents.sort(cmp=_cmp, reverse=order_by_direction)
+            documents = documents[offset:offset+limit]
+        else:
+            # FIXME: work around Xapian returning incorrect match counts
+            logging.debug('doing offset/limit in Python (%r results, offset %r, limit %r)',
+                len(documents), offset, limit)
+            documents = documents[offset:offset+limit]
+
+        object_ids = []
+        for document in documents:
+            object_ids.append((document.get_value(_VALUE_TREE_ID),
+                document.get_value(_VALUE_VERSION_ID)))
 
-        uids = []
-        for hit in query_result:
-            uids.append(hit.document.get_value(_VALUE_UID))
+        return (object_ids, total_count)
 
-        return (uids, total_count)
+    def delete(self, object_id):
+        object_id_term = self._object_id_term(object_id)
 
-    def delete(self, uid):
-        self._database.delete_document(_PREFIX_FULL_VALUE + _PREFIX_UID + uid)
+        enquire = Enquire(self._database)
+        enquire.set_query(Query(object_id_term))
+        query_results = enquire.get_mset(0, 2, 2)
+        documents = [hit.document for hit in query_results]
+        assert len(documents) == 1
+
+        self._database.delete_document(object_id_term)
         self._flush()
 
     def get_activities(self):
@@ -341,6 +409,9 @@ class IndexStore(object):
             activities.append(term.term[len(prefix):])
         return activities
 
+    def _object_id_term(self, object_id):
+        return _PREFIX_FULL_VALUE + \
+            _PREFIX_OBJECT_ID + '%s-%s' % object_id
+
     def flush(self):
         self._flush(True)