diff options
Diffstat (limited to 'src/carquinyol/indexstore.py')
-rw-r--r-- | src/carquinyol/indexstore.py | 45 |
1 files changed, 35 insertions, 10 deletions
diff --git a/src/carquinyol/indexstore.py b/src/carquinyol/indexstore.py index 42c3132..617ebd4 100644 --- a/src/carquinyol/indexstore.py +++ b/src/carquinyol/indexstore.py @@ -27,8 +27,10 @@ from carquinyol.layoutmanager import MAX_QUERY_LIMIT _VALUE_UID = 0 _VALUE_TIMESTAMP = 1 _VALUE_TITLE = 2 +_VALUE_VID = 3 _PREFIX_UID = 'Q' +_PREFIX_VID = 'V' _PREFIX_ACTIVITY = 'A' _PREFIX_ACTIVITY_ID = 'I' _PREFIX_MIME_TYPE = 'M' @@ -69,17 +71,19 @@ class IndexStore(object): for f in os.listdir(index_path): os.remove(os.path.join(index_path, f)) - def contains(self, uid): - postings = self._database.postlist(_PREFIX_UID + uid) + def contains(self, uid, vid): + postings = self._database.postlist(_PREFIX_UID + uid + _PREFIX_VID + vid) try: postlist_item = postings.next() except StopIteration: return False return True - def store(self, uid, properties): + def store(self, uid, vid, properties): document = Document() + document.add_term("%s%s%s%s" % (_PREFIX_UID, uid, _PREFIX_VID, vid)) document.add_term(_PREFIX_UID + uid) + document.add_term(_PREFIX_VID + vid) document.add_term(_PREFIX_ACTIVITY + properties.get('activity', '')) document.add_term(_PREFIX_MIME_TYPE + properties.get('mime_type', '')) document.add_term(_PREFIX_ACTIVITY_ID + @@ -87,6 +91,7 @@ class IndexStore(object): document.add_term(_PREFIX_KEEP + str(properties.get('keep', 0))) document.add_value(_VALUE_UID, uid) + document.add_value(_VALUE_VID, vid) document.add_value(_VALUE_TIMESTAMP, str(properties['timestamp'])) document.add_value(_VALUE_TITLE, properties.get('title', '').strip()) @@ -107,10 +112,10 @@ class IndexStore(object): term_generator.index_text_without_positions( self._extract_text(properties), 1, '') - if not self.contains(uid): + if not self.contains(uid, vid): self._database.add_document(document) else: - self._database.replace_document(_PREFIX_UID + uid, document) + self._database.replace_document("%s%s%s%s" % (_PREFIX_UID, uid, _PREFIX_VID, vid), document) self._flush() def _extract_text(self, properties): @@ -129,6 +134,7 @@ class IndexStore(object): def find(self, query): offset = query.pop('offset', 0) limit = query.pop('limit', MAX_QUERY_LIMIT) + all_versions = query.pop('all_versions', False) order_by = query.pop('order_by', []) enquire = Enquire(self._database) @@ -156,11 +162,25 @@ class IndexStore(object): query_result = enquire.get_mset(offset, limit, check_at_least) total_count = query_result.get_matches_estimated() - uids = [] + uvids = [] + timestamps = [] for hit in query_result: - uids.append(hit.document.get_value(_VALUE_UID)) + uvids.append((hit.document.get_value(_VALUE_UID), hit.document.get_value(_VALUE_VID))) - return (uids, total_count) + if not all_versions : + # only return latest version for each entry + # result set is already sorted by time so we only need to take the first entry for each uid + uids_vid = {} + uvids_new = [] + for (uid, vid) in uvids : + if uid not in uids_vid : + uids_vid[uid] = vid + uvids_new.append((uid,vid)) + + uvids = uvids_new + + # TODO: total_count will be totally off if all_versions is not set + return (uvids, total_count) def _parse_query(self, query_dict): logging.debug('_parse_query %r' % query_dict) @@ -193,10 +213,15 @@ class IndexStore(object): query = Query(Query.OP_VALUE_RANGE, _VALUE_TIMESTAMP, start, end) queries.append(query) + # TODO: refactor / simplify uid = query_dict.pop('uid', None) if uid is not None: queries.append(Query(_PREFIX_UID + uid)) + vid = query_dict.pop('vid', None) + if vid is not None: + queries.append(Query(_PREFIX_VID + vid)) + activity = query_dict.pop('activity', None) if activity is not None: queries.append(Query(_PREFIX_ACTIVITY + activity)) @@ -226,8 +251,8 @@ class IndexStore(object): return Query(Query.OP_AND, queries) - def delete(self, uid): - self._database.delete_document(_PREFIX_UID + uid) + def delete(self, uid, vid): + self._database.delete_document("%s%s%s%s" % (_PREFIX_UID, uid, _PREFIX_VID, vid)) def get_activities(self): activities = [] |