Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summary   ·   refs   ·   log   ·   tree   ·   commit   ·   diff   ·   stats
path: root/src
diff options
context:
space:
mode:
author Tomeu Vizoso <tomeu@tomeuvizoso.net> 2008-08-22 13:33:04 (GMT)
committer Tomeu Vizoso <tomeu@tomeuvizoso.net> 2008-08-22 13:33:04 (GMT)
commit b740fc38b11e6b7a0699ef97987889ffee47d3f2 (patch)
tree 47845d4388db2931778e6ae9c1cdb894fa156bc2 /src
parent 43d44783dbe2e41fb764b45dc24019dcc49c8c4d (diff)
Index text and implement full text search
Diffstat (limited to 'src')
-rw-r--r-- src/olpc/datastore/indexstore.py 51
1 file changed, 50 insertions, 1 deletion
diff --git a/src/olpc/datastore/indexstore.py b/src/olpc/datastore/indexstore.py
index aece2d1..0adf9d4 100644
--- a/src/olpc/datastore/indexstore.py
+++ b/src/olpc/datastore/indexstore.py
@@ -1,7 +1,8 @@
import os
+import logging
import xapian
-from xapian import WritableDatabase, Document, Enquire, Query
+from xapian import WritableDatabase, Document, Enquire, Query, QueryParser
_MAX_LIMIT = 4096
@@ -12,6 +13,8 @@ _VALUE_MIME_TYPE = 3
_VALUE_ACTIVITY = 4
_VALUE_KEEP = 5
+_PROPERTIES_NOT_TO_INDEX = ['timestamp', 'activity_id', 'keep', 'preview']
+
class IndexStore(object):
def __init__(self, root_path):
index_path = os.path.join(root_path, 'index')
@@ -34,12 +37,38 @@ class IndexStore(object):
document.add_value(_VALUE_MIME_TYPE, str(properties['keep']))
document.add_value(_VALUE_ACTIVITY, properties['activity'])
+ term_generator = xapian.TermGenerator()
+
+ # TODO: we should do stemming, but in which language?
+ #if language is not None:
+ # term_generator.set_stemmer(_xapian.Stem(language))
+
+ # TODO: we should use a stopper
+ #if stop is not None:
+ # stopper = _xapian.SimpleStopper()
+ # for term in stop:
+ # stopper.add (term)
+ # term_generator.set_stopper (stopper)
+
+ term_generator.set_document(document)
+ term_generator.index_text_without_positions(
+ self._extract_text(properties), 1, '')
+
if not self._document_exists(uid):
self._database.add_document(document)
else:
self._database.replace_document('Q' + uid, document)
self._database.flush()
+ def _extract_text(self, properties):
+ text = ''
+ for key, value in properties.items():
+ if key not in _PROPERTIES_NOT_TO_INDEX:
+ if text:
+ text += ' '
+ text += value
+ return text
+
def find(self, query):
enquire = Enquire(self._database)
enquire.set_query(self._parse_query(query))
@@ -62,8 +91,28 @@ class IndexStore(object):
return (uids, total_count)
def _parse_query(self, query_dict):
+ logging.debug('_parse_query %r' % query_dict)
queries = []
+ if query_dict.has_key('query'):
+ query_parser = QueryParser()
+ query_parser.set_database(self._database)
+ #query_parser.set_default_op(Query.OP_AND)
+
+ # TODO: we should do stemming, but in which language?
+ #query_parser.set_stemmer(_xapian.Stem(lang))
+ #query_parser.set_stemming_strategy(qp.STEM_SOME)
+
+ query = query_parser.parse_query(
+ query_dict['query'],
+ QueryParser.FLAG_PHRASE |
+ QueryParser.FLAG_BOOLEAN |
+ QueryParser.FLAG_LOVEHATE |
+ QueryParser.FLAG_WILDCARD,
+ '')
+
+ queries.append(query)
+
if query_dict.has_key('uid'):
queries.append(Query('Q' + query_dict['uid']))