commit ea06d07882613332afa18e1e0fe2323ba2580b06 (patch)
Merge:  a9ef157802cd7c3884814bc55b6cff1a5886c66e a41d2d5ca21115f3b76a789f5874f35dca089a3d
tree    7f50caccf827f209bebb05171cfb3e816f82d4f8
Author:    Benjamin Saller <bcsaller@objectrealms.net>  2007-07-13 04:33:19 (GMT)
Committer: Benjamin Saller <bcsaller@objectrealms.net>  2007-07-13 04:33:19 (GMT)

    Merge branch 'dualxap'
-rwxr-xr-x  bin/datastore-service                        8
-rwxr-xr-x  bin/index-service                          173
-rwxr-xr-x  bin/sample-client.py                        70
-rw-r--r--  etc/Makefile.am                              7
-rw-r--r--  etc/org.laptop.sugar.Indexer.service.in      3
-rw-r--r--  src/olpc/datastore/__init__.py               4
-rw-r--r--  src/olpc/datastore/backingstore.py         118
-rw-r--r--  src/olpc/datastore/converter.py              4
-rw-r--r--  src/olpc/datastore/datastore.py             97
-rw-r--r--  src/olpc/datastore/indexer.py               47
-rw-r--r--  src/olpc/datastore/model.py                528
-rw-r--r--  src/olpc/datastore/query.py                642
-rw-r--r--  src/olpc/datastore/xapianindex.py          390
-rw-r--r--  tests/Makefile                               4
-rwxr-xr-x  tests/cleaner.py                            40
-rw-r--r--  tests/milestone_1.txt                       24
-rw-r--r--  tests/milestone_2.txt                       16
-rw-r--r--  tests/mountpoints.txt                       15
-rw-r--r--  tests/properties.txt                        22
-rw-r--r--  tests/query.txt                            277
-rw-r--r--  tests/runalltests.py                        48
-rw-r--r--  tests/sugar_demo_may17.txt                   7
-rw-r--r--  tests/test_backingstore.py                  30
-rw-r--r--  tests/test_model.py                         34
-rw-r--r--  tests/test_xapianindex.py                   90
-rw-r--r--  tests/testutils.py                           5
-rw-r--r--  tests/xapianindex.txt                       73

27 files changed, 1097 insertions(+), 1679 deletions(-)
diff --git a/bin/datastore-service b/bin/datastore-service
index 4300619..532516b 100755
--- a/bin/datastore-service
+++ b/bin/datastore-service
@@ -4,7 +4,6 @@ import gobject
 import dbus.service
 import dbus.mainloop.glib
 from olpc.datastore import DataStore, DS_LOG_CHANNEL, backingstore
-from olpc.datastore.indexer import INDEX_SERVICE, INDEX_OBJECT_PATH
 import logging
 
 SYNC_INDEX = True
@@ -32,8 +31,6 @@ logging.basicConfig(level=logging.DEBUG,
                     filename = filename,
                     )
 # disable subsystem logging except where critical
-logging.getLogger('sqlalchemy').setLevel(logging.CRITICAL)
-logging.getLogger('lemur').setLevel(logging.CRITICAL)
 logger = logging.getLogger(DS_LOG_CHANNEL)
 
 # check for old lockfiles, the rules here are that we can't be
@@ -53,7 +50,7 @@ bus = dbus.SessionBus()
 ds = DataStore()
 ds.registerBackend(backingstore.FileBackingStore)
 ds.registerBackend(backingstore.InplaceFileBackingStore)
-ds.mount(repo_dir, {'querymanager_sync_index': SYNC_INDEX})
+ds.mount(repo_dir, {'indexmanager.sync_index': SYNC_INDEX})
 
 # and run it
 logger.info("Starting Datastore %s" % (repo_dir))
@@ -68,9 +65,6 @@ signal.signal(signal.SIGHUP, handle_shutdown)
 signal.signal(signal.SIGTERM, handle_shutdown)
 
 def main():
-    if SYNC_INDEX is False:
-        indexer = bus.get_object(INDEX_SERVICE, INDEX_OBJECT_PATH)
-
     try:
         mainloop.run()
     except KeyboardInterrupt:
         ds.stop()
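The mount options now carry a dotted 'indexmanager.' prefix instead of the old 'querymanager_' one, and backingstore.py (below) routes them through utils.options_for. The body of that helper is not part of this diff, so the following is only a sketch of the filtering behavior it appears to implement, including the invert flag used later to extract the non-prefixed remainder for the mount descriptor:

    def options_for(options, prefix, invert=False):
        # hypothetical reimplementation: keep (and strip) prefixed keys,
        # or, with invert=True, keep only the keys without the prefix
        d = {}
        for key, value in options.items():
            if key.startswith(prefix):
                if not invert: d[key[len(prefix):]] = value
            elif invert:
                d[key] = value
        return d

    # {'indexmanager.sync_index': True} would reach IndexManager.connect
    # as {'sync_index': True}
    print options_for({'indexmanager.sync_index': True}, 'indexmanager.')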
diff --git a/bin/index-service b/bin/index-service
deleted file mode 100755
index a2ff83c..0000000
--- a/bin/index-service
+++ /dev/null
@@ -1,173 +0,0 @@
-#!/usr/bin/env python
-
-""" Async index service for the Datastore.
-
-Subscribes to the create/update/delete messages of the Datastore and
-performs the indexing. When this service is enabled the Datastore
-access the Xapian repository in read only mode.
-"""
-
-
-try: from ore.main import Application
-except ImportError: Application = object
-
-from olpc.datastore.datastore import DS_SERVICE, DS_OBJECT_PATH
-from olpc.datastore.datastore import DS_DBUS_INTERFACE
-from olpc.datastore.indexer import Indexer
-import dbus
-import dbus.mainloop.glib
-import logging
-import sys
-import os
-import signal
-
-profile = os.environ.get('SUGAR_PROFILE', 'default')
-base_dir = os.path.join(os.path.expanduser('~'), '.sugar', profile)
-repo_dir = os.path.join(base_dir, 'datastore')
-fulltext_dir = os.path.join(repo_dir, 'fulltext')
-
-log_dir = os.path.join(base_dir, "logs")
-if not os.path.exists(log_dir): os.makedirs(log_dir)
-
-os.chdir(repo_dir)
-
-# setup logger
-filename = None
-if not sys.stdin.isatty():
-    filename = os.path.join(log_dir, "indexer.log")
-logging.basicConfig(level=logging.DEBUG,
-                    format="%(asctime)-15s %(levelname)s: %(message)s",
-                    filename = filename,
-                    )
-
-logger = logging.getLogger('org.laptop.sugar.Indexer')
-logger.setLevel(logging.DEBUG)
-
-class IndexService(Application):
-    def manage_options(self):
-        self.parser.add_option("--olpc.fulltext.repo",
-                               dest="fulltext_dir",
-                               action="store", default='fulltext',
-                               help="""Location of the FullText Repository""")
-
-    def main(self):
-        logging.debug('Starting the index service at %s' % self.options.fulltext_dir)
-        dbus.mainloop.glib.DBusGMainLoop(set_as_default=True)
-        bus = dbus.SessionBus()
-        self.fulltext = Indexer(self.options.fulltext_dir)
-        self.fulltext.use_fulltext = True
-
-        ds = bus.get_object(DS_SERVICE, DS_OBJECT_PATH)
-        self.ds = dbus.Interface(ds, dbus_interface=DS_DBUS_INTERFACE)
-
-        self.ds.connect_to_signal("Created", self.created,
-                                  dbus_interface=DS_DBUS_INTERFACE)
-
-        self.ds.connect_to_signal("Updated", self.updated,
-                                  dbus_interface=DS_DBUS_INTERFACE)
-
-        self.ds.connect_to_signal("Deleted", self.deleted,
-                                  dbus_interface=DS_DBUS_INTERFACE)
-
-        self.ds.connect_to_signal("Stopped", self.stopped,
-                                  dbus_interface=DS_DBUS_INTERFACE)
-
-        self.eventloop.run()
-
-    def get_textprops(self, uid):
-        # text properties also get full text indexing
-        # currently this is still searched with the 'fulltext'
-        # parameter of find()
-        textprops = {}
-        for k,v in self.ds.get_properties(uid, dict(type='text')).items():
-            textprops[str(k)] = v and str(v) or ''
-        return textprops
-
-    def created(self, uid):
-        """An object was created on the bus and we want to index it"""
-        # because the file isn't encoded anywhere accessible in the
-        # create call we must actually get the filename and trigger
-        # the indexing on that
-        filename = self.ds.get_filename(uid)
-        r = None
-        if filename:
-            mime_type = self.ds.get_properties(uid, {}).get('mime_type', None)
-            r = self.fulltext.fulltext_index(uid, filename, mime_type,
-                                             self.get_textprops(uid))
-        if r is True:
-            logger.debug("index creation of %s" % uid)
-        elif r is False:
-            logger.debug("unable to index creation of %s" % uid)
-        else:
-            logger.debug("nothing to index on creation of %s" % uid)
-
-    def updated(self, uid):
-        """An object was updated on the bus and we want to index it"""
-        # because the file isn't encoded anywhere accessible in the
-        # create call we must actually get the filename and trigger
-        # the indexing on that
-        filename = self.ds.get_filename(uid)
-        r = None
-        if filename:
-            mime_type = self.ds.get_properties(uid, {}).get('mime_type',
-                                                            None)
-            r = self.fulltext.fulltext_index(uid, filename, mime_type,
                                             self.get_textprops(uid))
-        if r is True:
-            logger.debug("index update of %s" % uid)
-        elif r is False:
-            logger.debug("unable to index update of %s" % uid)
-        else:
-            logger.debug("nothing to index on update of %s" % uid)
-
-    def deleted(self, uid):
-        """An object was updated on the bus and we want to index it"""
-        # because the file isn't encoded anywhere accessible in the
-        # create call we must actually get the filename and trigger
-        # the indexing on that
-        try:
-            self.fulltext.fulltext_unindex(uid)
-            logger.debug("unindex deletion of %s" % uid);
-        except KeyError: pass
-
-    def stopped(self):
-        """Respond to the datastore being stopped by shutting down
-        ourselves"""
-        self.fulltext.stop()
-        self.eventloop.quit()
-
-
-if __name__ == "__main__":
-    def handle_shutdown(signum, frame):
-        idx.stopped()
-        print "shutdown cleanly"
-        raise SystemExit("Shutting down on signal %s" % signum)
-
-    signal.signal(signal.SIGHUP, handle_shutdown)
-    signal.signal(signal.SIGTERM, handle_shutdown)
-
-    idx = IndexService()
-    #idx()
-    # w/o ore.main
-    import gobject
-    idx.eventloop = gobject.MainLoop()
-    class options(object): pass
-    o = options()
-    o.fulltext_dir = 'fulltext'
-    idx.options = o
-    try:
-        idx.main()
-    except:
-        # force logging this one
-        logger.setLevel(logging.DEBUG)
-        logger.debug("Problem in index service",
-                     exc_info=sys.exc_info())
-        idx.stopped()
diff --git a/bin/sample-client.py b/bin/sample-client.py
index bd609a7..2a2b19c 100755
--- a/bin/sample-client.py
+++ b/bin/sample-client.py
@@ -1,8 +1,6 @@
 #!/usr/bin/env python
-from ore.main import Application
 import dbus
 import os
-import time
 
 def main():
     bus = dbus.SessionBus()
@@ -10,36 +8,62 @@ def main():
                                    "/org/laptop/sugar/DataStore")
     uid = datastore.create(dict(title="from dbus", author="Benjamin"),
                            os.path.abspath('tests/test.pdf'))
-    print "created uid", uid
+    print "created uid", uid, "with binary content"
 
-    #for u in datastore.find()[0]:
-    #    datastore.delete(u['uid'])
-    #return
-    # let the async indexer run
-    time.sleep(1.2)
-    #import pdb;pdb.set_trace()
-    print "find", datastore.find(dict(author="Benjamin", title="from"))
+    datastore.complete_indexing()
 
+    res, count = datastore.find(dict(fulltext="peek"))
-    if not res:
-        print "unable to index content"
-        #return
-    print "bcsaller", [item['uid'] for item in res]
+    assert count == 1, "failed to index content"
+    assert res[0]['uid'] == uid, "returned incorrect results"
+    print "found inside binary file :: PDF"
+
+    assert datastore.find(dict(fulltext="kfdshaksjd"))[1] == 0
+    print "successfully ignored bad searches"
+
+    # try the other mimetypes
+    datastore.update(uid, dict(title="updated title",
+                               mime_type="application/msword"),
+                     os.path.abspath('tests/test.doc'))
-    print "huh?", datastore.find(dict(fulltext="kfdshaksjd"))
+    datastore.complete_indexing()
+
+    assert datastore.find(dict(fulltext="inside"))[0][0]['uid'] == uid
+    print "found in binary file :: WORD"
+
+    datastore.update(uid, dict(title="another updated title",
+                               mime_type="application/vnd.oasis.opendocument.text"),
+                     os.path.abspath('tests/test.odt'))
+    datastore.complete_indexing()
+
+    assert datastore.find(dict(fulltext="amazed"))[0][0]['uid'] == uid
+    print "found in binary file :: ODT"
 
-    # try the other mimetypes
-    datastore.update(uid, dict(title="updated title", mime_type="application/msword"), os.path.abspath('tests/test.doc'))
-    print datastore.find(dict(fulltext="inside"))
-    datastore.update(uid, dict(title="another updated title", mime_type="application/vnd.oasis.opendocument.text"), os.path.abspath('tests/test.odt'))
-    print datastore.find(dict(fulltext="amazed"))
     datastore.get_properties(uid)
-    print "title in fulltext", datastore.find(dict(fulltext="another"))
-
+    assert datastore.find(dict(title="another"))[0][0]['uid'] == uid
+    print "found title using dict params",
+
+    assert datastore.find("another")[0][0]['uid'] == uid
+    print "found title in search of all fields (as text)"
+
+    assert datastore.find('title:"another"')[0][0]['uid'] == uid
+    print "field in query field:'value' "
+
+    datastore.delete(uid)
+    datastore.complete_indexing()
+
+    print "deleted", uid
+    try: datastore.get_properties(uid)
+    except: pass
+    else:
+        print "Found deleted value... oops"
+        raise KeyError(uid)
+
+    print "ALL GOOD"
 
 if __name__ == '__main__':
+    #from ore.main import Application
     #a = Application("client", main)
     #a.plugins.append('ore.main.profile_support.ProfileSupport')
     #a()
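The reworked client also documents the three query forms that find() now accepts; complete_indexing() is the new synchronization point that replaces the old time.sleep(1.2) guess when waiting on the asynchronous indexer. In condensed form (datastore as in the script above):

    results, count = datastore.find(dict(title="another"))  # property dict
    results, count = datastore.find("another")              # free text over all fields
    results, count = datastore.find('title:"another"')      # field-scoped query syntax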
diff --git a/etc/Makefile.am b/etc/Makefile.am
index 1d8a54c..a9b28b1 100644
--- a/etc/Makefile.am
+++ b/etc/Makefile.am
@@ -1,15 +1,12 @@
 servicedir = $(datadir)/dbus-1/services
 
 service_in_files = \
-	org.laptop.sugar.DataStore.service.in \
-	org.laptop.sugar.Indexer.service.in
+	org.laptop.sugar.DataStore.service.in
+
 service_DATA = $(service_in_files:.service.in=.service)
 
 org.laptop.sugar.DataStore.service: org.laptop.sugar.DataStore.service.in
 	@sed -e "s|\@bindir\@|$(bindir)|" $< > $@
 
-org.laptop.sugar.Indexer.service: org.laptop.sugar.Indexer.service.in
-	@sed -e "s|\@bindir\@|$(bindir)|" $< > $@
-
 DISTCLEANFILES = $(service_DATA)
 EXTRA_DIST = $(service_in_files)

diff --git a/etc/org.laptop.sugar.Indexer.service.in b/etc/org.laptop.sugar.Indexer.service.in
deleted file mode 100644
index fb0a7ec..0000000
--- a/etc/org.laptop.sugar.Indexer.service.in
+++ /dev/null
@@ -1,3 +0,0 @@
-[D-BUS Service]
-Name = org.laptop.sugar.Indexer
-Exec = @bindir@/index-service

diff --git a/src/olpc/datastore/__init__.py b/src/olpc/datastore/__init__.py
index d38dcff..fd38d75 100644
--- a/src/olpc/datastore/__init__.py
+++ b/src/olpc/datastore/__init__.py
@@ -1,7 +1,5 @@
 # datastore package
+from olpc.datastore.datastore import DataStore, DS_LOG_CHANNEL
-from olpc.datastore.datastore import DataStore, DS_LOG_CHANNEL
-from olpc.datastore.backingstore import FileBackingStore
-from olpc.datastore.query import DefaultQueryManager

diff --git a/src/olpc/datastore/backingstore.py b/src/olpc/datastore/backingstore.py
index b0a05ad..354426e 100644
--- a/src/olpc/datastore/backingstore.py
+++ b/src/olpc/datastore/backingstore.py
@@ -17,7 +17,7 @@ import re
 import subprocess
 import time
 
-from olpc.datastore import query
+from olpc.datastore.xapianindex import IndexManager
 from olpc.datastore import utils
 
 # changing this pattern impacts _targetFile
@@ -75,7 +75,7 @@ class BackingStore(object):
     def load(self):
         """load the index for a given mount-point, then initialize
         its fulltext subsystem. This is the routine that will bootstrap
-        the querymanager (though create() may have just created it)
+        the indexmanager (though create() may have just created it)
         """
         pass
 
@@ -121,11 +121,11 @@ class FileBackingStore(BackingStore):
         """ FileSystemStore(path=<root of managed storage>)
         """
         self.options = kwargs
-        self.local_querymanager = self.options.get('local_querymanager', True)
+        self.local_indexmanager = self.options.get('local_indexmanager', True)
 
         self.uri = uri
         self.base = os.path.join(uri, self.STORE_NAME)
-        self.querymanager = None
+        self.indexmanager = None
 
     # Informational
     def descriptor(self):
@@ -190,47 +190,40 @@ class FileBackingStore(BackingStore):
         if not os.path.exists(self.base):
             os.makedirs(self.base)
 
-        # examine options and see what the querymanager plan is
-        if self.local_querymanager:
-            # create a local storage using the querymanager
+        # examine options and see what the indexmanager plan is
+        if self.local_indexmanager:
+            # create a local storage using the indexmanager
             # otherwise we will connect the global manager
             # in load
             index_name = os.path.join(self.base, self.INDEX_NAME)
-            options = utils.options_for(self.options, 'querymanager_')
-            if 'fulltext_repo' not in options:
-                options['fulltext_repo'] = os.path.join(self.base,
-                                                        query.DefaultQueryManager.FULLTEXT_NAME)
-
-            qm = query.DefaultQueryManager(index_name, **options)
+            options = utils.options_for(self.options, 'indexmanager.')
+            im = IndexManager()
 
             # This will ensure the fulltext and so on are all assigned
-            qm.bind_to(self)
-            qm.prepare()
+            im.bind_to(self)
+            im.connect(index_name, **options)
 
             self.create_descriptor(**options)
-            self.querymanager = qm
+            self.indexmanager = im
 
     def load(self):
-        if not self.querymanager and self.local_querymanager:
-            # create a local storage using the querymanager
+        if not self.indexmanager and self.local_indexmanager:
+            # create a local storage using the indexmanager
            # otherwise we will connect the global manager
            # in load
            index_name = os.path.join(self.base, self.INDEX_NAME)
-           options = utils.options_for(self.options, 'querymanager_')
-           if 'fulltext_repo' not in self.options:
-               options['fulltext_repo'] = os.path.join(self.base,
-                                                       query.DefaultQueryManager.FULLTEXT_NAME)
-
-           qm = query.DefaultQueryManager(index_name, **options)
+           options = utils.options_for(self.options, 'indexmanager.')
+           im = IndexManager()
 
            desc = utils.options_for(self.options,
-                                    'querymanager_', invert=True)
+                                    'indexmanager.',
+                                    invert=True)
            if desc: self.create_descriptor(**desc)
 
            # This will ensure the fulltext and so on are all assigned
-           qm.bind_to(self)
-           qm.prepare()
+           im.bind_to(self)
+           im.connect(index_name)
 
-           self.querymanager = qm
+           self.indexmanager = im
 
     def bind_to(self, datastore):
         ## signal from datastore that we are being bound to it
@@ -283,7 +276,7 @@ class FileBackingStore(BackingStore):
         # env would contain things like cwd if we wanted to map to a
         # known space
 
-        content = self.querymanager.get(uid)
+        content = self.indexmanager.get(uid)
         # we need to map a copy of the content from the backingstore into the
         # activities addressable space.
         # map this to a rw file
@@ -316,7 +309,7 @@ class FileBackingStore(BackingStore):
             fp.write(line)
         fp.close()
         if verify:
-            content = self.querymanager.get(uid)
+            content = self.indexmanager.get(uid)
             content.checksum = c.hexdigest()
 
     def _checksum(self, filename):
@@ -329,18 +322,18 @@ class FileBackingStore(BackingStore):
     # File Management API
     def create(self, props, filelike):
-        content = self.querymanager.create(props, filelike)
+        uid = self.indexmanager.index(props, filelike)
         filename = filelike
         if filelike:
             if isinstance(filelike, basestring):
                 # lets treat it as a filename
                 filelike = open(filelike, "r")
             filelike.seek(0)
-            self._writeContent(content.id, filelike, replace=False)
-        return content
+            self._writeContent(uid, filelike, replace=False)
+        return uid
 
     def get(self, uid, env=None, allowMissing=False):
-        content = self.querymanager.get(uid)
+        content = self.indexmanager.get(uid)
         if not content: raise KeyError(uid)
         path = self._translatePath(uid)
         fp = None
@@ -352,7 +345,9 @@ class FileBackingStore(BackingStore):
         return self._mapContent(uid, fp, path, env)
 
     def update(self, uid, props, filelike=None):
-        self.querymanager.update(uid, props, filelike)
+        if 'uid' not in props: props['uid'] = uid
+
+        self.indexmanager.index(props, filelike)
         filename = filelike
         if filelike:
             if isinstance(filelike, basestring):
@@ -365,7 +360,7 @@ class FileBackingStore(BackingStore):
             self._writeContent(uid, filelike)
 
     def delete(self, uid, allowMissing=True):
-        self.querymanager.delete(uid)
+        self.indexmanager.delete(uid)
         path = self._translatePath(uid)
         if os.path.exists(path):
             os.unlink(path)
@@ -374,21 +369,23 @@ class FileBackingStore(BackingStore):
             raise KeyError("object for uid:%s missing" % uid)
 
     def get_uniquevaluesfor(self, propertyname):
-        return self.querymanager.get_uniquevaluesfor(propertyname)
+        return self.indexmanager.get_uniquevaluesfor(propertyname)
 
     def find(self, query):
-        return self.querymanager.find(query)
+        return self.indexmanager.search(query)
 
     def stop(self):
-        self.querymanager.stop()
-
+        self.indexmanager.stop()
+
+    def complete_indexing(self):
+        self.indexmanager.complete_indexing()
 
 class InplaceFileBackingStore(FileBackingStore):
     """Like the normal FileBackingStore this Backingstore manages the
     storage of files, but doesn't move files into a repository.
     There are no working copies. It simply adds index data through its
-    querymanager and provides fulltext ontop of a regular
+    indexmanager and provides fulltext ontop of a regular
     filesystem.
     It does record its metadata relative to this mount point.
@@ -434,45 +431,48 @@ class InplaceFileBackingStore(FileBackingStore):
             for fn in filenames:
                 source = os.path.join(dirpath, fn)
                 relative = source[len(self.uri)+1:]
-                result, count = self.querymanager.find(dict(filename=relative))
+
+                result, count = self.indexmanager.search(dict(filename=relative))
                 if not count:
                     # create a new record
                     self.create(dict(filename=relative), source)
                 else:
                     # update the object with the new content iif the
                     # checksum is different
-                    # XXX: what if there is more than one? (shouldn't happen)
-                    content = result[0]
-                    uid = content
+                    # XXX: what if there is more than one? (shouldn't
+                    # happen)
+                    content = result.next()
+                    uid = content.id
                    # only if the checksum is different
-                   checksum = self._checksum(source)
-                   if checksum != content.checksum:
-                       self.update(uid, dict(filename=relative), source)
-
-        #self.querymanager.index.flush()
+                   #checksum = self._checksum(source)
+                   #if checksum != content.checksum:
+                   self.update(uid, dict(filename=relative), source)
 
-
+        if self.options.get('sync_mount', False):
+            self.complete_indexing()
+
     # File Management API
     def create(self, props, filelike):
         # the file would have already been changed inplace
         # don't touch it
-        return self.querymanager.create(props, filelike)
+        return self.indexmanager.index(props, filelike)
 
     def get(self, uid, env=None, allowMissing=False):
-        content = self.querymanager.get(uid)
+        content = self.indexmanager.get(uid)
         if not content: raise KeyError(uid)
         return content.get_property('filename')
 
     def update(self, uid, props, filelike=None):
         # the file would have already been changed inplace
         # don't touch it
-        self.querymanager.update(uid, props, filelike)
+        props['uid'] = uid
+        self.indexmanager.index(props, filelike)
 
-    def delete(self, uid, allowMissing=True):
-        c = self.querymanager.get(uid)
-        path = c.get_property('filename')
-        self.querymanager.delete(uid)
-        if os.path.exists(path):
+    def delete(self, uid):
+        c = self.indexmanager.get(uid)
+        path = c.get_property('filename', None)
+        self.indexmanager.delete(uid)
+        if path and os.path.exists(path):
             os.unlink(path)

diff --git a/src/olpc/datastore/converter.py b/src/olpc/datastore/converter.py
index 1250dbb..6f0ede6 100644
--- a/src/olpc/datastore/converter.py
+++ b/src/olpc/datastore/converter.py
@@ -95,11 +95,13 @@ class Converter(object):
         # maps both extension -> plugin
         # and mimetype -> plugin
         self._converters = {}
+        self._default = None
         self.logger = logging.getLogger('org.laptop.sugar.Indexer')
 
     def registerConverter(self, ext_or_mime, plugin):
         if plugin.verify():
             self._converters[ext_or_mime] = plugin
+            if self._default is None: self._default = plugin
 
     def __call__(self, filename, encoding=None, mimetype=None):
         """Convert filename's content to utf-8 encoded text."""
@@ -119,6 +121,8 @@ class Converter(object):
         converter = self._converters.get(mt)
         if not converter:
             converter = self._converters.get(ext)
+        if not converter:
+            converter = self._default
         if converter:
             try:
                 return converter(filename)
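With this change the first plugin that passes verify() doubles as the fallback, and lookup order becomes: exact mimetype, then file extension, then the default. A minimal sketch of that resolution chain (the function name here is illustrative, not from the module):

    def pick_converter(converters, default, mimetype, ext):
        # mimetype match wins, extension is second, the registered
        # default catches everything else
        return converters.get(mimetype) or converters.get(ext) or default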
We provide a facility for tracking ## co-authors of content ## there are associated changes to 'find' to resolve buddies - def addBuddy(self, id, name, fg_color, bg_color): - pass + def addBuddy(self, id, name, fg_color, bg_color, mountpoint=None): + mp = None + if mountpoint is None: mp = self.root + else: mp = self.mountpoints.get(mountpoint) + if mp is None: raise ValueError("Invalid mountpoint") + mp.addBuddy(id, name, fg_color, bg_color) + + def getBuddy(self, bid): + """Get a buddy by its id""" + b = None + for mp in self.mountpoints.itervalues(): + b = mp.getBuddy(bid) + if b: break + return b - def getBuddy(self, id): - pass def buddies(self): - pass + buddies = set() + for mp in self.mountpoints.itervalues(): + buddies = buddies.union(mp.getBuddies()) + return buddies + ## end buddy api @@ -146,7 +155,7 @@ class DataStore(dbus.service.Object): def _resolveMountpoint(self, mountpoint=None): if isinstance(mountpoint, dict): - mountpoint = mountpoint.get('mountpoint') + mountpoint = mountpoint.pop('mountpoint', None) if mountpoint is not None: # this should be the id of a mount point @@ -173,26 +182,15 @@ class DataStore(dbus.service.Object): over this process can come at a later time. """ mp = self._resolveMountpoint(props) - content = mp.create(props, filelike) - self.Created(content.id) - logging.debug("created %s" % content.id) + uid = mp.create(props, filelike) + self.Created(uid) + logging.debug("created %s" % uid) - return content.id + return uid @dbus.service.signal(DS_DBUS_INTERFACE, signature="s") def Created(self, uid): pass - - @dbus.service.method(DS_DBUS_INTERFACE, - in_signature='', - out_signature='as') - def all(self): - # workaround for not having optional args or None in - # DBus .. blah - results = self.querymanager.find() - return [r.id for r in results] - - def _multiway_search(self, query): mountpoints = query.pop('mountpoints', self.mountpoints) mountpoints = [self.mountpoints[str(m)] for m in mountpoints] @@ -306,9 +304,8 @@ class DataStore(dbus.service.Object): d = [] for r in results: props = {} - for prop in r.get_properties(): - props[prop.key] = prop.marshall() - + props.update(r.properties) + if 'uid' not in props: props['uid'] = r.id @@ -317,7 +314,7 @@ class DataStore(dbus.service.Object): filename = '' if include_files : - try: filename = self.backingstore.get(r.id).filename + try: filename = r.filename except KeyError: pass props['filename'] = filename d.append(props) @@ -344,25 +341,13 @@ class DataStore(dbus.service.Object): except AttributeError: pass return '' - def get_data(self, uid): - content = self.get(uid) - if content: - return content.get_data() - - def put_data(self, uid, data): - self.update(uid, None, StringIO(data)) - #@utils.sanitize_dbus @dbus.service.method(DS_DBUS_INTERFACE, - in_signature='sa{sv}', + in_signature='s', out_signature='a{sv}') - def get_properties(self, uid, query=None): + def get_properties(self, uid): content = self.get(uid) - dictionary = {} - if not query: query = {} - for prop in content.get_properties(**query): - dictionary[prop.key] = prop.marshall() - return dictionary + return content.properties @dbus.service.method(DS_DBUS_INTERFACE, in_signature='sa{sv}', @@ -372,7 +357,7 @@ class DataStore(dbus.service.Object): mountpoints = query.pop('mountpoints', self.mountpoints) mountpoints = [self.mountpoints[str(m)] for m in mountpoints] results = set() - + for mp in mountpoints: result = mp.get_uniquevaluesfor(propertyname) results = results.union(result) @@ -405,8 +390,8 @@ class DataStore(dbus.service.Object): 
content = self.get(uid) if content: content.backingstore.delete(uid) - self.Deleted(content.id) - logger.debug("deleted %s" % content.id) + self.Deleted(uid) + logger.debug("deleted %s" % uid) @dbus.service.signal(DS_DBUS_INTERFACE, signature="s") def Deleted(self, uid): pass @@ -421,4 +406,12 @@ class DataStore(dbus.service.Object): @dbus.service.signal(DS_DBUS_INTERFACE) def Stopped(self): pass - + @dbus.service.method(DS_DBUS_INTERFACE, + in_signature='', + out_signature='') + def complete_indexing(self): + """Block waiting for all queued indexing operations to + complete. Used mostly in testing""" + for mp in self.mountpoints.itervalues(): + mp.complete_indexing() + diff --git a/src/olpc/datastore/indexer.py b/src/olpc/datastore/indexer.py deleted file mode 100644 index de7ef33..0000000 --- a/src/olpc/datastore/indexer.py +++ /dev/null @@ -1,47 +0,0 @@ -""" -indexer -~~~~~~~~~~~~~~~~~~~~ -fulltext index module - -""" - -__author__ = 'Benjamin Saller <bcsaller@objectrealms.net>' -__docformat__ = 'restructuredtext' -__copyright__ = 'Copyright ObjectRealms, LLC, 2007' -__license__ = 'The GNU Public License V2+' - - -# the name used by the logger -import logging -import dbus.service -import dbus.mainloop.glib -from olpc.datastore.query import XapianFulltext - -INDEX_LOG_CHANNEL = 'org.laptop.sugar.Indexer' - -INDEX_SERVICE = "org.laptop.sugar.Indexer" -INDEX_DBUS_INTERFACE = "org.laptop.sugar.Indexer" -INDEX_OBJECT_PATH = "/org/laptop/sugar/Indexer" - -logger = logging.getLogger(INDEX_LOG_CHANNEL) - -class Indexer(dbus.service.Object, XapianFulltext): - # This object doesn't really publish an interface right now - # Its a bus object so that dbus can start it automatically - # when the datastore requests a binding to it - def __init__(self, repo='fulltext'): - # global handle to the main look - dbus.mainloop.glib.DBusGMainLoop(set_as_default=True) - session_bus = dbus.SessionBus() - - self.bus_name = dbus.service.BusName(INDEX_SERVICE, - bus=session_bus, - replace_existing=True, - allow_replacement=True) - dbus.service.Object.__init__(self, self.bus_name, INDEX_OBJECT_PATH) - - - self.connect_fulltext(repo, read_only=False) - - - diff --git a/src/olpc/datastore/model.py b/src/olpc/datastore/model.py index 8c8ab05..18d409f 100644 --- a/src/olpc/datastore/model.py +++ b/src/olpc/datastore/model.py @@ -10,17 +10,11 @@ __docformat__ = 'restructuredtext' __copyright__ = 'Copyright ObjectRealms, LLC, 2007' __license__ = 'The GNU Public License V2+' -from sqlalchemy import Table, Column, UniqueConstraint -from sqlalchemy import String, Integer, Unicode -from sqlalchemy import ForeignKey, Sequence, Index -from sqlalchemy import mapper, relation -from sqlalchemy import create_session -from sqlalchemy import MapperExtension, EXT_PASS, clear_mappers - import datetime import mimetypes import os import time +import warnings # XXX: Open issues # list properties - Contributors (a, b, c) @@ -28,51 +22,139 @@ import time # content state - searches don't include content deletion flag # - not recording if content is on other storage yet - -# we have a global thread local session factory -context = {} propertyTypes = {} _marker = object() -def get_session(backingstore): - return context[backingstore] +def registerPropertyType(kind, get, set, xapian_sort_type=None, defaults=None): + propertyTypes[kind] = PropertyImpl(get, set, xapian_sort_type, defaults) -def registerPropertyType(kind, class_): propertyTypes[kind] = class_ def propertyByKind(kind): return propertyTypes[kind] +class PropertyImpl(object): + 
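One subtle fix above: _resolveMountpoint now pops 'mountpoint' rather than reading it, because the same dict is subsequently reused as the property query, and popping keeps the routing key from leaking into the index search. For example:

    query = {'mountpoint': 'local', 'title': 'another'}
    mountpoint = query.pop('mountpoint', None)   # -> 'local'
    # query is now {'title': 'another'} and safe to pass on to find()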
diff --git a/src/olpc/datastore/indexer.py b/src/olpc/datastore/indexer.py
deleted file mode 100644
index de7ef33..0000000
--- a/src/olpc/datastore/indexer.py
+++ /dev/null
@@ -1,47 +0,0 @@
-"""
-indexer
-~~~~~~~~~~~~~~~~~~~~
-fulltext index module
-
-"""
-
-__author__ = 'Benjamin Saller <bcsaller@objectrealms.net>'
-__docformat__ = 'restructuredtext'
-__copyright__ = 'Copyright ObjectRealms, LLC, 2007'
-__license__ = 'The GNU Public License V2+'
-
-
-# the name used by the logger
-import logging
-import dbus.service
-import dbus.mainloop.glib
-from olpc.datastore.query import XapianFulltext
-
-INDEX_LOG_CHANNEL = 'org.laptop.sugar.Indexer'
-
-INDEX_SERVICE = "org.laptop.sugar.Indexer"
-INDEX_DBUS_INTERFACE = "org.laptop.sugar.Indexer"
-INDEX_OBJECT_PATH = "/org/laptop/sugar/Indexer"
-
-logger = logging.getLogger(INDEX_LOG_CHANNEL)
-
-class Indexer(dbus.service.Object, XapianFulltext):
-    # This object doesn't really publish an interface right now
-    # Its a bus object so that dbus can start it automatically
-    # when the datastore requests a binding to it
-    def __init__(self, repo='fulltext'):
-        # global handle to the main look
-        dbus.mainloop.glib.DBusGMainLoop(set_as_default=True)
-        session_bus = dbus.SessionBus()
-
-        self.bus_name = dbus.service.BusName(INDEX_SERVICE,
-                                             bus=session_bus,
-                                             replace_existing=True,
-                                             allow_replacement=True)
-        dbus.service.Object.__init__(self, self.bus_name, INDEX_OBJECT_PATH)
-
-        self.connect_fulltext(repo, read_only=False)

diff --git a/src/olpc/datastore/model.py b/src/olpc/datastore/model.py
index 8c8ab05..18d409f 100644
--- a/src/olpc/datastore/model.py
+++ b/src/olpc/datastore/model.py
@@ -10,17 +10,11 @@ __docformat__ = 'restructuredtext'
 __copyright__ = 'Copyright ObjectRealms, LLC, 2007'
 __license__ = 'The GNU Public License V2+'
 
-from sqlalchemy import Table, Column, UniqueConstraint
-from sqlalchemy import String, Integer, Unicode
-from sqlalchemy import ForeignKey, Sequence, Index
-from sqlalchemy import mapper, relation
-from sqlalchemy import create_session
-from sqlalchemy import MapperExtension, EXT_PASS, clear_mappers
-
 import datetime
 import mimetypes
 import os
 import time
+import warnings
 
 # XXX: Open issues
 # list properties - Contributors (a, b, c)
@@ -28,51 +22,139 @@ import time
 # content state - searches don't include content deletion flag
 #               - not recording if content is on other storage yet
 
-
-# we have a global thread local session factory
-context = {}
 propertyTypes = {}
 _marker = object()
 
-def get_session(backingstore):
-    return context[backingstore]
+def registerPropertyType(kind, get, set, xapian_sort_type=None, defaults=None):
+    propertyTypes[kind] = PropertyImpl(get, set, xapian_sort_type, defaults)
 
-def registerPropertyType(kind, class_): propertyTypes[kind] = class_
 def propertyByKind(kind): return propertyTypes[kind]
 
+class PropertyImpl(object):
+    __slots__ = ('_get', '_set', 'xapian_sort_type', 'defaults')
+
+    def __init__(self, get, set, xapian_sort_type=None, defaults=None):
+        self._get, self._set = get, set
+        self.xapian_sort_type = xapian_sort_type
+        self.defaults = defaults
+
+    def get(self, value): return self._get(value)
+    def set(self, value): return self._set(value)
+
+class Property(object):
+    """Light-weight property implementation.
+    Handles typed properties via a global registry of type->callbacks
+
+    >>> p = Property(key, value, 'string')
+    >>> b = Property(key, value, 'binary')
+    """
+    def __init__(self, key, value, kind=None):
+        self.key = key
+        self._value = value
+        self.kind = kind
+        if kind not in propertyTypes:
+            warnings.warn("Unknown property type: %s on key %s" % \
+                          (kind, key), RuntimeWarning)
+        else: self._impl = propertyTypes[kind]
+
+    @classmethod
+    def fromstring(cls, key, value=''):
+        kind = 'string'
+        if ':' in key:
+            key, kind = key.split(':', 1)
+        # now resolve the kind to a property class
+        return cls(key, value, kind)
 
-class Content(object):
     def __repr__(self):
-        return "<Content id:%s>" % (self.id, )
+        return "<%s(%s) %s:%r>" % (self.__class__.__name__,
+                                   self.kind,
+                                   self.key, self.value)
 
-    def get_property(self, key, default=_marker):
-        # mapped to property keys
-        session = get_session(self.backingstore)
-        query = session.query(Property)
-        p = query.get_by(content_id=self.id, key=key)
-        if not p:
-            if default is _marker: raise AttributeError(key)
-            return default
-        return p.value
-
-    def get_properties(self, **kwargs):
-        session = get_session(self.backingstore)
-        query = session.query(Property)
-        return query.select_by(content_id=self.id, **kwargs)
-
-    # Backingstore dependent bindings
-    def get_file(self):
-        if not hasattr(self, "_file") or self._file.closed is True:
-            self.backingstore.get(self.id)
-        return self._file
+    def get_value(self): return self._impl.get(self._value)
+    def set_value(self, value): self._value = self._impl.set(value)
+    value = property(get_value, set_value)
+
+    def __str__(self): return str(self.value)
+
+class Model(object):
+    """Object containing the field/property model used by the
+    system"""
 
-    def set_file(self, fileobj):
-        self._file = fileobj
-    file = property(get_file, set_file)
+    def __init__(self):
+        self.fields = {}
+        self.fieldnames = []
+
+    def copy(self):
+        m = Model()
+        m.fields = self.fields.copy()
+        m.fieldnames = self.fieldnames[:]
+        return m
+
+    def addField(self, key, kind, overrides=None):
+        """ Add a field to the model.
+        key -- field name
+        kind -- type by name (registered with registerPropertyType)
+        kwargs -- overrides and additional values to the default
+                  arguments supplied by kind
+        """
+        if key in self.fields:
+            raise KeyError("""Another source tried to add %s field to the model""" % key)
+
+        impl = propertyByKind(kind)
+        options = impl.defaults.copy()
+        if overrides: options.update(overrides)
+        if impl.xapian_sort_type:
+            if 'type' not in options:
+                options['type'] = impl.xapian_sort_type
+
+        self.fields[key] = (key, kind, options)
+        self.fieldnames.append(key)
+        return self
+
+    def addFields(self, *args):
+        """ List of arguments to addField """
+        for arg in args: self.addField(*arg)
+        return self
+
+    def apply(self, indexmanager):
+        addField = indexmanager.addField
+        for fn in self.fieldnames:
+            args = self.fields[fn]
+            addField(args[0], **args[2])
+
+class Content(object):
+    """A light weight proxy around Xapian Documents from secore.
+    This provides additional methods which are used in the
+    backingstore to assist in storage
+    """
+    __slots__ = ('_doc', '_backingstore', '_file')
+
+    def __init__(self, xapdoc, backingstore=None):
+        self._doc = xapdoc
+        self._backingstore = backingstore
+        self._file = None
+
+    def __repr__(self):
+        return "<%s %s>" % (self.__class__.__name__,
+                            self.properties)
+
+    def get_property(self, key, default=_marker):
+        result = self._doc.data.get(key, default)
+        if result is _marker: raise KeyError(key)
+        if isinstance(result, list) and len(result) == 1:
+            return result[0]
+        return result
 
     @property
-    def filename(self): return self.file.name
+    def properties(self):
+        d = {}
+        for k, v in self.data.iteritems():
+            if isinstance(v, list) and len(v) == 1:
+                v = v[0]
+            d[k] = v
+        return d
 
     def suggestName(self):
         # we look for certain known property names
@@ -89,8 +171,7 @@ class Content(object):
         f, e = os.path.splitext(filename)
         if e: return filename, None
         if ext: return "%s.%s" % (filename, ext), None
-        elif ext:
-            return None, ext
+        elif ext: return None, ext
         else:
             # try to get an extension from the mimetype if available
             mt = self.get_property('mime_type', None)
@@ -99,279 +180,110 @@ class Content(object):
             if ext: return None, ext
         return None, None
 
-    def get_data(self):
-        f = self.file
-        t = f.tell()
-        data = f.read()
-        f.seek(t)
-        return data
-
-    def set_data(self, filelike):
-        self.backingstore.set(self.id, filelike)
-
-    data = property(get_data, set_data)
-
-
-class BackingStoreContentMapping(MapperExtension):
-    """This mapper extension populates Content objects with the
-    binding to the backing store the files are kept on, this allow the
-    file-like methods to work as expected on content
-    """
-    def __init__(self, backingstore):
-        MapperExtension.__init__(self)
-        self.backingstore = backingstore
-
-    def populate_instance(self, mapper, selectcontext, row, instance, identitykey, isnew):
-        """called right before the mapper, after creating an instance
-        from a row, passes the row to its MapperProperty objects which
-        are responsible for populating the object's attributes. If
-        this method returns EXT_PASS, it is assumed that the mapper
-        should do the appending, else if this method returns any other
-        value or None, it is assumed that the append was handled by
-        this method.
-        """
-        instance.backingstore = self.backingstore
-        # allow normal population to happen
-        return EXT_PASS
-
-
-class Property(object):
-    """A typed key value pair associated with a content object.
-    This is the objects metadata. The value side of the kv pair is
-    typically encoded as a UTF-8 String. There are however cases where
-    richer metadata is required by the application using the
-    datastore.
-    In these cases the type field is overridden to encode a reference
-    to another object that must be used to satisfy this value. An
-    example of this would be storing a PNG thumbnail as the a
-    value. In a case such as that the value should be set to a path or
-    key used to find the image on stable storage or in a database and
-    the type field will be used to demarshall it through this object.
-    """
-    def __init__(self, key, value, type='string'):
-        self.key = key
-        self.value = value
-        self.type = type
-
-    def __repr__(self):
-        return "<%s %s:%r>" % (self.__class__.__name__,
-                               self.key, self.value)
-
-    def marshall(self):
-        """Return the value marshalled as a string"""
-        return str(self.value)
-
-class TextProperty(Property):
-    """A text property is one that will also get full automatic text
-    indexing when available. This is used for fields like title where
-    searching in the text is more important than doing a direct match
-    """
-    def __init__(self, key, value, type='text'):
-        Property.__init__(self, key, value, type)
-
-    def get_value(self): return self._value
-    def set_value(self, value): self._value = value
-    value = property(get_value, set_value)
-
+    def get_file(self):
+        if not hasattr(self, "_file") or self._file.closed is True:
+            self.backingstore.get(self.id)
+        return self._file
 
-class DateProperty(Property):
-    format = "%Y-%m-%dT%H:%M:%S"
-
-    def __init__(self, key, value, type="date"):
-        self._value = None
-        Property.__init__(self, key, value, type)
-
-    def get_value(self):
-        # parse the value back into a datetime
-        # XXX: strptime on datetime is a 2.5 thing :(
-        # XXX: we lose timezone in this conversion currently
-        if not self._value: return None
-        ti = time.strptime(self._value, self.format)
-        dt = datetime.datetime(*(ti[:-2]))
-        dt = dt.replace(microsecond=0)
-        return dt
-
-    def set_value(self, value):
-        if isinstance(value, basestring):
-            # XXX: there is an issue with microseconds not getting parsed
-            ti = time.strptime(value, self.format)
-            value = datetime.datetime(*(ti[:-2]))
-        value = value.replace(microsecond=0)
-
-        self._value = value.isoformat()
+    def set_file(self, fileobj):
+        self._file = fileobj
+    file = property(get_file, set_file)
 
-    value = property(get_value, set_value)
+    @property
+    def filename(self): return self.file.name
 
-    def marshall(self): return self.value.isoformat()
-
+    @property
+    def contents(self): return self.file.read()
 
-class NumberProperty(Property):
-    def __init__(self, key, value, type="number"):
-        Property.__init__(self, key, value, type)
-
-    def get_value(self): return float(self._value)
-    def set_value(self, value): self._value = value
-    value = property(get_value, set_value)
+    @property
+    def backingstore(self): return self._backingstore
 
+    @property
+    def id(self): return self._doc.id
 
-class BinaryProperty(Property):
-    # base64 encode binary data
-    def __init__(self, key, value, type="binary"):
-        Property.__init__(self, key, value, type)
-
-    def get_value(self): return self._value.decode('base64')
-    def set_value(self, value): self._value = value.encode('base64')
-    value = property(get_value, set_value)
-
-
-class Model(object):
-    """ Manages the global state of the metadata model index. This is
-    intended to only be consumed by an olpc.datastore.query.QueryManager
-    instance for the management of its metadata.
-
-    >>> m = Model()
-    >>> m.prepare(querymanager)
-
-    >>> m.content
-    ... # Content Table
-
-    >>> m['content']
-    ... # content Mapper
-
-    For details see the sqlalchemy documentation
-    """
-
-    def __init__(self):
-        self.tables = {}
-        self.mappers = {}
+    @property
+    def data(self): return self._doc.data
 
-    def __getattr__(self, key): return self.tables[key]
-    def __getitem__(self, key): return self.mappers[key]
-
-    def prepare(self, querymanager):
-        self.querymanager = querymanager
-
-        # a single session manages the exclusive access we keep to the
-        # db.
-        global context
-        self.session = create_session(bind_to=self.querymanager.db)
-        context[self.querymanager.backingstore] = self.session
-
-        # content object
-        content = Table('content',
-                        self.querymanager.metadata,
-                        Column('id', String, primary_key=True, nullable=False),
-                        Column('activity_id', Integer),
-                        Column('checksum', String,),
-                        UniqueConstraint('id', name='content_key')
-                        )
-        Index('content_activity_id_idx', content.c.activity_id)
-
-        # the properties of content objects
-        properties = Table('properties',
-                           self.querymanager.metadata,
-                           Column('id', Integer, Sequence('property_id_seq'), primary_key=True),
-                           Column('content_id', Integer, ForeignKey('content.id')),
-                           Column('key', Unicode, ),
-                           Column('value', Unicode, ),
-                           Column('type', Unicode, ),
-                           # unique key to content mapping
-                           UniqueConstraint('content_id', 'key',
-                                            name='property_content_key')
-                           )
-        Index('property_key_idx', properties.c.key)
-        Index('property_type_idx', properties.c.type)
-
-        # storage
-        storage = Table('storage',
-                        self.querymanager.metadata,
-                        Column('id', String, primary_key=True),
-                        Column('description', String, ),
-                        Column('uri', String, )
-                        )
-
-        # storage -> * content
-        # XXX: this could be a purely runtime in-memory construct
-        # removing the storage table as well. Would depend in part on
-        # the frequency of the garbage collection runs and the
-        # frequency of connection to stable storage
-        storage_content = Table('storage_content',
-                                self.querymanager.metadata,
-                                Column('storage_id', Integer, ForeignKey('storage.id')),
-                                Column('content_id', Integer, ForeignKey('content.id')),
-                                )
-        Index('idx_storage_content_content_id', storage_content.c.content_id)
-
-        # Object Mapping
-        # the query manager provides a mapping extension for
-        # Content <-> BackingStore binding
-
-        # XXX gross and not what we want, we can only define mappers
-        # once but we may have more than one datastore.
-        # this can impact all sqla in the runtime though
-        clear_mappers()
+## class Buddy(object):
+##     """A co-author on content. Information is collected and managed
+##     here"""
+##     pass
+
+
+def noop(value): return value
+
+import re
+base64hack = re.compile("(\S{212})")
+def base64enc(value): return ' '.join(base64hack.split(value.encode('base64')))
+def base64dec(value): return value.replace(' ', '').decode('base64')
+
+dateformat = "%Y-%m-%dT%H:%M:%S"
+def datedec(value, dateformat=dateformat):
+    ti = time.strptime(value, dateformat)
+    dt = datetime.datetime(*(ti[:-2]))
+    dt = dt.replace(microsecond=0)
+    return dt
+
+def dateenc(value, dateformat=dateformat):
+    if isinstance(value, basestring):
+        # XXX: there is an issue with microseconds not getting parsed
+        ti = time.strptime(value, dateformat)
+        value = datetime.datetime(*(ti[:-2]))
+    value = value.replace(microsecond=0)
+    # XXX: drop time for now, this is a xapian issue
+    value = value.date()
+    return value.isoformat()
+
+# type, get, set, xapian sort type [string|float|date], defaults
+# defaults are the default options to addField in IndexManager
+# these can be overridden on model assignment
+registerPropertyType('string', noop, noop, 'string', {'store' : True,
+                                                      'exact' : True,
+                                                      'sortable' : True})
+
+registerPropertyType('text', noop, noop, 'string', {'store' : True,
+                                                    'exact' : False,
+                                                    'sortable' : False})
+
+registerPropertyType('binary', noop, noop, None, {'store' : True,
+                                                  'exact' : False,
+                                                  'sortable' : False})
+
+registerPropertyType('int', str, int, 'float', {'store' : True,
+                                                'exact' : True,
+                                                'sortable' : True})
+
+registerPropertyType('number', str, float, 'float', {'store' : True,
+                                                     'exact' : True,
+                                                     'sortable' : True})
+
+registerPropertyType('date', dateenc, datedec, 'date', {'store' : True,
+                                                        'exact' : True,
+                                                        'sortable' : True
+                                                        })
+
+
+defaultModel = Model().addFields(
+    ('fulltext', 'text'),
+    # vid is version id
+    ('vid', 'number'),
+    ('checksum', 'string'),
+    ('filename', 'string'),
+    # Title has additional weight
+    ('title', 'text', {'weight' : 2 }),
+    ('url', 'string'),
+    ('mimetype', 'string'),
+    ('author', 'string'),
+    ('language', 'string'),
+    ('ctime', 'date'),
+    ('mtime', 'date'),
+    # this will just be a space delimited list of tags
+    # indexed with the content
+    # I give them high weight as they have user given semantic value.
+    ('tags', 'text', {'weight' : 3 } ),
+    )
 
-        content_mapper = mapper(Content, content,
-                                extension=self.querymanager.content_ext,
-                                properties = {
-                                    'properties' : relation(Property,
-                                                            cascade="all,delete-orphan",
-                                                            backref='content',
-                                                            lazy=True),
-                                    },
-                                )
-
-        # retain reference to these tables to use for queries
-        self.tables['content'] = content
-        self.tables['properties'] = properties
-        self.tables['storage'] = storage
-        self.tables['storage_content'] = storage_content
-
-        # and the mappers (though most likely not needed)
-        property_mapper = mapper(Property, properties, polymorphic_on=properties.c.type)
-        self.mappers['properties'] = property_mapper
-        self.mappers['content'] = content_mapper
-
-        # default Property types are mapped to classes here
-        self.addPropertyType(DateProperty, 'date')
-        self.addPropertyType(NumberProperty, 'number')
-        self.addPropertyType(TextProperty, 'text')
-        self.addPropertyType(BinaryProperty, 'binary')
-
-    def addPropertyType(self, PropertyClass, typename,
-                        map_value=True, **kwargs):
-        """Register a new type of Property. PropertyClass should be a
-        subclass of Property, typename is the textual
-        name of the new Property type.
-
-        The flag map_value indicates if Property.value should
-        automatically be diverted to _value so that you can more
-        easily manage the interfaces 'value' as a Python property
-        (descriptor)
-
-        Keyword args will be passed to the properties dictionary of
-        the sqlalchemy mapper call. See sqlalchemy docs for additional
-        details.
-        """
-        properties = {}
-        properties.update(kwargs)
-        if map_value is True:
-            properties['_value'] = self.properties.c.value
-
-        mapper(PropertyClass,
-               inherits=self.mappers['properties'],
-               polymorphic_identity=typename,
-               properties=properties
-               )
-
-        registerPropertyType(typename, PropertyClass)
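The new model replaces SQLAlchemy-mapped property classes with plain get/set codecs registered per kind. Assuming the registry above has been loaded, the pieces compose roughly like this (a sketch; the real addField consumer is the IndexManager in xapianindex.py, which is not shown in this excerpt, so a stub stands in for it):

    p = Property.fromstring('ctime:date', '2007-07-13T04:33:19')
    print p.kind     # 'date'
    print p.value    # '2007-07-13' -- dateenc drops the time of day,
                     # per the XXX comment in the diff above

    raw = ''.join(chr(i % 256) for i in range(300))
    assert base64dec(base64enc(raw)) == raw   # binary values round-trip

    class FakeIndex(object):                  # stand-in for IndexManager
        def addField(self, key, **kw): print 'addField', key, kw
    defaultModel.copy().apply(FakeIndex())    # registers every model field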
diff --git a/src/olpc/datastore/query.py b/src/olpc/datastore/query.py
deleted file mode 100644
index 2c5dd9f..0000000
--- a/src/olpc/datastore/query.py
+++ /dev/null
@@ -1,642 +0,0 @@
-"""
-olpc.datastore.query
-~~~~~~~~~~~~~~~~~~~~
-manage the metadata index and make it queryable. this in turn will
-depend on olpc.datastore.fulltext which indexes the actual content.
-
-"""
-
-__author__ = 'Benjamin Saller <bcsaller@objectrealms.net>'
-__docformat__ = 'restructuredtext'
-__copyright__ = 'Copyright ObjectRealms, LLC, 2007'
-__license__ = 'The GNU Public License V2+'
-
-
-from datetime import datetime
-from lemur.xapian.sei import DocumentStore, DocumentPiece, SortableValue
-from olpc.datastore.converter import converter
-from olpc.datastore.model import DateProperty, TextProperty
-from olpc.datastore.model import Model, Content, Property, propertyByKind
-from olpc.datastore.utils import create_uid
-
-from sqlalchemy import create_engine, BoundMetaData
-from sqlalchemy import select, intersect, and_
-import atexit
-import logging
-import os, sys
-
-_marker = object()
-
-
-class SugarDomain(object):
-    """The underlying property set used for metadata in the sugar
-    system"""
-    def kind_by_key(self, key):
-        """resolves property names to the factory type that supports
-        them in the model
-        """
-        # key may be a two part form directly indicating the property
-        # type
-        if ':' in key:
-            key, kind = key.split(':', 1)
-            # now resolve the kind to a property class
-            return key, propertyByKind(kind)
-
-        return key, {
-            'ctime' : DateProperty,
-            'mtime' : DateProperty,
-            'author' : Property,
-            'title' : TextProperty,
-            'mime_type' : Property,
-            'language' : Property,
-            }.get(key, Property)
-
-    def propertyFactory(self, key, value='', dict=None):
-        key, kind = self.kind_by_key(key)
-        p = kind(key, value)
-        if dict is not None: dict[key] = p
-        return kind
-
-    def _automaticProperties(self):
-        d = {}
-        now = datetime.now()
-        self.propertyFactory('mtime', now, dict=d)
-        return d
-
-    def _defaultProperties(self):
-        d = {}
-        now = datetime.now()
-        self.propertyFactory('ctime', now, dict=d)
-        self.propertyFactory('author', dict=d)
-        self.propertyFactory('title', dict=d)
-        self.propertyFactory('mime_type', dict=d)
-        self.propertyFactory('language', dict=d)
-
-        d.update(self._automaticProperties())
-        return d
-
-    def _normalizeProps(self, props, creating, include_defaults):
-        # return a dict of {name : property}
-        if isinstance(props, dict):
-            # convert it into a dict of Property objects
-            d = {}
-            for k,v in props.iteritems():
-                k, kind = self.kind_by_key(k)
-                p = kind(k, v)
-                d[k] = p
-            if creating and include_defaults:
-                defaults = self._defaultProperties()
-                for k, v in defaults.iteritems():
-                    if k not in d: d[k] = v
-            props = d
-        else:
-            d = {}
-            for p in props:
-                d[p.key] = p
-            props = d
-        return props
-
-
-class QueryManager(SugarDomain):
-    FULLTEXT_NAME = "fulltext"
-
-    def __init__(self, metadata_uri, **options):
-        """
-        The metadata_uri is a string used to find the database.
-
-        This will check keywords for:
-        'language' Language is the language code used in the fulltext
-                   engine. This helps improve stemming and
-                   so on. In the future additional control
-                   will be provided.
-
-        'sync_index' which determines if we use an internal
-                     sync index impl or an out of process one
-                     via DBus. If the async process is to be
-                     used it must be properly configured and
-                     available for DBus to spawn.
-
-        'fulltext_repo' the full filepath to which the fulltext
-                        index data will be stored
-
-        'use_fulltext' when true indexing will be performed
-
-        """
-        self.uri = metadata_uri
-        self.options = options
-
-        self.backingstore = None
-        self.content_ext = None
-
-    def _handle_option(self, options, key, default=_marker):
-        value = options.get(key, default)
-        if value is _marker: raise KeyError(key)
-        setattr(self, key, value)
-
-    def _handle_options(self, **kwargs):
-        self._handle_option(kwargs, 'fulltext_repo')
-        self._handle_option(kwargs, 'use_fulltext', True)
-        self._handle_option(kwargs, 'sync_index', True)
-        self._handle_option(kwargs, 'language', 'en')
-        self.sync_index = self.use_fulltext and self.sync_index
-
-    def bind_to(self, backingstore):
-        self.backingstore = backingstore
-
-    def prepare_index(self):
-        self.connect_db()
-        self.prepare_db()
-        self.connect_model()
-
-    def prepare_fulltext(self):
-        self.connect_fulltext(self.fulltext_repo, self.language,
-                              read_only=not self.sync_index)
-
-    def prepare(self):
-        """This is called by the datastore with its backingstore and
-        querymanager. Its assumed that querymanager is None and we are
-        the first in this release
-        """
-        self._handle_options(**self.options)
-        self.prepare_index()
-        self.prepare_fulltext()
-        return True
-
-    def stop(self):
-        pass
-
-    # Primary interface
-    def create(self, props, filelike=None, include_defaults=True):
-        """Props can either be a dict of k,v pairs or a sequence of
-        Property objects.
-
-        The advantage of using property objects is that the data can
-        by typed. When k/v pairs are used a default string type will
-        be chosen.
-
-        When include_defaults is True a default set of properties are
-        created on behalf of the Content if they were not provided.
-
-        These include:
-            author : ''
-            title : ''
-            mime_type : ''
-            language : '',
-            ctime : '',
-            mtime : '',
-        """
-        s = self.model.session
-        c = Content()
-        # its important the id be set before other operations
-        c.id = create_uid()
-        s.save(c)
-
-        self._bindProperties(c, props, creating=True, include_defaults=include_defaults)
-        s.flush()
-        c.backingstore = self.backingstore
-
-        if self.sync_index and filelike:
-            self.fulltext_index(c.id, filelike,
-                                mimetype=c.get_property('mime_type'),
-                                textprops=self.get_textprops(c))
-
-        return c
-
-    def update(self, content_or_uid, props=None, filelike=None):
-        content = self._resolve(content_or_uid)
-        content.backingstore = self.backingstore
-        if props is not None:
-            self._bindProperties(content, props, creating=False)
-            self.model.session.flush()
-
-        if self.sync_index and filelike:
-            self.fulltext_index(content.id, filelike, textprops=self.get_textprops(content))
-
-    def _bindProperties(self, content, props, creating=False, include_defaults=False):
-        """Handle either a dict of properties or a list of property
-        objects, binding them to the content instance.
-        """
-        # for information on include_defaults see create()
-        # default properties are only provided when creating is True
-        session = self.model.session
-
-        props = self._normalizeProps(props, creating,
-                                     include_defaults)
-
-        # we should have a dict of property objects
-        if creating:
-            content.properties.extend(props.values())
-        else:
-            # if the automatically maintained properties (like mtime)
-            # are not set, include them now
-            auto = self._automaticProperties()
-            auto.update(props)
-            props = auto
-            # we have to check for the update case
-            oldProps = dict([(p.key, p) for p in content.properties])
-            for k, p in props.iteritems():
-                if k in oldProps:
-                    oldProps[k].value = p.value
-                    oldProps[k].type = p.type
-                else:
-                    content.properties.append(p)
-
-    def get(self, uid):
-        return self.model.session.query(self.model.mappers['content']).get(uid)
-
-    def get_properties(self, content_or_uid, keys):
-        c = self._resolve(content_or_uid)
-        return self.model.session.query(Property).select_by(self.model.property.c.key.in_(keys),
-                                                            content_id=c.id)
-
-    def get_uniquevaluesfor(self, propertyname):
-        properties = self.model.tables['properties']
-        return [r[0] for r in select([properties.c.value],
-                                     properties.c.key==propertyname,
-                                     distinct=True).execute().fetchall()]
-
-    def delete(self, content_or_uid):
-        c = self._resolve(content_or_uid)
-        s = self.model.session
-        s.delete(c)
-        s.flush()
-        if self.sync_index:
-            self.fulltext_unindex(c.id)
-
-    def find(self, query=None, **kwargs):
-        """
-        dates can be search in one of two ways.
-        date='YYYY-MM-DD HH:MM:SS'
-        date={'start' : 'YYYY-MM-DD HH:MM:SS',
-              'end' : 'YYYY-MM-DD HH:MM:SS'
-              }
-        where date is either ctime or mtime.
-        if start or end is omitted its becomes a simple before/after
-        style query. If both are provided its a between query.
-
-        providing the key 'fulltext' will include a full text search
-        of content matching its parameters. see fulltext_search for
-        additional details.
-
-        If 'limit' is passed it will be the maximum number of results
-        to return and 'offset' will be the offset from 0 into the
-        result set to return.
-
-        """
-
-        # XXX: this will have to be expanded, but in its simplest form
-        if not self.sync_index: self.index.reopen()
-
-        s = self.model.session
-        properties = self.model.tables['properties']
-        if not query: query = {}
-        query.update(kwargs)
-        q = s.query(Content)
-        # rewrite the query to reference properties
-        # XXX: if there is a 'content' key will will have to search
-        # the content using the full text index which will result in a
-        # list of id's which must be mapped into the query
-        # fulltext_threshold is the minimum acceptable relevance score
-        limit = query.pop('limit', None)
-        offset = query.pop('offset', None)
-
-        if offset: q = q.offset(offset)
-        if limit: q = q.limit(limit)
-
-        if query:
-            where = []
-            fulltext = query.pop('fulltext', None)
-            threshold = query.pop('fulltext_threshold', 60)
-
-            statement = None
-            ft_select = None
-
-            if query:
-                # daterange support
-                # XXX: this is sort of a hack because
-                # - it relies on Manifest typing in sqlite
-                # - value's type is not normalized
-                # - we make special exception based on property name
-                # if we need special db handling of dates ctime/mtime
-                # will become columns of Content and not properties
-                ctime = query.pop('ctime', None)
-                mtime = query.pop('mtime', None)
-                if ctime or mtime:
-                    self._query_dates(ctime, mtime, where)
-                for k,v in query.iteritems():
-                    if isinstance(v, list):
-                        v = properties.c.value.in_(*v)
-                    else:
-                        v = properties.c.value==v
-
-                    where.append(select([properties.c.content_id],
-                                        and_( properties.c.key==k,
-                                              v)))
-
-                statement = intersect(*where)
-                statement.distinct=True
-
-            if fulltext and self.use_fulltext:
-                # perform the full text search and map the id's into
-                # the statement for inclusion
-                ft_res = self.fulltext_search(fulltext)
-                if ft_res:
-                    ft_ids = [ft[0] for ft in ft_res if ft[1] >= threshold]
-
-                    if ft_ids:
-                        ft_select = select([properties.c.content_id],
-                                           properties.c.content_id.in_(*ft_ids))
-
-                if ft_select is None:
-                    # the full text query eliminated the possibility
-                    # of results by returning nothing under a logical
-                    # AND condition, bail now
-                    return ([], 0)
-                else:
-                    if statement is None:
-                        statement = ft_select
-                        statement.distinct = True
-                    else:
-                        statement = intersect(statement, ft_select)
-
-            result = statement.execute()
-            r = [q.get(i[0]) for i in result]
-            r = (r, len(r))
-        else:
-            r = (q.select(), q.count())
-
-        # XXX: make sure the proper backingstore is mapped
-        # this currently forbids the use case of keeping index data
-        # for a read-only store.
-        for item in r[0]:
-            item.backingstore = self.backingstore
-
-        return r
-
-    # sqla util
-    def _resolve(self, content_or_uid):
-        if isinstance(content_or_uid, basestring):
-            # we need to resolve the object
-            content_or_uid = self.model.session.query(Content).get(content_or_uid)
-        return content_or_uid
-
-    def _query_dates(self, ctime, mtime, selects):
-        if ctime: selects.append(self._query_date('ctime', ctime))
-        if mtime: selects.append(self._query_date('mtime', mtime))
-
-    def _query_date(self, key, date):
-        properties = self.model.properties
-
-        if isinstance(date, basestring):
-            s = select([properties.c.content_id],
-                       and_( properties.c.key==key,
-                             properties.c.value==date))
-        else:
-            # its a dict with start/end
-            start = date.get('start')
-            end = date.get('end')
-            if start and end:
-                s = select([properties.c.content_id],
-                           and_( properties.c.key==key,
-                                 properties.c.value.between(start,
-                                                            end)))
-            elif start:
-                s = select([properties.c.content_id],
-                           and_( properties.c.key==key,
-                                 properties.c.value >= start))
-            else:
-                s = select([properties.c.content_id],
-                           and_( properties.c.key==key,
-                                 properties.c.value < end))
-
-        return s
-
-    def get_textprops(self, uid_or_content):
-        # text properties also get full text indexing
-        # currently this is still searched with the 'fulltext'
-        # parameter of find()
-        content = self._resolve(uid_or_content)
-        textprops = {}
-        for p in content.get_properties(type='text'):
-            textprops[p.key] = p.value and p.value or ''
-        return textprops
-
-    # fulltext interface
-    def fulltext_index(self, uid, fileobj, mimetype=None, textprops=None):
-        """Index the fileobj relative to uid which should be a
-        olpc.datastore.model.Content object's uid. The fileobj can be
-        either a pathname or an object implementing the Python file
-        ('read') interface.
-        """
-        pass
-
-    def fulltext_unindex(self, content_id):
-        pass
-
-    def fulltext_search(self, *args, **kwargs):
-        return []
-
-    # lifecycle
-    def connect_db(self):
-        """Connect to the underlying database. Called implicitly by
-        __init__"""
-        pass
-
-    def prepare_db(self):
-        """After connecting to the metadata database take any
-        initialization steps needed for the environment.
-
-        This is called implicitly by __init__ before the model is
-        brought online.
-        """
-        pass
-
-    def connect_model(self, model):
-        """Connect the model. Called with the model passed into
-        __init__ after the database has been prepared.
-        """
-        pass
-
-    def connect_fulltext(self, repo, language, read_only):
-        """Connect the full text index"""
-        pass
-
-
-class SQLiteQueryManager(QueryManager):
-    """The default implementation of the query manager. This owns the
-    model object and the fulltext object
-    """
-
-    def __init__(self, uri, **kwargs):
-        super(SQLiteQueryManager, self).__init__(uri, **kwargs)
-        # now re-write the URI to be sqlite specific
-        # (we were initialized to a namepattern in the proper
-        # directory by the backingstore)
-        self.uri= "sqlite:///%s.db" % self.uri
-
-    def connect_db(self):
-        self.db = create_engine(self.uri)
-        self.metadata = BoundMetaData(self.db)
-
-    def prepare_db(self):
-        # Using the sqlite backend we can tune the performance to
-        # limit writes as much as possible
-        if self.db.name.startswith('sqlite'):
-            connection = self.db.connect()
-            # cut down of per-activity file locking writes
-            connection.execute("PRAGMA locking_mode=EXCLUSIVE")
-            # don't demand fsync -- if this is too dangerous
-            # we can change it to normal which is still less writey
-            # than the default FULL
-            connection.execute("PRAGMA synchronous=OFF")
-            # temporary tables and indices are kept in memory
-            connection.execute("PRAGMA temp_store=MEMORY")
-            # XXX: what is the ideal jffs2 page size
-            # connection.execute("PRAGMA page_size 4096")
-
-    def connect_model(self, model=None):
-        if model is None: model = Model()
-        # take the model and connect it to us
-        model.prepare(self)
-
-        # make sure all the tables and indexes exist
-        self.metadata.create_all()
-
-        self.model = model
-
-    def stop(self):
-        # clean up
-        self.db.dispose()
-
-# Full text support
-def flatten_unicode(value): return value.encode('utf-8')
-
-class XapianBinaryValue(SortableValue):
-    def __init__(self, value, field_name="content"):
-        SortableValue.__init__(self, value, field_name)
-
-class XapianFulltext(object):
-    def connect_fulltext(self, repo, language='en', read_only=True):
-        if not os.path.exists(repo) and read_only is True:
-            # create the store
-            index = DocumentStore(repo, language, read_only=False)
-            index.close()
-            # and abandon it
-        self.index = DocumentStore(repo, language, read_only=read_only)
-        self.index.registerFlattener(unicode, flatten_unicode)
-        atexit.register(self.index.close)
-
-    def fulltext_index(self, uid, fileobj, mimetype=None, textprops=None):
-        """Index the fileobj relative to uid which should be a
-        olpc.datastore.model.Content's uid. The fileobj can be either
-        a pathname or an object implementing the Python file ('read')
-        interface.
-        """
-        piece = DocumentPiece
-        if isinstance(fileobj, basestring):
-            # treat it as a pathname
-            # use the global converter to try to get text from the
-            # file
-            fp = converter(fileobj, mimetype=mimetype)
-            #piece = XapianBinaryValue
-        elif hasattr(fileobj, 'read'):
-            # this is an off case, we have to assume utf-8 data
-            logging.debug("Indexing from readable, not filename")
-            fp = fileobj
-        else:
-            raise ValueError("Not a valid file object")
-
-        if fp is None:
-            # for whatever reason we were unable to get the content
-            # into an indexable form.
- logging.debug("Unable to index %s %s" % (uid, fileobj)) - return False - return self._ft_index(uid, fp, piece, textprops) - - def _ft_index(self, content_id, fp, piece=DocumentPiece, fields=None): - try: - doc = [piece(fp.read())] - if fields: - # add in properties that need extra fulltext like - # management - for key, value in fields.iteritems(): - doc.append(DocumentPiece(value, key)) - - self.index.addDocument(doc, content_id) - self.index.flush() - return True - except: - logging.debug("fulltext index exception", exc_info=sys.exc_info()) - return False - - - - def fulltext_search(self, *args, **kwargs): - """ - perform search(search_string, ) -> [(content_id, relevance), ...] - - search_string is a string defining the serach in standard web search - syntax. - - ie: it contains a set of search terms. Each search term may be - preceded by a "+" sign to indicate that the term is required, or a "-" - to indicate that is is required to be absent. - - If field_name is not None, it is the prefix of a field, which the - search will be restricted to. - - If field_name is None, the search will search all fields by default, - but search terms may be preceded by a fieldname followed by a colon to - restrict part of the search to a given field. - - combiner is one of DocumentStore.OP_OR or DocumentStore.OP_AND, and is - used to indicate the default operator used to combine terms. - - partial is a flag, which should be set to True to enable partial search - matching, for use when doing interactive searches and we're not sure if - the user has finished typing the search yet. - - range_restrictions is a RangeRestrictions object, used to restrict the - search results. - - """ - if len(args) == 1: - # workaround for api change - args = (args[0], 0, 10) - - res = self.index.performSearch(*args, **kwargs) - est = max(1, res.estimatedResultCount()) - return res.getResults(0, est) - - def fulltext_similar(self, *content_ids): - return self.index.findSimilar(content_ids) - - def fulltext_unindex(self, content_id): - self.index.deleteDocument(content_id) - - def stop(self): - if self.use_fulltext: - self.index.close() - - -class DefaultQueryManager(XapianFulltext, SQLiteQueryManager): - - def stop(self): - XapianFulltext.stop(self) - SQLiteQueryManager.stop(self) diff --git a/src/olpc/datastore/xapianindex.py b/src/olpc/datastore/xapianindex.py new file mode 100644 index 0000000..ec7206d --- /dev/null +++ b/src/olpc/datastore/xapianindex.py @@ -0,0 +1,390 @@ +""" +xapianindex +~~~~~~~~~~~~~~~~~~~~ +maintain indexes on content + +""" + +__author__ = 'Benjamin Saller <bcsaller@objectrealms.net>' +__docformat__ = 'restructuredtext' +__copyright__ = 'Copyright ObjectRealms, LLC, 2007' +__license__ = 'The GNU Public License V2+' + + +from Queue import Queue, Empty +import logging +import re + +import threading +import warnings + +import secore + +from olpc.datastore import model +from olpc.datastore.converter import converter +from olpc.datastore.utils import create_uid + + +# Setup Logger +logger = logging.getLogger('org.sugar.datastore.xapianindex') + +# Indexer Operations +CREATE = 1 +UPDATE = 2 +DELETE = 3 + + +class ContentMappingIter(object): + """An iterator over a set of results from a search. 
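+
+    Each call to next() wraps the underlying search result in a
+    model.Content bound to the owning backingstore, so hits are
+    materialized lazily as they are consumed.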
+ + """ + def __init__(self, results, backingstore): + self._results = results + self._backingstore = backingstore + self._iter = iter(results) + + def __iter__(self): return self + + def next(self): + searchresult = self._iter.next() + return model.Content(searchresult, self._backingstore) + + +class IndexManager(object): + DEFAULT_DATABASE_NAME = 'index' + + def __init__(self, default_language='en'): + # We will maintain two connections to the database + # we trigger automatic flushes to the read_index + # after any write operation + self.write_index = None + self.read_index = None + self.queue = Queue(0) + self.indexer_running = False + self.language = default_language + + self.backingstore = None + + self.fields = set() + + # + # Initialization + def connect(self, repo, **kwargs): + if self.write_index is not None: + warnings.warn('''Requested redundant connect to index''', + RuntimeWarning) + + self.repo = repo + self.write_index = secore.IndexerConnection(repo) + + # configure the database according to the model + datamodel = kwargs.get('model', model.defaultModel) + datamodel.apply(self) + + # store a reference + self.datamodel = datamodel + + self.read_index = secore.SearchConnection(repo) + + self.flush() + + # by default we start the indexer now + self.startIndexer() + + def bind_to(self, backingstore): + # signal from backingstore that its our parent + self.backingstore = backingstore + + # flow control + def flush(self): + """Called after any database mutation""" + self.write_index.flush() + self.read_index.reopen() + + def stop(self): + self.stopIndexer() + self.write_index.close() + self.read_index.close() + + # Index thread management + def startIndexer(self): + self.indexer_running = True + self.indexer = threading.Thread(target=self.indexThread) + self.indexer.setDaemon(True) + self.indexer.start() + + def stopIndexer(self, force=False): + if not self.indexer_running: return + if not force: self.queue.join() + self.indexer_running = False + self.indexer.join() + + def enque(self, uid, vid, doc, operation, filestuff=None): + self.queue.put((uid, vid, doc, operation, filestuff)) + + def indexThread(self): + # process the queue + # XXX: there is currently no way to remove items from the queue + # for example if a USB stick is added and quickly removed + # the mount should however get a stop() call which would + # request that the indexing finish + while self.indexer_running: + # include timeout here to ease shutdown of the thread + # if this is a non-issue we can simply allow it to block + try: + uid, vid, doc, operation, filestuff = self.queue.get(timeout=0.5) + if operation is DELETE: self.write_index.delete(uid) + elif operation in (CREATE, UPDATE): + # Here we handle the conversion of binary + # documents to plain text for indexing. This is + # done in the thread to keep things async and + # latency lower. 
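+                    # filestuff, when present, is the (filename,
+                    # mimetype) pair queued by index(); converter()
+                    # returns a file-like object of extracted plain
+                    # text, or None when no converter exists for that
+                    # mimetype, in which case the document is indexed
+                    # without a 'fulltext' field.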
+ if filestuff: + filename, mimetype = filestuff + fp = converter(filename, mimetype) + if fp: + doc.fields.append(secore.Field('fulltext', + fp.read())) + + if operation is CREATE: self.write_index.add(doc) + elif operation is UPDATE: self.write_index.replace(doc) + + else: + logger.warning("Unknown indexer operation ( %s: %s)" % \ + (uid, operation)) + continue + + # XXX: this isn't quite true, we haven't called flush + # yet so the document might not be on disk + logger.info("Indexed Content %s:%s" % (uid, vid)) + # but we still tell the queue its complete + self.queue.task_done() + + except Empty: + pass +## except: +## import traceback +## traceback.print_exc() +## try: self.write_index.close() +## except: pass +## try: +## self.write_index = secore.IndexerConnection(self.repo) +## self.read_index.reopen() +## except: +## # Shut down the indexer +## logger.critical("Indexer Failed, Shutting it down") +## self.indexer_running = False + + + + + + def complete_indexing(self): + """Intentionally block until the indexing is complete. Used + primarily in testing. + """ + self.queue.join() + self.flush() + + # + # Field management + def addField(self, key, store=True, exact=False, sortable=False, + type='string', collapse=False, + **kwargs): + language = kwargs.pop('language', self.language) + + xi = self.write_index.add_field_action + + if store: xi(key, secore.FieldActions.STORE_CONTENT) + if exact: xi(key, secore.FieldActions.INDEX_EXACT) + else: + # weight -- int 1 or more + # nopos -- don't include positional information + # noprefix -- boolean + xi(key, secore.FieldActions.INDEX_FREETEXT, language=language, **kwargs) + + if sortable: + xi(key, secore.FieldActions.SORTABLE, type=type) + if collapse: + xi(key, secore.FieldActions.COLLAPSE) + + # track this to find missing field configurations + self.fields.add(key) + + # + # Index Functions + def _mapProperties(self, props): + """data normalization function, maps dicts of key:kind->value + to Property objects + """ + d = {} + for k,v in props.iteritems(): + p = model.Property.fromstring(k, v) + d[p.key] = p + return d + + def index(self, props, filename=None): + """Index the content of an object. + Props must contain the following: + key -> Property() + """ + props = self._mapProperties(props) + doc = secore.UnprocessedDocument() + add = doc.fields.append + fp = None + operation = UPDATE + + filestuff = None + if filename: + # enque async file processing + # XXX: to make sure the file is kept around we could keep + # and open fp? 
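+            # props values are model.Property objects at this point
+            # (see _mapProperties); unwrap .value and assume
+            # text/plain when no mimetype property was supplied.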
+            mimetype = props.get("mimetype")
+            mimetype = mimetype and mimetype.value or 'text/plain'
+            filestuff = (filename, mimetype)
+
+        #
+        # Version handling
+        #
+        # we implicitly create new versions of documents; the version
+        # id should have been set by the higher level system
+        uid = props.pop('uid', None)
+        vid = props.pop('vid', None)
+
+        if uid: uid = uid.value
+        else:
+            uid = create_uid()
+            operation = CREATE
+
+        if vid: vid = str(float(vid.value) + 1.0)
+        else: vid = "1.0"
+
+        doc.id = uid
+        add(secore.Field('vid', vid))
+
+        #
+        # Property indexing
+        for k, prop in props.iteritems():
+            value = prop.value
+
+            if k not in self.fields:
+                warnings.warn("""Missing field configuration for %s""" % k,
+                              RuntimeWarning)
+                continue
+
+            add(secore.Field(k, value))
+
+        # queue the document for processing
+        self.enque(uid, vid, doc, operation, filestuff)
+
+        return uid
+
+    def get(self, uid):
+        doc = self.read_index.get_document(uid)
+        if not doc: raise KeyError(uid)
+        return model.Content(doc, self.backingstore)
+
+    def delete(self, uid):
+        # does this need queuing?
+        # the higher level abstractions have to handle interaction
+        # with versioning policy and so on
+        self.enque(uid, None, None, DELETE)
+
+    #
+    # Search
+    def search(self, query, start_index=0, end_index=50):
+        """search the xapian store.
+        query is a string defining the search in standard web search syntax.
+
+        ie: it contains a set of search terms.  Each search term may be
+        preceded by a "+" sign to indicate that the term is required, or a "-"
+        to indicate that it is required to be absent.
+        """
+        ri = self.read_index
+        if not query:
+            q = self.read_index.query_all()
+        elif isinstance(query, dict):
+            queries = []
+            # each term becomes part of the query join
+            for k, v in query.iteritems():
+                queries.append(ri.query_field(k, v))
+            q = ri.query_composite(ri.OP_AND, queries)
+        else:
+            q = self.parse_query(query)
+
+        results = ri.search(q, start_index, end_index)
+        count = results.matches_estimated
+
+        # map the result set to model.Content items
+        return ContentMappingIter(results, self.backingstore), count
+
+
+    def get_uniquevaluesfor(self, property):
+        # XXX: this is very sketchy code
+        # try to get the searchconnection to support this directly
+        # this should only apply to EXACT fields
+        r = set()
+        prefix = self.read_index._field_mappings.get_prefix(property)
+        plen = len(prefix)
+        termiter = self.read_index._index.allterms(prefix)
+        for t in termiter:
+            term = t.term
+            if len(term) > plen:
+                term = term[plen:]
+                if term.startswith(':'): term = term[1:]
+                r.add(term)
+
+        # r holds the textual representation of the fields value set
+        # if the type of field or property needs conversion to a
+        # different python type this has to happen now
+        descriptor = self.datamodel.fields.get(property)
+        if descriptor:
+            kind = descriptor[1]
+            impl = model.propertyByKind(kind)
+            r = set([impl.set(i) for i in r])
+
+        return r
+
+    def parse_query(self, query):
+        # accept standard web-query-like syntax
+        # 'this' -- match this
+        # 'this that' -- match this and that in document
+        # '"this that"' match the exact phrase 'this that'
+        # 'title:foo' match a document whose title contains 'foo'
+        # 'title:"A tale of two datastores"' exact title match
+        # '-this that' match that w/o this
+        ri = self.read_index
+        start = 0
+        end = len(query)
+        nextword = re.compile("(\S+)")
+        endquote = re.compile('(")')
+        queries = []
+        while start < end:
+            m = nextword.match(query, start)
+            if not m: break
+            orig = start
+            field = None
+            start = m.end() + 1
+            word =
m.group(1) + if ':' in word: + # see if its a field match + fieldname, w = word.split(':', 1) + if fieldname in self.fields: + field = fieldname + + word = w + + if word.startswith('"'): + qm = endquote.search(query, start) + if qm: + #XXX: strip quotes or not here + #word = query[orig+1:qm.end(1)-1] + word = query[orig:qm.end(1)] + start = qm.end(1) + 1 + + if field: + queries.append(ri.query_field(field, word)) + else: + queries.append(ri.query_parse(word)) + q = ri.query_composite(ri.OP_AND, queries) + return q diff --git a/tests/Makefile b/tests/Makefile index 7961b02..c2581cb 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -2,10 +2,9 @@ # its not an option to configure PYTHON=python -all: test +all: clean test test: - @rm -rf fulltext @${PYTHON} runalltests.py valgrind: @@ -17,6 +16,7 @@ profile: @${PYTHON} ./profilealltests.py clean: + @${PYTHON} ./cleaner.py @find . -name "*.pyc" -exec rm {} \; @find . -name "*~" -exec rm {} \; @find . -name "hotspot*" -exec rm {} \; diff --git a/tests/cleaner.py b/tests/cleaner.py new file mode 100755 index 0000000..cfa15bf --- /dev/null +++ b/tests/cleaner.py @@ -0,0 +1,40 @@ +#!/usr/bin/python +import os +import re +from ore.main import Application + +filepattern = re.compile("(\w{8})\-(\w{4})\-(\w{4})\-(\w{4})\-(\w{12})") +tmppattern = re.compile("tmp\S{6}") +onepattern = re.compile("one.*\.txt") + +staticdirs = re.compile('test_ds|store\d') + +filepatterns = [filepattern, tmppattern, onepattern] +dirpatterns = [staticdirs] + +class Cleaner(Application): + def manage_options(self): + self.parser.add_option("--base", dest="base_dir", + action="store", default='/tmp', + help="""Where to clean (/tmp)""") + + def main(self): + """clean up files left from testing in /tmp""" + # this is done using patterned names + for root, dirs, files in os.walk(self.options.base_dir): + for filename in files: + for pat in filepatterns: + if pat.match(filename): + fn = os.path.join(root, filename) + os.remove(fn) + break + for dirname in dirs: + for pat in dirpatterns: + if pat.match(dirname): + dn = os.path.join(root, dirname) + os.system('rm -rf %s' % dn) + +if __name__ == "__main__": + Cleaner("cleaner")() + + diff --git a/tests/milestone_1.txt b/tests/milestone_1.txt index bde3720..48d09bc 100644 --- a/tests/milestone_1.txt +++ b/tests/milestone_1.txt @@ -12,6 +12,10 @@ datastore. First, create and connect the store. +>>> from testutils import waitforindex +>>> import os +>>> assert os.system('rm -rf /tmp/test_ds') == 0 + >>> from olpc.datastore import DataStore >>> from olpc.datastore import backingstore @@ -35,11 +39,13 @@ Note that we retain no reference to the created documents. Now we should be able to test the first requirement. * Get the unique ids of all the objects in the store. +>>> waitforindex(ds) + >>> results, count = ds.find() A find command with out any parameters will return everything in the store. -* Get an object from the store given his uid. +* Get an object from the store given its uid. Here we manually cycle through the results looking for the title we want. @@ -51,30 +57,24 @@ want. * Get the object metadata. >>> c1.properties -[...] +{...} * Get the object file. >>> c1.filename '/tmp/...' 
->>> c1.data +>>> c1.contents 'this is the first document' >>> c1.file <open file ...> -Or if you prefer access through the datastore (which is how DBus would -use it) - ->>> fn = ds.get_filename(first_uid) ->>> ds.get_data(first_uid) -'this is the first document' - Now we can modify that file and then * Push the changes made to the file back to the store. * Update the metadata of an object. +>>> fn = c1.filename >>> fp = open(fn, 'a') >>> print >>fp, "more content" >>> fp.close() @@ -87,6 +87,4 @@ We can also remove the file from the repository. This is the basis of milestone 1. >>> ds.stop() ->>> del ds - - +>>> assert os.system('rm -rf /tmp/test_ds') == 0 diff --git a/tests/milestone_2.txt b/tests/milestone_2.txt index 516d497..73fd43a 100644 --- a/tests/milestone_2.txt +++ b/tests/milestone_2.txt @@ -7,17 +7,22 @@ First clean up from any other tests. >>> assert os.system('rm -rf /tmp/test_ds/') == 0 >>> from olpc.datastore import DataStore ->>> from olpc.datastore import backingstore +>>> from olpc.datastore import backingstore, model >>> ds = DataStore() >>> ds.registerBackend(backingstore.FileBackingStore) ->>> assert ds.mount("/tmp/test_ds") +>>> dm = model.defaultModel.copy().addField('year', 'int').addField('month', 'string') ->>> a = ds.create(dict(title="Content A", author="Bob", year=1999, month="Jan"), '') ->>> b = ds.create(dict(title="Content B", author="Alice", year=2000, month="Jan"), '') +>>> assert ds.mount("/tmp/test_ds", {'indexmanager.model' : dm}) + +>>> a = ds.create(dict(title="Content A", author="Bob", year="1999", month="Jan"), '') +>>> b = ds.create(dict(title="Content B", author="Alice", year="2000", month="Jan"), '') Find should return both >>> def find2uids(results): return [i['uid'] for i in results[0]] ->>> assert set(find2uids(ds.find({}))) == set([a,b]) + +>>> ds.complete_indexing() + +>>> assert set(find2uids(ds.find())) == set([a,b]) But what if we want the results ordered? @@ -35,3 +40,4 @@ and if we want to reverse order it? >>> ds.stop() >>> del ds +>>> assert os.system('rm -rf /tmp/test_ds/') == 0 diff --git a/tests/mountpoints.txt b/tests/mountpoints.txt index 9a821b5..eebad9b 100644 --- a/tests/mountpoints.txt +++ b/tests/mountpoints.txt @@ -20,7 +20,7 @@ Here we create a datastore, and mount a backingstore on tmp. By default this will create a new directory in /tmp which will then be used for storage. ->>> ds = DataStore(sync_index=True) +>>> ds = DataStore() >>> ds.registerBackend(backingstore.FileBackingStore) >>> mp1 = ds.mount("/tmp/store1", dict(title="Primary Storage")) @@ -36,11 +36,12 @@ can be used to control the storage target or to filter results. Now lets create some content >>> u1 = ds.create(dict(title="Document 1", filename="one.txt"), tmpData("""document one""")) ->>> u2 = ds.create(dict(title="Document 2", mime_type="text/plain"), tmpData("""document two""")) +>>> u2 = ds.create(dict(title="Document 2", mimetype="text/plain"), tmpData("""document two""")) We can now, if we wish verify which mount point this content came from. +>>> ds.complete_indexing() >>> c1 = ds.get(u1) >>> assert c1.backingstore.id == mountpoint @@ -61,6 +62,8 @@ Now lets add another mount point. Now lets create a new content item. >>> u3 = ds.create(dict(title="Document 3", mountpoint=mp2), tmpData("""document three""")) +>>> ds.complete_indexing() + We explictly passed a mount point here. Lets examine the properties of the object and verify this. 
 >>> c3 = ds.find(dict(title="Document 3"))[0][0]
@@ -102,6 +105,8 @@ Register the filesystem type
 
 If that worked it should have imported content on load().
 
+>>> ds.complete_indexing()
+
 >>> result, count = ds.find(dict(fulltext="four"))
 >>> assert count == 1
 >>> assert result[0]['mountpoint'] == mp3
@@ -112,7 +117,11 @@ as DBus data.
 
 >>> ds.unmount(mp3)
 
->>> mp3 = ds.mount("inplace:/tmp/store3", dict(title=dbus.String("Fake USB again")))
+>>> mp3 = ds.mount("inplace:/tmp/store3", dict(title=dbus.String("Fake USB again"),
+...                                            sync_mount=True))
+
+>>> ds.complete_indexing()
+
 >>> result, count = ds.find(dict(fulltext="four"))
 >>> assert count == 1
diff --git a/tests/properties.txt b/tests/properties.txt
index 689414f..fe34782 100644
--- a/tests/properties.txt
+++ b/tests/properties.txt
@@ -8,16 +8,23 @@ properties to content and managing them.
 
 >>> from olpc.datastore import DataStore
->>> from olpc.datastore import backingstore
+>>> from olpc.datastore import backingstore, model
 >>> from testutils import tmpData
 
 >>> import dbus
 
 Set up two mount points.
 
->>> ds = DataStore(sync_index=True)
+>>> ds = DataStore()
 >>> ds.registerBackend(backingstore.FileBackingStore)
->>> mp1 = ds.mount("/tmp/store1", dict(title="Primary Storage"))
->>> mp2 = ds.mount("/tmp/store2", dict(title="Secondary Storage"))
+
+Extend the model to retain a 'year' property used below.
+
+>>> dm = model.defaultModel.copy().addField('year', "int")
+
+Mount a couple of stores.
+
+>>> mp1 = ds.mount("/tmp/store1", {'title' : "Primary Storage", 'indexmanager.model' : dm})
+>>> mp2 = ds.mount("/tmp/store2", {'title' : "Secondary Storage", 'indexmanager.model' : dm})
 
 Create some content on each.
 
@@ -28,15 +35,14 @@ Create some content on each.
 >>> u4 = ds.create({'title' : "Gamma doc", 'author' : "HAL", 'year:number' : 2001, 'mountpoint' : mp2}, tmpData("""Document 4"""))
 
 Now we should be able to discover things about the system properties.
+>>> ds.complete_indexing()
 
 Here we test that we can extract the unique values for certain
 properties.
 
 >>> assert set(ds.get_uniquevaluesfor('author')) == set(['Ben', 'HAL'])
 
-Here we try to gather the values for the property year. We'd expect
-these values to come back as numbers, however in the current
-implementation they are stored as unicode values.
+Here we try to gather the values for the property year.
 
->>> assert set(ds.get_uniquevaluesfor('year')) == set([u'2000', u'2001'])
+>>> assert set(ds.get_uniquevaluesfor('year')) == set([2000, 2001])
diff --git a/tests/query.txt b/tests/query.txt
deleted file mode 100644
index 2c58851..0000000
--- a/tests/query.txt
+++ /dev/null
@@ -1,277 +0,0 @@
-This document outlines the basic usage of the olpc.datastore.query and
-olpc.datastore.model modules. Note that these are not used independently
-of the olpc.datastore.backend which in turn is only accessed through the
-olpc.datastore module. This is intended only to document the innards
-of those modules.
-
->>> import os
->>> assert os.system('rm -rf /tmp/_test_index') == 0
->>> assert os.system('rm -rf /tmp/_test_fulltext') == 0
-
-
-First let's create a query manager
-
->>> from olpc.datastore.query import DefaultQueryManager
->>> from olpc.datastore.model import Property
->>> qm = DefaultQueryManager("/tmp/_test_index", fulltext_repo='/tmp/_test_fulltext')
->>> qm.prepare()
-True
-
-That will create the memory-backed database which will be used in this
-documentation. The call to prepare() is normally invoked by the higher
-level datastore for you.
-
-Because this is a new database there should be nothing in it. We can
-verify this with the call to the find() method.
-
->>> qm.find()
-([], 0)
-
-
-The simplest way to add an entry to the datastore is by passing a
-dictionary of properties to the create method.
-
->>> a = qm.create(dict(title="New Content"))
-
-Find will now return this object.
-
->>> qm.find()
-([<Content id:...>], 1)
-
-We can examine the Properties of this object.
-
->>> a.properties
-[... <TextProperty title:'New Content'>, ...]
-
-This returned a list of all properties on the Content object in which
-case we can find the property by enumeration. The other option is
-using the get_properties call on Content.
-
->>> a.get_properties(key='title')
-[<TextProperty title:'New Content'>]
-
-Using the query manager API we are able to update the
-properties. Using this form automatically synchronizes with the
-database and the property is immediately available. To demonstrate
-that this works let's attach another property.
->>> qm.update(a, dict(author='Benjamin'))
-
-A request for title still returns only the title property.
->>> a.get_properties(key='title')
-[<TextProperty title:'New Content'>]
-
-And a request for author works as expected.
->>> a.get_properties(key='author')
-[<Property author:'Benjamin'>]
-
->>> qm.update(a, dict(foo='bar'))
->>> set([p.key for p in a.properties]) == set(['title', 'mtime', 'ctime', 'language', 'mime_type', 'author', 'foo'])
-True
-
-We could have also passed an id for the content object rather than the
-object itself. A list of properties would have been acceptable in
-place of a dictionary. One thing that is shown here is that a number
-of default properties were added when the Content object was
-created. This is done by default and can be controlled by the
-include_defaults flag to the create() method.
-
-Some of the default Property objects have values which are not of the
-'string' type. 'ctime' and 'mtime' are examples of these.
-
->>> a.get_property('ctime')
-datetime.datetime(...)
-
-We can see that ctime has been mapped to a standard Python
-datetime.datetime instance. olpc.datastore.model includes support for
-'number' and 'datetime' Property types by default. To add support for
-new property types see the olpc.datastore.model.Model.addPropertyType
-method.
-
-Here we want to show that certain types of Properties map to
-specialized implementations automatically based on their type. 'ctime'
-is a DateTime Property and we can verify that it is returned properly
-from the mapping layer with the following.
->>> ctimeProp = a.get_properties(key='ctime')[0]
->>> ctimeProp.type == "date"
-True
->>> type(ctimeProp)
-<class 'olpc.datastore.model.DateProperty'>
-
-Special support is needed to make dates easily addressable within the
-datastore. The properties 'ctime' (creation time) and 'mtime'
-(modification time) are supported. To query on these properties two
-methods are available.
-
->>> qm.find(ctime="2007-01-01 00:00")
-([], 0)
-
-That matches nothing. The other form is to pass a dict with
-'start' and 'end' range boundaries.
->>> import datetime
->>> now = datetime.datetime.now().isoformat()
-
->>> qm.find(ctime=dict(end=now))
-([<Content id:...>], 1)
-
-
-Property keys are unique per Content item. This means that adding a
-Property with an existing key should update the value rather than add
-another Property with the same key.
-
->>> qm.update(a.id, [Property('another', 'property')])
->>> a.get_property('another')
-'property'
-
->>> l1 = len(a.properties)
->>> qm.update(a, dict(another="test"))
->>> a.get_property('another')
-'test'
->>> len(a.properties) == l1
-True
-
-Both forms of passing properties work.
-
->>> l1 = len(a.properties)
->>> qm.update(a, [Property('another', 'value')])
->>> a.get_property('another')
-'value'
->>> len(a.properties) == l1
-True
-
-
-We can also navigate from a Property object to the content to which it
-refers. This is available through the 'content' attribute of
-properties. Only properties bound to content and synchronized with the
-database have this attribute.
-
->>> p = a.get_properties(key='author')[0]
->>> p.content
-<Content id:...>
-
-Let's create additional content.
-
->>> b = qm.create(dict(title="My Picture", author="Sarah", mime_type="image/png"))
->>> c = qm.create(dict(title="My Song", author="Sarah", mime_type="audio/mp3"))
-
-At this point the find() method should be able to provide us with more
-interesting results. Note that find() has multiple
-responsibilities. First it must search the Properties of objects;
-second, it provides access to the full-text index associated with
-content in the datastore.
-
-For now we are only interested in the properties.
-
->>> qm.find(mime_type="image/png") == ([b], 1)
-True
-
->>> qm.find(author="Benjamin") == ([a],1)
-True
-
->>> qm.find(author="Sarah") == ([b, c],2)
-True
-
->>> qm.find(author="Sarah", mime_type="audio/mp3") == ([c], 1)
-True
-
-Passing the special key 'fulltext' to find will pass an expression
-to the full text index engine. The query manager maintains a full-text
-index in parallel to the normal metadata storage. The full text index
-is updated on every create, update and delete call to the query
-manager provided the mime_type Property of the Content is one
-understood by the index.
-
->>> from StringIO import StringIO
->>> qm.update(a, {}, StringIO("this is my content, hear it roar"))
->>> qm.find(fulltext="roar") == ([a], 1)
-True
-
-Combining this with properties also works.
->>> qm.find(fulltext="roar", author="Benjamin") == ([a], 1)
-True
-
-And we can verify the negative as well.
->>> qm.find(fulltext="roar", author="Sarah")
-([], 0)
-
-Calls to update() and create() both take an optional filelike argument
-which will update the full-text indexed content with the new value of
-the file.
-
->>> qm.update(a, filelike=StringIO("different text"))
-
-The new content will be found.
->>> qm.find(fulltext="different", author="Benjamin") == ([a], 1)
-True
-
-And the old content is not.
->>> qm.find(fulltext="roar", author="Benjamin")
-([], 0)
-
-
-Passing a filename for the file works as well. Files can be in a variety
-of binary formats, including PDF.
->>> qm.update(a, filelike="test.doc")
->>> qm.find(fulltext="roar", author="Benjamin")
-([], 0)
->>> qm.find(fulltext="amazed", author="Benjamin") == ([a], 1)
-True
-
-We have converters for DOC, PDF and ODT by default.
-
->>> qm.update(a, filelike="test.pdf")
->>> qm.find(fulltext="peek", author="Benjamin") == ([a], 1)
-True
-
-
->>> qm.update(a, filelike="test.odt")
->>> qm.find(fulltext="amazed", author="Benjamin") == ([a], 1)
-True
-
->>> qm.update(a, dict(title="titled indeed"), filelike="test.doc")
->>> qm.find(fulltext="amazed", author="Benjamin") == ([a], 1)
-True
-
-For the last example we can see that we also updated the title.
Here -we show that because title is a 'text' property, rather than a simple -string its contents will be available to the text indexing engine as -well. - -Searching for a direct match on the property works. ->>> qm.find(title="titled indeed") == ([a], 1) -True - -Doing a search for text internal to the title doesn't however. - ->>> qm.find(title="indeed") == ([a], 1) -False - -Searching for it in the fulltext index does return a result. ->>> qm.find(fulltext="indeed") == ([a], 1) -True - -Searching for only title in fulltext index does return a result as well. ->>> qm.find(fulltext="title:indeed") == ([a], 1) -True - - -Here we show off the get_uniquevaluesfor call examining all the values -used in the 'author' field. - ->>> assert set(qm.get_uniquevaluesfor('author')) == set(['Benjamin', 'Sarah']) - -Now that we can see a set of possible values it might be nice to -select any content with properties from a known set. For example - ->>> r, c = qm.find(author=['Benjamin', 'Sarah']) ->>> assert c == 3 - -By putting the request value in a list we can ask that the value be -'IN' this collection. All participating values are included in this -way. - - -Now for politeness we shut everything down ->>> qm.stop() ->>> import shutil, os ->>> shutil.rmtree('/tmp/_test_fulltext') ->>> os.unlink('/tmp/_test_index.db') diff --git a/tests/runalltests.py b/tests/runalltests.py index bbf0f97..02034b9 100644 --- a/tests/runalltests.py +++ b/tests/runalltests.py @@ -14,15 +14,14 @@ import unittest import doctest from pkg_resources import resource_filename -from sqlalchemy import clear_mappers doctests = [ - resource_filename(__name__, "query.txt"), + resource_filename(__name__, "xapianindex.txt"), resource_filename(__name__, "milestone_1.txt"), resource_filename(__name__, "sugar_demo_may17.txt"), resource_filename(__name__, "milestone_2.txt"), resource_filename(__name__, "mountpoints.txt"), - resource_filename(__name__, "properties.txt") + resource_filename(__name__, "properties.txt"), ] @@ -30,43 +29,30 @@ doctest_options = doctest.ELLIPSIS doctest_options |= doctest.REPORT_ONLY_FIRST_FAILURE -# IF YOU ARE NOT GETTING THE RESULTS YOU EXPECT WHILE TESTING -# THIS IS THE LIKELY CAUSE -# :: Use distutils to modify the pythonpath for inplace testing -# using the build directory -from distutils.util import get_platform -plat_specifier = ".%s-%s" % (get_platform(), sys.version[0:3]) -build_platlib = os.path.join("build", 'lib' + plat_specifier) -test_lib = os.path.join(os.path.abspath(".."), build_platlib) -sys.path.insert(0, test_lib) -# END PATH ADJUSTMENT CODE - - - -def tearDownDS(test): - # reset the module global mappers used in SQLAlchemy between tests - clear_mappers() - # and remove the test repository used in some tests - os.system('rm -rf /tmp/test_ds') - def test_suite(): + global doctests suite = unittest.TestSuite() + if len(sys.argv) > 1: + doctests = sys.argv[1:] + for dt in doctests: suite.addTest(doctest.DocFileSuite(dt, - optionflags=doctest_options, tearDown=tearDownDS)) + optionflags=doctest_options)) - tests = os.listdir(os.curdir) - tests = [n[:-3] for n in tests if n.startswith('test') and - n.endswith('.py')] + if len(sys.argv) <= 1: + tests = os.listdir(os.curdir) + tests = [n[:-3] for n in tests if n.startswith('test') and + n.endswith('.py')] - for test in tests: - m = __import__(test) - if hasattr(m, 'test_suite'): - suite.addTest(m.test_suite()) + for test in tests: + m = __import__(test) + if hasattr(m, 'test_suite'): + suite.addTest(m.test_suite()) return suite if __name__ == 
"__main__": runner = unittest.TextTestRunner(verbosity=1) - runner.run(test_suite()) + suite = test_suite() + runner.run(suite) diff --git a/tests/sugar_demo_may17.txt b/tests/sugar_demo_may17.txt index c899799..f242140 100644 --- a/tests/sugar_demo_may17.txt +++ b/tests/sugar_demo_may17.txt @@ -2,6 +2,7 @@ How Sugar will interact with the DS for the May 17th demo in Argentina: >>> from olpc.datastore import DataStore >>> from olpc.datastore import backingstore +>>> from testutils import waitforindex >>> ds = DataStore() >>> ds.registerBackend(backingstore.FileBackingStore) >>> assert ds.mount("/tmp/test_ds") @@ -9,11 +10,14 @@ How Sugar will interact with the DS for the May 17th demo in Argentina: Create an entry without data: >>> uid = ds.create(dict(title="New entry"), '') +>>> waitforindex(ds) + >>> ds.get_filename(uid) '' Update an entry without data: >>> ds.update(uid, dict(title="New entry still without content"), '') +>>> waitforindex(ds) >>> ds.get_filename(uid) '' @@ -23,6 +27,7 @@ Add some data to the same entry: >>> print >>fp, "some content" >>> fp.close() >>> ds.update(uid, dict(title="Same entry now with some content"), fp.name) +>>> waitforindex(ds) Retrieve that data: >>> fn = ds.get_filename(uid) @@ -36,6 +41,7 @@ Update again: >>> print >>fp, "some other content" >>> fp.close() >>> ds.update(uid, dict(title="Same entry with some other content"), fp.name) +>>> waitforindex(ds) And retrieve again: >>> fn = ds.get_filename(uid) @@ -60,6 +66,7 @@ Set content as pdf: >>> ds.update(uid, dict(title="Same entry with some content in pdf"), 'test.pdf') >>> ds.update(uid, dict(title="Same entry with some content in doc"), 'test.doc') >>> ds.update(uid, dict(title="Same entry with some content in odt"), 'test.odt') +>>> waitforindex(ds) >>> ds.stop() >>> del ds diff --git a/tests/test_backingstore.py b/tests/test_backingstore.py index 28fdeba..8aab9e6 100644 --- a/tests/test_backingstore.py +++ b/tests/test_backingstore.py @@ -1,21 +1,21 @@ import unittest -from StringIO import StringIO +from testutils import tmpData, waitforindex from olpc.datastore import backingstore -from sqlalchemy import clear_mappers import os DEFAULT_STORE = '/tmp/_bs_test' class Test(unittest.TestCase): - def tearDown(self): + def setUp(self): if os.path.exists(DEFAULT_STORE): os.system("rm -rf %s" % DEFAULT_STORE) - clear_mappers() + def tearDown(self): + if os.path.exists(DEFAULT_STORE): + os.system("rm -rf %s" % DEFAULT_STORE) def test_fsstore(self): - clear_mappers() bs = backingstore.FileBackingStore(DEFAULT_STORE) bs.initialize_and_load() bs.create_descriptor() @@ -28,20 +28,28 @@ class Test(unittest.TestCase): d = """This is a test""" d2 = "Different" - c = bs.create(dict(title="A"), StringIO(d)) - obj = bs.get(c.id) + uid = bs.create(dict(title="A"), tmpData(d)) + + waitforindex(bs) + + obj = bs.get(uid) + assert obj.get_property('title') == "A" got = obj.file.read() assert got == d - bs.update(c.id, dict(title="B"), StringIO(d2)) - obj = bs.get(c.id) + bs.update(uid, dict(title="B"), tmpData(d2)) + + waitforindex(bs) + + obj = bs.get(uid) assert obj.get_property('title') == "B" got = obj.file.read() assert got == d2 - bs.delete(c.id) - self.failUnlessRaises(KeyError, bs.get, c.id) + bs.delete(uid) + bs.complete_indexing() + self.failUnlessRaises(KeyError, bs.get, uid) def test_suite(): suite = unittest.TestSuite() diff --git a/tests/test_model.py b/tests/test_model.py index 6e8c896..2ac2fb2 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -1,35 +1,55 @@ import unittest -from testutils 
import tmpData +from testutils import tmpData, waitforindex from olpc.datastore import DataStore from olpc.datastore import model, backingstore import datetime import os + +DEFAULT_STORE = '/tmp/test_ds' + class Test(unittest.TestCase): + def setUp(self): os.system('rm -rf %s' % DEFAULT_STORE) + def tearDown(self): os.system('rm -rf %s' % DEFAULT_STORE) + def test_dateproperty(self): n = datetime.datetime.now() # we have to kill the microseconds as # time.strptime which we must use in 2.4 doesn't parse it n = n.replace(microsecond=0) - p = model.DateProperty('ctime', n) + p = model.Property('ctime', n, 'date') assert p.key == "ctime" - assert p.value.isoformat() == n.isoformat() + # XXX: the 'date()' is a work around for a missing secore + # feature right now + assert p.value == n.date().isoformat() + def test_binaryproperty(self): ds = DataStore() ds.registerBackend(backingstore.FileBackingStore) - ds.mount('/tmp/test_ds') + + #add a custom field to the model + dm = model.defaultModel.copy().addField('thumbnail', 'binary') + + ds.mount(DEFAULT_STORE, {'indexmanager.model' : dm}) + + data = open('test.jpg', 'r').read() # binary data with \0's in it can cause dbus errors here - uid = ds.create({'title' : "Document 1", 'thumbnail:binary' : data}, - tmpData("with image\0\0 prop")) + fn = tmpData("with image\0\0 prop") + # XXX: We should be able to remove:binary now + uid = ds.create({'title' : "Document 1", 'thumbnail:binary' : data}, fn) + + waitforindex(ds) + c = ds.get(uid) assert c.get_property('thumbnail') == data + ds.stop() - os.system('rm -rf /tmp/test_ds') + def test_suite(): suite = unittest.TestSuite() diff --git a/tests/test_xapianindex.py b/tests/test_xapianindex.py new file mode 100644 index 0000000..db6afef --- /dev/null +++ b/tests/test_xapianindex.py @@ -0,0 +1,90 @@ +from olpc.datastore.xapianindex import IndexManager +import os +from datetime import datetime + +import time +import unittest +import gnomevfs + +DEFAULT_STORE = '/tmp/_xi_test' + + +def index_file(iconn, filepath): + """Index a file.""" + + mimetype = gnomevfs.get_mime_type(filepath) + main, subtype = mimetype.split('/',1) + + stat = os.stat(filepath) + ctime = datetime.fromtimestamp(stat.st_ctime) + mtime = datetime.fromtimestamp(stat.st_mtime) + + if main in ['image']: filepath = None + if subtype in ['x-trash', 'x-python-bytecode']: filepath = None + + + + props = {'mimetype' : mimetype, 'mtime:date' : mtime, + 'ctime:date' : ctime,} + + if filepath: + fn = os.path.split(filepath)[1] + props['filename'] = fn + + iconn.index(props, filepath) + + return 1 + +def index_path(iconn, docpath): + """Index a path.""" + count = 0 + for dirpath, dirnames, filenames in os.walk(docpath): + for filename in filenames: + filepath = os.path.join(dirpath, filename) + index_file(iconn, filepath) + count += 1 + return count + +class Test(unittest.TestCase): + def setUp(self): + if os.path.exists(DEFAULT_STORE): + os.system("rm -rf %s" % DEFAULT_STORE) + + def tearDown(self): + if os.path.exists(DEFAULT_STORE): + os.system("rm -rf %s" % DEFAULT_STORE) + + def test_index(self): + # import a bunch of documents into the store + im = IndexManager() + im.connect(DEFAULT_STORE) + + # test basic index performance + start = time.time() + count = index_path(im, os.getcwd()) + end = time.time() + delta = end - start + + #print "%s in %s %s/sec" % (count, delta, count/delta) + + # wait for indexing to finish + im.complete_indexing() + + # test basic search performance + results = list(im.search('peek')[0]) + + # this indicates that we 
found text inside binary content that
+        # we expected
+        assert 'test.pdf' in set(r.get_property('filename') for r in results)
+
+        assert im.search('mimetype:application/pdf filename:test.pdf peek')[1] == 1
+
+
+def test_suite():
+    suite = unittest.TestSuite()
+    suite.addTest(unittest.makeSuite(Test))
+    return suite
+
+if __name__ == "__main__":
+    unittest.main()
+
diff --git a/tests/testutils.py b/tests/testutils.py
index 243747a..a4efc0a 100644
--- a/tests/testutils.py
+++ b/tests/testutils.py
@@ -7,3 +7,8 @@ def tmpData(data):
     os.write(fd, data)
     os.close(fd)
     return fn
+
+def waitforindex(obj):
+    # wait for any/all index managers associated with object to finish
+    # indexing so that tests can do their thing
+    obj.complete_indexing()
diff --git a/tests/xapianindex.txt b/tests/xapianindex.txt
new file mode 100644
index 0000000..354d7a8
--- /dev/null
+++ b/tests/xapianindex.txt
@@ -0,0 +1,73 @@
+The xapian index module can be used directly as follows.
+
+First clean up any old test data.
+
+>>> index_home = "/tmp/xi"
+>>> import os, sys, time, logging
+>>> assert os.system('rm -rf %s' % index_home) == 0
+
+# >>> logging.basicConfig(level=logging.DEBUG,
+# ...                     format="%(asctime)-15s %(name)s %(levelname)s: %(message)s",
+# ...                     stream=sys.stderr)
+
+
+>>> from olpc.datastore.xapianindex import IndexManager
+>>> from olpc.datastore import model
+>>> im = IndexManager()
+>>> im.connect(index_home)
+
+
+Now add the file to the index.
+
+>>> props = dict(title="PDF Document",
+...              mimetype="application/pdf")
+
+
+>>> uid = im.index(props, "test.pdf")
+
+Let the async indexer do its thing. We ask the indexer if it has work
+left; when it has none, we expect our content to be indexed and searchable.
+
+>>> im.complete_indexing()
+
+
+Searching on a property of the content works.
+>>> def expect(r, count=None):
+...     if count: assert r[1] == count
+...     return list(r[0])
+>>> def expect_single(r):
+...     assert r[1] == 1
+...     return r[0].next()
+>>> def expect_none(r):
+...     assert r[1] == 0
+...     assert list(r[0]) == []
+
+
+>>> assert expect_single(im.search("PDF")).id == uid
+
+Searching into the binary content of the object works as well.
+>>> assert expect_single(im.search("peek")).id == uid
+
+Specifying a search that demands a document term be found only in the
+title works as well.
+
+>>> assert expect_single(im.search('title:PDF')).id == uid
+>>> expect_none(im.search('title:peek'))
+
+Searching for documents that are PDFs works as expected. Here we use
+the dictionary form of the query, where each field name is given and
+creates a search.
+>>> assert expect_single(im.search(dict(mimetype='application/pdf'))).id == uid
+
+Punctuation is fine.
+
+>>> assert expect_single(im.search("Don't peek")).id == uid
+
+As well as quoted strings.
+
+>>> assert expect_single(im.search(r'''"Don't peek"''')).id == uid
+
+Cleanly shut down.
+>>> im.stop()
+
+>>> assert os.system('rm -rf %s' % index_home) == 0
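
Taken together, the pieces above replace the old SQLAlchemy-based query
manager with a single asynchronous Xapian index. The following is a minimal
usage sketch, not part of the patch, based only on the calls exercised in
tests/xapianindex.txt and tests/test_xapianindex.py; the repository path is
illustrative, and test.pdf is assumed to exist in the working directory as
it does in the test suite.

from olpc.datastore.xapianindex import IndexManager

im = IndexManager()
im.connect('/tmp/xi_demo')

# index() queues the work and returns the new uid immediately; the
# daemon indexer thread converts and indexes the file asynchronously.
uid = im.index(dict(title="PDF Document",
                    mimetype="application/pdf"), "test.pdf")

im.complete_indexing()    # block until the queue drains

# free-text and field-prefixed queries both go through search()
results, count = im.search('title:PDF')
for content in results:
    assert content.id == uid

im.stop()

Because indexing is asynchronous, complete_indexing() (or the
waitforindex() helper added to tests/testutils.py) must run before newly
queued documents are expected to be searchable.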