diff options
author | Benjamin Saller <bcsaller@objectrealms.net> | 2007-07-12 21:17:48 (GMT) |
---|---|---|
committer | Benjamin Saller <bcsaller@objectrealms.net> | 2007-07-12 21:17:48 (GMT) |
commit | 7aae48766ae46bd530a3c556cd4e92a0e02f7ad3 (patch) | |
tree | 62e37ba449d5d0f628af9c0f7b1401828f2a154d | |
parent | f577c2c142c7648a482e0eec7ecd736c1ca716d7 (diff) |
check point before prop kind/type merge
-rwxr-xr-x | bin/datastore-service | 2 | ||||
-rw-r--r-- | etc/Makefile.am | 7 | ||||
-rw-r--r-- | src/olpc/datastore/__init__.py | 4 | ||||
-rw-r--r-- | src/olpc/datastore/backingstore.py | 94 | ||||
-rw-r--r-- | src/olpc/datastore/converter.py | 4 | ||||
-rw-r--r-- | src/olpc/datastore/datastore.py | 71 | ||||
-rw-r--r-- | src/olpc/datastore/model.py | 523 | ||||
-rw-r--r-- | src/olpc/datastore/xapianindex.py | 195 | ||||
-rw-r--r-- | tests/Makefile | 4 | ||||
-rw-r--r-- | tests/milestone_1.txt | 22 | ||||
-rw-r--r-- | tests/mountpoints.txt | 9 | ||||
-rw-r--r-- | tests/properties.txt | 15 | ||||
-rw-r--r-- | tests/query.txt | 14 | ||||
-rw-r--r-- | tests/runalltests.py | 11 | ||||
-rw-r--r-- | tests/sugar_demo_may17.txt | 7 | ||||
-rw-r--r-- | tests/test_backingstore.py | 29 | ||||
-rw-r--r-- | tests/test_model.py | 35 | ||||
-rw-r--r-- | tests/testutils.py | 18 | ||||
-rw-r--r-- | tests/xapianindex.txt | 47 |
19 files changed, 581 insertions, 530 deletions
diff --git a/bin/datastore-service b/bin/datastore-service index 4300619..b21e529 100755 --- a/bin/datastore-service +++ b/bin/datastore-service @@ -53,7 +53,7 @@ bus = dbus.SessionBus() ds = DataStore() ds.registerBackend(backingstore.FileBackingStore) ds.registerBackend(backingstore.InplaceFileBackingStore) -ds.mount(repo_dir, {'querymanager_sync_index': SYNC_INDEX}) +ds.mount(repo_dir, {'indexmanager.sync_index': SYNC_INDEX}) # and run it logger.info("Starting Datastore %s" % (repo_dir)) diff --git a/etc/Makefile.am b/etc/Makefile.am index 1d8a54c..a9b28b1 100644 --- a/etc/Makefile.am +++ b/etc/Makefile.am @@ -1,15 +1,12 @@ servicedir = $(datadir)/dbus-1/services service_in_files = \ - org.laptop.sugar.DataStore.service.in \ - org.laptop.sugar.Indexer.service.in + org.laptop.sugar.DataStore.service.in + service_DATA = $(service_in_files:.service.in=.service) org.laptop.sugar.DataStore.service: org.laptop.sugar.DataStore.service.in @sed -e "s|\@bindir\@|$(bindir)|" $< > $@ -org.laptop.sugar.Indexer.service: org.laptop.sugar.Indexer.service.in - @sed -e "s|\@bindir\@|$(bindir)|" $< > $@ - DISTCLEANFILES = $(service_DATA) EXTRA_DIST = $(service_in_files) diff --git a/src/olpc/datastore/__init__.py b/src/olpc/datastore/__init__.py index d38dcff..fd38d75 100644 --- a/src/olpc/datastore/__init__.py +++ b/src/olpc/datastore/__init__.py @@ -1,7 +1,5 @@ # datastore package +from olpc.datastore.datastore import DataStore, DS_LOG_CHANNEL -from olpc.datastore.datastore import DataStore, DS_LOG_CHANNEL -from olpc.datastore.backingstore import FileBackingStore -from olpc.datastore.query import DefaultQueryManager diff --git a/src/olpc/datastore/backingstore.py b/src/olpc/datastore/backingstore.py index b0a05ad..8ed1011 100644 --- a/src/olpc/datastore/backingstore.py +++ b/src/olpc/datastore/backingstore.py @@ -17,7 +17,7 @@ import re import subprocess import time -from olpc.datastore import query +from olpc.datastore.xapianindex import IndexManager from olpc.datastore import utils # changing this pattern impacts _targetFile @@ -75,7 +75,7 @@ class BackingStore(object): def load(self): """load the index for a given mount-point, then initialize its fulltext subsystem. This is the routine that will bootstrap - the querymanager (though create() may have just created it) + the indexmanager (though create() may have just created it) """ pass @@ -121,11 +121,11 @@ class FileBackingStore(BackingStore): """ FileSystemStore(path=<root of managed storage>) """ self.options = kwargs - self.local_querymanager = self.options.get('local_querymanager', True) + self.local_indexmanager = self.options.get('local_indexmanager', True) self.uri = uri self.base = os.path.join(uri, self.STORE_NAME) - self.querymanager = None + self.indexmanager = None # Informational def descriptor(self): @@ -190,47 +190,40 @@ class FileBackingStore(BackingStore): if not os.path.exists(self.base): os.makedirs(self.base) - # examine options and see what the querymanager plan is - if self.local_querymanager: - # create a local storage using the querymanager + # examine options and see what the indexmanager plan is + if self.local_indexmanager: + # create a local storage using the indexmanager # otherwise we will connect the global manager # in load index_name = os.path.join(self.base, self.INDEX_NAME) - options = utils.options_for(self.options, 'querymanager_') - if 'fulltext_repo' not in options: - options['fulltext_repo'] = os.path.join(self.base, - query.DefaultQueryManager.FULLTEXT_NAME) - - qm = query.DefaultQueryManager(index_name, **options) + options = utils.options_for(self.options, 'indexmanager.') + im = IndexManager() # This will ensure the fulltext and so on are all assigned - qm.bind_to(self) - qm.prepare() + im.bind_to(self) + im.connect(index_name, **options) self.create_descriptor(**options) - self.querymanager = qm + self.indexmanager = im def load(self): - if not self.querymanager and self.local_querymanager: - # create a local storage using the querymanager + if not self.indexmanager and self.local_indexmanager: + # create a local storage using the indexmanager # otherwise we will connect the global manager # in load index_name = os.path.join(self.base, self.INDEX_NAME) - options = utils.options_for(self.options, 'querymanager_') - if 'fulltext_repo' not in self.options: - options['fulltext_repo'] = os.path.join(self.base, - query.DefaultQueryManager.FULLTEXT_NAME) - - qm = query.DefaultQueryManager(index_name, **options) + options = utils.options_for(self.options, 'indexmanager.') + im = IndexManager() desc = utils.options_for(self.options, - 'querymanager_', invert=True) + 'indexmanager.', + invert=True) if desc: self.create_descriptor(**desc) # This will ensure the fulltext and so on are all assigned - qm.bind_to(self) - qm.prepare() + im.bind_to(self) + im.connect(index_name) - self.querymanager = qm + self.indexmanager = im def bind_to(self, datastore): ## signal from datastore that we are being bound to it @@ -283,7 +276,7 @@ class FileBackingStore(BackingStore): # env would contain things like cwd if we wanted to map to a # known space - content = self.querymanager.get(uid) + content = self.indexmanager.get(uid) # we need to map a copy of the content from the backingstore into the # activities addressable space. # map this to a rw file @@ -316,7 +309,7 @@ class FileBackingStore(BackingStore): fp.write(line) fp.close() if verify: - content = self.querymanager.get(uid) + content = self.indexmanager.get(uid) content.checksum = c.hexdigest() def _checksum(self, filename): @@ -329,18 +322,18 @@ class FileBackingStore(BackingStore): # File Management API def create(self, props, filelike): - content = self.querymanager.create(props, filelike) + uid = self.indexmanager.index(props, filelike) filename = filelike if filelike: if isinstance(filelike, basestring): # lets treat it as a filename filelike = open(filelike, "r") filelike.seek(0) - self._writeContent(content.id, filelike, replace=False) - return content + self._writeContent(uid, filelike, replace=False) + return uid def get(self, uid, env=None, allowMissing=False): - content = self.querymanager.get(uid) + content = self.indexmanager.get(uid) if not content: raise KeyError(uid) path = self._translatePath(uid) fp = None @@ -352,7 +345,9 @@ class FileBackingStore(BackingStore): return self._mapContent(uid, fp, path, env) def update(self, uid, props, filelike=None): - self.querymanager.update(uid, props, filelike) + if 'uid' not in props: props['uid'] = uid + + self.indexmanager.index(props, filelike) filename = filelike if filelike: if isinstance(filelike, basestring): @@ -365,7 +360,7 @@ class FileBackingStore(BackingStore): self._writeContent(uid, filelike) def delete(self, uid, allowMissing=True): - self.querymanager.delete(uid) + self.indexmanager.delete(uid) path = self._translatePath(uid) if os.path.exists(path): os.unlink(path) @@ -374,21 +369,21 @@ class FileBackingStore(BackingStore): raise KeyError("object for uid:%s missing" % uid) def get_uniquevaluesfor(self, propertyname): - return self.querymanager.get_uniquevaluesfor(propertyname) + return self.indexmanager.get_uniquevaluesfor(propertyname) def find(self, query): - return self.querymanager.find(query) + return self.indexmanager.search(query) def stop(self): - self.querymanager.stop() + self.indexmanager.stop() class InplaceFileBackingStore(FileBackingStore): """Like the normal FileBackingStore this Backingstore manages the storage of files, but doesn't move files into a repository. There are no working copies. It simply adds index data through its - querymanager and provides fulltext ontop of a regular + indexmanager and provides fulltext ontop of a regular filesystem. It does record its metadata relative to this mount point. @@ -434,7 +429,7 @@ class InplaceFileBackingStore(FileBackingStore): for fn in filenames: source = os.path.join(dirpath, fn) relative = source[len(self.uri)+1:] - result, count = self.querymanager.find(dict(filename=relative)) + result, count = self.indexmanager.search(dict(filename=relative)) if not count: # create a new record self.create(dict(filename=relative), source) @@ -449,30 +444,29 @@ class InplaceFileBackingStore(FileBackingStore): if checksum != content.checksum: self.update(uid, dict(filename=relative), source) - #self.querymanager.index.flush() # File Management API def create(self, props, filelike): # the file would have already been changed inplace # don't touch it - return self.querymanager.create(props, filelike) + return self.indexmanager.index(props, filelike) def get(self, uid, env=None, allowMissing=False): - content = self.querymanager.get(uid) + content = self.indexmanager.get(uid) if not content: raise KeyError(uid) return content.get_property('filename') def update(self, uid, props, filelike=None): # the file would have already been changed inplace # don't touch it - self.querymanager.update(uid, props, filelike) + self.indexmanager.index(uid, props, filelike) - def delete(self, uid, allowMissing=True): - c = self.querymanager.get(uid) - path = c.get_property('filename') - self.querymanager.delete(uid) - if os.path.exists(path): + def delete(self, uid): + c = self.indexmanager.get(uid) + path = c.get_property('filename', None) + self.indexmanager.delete(uid) + if path and os.path.exists(path): os.unlink(path) diff --git a/src/olpc/datastore/converter.py b/src/olpc/datastore/converter.py index 1250dbb..6f0ede6 100644 --- a/src/olpc/datastore/converter.py +++ b/src/olpc/datastore/converter.py @@ -95,11 +95,13 @@ class Converter(object): # maps both extension -> plugin # and mimetype -> plugin self._converters = {} + self._default = None self.logger = logging.getLogger('org.laptop.sugar.Indexer') def registerConverter(self, ext_or_mime, plugin): if plugin.verify(): self._converters[ext_or_mime] = plugin + if self._default is None: self._default = plugin def __call__(self, filename, encoding=None, mimetype=None): """Convert filename's content to utf-8 encoded text.""" @@ -119,6 +121,8 @@ class Converter(object): converter = self._converters.get(mt) if not converter: converter = self._converters.get(ext) + if not converter: + converter = self._default if converter: try: return converter(filename) diff --git a/src/olpc/datastore/datastore.py b/src/olpc/datastore/datastore.py index 142d801..da8ab74 100644 --- a/src/olpc/datastore/datastore.py +++ b/src/olpc/datastore/datastore.py @@ -18,8 +18,6 @@ import dbus.mainloop.glib from olpc.datastore import utils -from StringIO import StringIO - # the name used by the logger DS_LOG_CHANNEL = 'org.laptop.sugar.DataStore' @@ -68,14 +66,11 @@ class DataStore(dbus.service.Object): # medium (maybe an SD card for example) and we'd want to keep # that on the XO itself. In these cases their might be very # little identifying information on the media itself. - uri = str(uri) - _options = {} - if options: - for key, value in options.iteritems(): - _options[str(key)] = str(value) - + _options = utils._convert(options) + if _options is None: _options = {} + mp = self.connect_backingstore(uri, **_options) if not mp: return '' if mp.id in self.mountpoints: @@ -116,14 +111,28 @@ class DataStore(dbus.service.Object): ## sticks and so on. We provide a facility for tracking ## co-authors of content ## there are associated changes to 'find' to resolve buddies - def addBuddy(self, id, name, fg_color, bg_color): - pass + def addBuddy(self, id, name, fg_color, bg_color, mountpoint=None): + mp = None + if mountpoint is None: mp = self.root + else: mp = self.mountpoints.get(mountpoint) + if mp is None: raise ValueError("Invalid mountpoint") + mp.addBuddy(id, name, fg_color, bg_color) + + def getBuddy(self, bid): + """Get a buddy by its id""" + b = None + for mp in self.mountpoints.itervalues(): + b = mp.getBuddy(bid) + if b: break + return b - def getBuddy(self, id): - pass def buddies(self): - pass + buddies = set() + for mp in self.mountpoints.itervalues(): + buddies = buddies.union(mp.getBuddies()) + return buddies + ## end buddy api @@ -173,26 +182,15 @@ class DataStore(dbus.service.Object): over this process can come at a later time. """ mp = self._resolveMountpoint(props) - content = mp.create(props, filelike) - self.Created(content.id) - logging.debug("created %s" % content.id) + uid = mp.create(props, filelike) + self.Created(uid) + logging.debug("created %s" % uid) - return content.id + return uid @dbus.service.signal(DS_DBUS_INTERFACE, signature="s") def Created(self, uid): pass - - @dbus.service.method(DS_DBUS_INTERFACE, - in_signature='', - out_signature='as') - def all(self): - # workaround for not having optional args or None in - # DBus .. blah - results = self.querymanager.find() - return [r.id for r in results] - - def _multiway_search(self, query): mountpoints = query.pop('mountpoints', self.mountpoints) mountpoints = [self.mountpoints[str(m)] for m in mountpoints] @@ -306,9 +304,8 @@ class DataStore(dbus.service.Object): d = [] for r in results: props = {} - for prop in r.get_properties(): - props[prop.key] = prop.marshall() - + props.update(r.properties) + if 'uid' not in props: props['uid'] = r.id @@ -317,7 +314,7 @@ class DataStore(dbus.service.Object): filename = '' if include_files : - try: filename = self.backingstore.get(r.id).filename + try: filename = r.filename except KeyError: pass props['filename'] = filename d.append(props) @@ -344,14 +341,6 @@ class DataStore(dbus.service.Object): except AttributeError: pass return '' - def get_data(self, uid): - content = self.get(uid) - if content: - return content.get_data() - - def put_data(self, uid, data): - self.update(uid, None, StringIO(data)) - #@utils.sanitize_dbus @dbus.service.method(DS_DBUS_INTERFACE, in_signature='sa{sv}', @@ -360,7 +349,7 @@ class DataStore(dbus.service.Object): content = self.get(uid) dictionary = {} if not query: query = {} - for prop in content.get_properties(**query): + for prop in content.get_properties(query): dictionary[prop.key] = prop.marshall() return dictionary diff --git a/src/olpc/datastore/model.py b/src/olpc/datastore/model.py index 8c8ab05..5c737ad 100644 --- a/src/olpc/datastore/model.py +++ b/src/olpc/datastore/model.py @@ -10,17 +10,11 @@ __docformat__ = 'restructuredtext' __copyright__ = 'Copyright ObjectRealms, LLC, 2007' __license__ = 'The GNU Public License V2+' -from sqlalchemy import Table, Column, UniqueConstraint -from sqlalchemy import String, Integer, Unicode -from sqlalchemy import ForeignKey, Sequence, Index -from sqlalchemy import mapper, relation -from sqlalchemy import create_session -from sqlalchemy import MapperExtension, EXT_PASS, clear_mappers - import datetime import mimetypes import os import time +import warnings # XXX: Open issues # list properties - Contributors (a, b, c) @@ -28,51 +22,211 @@ import time # content state - searches don't include content deletion flag # - not recording if content is on other storage yet - -# we have a global thread local session factory -context = {} propertyTypes = {} _marker = object() -def get_session(backingstore): - return context[backingstore] +def registerPropertyType(kind, get, set, xapian_sort_type=None, defaults=None): + propertyTypes[kind] = PropertyImpl(get, set, xapian_sort_type, defaults) -def registerPropertyType(kind, class_): propertyTypes[kind] = class_ def propertyByKind(kind): return propertyTypes[kind] +class PropertyImpl(object): + __slots__ = ('_get', '_set', 'xapian_sort_type', 'defaults') + + def __init__(self, get, set, xapian_sort_type=None, defaults=None): + self._get, self._set = get, set + self.xapian_sort_type = xapian_sort_type + self.defaults = defaults + + def get(self, value): return self._get(value) + def set(self, value): return self._set(value) + +class Property(object): + """Light-weight property implementation. + Handles typed properties via a global registry of type->callbacks + + >>> p = Property(key, value, 'string') + >>> b = Property(key, value, 'binary') + """ + def __init__(self, key, value, kind=None): + self.key = key + self._value = value + self.kind = kind + if kind not in propertyTypes: + warnings.warn("Unknown property type: %s on key %s" % \ + (kind, key), RuntimeWarning) + else: self._impl = propertyTypes[kind] + + @classmethod + def fromstring(cls, key, value=''): + kind = 'string' + if ':' in key: + key, kind = key.split(':', 1) + # now resolve the kind to a property class + return cls(key, value, kind) + -class Content(object): def __repr__(self): - return "<Content id:%s>" % (self.id, ) + return "<%s(%s) %s:%r>" % (self.__class__.__name__, + self.kind, + self.key, self.value) - def get_property(self, key, default=_marker): - # mapped to property keys - session = get_session(self.backingstore) - query = session.query(Property) - p = query.get_by(content_id=self.id, key=key) - if not p: - if default is _marker: raise AttributeError(key) - return default - return p.value - - def get_properties(self, **kwargs): - session = get_session(self.backingstore) - query = session.query(Property) - return query.select_by(content_id=self.id, **kwargs) - - - # Backingstore dependent bindings - def get_file(self): - if not hasattr(self, "_file") or self._file.closed is True: - self.backingstore.get(self.id) - return self._file + def get_value(self): return self._impl.get(self._value) + def set_value(self, value): self._value = self._impl.set(value) + value = property(get_value, set_value) + + def __str__(self): return str(self.value) - def set_file(self, fileobj): - self._file = fileobj - file = property(get_file, set_file) +def noop(value): return value + +# Xapian doesn't have real binary storage, rather these keys will get +# indexed it its database. If the key size is too large the indexing +# will fail +# there are two solutions -- divert the storage to the backingstore +# and retain a key reference to recover it (this is the correct +# solution long term as it participates in versioning) and what I do +# now which is to insert and remove spaces into the base64 stream +# every fixed amount of characters +import re +base64hack = re.compile("(\S{212})") +def base64enc(value): return ' '.join(base64hack.split(value.encode('base64'))) +def base64dec(value): return value.replace(' ', '').decode('base64') + +dateformat = "%Y-%m-%dT%H:%M:%S" +def datedec(value, dateformat=dateformat): + ti = time.strptime(value, dateformat) + dt = datetime.datetime(*(ti[:-2])) + dt = dt.replace(microsecond=0) + return dt + +def dateenc(value, dateformat=dateformat): + if isinstance(value, basestring): + # XXX: there is an issue with microseconds not getting parsed + ti = time.strptime(value, dateformat) + value = datetime.datetime(*(ti[:-2])) + value = value.replace(microsecond=0) + # XXX: drop time for now, this is a xapian issue + value = value.date() + return value.isoformat() + +# syntactic sugar for the below +def p(key, kind, **kwargs): return (key, kind, kwargs) + +# type, get, set, xapian sort type [string|float|date], defaults +# defaults are the default options to addField in IndexManager +# these can be overridden on model assignment +registerPropertyType('string', noop, noop, 'string', {'store' : True, + 'exact' : True, + 'sortable' : True}) + +registerPropertyType('text', noop, noop, 'string', {'store' : True, + 'exact' : False, + 'sortable' : False}) + +registerPropertyType('binary', noop, noop, None, {'store' : True, + 'exact' : False, + 'sortable' : False}) + +registerPropertyType('number', str, float, 'float', {'store' : True, + 'exact' : True, + 'sortable' : True}) + +registerPropertyType('date', dateenc, datedec, 'date', {'store' : True, + 'exact' : True, + 'sortable' : True + }) + + +class Model(object): + """Object containing the field/property model used by the + system""" + + def __init__(self): + self.fields = {} + self.fieldnames = [] + + def addField(self, key, kind, **kwargs): + """ Add a field to the model. + key -- field name + kind -- type by name (registered with registerPropertyType) + kwargs -- overrides and additional values to the default + arguments supplied by kind + """ + if key in self.fields: + raise KeyError("""Another source tried to add %s field to + the model""" % key) + + impl = propertyByKind(kind) + options = impl.defaults.copy() + if kwargs: options.update(kwargs) + if impl.xapian_sort_type: + if 'type' not in options: + options['type'] = impl.xapian_sort_type + + self.fields[key] = (key, kind, options) + self.fieldnames.append(key) + return self + + def addFields(self, *args): + """ List of arguments to addField """ + for arg in args: self.addField(arg[0], arg[1], **arg[2]) + return self + + def apply(self, indexmanager): + addField = indexmanager.addField + for fn in self.fieldnames: + args = self.fields[fn] + addField(args[0], **args[2]) + + +defaultModel = Model().addFields( + p('text', 'text'), + # vid is version id + p('vid', store=True, exact=True, sortable=True, type="float"), + p('filename', store=True, exact=True), + # Title has additional weight + p('title', store=True, exact=False, weight=2, sortable=True), + p('url', store=True, exact=True, sortable=True), + p('mimetype', store=True, exact=True), + p('author', store=True, exact=True), + p('language', store=True, exact=True), + p('ctime', store=True, exact=True, sortable=True, type='date'), + p('mtime', store=True, exact=True, sortable=True, type='date'), + # this will just be a space delimited list of tags + # indexed with the content + # I give them high weight as they have user given semantic value. + p('tags', store=True, exact=False, weight=3, sortable=True), + ) + + +class Content(object): + """A light weight proxy around Xapian Documents from secore. + This provides additional methods which are used in the + backingstore to assist in storage + """ + __slots__ = ('_doc', '_backingstore', '_file') + + def __init__(self, xapdoc, backingstore=None): + self._doc = xapdoc + self._backingstore = backingstore + self._file = None + + def get_property(self, key, default=_marker): + result = self._doc.data.get(key, default) + if result is _marker: raise KeyError(key) + if isinstance(result, list) and len(result) == 1: + return result[0] + return result @property - def filename(self): return self.file.name + def properties(self): + d = {} + for k, v in self.data.iteritems(): + if isinstance(v, list) and len(v) == 1: + v = v[0] + d[k] = v + return d + def suggestName(self): # we look for certain known property names @@ -89,8 +243,7 @@ class Content(object): f, e = os.path.splitext(filename) if e: return filename, None if ext: return "%s.%s" % (filename, ext), None - elif ext: - return None, ext + elif ext: return None, ext else: # try to get an extension from the mimetype if available mt = self.get_property('mime_type', None) @@ -99,279 +252,35 @@ class Content(object): if ext: return None, ext return None, None - def get_data(self): - f = self.file - t = f.tell() - data = f.read() - f.seek(t) - return data - - def set_data(self, filelike): - self.backingstore.set(self.id, filelike) - - data = property(get_data, set_data) - - -class BackingStoreContentMapping(MapperExtension): - """This mapper extension populates Content objects with the - binding to the backing store the files are kept on, this allow the - file-like methods to work as expected on content - """ - def __init__(self, backingstore): - MapperExtension.__init__(self) - self.backingstore = backingstore - - def populate_instance(self, mapper, selectcontext, row, instance, identitykey, isnew): - """called right before the mapper, after creating an instance - from a row, passes the row to its MapperProperty objects which - are responsible for populating the object's attributes. If - this method returns EXT_PASS, it is assumed that the mapper - should do the appending, else if this method returns any other - value or None, it is assumed that the append was handled by - this method. - - """ - instance.backingstore = self.backingstore - # allow normal population to happen - return EXT_PASS - - -class Property(object): - """A typed key value pair associated with a content object. - This is the objects metadata. The value side of the kv pair is - typically encoded as a UTF-8 String. There are however cases where - richer metadata is required by the application using the - datastore. - In these cases the type field is overridden to encode a reference - to another object that must be used to satisfy this value. An - example of this would be storing a PNG thumbnail as the a - value. In a case such as that the value should be set to a path or - key used to find the image on stable storage or in a database and - the type field will be used to demarshall it through this object. - """ - def __init__(self, key, value, type='string'): - self.key = key - self.value = value - self.type = type - - def __repr__(self): - return "<%s %s:%r>" % (self.__class__.__name__, - self.key, self.value) - def marshall(self): - """Return the value marshalled as a string""" - return str(self.value) - -class TextProperty(Property): - """A text property is one that will also get full automatic text - indexing when available. This is used for fields like title where - searching in the text is more important than doing a direct match - """ - def __init__(self, key, value, type='text'): - Property.__init__(self, key, value, type) - - def get_value(self): return self._value - def set_value(self, value): self._value = value - value = property(get_value, set_value) - + def get_file(self): + if not hasattr(self, "_file") or self._file.closed is True: + self.backingstore.get(self.id) + return self._file -class DateProperty(Property): - format = "%Y-%m-%dT%H:%M:%S" - - def __init__(self, key, value, type="date"): - self._value = None - Property.__init__(self, key, value, type) - - def get_value(self): - # parse the value back into a datetime - # XXX: strptime on datetime is a 2.5 thing :( - # XXX: we lose timezone in this conversion currently - if not self._value: return None - ti = time.strptime(self._value, self.format) - dt = datetime.datetime(*(ti[:-2])) - dt = dt.replace(microsecond=0) - return dt - - def set_value(self, value): - if isinstance(value, basestring): - # XXX: there is an issue with microseconds not getting parsed - ti = time.strptime(value, self.format) - value = datetime.datetime(*(ti[:-2])) - value = value.replace(microsecond=0) - - self._value = value.isoformat() + def set_file(self, fileobj): + self._file = fileobj + file = property(get_file, set_file) - value = property(get_value, set_value) + @property + def filename(self): return self.file.name - def marshall(self): return self.value.isoformat() - + @property + def contents(self): return self.file.read() -class NumberProperty(Property): - def __init__(self, key, value, type="number"): - Property.__init__(self, key, value, type) - - def get_value(self): return float(self._value) - def set_value(self, value): self._value = value - value = property(get_value, set_value) + @property + def backingstore(self): return self._backingstore + @property + def id(self): return self._doc.id -class BinaryProperty(Property): - # base64 encode binary data - def __init__(self, key, value, type="binary"): - Property.__init__(self, key, value, type) - - def get_value(self): return self._value.decode('base64') - def set_value(self, value): self._value = value.encode('base64') - value = property(get_value, set_value) - - -class Model(object): - """ Manages the global state of the metadata model index. This is - intended to only be consumed by an olpc.datastore.query.QueryManager - instance for the management of its metadata. - - >>> m = Model() - >>> m.prepare(querymanager) - - >>> m.content - ... # Content Table - - >>> m['content'] - ... # content Mapper - - For details see the sqlalchemy documentation - - """ - - def __init__(self): - self.tables = {} - self.mappers = {} + @property + def data(self): return self._doc.data - def __getattr__(self, key): return self.tables[key] - def __getitem__(self, key): return self.mappers[key] - - - def prepare(self, querymanager): - self.querymanager = querymanager +## class Buddy(object): +## """A co-author on content. Information is collected and managed +## here""" +## pass - # a single session manages the exclusive access we keep to the - # db. - global context - self.session = create_session(bind_to=self.querymanager.db) - context[self.querymanager.backingstore] = self.session - - # content object - content = Table('content', - self.querymanager.metadata, - Column('id', String, primary_key=True, nullable=False), - Column('activity_id', Integer), - Column('checksum', String,), - UniqueConstraint('id', name='content_key') - ) - Index('content_activity_id_idx', content.c.activity_id) - - # the properties of content objects - properties = Table('properties', - self.querymanager.metadata, - Column('id', Integer, Sequence('property_id_seq'), primary_key=True), - Column('content_id', Integer, ForeignKey('content.id')), - Column('key', Unicode, ), - Column('value', Unicode, ), - Column('type', Unicode, ), - # unique key to content mapping - UniqueConstraint('content_id', 'key', - name='property_content_key') - ) - - Index('property_key_idx', properties.c.key) - Index('property_type_idx', properties.c.type) - - # storage - storage = Table('storage', - self.querymanager.metadata, - Column('id', String, primary_key=True), - Column('description', String, ), - Column('uri', String, ) - ) - - # storage -> * content - # XXX: this could be a purely runtime in-memory construct - # removing the storage table as well. Would depend in part on - # the frequency of the garbage collection runs and the - # frequency of connection to stable storage - storage_content = Table('storage_content', - self.querymanager.metadata, - Column('storage_id', Integer, ForeignKey('storage.id')), - Column('content_id', Integer, ForeignKey('content.id')), - ) - Index('idx_storage_content_content_id', storage_content.c.content_id) - - # Object Mapping - # the query manager provides a mapping extension for - # Content <-> BackingStore binding - - # XXX gross and not what we want, we can only define mappers - # once but we may have more than one datastore. - # this can impact all sqla in the runtime though - clear_mappers() - - - content_mapper = mapper(Content, content, - extension=self.querymanager.content_ext, - properties = { - 'properties' : relation(Property, - cascade="all,delete-orphan", - backref='content', - lazy=True), - }, - - ) - - # retain reference to these tables to use for queries - self.tables['content'] = content - self.tables['properties'] = properties - self.tables['storage'] = storage - self.tables['storage_content'] = storage_content - - # and the mappers (though most likely not needed) - property_mapper = mapper(Property, properties, polymorphic_on=properties.c.type) - self.mappers['properties'] = property_mapper - self.mappers['content'] = content_mapper - - # default Property types are mapped to classes here - self.addPropertyType(DateProperty, 'date') - self.addPropertyType(NumberProperty, 'number') - self.addPropertyType(TextProperty, 'text') - self.addPropertyType(BinaryProperty, 'binary') - - - - def addPropertyType(self, PropertyClass, typename, - map_value=True, **kwargs): - """Register a new type of Property. PropertyClass should be a - subclass of Property, typename is the textual - name of the new Property type. - - The flag map_value indicates if Property.value should - automatically be diverted to _value so that you can more - easily manage the interfaces 'value' as a Python property - (descriptor) - - Keyword args will be passed to the properties dictionary of - the sqlalchemy mapper call. See sqlalchemy docs for additional - details. - """ - properties = {} - properties.update(kwargs) - if map_value is True: - properties['_value'] = self.properties.c.value - mapper(PropertyClass, - inherits=self.mappers['properties'], - polymorphic_identity=typename, - properties=properties - ) - - registerPropertyType(typename, PropertyClass) - diff --git a/src/olpc/datastore/xapianindex.py b/src/olpc/datastore/xapianindex.py index 5772433..b02f4af 100644 --- a/src/olpc/datastore/xapianindex.py +++ b/src/olpc/datastore/xapianindex.py @@ -28,10 +28,26 @@ from olpc.datastore.utils import create_uid # Setup Logger logger = logging.getLogger('org.sugar.datastore.xapianindex') +class ContentMappingIter(object): + """An iterator over a set of results from a search. + + """ + def __init__(self, results, backingstore): + self._results = results + self._backingstore = backingstore + self._iter = iter(results) + + def __iter__(self): return self + + def next(self): + searchresult = self._iter.next() + return model.Content(searchresult, self._backingstore) -class IndexManager(object): - def __init__(self, language='en'): +class IndexManager(object): + DEFAULT_DATABASE_NAME = 'index' + + def __init__(self, default_language='en'): # We will maintain two connections to the database # we trigger automatic flushes to the read_index # after any write operation @@ -39,30 +55,52 @@ class IndexManager(object): self.read_index = None self.queue = Queue(0) self.indexer_running = False - self.language = language + self.language = default_language + self.backingstore = None + self.fields = set() # # Initialization - def connect(self, repo): + def connect(self, repo, **kwargs): if self.write_index is not None: - warnings.warn('''Requested redundant connect''', RuntimeWarning) - + warnings.warn('''Requested redundant connect to index''', + RuntimeWarning) + + self.repo = repo self.write_index = secore.IndexerConnection(repo) - self.setupFields() + + # configure the database according to the model + datamodel = kwargs.get('model', model.defaultModel) + datamodel.apply(self) + + # store a reference + self.datamodel = datamodel self.read_index = secore.SearchConnection(repo) - + + self.flush() + # by default we start the indexer now self.startIndexer() + def bind_to(self, backingstore): + # signal from backingstore that its our parent + self.backingstore = backingstore + + # flow control + def flush(self): + """Called after any database mutation""" + self.write_index.flush() + self.read_index.reopen() + def stop(self): self.stopIndexer() self.write_index.close() self.read_index.close() - + # Index thread management def startIndexer(self): self.indexer_running = True self.indexer = threading.Thread(target=self.indexThread, @@ -76,33 +114,53 @@ class IndexManager(object): self.indexer_running = False self.indexer.join() - def enque(self, uid, vid, doc): - self.queue.put((uid, vid, doc)) + def enque(self, uid, vid, doc, created): + self.queue.put((uid, vid, doc, created)) def indexThread(self): # process the queue + # XXX: there is currently no way to remove items from the queue + # for example if a USB stick is added and quickly removed + # the mount should however get a stop() call which would + # request that the indexing finish + logger = logging.getLogger('org.sugar.datastore.xapianindex.indexThread') while self.indexer_running: # include timeout here to ease shutdown of the thread # if this is a non-issue we can simply allow it to block try: - uid, vid, doc = self.queue.get(timeout=0.5) - self.write_index.add(doc) + uid, vid, doc, created = self.queue.get(timeout=0.5) + + if created: self.write_index.add(doc) + else: self.write_index.replace(doc) + + # XXX: if there is still work in the queue we could + # delay the flush() self.flush() + logger.info("Indexed Content %s:%s" % (uid, vid)) self.queue.task_done() except Empty: pass - + except: + logger.exception("Error in index thread. Attempting recovery") + try: self.write_index.close() + except: pass + self.write_index = secore.IndexerConnection(self.repo) + self.read_index.reopen() + + + @property def working(self): """Does the indexer have work""" - return not self.queue.empty() - - def flush(self): - """Called after any database mutation""" - self.write_index.flush() - self.read_index.reopen() + return self.indexer_running and not self.queue.empty() + def complete_indexing(self): + """Intentionally block until the indexing is complete. Used + primarily in testing. + """ + self.queue.join() + # # Field management def addField(self, key, store=True, exact=False, sortable=False, @@ -127,35 +185,29 @@ class IndexManager(object): # track this to find missing field configurations self.fields.add(key) - - def setupFields(self): - # add standard fields - # text is content objects information - self.addField('text', store=False, exact=False) - - # vid is version id - self.addField('vid', store=True, exact=True, sortable=True, type="float") - - # Title has additional weight - self.addField('title', store=True, exact=False, weight=2, sortable=True) - self.addField('mimetype', store=True, exact=True) - self.addField('author', store=True, exact=True) - self.addField('language', store=True, exact=True) - - - self.addField('ctime', store=True, exact=True, sortable=True, type='date') - self.addField('mtime', store=True, exact=True, sortable=True, type='date') - # # Index Functions + def mapProperties(self, props): + """data normalization function, maps dicts of key:kind->value + to Property objects + """ + d = {} + for k,v in props.iteritems(): + p = model.Property.fromstring(k, v) + d[p.key] = p + return d + def index(self, props, filename=None): """Index the content of an object. Props must contain the following: key -> Property() """ + props = self.mapProperties(props) doc = secore.UnprocessedDocument() add = doc.fields.append + fp = None + created = False if filename: mimetype = props.get("mimetype") @@ -177,7 +229,10 @@ class IndexManager(object): vid = props.pop('vid', None) if uid: uid = uid.value - else: uid = create_uid() + else: + uid = create_uid() + created = True + if vid: vid = vid.value else: vid = "1.0" @@ -187,19 +242,32 @@ class IndexManager(object): # # Property indexing for k, prop in props.iteritems(): - if isinstance(prop, model.BinaryProperty): continue value = prop.value + if k not in self.fields: warnings.warn("""Missing field configuration for %s""" % k, - RuntimeWarning) + RuntimeWarning) continue + add(secore.Field(k, value)) - + # queue the document for processing - self.enque(uid, vid, doc) + self.enque(uid, vid, doc, created) return uid + def get(self, uid): + doc = self.read_index.get_document(uid) + if not doc: raise KeyError(uid) + return model.Content(doc, self.backingstore) + + def delete(self, uid): + # does this need queuing? + # the higher level abstractions have to handle interaction + # with versioning policy and so on + self.write_index.delete(uid) + self.flush() + # # Search def search(self, query, start_index=0, end_index=50): @@ -210,10 +278,10 @@ class IndexManager(object): preceded by a "+" sign to indicate that the term is required, or a "-" to indicate that is is required to be absent. """ - # this will return the [(id, relevance), ...], estimated - # result count ri = self.read_index - if isinstance(query, dict): + if not query: + q = self.read_index.query_all() + elif isinstance(query, dict): queries = [] # each term becomes part of the query join for k, v in query.iteritems(): @@ -221,11 +289,40 @@ class IndexManager(object): q = ri.query_composite(ri.OP_AND, queries) else: q = self.parse_query(query) - results = ri.search(q, start_index, end_index) - return [r.id for r in results] + count = results.matches_estimated + + # map the result set to model.Content items + return ContentMappingIter(results, self.backingstore), count + + + def get_uniquevaluesfor(self, property): + # XXX: this is very sketchy code + # try to get the searchconnection to support this directly + # this should only apply to EXACT fields + r = set() + prefix = self.read_index._field_mappings.get_prefix(property) + plen = len(prefix) + termiter = self.read_index._index.allterms(prefix) + for t in termiter: + term = t.term + if len(term) > plen: + term = term[plen:] + if term.startswith(':'): term = term[1:] + r.add(term) + + # r holds the textual representation of the fields value set + # if the type of field or property needs conversion to a + # different python type this has to happen now + descriptor = self.datamodel.fields.get(property) + if descriptor: + kind = descriptor[1].get('type', 'string') + impl = model.propertyByKind(kind) + r = set([impl.get(i) for i in r]) + return r + def parse_query(self, query): # accept standard web query like syntax # 'this' -- match this diff --git a/tests/Makefile b/tests/Makefile index 7961b02..c2581cb 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -2,10 +2,9 @@ # its not an option to configure PYTHON=python -all: test +all: clean test test: - @rm -rf fulltext @${PYTHON} runalltests.py valgrind: @@ -17,6 +16,7 @@ profile: @${PYTHON} ./profilealltests.py clean: + @${PYTHON} ./cleaner.py @find . -name "*.pyc" -exec rm {} \; @find . -name "*~" -exec rm {} \; @find . -name "hotspot*" -exec rm {} \; diff --git a/tests/milestone_1.txt b/tests/milestone_1.txt index bde3720..2472260 100644 --- a/tests/milestone_1.txt +++ b/tests/milestone_1.txt @@ -12,6 +12,10 @@ datastore. First, create and connect the store. +>>> from testutils import waitforindex +>>> import os +>>> assert os.system('rm -rf /tmp/test_ds') == 0 + >>> from olpc.datastore import DataStore >>> from olpc.datastore import backingstore @@ -35,11 +39,13 @@ Note that we retain no reference to the created documents. Now we should be able to test the first requirement. * Get the unique ids of all the objects in the store. +>>> waitforindex(ds) + >>> results, count = ds.find() A find command with out any parameters will return everything in the store. -* Get an object from the store given his uid. +* Get an object from the store given its uid. Here we manually cycle through the results looking for the title we want. @@ -51,30 +57,24 @@ want. * Get the object metadata. >>> c1.properties -[...] +{...} * Get the object file. >>> c1.filename '/tmp/...' ->>> c1.data +>>> c1.contents 'this is the first document' >>> c1.file <open file ...> -Or if you prefer access through the datastore (which is how DBus would -use it) - ->>> fn = ds.get_filename(first_uid) ->>> ds.get_data(first_uid) -'this is the first document' - Now we can modify that file and then * Push the changes made to the file back to the store. * Update the metadata of an object. +>>> fn = c1.filename >>> fp = open(fn, 'a') >>> print >>fp, "more content" >>> fp.close() @@ -89,4 +89,4 @@ This is the basis of milestone 1. >>> ds.stop() >>> del ds - +>>> assert os.system('rm -rf /tmp/test_ds') == 0 diff --git a/tests/mountpoints.txt b/tests/mountpoints.txt index 9a821b5..1066da0 100644 --- a/tests/mountpoints.txt +++ b/tests/mountpoints.txt @@ -12,7 +12,7 @@ mounting a backingstore on the datastore. >>> from olpc.datastore import DataStore >>> from olpc.datastore import backingstore ->>> from testutils import tmpData +>>> from testutils import tmpData, waitforindex >>> import dbus @@ -41,6 +41,7 @@ Now lets create some content We can now, if we wish verify which mount point this content came from. +>>> waitforindex(ds) >>> c1 = ds.get(u1) >>> assert c1.backingstore.id == mountpoint @@ -61,6 +62,8 @@ Now lets add another mount point. Now lets create a new content item. >>> u3 = ds.create(dict(title="Document 3", mountpoint=mp2), tmpData("""document three""")) +>>> waitforindex(ds) + We explictly passed a mount point here. Lets examine the properties of the object and verify this. >>> c3 = ds.find(dict(title="Document 3"))[0][0] @@ -102,6 +105,8 @@ Register the filesystem type If that worked it should have imported content on load(). +>>> waitforindex(ds) + >>> result, count = ds.find(dict(fulltext="four")) >>> assert count == 1 >>> assert result[0]['mountpoint'] == mp3 @@ -114,6 +119,8 @@ as DBus data. >>> mp3 = ds.mount("inplace:/tmp/store3", dict(title=dbus.String("Fake USB again"))) +>>> waitforindex(ds) + >>> result, count = ds.find(dict(fulltext="four")) >>> assert count == 1 >>> assert result[0]['mountpoint'] == mp3 diff --git a/tests/properties.txt b/tests/properties.txt index 689414f..dd93b69 100644 --- a/tests/properties.txt +++ b/tests/properties.txt @@ -8,16 +8,23 @@ properties to content and managing them. >>> from olpc.datastore import DataStore ->>> from olpc.datastore import backingstore +>>> from olpc.datastore import backingstore, model >>> from testutils import tmpData >>> import dbus Set up two mount points. ->>> ds = DataStore(sync_index=True) +>>> ds = DataStore() >>> ds.registerBackend(backingstore.FileBackingStore) ->>> mp1 = ds.mount("/tmp/store1", dict(title="Primary Storage")) ->>> mp2 = ds.mount("/tmp/store2", dict(title="Secondary Storage")) + +Extend the model to retain a 'year' property used below. + +>>> dm = model.defaultModel.addField('year', store=True, exact=True, sortable=True, type="float") + +Mount a couple of stores. + +>>> mp1 = ds.mount("/tmp/store1", {'title' : "Primary Storage", 'indexmanager.model' : dm}) +>>> mp2 = ds.mount("/tmp/store2", {'title' : "Secondary Storage", 'indexmanager.model' : dm}) Create some content on each. diff --git a/tests/query.txt b/tests/query.txt index 2c58851..1e7624e 100644 --- a/tests/query.txt +++ b/tests/query.txt @@ -47,7 +47,7 @@ This returned a list of all properties on the Content object in which case we can find the property by enumeration. The other option is using the get_properties call on Content ->>> a.get_properties(key='title') +>>> a.get_properties(dict(key='title')) [<TextProperty title:'New Content'>] Using the query manager API we are able to update the @@ -57,11 +57,11 @@ that this works lets attach another property. >>> qm.update(a, dict(author='Benjamin')) A request for title still returns only the title property. ->>> a.get_properties(key='title') +>>> a.get_properties(dict(key='title')) [<TextProperty title:'New Content'>] And a request for author works as expected. ->>> a.get_properties(key='author') +>>> a.get_properties(dict(key='author')) [<Property author:'Benjamin'>] >>> qm.update(a, dict(foo='bar')) @@ -91,11 +91,9 @@ Here we want to show that certain types of Properties map to specialized implemenations automatically based on their type. 'ctime' is a DateTime Property and we can verify that it is returned properly from the mapping layer with the following. ->>> ctimeProp = a.get_properties(key='ctime')[0] ->>> ctimeProp.type == "date" +>>> ctimeProp = a.get_properties(dict(key='ctime'))[0] +>>> ctimeProp.kind == "date" True ->>> type(ctimeProp) -<class 'olpc.datastore.model.DateProperty'> Special support is needed to make dates easily addressable within the datastore. The properties 'ctime', creation time, and 'mtime', @@ -144,7 +142,7 @@ refers. This is available through the 'content' attrbiute of properties. Only properties bound to content and synchronized with the database have this property. ->>> p = a.get_properties(key='author')[0] +>>> p = a.get_properties(dict(key='author'))[0] >>> p.content <Content id:...> diff --git a/tests/runalltests.py b/tests/runalltests.py index bbf0f97..28802ec 100644 --- a/tests/runalltests.py +++ b/tests/runalltests.py @@ -14,10 +14,9 @@ import unittest import doctest from pkg_resources import resource_filename -from sqlalchemy import clear_mappers doctests = [ - resource_filename(__name__, "query.txt"), + resource_filename(__name__, "xapianindex.txt"), resource_filename(__name__, "milestone_1.txt"), resource_filename(__name__, "sugar_demo_may17.txt"), resource_filename(__name__, "milestone_2.txt"), @@ -44,13 +43,14 @@ sys.path.insert(0, test_lib) def tearDownDS(test): - # reset the module global mappers used in SQLAlchemy between tests - clear_mappers() # and remove the test repository used in some tests os.system('rm -rf /tmp/test_ds') def test_suite(): suite = unittest.TestSuite() + if len(sys.argv) > 1: + doctests = sys.argv[1:] + for dt in doctests: suite.addTest(doctest.DocFileSuite(dt, optionflags=doctest_options, tearDown=tearDownDS)) @@ -68,5 +68,6 @@ def test_suite(): if __name__ == "__main__": runner = unittest.TextTestRunner(verbosity=1) - runner.run(test_suite()) + suite = test_suite() + runner.run(suite) diff --git a/tests/sugar_demo_may17.txt b/tests/sugar_demo_may17.txt index c899799..f242140 100644 --- a/tests/sugar_demo_may17.txt +++ b/tests/sugar_demo_may17.txt @@ -2,6 +2,7 @@ How Sugar will interact with the DS for the May 17th demo in Argentina: >>> from olpc.datastore import DataStore >>> from olpc.datastore import backingstore +>>> from testutils import waitforindex >>> ds = DataStore() >>> ds.registerBackend(backingstore.FileBackingStore) >>> assert ds.mount("/tmp/test_ds") @@ -9,11 +10,14 @@ How Sugar will interact with the DS for the May 17th demo in Argentina: Create an entry without data: >>> uid = ds.create(dict(title="New entry"), '') +>>> waitforindex(ds) + >>> ds.get_filename(uid) '' Update an entry without data: >>> ds.update(uid, dict(title="New entry still without content"), '') +>>> waitforindex(ds) >>> ds.get_filename(uid) '' @@ -23,6 +27,7 @@ Add some data to the same entry: >>> print >>fp, "some content" >>> fp.close() >>> ds.update(uid, dict(title="Same entry now with some content"), fp.name) +>>> waitforindex(ds) Retrieve that data: >>> fn = ds.get_filename(uid) @@ -36,6 +41,7 @@ Update again: >>> print >>fp, "some other content" >>> fp.close() >>> ds.update(uid, dict(title="Same entry with some other content"), fp.name) +>>> waitforindex(ds) And retrieve again: >>> fn = ds.get_filename(uid) @@ -60,6 +66,7 @@ Set content as pdf: >>> ds.update(uid, dict(title="Same entry with some content in pdf"), 'test.pdf') >>> ds.update(uid, dict(title="Same entry with some content in doc"), 'test.doc') >>> ds.update(uid, dict(title="Same entry with some content in odt"), 'test.odt') +>>> waitforindex(ds) >>> ds.stop() >>> del ds diff --git a/tests/test_backingstore.py b/tests/test_backingstore.py index 28fdeba..a13e28c 100644 --- a/tests/test_backingstore.py +++ b/tests/test_backingstore.py @@ -1,21 +1,21 @@ import unittest -from StringIO import StringIO +from testutils import tmpData, waitforindex from olpc.datastore import backingstore -from sqlalchemy import clear_mappers import os DEFAULT_STORE = '/tmp/_bs_test' class Test(unittest.TestCase): - def tearDown(self): + def setUp(self): if os.path.exists(DEFAULT_STORE): os.system("rm -rf %s" % DEFAULT_STORE) - clear_mappers() + def tearDown(self): + if os.path.exists(DEFAULT_STORE): + os.system("rm -rf %s" % DEFAULT_STORE) def test_fsstore(self): - clear_mappers() bs = backingstore.FileBackingStore(DEFAULT_STORE) bs.initialize_and_load() bs.create_descriptor() @@ -28,20 +28,27 @@ class Test(unittest.TestCase): d = """This is a test""" d2 = "Different" - c = bs.create(dict(title="A"), StringIO(d)) - obj = bs.get(c.id) + uid = bs.create(dict(title="A"), tmpData(d)) + + waitforindex(bs) + + obj = bs.get(uid) + assert obj.get_property('title') == "A" got = obj.file.read() assert got == d - bs.update(c.id, dict(title="B"), StringIO(d2)) - obj = bs.get(c.id) + bs.update(uid, dict(title="B"), tmpData(d2)) + + waitforindex(bs) + + obj = bs.get(uid) assert obj.get_property('title') == "B" got = obj.file.read() assert got == d2 - bs.delete(c.id) - self.failUnlessRaises(KeyError, bs.get, c.id) + bs.delete(uid) + self.failUnlessRaises(KeyError, bs.get, uid) def test_suite(): suite = unittest.TestSuite() diff --git a/tests/test_model.py b/tests/test_model.py index 6e8c896..d7aea45 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -1,35 +1,56 @@ import unittest -from testutils import tmpData +from testutils import tmpData, waitforindex from olpc.datastore import DataStore from olpc.datastore import model, backingstore import datetime import os + +DEFAULT_STORE = '/tmp/test_ds' + class Test(unittest.TestCase): + def setUp(self): os.system('rm -rf %s' % DEFAULT_STORE) + def tearDown(self): os.system('rm -rf %s' % DEFAULT_STORE) + def test_dateproperty(self): n = datetime.datetime.now() # we have to kill the microseconds as # time.strptime which we must use in 2.4 doesn't parse it n = n.replace(microsecond=0) - p = model.DateProperty('ctime', n) + p = model.Property('ctime', n, 'date') assert p.key == "ctime" - assert p.value.isoformat() == n.isoformat() + # XXX: the 'date()' is a work around for a missing secore + # feature right now + assert p.value == n.date().isoformat() + def test_binaryproperty(self): ds = DataStore() ds.registerBackend(backingstore.FileBackingStore) - ds.mount('/tmp/test_ds') + + #add a custom field to the model + dm = model.defaultModel.addField('thumbnail', + store=True, + exact=False, + sortable=False) + ds.mount(DEFAULT_STORE, {'indexmanager.model' : dm}) + + data = open('test.jpg', 'r').read() # binary data with \0's in it can cause dbus errors here - uid = ds.create({'title' : "Document 1", 'thumbnail:binary' : data}, - tmpData("with image\0\0 prop")) + fn = tmpData("with image\0\0 prop") + uid = ds.create({'title' : "Document 1", 'thumbnail:binary' : data}, fn) + + waitforindex(ds) + c = ds.get(uid) assert c.get_property('thumbnail') == data + ds.stop() - os.system('rm -rf /tmp/test_ds') + def test_suite(): suite = unittest.TestSuite() diff --git a/tests/testutils.py b/tests/testutils.py index 243747a..48d1060 100644 --- a/tests/testutils.py +++ b/tests/testutils.py @@ -1,5 +1,9 @@ import tempfile import os +import time + +from olpc.datastore.xapianindex import IndexManager +from olpc.datastore.datastore import DataStore def tmpData(data): """Put data into a temporary file returning the filename """ @@ -7,3 +11,17 @@ def tmpData(data): os.write(fd, data) os.close(fd) return fn + +def waitforindex(obj, interval=0.1): + # wait for any/all index managers associated with object to finish + # indexing so that tests can do there thing + if isinstance(obj, IndexManager): + obj.complete_indexing() + elif isinstance(obj, DataStore): + for mp in obj.mountpoints.values(): + im = mp.indexmanager + im.complete_indexing() + else: + # backingstore + obj.indexmanager.complete_indexing() + diff --git a/tests/xapianindex.txt b/tests/xapianindex.txt index de495a6..5ef1d5c 100644 --- a/tests/xapianindex.txt +++ b/tests/xapianindex.txt @@ -16,20 +16,11 @@ First clean up any old test data. >>> im = IndexManager() >>> im.connect(index_home) -A small utility method for wrapping a normal dict into proper property -objects. - ->>> def propsdict(**kwargs): -... d = {} -... for k,v in kwargs.iteritems(): -... d[k] = model.Property(k, v) -... return d - Now add the file to the index. ->>> props = propsdict(title="PDF Document", -... mimetype="application/pdf") +>>> props = dict(title="PDF Document", +... mimetype="application/pdf") >>> uid = im.index(props, "test.pdf") @@ -41,36 +32,42 @@ left, when it has none we expect our content to be indexed and searchable. Searching on an property of the content works. ->>> assert im.search("PDF")[0] == uid +>>> def expect(r, count=None): +... if count: assert r[1] == count +... return list(r[0]) +>>> def expect_single(r): +... assert r[1] == 1 +... return r[0].next() +>>> def expect_none(r): +... assert r[1] == 0 +... assert list(r[0]) == [] + + +>>> assert expect_single(im.search("PDF")).id == uid Searching into the binary content of the object works as well. ->>> assert im.search("peek")[0] == uid +>>> assert expect_single(im.search("peek")).id == uid Specifying a search that demands a document term be found only in the title works as well. ->>> assert im.search('title:PDF')[0] == uid ->>> im.search('title:peek') -[] +>>> assert expect_single(im.search('title:PDF')).id == uid +>>> expect_none(im.search('title:peek')) Searching for documents that are PDF works as expected here. Here we use the dictionary form of the query where each field name is given and creates a search. ->>> assert im.search(dict(mimetype='application/pdf'))[0] == uid - - -#Likewise excluding the match works as expected -#>>> im.search('-title:PDF') -#[] - +>>> assert expect_single(im.search(dict(mimetype='application/pdf'))).id == uid Punctuation is fine. ->>> assert im.search("Don't peek")[0] == uid +>>> assert expect_single(im.search("Don't peek")).id == uid As well as quoted strings ->>> assert im.search(r'''"Don't peek"''')[0] == uid +>>> assert expect_single(im.search(r'''"Don't peek"''')).id == uid Cleanly shut down. >>> im.stop() + +>>> assert os.system('rm -rf %s' % index_home) == 0 |