author    Benjamin Saller <bcsaller@objectrealms.net>  2007-07-12 21:17:48 (GMT)
committer Benjamin Saller <bcsaller@objectrealms.net>  2007-07-12 21:17:48 (GMT)
commit    7aae48766ae46bd530a3c556cd4e92a0e02f7ad3
tree      62e37ba449d5d0f628af9c0f7b1401828f2a154d
parent    f577c2c142c7648a482e0eec7ecd736c1ca716d7
checkpoint before prop kind/type merge
-rwxr-xr-x  bin/datastore-service                 2
-rw-r--r--  etc/Makefile.am                       7
-rw-r--r--  src/olpc/datastore/__init__.py        4
-rw-r--r--  src/olpc/datastore/backingstore.py   94
-rw-r--r--  src/olpc/datastore/converter.py       4
-rw-r--r--  src/olpc/datastore/datastore.py      71
-rw-r--r--  src/olpc/datastore/model.py         523
-rw-r--r--  src/olpc/datastore/xapianindex.py   195
-rw-r--r--  tests/Makefile                        4
-rw-r--r--  tests/milestone_1.txt                22
-rw-r--r--  tests/mountpoints.txt                 9
-rw-r--r--  tests/properties.txt                 15
-rw-r--r--  tests/query.txt                      14
-rw-r--r--  tests/runalltests.py                 11
-rw-r--r--  tests/sugar_demo_may17.txt            7
-rw-r--r--  tests/test_backingstore.py           29
-rw-r--r--  tests/test_model.py                  35
-rw-r--r--  tests/testutils.py                   18
-rw-r--r--  tests/xapianindex.txt                47
19 files changed, 581 insertions, 530 deletions
diff --git a/bin/datastore-service b/bin/datastore-service
index 4300619..b21e529 100755
--- a/bin/datastore-service
+++ b/bin/datastore-service
@@ -53,7 +53,7 @@ bus = dbus.SessionBus()
ds = DataStore()
ds.registerBackend(backingstore.FileBackingStore)
ds.registerBackend(backingstore.InplaceFileBackingStore)
-ds.mount(repo_dir, {'querymanager_sync_index': SYNC_INDEX})
+ds.mount(repo_dir, {'indexmanager.sync_index': SYNC_INDEX})
# and run it
logger.info("Starting Datastore %s" % (repo_dir))
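
The mount options move from a 'querymanager_' prefix to an 'indexmanager.' namespace here, and the same rename runs through backingstore.py below, where utils.options_for splits the mount options on that prefix. options_for itself lives in olpc/datastore/utils.py and is not part of this diff; a minimal sketch of the prefix filtering it presumably performs, judging from its call sites:

    # Assumed behaviour of utils.options_for, inferred from the
    # backingstore.py call sites below; not the actual implementation.
    def options_for(options, prefix, invert=False):
        """Return the options whose keys start with prefix, stripped of
        that prefix; with invert=True return the remaining options."""
        d = {}
        for key, value in options.iteritems():
            if key.startswith(prefix):
                if not invert:
                    d[key[len(prefix):]] = value
            elif invert:
                d[key] = value
        return d

    # options_for({'indexmanager.sync_index': True, 'title': 'x'},
    #             'indexmanager.')  ->  {'sync_index': True}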
diff --git a/etc/Makefile.am b/etc/Makefile.am
index 1d8a54c..a9b28b1 100644
--- a/etc/Makefile.am
+++ b/etc/Makefile.am
@@ -1,15 +1,12 @@
servicedir = $(datadir)/dbus-1/services
service_in_files = \
- org.laptop.sugar.DataStore.service.in \
- org.laptop.sugar.Indexer.service.in
+ org.laptop.sugar.DataStore.service.in
+
service_DATA = $(service_in_files:.service.in=.service)
org.laptop.sugar.DataStore.service: org.laptop.sugar.DataStore.service.in
@sed -e "s|\@bindir\@|$(bindir)|" $< > $@
-org.laptop.sugar.Indexer.service: org.laptop.sugar.Indexer.service.in
- @sed -e "s|\@bindir\@|$(bindir)|" $< > $@
-
DISTCLEANFILES = $(service_DATA)
EXTRA_DIST = $(service_in_files)
diff --git a/src/olpc/datastore/__init__.py b/src/olpc/datastore/__init__.py
index d38dcff..fd38d75 100644
--- a/src/olpc/datastore/__init__.py
+++ b/src/olpc/datastore/__init__.py
@@ -1,7 +1,5 @@
# datastore package
-from olpc.datastore.datastore import DataStore, DS_LOG_CHANNEL
-from olpc.datastore.backingstore import FileBackingStore
-from olpc.datastore.query import DefaultQueryManager
+from olpc.datastore.datastore import DataStore, DS_LOG_CHANNEL
diff --git a/src/olpc/datastore/backingstore.py b/src/olpc/datastore/backingstore.py
index b0a05ad..8ed1011 100644
--- a/src/olpc/datastore/backingstore.py
+++ b/src/olpc/datastore/backingstore.py
@@ -17,7 +17,7 @@ import re
import subprocess
import time
-from olpc.datastore import query
+from olpc.datastore.xapianindex import IndexManager
from olpc.datastore import utils
# changing this pattern impacts _targetFile
@@ -75,7 +75,7 @@ class BackingStore(object):
def load(self):
"""load the index for a given mount-point, then initialize its
fulltext subsystem. This is the routine that will bootstrap
- the querymanager (though create() may have just created it)
+ the indexmanager (though create() may have just created it)
"""
pass
@@ -121,11 +121,11 @@ class FileBackingStore(BackingStore):
""" FileSystemStore(path=<root of managed storage>)
"""
self.options = kwargs
- self.local_querymanager = self.options.get('local_querymanager', True)
+ self.local_indexmanager = self.options.get('local_indexmanager', True)
self.uri = uri
self.base = os.path.join(uri, self.STORE_NAME)
- self.querymanager = None
+ self.indexmanager = None
# Informational
def descriptor(self):
@@ -190,47 +190,40 @@ class FileBackingStore(BackingStore):
if not os.path.exists(self.base):
os.makedirs(self.base)
- # examine options and see what the querymanager plan is
- if self.local_querymanager:
- # create a local storage using the querymanager
+ # examine options and see what the indexmanager plan is
+ if self.local_indexmanager:
+ # create a local storage using the indexmanager
# otherwise we will connect the global manager
# in load
index_name = os.path.join(self.base, self.INDEX_NAME)
- options = utils.options_for(self.options, 'querymanager_')
- if 'fulltext_repo' not in options:
- options['fulltext_repo'] = os.path.join(self.base,
- query.DefaultQueryManager.FULLTEXT_NAME)
-
- qm = query.DefaultQueryManager(index_name, **options)
+ options = utils.options_for(self.options, 'indexmanager.')
+ im = IndexManager()
# This will ensure the fulltext and so on are all assigned
- qm.bind_to(self)
- qm.prepare()
+ im.bind_to(self)
+ im.connect(index_name, **options)
self.create_descriptor(**options)
- self.querymanager = qm
+ self.indexmanager = im
def load(self):
- if not self.querymanager and self.local_querymanager:
- # create a local storage using the querymanager
+ if not self.indexmanager and self.local_indexmanager:
+ # create a local storage using the indexmanager
# otherwise we will connect the global manager
# in load
index_name = os.path.join(self.base, self.INDEX_NAME)
- options = utils.options_for(self.options, 'querymanager_')
- if 'fulltext_repo' not in self.options:
- options['fulltext_repo'] = os.path.join(self.base,
- query.DefaultQueryManager.FULLTEXT_NAME)
-
- qm = query.DefaultQueryManager(index_name, **options)
+ options = utils.options_for(self.options, 'indexmanager.')
+ im = IndexManager()
desc = utils.options_for(self.options,
- 'querymanager_', invert=True)
+ 'indexmanager.',
+ invert=True)
if desc: self.create_descriptor(**desc)
# This will ensure the fulltext and so on are all assigned
- qm.bind_to(self)
- qm.prepare()
+ im.bind_to(self)
+ im.connect(index_name)
- self.querymanager = qm
+ self.indexmanager = im
def bind_to(self, datastore):
## signal from datastore that we are being bound to it
@@ -283,7 +276,7 @@ class FileBackingStore(BackingStore):
# env would contain things like cwd if we wanted to map to a
# known space
- content = self.querymanager.get(uid)
+ content = self.indexmanager.get(uid)
# we need to map a copy of the content from the backingstore into the
# activities addressable space.
# map this to a rw file
@@ -316,7 +309,7 @@ class FileBackingStore(BackingStore):
fp.write(line)
fp.close()
if verify:
- content = self.querymanager.get(uid)
+ content = self.indexmanager.get(uid)
content.checksum = c.hexdigest()
def _checksum(self, filename):
@@ -329,18 +322,18 @@ class FileBackingStore(BackingStore):
# File Management API
def create(self, props, filelike):
- content = self.querymanager.create(props, filelike)
+ uid = self.indexmanager.index(props, filelike)
filename = filelike
if filelike:
if isinstance(filelike, basestring):
# lets treat it as a filename
filelike = open(filelike, "r")
filelike.seek(0)
- self._writeContent(content.id, filelike, replace=False)
- return content
+ self._writeContent(uid, filelike, replace=False)
+ return uid
def get(self, uid, env=None, allowMissing=False):
- content = self.querymanager.get(uid)
+ content = self.indexmanager.get(uid)
if not content: raise KeyError(uid)
path = self._translatePath(uid)
fp = None
@@ -352,7 +345,9 @@ class FileBackingStore(BackingStore):
return self._mapContent(uid, fp, path, env)
def update(self, uid, props, filelike=None):
- self.querymanager.update(uid, props, filelike)
+ if 'uid' not in props: props['uid'] = uid
+
+ self.indexmanager.index(props, filelike)
filename = filelike
if filelike:
if isinstance(filelike, basestring):
@@ -365,7 +360,7 @@ class FileBackingStore(BackingStore):
self._writeContent(uid, filelike)
def delete(self, uid, allowMissing=True):
- self.querymanager.delete(uid)
+ self.indexmanager.delete(uid)
path = self._translatePath(uid)
if os.path.exists(path):
os.unlink(path)
@@ -374,21 +369,21 @@ class FileBackingStore(BackingStore):
raise KeyError("object for uid:%s missing" % uid)
def get_uniquevaluesfor(self, propertyname):
- return self.querymanager.get_uniquevaluesfor(propertyname)
+ return self.indexmanager.get_uniquevaluesfor(propertyname)
def find(self, query):
- return self.querymanager.find(query)
+ return self.indexmanager.search(query)
def stop(self):
- self.querymanager.stop()
+ self.indexmanager.stop()
class InplaceFileBackingStore(FileBackingStore):
"""Like the normal FileBackingStore this Backingstore manages the
storage of files, but doesn't move files into a repository. There
are no working copies. It simply adds index data through its
- querymanager and provides fulltext on top of a regular
+ indexmanager and provides fulltext on top of a regular
filesystem. It does record its metadata relative to this mount
point.
@@ -434,7 +429,7 @@ class InplaceFileBackingStore(FileBackingStore):
for fn in filenames:
source = os.path.join(dirpath, fn)
relative = source[len(self.uri)+1:]
- result, count = self.querymanager.find(dict(filename=relative))
+ result, count = self.indexmanager.search(dict(filename=relative))
if not count:
# create a new record
self.create(dict(filename=relative), source)
@@ -449,30 +444,29 @@ class InplaceFileBackingStore(FileBackingStore):
if checksum != content.checksum:
self.update(uid, dict(filename=relative), source)
- #self.querymanager.index.flush()
# File Management API
def create(self, props, filelike):
# the file would have already been changed inplace
# don't touch it
- return self.querymanager.create(props, filelike)
+ return self.indexmanager.index(props, filelike)
def get(self, uid, env=None, allowMissing=False):
- content = self.querymanager.get(uid)
+ content = self.indexmanager.get(uid)
if not content: raise KeyError(uid)
return content.get_property('filename')
def update(self, uid, props, filelike=None):
# the file would have already been changed inplace
# don't touch it
- self.querymanager.update(uid, props, filelike)
+ if 'uid' not in props: props['uid'] = uid
+ self.indexmanager.index(props, filelike)
- def delete(self, uid, allowMissing=True):
- c = self.querymanager.get(uid)
- path = c.get_property('filename')
- self.querymanager.delete(uid)
- if os.path.exists(path):
+ def delete(self, uid):
+ c = self.indexmanager.get(uid)
+ path = c.get_property('filename', None)
+ self.indexmanager.delete(uid)
+ if path and os.path.exists(path):
os.unlink(path)
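
Taken together, the FileBackingStore changes swap every querymanager call for the IndexManager: create() now indexes the properties first and names the stored file after the returned uid, and find() becomes indexmanager.search(). A minimal usage sketch of the resulting flow (the store path and data file are illustrative; complete_indexing comes from the xapianindex.py changes below):

    from olpc.datastore import backingstore

    bs = backingstore.FileBackingStore('/tmp/_bs_demo')
    bs.initialize_and_load()

    # assumes /tmp/demo.txt exists; create() opens filename strings itself
    uid = bs.create(dict(title="A"), '/tmp/demo.txt')
    bs.indexmanager.complete_indexing()   # block until the queue drains

    obj = bs.get(uid)                     # a model.Content proxy
    assert obj.get_property('title') == "A"
    bs.delete(uid)
    bs.stop()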
diff --git a/src/olpc/datastore/converter.py b/src/olpc/datastore/converter.py
index 1250dbb..6f0ede6 100644
--- a/src/olpc/datastore/converter.py
+++ b/src/olpc/datastore/converter.py
@@ -95,11 +95,13 @@ class Converter(object):
# maps both extension -> plugin
# and mimetype -> plugin
self._converters = {}
+ self._default = None
self.logger = logging.getLogger('org.laptop.sugar.Indexer')
def registerConverter(self, ext_or_mime, plugin):
if plugin.verify():
self._converters[ext_or_mime] = plugin
+ if self._default is None: self._default = plugin
def __call__(self, filename, encoding=None, mimetype=None):
"""Convert filename's content to utf-8 encoded text."""
@@ -119,6 +121,8 @@ class Converter(object):
converter = self._converters.get(mt)
if not converter:
converter = self._converters.get(ext)
+ if not converter:
+ converter = self._default
if converter:
try:
return converter(filename)
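
The converter change adds a last-resort fallback: the first plugin registered becomes _default and is used when neither the mimetype nor the extension matches. A sketch with a hypothetical stand-in plugin (real plugins must pass verify() and be callable on a filename):

    class EchoPlugin(object):
        """Hypothetical plugin used only to illustrate the fallback."""
        def verify(self): return True
        def __call__(self, filename): return open(filename).read()

    c = Converter()
    c.registerConverter('.txt', EchoPlugin())   # first plugin -> _default
    # lookup order: mimetype, then extension, then the default plugin
    text = c('/tmp/unknown.bin')                # falls back to EchoPlugin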
diff --git a/src/olpc/datastore/datastore.py b/src/olpc/datastore/datastore.py
index 142d801..da8ab74 100644
--- a/src/olpc/datastore/datastore.py
+++ b/src/olpc/datastore/datastore.py
@@ -18,8 +18,6 @@ import dbus.mainloop.glib
from olpc.datastore import utils
-from StringIO import StringIO
-
# the name used by the logger
DS_LOG_CHANNEL = 'org.laptop.sugar.DataStore'
@@ -68,14 +66,11 @@ class DataStore(dbus.service.Object):
# medium (maybe an SD card for example) and we'd want to keep
# that on the XO itself. In these cases there might be very
# little identifying information on the media itself.
-
uri = str(uri)
- _options = {}
- if options:
- for key, value in options.iteritems():
- _options[str(key)] = str(value)
-
+ _options = utils._convert(options)
+ if _options is None: _options = {}
+
mp = self.connect_backingstore(uri, **_options)
if not mp: return ''
if mp.id in self.mountpoints:
@@ -116,14 +111,28 @@ class DataStore(dbus.service.Object):
## sticks and so on. We provide a facility for tracking
## co-authors of content
## there are associated changes to 'find' to resolve buddies
- def addBuddy(self, id, name, fg_color, bg_color):
- pass
+ def addBuddy(self, id, name, fg_color, bg_color, mountpoint=None):
+ mp = None
+ if mountpoint is None: mp = self.root
+ else: mp = self.mountpoints.get(mountpoint)
+ if mp is None: raise ValueError("Invalid mountpoint")
+ mp.addBuddy(id, name, fg_color, bg_color)
+
+ def getBuddy(self, bid):
+ """Get a buddy by its id"""
+ b = None
+ for mp in self.mountpoints.itervalues():
+ b = mp.getBuddy(bid)
+ if b: break
+ return b
- def getBuddy(self, id):
- pass
def buddies(self):
- pass
+ buddies = set()
+ for mp in self.mountpoints.itervalues():
+ buddies = buddies.union(mp.getBuddies())
+ return buddies
+
## end buddy api
@@ -173,26 +182,15 @@ class DataStore(dbus.service.Object):
over this process can come at a later time.
"""
mp = self._resolveMountpoint(props)
- content = mp.create(props, filelike)
- self.Created(content.id)
- logging.debug("created %s" % content.id)
+ uid = mp.create(props, filelike)
+ self.Created(uid)
+ logging.debug("created %s" % uid)
- return content.id
+ return uid
@dbus.service.signal(DS_DBUS_INTERFACE, signature="s")
def Created(self, uid): pass
-
- @dbus.service.method(DS_DBUS_INTERFACE,
- in_signature='',
- out_signature='as')
- def all(self):
- # workaround for not having optional args or None in
- # DBus .. blah
- results = self.querymanager.find()
- return [r.id for r in results]
-
-
def _multiway_search(self, query):
mountpoints = query.pop('mountpoints', self.mountpoints)
mountpoints = [self.mountpoints[str(m)] for m in mountpoints]
@@ -306,9 +304,8 @@ class DataStore(dbus.service.Object):
d = []
for r in results:
props = {}
- for prop in r.get_properties():
- props[prop.key] = prop.marshall()
-
+ props.update(r.properties)
+
if 'uid' not in props:
props['uid'] = r.id
@@ -317,7 +314,7 @@ class DataStore(dbus.service.Object):
filename = ''
if include_files :
- try: filename = self.backingstore.get(r.id).filename
+ try: filename = r.filename
except KeyError: pass
props['filename'] = filename
d.append(props)
@@ -344,14 +341,6 @@ class DataStore(dbus.service.Object):
except AttributeError: pass
return ''
- def get_data(self, uid):
- content = self.get(uid)
- if content:
- return content.get_data()
-
- def put_data(self, uid, data):
- self.update(uid, None, StringIO(data))
-
#@utils.sanitize_dbus
@dbus.service.method(DS_DBUS_INTERFACE,
in_signature='sa{sv}',
@@ -360,7 +349,7 @@ class DataStore(dbus.service.Object):
content = self.get(uid)
dictionary = {}
if not query: query = {}
- for prop in content.get_properties(**query):
+ for prop in content.get_properties(query):
dictionary[prop.key] = prop.marshall()
return dictionary
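
Note the mount() cleanup above: the inline loop that coerced dbus-typed option keys and values down to str is replaced by utils._convert(options). That helper is not shown in this diff; judging from the code it replaces, it presumably does something like:

    # Assumed shape of utils._convert, mirroring the removed inline loop;
    # the real helper may recurse into nested dbus containers.
    def _convert(options):
        if options is None:
            return None
        d = {}
        for key, value in options.iteritems():
            d[str(key)] = str(value)
        return d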
diff --git a/src/olpc/datastore/model.py b/src/olpc/datastore/model.py
index 8c8ab05..5c737ad 100644
--- a/src/olpc/datastore/model.py
+++ b/src/olpc/datastore/model.py
@@ -10,17 +10,11 @@ __docformat__ = 'restructuredtext'
__copyright__ = 'Copyright ObjectRealms, LLC, 2007'
__license__ = 'The GNU Public License V2+'
-from sqlalchemy import Table, Column, UniqueConstraint
-from sqlalchemy import String, Integer, Unicode
-from sqlalchemy import ForeignKey, Sequence, Index
-from sqlalchemy import mapper, relation
-from sqlalchemy import create_session
-from sqlalchemy import MapperExtension, EXT_PASS, clear_mappers
-
import datetime
import mimetypes
import os
import time
+import warnings
# XXX: Open issues
# list properties - Contributors (a, b, c)
@@ -28,51 +22,211 @@ import time
# content state - searches don't include content deletion flag
# - not recording if content is on other storage yet
-
-# we have a global thread local session factory
-context = {}
propertyTypes = {}
_marker = object()
-def get_session(backingstore):
- return context[backingstore]
+def registerPropertyType(kind, get, set, xapian_sort_type=None, defaults=None):
+ propertyTypes[kind] = PropertyImpl(get, set, xapian_sort_type, defaults)
-def registerPropertyType(kind, class_): propertyTypes[kind] = class_
def propertyByKind(kind): return propertyTypes[kind]
+class PropertyImpl(object):
+ __slots__ = ('_get', '_set', 'xapian_sort_type', 'defaults')
+
+ def __init__(self, get, set, xapian_sort_type=None, defaults=None):
+ self._get, self._set = get, set
+ self.xapian_sort_type = xapian_sort_type
+ self.defaults = defaults
+
+ def get(self, value): return self._get(value)
+ def set(self, value): return self._set(value)
+
+class Property(object):
+ """Light-weight property implementation.
+ Handles typed properties via a global registry of type->callbacks
+
+ >>> p = Property(key, value, 'string')
+ >>> b = Property(key, value, 'binary')
+ """
+ def __init__(self, key, value, kind=None):
+ self.key = key
+ self._value = value
+ self.kind = kind
+ if kind not in propertyTypes:
+ warnings.warn("Unknown property type: %s on key %s" % \
+ (kind, key), RuntimeWarning)
+ else: self._impl = propertyTypes[kind]
+
+ @classmethod
+ def fromstring(cls, key, value=''):
+ kind = 'string'
+ if ':' in key:
+ key, kind = key.split(':', 1)
+ # now resolve the kind to a property class
+ return cls(key, value, kind)
+
-class Content(object):
def __repr__(self):
- return "<Content id:%s>" % (self.id, )
+ return "<%s(%s) %s:%r>" % (self.__class__.__name__,
+ self.kind,
+ self.key, self.value)
- def get_property(self, key, default=_marker):
- # mapped to property keys
- session = get_session(self.backingstore)
- query = session.query(Property)
- p = query.get_by(content_id=self.id, key=key)
- if not p:
- if default is _marker: raise AttributeError(key)
- return default
- return p.value
-
- def get_properties(self, **kwargs):
- session = get_session(self.backingstore)
- query = session.query(Property)
- return query.select_by(content_id=self.id, **kwargs)
-
-
- # Backingstore dependent bindings
- def get_file(self):
- if not hasattr(self, "_file") or self._file.closed is True:
- self.backingstore.get(self.id)
- return self._file
+ def get_value(self): return self._impl.get(self._value)
+ def set_value(self, value): self._value = self._impl.set(value)
+ value = property(get_value, set_value)
+
+ def __str__(self): return str(self.value)
- def set_file(self, fileobj):
- self._file = fileobj
- file = property(get_file, set_file)
+def noop(value): return value
+
+# Xapian doesn't have real binary storage, rather these keys will get
+# indexed in its database. If the key size is too large the indexing
+# will fail
+# there are two solutions -- divert the storage to the backingstore
+# and retain a key reference to recover it (this is the correct
+# solution long term as it participates in versioning) and what I do
+# now which is to insert and remove spaces into the base64 stream
+# every fixed amount of characters
+import re
+base64hack = re.compile("(\S{212})")
+def base64enc(value): return ' '.join(base64hack.split(value.encode('base64')))
+def base64dec(value): return value.replace(' ', '').decode('base64')
+
+dateformat = "%Y-%m-%dT%H:%M:%S"
+def datedec(value, dateformat=dateformat):
+ ti = time.strptime(value, dateformat)
+ dt = datetime.datetime(*(ti[:-2]))
+ dt = dt.replace(microsecond=0)
+ return dt
+
+def dateenc(value, dateformat=dateformat):
+ if isinstance(value, basestring):
+ # XXX: there is an issue with microseconds not getting parsed
+ ti = time.strptime(value, dateformat)
+ value = datetime.datetime(*(ti[:-2]))
+ value = value.replace(microsecond=0)
+ # XXX: drop time for now, this is a xapian issue
+ value = value.date()
+ return value.isoformat()
+
+# syntactic sugar for the below
+def p(key, kind, **kwargs): return (key, kind, kwargs)
+
+# type, get, set, xapian sort type [string|float|date], defaults
+# defaults are the default options to addField in IndexManager
+# these can be overridden on model assignment
+registerPropertyType('string', noop, noop, 'string', {'store' : True,
+ 'exact' : True,
+ 'sortable' : True})
+
+registerPropertyType('text', noop, noop, 'string', {'store' : True,
+ 'exact' : False,
+ 'sortable' : False})
+
+registerPropertyType('binary', noop, noop, None, {'store' : True,
+ 'exact' : False,
+ 'sortable' : False})
+
+registerPropertyType('number', str, float, 'float', {'store' : True,
+ 'exact' : True,
+ 'sortable' : True})
+
+registerPropertyType('date', dateenc, datedec, 'date', {'store' : True,
+ 'exact' : True,
+ 'sortable' : True
+ })
+
+
+class Model(object):
+ """Object containing the field/property model used by the
+ system"""
+
+ def __init__(self):
+ self.fields = {}
+ self.fieldnames = []
+
+ def addField(self, key, kind, **kwargs):
+ """ Add a field to the model.
+ key -- field name
+ kind -- type by name (registered with registerPropertyType)
+ kwargs -- overrides and additional values to the default
+ arguments supplied by kind
+ """
+ if key in self.fields:
+ raise KeyError("""Another source tried to add %s field to
+ the model""" % key)
+
+ impl = propertyByKind(kind)
+ options = impl.defaults.copy()
+ if kwargs: options.update(kwargs)
+ if impl.xapian_sort_type:
+ if 'type' not in options:
+ options['type'] = impl.xapian_sort_type
+
+ self.fields[key] = (key, kind, options)
+ self.fieldnames.append(key)
+ return self
+
+ def addFields(self, *args):
+ """ List of arguments to addField """
+ for arg in args: self.addField(arg[0], arg[1], **arg[2])
+ return self
+
+ def apply(self, indexmanager):
+ addField = indexmanager.addField
+ for fn in self.fieldnames:
+ args = self.fields[fn]
+ addField(args[0], **args[2])
+
+
+defaultModel = Model().addFields(
+ p('text', 'text'),
+ # vid is version id
+ p('vid', 'number', store=True, exact=True, sortable=True, type="float"),
+ p('filename', 'string', store=True, exact=True),
+ # Title has additional weight
+ p('title', 'text', store=True, exact=False, weight=2, sortable=True),
+ p('url', 'string', store=True, exact=True, sortable=True),
+ p('mimetype', 'string', store=True, exact=True),
+ p('author', 'string', store=True, exact=True),
+ p('language', 'string', store=True, exact=True),
+ p('ctime', 'date', store=True, exact=True, sortable=True, type='date'),
+ p('mtime', 'date', store=True, exact=True, sortable=True, type='date'),
+ # this will just be a space delimited list of tags
+ # indexed with the content
+ # I give them high weight as they have user given semantic value.
+ p('tags', 'text', store=True, exact=False, weight=3, sortable=True),
+ )
+
+
+class Content(object):
+ """A light weight proxy around Xapian Documents from secore.
+ This provides additional methods which are used in the
+ backingstore to assist in storage
+ """
+ __slots__ = ('_doc', '_backingstore', '_file')
+
+ def __init__(self, xapdoc, backingstore=None):
+ self._doc = xapdoc
+ self._backingstore = backingstore
+ self._file = None
+
+ def get_property(self, key, default=_marker):
+ result = self._doc.data.get(key, default)
+ if result is _marker: raise KeyError(key)
+ if isinstance(result, list) and len(result) == 1:
+ return result[0]
+ return result
@property
- def filename(self): return self.file.name
+ def properties(self):
+ d = {}
+ for k, v in self.data.iteritems():
+ if isinstance(v, list) and len(v) == 1:
+ v = v[0]
+ d[k] = v
+ return d
+
def suggestName(self):
# we look for certain known property names
@@ -89,8 +243,7 @@ class Content(object):
f, e = os.path.splitext(filename)
if e: return filename, None
if ext: return "%s.%s" % (filename, ext), None
- elif ext:
- return None, ext
+ elif ext: return None, ext
else:
# try to get an extension from the mimetype if available
mt = self.get_property('mime_type', None)
@@ -99,279 +252,35 @@ class Content(object):
if ext: return None, ext
return None, None
- def get_data(self):
- f = self.file
- t = f.tell()
- data = f.read()
- f.seek(t)
- return data
-
- def set_data(self, filelike):
- self.backingstore.set(self.id, filelike)
-
- data = property(get_data, set_data)
-
-
-class BackingStoreContentMapping(MapperExtension):
- """This mapper extension populates Content objects with the
- binding to the backing store the files are kept on, this allow the
- file-like methods to work as expected on content
- """
- def __init__(self, backingstore):
- MapperExtension.__init__(self)
- self.backingstore = backingstore
-
- def populate_instance(self, mapper, selectcontext, row, instance, identitykey, isnew):
- """called right before the mapper, after creating an instance
- from a row, passes the row to its MapperProperty objects which
- are responsible for populating the object's attributes. If
- this method returns EXT_PASS, it is assumed that the mapper
- should do the appending, else if this method returns any other
- value or None, it is assumed that the append was handled by
- this method.
-
- """
- instance.backingstore = self.backingstore
- # allow normal population to happen
- return EXT_PASS
-
-
-class Property(object):
- """A typed key value pair associated with a content object.
- This is the objects metadata. The value side of the kv pair is
- typically encoded as a UTF-8 String. There are however cases where
- richer metadata is required by the application using the
- datastore.
- In these cases the type field is overridden to encode a reference
- to another object that must be used to satisfy this value. An
- example of this would be storing a PNG thumbnail as the a
- value. In a case such as that the value should be set to a path or
- key used to find the image on stable storage or in a database and
- the type field will be used to demarshall it through this object.
- """
- def __init__(self, key, value, type='string'):
- self.key = key
- self.value = value
- self.type = type
-
- def __repr__(self):
- return "<%s %s:%r>" % (self.__class__.__name__,
- self.key, self.value)
- def marshall(self):
- """Return the value marshalled as a string"""
- return str(self.value)
-
-class TextProperty(Property):
- """A text property is one that will also get full automatic text
- indexing when available. This is used for fields like title where
- searching in the text is more important than doing a direct match
- """
- def __init__(self, key, value, type='text'):
- Property.__init__(self, key, value, type)
-
- def get_value(self): return self._value
- def set_value(self, value): self._value = value
- value = property(get_value, set_value)
-
+ def get_file(self):
+ if self._file is None or self._file.closed:
+ self.backingstore.get(self.id)
+ return self._file
-class DateProperty(Property):
- format = "%Y-%m-%dT%H:%M:%S"
-
- def __init__(self, key, value, type="date"):
- self._value = None
- Property.__init__(self, key, value, type)
-
- def get_value(self):
- # parse the value back into a datetime
- # XXX: strptime on datetime is a 2.5 thing :(
- # XXX: we lose timezone in this conversion currently
- if not self._value: return None
- ti = time.strptime(self._value, self.format)
- dt = datetime.datetime(*(ti[:-2]))
- dt = dt.replace(microsecond=0)
- return dt
-
- def set_value(self, value):
- if isinstance(value, basestring):
- # XXX: there is an issue with microseconds not getting parsed
- ti = time.strptime(value, self.format)
- value = datetime.datetime(*(ti[:-2]))
- value = value.replace(microsecond=0)
-
- self._value = value.isoformat()
+ def set_file(self, fileobj):
+ self._file = fileobj
+ file = property(get_file, set_file)
- value = property(get_value, set_value)
+ @property
+ def filename(self): return self.file.name
- def marshall(self): return self.value.isoformat()
-
+ @property
+ def contents(self): return self.file.read()
-class NumberProperty(Property):
- def __init__(self, key, value, type="number"):
- Property.__init__(self, key, value, type)
-
- def get_value(self): return float(self._value)
- def set_value(self, value): self._value = value
- value = property(get_value, set_value)
+ @property
+ def backingstore(self): return self._backingstore
+ @property
+ def id(self): return self._doc.id
-class BinaryProperty(Property):
- # base64 encode binary data
- def __init__(self, key, value, type="binary"):
- Property.__init__(self, key, value, type)
-
- def get_value(self): return self._value.decode('base64')
- def set_value(self, value): self._value = value.encode('base64')
- value = property(get_value, set_value)
-
-
-class Model(object):
- """ Manages the global state of the metadata model index. This is
- intended to only be consumed by an olpc.datastore.query.QueryManager
- instance for the management of its metadata.
-
- >>> m = Model()
- >>> m.prepare(querymanager)
-
- >>> m.content
- ... # Content Table
-
- >>> m['content']
- ... # content Mapper
-
- For details see the sqlalchemy documentation
-
- """
-
- def __init__(self):
- self.tables = {}
- self.mappers = {}
+ @property
+ def data(self): return self._doc.data
- def __getattr__(self, key): return self.tables[key]
- def __getitem__(self, key): return self.mappers[key]
-
-
- def prepare(self, querymanager):
- self.querymanager = querymanager
+## class Buddy(object):
+## """A co-author on content. Information is collected and managed
+## here"""
+## pass
- # a single session manages the exclusive access we keep to the
- # db.
- global context
- self.session = create_session(bind_to=self.querymanager.db)
- context[self.querymanager.backingstore] = self.session
-
- # content object
- content = Table('content',
- self.querymanager.metadata,
- Column('id', String, primary_key=True, nullable=False),
- Column('activity_id', Integer),
- Column('checksum', String,),
- UniqueConstraint('id', name='content_key')
- )
- Index('content_activity_id_idx', content.c.activity_id)
-
- # the properties of content objects
- properties = Table('properties',
- self.querymanager.metadata,
- Column('id', Integer, Sequence('property_id_seq'), primary_key=True),
- Column('content_id', Integer, ForeignKey('content.id')),
- Column('key', Unicode, ),
- Column('value', Unicode, ),
- Column('type', Unicode, ),
- # unique key to content mapping
- UniqueConstraint('content_id', 'key',
- name='property_content_key')
- )
-
- Index('property_key_idx', properties.c.key)
- Index('property_type_idx', properties.c.type)
-
- # storage
- storage = Table('storage',
- self.querymanager.metadata,
- Column('id', String, primary_key=True),
- Column('description', String, ),
- Column('uri', String, )
- )
-
- # storage -> * content
- # XXX: this could be a purely runtime in-memory construct
- # removing the storage table as well. Would depend in part on
- # the frequency of the garbage collection runs and the
- # frequency of connection to stable storage
- storage_content = Table('storage_content',
- self.querymanager.metadata,
- Column('storage_id', Integer, ForeignKey('storage.id')),
- Column('content_id', Integer, ForeignKey('content.id')),
- )
- Index('idx_storage_content_content_id', storage_content.c.content_id)
-
- # Object Mapping
- # the query manager provides a mapping extension for
- # Content <-> BackingStore binding
-
- # XXX gross and not what we want, we can only define mappers
- # once but we may have more than one datastore.
- # this can impact all sqla in the runtime though
- clear_mappers()
-
-
- content_mapper = mapper(Content, content,
- extension=self.querymanager.content_ext,
- properties = {
- 'properties' : relation(Property,
- cascade="all,delete-orphan",
- backref='content',
- lazy=True),
- },
-
- )
-
- # retain reference to these tables to use for queries
- self.tables['content'] = content
- self.tables['properties'] = properties
- self.tables['storage'] = storage
- self.tables['storage_content'] = storage_content
-
- # and the mappers (though most likely not needed)
- property_mapper = mapper(Property, properties, polymorphic_on=properties.c.type)
- self.mappers['properties'] = property_mapper
- self.mappers['content'] = content_mapper
-
- # default Property types are mapped to classes here
- self.addPropertyType(DateProperty, 'date')
- self.addPropertyType(NumberProperty, 'number')
- self.addPropertyType(TextProperty, 'text')
- self.addPropertyType(BinaryProperty, 'binary')
-
-
-
- def addPropertyType(self, PropertyClass, typename,
- map_value=True, **kwargs):
- """Register a new type of Property. PropertyClass should be a
- subclass of Property, typename is the textual
- name of the new Property type.
-
- The flag map_value indicates if Property.value should
- automatically be diverted to _value so that you can more
- easily manage the interfaces 'value' as a Python property
- (descriptor)
-
- Keyword args will be passed to the properties dictionary of
- the sqlalchemy mapper call. See sqlalchemy docs for additional
- details.
- """
- properties = {}
- properties.update(kwargs)
- if map_value is True:
- properties['_value'] = self.properties.c.value
- mapper(PropertyClass,
- inherits=self.mappers['properties'],
- polymorphic_identity=typename,
- properties=properties
- )
-
- registerPropertyType(typename, PropertyClass)
-
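
The base64 workaround in model.py is worth a second look: Python's base64 decoder ignores whitespace, so a space can be inserted after every 212-character run of the encoded stream to keep Xapian term sizes bounded, then stripped back out on decode. A round-trip sketch of the two helpers defined above:

    import re

    base64hack = re.compile("(\S{212})")
    def base64enc(value): return ' '.join(base64hack.split(value.encode('base64')))
    def base64dec(value): return value.replace(' ', '').decode('base64')

    blob = '\x89PNG\x00fake-thumbnail-bytes' * 64   # arbitrary binary data
    assert base64dec(base64enc(blob)) == blob       # lossless round trip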
diff --git a/src/olpc/datastore/xapianindex.py b/src/olpc/datastore/xapianindex.py
index 5772433..b02f4af 100644
--- a/src/olpc/datastore/xapianindex.py
+++ b/src/olpc/datastore/xapianindex.py
@@ -28,10 +28,26 @@ from olpc.datastore.utils import create_uid
# Setup Logger
logger = logging.getLogger('org.sugar.datastore.xapianindex')
+class ContentMappingIter(object):
+ """An iterator over a set of results from a search.
+
+ """
+ def __init__(self, results, backingstore):
+ self._results = results
+ self._backingstore = backingstore
+ self._iter = iter(results)
+
+ def __iter__(self): return self
+
+ def next(self):
+ searchresult = self._iter.next()
+ return model.Content(searchresult, self._backingstore)
-class IndexManager(object):
- def __init__(self, language='en'):
+class IndexManager(object):
+ DEFAULT_DATABASE_NAME = 'index'
+
+ def __init__(self, default_language='en'):
# We will maintain two connections to the database
# we trigger automatic flushes to the read_index
# after any write operation
@@ -39,30 +55,52 @@ class IndexManager(object):
self.read_index = None
self.queue = Queue(0)
self.indexer_running = False
- self.language = language
+ self.language = default_language
+ self.backingstore = None
+
self.fields = set()
#
# Initialization
- def connect(self, repo):
+ def connect(self, repo, **kwargs):
if self.write_index is not None:
- warnings.warn('''Requested redundant connect''', RuntimeWarning)
-
+ warnings.warn('''Requested redundant connect to index''',
+ RuntimeWarning)
+
+ self.repo = repo
self.write_index = secore.IndexerConnection(repo)
- self.setupFields()
+
+ # configure the database according to the model
+ datamodel = kwargs.get('model', model.defaultModel)
+ datamodel.apply(self)
+
+ # store a reference
+ self.datamodel = datamodel
self.read_index = secore.SearchConnection(repo)
-
+
+ self.flush()
+
# by default we start the indexer now
self.startIndexer()
+ def bind_to(self, backingstore):
+ # signal from backingstore that its our parent
+ self.backingstore = backingstore
+
+ # flow control
+ def flush(self):
+ """Called after any database mutation"""
+ self.write_index.flush()
+ self.read_index.reopen()
+
def stop(self):
self.stopIndexer()
self.write_index.close()
self.read_index.close()
-
+ # Index thread management
def startIndexer(self):
self.indexer_running = True
self.indexer = threading.Thread(target=self.indexThread,
@@ -76,33 +114,53 @@ class IndexManager(object):
self.indexer_running = False
self.indexer.join()
- def enque(self, uid, vid, doc):
- self.queue.put((uid, vid, doc))
+ def enque(self, uid, vid, doc, created):
+ self.queue.put((uid, vid, doc, created))
def indexThread(self):
# process the queue
+ # XXX: there is currently no way to remove items from the queue
+ # for example if a USB stick is added and quickly removed
+ # the mount should however get a stop() call which would
+ # request that the indexing finish
+ logger = logging.getLogger('org.sugar.datastore.xapianindex.indexThread')
while self.indexer_running:
# include timeout here to ease shutdown of the thread
# if this is a non-issue we can simply allow it to block
try:
- uid, vid, doc = self.queue.get(timeout=0.5)
- self.write_index.add(doc)
+ uid, vid, doc, created = self.queue.get(timeout=0.5)
+
+ if created: self.write_index.add(doc)
+ else: self.write_index.replace(doc)
+
+ # XXX: if there is still work in the queue we could
+ # delay the flush()
self.flush()
+
logger.info("Indexed Content %s:%s" % (uid, vid))
self.queue.task_done()
except Empty:
pass
-
+ except:
+ logger.exception("Error in index thread. Attempting recovery")
+ try: self.write_index.close()
+ except: pass
+ self.write_index = secore.IndexerConnection(self.repo)
+ self.read_index.reopen()
+
+
+
@property
def working(self):
"""Does the indexer have work"""
- return not self.queue.empty()
-
- def flush(self):
- """Called after any database mutation"""
- self.write_index.flush()
- self.read_index.reopen()
+ return self.indexer_running and not self.queue.empty()
+ def complete_indexing(self):
+ """Intentionally block until the indexing is complete. Used
+ primarily in testing.
+ """
+ self.queue.join()
+
#
# Field management
def addField(self, key, store=True, exact=False, sortable=False,
@@ -127,35 +185,29 @@ class IndexManager(object):
# track this to find missing field configurations
self.fields.add(key)
-
- def setupFields(self):
- # add standard fields
- # text is content objects information
- self.addField('text', store=False, exact=False)
-
- # vid is version id
- self.addField('vid', store=True, exact=True, sortable=True, type="float")
-
- # Title has additional weight
- self.addField('title', store=True, exact=False, weight=2, sortable=True)
- self.addField('mimetype', store=True, exact=True)
- self.addField('author', store=True, exact=True)
- self.addField('language', store=True, exact=True)
-
-
- self.addField('ctime', store=True, exact=True, sortable=True, type='date')
- self.addField('mtime', store=True, exact=True, sortable=True, type='date')
-
#
# Index Functions
+ def mapProperties(self, props):
+ """data normalization function, maps dicts of key:kind->value
+ to Property objects
+ """
+ d = {}
+ for k,v in props.iteritems():
+ p = model.Property.fromstring(k, v)
+ d[p.key] = p
+ return d
+
def index(self, props, filename=None):
"""Index the content of an object.
Props must contain the following:
key -> Property()
"""
+ props = self.mapProperties(props)
doc = secore.UnprocessedDocument()
add = doc.fields.append
+ fp = None
+ created = False
if filename:
mimetype = props.get("mimetype")
@@ -177,7 +229,10 @@ class IndexManager(object):
vid = props.pop('vid', None)
if uid: uid = uid.value
- else: uid = create_uid()
+ else:
+ uid = create_uid()
+ created = True
+
if vid: vid = vid.value
else: vid = "1.0"
@@ -187,19 +242,32 @@ class IndexManager(object):
#
# Property indexing
for k, prop in props.iteritems():
- if isinstance(prop, model.BinaryProperty): continue
value = prop.value
+
if k not in self.fields:
warnings.warn("""Missing field configuration for %s""" % k,
- RuntimeWarning)
+ RuntimeWarning)
continue
+
add(secore.Field(k, value))
-
+
# queue the document for processing
- self.enque(uid, vid, doc)
+ self.enque(uid, vid, doc, created)
return uid
+ def get(self, uid):
+ doc = self.read_index.get_document(uid)
+ if not doc: raise KeyError(uid)
+ return model.Content(doc, self.backingstore)
+
+ def delete(self, uid):
+ # does this need queuing?
+ # the higher level abstractions have to handle interaction
+ # with versioning policy and so on
+ self.write_index.delete(uid)
+ self.flush()
+
#
# Search
def search(self, query, start_index=0, end_index=50):
@@ -210,10 +278,10 @@ class IndexManager(object):
preceded by a "+" sign to indicate that the term is required, or a "-"
to indicate that it is required to be absent.
"""
- # this will return the [(id, relevance), ...], estimated
- # result count
ri = self.read_index
- if isinstance(query, dict):
+ if not query:
+ q = self.read_index.query_all()
+ elif isinstance(query, dict):
queries = []
# each term becomes part of the query join
for k, v in query.iteritems():
@@ -221,11 +289,40 @@ class IndexManager(object):
q = ri.query_composite(ri.OP_AND, queries)
else:
q = self.parse_query(query)
-
results = ri.search(q, start_index, end_index)
- return [r.id for r in results]
+ count = results.matches_estimated
+
+ # map the result set to model.Content items
+ return ContentMappingIter(results, self.backingstore), count
+
+
+ def get_uniquevaluesfor(self, property):
+ # XXX: this is very sketchy code
+ # try to get the searchconnection to support this directly
+ # this should only apply to EXACT fields
+ r = set()
+ prefix = self.read_index._field_mappings.get_prefix(property)
+ plen = len(prefix)
+ termiter = self.read_index._index.allterms(prefix)
+ for t in termiter:
+ term = t.term
+ if len(term) > plen:
+ term = term[plen:]
+ if term.startswith(':'): term = term[1:]
+ r.add(term)
+
+ # r holds the textual representation of the fields value set
+ # if the type of field or property needs conversion to a
+ # different python type this has to happen now
+ descriptor = self.datamodel.fields.get(property)
+ if descriptor:
+ kind = descriptor[1].get('type', 'string')
+ impl = model.propertyByKind(kind)
+ r = set([impl.get(i) for i in r])
+ return r
+
def parse_query(self, query):
# accept standard web query like syntax
# 'this' -- match this
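
For callers, the search API change is the significant one: search() now returns a (ContentMappingIter, estimated count) pair instead of a list of ids, and index() only queues documents for the background thread, so callers that need immediate visibility must block on complete_indexing(). A minimal end-to-end sketch (the index path is illustrative):

    from olpc.datastore.xapianindex import IndexManager

    im = IndexManager()
    im.connect('/tmp/demo_index')

    uid = im.index({'title': 'PDF Document',
                    'mimetype': 'application/pdf'})
    im.complete_indexing()            # queue.join() under the hood

    results, count = im.search('title:PDF')
    for content in results:           # ContentMappingIter yields model.Content
        print content.id, content.get_property('title')
    im.stop()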
diff --git a/tests/Makefile b/tests/Makefile
index 7961b02..c2581cb 100644
--- a/tests/Makefile
+++ b/tests/Makefile
@@ -2,10 +2,9 @@
# its not an option to configure
PYTHON=python
-all: test
+all: clean test
test:
- @rm -rf fulltext
@${PYTHON} runalltests.py
valgrind:
@@ -17,6 +16,7 @@ profile:
@${PYTHON} ./profilealltests.py
clean:
+ @${PYTHON} ./cleaner.py
@find . -name "*.pyc" -exec rm {} \;
@find . -name "*~" -exec rm {} \;
@find . -name "hotspot*" -exec rm {} \;
diff --git a/tests/milestone_1.txt b/tests/milestone_1.txt
index bde3720..2472260 100644
--- a/tests/milestone_1.txt
+++ b/tests/milestone_1.txt
@@ -12,6 +12,10 @@ datastore.
First, create and connect the store.
+>>> from testutils import waitforindex
+>>> import os
+>>> assert os.system('rm -rf /tmp/test_ds') == 0
+
>>> from olpc.datastore import DataStore
>>> from olpc.datastore import backingstore
@@ -35,11 +39,13 @@ Note that we retain no reference to the created documents.
Now we should be able to test the first requirement.
* Get the unique ids of all the objects in the store.
+>>> waitforindex(ds)
+
>>> results, count = ds.find()
A find command without any parameters will return everything in the store.
-* Get an object from the store given his uid.
+* Get an object from the store given its uid.
Here we manually cycle through the results looking for the title we
want.
@@ -51,30 +57,24 @@ want.
* Get the object metadata.
>>> c1.properties
-[...]
+{...}
* Get the object file.
>>> c1.filename
'/tmp/...'
->>> c1.data
+>>> c1.contents
'this is the first document'
>>> c1.file
<open file ...>
-Or if you prefer access through the datastore (which is how DBus would
-use it)
-
->>> fn = ds.get_filename(first_uid)
->>> ds.get_data(first_uid)
-'this is the first document'
-
Now we can modify that file and then
* Push the changes made to the file back to the store.
* Update the metadata of an object.
+>>> fn = c1.filename
>>> fp = open(fn, 'a')
>>> print >>fp, "more content"
>>> fp.close()
@@ -89,4 +89,4 @@ This is the basis of milestone 1.
>>> ds.stop()
>>> del ds
-
+>>> assert os.system('rm -rf /tmp/test_ds') == 0
diff --git a/tests/mountpoints.txt b/tests/mountpoints.txt
index 9a821b5..1066da0 100644
--- a/tests/mountpoints.txt
+++ b/tests/mountpoints.txt
@@ -12,7 +12,7 @@ mounting a backingstore on the datastore.
>>> from olpc.datastore import DataStore
>>> from olpc.datastore import backingstore
->>> from testutils import tmpData
+>>> from testutils import tmpData, waitforindex
>>> import dbus
@@ -41,6 +41,7 @@ Now lets create some content
We can now, if we wish verify which mount point this content came
from.
+>>> waitforindex(ds)
>>> c1 = ds.get(u1)
>>> assert c1.backingstore.id == mountpoint
@@ -61,6 +62,8 @@ Now lets add another mount point.
Now lets create a new content item.
>>> u3 = ds.create(dict(title="Document 3", mountpoint=mp2), tmpData("""document three"""))
+>>> waitforindex(ds)
+
We explicitly passed a mount point here. Lets examine the properties of
the object and verify this.
>>> c3 = ds.find(dict(title="Document 3"))[0][0]
@@ -102,6 +105,8 @@ Register the filesystem type
If that worked it should have imported content on load().
+>>> waitforindex(ds)
+
>>> result, count = ds.find(dict(fulltext="four"))
>>> assert count == 1
>>> assert result[0]['mountpoint'] == mp3
@@ -114,6 +119,8 @@ as DBus data.
>>> mp3 = ds.mount("inplace:/tmp/store3", dict(title=dbus.String("Fake USB again")))
+>>> waitforindex(ds)
+
>>> result, count = ds.find(dict(fulltext="four"))
>>> assert count == 1
>>> assert result[0]['mountpoint'] == mp3
diff --git a/tests/properties.txt b/tests/properties.txt
index 689414f..dd93b69 100644
--- a/tests/properties.txt
+++ b/tests/properties.txt
@@ -8,16 +8,23 @@ properties to content and managing them.
>>> from olpc.datastore import DataStore
->>> from olpc.datastore import backingstore
+>>> from olpc.datastore import backingstore, model
>>> from testutils import tmpData
>>> import dbus
Set up two mount points.
->>> ds = DataStore(sync_index=True)
+>>> ds = DataStore()
>>> ds.registerBackend(backingstore.FileBackingStore)
->>> mp1 = ds.mount("/tmp/store1", dict(title="Primary Storage"))
->>> mp2 = ds.mount("/tmp/store2", dict(title="Secondary Storage"))
+
+Extend the model to retain a 'year' property used below.
+
+>>> dm = model.defaultModel.addField('year', store=True, exact=True, sortable=True, type="float")
+
+Mount a couple of stores.
+
+>>> mp1 = ds.mount("/tmp/store1", {'title' : "Primary Storage", 'indexmanager.model' : dm})
+>>> mp2 = ds.mount("/tmp/store2", {'title' : "Secondary Storage", 'indexmanager.model' : dm})
Create some content on each.
diff --git a/tests/query.txt b/tests/query.txt
index 2c58851..1e7624e 100644
--- a/tests/query.txt
+++ b/tests/query.txt
@@ -47,7 +47,7 @@ This returned a list of all properties on the Content object in which
case we can find the property by enumeration. The other option is
using the get_properties call on Content
->>> a.get_properties(key='title')
+>>> a.get_properties(dict(key='title'))
[<TextProperty title:'New Content'>]
Using the query manager API we are able to update the
@@ -57,11 +57,11 @@ that this works lets attach another property.
>>> qm.update(a, dict(author='Benjamin'))
A request for title still returns only the title property.
->>> a.get_properties(key='title')
+>>> a.get_properties(dict(key='title'))
[<TextProperty title:'New Content'>]
And a request for author works as expected.
->>> a.get_properties(key='author')
+>>> a.get_properties(dict(key='author'))
[<Property author:'Benjamin'>]
>>> qm.update(a, dict(foo='bar'))
@@ -91,11 +91,9 @@ Here we want to show that certain types of Properties map to
specialized implementations automatically based on their type. 'ctime'
is a DateTime Property and we can verify that it is returned properly
from the mapping layer with the following.
->>> ctimeProp = a.get_properties(key='ctime')[0]
->>> ctimeProp.type == "date"
+>>> ctimeProp = a.get_properties(dict(key='ctime'))[0]
+>>> ctimeProp.kind == "date"
True
->>> type(ctimeProp)
-<class 'olpc.datastore.model.DateProperty'>
Special support is needed to make dates easily addressable within the
datastore. The properties 'ctime', creation time, and 'mtime',
@@ -144,7 +142,7 @@ refers. This is available through the 'content' attribute of
properties. Only properties bound to content and synchronized with the
database have this property.
->>> p = a.get_properties(key='author')[0]
+>>> p = a.get_properties(dict(key='author'))[0]
>>> p.content
<Content id:...>
diff --git a/tests/runalltests.py b/tests/runalltests.py
index bbf0f97..28802ec 100644
--- a/tests/runalltests.py
+++ b/tests/runalltests.py
@@ -14,10 +14,9 @@ import unittest
import doctest
from pkg_resources import resource_filename
-from sqlalchemy import clear_mappers
doctests = [
- resource_filename(__name__, "query.txt"),
+ resource_filename(__name__, "xapianindex.txt"),
resource_filename(__name__, "milestone_1.txt"),
resource_filename(__name__, "sugar_demo_may17.txt"),
resource_filename(__name__, "milestone_2.txt"),
@@ -44,13 +43,14 @@ sys.path.insert(0, test_lib)
def tearDownDS(test):
- # reset the module global mappers used in SQLAlchemy between tests
- clear_mappers()
# and remove the test repository used in some tests
os.system('rm -rf /tmp/test_ds')
def test_suite():
suite = unittest.TestSuite()
+ if len(sys.argv) > 1:
+ doctests = sys.argv[1:]
+
for dt in doctests:
suite.addTest(doctest.DocFileSuite(dt,
optionflags=doctest_options, tearDown=tearDownDS))
@@ -68,5 +68,6 @@ def test_suite():
if __name__ == "__main__":
runner = unittest.TextTestRunner(verbosity=1)
- runner.run(test_suite())
+ suite = test_suite()
+ runner.run(suite)
diff --git a/tests/sugar_demo_may17.txt b/tests/sugar_demo_may17.txt
index c899799..f242140 100644
--- a/tests/sugar_demo_may17.txt
+++ b/tests/sugar_demo_may17.txt
@@ -2,6 +2,7 @@ How Sugar will interact with the DS for the May 17th demo in Argentina:
>>> from olpc.datastore import DataStore
>>> from olpc.datastore import backingstore
+>>> from testutils import waitforindex
>>> ds = DataStore()
>>> ds.registerBackend(backingstore.FileBackingStore)
>>> assert ds.mount("/tmp/test_ds")
@@ -9,11 +10,14 @@ How Sugar will interact with the DS for the May 17th demo in Argentina:
Create an entry without data:
>>> uid = ds.create(dict(title="New entry"), '')
+>>> waitforindex(ds)
+
>>> ds.get_filename(uid)
''
Update an entry without data:
>>> ds.update(uid, dict(title="New entry still without content"), '')
+>>> waitforindex(ds)
>>> ds.get_filename(uid)
''
@@ -23,6 +27,7 @@ Add some data to the same entry:
>>> print >>fp, "some content"
>>> fp.close()
>>> ds.update(uid, dict(title="Same entry now with some content"), fp.name)
+>>> waitforindex(ds)
Retrieve that data:
>>> fn = ds.get_filename(uid)
@@ -36,6 +41,7 @@ Update again:
>>> print >>fp, "some other content"
>>> fp.close()
>>> ds.update(uid, dict(title="Same entry with some other content"), fp.name)
+>>> waitforindex(ds)
And retrieve again:
>>> fn = ds.get_filename(uid)
@@ -60,6 +66,7 @@ Set content as pdf:
>>> ds.update(uid, dict(title="Same entry with some content in pdf"), 'test.pdf')
>>> ds.update(uid, dict(title="Same entry with some content in doc"), 'test.doc')
>>> ds.update(uid, dict(title="Same entry with some content in odt"), 'test.odt')
+>>> waitforindex(ds)
>>> ds.stop()
>>> del ds
diff --git a/tests/test_backingstore.py b/tests/test_backingstore.py
index 28fdeba..a13e28c 100644
--- a/tests/test_backingstore.py
+++ b/tests/test_backingstore.py
@@ -1,21 +1,21 @@
import unittest
-from StringIO import StringIO
+from testutils import tmpData, waitforindex
from olpc.datastore import backingstore
-from sqlalchemy import clear_mappers
import os
DEFAULT_STORE = '/tmp/_bs_test'
class Test(unittest.TestCase):
- def tearDown(self):
+ def setUp(self):
if os.path.exists(DEFAULT_STORE):
os.system("rm -rf %s" % DEFAULT_STORE)
- clear_mappers()
+ def tearDown(self):
+ if os.path.exists(DEFAULT_STORE):
+ os.system("rm -rf %s" % DEFAULT_STORE)
def test_fsstore(self):
- clear_mappers()
bs = backingstore.FileBackingStore(DEFAULT_STORE)
bs.initialize_and_load()
bs.create_descriptor()
@@ -28,20 +28,27 @@ class Test(unittest.TestCase):
d = """This is a test"""
d2 = "Different"
- c = bs.create(dict(title="A"), StringIO(d))
- obj = bs.get(c.id)
+ uid = bs.create(dict(title="A"), tmpData(d))
+
+ waitforindex(bs)
+
+ obj = bs.get(uid)
+
assert obj.get_property('title') == "A"
got = obj.file.read()
assert got == d
- bs.update(c.id, dict(title="B"), StringIO(d2))
- obj = bs.get(c.id)
+ bs.update(uid, dict(title="B"), tmpData(d2))
+
+ waitforindex(bs)
+
+ obj = bs.get(uid)
assert obj.get_property('title') == "B"
got = obj.file.read()
assert got == d2
- bs.delete(c.id)
- self.failUnlessRaises(KeyError, bs.get, c.id)
+ bs.delete(uid)
+ self.failUnlessRaises(KeyError, bs.get, uid)
def test_suite():
suite = unittest.TestSuite()
diff --git a/tests/test_model.py b/tests/test_model.py
index 6e8c896..d7aea45 100644
--- a/tests/test_model.py
+++ b/tests/test_model.py
@@ -1,35 +1,56 @@
import unittest
-from testutils import tmpData
+from testutils import tmpData, waitforindex
from olpc.datastore import DataStore
from olpc.datastore import model, backingstore
import datetime
import os
+
+DEFAULT_STORE = '/tmp/test_ds'
+
class Test(unittest.TestCase):
+ def setUp(self): os.system('rm -rf %s' % DEFAULT_STORE)
+ def tearDown(self): os.system('rm -rf %s' % DEFAULT_STORE)
+
def test_dateproperty(self):
n = datetime.datetime.now()
# we have to kill the microseconds as
# time.strptime which we must use in 2.4 doesn't parse it
n = n.replace(microsecond=0)
- p = model.DateProperty('ctime', n)
+ p = model.Property('ctime', n, 'date')
assert p.key == "ctime"
- assert p.value.isoformat() == n.isoformat()
+ # XXX: the 'date()' is a work around for a missing secore
+ # feature right now
+ assert p.value == n.date().isoformat()
+
def test_binaryproperty(self):
ds = DataStore()
ds.registerBackend(backingstore.FileBackingStore)
- ds.mount('/tmp/test_ds')
+
+ #add a custom field to the model
+ dm = model.defaultModel.addField('thumbnail',
+ store=True,
+ exact=False,
+ sortable=False)
+ ds.mount(DEFAULT_STORE, {'indexmanager.model' : dm})
+
+
data = open('test.jpg', 'r').read()
# binary data with \0's in it can cause dbus errors here
- uid = ds.create({'title' : "Document 1", 'thumbnail:binary' : data},
- tmpData("with image\0\0 prop"))
+ fn = tmpData("with image\0\0 prop")
+ uid = ds.create({'title' : "Document 1", 'thumbnail:binary' : data}, fn)
+
+ waitforindex(ds)
+
c = ds.get(uid)
assert c.get_property('thumbnail') == data
+
ds.stop()
- os.system('rm -rf /tmp/test_ds')
+
def test_suite():
suite = unittest.TestSuite()
diff --git a/tests/testutils.py b/tests/testutils.py
index 243747a..48d1060 100644
--- a/tests/testutils.py
+++ b/tests/testutils.py
@@ -1,5 +1,9 @@
import tempfile
import os
+import time
+
+from olpc.datastore.xapianindex import IndexManager
+from olpc.datastore.datastore import DataStore
def tmpData(data):
"""Put data into a temporary file returning the filename """
@@ -7,3 +11,17 @@ def tmpData(data):
os.write(fd, data)
os.close(fd)
return fn
+
+def waitforindex(obj, interval=0.1):
+ # wait for any/all index managers associated with object to finish
+ # indexing so that tests can do their thing
+ if isinstance(obj, IndexManager):
+ obj.complete_indexing()
+ elif isinstance(obj, DataStore):
+ for mp in obj.mountpoints.values():
+ im = mp.indexmanager
+ im.complete_indexing()
+ else:
+ # backingstore
+ obj.indexmanager.complete_indexing()
+
diff --git a/tests/xapianindex.txt b/tests/xapianindex.txt
index de495a6..5ef1d5c 100644
--- a/tests/xapianindex.txt
+++ b/tests/xapianindex.txt
@@ -16,20 +16,11 @@ First clean up any old test data.
>>> im = IndexManager()
>>> im.connect(index_home)
-A small utility method for wrapping a normal dict into proper property
-objects.
-
->>> def propsdict(**kwargs):
-... d = {}
-... for k,v in kwargs.iteritems():
-... d[k] = model.Property(k, v)
-... return d
-
Now add the file to the index.
->>> props = propsdict(title="PDF Document",
-... mimetype="application/pdf")
+>>> props = dict(title="PDF Document",
+... mimetype="application/pdf")
>>> uid = im.index(props, "test.pdf")
@@ -41,36 +32,42 @@ left, when it has none we expect our content to be indexed and searchable.
Searching on a property of the content works.
->>> assert im.search("PDF")[0] == uid
+>>> def expect(r, count=None):
+... if count: assert r[1] == count
+... return list(r[0])
+>>> def expect_single(r):
+... assert r[1] == 1
+... return r[0].next()
+>>> def expect_none(r):
+... assert r[1] == 0
+... assert list(r[0]) == []
+
+
+>>> assert expect_single(im.search("PDF")).id == uid
Searching into the binary content of the object works as well.
->>> assert im.search("peek")[0] == uid
+>>> assert expect_single(im.search("peek")).id == uid
Specifying a search that demands a document term be found only in the
title works as well.
->>> assert im.search('title:PDF')[0] == uid
->>> im.search('title:peek')
-[]
+>>> assert expect_single(im.search('title:PDF')).id == uid
+>>> expect_none(im.search('title:peek'))
Searching for documents that are PDF works as expected here. Here we
use the dictionary form of the query where each field name is given
and creates a search.
->>> assert im.search(dict(mimetype='application/pdf'))[0] == uid
-
-
-#Likewise excluding the match works as expected
-#>>> im.search('-title:PDF')
-#[]
-
+>>> assert expect_single(im.search(dict(mimetype='application/pdf'))).id == uid
Punctuation is fine.
->>> assert im.search("Don't peek")[0] == uid
+>>> assert expect_single(im.search("Don't peek")).id == uid
As well as quoted strings
->>> assert im.search(r'''"Don't peek"''')[0] == uid
+>>> assert expect_single(im.search(r'''"Don't peek"''')).id == uid
Cleanly shut down.
>>> im.stop()
+
+>>> assert os.system('rm -rf %s' % index_home) == 0