diff options
author | Benjamin Saller <bcsaller@objectrealms.net> | 2007-07-16 00:20:09 (GMT) |
---|---|---|
committer | Benjamin Saller <bcsaller@objectrealms.net> | 2007-07-16 00:20:09 (GMT) |
commit | fd0d4d345cab7fb3a011ae1c80624cca1f07545a (patch) | |
tree | dd8fc1467e82b2fa33b7cba54a91b53e2ad052f7 | |
parent | 73245ec13b84d3ff7f6750f325e012087009e6ae (diff) |
Changes for :date support.
See test_model for an example:
    uid = ds.create({'title' : "Document 1", 'thumbnail:binary' : data,
                     'ctime:date' : datetime.datetime.now().isoformat()}, filename)
-rw-r--r-- | src/olpc/datastore/model.py | 71 | ||||
-rw-r--r-- | src/olpc/datastore/utils.py | 44 | ||||
-rw-r--r-- | src/olpc/datastore/xapianindex.py | 11 | ||||
-rw-r--r-- | tests/runalltests.py | 2 | ||||
-rw-r--r-- | tests/test_model.py | 12 |
5 files changed, 115 insertions, 25 deletions
diff --git a/src/olpc/datastore/model.py b/src/olpc/datastore/model.py index baa162e..011c3f4 100644 --- a/src/olpc/datastore/model.py +++ b/src/olpc/datastore/model.py @@ -15,6 +15,8 @@ import mimetypes import os import time import warnings +from olpc.datastore.utils import timeparse + # XXX: Open issues # list properties - Contributors (a, b, c) @@ -25,21 +27,30 @@ import warnings propertyTypes = {} _marker = object() -def registerPropertyType(kind, get, set, xapian_sort_type=None, defaults=None): - propertyTypes[kind] = PropertyImpl(get, set, xapian_sort_type, defaults) +def registerPropertyType(kind, get, set, xapian_sort_type=None, + defaults=None, for_xapian=None, from_xapain=None): + propertyTypes[kind] = PropertyImpl(get, set, xapian_sort_type, + defaults, for_xapian=for_xapian, from_xapain=from_xapain) def propertyByKind(kind): return propertyTypes[kind] class PropertyImpl(object): - __slots__ = ('_get', '_set', 'xapian_sort_type', 'defaults') + __slots__ = ('_get', '_set', 'xapian_sort_type', 'defaults', '_for_xapian', '_from_xapian') - def __init__(self, get, set, xapian_sort_type=None, defaults=None): + def __init__(self, get, set, xapian_sort_type=None, defaults=None, + for_xapian=None, from_xapain=None): self._get, self._set = get, set self.xapian_sort_type = xapian_sort_type self.defaults = defaults + if not for_xapian: for_xapian = self._get + self._for_xapian = for_xapian + if not from_xapain: from_xapain = self._set + self._from_xapian = from_xapain def get(self, value): return self._get(value) def set(self, value): return self._set(value) + def for_xapian(self, value): return self._for_xapian(value) + def from_xapian(self, value): return self._from_xapian(value) class Property(object): """Light-weight property implementation. 
@@ -75,6 +86,10 @@ class Property(object): def set_value(self, value): self._value = self._impl.set(value) value = property(get_value, set_value) + @property + def for_xapian(self): return self._impl.for_xapian(self._value) + + def __str__(self): return str(self.value) class Model(object): @@ -128,12 +143,13 @@ class Content(object): This provides additional methods which are used in the backingstore to assist in storage """ - __slots__ = ('_doc', '_backingstore', '_file') + __slots__ = ('_doc', '_backingstore', '_file', '_model') - def __init__(self, xapdoc, backingstore=None): + def __init__(self, xapdoc, backingstore=None, model=None): self._doc = xapdoc self._backingstore = backingstore self._file = None + self._model = model def __repr__(self): return "<%s %s>" %(self.__class__.__name__, @@ -143,8 +159,11 @@ class Content(object): result = self._doc.data.get(key, default) if result is _marker: raise KeyError(key) if isinstance(result, list) and len(result) == 1: - return result[0] - return result + result = result[0] + field = self._model.fields.get(key) + kind = propertyByKind(field[1]) + return kind.from_xapian(result) + @property def properties(self): @@ -152,6 +171,9 @@ class Content(object): for k, v in self.data.iteritems(): if isinstance(v, list) and len(v) == 1: v = v[0] + field = self._model.fields.get(k) + kind = propertyByKind(field[1]) + v = kind.from_xapian(v) d[k] = v return d @@ -219,14 +241,27 @@ base64hack = re.compile("(\S{212})") def base64enc(value): return ' '.join(base64hack.split(value.encode('base64'))) def base64dec(value): return value.replace(' ', '').decode('base64') -dateformat = "%Y-%m-%dT%H:%M:%S" -def datedec(value, dateformat=dateformat): +DATEFORMAT = "%Y-%m-%dT%H:%M:%S" +def date2string(value): return value.replace(microsecond=0).isoformat() +def string2date(value): return timeparse(value, DATEFORMAT) + +def encode_datetime(value): + # encode datetime to timestamp (float) + # parse the typelib form to a datetime first + if 
isinstance(value, basestring): value = string2date(value) + return str(time.mktime(value.timetuple())) + +def decode_datetime(value): + # convert a float to a local datetime + return datetime.datetime.fromtimestamp(float(value)).isoformat() + +def datedec(value, dateformat=DATEFORMAT): ti = time.strptime(value, dateformat) dt = datetime.datetime(*(ti[:-2])) dt = dt.replace(microsecond=0) return dt -def dateenc(value, dateformat=dateformat): +def dateenc(value, dateformat=DATEFORMAT): if isinstance(value, basestring): # XXX: there is an issue with microseconds not getting parsed ti = time.strptime(value, dateformat) @@ -236,6 +271,8 @@ def dateenc(value, dateformat=dateformat): value = value.date() return value.isoformat() + + # type, get, set, xapian sort type [string|float|date], defaults # defaults are the default options to addField in IndexManager # these can be overridden on model assignment @@ -259,10 +296,12 @@ registerPropertyType('number', str, float, 'float', {'store' : True, 'exact' : True, 'sortable' : True}) -registerPropertyType('date', dateenc, datedec, 'date', {'store' : True, - 'exact' : True, - 'sortable' : True - }) +registerPropertyType('date', dateenc, datedec, 'float', {'store' : True, + 'exact' : True, + 'sortable' : True + }, + for_xapian=encode_datetime, + from_xapain=decode_datetime) @@ -272,6 +311,8 @@ defaultModel = Model().addFields( ('vid', 'number'), ('checksum', 'string'), ('filename', 'string'), + ('ext', 'string'), # its possible we don't store a filename, but + # only an extension we are interested in # Title has additional weight ('title', 'text', {'weight' : 2 }), ('url', 'string'), diff --git a/src/olpc/datastore/utils.py b/src/olpc/datastore/utils.py index b2afae0..5000cfb 100644 --- a/src/olpc/datastore/utils.py +++ b/src/olpc/datastore/utils.py @@ -1,4 +1,8 @@ +import datetime import dbus +import re +import time + class Singleton(type): """A singleton metaclass @@ -105,3 +109,43 @@ def sanitize_dbus(method): kw = 
_convert(kwargs) return method(self, *n, **kw) return decorator + +def timeparse(t, format): + """Parse a time string that might contain fractions of a second. + + Fractional seconds are supported using a fragile, miserable hack. + Given a time string like '02:03:04.234234' and a format string of + '%H:%M:%S', time.strptime() will raise a ValueError with this + message: 'unconverted data remains: .234234'. If %S is in the + format string and the ValueError matches as above, a datetime + object will be created from the part that matches and the + microseconds in the time string. + """ + try: + return datetime.datetime(*time.strptime(t, format)[0:6]).time() + except ValueError, msg: + if "%S" in format: + msg = str(msg) + mat = re.match(r"unconverted data remains:" + " \.([0-9]{1,6})$", msg) + if mat is not None: + # fractional seconds are present - this is the style + # used by datetime's isoformat() method + frac = "." + mat.group(1) + t = t[:-len(frac)] + t = datetime.datetime(*time.strptime(t, format)[0:6]) + microsecond = int(float(frac)*1e6) + return t.replace(microsecond=microsecond) + else: + mat = re.match(r"unconverted data remains:" + " \,([0-9]{3,3})$", msg) + if mat is not None: + # fractional seconds are present - this is the style + # used by the logging module + frac = "." + mat.group(1) + t = t[:-len(frac)] + t = datetime.datetime(*time.strptime(t, format)[0:6]) + microsecond = int(float(frac)*1e6) + return t.replace(microsecond=microsecond) + + raise diff --git a/src/olpc/datastore/xapianindex.py b/src/olpc/datastore/xapianindex.py index 9e7ace9..ac1fa82 100644 --- a/src/olpc/datastore/xapianindex.py +++ b/src/olpc/datastore/xapianindex.py @@ -40,16 +40,17 @@ class ContentMappingIter(object): """An iterator over a set of results from a search. 
""" - def __init__(self, results, backingstore): + def __init__(self, results, backingstore, model): self._results = results self._backingstore = backingstore self._iter = iter(results) + self._model = model def __iter__(self): return self def next(self): searchresult = self._iter.next() - return model.Content(searchresult, self._backingstore) + return model.Content(searchresult, self._backingstore, self._model) class IndexManager(object): @@ -280,7 +281,7 @@ class IndexManager(object): # # Property indexing for k, prop in props.iteritems(): - value = prop.value + value = prop.for_xapian if k not in self.fields: warnings.warn("""Missing field configuration for %s""" % k, @@ -297,7 +298,7 @@ class IndexManager(object): def get(self, uid): doc = self.read_index.get_document(uid) if not doc: raise KeyError(uid) - return model.Content(doc, self.backingstore) + return model.Content(doc, self.backingstore, self.datamodel) def delete(self, uid): # does this need queuing? @@ -331,7 +332,7 @@ class IndexManager(object): count = results.matches_estimated # map the result set to model.Content items - return ContentMappingIter(results, self.backingstore), count + return ContentMappingIter(results, self.backingstore, self.datamodel), count def get_uniquevaluesfor(self, property): diff --git a/tests/runalltests.py b/tests/runalltests.py index e3757f2..896972f 100644 --- a/tests/runalltests.py +++ b/tests/runalltests.py @@ -15,7 +15,7 @@ import doctest from pkg_resources import resource_filename import logging -logging.basicConfig(level=logging.DEBUG, +logging.basicConfig(level=logging.WARN, format="%(asctime)-15s %(name)s %(levelname)s: %(message)s", stream=sys.stderr) diff --git a/tests/test_model.py b/tests/test_model.py index 2ac2fb2..87ed94f 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -18,7 +18,7 @@ class Test(unittest.TestCase): # we have to kill the microseconds as # time.strptime which we must use in 2.4 doesn't parse it n = n.replace(microsecond=0) - p 
= model.Property('ctime', n, 'date') + p = model.Property('ctime', n.isoformat(), 'date') assert p.key == "ctime" # XXX: the 'date()' is a work around for a missing secore # feature right now @@ -34,19 +34,23 @@ class Test(unittest.TestCase): ds.mount(DEFAULT_STORE, {'indexmanager.model' : dm}) - + n = datetime.datetime.now() data = open('test.jpg', 'r').read() # binary data with \0's in it can cause dbus errors here fn = tmpData("with image\0\0 prop") # XXX: We should be able to remove:binary now - uid = ds.create({'title' : "Document 1", 'thumbnail:binary' : data}, fn) + uid = ds.create({'title' : "Document 1", 'thumbnail:binary' : + data, 'ctime:date' : n.isoformat()}, fn) waitforindex(ds) c = ds.get(uid) assert c.get_property('thumbnail') == data - + # I don't care about the microsecond issue now, the typelib + # patch later can fix that + assert c.get_property('ctime')[:19] == n.isoformat()[:19] + ds.stop() |