diff options
author | Benjamin Saller <bcsaller@objectrealms.net> | 2007-07-12 21:17:48 (GMT) |
---|---|---|
committer | Benjamin Saller <bcsaller@objectrealms.net> | 2007-07-12 21:17:48 (GMT) |
commit | 7aae48766ae46bd530a3c556cd4e92a0e02f7ad3 (patch) | |
tree | 62e37ba449d5d0f628af9c0f7b1401828f2a154d /src/olpc/datastore/model.py | |
parent | f577c2c142c7648a482e0eec7ecd736c1ca716d7 (diff) |
check point before prop kind/type merge
Diffstat (limited to 'src/olpc/datastore/model.py')
-rw-r--r-- | src/olpc/datastore/model.py | 523 |
1 files changed, 216 insertions, 307 deletions
diff --git a/src/olpc/datastore/model.py b/src/olpc/datastore/model.py index 8c8ab05..5c737ad 100644 --- a/src/olpc/datastore/model.py +++ b/src/olpc/datastore/model.py @@ -10,17 +10,11 @@ __docformat__ = 'restructuredtext' __copyright__ = 'Copyright ObjectRealms, LLC, 2007' __license__ = 'The GNU Public License V2+' -from sqlalchemy import Table, Column, UniqueConstraint -from sqlalchemy import String, Integer, Unicode -from sqlalchemy import ForeignKey, Sequence, Index -from sqlalchemy import mapper, relation -from sqlalchemy import create_session -from sqlalchemy import MapperExtension, EXT_PASS, clear_mappers - import datetime import mimetypes import os import time +import warnings # XXX: Open issues # list properties - Contributors (a, b, c) @@ -28,51 +22,211 @@ import time # content state - searches don't include content deletion flag # - not recording if content is on other storage yet - -# we have a global thread local session factory -context = {} propertyTypes = {} _marker = object() -def get_session(backingstore): - return context[backingstore] +def registerPropertyType(kind, get, set, xapian_sort_type=None, defaults=None): + propertyTypes[kind] = PropertyImpl(get, set, xapian_sort_type, defaults) -def registerPropertyType(kind, class_): propertyTypes[kind] = class_ def propertyByKind(kind): return propertyTypes[kind] +class PropertyImpl(object): + __slots__ = ('_get', '_set', 'xapian_sort_type', 'defaults') + + def __init__(self, get, set, xapian_sort_type=None, defaults=None): + self._get, self._set = get, set + self.xapian_sort_type = xapian_sort_type + self.defaults = defaults + + def get(self, value): return self._get(value) + def set(self, value): return self._set(value) + +class Property(object): + """Light-weight property implementation. + Handles typed properties via a global registry of type->callbacks + + >>> p = Property(key, value, 'string') + >>> b = Property(key, value, 'binary') + """ + def __init__(self, key, value, kind=None): + self.key = key + self._value = value + self.kind = kind + if kind not in propertyTypes: + warnings.warn("Unknown property type: %s on key %s" % \ + (kind, key), RuntimeWarning) + else: self._impl = propertyTypes[kind] + + @classmethod + def fromstring(cls, key, value=''): + kind = 'string' + if ':' in key: + key, kind = key.split(':', 1) + # now resolve the kind to a property class + return cls(key, value, kind) + -class Content(object): def __repr__(self): - return "<Content id:%s>" % (self.id, ) + return "<%s(%s) %s:%r>" % (self.__class__.__name__, + self.kind, + self.key, self.value) - def get_property(self, key, default=_marker): - # mapped to property keys - session = get_session(self.backingstore) - query = session.query(Property) - p = query.get_by(content_id=self.id, key=key) - if not p: - if default is _marker: raise AttributeError(key) - return default - return p.value - - def get_properties(self, **kwargs): - session = get_session(self.backingstore) - query = session.query(Property) - return query.select_by(content_id=self.id, **kwargs) - - - # Backingstore dependent bindings - def get_file(self): - if not hasattr(self, "_file") or self._file.closed is True: - self.backingstore.get(self.id) - return self._file + def get_value(self): return self._impl.get(self._value) + def set_value(self, value): self._value = self._impl.set(value) + value = property(get_value, set_value) + + def __str__(self): return str(self.value) - def set_file(self, fileobj): - self._file = fileobj - file = property(get_file, set_file) +def noop(value): return value + +# Xapian doesn't have real binary storage, rather these keys will get +# indexed it its database. If the key size is too large the indexing +# will fail +# there are two solutions -- divert the storage to the backingstore +# and retain a key reference to recover it (this is the correct +# solution long term as it participates in versioning) and what I do +# now which is to insert and remove spaces into the base64 stream +# every fixed amount of characters +import re +base64hack = re.compile("(\S{212})") +def base64enc(value): return ' '.join(base64hack.split(value.encode('base64'))) +def base64dec(value): return value.replace(' ', '').decode('base64') + +dateformat = "%Y-%m-%dT%H:%M:%S" +def datedec(value, dateformat=dateformat): + ti = time.strptime(value, dateformat) + dt = datetime.datetime(*(ti[:-2])) + dt = dt.replace(microsecond=0) + return dt + +def dateenc(value, dateformat=dateformat): + if isinstance(value, basestring): + # XXX: there is an issue with microseconds not getting parsed + ti = time.strptime(value, dateformat) + value = datetime.datetime(*(ti[:-2])) + value = value.replace(microsecond=0) + # XXX: drop time for now, this is a xapian issue + value = value.date() + return value.isoformat() + +# syntactic sugar for the below +def p(key, kind, **kwargs): return (key, kind, kwargs) + +# type, get, set, xapian sort type [string|float|date], defaults +# defaults are the default options to addField in IndexManager +# these can be overridden on model assignment +registerPropertyType('string', noop, noop, 'string', {'store' : True, + 'exact' : True, + 'sortable' : True}) + +registerPropertyType('text', noop, noop, 'string', {'store' : True, + 'exact' : False, + 'sortable' : False}) + +registerPropertyType('binary', noop, noop, None, {'store' : True, + 'exact' : False, + 'sortable' : False}) + +registerPropertyType('number', str, float, 'float', {'store' : True, + 'exact' : True, + 'sortable' : True}) + +registerPropertyType('date', dateenc, datedec, 'date', {'store' : True, + 'exact' : True, + 'sortable' : True + }) + + +class Model(object): + """Object containing the field/property model used by the + system""" + + def __init__(self): + self.fields = {} + self.fieldnames = [] + + def addField(self, key, kind, **kwargs): + """ Add a field to the model. + key -- field name + kind -- type by name (registered with registerPropertyType) + kwargs -- overrides and additional values to the default + arguments supplied by kind + """ + if key in self.fields: + raise KeyError("""Another source tried to add %s field to + the model""" % key) + + impl = propertyByKind(kind) + options = impl.defaults.copy() + if kwargs: options.update(kwargs) + if impl.xapian_sort_type: + if 'type' not in options: + options['type'] = impl.xapian_sort_type + + self.fields[key] = (key, kind, options) + self.fieldnames.append(key) + return self + + def addFields(self, *args): + """ List of arguments to addField """ + for arg in args: self.addField(arg[0], arg[1], **arg[2]) + return self + + def apply(self, indexmanager): + addField = indexmanager.addField + for fn in self.fieldnames: + args = self.fields[fn] + addField(args[0], **args[2]) + + +defaultModel = Model().addFields( + p('text', 'text'), + # vid is version id + p('vid', store=True, exact=True, sortable=True, type="float"), + p('filename', store=True, exact=True), + # Title has additional weight + p('title', store=True, exact=False, weight=2, sortable=True), + p('url', store=True, exact=True, sortable=True), + p('mimetype', store=True, exact=True), + p('author', store=True, exact=True), + p('language', store=True, exact=True), + p('ctime', store=True, exact=True, sortable=True, type='date'), + p('mtime', store=True, exact=True, sortable=True, type='date'), + # this will just be a space delimited list of tags + # indexed with the content + # I give them high weight as they have user given semantic value. + p('tags', store=True, exact=False, weight=3, sortable=True), + ) + + +class Content(object): + """A light weight proxy around Xapian Documents from secore. + This provides additional methods which are used in the + backingstore to assist in storage + """ + __slots__ = ('_doc', '_backingstore', '_file') + + def __init__(self, xapdoc, backingstore=None): + self._doc = xapdoc + self._backingstore = backingstore + self._file = None + + def get_property(self, key, default=_marker): + result = self._doc.data.get(key, default) + if result is _marker: raise KeyError(key) + if isinstance(result, list) and len(result) == 1: + return result[0] + return result @property - def filename(self): return self.file.name + def properties(self): + d = {} + for k, v in self.data.iteritems(): + if isinstance(v, list) and len(v) == 1: + v = v[0] + d[k] = v + return d + def suggestName(self): # we look for certain known property names @@ -89,8 +243,7 @@ class Content(object): f, e = os.path.splitext(filename) if e: return filename, None if ext: return "%s.%s" % (filename, ext), None - elif ext: - return None, ext + elif ext: return None, ext else: # try to get an extension from the mimetype if available mt = self.get_property('mime_type', None) @@ -99,279 +252,35 @@ class Content(object): if ext: return None, ext return None, None - def get_data(self): - f = self.file - t = f.tell() - data = f.read() - f.seek(t) - return data - - def set_data(self, filelike): - self.backingstore.set(self.id, filelike) - - data = property(get_data, set_data) - - -class BackingStoreContentMapping(MapperExtension): - """This mapper extension populates Content objects with the - binding to the backing store the files are kept on, this allow the - file-like methods to work as expected on content - """ - def __init__(self, backingstore): - MapperExtension.__init__(self) - self.backingstore = backingstore - - def populate_instance(self, mapper, selectcontext, row, instance, identitykey, isnew): - """called right before the mapper, after creating an instance - from a row, passes the row to its MapperProperty objects which - are responsible for populating the object's attributes. If - this method returns EXT_PASS, it is assumed that the mapper - should do the appending, else if this method returns any other - value or None, it is assumed that the append was handled by - this method. - - """ - instance.backingstore = self.backingstore - # allow normal population to happen - return EXT_PASS - - -class Property(object): - """A typed key value pair associated with a content object. - This is the objects metadata. The value side of the kv pair is - typically encoded as a UTF-8 String. There are however cases where - richer metadata is required by the application using the - datastore. - In these cases the type field is overridden to encode a reference - to another object that must be used to satisfy this value. An - example of this would be storing a PNG thumbnail as the a - value. In a case such as that the value should be set to a path or - key used to find the image on stable storage or in a database and - the type field will be used to demarshall it through this object. - """ - def __init__(self, key, value, type='string'): - self.key = key - self.value = value - self.type = type - - def __repr__(self): - return "<%s %s:%r>" % (self.__class__.__name__, - self.key, self.value) - def marshall(self): - """Return the value marshalled as a string""" - return str(self.value) - -class TextProperty(Property): - """A text property is one that will also get full automatic text - indexing when available. This is used for fields like title where - searching in the text is more important than doing a direct match - """ - def __init__(self, key, value, type='text'): - Property.__init__(self, key, value, type) - - def get_value(self): return self._value - def set_value(self, value): self._value = value - value = property(get_value, set_value) - + def get_file(self): + if not hasattr(self, "_file") or self._file.closed is True: + self.backingstore.get(self.id) + return self._file -class DateProperty(Property): - format = "%Y-%m-%dT%H:%M:%S" - - def __init__(self, key, value, type="date"): - self._value = None - Property.__init__(self, key, value, type) - - def get_value(self): - # parse the value back into a datetime - # XXX: strptime on datetime is a 2.5 thing :( - # XXX: we lose timezone in this conversion currently - if not self._value: return None - ti = time.strptime(self._value, self.format) - dt = datetime.datetime(*(ti[:-2])) - dt = dt.replace(microsecond=0) - return dt - - def set_value(self, value): - if isinstance(value, basestring): - # XXX: there is an issue with microseconds not getting parsed - ti = time.strptime(value, self.format) - value = datetime.datetime(*(ti[:-2])) - value = value.replace(microsecond=0) - - self._value = value.isoformat() + def set_file(self, fileobj): + self._file = fileobj + file = property(get_file, set_file) - value = property(get_value, set_value) + @property + def filename(self): return self.file.name - def marshall(self): return self.value.isoformat() - + @property + def contents(self): return self.file.read() -class NumberProperty(Property): - def __init__(self, key, value, type="number"): - Property.__init__(self, key, value, type) - - def get_value(self): return float(self._value) - def set_value(self, value): self._value = value - value = property(get_value, set_value) + @property + def backingstore(self): return self._backingstore + @property + def id(self): return self._doc.id -class BinaryProperty(Property): - # base64 encode binary data - def __init__(self, key, value, type="binary"): - Property.__init__(self, key, value, type) - - def get_value(self): return self._value.decode('base64') - def set_value(self, value): self._value = value.encode('base64') - value = property(get_value, set_value) - - -class Model(object): - """ Manages the global state of the metadata model index. This is - intended to only be consumed by an olpc.datastore.query.QueryManager - instance for the management of its metadata. - - >>> m = Model() - >>> m.prepare(querymanager) - - >>> m.content - ... # Content Table - - >>> m['content'] - ... # content Mapper - - For details see the sqlalchemy documentation - - """ - - def __init__(self): - self.tables = {} - self.mappers = {} + @property + def data(self): return self._doc.data - def __getattr__(self, key): return self.tables[key] - def __getitem__(self, key): return self.mappers[key] - - - def prepare(self, querymanager): - self.querymanager = querymanager +## class Buddy(object): +## """A co-author on content. Information is collected and managed +## here""" +## pass - # a single session manages the exclusive access we keep to the - # db. - global context - self.session = create_session(bind_to=self.querymanager.db) - context[self.querymanager.backingstore] = self.session - - # content object - content = Table('content', - self.querymanager.metadata, - Column('id', String, primary_key=True, nullable=False), - Column('activity_id', Integer), - Column('checksum', String,), - UniqueConstraint('id', name='content_key') - ) - Index('content_activity_id_idx', content.c.activity_id) - - # the properties of content objects - properties = Table('properties', - self.querymanager.metadata, - Column('id', Integer, Sequence('property_id_seq'), primary_key=True), - Column('content_id', Integer, ForeignKey('content.id')), - Column('key', Unicode, ), - Column('value', Unicode, ), - Column('type', Unicode, ), - # unique key to content mapping - UniqueConstraint('content_id', 'key', - name='property_content_key') - ) - - Index('property_key_idx', properties.c.key) - Index('property_type_idx', properties.c.type) - - # storage - storage = Table('storage', - self.querymanager.metadata, - Column('id', String, primary_key=True), - Column('description', String, ), - Column('uri', String, ) - ) - - # storage -> * content - # XXX: this could be a purely runtime in-memory construct - # removing the storage table as well. Would depend in part on - # the frequency of the garbage collection runs and the - # frequency of connection to stable storage - storage_content = Table('storage_content', - self.querymanager.metadata, - Column('storage_id', Integer, ForeignKey('storage.id')), - Column('content_id', Integer, ForeignKey('content.id')), - ) - Index('idx_storage_content_content_id', storage_content.c.content_id) - - # Object Mapping - # the query manager provides a mapping extension for - # Content <-> BackingStore binding - - # XXX gross and not what we want, we can only define mappers - # once but we may have more than one datastore. - # this can impact all sqla in the runtime though - clear_mappers() - - - content_mapper = mapper(Content, content, - extension=self.querymanager.content_ext, - properties = { - 'properties' : relation(Property, - cascade="all,delete-orphan", - backref='content', - lazy=True), - }, - - ) - - # retain reference to these tables to use for queries - self.tables['content'] = content - self.tables['properties'] = properties - self.tables['storage'] = storage - self.tables['storage_content'] = storage_content - - # and the mappers (though most likely not needed) - property_mapper = mapper(Property, properties, polymorphic_on=properties.c.type) - self.mappers['properties'] = property_mapper - self.mappers['content'] = content_mapper - - # default Property types are mapped to classes here - self.addPropertyType(DateProperty, 'date') - self.addPropertyType(NumberProperty, 'number') - self.addPropertyType(TextProperty, 'text') - self.addPropertyType(BinaryProperty, 'binary') - - - - def addPropertyType(self, PropertyClass, typename, - map_value=True, **kwargs): - """Register a new type of Property. PropertyClass should be a - subclass of Property, typename is the textual - name of the new Property type. - - The flag map_value indicates if Property.value should - automatically be diverted to _value so that you can more - easily manage the interfaces 'value' as a Python property - (descriptor) - - Keyword args will be passed to the properties dictionary of - the sqlalchemy mapper call. See sqlalchemy docs for additional - details. - """ - properties = {} - properties.update(kwargs) - if map_value is True: - properties['_value'] = self.properties.c.value - mapper(PropertyClass, - inherits=self.mappers['properties'], - polymorphic_identity=typename, - properties=properties - ) - - registerPropertyType(typename, PropertyClass) - |