From 720c077dba3eb0944318cc0410d4a2df9880a305 Mon Sep 17 00:00:00 2001 From: Benjamin Saller Date: Fri, 20 Jul 2007 10:09:31 +0000 Subject: handle find(dict(query : 'title:foo mimetype:text/plain')) where query is the string arg accepted by xapianindex::search; use the system binary for copies, it's already optimized; use gnomevfs in converter as well as importer; sorting on non-existent properties shouldn't throw errors; sorting on dates verified --- diff --git a/bin/datastore-service b/bin/datastore-service index b8555ee..7dd87ce 100755 --- a/bin/datastore-service +++ b/bin/datastore-service @@ -72,9 +72,9 @@ def main(): logger.debug("Datastore shutdown with error", exc_info=sys.exc_info()) -main() +#main() -#import hotshot -#p = hotshot.Profile('hs.prof') -#p.run('main()') +import hotshot +p = hotshot.Profile('hs.prof') +p.run('main()') diff --git a/src/olpc/datastore/backingstore.py b/src/olpc/datastore/backingstore.py index a7a51ca..b5b93f9 100644 --- a/src/olpc/datastore/backingstore.py +++ b/src/olpc/datastore/backingstore.py @@ -11,14 +11,15 @@ __copyright__ = 'Copyright ObjectRealms, LLC, 2007' __license__ = 'The GNU Public License V2+' import cPickle as pickle -import sha +import gnomevfs import os import re -import shutil +import sha import subprocess import time from olpc.datastore.xapianindex import IndexManager +from olpc.datastore import bin_copy from olpc.datastore import utils # changing this pattern impacts _targetFile @@ -312,7 +313,7 @@ class FileBackingStore(BackingStore): fp.write(line) fp.close() else: - shutil.copyfile(filelike.name, path) + bin_copy.bin_copy(filelike.name, path) if verify: content = self.indexmanager.get(uid) content.checksum = c.hexdigest() @@ -438,9 +439,10 @@ class InplaceFileBackingStore(FileBackingStore): relative = source[len(self.uri)+1:] result, count = self.indexmanager.search(dict(filename=relative)) + mime_type = gnomevfs.get_mime_type(source) if not count: # create a new record - self.create(dict(filename=relative), 
source) + self.create(dict(filename=relative, mime_type=mime_type), source) else: # update the object with the new content iif the # checksum is different @@ -451,7 +453,7 @@ class InplaceFileBackingStore(FileBackingStore): # only if the checksum is different #checksum = self._checksum(source) #if checksum != content.checksum: - self.update(uid, dict(filename=relative), source) + self.update(uid, dict(filename=relative, mime_type=mime_type), source) if self.options.get('sync_mount', False): self.complete_indexing() diff --git a/src/olpc/datastore/bin_copy.py b/src/olpc/datastore/bin_copy.py new file mode 100644 index 0000000..1be1b6b --- /dev/null +++ b/src/olpc/datastore/bin_copy.py @@ -0,0 +1,24 @@ +import os, subprocess + + +def bin_copy(src, dest, mode=0600): + try: + subprocess.check_call(['/bin/cp', src, dest]) + except subprocess.CalledProcessError: + raise OSError("Copy failed %s %s" % (src, dest)) + else: + os.chmod(dest, mode) + + +if __name__ == "__main__": + import sys + if len(sys.argv) != 3: + raise SystemExit("usage: ") + + src, dest = sys.argv[1:] + + if not os.path.exists(src): raise OSError("missing src file") + + bin_copy(src, dest) + + diff --git a/src/olpc/datastore/converter.py b/src/olpc/datastore/converter.py index 6f0ede6..8821061 100644 --- a/src/olpc/datastore/converter.py +++ b/src/olpc/datastore/converter.py @@ -18,16 +18,16 @@ __license__ = 'The GNU Public License V2+' from olpc.datastore.utils import Singleton import codecs import logging -import mimetypes import os import subprocess import sys import tempfile +import gnomevfs def guess_mimetype(filename): - output = subprocess.Popen(["file", "-bi", filename], stdout=subprocess.PIPE).communicate()[0] - return output.split()[-1].strip() - + fn = os.path.abspath(filename) + mimetype = gnomevfs.get_mime_type(fn) + return mimetype class subprocessconverter(object): """Process a command. 
Collect the output @@ -110,19 +110,18 @@ class Converter(object): #can result in unexpected or no output. ext = os.path.splitext(filename)[1] if mimetype: mt = mimetype - else: - mt = mimetypes.guess_type(filename, False) - if mt[0] is not None: mt = "%s/%s" % mt - else: - # try harder to get the mimetype - # most datastore files won't have extensions - mt = guess_mimetype(filename) + else: mt = guess_mimetype(filename) + maintype, subtype = mt.split('/',1) converter = self._converters.get(mt) if not converter: converter = self._converters.get(ext) if not converter: converter = self._default + # it was an image or an unknown application + if maintype in ['image', 'application', 'audio', 'video'] or \ + subtype in ['x-trash', 'x-python-bytecode',]: + converter = None if converter: try: return converter(filename) diff --git a/src/olpc/datastore/datastore.py b/src/olpc/datastore/datastore.py index 34eb23c..d026fce 100644 --- a/src/olpc/datastore/datastore.py +++ b/src/olpc/datastore/datastore.py @@ -256,7 +256,10 @@ class DataStore(dbus.service.Object): # only goes to the primary now. 
Punting on the merge case if isinstance(query, dict): kwargs.update(query) - + else: + if 'query' not in kwargs: + kwargs['query'] = query + include_files = kwargs.pop('include_files', False) order_by = kwargs.pop('order_by', []) diff --git a/src/olpc/datastore/model.py b/src/olpc/datastore/model.py index b6e0829..9ff2e1f 100644 --- a/src/olpc/datastore/model.py +++ b/src/olpc/datastore/model.py @@ -191,9 +191,10 @@ class Content(object): result = result[0] field = self._model.fields.get(key) kind = propertyByKind(field[1]) + # Errors here usually property request for a missing field return kind.from_xapian(result) - - + + @property def properties(self): d = {} @@ -305,7 +306,9 @@ registerPropertyType('string', noop, noop, 'string', {'store' : True, registerPropertyType('text', noop, noop, 'string', {'store' : True, 'exact' : False, - 'sortable' : False}) + 'sortable' : False, + 'collapse' : True, + }) registerPropertyType('binary', noop, noop, None, {'store' : True, 'exact' : False, diff --git a/src/olpc/datastore/utils.py b/src/olpc/datastore/utils.py index 2998298..711007e 100644 --- a/src/olpc/datastore/utils.py +++ b/src/olpc/datastore/utils.py @@ -149,3 +149,5 @@ def timeparse(t, format): return t.replace(microsecond=microsecond) raise + + diff --git a/src/olpc/datastore/xapianindex.py b/src/olpc/datastore/xapianindex.py index b104d44..46eca98 100644 --- a/src/olpc/datastore/xapianindex.py +++ b/src/olpc/datastore/xapianindex.py @@ -179,7 +179,14 @@ class IndexManager(object): filename, mimetype = filestuff fp = converter(filename, mimetype) if fp: - doc.fields.append(secore.Field('fulltext', fp.read())) + # read in at a fixed block size, try to + # conserve memory. 
If this doesn't work + # we can make doc.fields a generator + while True: + chunk = fp.read(2048) + if not chunk: break + doc.fields.append(secore.Field('fulltext', chunk)) + self.write_index.replace(doc) logger.info("update file content %s:%s" % (uid, vid)) else: @@ -294,7 +301,7 @@ class IndexManager(object): # Property indexing for k, prop in props.iteritems(): value = prop.for_xapian - + if k not in self.fields: warnings.warn("""Missing field configuration for %s""" % k, RuntimeWarning) @@ -333,10 +340,17 @@ class IndexManager(object): q = self.read_index.query_all() elif isinstance(query, dict): queries = [] - # each term becomes part of the query join - for k, v in query.iteritems(): - queries.append(ri.query_field(k, v)) - q = ri.query_composite(ri.OP_AND, queries) + q = query.pop('query', None) + if q: + queries.append(self.parse_query(q)) + if not query: + # we emptied it + q = self.read_index.query_all() + else: + # each term becomes part of the query join + for k, v in query.iteritems(): + queries.append(ri.query_field(k, v)) + q = ri.query_composite(ri.OP_AND, queries) else: q = self.parse_query(query) -- cgit v0.9.1