Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBenjamin Saller <bcsaller@objectrealms.net>2007-07-20 10:09:31 (GMT)
committer Benjamin Saller <bcsaller@objectrealms.net>2007-07-20 10:09:31 (GMT)
commit720c077dba3eb0944318cc0410d4a2df9880a305 (patch)
tree51c92bed8c18529f4c3609a6504868726b766ff5
parent9aabdfb24b3d03dae3835bf29f8be005fa9ccab2 (diff)
handle find(dict(query : 'title:foo mimetype:text/plain'))
where query is the string arg accepted by xapianindex::search; use the system binary for copies, it's already optimized; use gnomevfs in converter as well as importer; sorting on non-existent properties shouldn't throw errors; sorting on dates verified
-rwxr-xr-xbin/datastore-service8
-rw-r--r--src/olpc/datastore/backingstore.py12
-rw-r--r--src/olpc/datastore/bin_copy.py24
-rw-r--r--src/olpc/datastore/converter.py21
-rw-r--r--src/olpc/datastore/datastore.py5
-rw-r--r--src/olpc/datastore/model.py9
-rw-r--r--src/olpc/datastore/utils.py2
-rw-r--r--src/olpc/datastore/xapianindex.py26
8 files changed, 77 insertions, 30 deletions
diff --git a/bin/datastore-service b/bin/datastore-service
index b8555ee..7dd87ce 100755
--- a/bin/datastore-service
+++ b/bin/datastore-service
@@ -72,9 +72,9 @@ def main():
logger.debug("Datastore shutdown with error",
exc_info=sys.exc_info())
-main()
+#main()
-#import hotshot
-#p = hotshot.Profile('hs.prof')
-#p.run('main()')
+import hotshot
+p = hotshot.Profile('hs.prof')
+p.run('main()')
diff --git a/src/olpc/datastore/backingstore.py b/src/olpc/datastore/backingstore.py
index a7a51ca..b5b93f9 100644
--- a/src/olpc/datastore/backingstore.py
+++ b/src/olpc/datastore/backingstore.py
@@ -11,14 +11,15 @@ __copyright__ = 'Copyright ObjectRealms, LLC, 2007'
__license__ = 'The GNU Public License V2+'
import cPickle as pickle
-import sha
+import gnomevfs
import os
import re
-import shutil
+import sha
import subprocess
import time
from olpc.datastore.xapianindex import IndexManager
+from olpc.datastore import bin_copy
from olpc.datastore import utils
# changing this pattern impacts _targetFile
@@ -312,7 +313,7 @@ class FileBackingStore(BackingStore):
fp.write(line)
fp.close()
else:
- shutil.copyfile(filelike.name, path)
+ bin_copy.bin_copy(filelike.name, path)
if verify:
content = self.indexmanager.get(uid)
content.checksum = c.hexdigest()
@@ -438,9 +439,10 @@ class InplaceFileBackingStore(FileBackingStore):
relative = source[len(self.uri)+1:]
result, count = self.indexmanager.search(dict(filename=relative))
+ mime_type = gnomevfs.get_mime_type(source)
if not count:
# create a new record
- self.create(dict(filename=relative), source)
+ self.create(dict(filename=relative, mime_type=mime_type), source)
else:
# update the object with the new content iif the
# checksum is different
@@ -451,7 +453,7 @@ class InplaceFileBackingStore(FileBackingStore):
# only if the checksum is different
#checksum = self._checksum(source)
#if checksum != content.checksum:
- self.update(uid, dict(filename=relative), source)
+ self.update(uid, dict(filename=relative, mime_type=mime_type), source)
if self.options.get('sync_mount', False):
self.complete_indexing()
diff --git a/src/olpc/datastore/bin_copy.py b/src/olpc/datastore/bin_copy.py
new file mode 100644
index 0000000..1be1b6b
--- /dev/null
+++ b/src/olpc/datastore/bin_copy.py
@@ -0,0 +1,24 @@
+import os, subprocess
+
+
+def bin_copy(src, dest, mode=0600):
+ try:
+ subprocess.check_call(['/bin/cp', src, dest])
+ except subprocess.CalledProcessError:
+ raise OSError("Copy failed %s %s" % (src, dest))
+ else:
+ os.chmod(dest, mode)
+
+
+if __name__ == "__main__":
+ import sys
+ if len(sys.argv) != 3:
+ raise SystemExit("usage: <src> <dest>")
+
+ src, dest = sys.argv[1:]
+
+ if not os.path.exists(src): raise OSError("missing src file")
+
+ bin_copy(src, dest)
+
+
diff --git a/src/olpc/datastore/converter.py b/src/olpc/datastore/converter.py
index 6f0ede6..8821061 100644
--- a/src/olpc/datastore/converter.py
+++ b/src/olpc/datastore/converter.py
@@ -18,16 +18,16 @@ __license__ = 'The GNU Public License V2+'
from olpc.datastore.utils import Singleton
import codecs
import logging
-import mimetypes
import os
import subprocess
import sys
import tempfile
+import gnomevfs
def guess_mimetype(filename):
- output = subprocess.Popen(["file", "-bi", filename], stdout=subprocess.PIPE).communicate()[0]
- return output.split()[-1].strip()
-
+ fn = os.path.abspath(filename)
+ mimetype = gnomevfs.get_mime_type(fn)
+ return mimetype
class subprocessconverter(object):
"""Process a command. Collect the output
@@ -110,19 +110,18 @@ class Converter(object):
#can result in unexpected or no output.
ext = os.path.splitext(filename)[1]
if mimetype: mt = mimetype
- else:
- mt = mimetypes.guess_type(filename, False)
- if mt[0] is not None: mt = "%s/%s" % mt
- else:
- # try harder to get the mimetype
- # most datastore files won't have extensions
- mt = guess_mimetype(filename)
+ else: mt = guess_mimetype(filename)
+ maintype, subtype = mt.split('/',1)
converter = self._converters.get(mt)
if not converter:
converter = self._converters.get(ext)
if not converter:
converter = self._default
+ # it was an image or an unknown application
+ if maintype in ['image', 'application', 'audio', 'video'] or \
+ subtype in ['x-trash', 'x-python-bytecode',]:
+ converter = None
if converter:
try:
return converter(filename)
diff --git a/src/olpc/datastore/datastore.py b/src/olpc/datastore/datastore.py
index 34eb23c..d026fce 100644
--- a/src/olpc/datastore/datastore.py
+++ b/src/olpc/datastore/datastore.py
@@ -256,7 +256,10 @@ class DataStore(dbus.service.Object):
# only goes to the primary now. Punting on the merge case
if isinstance(query, dict):
kwargs.update(query)
-
+ else:
+ if 'query' not in kwargs:
+ kwargs['query'] = query
+
include_files = kwargs.pop('include_files', False)
order_by = kwargs.pop('order_by', [])
diff --git a/src/olpc/datastore/model.py b/src/olpc/datastore/model.py
index b6e0829..9ff2e1f 100644
--- a/src/olpc/datastore/model.py
+++ b/src/olpc/datastore/model.py
@@ -191,9 +191,10 @@ class Content(object):
result = result[0]
field = self._model.fields.get(key)
kind = propertyByKind(field[1])
+ # Errors here usually property request for a missing field
return kind.from_xapian(result)
-
-
+
+
@property
def properties(self):
d = {}
@@ -305,7 +306,9 @@ registerPropertyType('string', noop, noop, 'string', {'store' : True,
registerPropertyType('text', noop, noop, 'string', {'store' : True,
'exact' : False,
- 'sortable' : False})
+ 'sortable' : False,
+ 'collapse' : True,
+ })
registerPropertyType('binary', noop, noop, None, {'store' : True,
'exact' : False,
diff --git a/src/olpc/datastore/utils.py b/src/olpc/datastore/utils.py
index 2998298..711007e 100644
--- a/src/olpc/datastore/utils.py
+++ b/src/olpc/datastore/utils.py
@@ -149,3 +149,5 @@ def timeparse(t, format):
return t.replace(microsecond=microsecond)
raise
+
+
diff --git a/src/olpc/datastore/xapianindex.py b/src/olpc/datastore/xapianindex.py
index b104d44..46eca98 100644
--- a/src/olpc/datastore/xapianindex.py
+++ b/src/olpc/datastore/xapianindex.py
@@ -179,7 +179,14 @@ class IndexManager(object):
filename, mimetype = filestuff
fp = converter(filename, mimetype)
if fp:
- doc.fields.append(secore.Field('fulltext', fp.read()))
+ # read in at a fixed block size, try to
+ # conserve memory. If this doesn't work
+ # we can make doc.fields a generator
+ while True:
+ chunk = fp.read(2048)
+ if not chunk: break
+ doc.fields.append(secore.Field('fulltext', chunk))
+
self.write_index.replace(doc)
logger.info("update file content %s:%s" % (uid, vid))
else:
@@ -294,7 +301,7 @@ class IndexManager(object):
# Property indexing
for k, prop in props.iteritems():
value = prop.for_xapian
-
+
if k not in self.fields:
warnings.warn("""Missing field configuration for %s""" % k,
RuntimeWarning)
@@ -333,10 +340,17 @@ class IndexManager(object):
q = self.read_index.query_all()
elif isinstance(query, dict):
queries = []
- # each term becomes part of the query join
- for k, v in query.iteritems():
- queries.append(ri.query_field(k, v))
- q = ri.query_composite(ri.OP_AND, queries)
+ q = query.pop('query', None)
+ if q:
+ queries.append(self.parse_query(q))
+ if not query:
+ # we emptied it
+ q = self.read_index.query_all()
+ else:
+ # each term becomes part of the query join
+ for k, v in query.iteritems():
+ queries.append(ri.query_field(k, v))
+ q = ri.query_composite(ri.OP_AND, queries)
else:
q = self.parse_query(query)