Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summary · refs · log · tree · commit · diff · stats
path: root/tests
diff options
context:
space:
mode:
author Benjamin Saller <bcsaller@objectrealms.net> 2007-07-12 21:14:06 (GMT)
committer Benjamin Saller <bcsaller@objectrealms.net> 2007-07-12 21:14:06 (GMT)
commit f577c2c142c7648a482e0eec7ecd736c1ca716d7 (patch)
tree 259c5cf191116379e97d8aebc260f9664ad3a0e5 /tests
parent d7092a126f230f22344b50d79b8bd362d659953b (diff)
checkpoint new branch before the property type/xapian field merge
Diffstat (limited to 'tests')
-rwxr-xr-x tests/cleaner.py 39
-rw-r--r-- tests/test_xapianindex.py 91
-rw-r--r-- tests/xapianindex.txt 76
3 files changed, 206 insertions, 0 deletions
diff --git a/tests/cleaner.py b/tests/cleaner.py
new file mode 100755
index 0000000..8cc795b
--- /dev/null
+++ b/tests/cleaner.py
@@ -0,0 +1,39 @@
+#!/usr/bin/python
+import os
+import re
+from ore.main import Application
+
+# Names left behind by test runs.  All patterns are applied with
+# ``match``, i.e. anchored at the start of the name only.
+# Raw strings keep the backslashes for the regex engine instead of relying
+# on Python passing unknown string escapes through unchanged.
+
+# File names shaped like 8-4-4-4-12 groups of word characters.
+filepattern = re.compile(r"(\w{8})\-(\w{4})\-(\w{4})\-(\w{4})\-(\w{12})")
+# File names starting with "tmp" followed by six non-whitespace characters.
+tmppattern = re.compile(r"tmp\S{6}")
+
+# Directory names starting with "test_ds" or with "store" plus a digit.
+staticdirs = re.compile(r'test_ds|store\d')
+
+filepatterns = [filepattern, tmppattern]
+dirpatterns = [staticdirs]
+
+class Cleaner(Application):
+    """Command-line tool that deletes leftovers of test runs.
+
+    Walks the directory given by ``--base`` (default ``/tmp``), removing
+    files whose names match one of ``filepatterns`` and directory trees
+    whose names match one of ``dirpatterns``.
+    """
+
+    def manage_options(self):
+        """Register the ``--base`` option on the application's parser."""
+        self.parser.add_option("--base", dest="base_dir",
+                               action="store", default='/tmp',
+                               help="""Where to clean (/tmp)""")
+
+    def main(self):
+        """clean up files left from testing in /tmp"""
+        # Local import: keeps this method self-contained.
+        import shutil
+
+        # this is done using patterned names
+        for root, dirs, files in os.walk(self.options.base_dir):
+            for filename in files:
+                for pat in filepatterns:
+                    if pat.match(filename):
+                        os.remove(os.path.join(root, filename))
+                        break
+
+            kept = []
+            for dirname in dirs:
+                matched = False
+                for pat in dirpatterns:
+                    if pat.match(dirname):
+                        matched = True
+                        break
+                if not matched:
+                    kept.append(dirname)
+                    continue
+
+                dn = os.path.join(root, dirname)
+                # Remove through the filesystem API rather than a shell
+                # command line, so names containing spaces or shell
+                # metacharacters cannot change what gets deleted.
+                if os.path.islink(dn):
+                    # Drop the link itself; never follow it out of the tree.
+                    try:
+                        os.remove(dn)
+                    except OSError:
+                        pass  # best effort, like the ``rm -rf`` it replaces
+                else:
+                    shutil.rmtree(dn, ignore_errors=True)
+
+            # Prune in place so os.walk does not try to descend into the
+            # directories that were just removed.
+            dirs[:] = kept
+
+if __name__ == "__main__":
+    # Build the application under the name "cleaner", then invoke it.
+    app = Cleaner("cleaner")
+    app()
+
+
diff --git a/tests/test_xapianindex.py b/tests/test_xapianindex.py
new file mode 100644
index 0000000..cf39f01
--- /dev/null
+++ b/tests/test_xapianindex.py
@@ -0,0 +1,91 @@
+from testutils import waitforindex
+
+from olpc.datastore.xapianindex import IndexManager
+import os
+from datetime import datetime
+
+import time
+import unittest
+import gnomevfs
+
+DEFAULT_STORE = '/tmp/_xi_test'
+
+
+def index_file(iconn, filepath):
+    """Index a single file together with its basic metadata.
+
+    The mimetype reported by gnomevfs and the file's ctime/mtime are
+    always recorded.  For images, ``x-trash`` and ``x-python-bytecode``
+    files the path handed to ``iconn.index`` is ``None`` and no
+    ``filename`` property is set -- presumably so that only the metadata
+    is indexed; TODO confirm against ``IndexManager.index``.
+
+    Always returns 1.
+    """
+    mimetype = gnomevfs.get_mime_type(filepath)
+    main, subtype = mimetype.split('/', 1)
+
+    info = os.stat(filepath)
+    props = {
+        'mimetype': mimetype,
+        'mtime:date': datetime.fromtimestamp(info.st_mtime),
+        'ctime:date': datetime.fromtimestamp(info.st_ctime),
+    }
+
+    metadata_only = (main in ['image'] or
+                     subtype in ['x-trash', 'x-python-bytecode'])
+    if metadata_only:
+        filepath = None
+
+    if filepath:
+        # basename() is the second element of os.path.split()
+        props['filename'] = os.path.basename(filepath)
+
+    iconn.index(props, filepath)
+    return 1
+
+def index_path(iconn, docpath):
+    """Index every file found beneath ``docpath``.
+
+    Returns the number of files handed to ``index_file``.
+    """
+    indexed = 0
+    for parent, _subdirs, names in os.walk(docpath):
+        for name in names:
+            index_file(iconn, os.path.join(parent, name))
+            indexed += 1
+    return indexed
+
+class Test(unittest.TestCase):
+    """Smoke test: index the current directory, then search the result."""
+
+    def _remove_store(self):
+        """Delete the on-disk test index at DEFAULT_STORE, if present."""
+        # Local import: keeps this helper self-contained.
+        import shutil
+        if os.path.exists(DEFAULT_STORE):
+            shutil.rmtree(DEFAULT_STORE, ignore_errors=True)
+
+    def setUp(self):
+        """Start from an empty store."""
+        self._remove_store()
+
+    def tearDown(self):
+        """Leave no store behind."""
+        self._remove_store()
+
+    def test_index(self):
+        """Content of test.pdf must be findable after indexing the cwd."""
+        im = IndexManager()
+        im.connect(DEFAULT_STORE)
+        try:
+            # import a bunch of documents into the store
+            index_path(im, os.getcwd())
+
+            # wait for indexing to finish
+            waitforindex(im)
+
+            results = list(im.search('peek')[0])
+            filenames = set(r.get_property('filename') for r in results)
+
+            # this indicates that we found text inside binary content that
+            # we expected
+            self.assertTrue('test.pdf' in filenames,
+                            "test.pdf not among hits: %r" % (filenames,))
+        finally:
+            # Shut the index manager down before tearDown deletes the
+            # store from under it, even when an assertion failed.
+            im.stop()
+
+
+
+def test_suite():
+    """Return a suite holding the tests collected from ``Test``."""
+    return unittest.TestSuite([unittest.makeSuite(Test)])
+
+# Run the tests in this module when it is executed as a script.
+if __name__ == "__main__":
+ unittest.main()
+
diff --git a/tests/xapianindex.txt b/tests/xapianindex.txt
new file mode 100644
index 0000000..de495a6
--- /dev/null
+++ b/tests/xapianindex.txt
@@ -0,0 +1,76 @@
+The xapian index module can be used directly, as follows.
+
+First clean up any old test data.
+
+>>> index_home = "/tmp/xi"
+>>> import os, sys, time, logging
+>>> assert os.system('rm -rf %s' % index_home) == 0
+
+# >>> logging.basicConfig(level=logging.DEBUG,
+# ... format="%(asctime)-15s %(name)s %(levelname)s: %(message)s",
+# ... stream=sys.stderr)
+
+
+>>> from olpc.datastore.xapianindex import IndexManager
+>>> from olpc.datastore import model
+>>> im = IndexManager()
+>>> im.connect(index_home)
+
+A small utility method for wrapping a normal dict into proper property
+objects.
+
+>>> def propsdict(**kwargs):
+...     # wrap each keyword value in a model.Property keyed by its name
+...     return dict((name, model.Property(name, value))
+...                 for name, value in kwargs.iteritems())
+
+
+Now add the file to the index.
+
+>>> props = propsdict(title="PDF Document",
+... mimetype="application/pdf")
+
+
+>>> uid = im.index(props, "test.pdf")
+
+Let the async indexer do its thing. We ask the indexer whether it has work
+left; once it has none, we expect our content to be indexed and searchable.
+
+>>> while im.working: time.sleep(0.5)
+
+
+Searching on a property of the content works.
+>>> assert im.search("PDF")[0] == uid
+
+Searching into the binary content of the object works as well.
+>>> assert im.search("peek")[0] == uid
+
+Specifying a search that demands a document term be found only in the
+title works as well.
+
+>>> assert im.search('title:PDF')[0] == uid
+>>> im.search('title:peek')
+[]
+
+Searching for documents that are PDFs works as expected. Here we
+use the dictionary form of the query, in which each field name is given
+together with the value to search for.
+>>> assert im.search(dict(mimetype='application/pdf'))[0] == uid
+
+
+#Likewise excluding the match works as expected
+#>>> im.search('-title:PDF')
+#[]
+
+
+Punctuation is fine.
+
+>>> assert im.search("Don't peek")[0] == uid
+
+Quoted strings work as well.
+
+>>> assert im.search(r'''"Don't peek"''')[0] == uid
+
+Cleanly shut down.
+>>> im.stop()