diff options
author | Benjamin Saller <bcsaller@objectrealms.net> | 2007-07-12 21:14:06 (GMT) |
---|---|---|
committer | Benjamin Saller <bcsaller@objectrealms.net> | 2007-07-12 21:14:06 (GMT) |
commit | f577c2c142c7648a482e0eec7ecd736c1ca716d7 (patch) | |
tree | 259c5cf191116379e97d8aebc260f9664ad3a0e5 /tests | |
parent | d7092a126f230f22344b50d79b8bd362d659953b (diff) |
checkpoint new branch before the property type/xapian field merge
Diffstat (limited to 'tests')
-rwxr-xr-x | tests/cleaner.py | 39 | ||||
-rw-r--r-- | tests/test_xapianindex.py | 91 | ||||
-rw-r--r-- | tests/xapianindex.txt | 76 |
3 files changed, 206 insertions, 0 deletions
diff --git a/tests/cleaner.py b/tests/cleaner.py new file mode 100755 index 0000000..8cc795b --- /dev/null +++ b/tests/cleaner.py @@ -0,0 +1,39 @@ +#!/usr/bin/python +import os +import re +from ore.main import Application + +filepattern = re.compile("(\w{8})\-(\w{4})\-(\w{4})\-(\w{4})\-(\w{12})") +tmppattern = re.compile("tmp\S{6}") + +staticdirs = re.compile('test_ds|store\d') + +filepatterns = [filepattern, tmppattern] +dirpatterns = [staticdirs] + +class Cleaner(Application): + def manage_options(self): + self.parser.add_option("--base", dest="base_dir", + action="store", default='/tmp', + help="""Where to clean (/tmp)""") + + def main(self): + """clean up files left from testing in /tmp""" + # this is done using patterned names + for root, dirs, files in os.walk(self.options.base_dir): + for filename in files: + for pat in filepatterns: + if pat.match(filename): + fn = os.path.join(root, filename) + os.remove(fn) + break + for dirname in dirs: + for pat in dirpatterns: + if pat.match(dirname): + dn = os.path.join(root, dirname) + os.system('rm -rf %s' % dn) + +if __name__ == "__main__": + Cleaner("cleaner")() + + diff --git a/tests/test_xapianindex.py b/tests/test_xapianindex.py new file mode 100644 index 0000000..cf39f01 --- /dev/null +++ b/tests/test_xapianindex.py @@ -0,0 +1,91 @@ +from testutils import waitforindex + +from olpc.datastore.xapianindex import IndexManager +import os +from datetime import datetime + +import time +import unittest +import gnomevfs + +DEFAULT_STORE = '/tmp/_xi_test' + + +def index_file(iconn, filepath): + """Index a file.""" + + mimetype = gnomevfs.get_mime_type(filepath) + main, subtype = mimetype.split('/',1) + + stat = os.stat(filepath) + ctime = datetime.fromtimestamp(stat.st_ctime) + mtime = datetime.fromtimestamp(stat.st_mtime) + + if main in ['image']: filepath = None + if subtype in ['x-trash', 'x-python-bytecode']: filepath = None + + + + props = {'mimetype' : mimetype, 'mtime:date' : mtime, + 'ctime:date' : ctime,} + + 
if filepath: + fn = os.path.split(filepath)[1] + props['filename'] = fn + + iconn.index(props, filepath) + + return 1 + +def index_path(iconn, docpath): + """Index a path.""" + count = 0 + for dirpath, dirnames, filenames in os.walk(docpath): + for filename in filenames: + filepath = os.path.join(dirpath, filename) + index_file(iconn, filepath) + count += 1 + return count + +class Test(unittest.TestCase): + def setUp(self): + if os.path.exists(DEFAULT_STORE): + os.system("rm -rf %s" % DEFAULT_STORE) + + def tearDown(self): + if os.path.exists(DEFAULT_STORE): + os.system("rm -rf %s" % DEFAULT_STORE) + + def test_index(self): + # import a bunch of documents into the store + im = IndexManager() + im.connect(DEFAULT_STORE) + + # test basic index performance + start = time.time() + count = index_path(im, os.getcwd()) + end = time.time() + delta = end - start + + #print "%s in %s %s/sec" % (count, delta, count/delta) + + # wait for indexing to finish + waitforindex(im) + + # test basic search performance + results = list(im.search('peek')[0]) + + # this indicates that we found text inside binary content that + # we expected + assert 'test.pdf' in set(r.get_property('filename') for r in results) + + + +def test_suite(): + suite = unittest.TestSuite() + suite.addTest(unittest.makeSuite(Test)) + return suite + +if __name__ == "__main__": + unittest.main() + diff --git a/tests/xapianindex.txt b/tests/xapianindex.txt new file mode 100644 index 0000000..de495a6 --- /dev/null +++ b/tests/xapianindex.txt @@ -0,0 +1,76 @@ +The xapian index module can be used directly as follows + +First clean up any old test data. + +>>> index_home = "/tmp/xi" +>>> import os, sys, time, logging +>>> assert os.system('rm -rf %s' % index_home) == 0 + +# >>> logging.basicConfig(level=logging.DEBUG, +# ... format="%(asctime)-15s %(name)s %(levelname)s: %(message)s", +# ... 
stream=sys.stderr) + + +>>> from olpc.datastore.xapianindex import IndexManager +>>> from olpc.datastore import model +>>> im = IndexManager() +>>> im.connect(index_home) + +A small utility method for wrapping a normal dict into proper property +objects. + +>>> def propsdict(**kwargs): +... d = {} +... for k,v in kwargs.iteritems(): +... d[k] = model.Property(k, v) +... return d + + +Now add the file to the index. + +>>> props = propsdict(title="PDF Document", +... mimetype="application/pdf") + + +>>> uid = im.index(props, "test.pdf") + +Let the async indexer do its thing. We ask the indexer if it has work +left; when it has none, we expect our content to be indexed and searchable. + +>>> while im.working: time.sleep(0.5) + + +Searching on a property of the content works. +>>> assert im.search("PDF")[0] == uid + +Searching into the binary content of the object works as well. +>>> assert im.search("peek")[0] == uid + +Specifying a search that demands a document term be found only in the +title works as well. + +>>> assert im.search('title:PDF')[0] == uid +>>> im.search('title:peek') +[] + +Searching for documents that are PDF works as expected here. Here we +use the dictionary form of the query where each field name is given +and creates a search. +>>> assert im.search(dict(mimetype='application/pdf'))[0] == uid + + +#Likewise excluding the match works as expected +#>>> im.search('-title:PDF') +#[] + + +Punctuation is fine. + +>>> assert im.search("Don't peek")[0] == uid + +As are quoted strings. + +>>> assert im.search(r'''"Don't peek"''')[0] == uid + +Cleanly shut down. +>>> im.stop() |