The xapian index module can be used directly as follows.

First clean up any old test data.

>>> index_home = "/tmp/xi"
>>> import os, sys, time, logging
>>> assert os.system('rm -rf %s' % index_home) == 0

# >>> logging.basicConfig(level=logging.DEBUG,
# ...                     format="%(asctime)-15s %(name)s %(levelname)s: %(message)s",
# ...                     stream=sys.stderr)

>>> from olpc.datastore.xapianindex import IndexManager
>>> from olpc.datastore import model
>>> im = IndexManager()
>>> im.connect(index_home)

Now add the file to the index.

>>> props = dict(title="PDF Document",
...              mime_type="application/pdf")
>>> uid = im.index(props, "test.pdf")

Let the async indexer do its thing. We ask the indexer if it has work
left; when it has none, we expect our content to be indexed and
searchable.

>>> im.complete_indexing()

Searching on a property of the content works.

>>> def expect(r, count=None):
...     if count: assert r[1] == count
...     return list(r[0])
>>> def expect_single(r):
...     assert r[1] == 1
...     return r[0].next()
>>> def expect_none(r):
...     assert r[1] == 0
...     assert list(r[0]) == []
>>> assert expect_single(im.search("PDF")).id == uid

Searching into the binary content of the object works as well.

>>> assert expect_single(im.search("peek")).id == uid

Specifying a search that demands a document term be found only in the
title works as well.

>>> assert expect_single(im.search('title:PDF')).id == uid
>>> expect_none(im.search('title:peek'))

Searching for documents that are PDF works as expected here. Here we
use the dictionary form of the query, in which each key names a field
and its value is the term to search for in that field.

>>> assert expect_single(im.search(dict(mime_type='application/pdf'))).id == uid

Punctuation is fine.

>>> assert expect_single(im.search("Don't peek")).id == uid

As well as quoted strings.

>>> assert expect_single(im.search(r'''"Don't peek"''')).id == uid

We can also issue OR-style queries over a given field by submitting a
list of queries to a given field.

>>> assert expect_single(im.search(dict(mime_type=["text/plain",
...                                                'application/pdf']))).id == uid

But an OR query for missing values still returns nothing.

>>> expect_none(im.search(dict(mime_type=["video/mpg",
...                                       'audio/ogg'])))

Partial (wildcard) searches work as well.

>>> assert expect_single(im.search(r'''pee*''')).id == uid

We also support tagging of documents.

>>> im.tag(uid, "foo bar")
>>> assert expect_single(im.search('tags:foo')).id == uid

Cleanly shut down.

>>> im.stop()
>>> assert os.system('rm -rf %s' % index_home) == 0