1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
|
import os
import shutil
import time
import unittest
from datetime import datetime

import gnomevfs

from olpc.datastore.xapianindex import IndexManager
# Filesystem path of the index store the tests connect to. Test.setUp and
# Test.tearDown delete whatever exists at this path, so it must never point
# at anything worth keeping.
DEFAULT_STORE = '/tmp/_xi_test'
def index_file(iconn, filepath):
    """Hand one file and its basic metadata to the index connection.

    The MIME type reported by gnomevfs plus the file's ctime/mtime (as ISO
    8601 strings) are always passed along as properties.  For ``image/*``
    files and for the ``x-trash`` / ``x-python-bytecode`` subtypes the path
    is replaced by ``None`` and no ``filename`` property is set.

    NOTE(review): passing ``None`` presumably makes the index store the
    properties without reading file content -- confirm against
    ``IndexManager.index``.

    :param iconn: object exposing ``index(props, filepath)``.
    :param filepath: path of the file to index.
    :returns: always ``1``.
    """
    mime = gnomevfs.get_mime_type(filepath)
    major, minor = mime.split('/', 1)

    info = os.stat(filepath)
    created = datetime.fromtimestamp(info.st_ctime).isoformat()
    modified = datetime.fromtimestamp(info.st_mtime).isoformat()

    props = {
        'mime_type': mime,
        'mtime': modified,
        'ctime': created,
    }

    # Decide whether the path itself is passed on, or only the properties.
    skip_content = (major == 'image'
                    or minor in ('x-trash', 'x-python-bytecode'))
    content_path = None if skip_content else filepath

    if content_path:
        props['filename'] = os.path.basename(content_path)

    iconn.index(props, content_path)
    return 1
def index_path(iconn, docpath):
    """Index every file found beneath ``docpath``.

    Walks the tree with ``os.walk`` and calls ``index_file`` once per
    regular directory entry reported in ``filenames``.

    :param iconn: index connection forwarded unchanged to ``index_file``.
    :param docpath: root directory of the walk.
    :returns: number of files handed to ``index_file``.
    """
    indexed = 0
    for root, _subdirs, names in os.walk(docpath):
        for name in names:
            index_file(iconn, os.path.join(root, name))
        # Every name in this directory was submitted exactly once.
        indexed += len(names)
    return indexed
class Test(unittest.TestCase):
    """End-to-end check that an IndexManager can index and search files.

    Each test runs against a fresh store at ``DEFAULT_STORE``: whatever is
    at that path is removed before and after every test.
    """

    @staticmethod
    def _remove_store():
        """Delete the on-disk test store, if any, without spawning a shell."""
        # ignore_errors keeps the cleanup best-effort: a missing or only
        # partially removable store must not turn into a test error.
        shutil.rmtree(DEFAULT_STORE, ignore_errors=True)

    def setUp(self):
        """Start from an empty store so earlier runs cannot leak results."""
        self._remove_store()

    def tearDown(self):
        """Remove the store left behind by the test."""
        self._remove_store()

    def test_index(self):
        """Index the current working directory and search it for known text.

        The assertions require a file named ``test.pdf`` containing the
        word "peek" somewhere beneath the current working directory, so the
        test depends on where it is run from.
        """
        im = IndexManager()
        im.connect(DEFAULT_STORE)

        # Import a bunch of documents into the store.
        index_path(im, os.getcwd())

        # Wait for indexing to finish before querying.
        im.complete_indexing()

        # search() is used here as a pair: [0] is iterated for result
        # objects, [1] is compared against a number (presumably the total
        # match count -- confirm against IndexManager.search).
        results = list(im.search('peek')[0])
        filenames = set(r.get_property('filename') for r in results)

        # Finding test.pdf shows that text inside binary content was
        # extracted and indexed as expected.
        self.assertTrue('test.pdf' in filenames,
                        "test.pdf not among results: %r" % (filenames,))

        self.assertEqual(
            im.search('mime_type:application/pdf filename:test.pdf peek')[1],
            1)
def test_suite():
    """Build and return a ``unittest.TestSuite`` holding the tests of ``Test``.

    :returns: a ``TestSuite`` wrapping the suite loaded from ``Test``.
    """
    suite = unittest.TestSuite()
    # unittest.makeSuite() is deprecated since Python 3.11 and removed in
    # 3.13; the default loader collects the same test methods and is also
    # available on old interpreters.
    suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(Test))
    return suite
if __name__ == "__main__":
    # Executed as a script: let unittest discover and run this module's tests.
    unittest.main()
|