Web   ·   Wiki   ·   Activities   ·   Blog   ·   Lists   ·   Chat   ·   Meeting   ·   Bugs   ·   Git   ·   Translate   ·   Archive   ·   People   ·   Donate
summaryrefslogtreecommitdiffstats
path: root/tests/test_xapianindex.py
blob: c455c448bb405f7dd0d2a613db9614e3e98ef926 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os
import shutil
import time
import unittest
from datetime import datetime

import gnomevfs

from olpc.datastore.xapianindex import IndexManager

# Filesystem path the tests pass to IndexManager.connect(); it is removed
# before and after every test case.
DEFAULT_STORE = '/tmp/_xi_test'


def index_file(iconn, filepath):
    """Index one file through ``iconn`` and return 1.

    The mime type (as reported by gnomevfs) and the ISO-formatted ctime and
    mtime are always recorded as properties.  For ``image/*`` files and for
    the ``x-trash`` / ``x-python-bytecode`` subtypes, ``None`` is passed to
    ``iconn.index`` instead of the path, so only the properties are handed
    over for those files.
    """
    mimetype = gnomevfs.get_mime_type(filepath)
    major, minor = mimetype.split('/', 1)

    info = os.stat(filepath)
    props = {
        'mime_type': mimetype,
        'mtime': datetime.fromtimestamp(info.st_mtime).isoformat(),
        'ctime': datetime.fromtimestamp(info.st_ctime).isoformat(),
    }

    # Decide whether the file itself is handed to the indexer.
    content_path = filepath
    if major == 'image' or minor in ('x-trash', 'x-python-bytecode'):
        content_path = None

    # NOTE(review): the 'filename' property is only set when the path is
    # kept, so skipped files are indexed without a filename -- confirm that
    # this is intended.
    if content_path:
        props['filename'] = os.path.basename(content_path)

    iconn.index(props, content_path)
    return 1

def index_path(iconn, docpath):
    """Index every file found beneath ``docpath`` and return how many.

    The tree is traversed with ``os.walk``; each regular entry reported in
    ``filenames`` is passed to ``index_file`` together with ``iconn``.
    """
    indexed = 0
    for root, _subdirs, names in os.walk(docpath):
        for name in names:
            index_file(iconn, os.path.join(root, name))
        # Every name in this directory was indexed once we get here.
        indexed += len(names)
    return indexed

class Test(unittest.TestCase):
    """Index the current working directory with IndexManager and query it."""

    def _remove_store(self):
        """Delete DEFAULT_STORE, whatever kind of filesystem entry it is.

        Uses shutil/os instead of ``os.system("rm -rf ...")`` so no shell is
        spawned and a failed cleanup raises instead of being silently
        ignored (a stale store would make the search assertions unreliable).
        """
        if os.path.islink(DEFAULT_STORE) or os.path.isfile(DEFAULT_STORE):
            os.remove(DEFAULT_STORE)
        elif os.path.isdir(DEFAULT_STORE):
            shutil.rmtree(DEFAULT_STORE)

    def setUp(self):
        """Start every test from a non-existent store."""
        self._remove_store()

    def tearDown(self):
        """Leave no store behind once the test has run."""
        self._remove_store()

    def test_index(self):
        """Index everything under the cwd, then search for known content.

        The assertions rely on the search for 'peek' returning a document
        whose 'filename' property is 'test.pdf'.
        """
        # import a bunch of documents into the store
        im = IndexManager()
        im.connect(DEFAULT_STORE)
        index_path(im, os.getcwd())

        # wait for indexing to finish
        im.complete_indexing()

        # test basic search
        results = list(im.search('peek')[0])
        filenames = set(r.get_property('filename') for r in results)

        # this indicates that we found text inside binary content that
        # we expected.  unittest assertion methods are used rather than bare
        # ``assert`` so the checks still run under ``python -O``.
        self.assertTrue('test.pdf' in filenames,
                        "'test.pdf' not among the results for 'peek'")

        # presumably element [1] of the search result is the match count --
        # TODO confirm against IndexManager.search
        self.assertEqual(
            im.search('mime_type:application/pdf filename:test.pdf peek')[1],
            1)

        
def test_suite():
    """Return a TestSuite holding all test methods of ``Test``."""
    suite = unittest.TestSuite()
    # loadTestsFromTestCase is the supported replacement for
    # unittest.makeSuite, which is deprecated and removed in Python 3.13.
    suite.addTest(unittest.defaultTestLoader.loadTestsFromTestCase(Test))
    return suite

# Run the tests in this module when the file is executed as a script.
if "__main__" == __name__:
    unittest.main()