1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
|
#!/usr/bin/env python
#
# Copyright (C) 2007 Lemur Consulting Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
r"""fieldmappings.py: Mappings from field names to term prefixes, etc.
"""
__docformat__ = "restructuredtext en"
import cPickle as _cPickle
class FieldMappings(object):
    """Registry tying field names to term prefixes and to value slots.

    Two independent tables are maintained:

    - field name -> the string prefix inserted at the start of terms
      generated from that field.
    - field name -> the slot number in which the contents of that field are
      stored.

    Prefixes and slot numbers are handed out sequentially, and the whole
    state can be round-tripped through `serialise()` and the constructor.

    """
    __slots__ = ('_prefixes', '_prefixcount', '_slots', '_slotcount')

    def __init__(self, serialised=None):
        """Build an empty mapping, or restore one from `serialised`.

        `serialised`, when supplied, is expected to be a string previously
        returned by `serialise()`.

        """
        if serialised is None:
            self._prefixes = {}
            self._prefixcount = 0
            self._slots = {}
            self._slotcount = 0
        else:
            # NOTE: unpickling can run arbitrary code; only feed this strings
            # that come from a trusted source.
            state = _cPickle.loads(serialised)
            (self._prefixes, self._prefixcount,
             self._slots, self._slotcount) = state

    def _genPrefix(self):
        """Return a prefix which has not been handed out before.

        Every prefix is 'X' followed by uppercase letters ('X' is the Xapian
        convention for user-defined prefixes: other initial letters are
        reserved for special meanings).  The letters after the 'X' encode an
        internal counter, least significant letter first:

        >>> maps = FieldMappings()
        >>> maps._genPrefix()
        'XA'
        >>> maps._genPrefix()
        'XB'
        >>> [maps._genPrefix() for i in range(60)]
        ['XC', 'XD', 'XE', 'XF', 'XG', 'XH', 'XI', 'XJ', 'XK', 'XL', 'XM', 'XN', 'XO', 'XP', 'XQ', 'XR', 'XS', 'XT', 'XU', 'XV', 'XW', 'XX', 'XY', 'XZ', 'XAA', 'XBA', 'XCA', 'XDA', 'XEA', 'XFA', 'XGA', 'XHA', 'XIA', 'XJA', 'XKA', 'XLA', 'XMA', 'XNA', 'XOA', 'XPA', 'XQA', 'XRA', 'XSA', 'XTA', 'XUA', 'XVA', 'XWA', 'XXA', 'XYA', 'XZA', 'XAB', 'XBB', 'XCB', 'XDB', 'XEB', 'XFB', 'XGB', 'XHB', 'XIB', 'XJB']
        >>> maps = FieldMappings()
        >>> [maps._genPrefix() for i in range(27*26 + 5)][-10:]
        ['XVZ', 'XWZ', 'XXZ', 'XYZ', 'XZZ', 'XAAA', 'XBAA', 'XCAA', 'XDAA', 'XEAA']

        """
        self._prefixcount += 1
        remaining = self._prefixcount
        letters = []
        while remaining != 0:
            # Digits run from 1 to 26 rather than 0 to 25 (there is no zero
            # digit), hence the shift by one before dividing.
            remaining, offset = divmod(remaining - 1, 26)
            letters.append(chr(ord('A') + offset))
        return 'X' + ''.join(letters)

    def get_prefix(self, fieldname):
        """Return the term prefix allocated to `fieldname`.

        Raises KeyError if no prefix has been allocated for the field.

        """
        return self._prefixes[fieldname]

    def get_slot(self, fieldname):
        """Return the slot number allocated to `fieldname`.

        Raises KeyError if no slot has been allocated for the field.

        """
        return self._slots[fieldname]

    def add_prefix(self, fieldname):
        """Ensure that `fieldname` has a term prefix.

        A new prefix is generated only when the field does not have one yet;
        otherwise the existing allocation is left untouched.

        """
        if fieldname not in self._prefixes:
            self._prefixes[fieldname] = self._genPrefix()

    def add_slot(self, fieldname):
        """Ensure that `fieldname` has a slot number.

        The next unused slot number is assigned only when the field does not
        have one yet; otherwise the existing allocation is left untouched.

        """
        if fieldname not in self._slots:
            self._slots[fieldname] = self._slotcount
            self._slotcount += 1

    def serialise(self):
        """Return the complete state of the mappings as a string.

        Passing the returned string to the constructor of a new FieldMappings
        object recreates the mappings.

        """
        state = (
            self._prefixes,
            self._prefixcount,
            self._slots,
            self._slotcount,
        )
        # Pickle protocol 2, as in the stored format used so far.
        return _cPickle.dumps(state, 2)
|