From a31cd062c96fbbd337d181a33b3890d8c2b5f32b Mon Sep 17 00:00:00 2001 From: Benjamin Saller Date: Mon, 30 Jul 2007 04:35:42 +0000 Subject: updated secore to latest --- diff --git a/secore/datastructures.py b/secore/datastructures.py index 414625d..b7061fa 100644 --- a/secore/datastructures.py +++ b/secore/datastructures.py @@ -20,6 +20,7 @@ r"""datastructures.py: Datastructures for search engine core. """ __docformat__ = "restructuredtext en" +import errors as _errors import xapian as _xapian import cPickle as _cPickle @@ -117,6 +118,13 @@ class ProcessedDocument(object): # of our locale. if ord(term[0]) >= ord('A') and ord(term[0]) <= ord('Z'): prefix = prefix + ':' + + if len(prefix + term) > 220: + raise _errors.IndexerError("Field %r is too long: maximum length " + "220 - was %d (%r)" % + (field, len(prefix + term), + prefix + term)) + if positions is None: self._doc.add_term(prefix + term, wdfinc) elif isinstance(positions, int): diff --git a/secore/fieldactions.py b/secore/fieldactions.py index c595f0b..3de7cc9 100644 --- a/secore/fieldactions.py +++ b/secore/fieldactions.py @@ -42,6 +42,22 @@ def _act_index_exact(fieldname, doc, value, context): """ doc.add_term(fieldname, value, 0) +def _act_tag(fieldname, doc, value, context): + """Perform the TAG action. + + """ + doc.add_term(fieldname, value.lower(), 0) + +def _act_facet(fieldname, doc, value, context, type=None): + """Perform the FACET action. + + """ + marshaller = SortableMarshaller() + fn = marshaller.get_marshall_function(fieldname, type) + doc.add_term(fieldname, value.lower(), 0) + value = fn(fieldname, value) + doc.add_value(fieldname, value) + def _act_index_freetext(fieldname, doc, value, context, weight=1, language=None, stop=None, spell=False, nopos=False, noprefix=False): @@ -210,7 +226,7 @@ class FieldActions(object): - 'string' - sort in lexicographic (ie, alphabetical) order. This is the default, used if no type is set. - 'float' - treat the values as (decimal representations of) floating - point numbers, and sort in numerical order . The values in the field + point numbers, and sort in numerical order. The values in the field must be valid floating point numbers (according to Python's float() function). - 'date' - sort in date order. The values must be valid dates (either @@ -221,6 +237,23 @@ class FieldActions(object): "collapse" result sets, such that only the highest result with each value of the field will be returned. + - `TAG`: the field contains tags; these are strings, which will be matched + in a case insensitive way, but otherwise must be exact matches. Tag + fields can be searched for by making an explict query (ie, using + query_field(), but not with query_parse()). A list of the most frequent + tags in a result set can also be accessed easily. + + - `FACET`: the field represents a classification facet; these are strings + which will be matched exactly, but a list of all the facets present in + the result set can also be accessed easily - in addition, a suitable + subset of the facets, and a selection of the facet values, present in the + result set can be calculated. One optional parameter may be supplied: + + - 'type' is a value indicating the type of facet contained in the field: + + - 'string' - the facet values are exact binary strings. + - 'float' - the facet values are floating point numbers. + """ # See the class docstring for the meanings of the following constants. @@ -229,6 +262,8 @@ class FieldActions(object): INDEX_FREETEXT = 3 SORTABLE = 4 COLLAPSE = 5 + TAG = 6 + FACET = 7 # Sorting and collapsing store the data in a value, but the format depends # on the sort type. Easiest way to implement is to treat them as the same @@ -253,7 +288,10 @@ class FieldActions(object): FieldActions.INDEX_EXACT, FieldActions.INDEX_FREETEXT, FieldActions.SORTABLE, - FieldActions.COLLAPSE,): + FieldActions.COLLAPSE, + FieldActions.TAG, + FieldActions.FACET, + ): raise _errors.IndexerError("Unknown field action: %r" % action) info = self._action_info[action] @@ -312,7 +350,7 @@ class FieldActions(object): raise _errors.IndexerError("Field %r is already marked for " "sorting, with a different " "sort type" % self._fieldname) - + if self.NEED_PREFIX in info[3]: field_mappings.add_prefix(self._fieldname) if self.NEED_SLOT in info[3]: @@ -351,6 +389,8 @@ class FieldActions(object): SORTABLE: ('SORTABLE', ('type', ), None, (NEED_SLOT,), ), COLLAPSE: ('COLLAPSE', (), None, (NEED_SLOT,), ), SORT_AND_COLLAPSE: ('SORT_AND_COLLAPSE', ('type', ), _act_sort_and_collapse, (NEED_SLOT,), ), + TAG: ('TAG', (), _act_tag, (NEED_PREFIX,), ), + FACET: ('FACET', ('type', ), _act_facet, (NEED_PREFIX, NEED_SLOT,), ), } if __name__ == '__main__': diff --git a/secore/fieldmappings.py b/secore/fieldmappings.py index 3838ce5..7347448 100644 --- a/secore/fieldmappings.py +++ b/secore/fieldmappings.py @@ -76,6 +76,17 @@ class FieldMappings(object): num = num // 26 return 'X' + ''.join(res) + def get_fieldname_from_prefix(self, prefix): + """Get a fieldname from a prefix. + + If the prefix is not found, return None. + + """ + for key, val in self._prefixes.iteritems(): + if val == prefix: + return key + return None + def get_prefix(self, fieldname): """Get the prefix used for a given field name. diff --git a/secore/indexerconnection.py b/secore/indexerconnection.py index be82319..87fdd35 100644 --- a/secore/indexerconnection.py +++ b/secore/indexerconnection.py @@ -225,6 +225,56 @@ class IndexerConnection(object): xapdoc = document.prepare() self._index.replace_document('Q' + id, xapdoc) + def _make_synonym_key(self, original, field): + """Make a synonym key (ie, the term or group of terms to store in + xapian). + + """ + if field is not None: + prefix = self._field_mappings.get_prefix(field) + else: + prefix = '' + original = original.lower() + # Add the prefix to the start of each word. + return ' '.join((prefix + word for word in original.split(' '))) + + def add_synonym(self, original, synonym, field=None): + """Add a synonym to the index. + + - `original` is the word or words which will be synonym expanded in + searches (if multiple words are specified, each word should be + separated by a single space). + - `synonym` is a synonym for `original`. + - `field` is the field which this synonym is specific to. If no field + is specified, the synonym will be used for searches which are not + specific to any particular field. + + """ + key = self._make_synonym_key(original, field) + self._index.add_synonym(key, synonym.lower()) + + def remove_synonym(self, original, synonym, field=None): + """Remove a synonym from the index. + + - `field` is the field which this synonym is specific to. If no field + is specified, the synonym will be used for searches which are not + specific to any particular field. + + """ + key = self._make_synonym_key(original, field) + self._index.remove_synonym(key, synonym) + + def clear_synonyms(self, original, field=None): + """Remove all synonyms for a word (or phrase). + + - `field` is the field which this synonym is specific to. If no field + is specified, the synonym will be used for searches which are not + specific to any particular field. + + """ + key = self._make_synonym_key(original, field) + self._index.clear_synonyms(key) + def delete(self, id): """Delete a document from the search engine index. @@ -332,6 +382,32 @@ class IndexerConnection(object): result._doc = self._index.get_document(plitem.docid) return result + def iter_synonyms(self, prefix=""): + """Get an iterator over the synonyms. + + - `prefix`: if specified, only synonym keys with this prefix will be + returned. + + The iterator returns 2-tuples, in which the first item is the key (ie, + a 2-tuple holding the term or terms which will be synonym expanded, + followed by the fieldname specified (or None if no fieldname)), and the + second item is a tuple of strings holding the synonyms for the first + item. + + These return values are suitable for the dict() builtin, so you can + write things like: + + >>> conn = IndexerConnection('foo') + >>> conn.add_synonym('foo', 'bar') + >>> conn.add_synonym('foo bar', 'baz') + >>> conn.add_synonym('foo bar', 'foo baz') + >>> dict(conn.iter_synonyms()) + {('foo', None): ('bar',), ('foo bar', None): ('baz', 'foo baz')} + + """ + return SynonymIter(self._index, self._field_mappings, prefix) + + class PrefixedTermIter(object): """Iterate through all the terms with a given prefix. @@ -340,7 +416,7 @@ class PrefixedTermIter(object): """Initialise the prefixed term iterator. - `prefix` is the prefix to return terms for. - - `termiter` is a xapian TermIterator, which should be at it's start. + - `termiter` is a xapian TermIterator, which should be at its start. """ @@ -364,7 +440,6 @@ class PrefixedTermIter(object): def next(self): """Get the next term with the specified prefix. - """ if not self._started: term = self._termiter.skip_to(self._prefix).term @@ -375,6 +450,45 @@ class PrefixedTermIter(object): raise StopIteration return term[self._prefixlen:] + +class SynonymIter(object): + """Iterate through a list of synonyms. + + """ + def __init__(self, index, field_mappings, prefix): + """Initialise the synonym iterator. + + - `index` is the index to get the synonyms from. + - `field_mappings` is the FieldMappings object for the iterator. + - `prefix` is the prefix to restrict the returned synonyms to. + + """ + self._index = index + self._field_mappings = field_mappings + self._syniter = self._index.synonym_keys(prefix) + + def __iter__(self): + return self + + def next(self): + """Get the next synonym. + + """ + synkey = self._syniter.next() + pos = 0 + for char in synkey: + if char.isupper(): pos += 1 + else: break + if pos == 0: + fieldname = None + terms = synkey + else: + prefix = synkey[:pos] + fieldname = self._field_mappings.get_fieldname_from_prefix(prefix) + terms = ' '.join((term[pos:] for term in synkey.split(' '))) + synval = tuple(self._index.synonyms(synkey)) + return ((terms, fieldname), synval) + if __name__ == '__main__': import doctest, sys doctest.testmod (sys.modules[__name__]) diff --git a/secore/marshall.py b/secore/marshall.py index ebcc71d..8f1caee 100644 --- a/secore/marshall.py +++ b/secore/marshall.py @@ -21,50 +21,14 @@ r"""marshall.py: Marshal values into strings __docformat__ = "restructuredtext en" import math - -def _long_to_base256_array(value, length, flip): - result = [] - for i in xrange(length): - n = value % 256 - if flip: n = 255 - n - result.insert(0, chr(n)) - value /= 256 - return result +import xapian def float_to_string(value): """Marshall a floating point number to a string which sorts in the appropriate manner. """ - mantissa, exponent = math.frexp(value) - sign = '1' - if mantissa < 0: - mantissa = -mantissa - sign = '0' - - # IEEE representation of doubles uses 11 bits for the exponent, with a bias - # of 1023. There's then another 52 bits in the mantissa, so we need to - # add 1075 to be sure that the exponent won't be negative. - # Even then, we check that the exponent isn't negative, and consider the - # value to be equal to zero if it is. - exponent += 1075 - if exponent < 0: # Note - this can't happen on most architectures #pragma: no cover - exponent = 0 - mantissa = 0 - elif mantissa == 0: - exponent = 0 - - # IEEE representation of doubles uses 52 bits for the mantissa. Convert it - # to a 7 character string, and convert the exponent to a 2 character - # string. - - mantissa = long(mantissa * (2**52)) - - digits = [sign] - digits.extend(_long_to_base256_array(exponent, 2, sign == '0')) - digits.extend(_long_to_base256_array(mantissa, 7, sign == '0')) - - return ''.join(digits) + return xapian.sortable_serialise(value) def date_to_string(date): """Marshall a date to a string which sorts in the appropriate manner. diff --git a/secore/searchconnection.py b/secore/searchconnection.py index 79fa509..f7caeab 100644 --- a/secore/searchconnection.py +++ b/secore/searchconnection.py @@ -20,14 +20,16 @@ r"""searchconnection.py: A connection to the search engine for searching. """ __docformat__ = "restructuredtext en" +import os as _os +import cPickle as _cPickle + import xapian as _xapian from datastructures import * from fieldactions import * import fieldmappings as _fieldmappings import highlight as _highlight import errors as _errors -import os as _os -import cPickle as _cPickle +import indexerconnection as _indexerconnection class SearchResult(ProcessedDocument): """A result from a search. @@ -42,7 +44,10 @@ class SearchResult(ProcessedDocument): """Get the language that should be used for a given field. """ - actions = self._results._conn._field_actions[field]._actions + try: + actions = self._results._conn._field_actions[field]._actions + except KeyError: + actions = {} for action, kwargslist in actions.iteritems(): if action == FieldActions.INDEX_FREETEXT: for kwargs in kwargslist: @@ -118,20 +123,24 @@ class SearchResultIter(object): def next(self): msetitem = self._iter.next() - return SearchResult(msetitem, - self._results) + return SearchResult(msetitem, self._results) class SearchResults(object): """A set of results of a search. """ - def __init__(self, conn, enq, query, mset, fieldmappings): + def __init__(self, conn, enq, query, mset, fieldmappings, tagspy, + facetspy, facetfields): self._conn = conn self._enq = enq self._query = query self._mset = mset self._fieldmappings = fieldmappings + self._tagspy = tagspy + self._facetspy = facetspy + self._facetfields = facetfields + self._numeric_ranges_built = {} def __repr__(self): return ("= maxfacets: + break + return result + + class SearchConnection(object): """A connection to the search engine for searching. The connection will access a view of the database. """ + _qp_flags_std = (_xapian.QueryParser.FLAG_PHRASE | + _xapian.QueryParser.FLAG_BOOLEAN | + _xapian.QueryParser.FLAG_LOVEHATE | + _xapian.QueryParser.FLAG_AUTO_SYNONYMS | + _xapian.QueryParser.FLAG_AUTO_MULTIWORD_SYNONYMS) + _qp_flags_nobool = (_qp_flags_std | _xapian.QueryParser.FLAG_BOOLEAN) ^ _xapian.QueryParser.FLAG_BOOLEAN def __init__(self, indexpath): """Create a new connection to the index for searching. @@ -252,7 +355,10 @@ class SearchConnection(object): """Get the sort type that should be used for a given field. """ - actions = self._field_actions[field]._actions + try: + actions = self._field_actions[field]._actions + except KeyError: + actions = {} for action, kwargslist in actions.iteritems(): if action == FieldActions.SORT_AND_COLLAPSE: for kwargs in kwargslist: @@ -266,6 +372,7 @@ class SearchConnection(object): # class. Move it to a shared location. config_file = _os.path.join(self._indexpath, 'config') if not _os.path.exists(config_file): + self._field_actions = {} self._field_mappings = _fieldmappings.FieldMappings() return fd = open(config_file) @@ -368,21 +475,35 @@ class SearchConnection(object): raise _errors.SearchError("SearchConnection has been closed") return _xapian.Query(operator, list(queries)) - def query_filter(self, query, filter): + def query_filter(self, query, filter, exclude=False): """Filter a query with another query. - Documents will only match the resulting query if they match both - queries, but will be weighted according to only the first query. + If exclude is False (or not specified), documents will only match the + resulting query if they match the both the first and second query: the + results of the first query are "filtered" to only include those which + also match the second query. + + If exclude is True, documents will only match the resulting query if + they match the first query, but not the second query: the results of + the first query are "filtered" to only include those which do not match + the second query. + + Documents will always be weighted according to only the first query. - `query`: The query to filter. - `filter`: The filter to apply to the query. + - `exclude`: If True, the sense of the filter is reversed - only + documents which do not match the second query will be returned. """ if self._index is None: raise _errors.SearchError("SearchConnection has been closed") if not isinstance(filter, _xapian.Query): raise _errors.SearchError("Filter must be a Xapian Query object") - return _xapian.Query(_xapian.Query.OP_FILTER, query, filter) + if exclude: + return _xapian.Query(_xapian.Query.OP_AND_NOT, query, filter) + else: + return _xapian.Query(_xapian.Query.OP_FILTER, query, filter) def query_range(self, field, begin, end): """Create a query for a range search. @@ -407,9 +528,61 @@ class SearchConnection(object): begin = fn(field, begin) end = fn(field, end) - slot = self._field_mappings.get_slot(field) + try: + slot = self._field_mappings.get_slot(field) + except KeyError: + return _xapian.Query() return _xapian.Query(_xapian.Query.OP_VALUE_RANGE, slot, begin, end) + def query_facet(self, field, val): + """Create a query for a facet value. + + This creates a query which matches only those documents which have a + facet value in the specified range. + + For a numeric range facet, val should be a tuple holding the start and + end of the range. For other facets, val should be the value to look + for. + + The start and end values are both inclusive - any documents with a + value equal to start or end will be returned (unless end is less than + start, in which case no documents will be returned). + + """ + if self._index is None: + raise _errors.SearchError("SearchConnection has been closed") + + try: + actions = self._field_actions[field]._actions + except KeyError: + actions = {} + facettype = None + for action, kwargslist in actions.iteritems(): + if action == FieldActions.FACET: + for kwargs in kwargslist: + facettype = kwargs.get('type', None) + if facettype is not None: + break + if facettype is not None: + break + + if facettype == 'float': + assert(len(val) == 2) + try: + slot = self._field_mappings.get_slot(field) + except KeyError: + return _xapian.Query() + marshaller = SortableMarshaller(False) + fn = marshaller.get_marshall_function(field, sorttype) + begin = fn(field, val[0]) + end = fn(field, val[1]) + return _xapian.Query(_xapian.Query.OP_VALUE_RANGE, slot, begin, end) + else: + assert(facettype == 'string' or facettype is None) + prefix = self._field_mappings.get_prefix(field) + return _xapian.Query(prefix + val.lower()) + + def _prepare_queryparser(self, allow, deny, default_op): """Prepare (and return) a query parser using the specified fields and operator. @@ -429,7 +602,10 @@ class SearchConnection(object): allow = [key for key in allow if key not in deny] for field in allow: - actions = self._field_actions[field]._actions + try: + actions = self._field_actions[field]._actions + except KeyError: + actions = {} for action, kwargslist in actions.iteritems(): if action == FieldActions.INDEX_EXACT: # FIXME - need patched version of xapian to add exact prefixes @@ -459,8 +635,11 @@ class SearchConnection(object): Only one of `allow` and `deny` may be specified. - If any of the entries in `allow` or `deny` are not present in the - configuration for the database, an exception will be raised. + If any of the entries in `allow` are not present in the configuration + for the database, or are not specified for indexing (either as + INDEX_EXACT or INDEX_FREETEXT), they will be ignored. If any of the + entries in `deny` are not present in the configuration for the + database, they will be ignored. Returns a Query object, which may be passed to the search() method, or combined with other queries. @@ -468,11 +647,11 @@ class SearchConnection(object): """ qp = self._prepare_queryparser(allow, deny, default_op) try: - return qp.parse_query(string) + return qp.parse_query(string, self._qp_flags_std) except _xapian.QueryParserError, e: # If we got a parse error, retry without boolean operators (since # these are the usual cause of the parse error). - return qp.parse_query(string, 0) + return qp.parse_query(string, self._qp_flags_nobool) def query_field(self, field, value, default_op=OP_AND): """A query for a single field. @@ -487,7 +666,9 @@ class SearchConnection(object): # need to check on field type, and stem / split as appropriate for action, kwargslist in actions.iteritems(): - if action == FieldActions.INDEX_EXACT: + if action in (FieldActions.INDEX_EXACT, + FieldActions.TAG, + FieldActions.FACET,): prefix = self._field_mappings.get_prefix(field) if len(value) > 0: chval = ord(value[0]) @@ -505,9 +686,7 @@ class SearchConnection(object): qp.set_stemming_strategy(qp.STEM_SOME) except KeyError: pass - return qp.parse_query(value, - qp.FLAG_PHRASE | qp.FLAG_BOOLEAN | qp.FLAG_LOVEHATE, - prefix) + return qp.parse_query(value, self._qp_flags_std, prefix) return _xapian.Query() @@ -528,12 +707,15 @@ class SearchConnection(object): Only one of `allow` and `deny` may be specified. - If any of the entries in `allow` or `deny` are not present in the - configuration for the database, an exception will be raised. + If any of the entries in `allow` are not present in the configuration + for the database, or are not specified for indexing (either as + INDEX_EXACT or INDEX_FREETEXT), they will be ignored. If any of the + entries in `deny` are not present in the configuration for the + database, they will be ignored. """ qp = self._prepare_queryparser(allow, deny, self.OP_AND) - qp.parse_query(string, qp.FLAG_PHRASE|qp.FLAG_BOOLEAN|qp.FLAG_LOVEHATE|qp.FLAG_SPELLING_CORRECTION) + qp.parse_query(string, self._qp_flags_std | qp.FLAG_SPELLING_CORRECTION) corrected = qp.get_corrected_query_string() if len(corrected) == 0: if isinstance(string, unicode): @@ -544,7 +726,9 @@ class SearchConnection(object): return corrected def search(self, query, startrank, endrank, - checkatleast=0, sortby=None, collapse=None): + checkatleast=0, sortby=None, collapse=None, + gettags=None, + getfacets=None, allowfacets=None, denyfacets=None): """Perform a search, for documents matching a query. - `query` is the query to perform. @@ -556,7 +740,10 @@ class SearchConnection(object): be returned. - `checkatleast` is the minimum number of results to check for: the estimate of the total number of matches will always be exact if - the number of matches is less than `checkatleast`. + the number of matches is less than `checkatleast`. A value of ``-1`` + can be specified for the checkatleast parameter - this has the + special meaning of "check all matches", and is equivalent to passing + the result of get_doccount(). - `sortby` is the name of a field to sort by. It may be preceded by a '+' or a '-' to indicate ascending or descending order (respectively). If the first character is neither '+' or '-', the @@ -564,10 +751,23 @@ class SearchConnection(object): - `collapse` is the name of a field to collapse the result documents on. If this is specified, there will be at most one result in the result set for each value of the field. + - `gettags` is the name of a field to count tag occurrences in, or a + list of fields to do so. + - `getfacets` is a boolean - if True, the matching documents will be + examined to build up a list of the facet values contained in them. + - `allowfacets` is a list of the fieldnames of facets to consider. + - `denyfacets` is a list of fieldnames of facets which will not be + considered. + + If neither 'allowfacets' or 'denyfacets' is specified, all fields + holding facets will be considered. """ if self._index is None: raise _errors.SearchError("SearchConnection has been closed") + if checkatleast == -1: + checkatleast = self._index.get_doccount() + enq = _xapian.Enquire(self._index) enq.set_query(query) @@ -602,16 +802,103 @@ class SearchConnection(object): # there are more matches. checkatleast = max(checkatleast, endrank + 1) + # Build the matchspy. + matchspies = [] + + # First, add a matchspy for any gettags fields + if isinstance(gettags, basestring): + if len(gettags) != 0: + gettags = [gettags] + tagspy = None + if gettags is not None and len(gettags) != 0: + tagspy = _xapian.TermCountMatchSpy() + for field in gettags: + try: + prefix = self._field_mappings.get_prefix(field) + tagspy.add_prefix(prefix) + except KeyError: + raise _errors.SearchError("Field %r was not indexed for tagging" % field) + matchspies.append(tagspy) + + + # add a matchspy for facet selection here. + facetspy = None + facetfields = [] + if getfacets: + if allowfacets is not None and denyfacets is not None: + raise _errors.SearchError("Cannot specify both `allowfacets` and `denyfacets`") + if allowfacets is None: + allowfacets = [key for key in self._field_actions] + if denyfacets is not None: + allowfacets = [key for key in allowfacets if key not in denyfacets] + + for field in allowfacets: + try: + actions = self._field_actions[field]._actions + except KeyError: + actions = {} + for action, kwargslist in actions.iteritems(): + if action == FieldActions.FACET: + slot = self._field_mappings.get_slot(field) + if facetspy is None: + facetspy = _xapian.CategorySelectMatchSpy() + facetspy.add_slot(slot) + facetfields.append((field, slot, + kwargslist)) + if facetspy is not None: + matchspies.append(facetspy) + + + # Finally, build a single matchspy to pass to get_mset(). + if len(matchspies) == 0: + matchspy = None + elif len(matchspies) == 1: + matchspy = matchspies[0] + else: + matchspy = _xapian.MultipleMatchDecider() + for spy in matchspies: + matchspy.append(spy) + enq.set_docid_order(enq.DONT_CARE) # Repeat the search until we don't get a DatabaseModifiedError while True: try: - mset = enq.get_mset(startrank, maxitems, checkatleast) + mset = enq.get_mset(startrank, maxitems, checkatleast, None, + None, matchspy) break except _xapian.DatabaseModifiedError, e: self.reopen() - return SearchResults(self, enq, query, mset, self._field_mappings) + return SearchResults(self, enq, query, mset, self._field_mappings, + tagspy, facetspy, facetfields) + + def iter_synonyms(self, prefix=""): + """Get an iterator over the synonyms. + + - `prefix`: if specified, only synonym keys with this prefix will be + returned. + + The iterator returns 2-tuples, in which the first item is the key (ie, + a 2-tuple holding the term or terms which will be synonym expanded, + followed by the fieldname specified (or None if no fieldname)), and the + second item is a tuple of strings holding the synonyms for the first + item. + + These return values are suitable for the dict() builtin, so you can + write things like: + + >>> conn = _indexerconnection.IndexerConnection('foo') + >>> conn.add_synonym('foo', 'bar') + >>> conn.add_synonym('foo bar', 'baz') + >>> conn.add_synonym('foo bar', 'foo baz') + >>> conn.flush() + >>> conn = SearchConnection('foo') + >>> dict(conn.iter_synonyms()) + {('foo', None): ('bar',), ('foo bar', None): ('baz', 'foo baz')} + + """ + return _indexerconnection.SynonymIter(self._index, self._field_mappings, prefix) + if __name__ == '__main__': import doctest, sys -- cgit v0.9.1