author     Benjamin Saller <bcsaller@objectrealms.net>   2007-07-30 05:03:00 (GMT)
committer  Benjamin Saller <bcsaller@objectrealms.net>   2007-07-30 05:03:00 (GMT)
commit     dfefc1f8623611f5ce2615a62679f349f7428f75 (patch)
tree       0aaa8d025d2a35d311e9510a7fb8b565882af0fb
parent     a31cd062c96fbbd337d181a33b3890d8c2b5f32b (diff)
Revert "updated secore to latest"
This reverts commit a31cd062c96fbbd337d181a33b3890d8c2b5f32b.
-rw-r--r--  secore/datastructures.py      8
-rw-r--r--  secore/fieldactions.py       46
-rw-r--r--  secore/fieldmappings.py      11
-rw-r--r--  secore/indexerconnection.py 118
-rw-r--r--  secore/marshall.py           40
-rw-r--r--  secore/searchconnection.py  343
6 files changed, 71 insertions(+), 495 deletions(-)
diff --git a/secore/datastructures.py b/secore/datastructures.py
index b7061fa..414625d 100644
--- a/secore/datastructures.py
+++ b/secore/datastructures.py
@@ -20,7 +20,6 @@ r"""datastructures.py: Datastructures for search engine core.
"""
__docformat__ = "restructuredtext en"
-import errors as _errors
import xapian as _xapian
import cPickle as _cPickle
@@ -118,13 +117,6 @@ class ProcessedDocument(object):
# of our locale.
if ord(term[0]) >= ord('A') and ord(term[0]) <= ord('Z'):
prefix = prefix + ':'
-
- if len(prefix + term) > 220:
- raise _errors.IndexerError("Field %r is too long: maximum length "
- "220 - was %d (%r)" %
- (field, len(prefix + term),
- prefix + term))
-
if positions is None:
self._doc.add_term(prefix + term, wdfinc)
elif isinstance(positions, int):
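
The context lines above show the prefix/colon convention that survives the revert: a term beginning with an upper-case letter gets a ':' inserted after the field prefix so it cannot be mistaken for part of the prefix. A minimal standalone sketch of that convention (the helper name is invented for illustration and is not part of secore):

def make_prefixed_term(prefix, term):
    # Mirror ProcessedDocument.add_term above: if the term starts with A-Z,
    # separate it from the field prefix with a colon.
    if 'A' <= term[0] <= 'Z':
        prefix = prefix + ':'
    return prefix + term

assert make_prefixed_term('XA', 'foo') == 'XAfoo'
assert make_prefixed_term('XA', 'Foo') == 'XA:Foo'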
diff --git a/secore/fieldactions.py b/secore/fieldactions.py
index 3de7cc9..c595f0b 100644
--- a/secore/fieldactions.py
+++ b/secore/fieldactions.py
@@ -42,22 +42,6 @@ def _act_index_exact(fieldname, doc, value, context):
"""
doc.add_term(fieldname, value, 0)
-def _act_tag(fieldname, doc, value, context):
- """Perform the TAG action.
-
- """
- doc.add_term(fieldname, value.lower(), 0)
-
-def _act_facet(fieldname, doc, value, context, type=None):
- """Perform the FACET action.
-
- """
- marshaller = SortableMarshaller()
- fn = marshaller.get_marshall_function(fieldname, type)
- doc.add_term(fieldname, value.lower(), 0)
- value = fn(fieldname, value)
- doc.add_value(fieldname, value)
-
def _act_index_freetext(fieldname, doc, value, context, weight=1,
language=None, stop=None, spell=False,
nopos=False, noprefix=False):
@@ -226,7 +210,7 @@ class FieldActions(object):
- 'string' - sort in lexicographic (ie, alphabetical) order.
This is the default, used if no type is set.
- 'float' - treat the values as (decimal representations of) floating
- point numbers, and sort in numerical order. The values in the field
+ point numbers, and sort in numerical order . The values in the field
must be valid floating point numbers (according to Python's float()
function).
- 'date' - sort in date order. The values must be valid dates (either
@@ -237,23 +221,6 @@ class FieldActions(object):
"collapse" result sets, such that only the highest result with each value
of the field will be returned.
- - `TAG`: the field contains tags; these are strings, which will be matched
- in a case insensitive way, but otherwise must be exact matches. Tag
- fields can be searched for by making an explict query (ie, using
- query_field(), but not with query_parse()). A list of the most frequent
- tags in a result set can also be accessed easily.
-
- - `FACET`: the field represents a classification facet; these are strings
- which will be matched exactly, but a list of all the facets present in
- the result set can also be accessed easily - in addition, a suitable
- subset of the facets, and a selection of the facet values, present in the
- result set can be calculated. One optional parameter may be supplied:
-
- - 'type' is a value indicating the type of facet contained in the field:
-
- - 'string' - the facet values are exact binary strings.
- - 'float' - the facet values are floating point numbers.
-
"""
# See the class docstring for the meanings of the following constants.
@@ -262,8 +229,6 @@ class FieldActions(object):
INDEX_FREETEXT = 3
SORTABLE = 4
COLLAPSE = 5
- TAG = 6
- FACET = 7
# Sorting and collapsing store the data in a value, but the format depends
# on the sort type. Easiest way to implement is to treat them as the same
@@ -288,10 +253,7 @@ class FieldActions(object):
FieldActions.INDEX_EXACT,
FieldActions.INDEX_FREETEXT,
FieldActions.SORTABLE,
- FieldActions.COLLAPSE,
- FieldActions.TAG,
- FieldActions.FACET,
- ):
+ FieldActions.COLLAPSE,):
raise _errors.IndexerError("Unknown field action: %r" % action)
info = self._action_info[action]
@@ -350,7 +312,7 @@ class FieldActions(object):
raise _errors.IndexerError("Field %r is already marked for "
"sorting, with a different "
"sort type" % self._fieldname)
-
+
if self.NEED_PREFIX in info[3]:
field_mappings.add_prefix(self._fieldname)
if self.NEED_SLOT in info[3]:
@@ -389,8 +351,6 @@ class FieldActions(object):
SORTABLE: ('SORTABLE', ('type', ), None, (NEED_SLOT,), ),
COLLAPSE: ('COLLAPSE', (), None, (NEED_SLOT,), ),
SORT_AND_COLLAPSE: ('SORT_AND_COLLAPSE', ('type', ), _act_sort_and_collapse, (NEED_SLOT,), ),
- TAG: ('TAG', (), _act_tag, (NEED_PREFIX,), ),
- FACET: ('FACET', ('type', ), _act_facet, (NEED_PREFIX, NEED_SLOT,), ),
}
if __name__ == '__main__':
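
The FieldActions docstring above lists the actions that remain after the revert (INDEX_EXACT, INDEX_FREETEXT, SORTABLE, COLLAPSE) and drops TAG and FACET. A hedged sketch of how such actions are typically attached to fields; the add_field_action() call is not shown in this diff, so treat it and its keyword arguments as assumptions, and the field names are invented:

from secore.indexerconnection import IndexerConnection
from secore.fieldactions import FieldActions

conn = IndexerConnection('/path/to/index')
# Free-text body, weighted title, exact author term, and a float sort key.
conn.add_field_action('text',   FieldActions.INDEX_FREETEXT, language='en')
conn.add_field_action('title',  FieldActions.INDEX_FREETEXT, weight=5)
conn.add_field_action('author', FieldActions.INDEX_EXACT)
conn.add_field_action('mtime',  FieldActions.SORTABLE, type='float')
conn.add_field_action('mtime',  FieldActions.COLLAPSE)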
diff --git a/secore/fieldmappings.py b/secore/fieldmappings.py
index 7347448..3838ce5 100644
--- a/secore/fieldmappings.py
+++ b/secore/fieldmappings.py
@@ -76,17 +76,6 @@ class FieldMappings(object):
num = num // 26
return 'X' + ''.join(res)
- def get_fieldname_from_prefix(self, prefix):
- """Get a fieldname from a prefix.
-
- If the prefix is not found, return None.
-
- """
- for key, val in self._prefixes.iteritems():
- if val == prefix:
- return key
- return None
-
def get_prefix(self, fieldname):
"""Get the prefix used for a given field name.
diff --git a/secore/indexerconnection.py b/secore/indexerconnection.py
index 87fdd35..be82319 100644
--- a/secore/indexerconnection.py
+++ b/secore/indexerconnection.py
@@ -225,56 +225,6 @@ class IndexerConnection(object):
xapdoc = document.prepare()
self._index.replace_document('Q' + id, xapdoc)
- def _make_synonym_key(self, original, field):
- """Make a synonym key (ie, the term or group of terms to store in
- xapian).
-
- """
- if field is not None:
- prefix = self._field_mappings.get_prefix(field)
- else:
- prefix = ''
- original = original.lower()
- # Add the prefix to the start of each word.
- return ' '.join((prefix + word for word in original.split(' ')))
-
- def add_synonym(self, original, synonym, field=None):
- """Add a synonym to the index.
-
- - `original` is the word or words which will be synonym expanded in
- searches (if multiple words are specified, each word should be
- separated by a single space).
- - `synonym` is a synonym for `original`.
- - `field` is the field which this synonym is specific to. If no field
- is specified, the synonym will be used for searches which are not
- specific to any particular field.
-
- """
- key = self._make_synonym_key(original, field)
- self._index.add_synonym(key, synonym.lower())
-
- def remove_synonym(self, original, synonym, field=None):
- """Remove a synonym from the index.
-
- - `field` is the field which this synonym is specific to. If no field
- is specified, the synonym will be used for searches which are not
- specific to any particular field.
-
- """
- key = self._make_synonym_key(original, field)
- self._index.remove_synonym(key, synonym)
-
- def clear_synonyms(self, original, field=None):
- """Remove all synonyms for a word (or phrase).
-
- - `field` is the field which this synonym is specific to. If no field
- is specified, the synonym will be used for searches which are not
- specific to any particular field.
-
- """
- key = self._make_synonym_key(original, field)
- self._index.clear_synonyms(key)
-
def delete(self, id):
"""Delete a document from the search engine index.
@@ -382,32 +332,6 @@ class IndexerConnection(object):
result._doc = self._index.get_document(plitem.docid)
return result
- def iter_synonyms(self, prefix=""):
- """Get an iterator over the synonyms.
-
- - `prefix`: if specified, only synonym keys with this prefix will be
- returned.
-
- The iterator returns 2-tuples, in which the first item is the key (ie,
- a 2-tuple holding the term or terms which will be synonym expanded,
- followed by the fieldname specified (or None if no fieldname)), and the
- second item is a tuple of strings holding the synonyms for the first
- item.
-
- These return values are suitable for the dict() builtin, so you can
- write things like:
-
- >>> conn = IndexerConnection('foo')
- >>> conn.add_synonym('foo', 'bar')
- >>> conn.add_synonym('foo bar', 'baz')
- >>> conn.add_synonym('foo bar', 'foo baz')
- >>> dict(conn.iter_synonyms())
- {('foo', None): ('bar',), ('foo bar', None): ('baz', 'foo baz')}
-
- """
- return SynonymIter(self._index, self._field_mappings, prefix)
-
-
class PrefixedTermIter(object):
"""Iterate through all the terms with a given prefix.
@@ -416,7 +340,7 @@ class PrefixedTermIter(object):
"""Initialise the prefixed term iterator.
- `prefix` is the prefix to return terms for.
- - `termiter` is a xapian TermIterator, which should be at its start.
+ - `termiter` is a xapian TermIterator, which should be at it's start.
"""
@@ -440,6 +364,7 @@ class PrefixedTermIter(object):
def next(self):
"""Get the next term with the specified prefix.
+
"""
if not self._started:
term = self._termiter.skip_to(self._prefix).term
@@ -450,45 +375,6 @@ class PrefixedTermIter(object):
raise StopIteration
return term[self._prefixlen:]
-
-class SynonymIter(object):
- """Iterate through a list of synonyms.
-
- """
- def __init__(self, index, field_mappings, prefix):
- """Initialise the synonym iterator.
-
- - `index` is the index to get the synonyms from.
- - `field_mappings` is the FieldMappings object for the iterator.
- - `prefix` is the prefix to restrict the returned synonyms to.
-
- """
- self._index = index
- self._field_mappings = field_mappings
- self._syniter = self._index.synonym_keys(prefix)
-
- def __iter__(self):
- return self
-
- def next(self):
- """Get the next synonym.
-
- """
- synkey = self._syniter.next()
- pos = 0
- for char in synkey:
- if char.isupper(): pos += 1
- else: break
- if pos == 0:
- fieldname = None
- terms = synkey
- else:
- prefix = synkey[:pos]
- fieldname = self._field_mappings.get_fieldname_from_prefix(prefix)
- terms = ' '.join((term[pos:] for term in synkey.split(' ')))
- synval = tuple(self._index.synonyms(synkey))
- return ((terms, fieldname), synval)
-
if __name__ == '__main__':
import doctest, sys
doctest.testmod (sys.modules[__name__])
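
The removed synonym support stored each synonym under a key built from the original word or phrase, lower-cased, with the field prefix (if any) attached to every word. For reference, the key construction performed by the removed _make_synonym_key, rewritten as a standalone sketch (the prefix value is illustrative):

def make_synonym_key(original, prefix=''):
    # Lower-case the original and put the field prefix in front of each word,
    # exactly as the removed IndexerConnection._make_synonym_key did.
    original = original.lower()
    return ' '.join(prefix + word for word in original.split(' '))

assert make_synonym_key('Foo Bar') == 'foo bar'
assert make_synonym_key('Foo Bar', prefix='XA') == 'XAfoo XAbar'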
diff --git a/secore/marshall.py b/secore/marshall.py
index 8f1caee..ebcc71d 100644
--- a/secore/marshall.py
+++ b/secore/marshall.py
@@ -21,14 +21,50 @@ r"""marshall.py: Marshal values into strings
__docformat__ = "restructuredtext en"
import math
-import xapian
+
+def _long_to_base256_array(value, length, flip):
+ result = []
+ for i in xrange(length):
+ n = value % 256
+ if flip: n = 255 - n
+ result.insert(0, chr(n))
+ value /= 256
+ return result
def float_to_string(value):
"""Marshall a floating point number to a string which sorts in the
appropriate manner.
"""
- return xapian.sortable_serialise(value)
+ mantissa, exponent = math.frexp(value)
+ sign = '1'
+ if mantissa < 0:
+ mantissa = -mantissa
+ sign = '0'
+
+ # IEEE representation of doubles uses 11 bits for the exponent, with a bias
+ # of 1023. There's then another 52 bits in the mantissa, so we need to
+ # add 1075 to be sure that the exponent won't be negative.
+ # Even then, we check that the exponent isn't negative, and consider the
+ # value to be equal to zero if it is.
+ exponent += 1075
+ if exponent < 0: # Note - this can't happen on most architectures #pragma: no cover
+ exponent = 0
+ mantissa = 0
+ elif mantissa == 0:
+ exponent = 0
+
+ # IEEE representation of doubles uses 52 bits for the mantissa. Convert it
+ # to a 7 character string, and convert the exponent to a 2 character
+ # string.
+
+ mantissa = long(mantissa * (2**52))
+
+ digits = [sign]
+ digits.extend(_long_to_base256_array(exponent, 2, sign == '0'))
+ digits.extend(_long_to_base256_array(mantissa, 7, sign == '0'))
+
+ return ''.join(digits)
def date_to_string(date):
"""Marshall a date to a string which sorts in the appropriate manner.
diff --git a/secore/searchconnection.py b/secore/searchconnection.py
index f7caeab..79fa509 100644
--- a/secore/searchconnection.py
+++ b/secore/searchconnection.py
@@ -20,16 +20,14 @@ r"""searchconnection.py: A connection to the search engine for searching.
"""
__docformat__ = "restructuredtext en"
-import os as _os
-import cPickle as _cPickle
-
import xapian as _xapian
from datastructures import *
from fieldactions import *
import fieldmappings as _fieldmappings
import highlight as _highlight
import errors as _errors
-import indexerconnection as _indexerconnection
+import os as _os
+import cPickle as _cPickle
class SearchResult(ProcessedDocument):
"""A result from a search.
@@ -44,10 +42,7 @@ class SearchResult(ProcessedDocument):
"""Get the language that should be used for a given field.
"""
- try:
- actions = self._results._conn._field_actions[field]._actions
- except KeyError:
- actions = {}
+ actions = self._results._conn._field_actions[field]._actions
for action, kwargslist in actions.iteritems():
if action == FieldActions.INDEX_FREETEXT:
for kwargs in kwargslist:
@@ -123,24 +118,20 @@ class SearchResultIter(object):
def next(self):
msetitem = self._iter.next()
- return SearchResult(msetitem, self._results)
+ return SearchResult(msetitem,
+ self._results)
class SearchResults(object):
"""A set of results of a search.
"""
- def __init__(self, conn, enq, query, mset, fieldmappings, tagspy,
- facetspy, facetfields):
+ def __init__(self, conn, enq, query, mset, fieldmappings):
self._conn = conn
self._enq = enq
self._query = query
self._mset = mset
self._fieldmappings = fieldmappings
- self._tagspy = tagspy
- self._facetspy = facetspy
- self._facetfields = facetfields
- self._numeric_ranges_built = {}
def __repr__(self):
return ("<SearchResults(startrank=%d, "
@@ -234,106 +225,12 @@ class SearchResults(object):
"""
return SearchResultIter(self)
- def get_top_tags(self, field, maxtags):
- """Get the most frequent tags in a given field.
-
- - `field` - the field to get tags for. This must have been specified
- in the "gettags" argument of the search() call.
- - `maxtags` - the maximum number of tags to return.
-
- Returns a sequence of 2-item tuples, in which the first item in the
- tuple is the tag, and the second is the frequency of the tag in the
- matches seen (as an integer).
-
- """
- if self._tagspy is None:
- raise _errors.SearchError("Field %r was not specified for getting tags" % field)
- try:
- prefix = self._conn._field_mappings.get_prefix(field)
- except KeyError:
- raise _errors.SearchError("Field %r was not indexed for tagging" % field)
- return self._tagspy.get_top_terms(prefix, maxtags)
-
- def get_suggested_facets(self, maxfacets=5, desired_num_of_categories=7):
- """Get a suggested set of facets, to present to the user.
-
- This returns a list, in descending order of the usefulness of the
- facet, in which each item is a tuple holding:
-
- - fieldname of facet.
- - sequence of 2-tuples holding the suggested values or ranges for that
- field:
-
- For facets of type 'string', the first item in the 2-tuple will
- simply be the string supplied when the facet value was added to its
- document. For facets of type 'float', it will be a 2-tuple, holding
- floats giving the start and end of the suggested value range.
-
- The second item in the 2-tuple will be the frequency of the facet
- value or range in the result set.
-
- """
- if self._facetspy is None:
- return []
- scores = []
- facettypes = {}
- for field, slot, kwargslist in self._facetfields:
- type = None
- for kwargs in kwargslist:
- type = kwargs.get('type', None)
- if type is not None: break
- if type is None: type = 'string'
-
- if type == 'float':
- if field not in self._numeric_ranges_built:
- field, self._facetspy.build_numeric_ranges(slot, desired_num_of_categories)
- self._numeric_ranges_built[field] = None
- facettypes[field] = type
- score = self._facetspy.score_categorisation(slot,
- desired_num_of_categories)
- scores.append((score, field, slot))
- scores.sort()
-
- result = []
- for score, field, slot in scores:
- values = self._facetspy.get_values_as_dict(slot)
- if len(values) <= 1:
- continue
- newvalues = []
- if facettypes[field] == 'float':
- # Convert numbers to python numbers, and number ranges to a
- # python tuple of two numbers.
- for value, frequency in values.iteritems():
- if len(value) <= 9:
- value1 = _xapian.sortable_unserialise(value)
- value2 = value1
- else:
- value1 = _xapian.sortable_unserialise(value[:9])
- value2 = _xapian.sortable_unserialise(value[9:])
- newvalues.append(((value1, value2), frequency))
- else:
- for value, frequency in values.iteritems():
- newvalues.append((value, frequency))
-
- newvalues.sort()
- result.append((field, newvalues))
- if len(result) >= maxfacets:
- break
- return result
-
-
class SearchConnection(object):
"""A connection to the search engine for searching.
The connection will access a view of the database.
"""
- _qp_flags_std = (_xapian.QueryParser.FLAG_PHRASE |
- _xapian.QueryParser.FLAG_BOOLEAN |
- _xapian.QueryParser.FLAG_LOVEHATE |
- _xapian.QueryParser.FLAG_AUTO_SYNONYMS |
- _xapian.QueryParser.FLAG_AUTO_MULTIWORD_SYNONYMS)
- _qp_flags_nobool = (_qp_flags_std | _xapian.QueryParser.FLAG_BOOLEAN) ^ _xapian.QueryParser.FLAG_BOOLEAN
def __init__(self, indexpath):
"""Create a new connection to the index for searching.
@@ -355,10 +252,7 @@ class SearchConnection(object):
"""Get the sort type that should be used for a given field.
"""
- try:
- actions = self._field_actions[field]._actions
- except KeyError:
- actions = {}
+ actions = self._field_actions[field]._actions
for action, kwargslist in actions.iteritems():
if action == FieldActions.SORT_AND_COLLAPSE:
for kwargs in kwargslist:
@@ -372,7 +266,6 @@ class SearchConnection(object):
# class. Move it to a shared location.
config_file = _os.path.join(self._indexpath, 'config')
if not _os.path.exists(config_file):
- self._field_actions = {}
self._field_mappings = _fieldmappings.FieldMappings()
return
fd = open(config_file)
@@ -475,35 +368,21 @@ class SearchConnection(object):
raise _errors.SearchError("SearchConnection has been closed")
return _xapian.Query(operator, list(queries))
- def query_filter(self, query, filter, exclude=False):
+ def query_filter(self, query, filter):
"""Filter a query with another query.
- If exclude is False (or not specified), documents will only match the
- resulting query if they match the both the first and second query: the
- results of the first query are "filtered" to only include those which
- also match the second query.
-
- If exclude is True, documents will only match the resulting query if
- they match the first query, but not the second query: the results of
- the first query are "filtered" to only include those which do not match
- the second query.
-
- Documents will always be weighted according to only the first query.
+ Documents will only match the resulting query if they match both
+ queries, but will be weighted according to only the first query.
- `query`: The query to filter.
- `filter`: The filter to apply to the query.
- - `exclude`: If True, the sense of the filter is reversed - only
- documents which do not match the second query will be returned.
"""
if self._index is None:
raise _errors.SearchError("SearchConnection has been closed")
if not isinstance(filter, _xapian.Query):
raise _errors.SearchError("Filter must be a Xapian Query object")
- if exclude:
- return _xapian.Query(_xapian.Query.OP_AND_NOT, query, filter)
- else:
- return _xapian.Query(_xapian.Query.OP_FILTER, query, filter)
+ return _xapian.Query(_xapian.Query.OP_FILTER, query, filter)
def query_range(self, field, begin, end):
"""Create a query for a range search.
@@ -528,61 +407,9 @@ class SearchConnection(object):
begin = fn(field, begin)
end = fn(field, end)
- try:
- slot = self._field_mappings.get_slot(field)
- except KeyError:
- return _xapian.Query()
+ slot = self._field_mappings.get_slot(field)
return _xapian.Query(_xapian.Query.OP_VALUE_RANGE, slot, begin, end)
- def query_facet(self, field, val):
- """Create a query for a facet value.
-
- This creates a query which matches only those documents which have a
- facet value in the specified range.
-
- For a numeric range facet, val should be a tuple holding the start and
- end of the range. For other facets, val should be the value to look
- for.
-
- The start and end values are both inclusive - any documents with a
- value equal to start or end will be returned (unless end is less than
- start, in which case no documents will be returned).
-
- """
- if self._index is None:
- raise _errors.SearchError("SearchConnection has been closed")
-
- try:
- actions = self._field_actions[field]._actions
- except KeyError:
- actions = {}
- facettype = None
- for action, kwargslist in actions.iteritems():
- if action == FieldActions.FACET:
- for kwargs in kwargslist:
- facettype = kwargs.get('type', None)
- if facettype is not None:
- break
- if facettype is not None:
- break
-
- if facettype == 'float':
- assert(len(val) == 2)
- try:
- slot = self._field_mappings.get_slot(field)
- except KeyError:
- return _xapian.Query()
- marshaller = SortableMarshaller(False)
- fn = marshaller.get_marshall_function(field, sorttype)
- begin = fn(field, val[0])
- end = fn(field, val[1])
- return _xapian.Query(_xapian.Query.OP_VALUE_RANGE, slot, begin, end)
- else:
- assert(facettype == 'string' or facettype is None)
- prefix = self._field_mappings.get_prefix(field)
- return _xapian.Query(prefix + val.lower())
-
-
def _prepare_queryparser(self, allow, deny, default_op):
"""Prepare (and return) a query parser using the specified fields and
operator.
@@ -602,10 +429,7 @@ class SearchConnection(object):
allow = [key for key in allow if key not in deny]
for field in allow:
- try:
- actions = self._field_actions[field]._actions
- except KeyError:
- actions = {}
+ actions = self._field_actions[field]._actions
for action, kwargslist in actions.iteritems():
if action == FieldActions.INDEX_EXACT:
# FIXME - need patched version of xapian to add exact prefixes
@@ -635,11 +459,8 @@ class SearchConnection(object):
Only one of `allow` and `deny` may be specified.
- If any of the entries in `allow` are not present in the configuration
- for the database, or are not specified for indexing (either as
- INDEX_EXACT or INDEX_FREETEXT), they will be ignored. If any of the
- entries in `deny` are not present in the configuration for the
- database, they will be ignored.
+ If any of the entries in `allow` or `deny` are not present in the
+ configuration for the database, an exception will be raised.
Returns a Query object, which may be passed to the search() method, or
combined with other queries.
@@ -647,11 +468,11 @@ class SearchConnection(object):
"""
qp = self._prepare_queryparser(allow, deny, default_op)
try:
- return qp.parse_query(string, self._qp_flags_std)
+ return qp.parse_query(string)
except _xapian.QueryParserError, e:
# If we got a parse error, retry without boolean operators (since
# these are the usual cause of the parse error).
- return qp.parse_query(string, self._qp_flags_nobool)
+ return qp.parse_query(string, 0)
def query_field(self, field, value, default_op=OP_AND):
"""A query for a single field.
@@ -666,9 +487,7 @@ class SearchConnection(object):
# need to check on field type, and stem / split as appropriate
for action, kwargslist in actions.iteritems():
- if action in (FieldActions.INDEX_EXACT,
- FieldActions.TAG,
- FieldActions.FACET,):
+ if action == FieldActions.INDEX_EXACT:
prefix = self._field_mappings.get_prefix(field)
if len(value) > 0:
chval = ord(value[0])
@@ -686,7 +505,9 @@ class SearchConnection(object):
qp.set_stemming_strategy(qp.STEM_SOME)
except KeyError:
pass
- return qp.parse_query(value, self._qp_flags_std, prefix)
+ return qp.parse_query(value,
+ qp.FLAG_PHRASE | qp.FLAG_BOOLEAN | qp.FLAG_LOVEHATE,
+ prefix)
return _xapian.Query()
@@ -707,15 +528,12 @@ class SearchConnection(object):
Only one of `allow` and `deny` may be specified.
- If any of the entries in `allow` are not present in the configuration
- for the database, or are not specified for indexing (either as
- INDEX_EXACT or INDEX_FREETEXT), they will be ignored. If any of the
- entries in `deny` are not present in the configuration for the
- database, they will be ignored.
+ If any of the entries in `allow` or `deny` are not present in the
+ configuration for the database, an exception will be raised.
"""
qp = self._prepare_queryparser(allow, deny, self.OP_AND)
- qp.parse_query(string, self._qp_flags_std | qp.FLAG_SPELLING_CORRECTION)
+ qp.parse_query(string, qp.FLAG_PHRASE|qp.FLAG_BOOLEAN|qp.FLAG_LOVEHATE|qp.FLAG_SPELLING_CORRECTION)
corrected = qp.get_corrected_query_string()
if len(corrected) == 0:
if isinstance(string, unicode):
@@ -726,9 +544,7 @@ class SearchConnection(object):
return corrected
def search(self, query, startrank, endrank,
- checkatleast=0, sortby=None, collapse=None,
- gettags=None,
- getfacets=None, allowfacets=None, denyfacets=None):
+ checkatleast=0, sortby=None, collapse=None):
"""Perform a search, for documents matching a query.
- `query` is the query to perform.
@@ -740,10 +556,7 @@ class SearchConnection(object):
be returned.
- `checkatleast` is the minimum number of results to check for: the
estimate of the total number of matches will always be exact if
- the number of matches is less than `checkatleast`. A value of ``-1``
- can be specified for the checkatleast parameter - this has the
- special meaning of "check all matches", and is equivalent to passing
- the result of get_doccount().
+ the number of matches is less than `checkatleast`.
- `sortby` is the name of a field to sort by. It may be preceded by a
'+' or a '-' to indicate ascending or descending order
(respectively). If the first character is neither '+' or '-', the
@@ -751,23 +564,10 @@ class SearchConnection(object):
- `collapse` is the name of a field to collapse the result documents
on. If this is specified, there will be at most one result in the
result set for each value of the field.
- - `gettags` is the name of a field to count tag occurrences in, or a
- list of fields to do so.
- - `getfacets` is a boolean - if True, the matching documents will be
- examined to build up a list of the facet values contained in them.
- - `allowfacets` is a list of the fieldnames of facets to consider.
- - `denyfacets` is a list of fieldnames of facets which will not be
- considered.
-
- If neither 'allowfacets' or 'denyfacets' is specified, all fields
- holding facets will be considered.
"""
if self._index is None:
raise _errors.SearchError("SearchConnection has been closed")
- if checkatleast == -1:
- checkatleast = self._index.get_doccount()
-
enq = _xapian.Enquire(self._index)
enq.set_query(query)
@@ -802,103 +602,16 @@ class SearchConnection(object):
# there are more matches.
checkatleast = max(checkatleast, endrank + 1)
- # Build the matchspy.
- matchspies = []
-
- # First, add a matchspy for any gettags fields
- if isinstance(gettags, basestring):
- if len(gettags) != 0:
- gettags = [gettags]
- tagspy = None
- if gettags is not None and len(gettags) != 0:
- tagspy = _xapian.TermCountMatchSpy()
- for field in gettags:
- try:
- prefix = self._field_mappings.get_prefix(field)
- tagspy.add_prefix(prefix)
- except KeyError:
- raise _errors.SearchError("Field %r was not indexed for tagging" % field)
- matchspies.append(tagspy)
-
-
- # add a matchspy for facet selection here.
- facetspy = None
- facetfields = []
- if getfacets:
- if allowfacets is not None and denyfacets is not None:
- raise _errors.SearchError("Cannot specify both `allowfacets` and `denyfacets`")
- if allowfacets is None:
- allowfacets = [key for key in self._field_actions]
- if denyfacets is not None:
- allowfacets = [key for key in allowfacets if key not in denyfacets]
-
- for field in allowfacets:
- try:
- actions = self._field_actions[field]._actions
- except KeyError:
- actions = {}
- for action, kwargslist in actions.iteritems():
- if action == FieldActions.FACET:
- slot = self._field_mappings.get_slot(field)
- if facetspy is None:
- facetspy = _xapian.CategorySelectMatchSpy()
- facetspy.add_slot(slot)
- facetfields.append((field, slot,
- kwargslist))
- if facetspy is not None:
- matchspies.append(facetspy)
-
-
- # Finally, build a single matchspy to pass to get_mset().
- if len(matchspies) == 0:
- matchspy = None
- elif len(matchspies) == 1:
- matchspy = matchspies[0]
- else:
- matchspy = _xapian.MultipleMatchDecider()
- for spy in matchspies:
- matchspy.append(spy)
-
enq.set_docid_order(enq.DONT_CARE)
# Repeat the search until we don't get a DatabaseModifiedError
while True:
try:
- mset = enq.get_mset(startrank, maxitems, checkatleast, None,
- None, matchspy)
+ mset = enq.get_mset(startrank, maxitems, checkatleast)
break
except _xapian.DatabaseModifiedError, e:
self.reopen()
- return SearchResults(self, enq, query, mset, self._field_mappings,
- tagspy, facetspy, facetfields)
-
- def iter_synonyms(self, prefix=""):
- """Get an iterator over the synonyms.
-
- - `prefix`: if specified, only synonym keys with this prefix will be
- returned.
-
- The iterator returns 2-tuples, in which the first item is the key (ie,
- a 2-tuple holding the term or terms which will be synonym expanded,
- followed by the fieldname specified (or None if no fieldname)), and the
- second item is a tuple of strings holding the synonyms for the first
- item.
-
- These return values are suitable for the dict() builtin, so you can
- write things like:
-
- >>> conn = _indexerconnection.IndexerConnection('foo')
- >>> conn.add_synonym('foo', 'bar')
- >>> conn.add_synonym('foo bar', 'baz')
- >>> conn.add_synonym('foo bar', 'foo baz')
- >>> conn.flush()
- >>> conn = SearchConnection('foo')
- >>> dict(conn.iter_synonyms())
- {('foo', None): ('bar',), ('foo bar', None): ('baz', 'foo baz')}
-
- """
- return _indexerconnection.SynonymIter(self._index, self._field_mappings, prefix)
-
+ return SearchResults(self, enq, query, mset, self._field_mappings)
if __name__ == '__main__':
import doctest, sys
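
Taken together, the reverted searchconnection.py exposes the simpler query surface documented above: query_parse, query_field, query_filter without the exclude flag, and search without the tag and facet arguments. A hedged end-to-end sketch of that surface, using only the signatures visible in this diff; the index path, field names, and the `id` attribute on results are assumptions for illustration:

from secore.searchconnection import SearchConnection

conn = SearchConnection('/path/to/index')    # open an existing index for searching
query = conn.query_parse('hello world')      # free-text query over indexed fields
only_en = conn.query_field('lang', 'en')     # exact-match query on one field
query = conn.query_filter(query, only_en)    # filter; weights come from the first query
results = conn.search(query, 0, 10)          # return ranks 0..9
for hit in results:
    print hit.id                             # document id (assumed attribute)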