From ec0d7091fc814555dfd2acb850bc4912d4378a2b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jorge=20C=2E=20Leit=C3=A3o?= <jorgecarleitao@gmail.com>
Date: Thu, 22 May 2014 10:06:24 +0200
Subject: [PATCH] Added missing changes of previous commit.

---
 xapian_backend.py | 84 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 62 insertions(+), 22 deletions(-)
diff --git a/xapian_backend.py b/xapian_backend.py
index 7584809..a680763 100755
--- a/xapian_backend.py
+++ b/xapian_backend.py
@@ -261,23 +261,64 @@ class XapianSearchBackend(BaseSearchBackend):
             if self.include_spelling is True:
                 term_generator.set_flags(xapian.TermGenerator.FLAG_SPELLING)
 
-            def add_text(termpos, prefix, term, weight):
-                term_generator.set_termpos(termpos + 1)
-                term_generator.index_text(term, weight)
-                term_generator.index_text(term, weight, prefix)
+            def _add_text(termpos, text, weight, prefix=''):
+                """
+                Indexes text, adding 2 extra terms (`<prefix>^` and `<prefix>$`)
+                that mark the beginning and the end of the text.
+                """
+                start_term = '%s^' % prefix
+                end_term = '%s$' % prefix
+                # add begin
+                document.add_posting(start_term, termpos, weight)
+                # add text
+                term_generator.index_text(text, weight, prefix)
+                termpos = term_generator.get_termpos()
+                # add ending
+                termpos += 1
+                document.add_posting(end_term, termpos, weight)
+
+                # increase termpos
+                term_generator.set_termpos(termpos)
                 term_generator.increase_termpos()
+
                 return term_generator.get_termpos()
 
+            def add_text(termpos, prefix, text, weight):
+                """
+                Adds text to the document with positional information
+                and processing (e.g. stemming).
+                """
+                termpos = _add_text(termpos, text, weight, prefix=prefix)
+                termpos = _add_text(termpos, text, weight, prefix='')
+                return termpos
+
             for obj in iterable:
                 document = xapian.Document()
                 term_generator.set_document(document)
 
-                def add_to_document(prefix, term, weight):
-                    document.add_term('%s' % term, weight)
-                    document.add_term(prefix + term, weight)
-                    document.add_term(prefix + '^%s$' % term, weight)
+                def add_to_document(prefix, sentence, weight):
+                    """
+                    Adds sentence to the document without positional information
+                    and without processing.
+
+                    If the term is alone, also adds it as "^<term>$"
+                    to allow exact matches on single terms.
+                    """
+                    if len(sentence.split()) > 1:
+                        for term in sentence.split():
+                            document.add_term(term, weight)
+                            document.add_term(prefix + term, weight)
+                    else:
+                        document.add_term(sentence, weight)
+                        document.add_term(prefix + sentence, weight)
+                        document.add_term("^%s$" % sentence, weight)
+                        document.add_term(prefix + "^%s$" % sentence, weight)
 
                 def add_datetime_to_document(termpos, prefix, term, weight):
+                    """
+                    Adds a datetime to document with positional order
+                    to allow exact matches on it.
+                    """
                     date, time = term.split()
                     document.add_posting(date, termpos, weight)
                     termpos += 1
@@ -292,11 +333,10 @@ class XapianSearchBackend(BaseSearchBackend):
                 data = index.full_prepare(obj)
                 weights = index.get_field_weights()
 
-                termpos = 0
+                termpos = term_generator.get_termpos()  # identifies the current position in the document.
                 for field in self.schema:
-                    termpos += 1
-                    # not supported fields are ignored.
                     if field['field_name'] not in list(data.keys()):
+                        # not supported fields are ignored.
                         continue
 
                     if field['field_name'] in weights:
@@ -305,10 +345,11 @@ class XapianSearchBackend(BaseSearchBackend):
                         weight = 1
 
                     value = data[field['field_name']]
-                    # Private fields are indexed in a different way:
-                    # `django_id` is an int and `django_ct` is text;
-                    # besides, they are indexed by their (unstemmed) value.
+
                     if field['field_name'] in ('id', 'django_id', 'django_ct'):
+                        # Private fields are indexed in a different way:
+                        # `django_id` is an int and `django_ct` is text;
+                        # besides, they are indexed by their (unstemmed) value.
                         if field['field_name'] == 'django_id':
                             value = int(value)
                         value = _term_to_xapian_value(value, field['type'])
@@ -327,10 +368,8 @@ class XapianSearchBackend(BaseSearchBackend):
                             for t in value:
                                 # add the exact match of each value
                                 term = _to_xapian_term(t)
+                                termpos = add_text(termpos, prefix, term, weight)
                                 add_to_document(prefix, term, weight)
-                                # index each value with positional information
-                                if ' ' in term:
-                                    termpos = add_text(termpos, prefix, term, weight)
                             continue
 
                         term = _to_xapian_term(value)
@@ -342,8 +381,8 @@ class XapianSearchBackend(BaseSearchBackend):
                             termpos = add_text(termpos, prefix, term, weight)
                         elif field['type'] == 'datetime':
                             termpos = add_datetime_to_document(termpos, prefix, term, weight)
-                        elif term != "":
-                            # all other terms are added without positional information
+                        if term != "":
+                            # every non-empty term reaching this point is also added without positional information
                             add_to_document(prefix, term, weight)
 
                 # store data without indexing it
@@ -1273,6 +1312,7 @@ class XapianSearchQuery(BaseSearchQuery):
         # the ideal would be to use the same idea as in _filter_contains.
         # However, it causes tests to fail.
         if field_type == 'text' and ' ' in term:
+            term = '^ %s $' % term
             query = self._phrase_query(term.split(), field_name, field_type)
         else:
             query = self._term_query(term, field_name, field_type, exact=True, stemmed=False)
@@ -1333,8 +1373,9 @@ class XapianSearchQuery(BaseSearchQuery):
             assert not exact
 
         constructor = '{prefix}{term}'
-        # ^{term}$ is for boolean match of the term
-        if exact:
+        # ^{term}$ is to do a boolean match, but only works on indexed terms
+        # (constraint on Xapian side)
+        if exact and field_type == 'text':
             constructor = '{prefix}^{term}$'
 
         # construct the prefix to be used.
@@ -1353,7 +1394,6 @@ class XapianSearchQuery(BaseSearchQuery):
         # we construct the query dates in a slightly different way
         if field_type == 'datetime':
             date, time = term.split()
-            constructor = '{prefix}{term}'
             return xapian.Query(xapian.Query.OP_AND_MAYBE,
                                 constructor.format(prefix=prefix, term=date),
                                 constructor.format(prefix=prefix, term=time)