mirror of
https://github.com/Hopiu/xapian-haystack.git
synced 2026-04-21 15:14:50 +00:00
Added docstring for _do_query_facets
This commit is contained in:
parent
1a8e6d24b6
commit
648ca3a3d3
1 changed files with 150 additions and 136 deletions
|
|
@ -49,7 +49,7 @@ class XHValueRangeProcessor(xapian.ValueRangeProcessor):
|
|||
def __init__(self, sb):
|
||||
self.sb = sb
|
||||
xapian.ValueRangeProcessor.__init__(self)
|
||||
|
||||
|
||||
def __call__(self, begin, end):
|
||||
"""
|
||||
Construct a tuple for value range processing.
|
||||
|
|
@ -90,15 +90,15 @@ class SearchBackend(BaseSearchBackend):
|
|||
"""
|
||||
`SearchBackend` defines the Xapian search backend for use with the Haystack
|
||||
API for Django search.
|
||||
|
||||
|
||||
It uses the Xapian Python bindings to interface with Xapian, and as
|
||||
such is subject to this bug: <http://trac.xapian.org/ticket/364> when
|
||||
Django is running with mod_python or mod_wsgi under Apache.
|
||||
|
||||
|
||||
Until this issue has been fixed by Xapian, it is necessary to set
|
||||
`WSGIApplicationGroup to %{GLOBAL}` when using mod_wsgi, or
|
||||
`PythonInterpreter main_interpreter` when using mod_python.
|
||||
|
||||
|
||||
In order to use this backend, `HAYSTACK_XAPIAN_PATH` must be set in
|
||||
your settings. This should point to a location where you would like your
|
||||
indexes to reside.
|
||||
|
|
@ -111,66 +111,66 @@ class SearchBackend(BaseSearchBackend):
|
|||
'NEAR',
|
||||
'ADJ',
|
||||
)
|
||||
|
||||
|
||||
RESERVED_CHARACTERS = (
|
||||
'\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}',
|
||||
'\\', '+', '-', '&&', '||', '!', '(', ')', '{', '}',
|
||||
'[', ']', '^', '"', '~', '*', '?', ':',
|
||||
)
|
||||
|
||||
|
||||
def __init__(self, site=None, stemming_language='english'):
|
||||
"""
|
||||
Instantiates an instance of `SearchBackend`.
|
||||
|
||||
|
||||
Optional arguments:
|
||||
`site` -- The site to associate the backend with (default = None)
|
||||
`stemming_language` -- The stemming language (default = 'english')
|
||||
|
||||
|
||||
Also sets the stemming language to be used to `stemming_language`.
|
||||
"""
|
||||
super(SearchBackend, self).__init__(site)
|
||||
|
||||
|
||||
if not hasattr(settings, 'HAYSTACK_XAPIAN_PATH'):
|
||||
raise ImproperlyConfigured('You must specify a HAYSTACK_XAPIAN_PATH in your settings.')
|
||||
|
||||
|
||||
if not os.path.exists(settings.HAYSTACK_XAPIAN_PATH):
|
||||
os.makedirs(settings.HAYSTACK_XAPIAN_PATH)
|
||||
|
||||
|
||||
self.stemmer = xapian.Stem(stemming_language)
|
||||
|
||||
|
||||
def get_identifier(self, obj_or_string):
|
||||
return DOCUMENT_ID_TERM_PREFIX + super(SearchBackend, self).get_identifier(obj_or_string)
|
||||
|
||||
|
||||
def update(self, index, iterable):
|
||||
"""
|
||||
Updates the `index` with any objects in `iterable` by adding/updating
|
||||
the database as needed.
|
||||
|
||||
|
||||
Required arguments:
|
||||
`index` -- The `SearchIndex` to process
|
||||
`iterable` -- An iterable of model instances to index
|
||||
|
||||
|
||||
For each object in `iterable`, a document is created containing all
|
||||
of the terms extracted from `index.prepare(obj)` with stemming prefixes,
|
||||
field prefixes, and 'as-is'.
|
||||
|
||||
|
||||
eg. `content:Testing` ==> `testing, Ztest, ZXCONTENTtest`
|
||||
|
||||
|
||||
Each document also contains an extra term in the format:
|
||||
|
||||
`XCONTENTTYPE<app_name>.<model_name>`
|
||||
|
||||
As well as a unique identifier in the format:
|
||||
|
||||
|
||||
`Q<app_name>.<model_name>.<pk>`
|
||||
|
||||
|
||||
eg.: foo.bar (pk=1) ==> `Qfoo.bar.1`, `XCONTENTTYPEfoo.bar`
|
||||
|
||||
This is useful for querying for a specific document corresponding to
|
||||
a model instance.
|
||||
|
||||
|
||||
The document also contains a pickled version of the object itself and
|
||||
the document ID in the document data field.
|
||||
|
||||
|
||||
Finally, we also store field values to be used for sorting data. We
|
||||
store these in the document value slots (position zero is reserved
|
||||
for the document ID). All values are stored as unicode strings with
|
||||
|
|
@ -184,7 +184,7 @@ class SearchBackend(BaseSearchBackend):
|
|||
term_generator = self._term_generator(database, document)
|
||||
document_id = self.get_identifier(obj)
|
||||
model_data = index.prepare(obj)
|
||||
|
||||
|
||||
for field in self.schema:
|
||||
if field['field_name'] in model_data.keys():
|
||||
prefix = DOCUMENT_CUSTOM_TERM_PREFIX + field['field_name'].upper()
|
||||
|
|
@ -192,40 +192,40 @@ class SearchBackend(BaseSearchBackend):
|
|||
term_generator.index_text(force_unicode(value))
|
||||
term_generator.index_text(force_unicode(value), 1, prefix)
|
||||
document.add_value(field['column'], self._marshal_value(value))
|
||||
|
||||
|
||||
document.set_data(pickle.dumps(
|
||||
(obj._meta.app_label, obj._meta.module_name, obj.pk, model_data),
|
||||
(obj._meta.app_label, obj._meta.module_name, obj.pk, model_data),
|
||||
pickle.HIGHEST_PROTOCOL
|
||||
))
|
||||
document.add_term(document_id)
|
||||
document.add_term(
|
||||
DOCUMENT_CT_TERM_PREFIX + u'%s.%s' %
|
||||
DOCUMENT_CT_TERM_PREFIX + u'%s.%s' %
|
||||
(obj._meta.app_label, obj._meta.module_name)
|
||||
)
|
||||
database.replace_document(document_id, document)
|
||||
|
||||
|
||||
except UnicodeDecodeError:
|
||||
sys.stderr.write('Chunk failed.\n')
|
||||
pass
|
||||
|
||||
|
||||
def remove(self, obj):
|
||||
"""
|
||||
Remove indexes for `obj` from the database.
|
||||
|
||||
|
||||
We delete all instances of `Q<app_name>.<model_name>.<pk>` which
|
||||
should be unique to this object.
|
||||
"""
|
||||
database = self._database(writable=True)
|
||||
database.delete_document(self.get_identifier(obj))
|
||||
|
||||
|
||||
def clear(self, models=[]):
|
||||
"""
|
||||
Clear all instances of `models` from the database or all models, if
|
||||
not specified.
|
||||
|
||||
|
||||
Optional Arguments:
|
||||
`models` -- Models to clear from the database (default = [])
|
||||
|
||||
|
||||
If `models` is empty, an empty query is executed which matches all
|
||||
documents in the database. Afterwards, each match is deleted.
|
||||
|
||||
|
|
@ -242,19 +242,19 @@ class SearchBackend(BaseSearchBackend):
|
|||
else:
|
||||
for model in models:
|
||||
database.delete_document(
|
||||
DOCUMENT_CT_TERM_PREFIX + '%s.%s' %
|
||||
DOCUMENT_CT_TERM_PREFIX + '%s.%s' %
|
||||
(model._meta.app_label, model._meta.module_name)
|
||||
)
|
||||
|
||||
|
||||
def search(self, query_string, sort_by=None, start_offset=0, end_offset=DEFAULT_MAX_RESULTS,
|
||||
fields='', highlight=False, facets=None, date_facets=None, query_facets=None,
|
||||
narrow_queries=None, boost=None, **kwargs):
|
||||
"""
|
||||
Executes the search as defined in `query_string`.
|
||||
|
||||
|
||||
Required arguments:
|
||||
`query_string` -- Search query to execute
|
||||
|
||||
|
||||
Optional arguments:
|
||||
`sort_by` -- Sort results by specified field (default = None)
|
||||
`start_offset` -- Slice results from `start_offset` (default = 0)
|
||||
|
|
@ -266,7 +266,7 @@ class SearchBackend(BaseSearchBackend):
|
|||
`query_facets` -- Facet results on queries (default = None)
|
||||
`narrow_queries` -- Narrow queries (default = None)
|
||||
`boost` -- Dictionary of terms and weights to boost results
|
||||
|
||||
|
||||
Returns:
|
||||
A dictionary with the following keys:
|
||||
`results` -- A list of `SearchResult`
|
||||
|
|
@ -276,13 +276,13 @@ class SearchBackend(BaseSearchBackend):
|
|||
`dates` -- A list of date facets
|
||||
`queries` -- A list of query facets
|
||||
If faceting was not used, the `facets` key will not be present
|
||||
|
||||
|
||||
If `query_string` is empty, returns no results.
|
||||
|
||||
Otherwise, loads the available fields from the database meta data schema
|
||||
and sets up prefixes for each one along with a prefix for `django_ct`,
|
||||
used to filter by model, and loads the current stemmer instance.
|
||||
|
||||
|
||||
Afterwards, executes the Xapian query parser to create a query from
|
||||
`query_string` that is then passed to a new `enquire` instance.
|
||||
|
||||
|
|
@ -299,20 +299,20 @@ class SearchBackend(BaseSearchBackend):
|
|||
'results': [],
|
||||
'hits': 0,
|
||||
}
|
||||
|
||||
|
||||
if query_facets is not None:
|
||||
warnings.warn("Query faceting has not been implemented yet.", Warning, stacklevel=2)
|
||||
|
||||
|
||||
database = self._database()
|
||||
query, spelling_suggestion = self._query(
|
||||
database, query_string, narrow_queries, boost
|
||||
)
|
||||
enquire = self._enquire(database, query)
|
||||
|
||||
|
||||
if sort_by:
|
||||
sorter = self._sorter(sort_by)
|
||||
enquire.set_sort_by_key_then_relevance(sorter, True)
|
||||
|
||||
|
||||
results = []
|
||||
facets_dict = {
|
||||
'fields': {},
|
||||
|
|
@ -320,7 +320,7 @@ class SearchBackend(BaseSearchBackend):
|
|||
'queries': {},
|
||||
}
|
||||
matches = enquire.get_mset(start_offset, end_offset)
|
||||
|
||||
|
||||
for match in matches:
|
||||
document = match.get_document()
|
||||
app_label, module_name, pk, model_data = pickle.loads(document.get_data())
|
||||
|
|
@ -337,19 +337,19 @@ class SearchBackend(BaseSearchBackend):
|
|||
results.append(
|
||||
SearchResult(app_label, module_name, pk, match.weight, **model_data)
|
||||
)
|
||||
|
||||
|
||||
if date_facets:
|
||||
facets_dict['dates'] = self._do_date_facets(results, date_facets)
|
||||
if query_facets:
|
||||
facets_dict['queries'] = self._do_query_facets(results, query_facets)
|
||||
|
||||
|
||||
return {
|
||||
'results': results,
|
||||
'hits': matches.get_matches_estimated(),
|
||||
'facets': facets_dict,
|
||||
'spelling_suggestion': spelling_suggestion,
|
||||
}
|
||||
|
||||
|
||||
def delete_index(self):
|
||||
"""
|
||||
Delete the index.
|
||||
|
|
@ -358,7 +358,7 @@ class SearchBackend(BaseSearchBackend):
|
|||
"""
|
||||
if os.path.exists(settings.HAYSTACK_XAPIAN_PATH):
|
||||
shutil.rmtree(settings.HAYSTACK_XAPIAN_PATH)
|
||||
|
||||
|
||||
def document_count(self):
|
||||
"""
|
||||
Retrieves the total document count for the search index.
|
||||
|
|
@ -368,7 +368,7 @@ class SearchBackend(BaseSearchBackend):
|
|||
except xapian.DatabaseOpeningError:
|
||||
return 0
|
||||
return database.get_doccount()
|
||||
|
||||
|
||||
def more_like_this(self, model_instance):
|
||||
"""
|
||||
Given a model instance, returns a result set of similar documents.
|
||||
|
|
@ -376,21 +376,21 @@ class SearchBackend(BaseSearchBackend):
|
|||
Required arguments:
|
||||
`model_instance` -- The model instance to use as a basis for
|
||||
retrieving similar documents.
|
||||
|
||||
|
||||
Returns:
|
||||
A dictionary with the following keys:
|
||||
`results` -- A list of `SearchResult`
|
||||
`hits` -- The total available results
|
||||
|
||||
|
||||
Opens a database connection, then builds a simple query using the
|
||||
`model_instance` to build the unique identifier.
|
||||
|
||||
|
||||
For each document retrieved(should always be one), adds an entry into
|
||||
an RSet (relevance set) with the document id, then, uses the RSet
|
||||
to query for an ESet (A set of terms that can be used to suggest
|
||||
expansions to the original query), omitting any document that was in
|
||||
the original query.
|
||||
|
||||
|
||||
Finally, processes the resulting matches and returns.
|
||||
"""
|
||||
database = self._database()
|
||||
|
|
@ -406,7 +406,7 @@ class SearchBackend(BaseSearchBackend):
|
|||
xapian.Query.OP_AND_NOT, [query, self.get_identifier(model_instance)]
|
||||
)
|
||||
enquire.set_query(query)
|
||||
|
||||
|
||||
results = []
|
||||
matches = enquire.get_mset(0, DEFAULT_MAX_RESULTS)
|
||||
|
||||
|
|
@ -416,7 +416,7 @@ class SearchBackend(BaseSearchBackend):
|
|||
results.append(
|
||||
SearchResult(app_label, module_name, pk, match.weight, **model_data)
|
||||
)
|
||||
|
||||
|
||||
return {
|
||||
'results': results,
|
||||
'hits': matches.get_matches_estimated(),
|
||||
|
|
@ -427,7 +427,7 @@ class SearchBackend(BaseSearchBackend):
|
|||
},
|
||||
'spelling_suggestion': None,
|
||||
}
|
||||
|
||||
|
||||
def _do_highlight(self, content, text, tag='em'):
|
||||
"""
|
||||
Highlight `text` in `content` with html `tag`.
|
||||
|
|
@ -444,18 +444,18 @@ class SearchBackend(BaseSearchBackend):
|
|||
term_re = re.compile(re.escape(term), re.IGNORECASE)
|
||||
content = term_re.sub('<%s>%s</%s>' % (tag, term, tag), content)
|
||||
return content
|
||||
|
||||
|
||||
def _do_field_facets(self, document, facets, fields):
|
||||
"""
|
||||
Private method that facets a document by field name.
|
||||
|
||||
|
||||
Required arguments:
|
||||
`document` -- The document to parse
|
||||
`facets` -- A list of facets to use when faceting
|
||||
`fields` -- A list of fields that have already been faceted. This
|
||||
will be extended with any new field names and counts
|
||||
found in the `document`.
|
||||
|
||||
|
||||
For each term in the document, extract the field name and determine
|
||||
if it is one of the `facets` we want. If so, verify if it is already in
|
||||
the `fields` list. If it is, update the count, otherwise, add it and
|
||||
|
|
@ -469,16 +469,16 @@ class SearchBackend(BaseSearchBackend):
|
|||
else:
|
||||
fields[match.group(1).lower()] = [(match.group(2), term[1])]
|
||||
return fields
|
||||
|
||||
|
||||
def _do_date_facets(self, results, date_facets):
|
||||
"""
|
||||
Private method that facets a document by date ranges
|
||||
|
||||
|
||||
Required arguments:
|
||||
`results` -- A list SearchResults to facet
|
||||
`date_facets` -- A dictionary containg facet parameters:
|
||||
`date_facets` -- A dictionary containing facet parameters:
|
||||
{'field': {'start_date': ..., 'end_date': ...: 'gap': '...'}}
|
||||
nb., gap must satisfy the regex:
|
||||
nb., gap must satisfy the regex:
|
||||
(?P<type>year|month|day|hour|minute|second+)s?=?(?P<value>\d*)
|
||||
|
||||
For each date facet field in `date_facets`, generates a list
|
||||
|
|
@ -529,9 +529,9 @@ class SearchBackend(BaseSearchBackend):
|
|||
date_range += datetime.timedelta(minutes=int(gap_value))
|
||||
elif gap_type == 'second':
|
||||
date_range += datetime.timedelta(seconds=int(gap_value))
|
||||
|
||||
|
||||
facet_list = sorted(facet_list, key=lambda n:n[0], reverse=True)
|
||||
|
||||
|
||||
for result in results:
|
||||
result_date = getattr(result, date_facet)
|
||||
if result_date:
|
||||
|
|
@ -545,19 +545,33 @@ class SearchBackend(BaseSearchBackend):
|
|||
if result_date > datetime.datetime.strptime(facet_date[0], '%Y-%m-%dT%H:%M:%S'):
|
||||
facet_list[n] = (facet_list[n][0], (facet_list[n][1] + 1))
|
||||
break
|
||||
|
||||
|
||||
facet_dict[date_facet] = facet_list
|
||||
|
||||
|
||||
return facet_dict
|
||||
|
||||
|
||||
def _do_query_facets(self, results, query_facets):
|
||||
"""
|
||||
Private method that facets a document by query
|
||||
|
||||
Required arguments:
|
||||
`results` -- A list of SearchResults to facet
|
||||
`query_facets` -- A dictionary containing facet parameters:
|
||||
{'field': 'query', [...]}
|
||||
|
||||
For each query in `query_facets`, generates a dictionary entry with
|
||||
the field name as the key and a tuple with the query and result count
|
||||
as the value.
|
||||
|
||||
eg. {'name': ('a*', 5)}
|
||||
"""
|
||||
facet_dict = {}
|
||||
|
||||
for field, query in query_facets.iteritems():
|
||||
facet_dict[field] = (query, self.search(query)['hits'])
|
||||
|
||||
return facet_dict
|
||||
|
||||
|
||||
def _marshal_value(self, value):
|
||||
"""
|
||||
Private method that converts Python values to a string for Xapian values.
|
||||
|
|
@ -565,12 +579,12 @@ class SearchBackend(BaseSearchBackend):
|
|||
if isinstance(value, datetime.datetime):
|
||||
if value.microsecond:
|
||||
value = u'%04d%02d%02d%02d%02d%02d%06d' % (
|
||||
value.year, value.month, value.day, value.hour,
|
||||
value.year, value.month, value.day, value.hour,
|
||||
value.minute, value.second, value.microsecond
|
||||
)
|
||||
else:
|
||||
value = u'%04d%02d%02d%02d%02d%02d' % (
|
||||
value.year, value.month, value.day, value.hour,
|
||||
value.year, value.month, value.day, value.hour,
|
||||
value.minute, value.second
|
||||
)
|
||||
elif isinstance(value, datetime.date):
|
||||
|
|
@ -585,39 +599,39 @@ class SearchBackend(BaseSearchBackend):
|
|||
else:
|
||||
value = force_unicode(value)
|
||||
return value
|
||||
|
||||
|
||||
def _database(self, writable=False):
|
||||
"""
|
||||
Private method that returns a xapian.Database for use and sets up
|
||||
schema and content_field definitions.
|
||||
|
||||
|
||||
Optional arguments:
|
||||
``writable`` -- Open the database in read/write mode (default=False)
|
||||
|
||||
|
||||
Returns an instance of a xapian.Database or xapian.WritableDatabase
|
||||
"""
|
||||
if writable:
|
||||
self.content_field_name, fields = self.site.build_unified_schema()
|
||||
self.schema = self._build_schema(fields)
|
||||
|
||||
|
||||
database = xapian.WritableDatabase(settings.HAYSTACK_XAPIAN_PATH, xapian.DB_CREATE_OR_OPEN)
|
||||
database.set_metadata('schema', pickle.dumps(self.schema, pickle.HIGHEST_PROTOCOL))
|
||||
database.set_metadata('content', pickle.dumps(self.content_field_name, pickle.HIGHEST_PROTOCOL))
|
||||
else:
|
||||
database = xapian.Database(settings.HAYSTACK_XAPIAN_PATH)
|
||||
|
||||
|
||||
self.schema = pickle.loads(database.get_metadata('schema'))
|
||||
self.content_field_name = pickle.loads(database.get_metadata('content'))
|
||||
|
||||
|
||||
return database
|
||||
|
||||
|
||||
def _term_generator(self, database, document):
|
||||
"""
|
||||
Private method that returns a Xapian.TermGenerator
|
||||
|
||||
|
||||
Required Argument:
|
||||
`document` -- The document to be indexed
|
||||
|
||||
|
||||
Returns a Xapian.TermGenerator instance. If `HAYSTACK_INCLUDE_SPELLING`
|
||||
is True, then the term generator will have spell-checking enabled.
|
||||
"""
|
||||
|
|
@ -628,7 +642,7 @@ class SearchBackend(BaseSearchBackend):
|
|||
term_generator.set_flags(xapian.TermGenerator.FLAG_SPELLING)
|
||||
term_generator.set_document(document)
|
||||
return term_generator
|
||||
|
||||
|
||||
def _query(self, database, query_string, narrow_queries=None, boost=None):
|
||||
"""
|
||||
Private method that takes a query string and returns a xapian.Query.
|
||||
|
|
@ -644,7 +658,7 @@ class SearchBackend(BaseSearchBackend):
|
|||
setup as pulled from the `query_string`.
|
||||
"""
|
||||
spelling_suggestion = None
|
||||
|
||||
|
||||
if query_string == '*':
|
||||
query = xapian.Query('') # Make '*' match everything
|
||||
else:
|
||||
|
|
@ -655,13 +669,13 @@ class SearchBackend(BaseSearchBackend):
|
|||
query = qp.parse_query(query_string, flags)
|
||||
if getattr(settings, 'HAYSTACK_INCLUDE_SPELLING', False) is True:
|
||||
spelling_suggestion = qp.get_corrected_query_string()
|
||||
|
||||
|
||||
if narrow_queries:
|
||||
subqueries = [
|
||||
qp.parse_query(narrow_query, flags) for narrow_query in narrow_queries
|
||||
]
|
||||
query = xapian.Query(
|
||||
xapian.Query.OP_FILTER,
|
||||
xapian.Query.OP_FILTER,
|
||||
query, xapian.Query(xapian.Query.OP_AND, subqueries)
|
||||
)
|
||||
if boost:
|
||||
|
|
@ -674,17 +688,17 @@ class SearchBackend(BaseSearchBackend):
|
|||
xapian.Query.OP_OR, query,
|
||||
xapian.Query(xapian.Query.OP_AND, subqueries)
|
||||
)
|
||||
|
||||
|
||||
return query, spelling_suggestion
|
||||
|
||||
|
||||
def _sorter(self, sort_by):
|
||||
"""
|
||||
Private method that takes a list of fields to sort by and returns a
|
||||
Private method that takes a list of fields to sort by and returns a
|
||||
xapian.MultiValueSorter
|
||||
|
||||
|
||||
Required Arguments:
|
||||
`sort_by` -- A list of fields to sort by
|
||||
|
||||
|
||||
Returns a xapian.MultiValueSorter instance
|
||||
"""
|
||||
sorter = xapian.MultiValueSorter()
|
||||
|
|
@ -696,9 +710,9 @@ class SearchBackend(BaseSearchBackend):
|
|||
else:
|
||||
reverse = False # Reverse is inverted in Xapian -- http://trac.xapian.org/ticket/311
|
||||
sorter.add(self._value_column(sort_field), reverse)
|
||||
|
||||
|
||||
return sorter
|
||||
|
||||
|
||||
def _flags(self):
|
||||
"""
|
||||
Returns the commonly used Xapian.QueryParser flags
|
||||
|
|
@ -712,14 +726,14 @@ class SearchBackend(BaseSearchBackend):
|
|||
if getattr(settings, 'HAYSTACK_INCLUDE_SPELLING', False) is True:
|
||||
flags = flags | xapian.QueryParser.FLAG_SPELLING_CORRECTION
|
||||
return flags
|
||||
|
||||
|
||||
def _query_parser(self, database):
|
||||
"""
|
||||
Private method that returns a Xapian.QueryParser instance.
|
||||
|
||||
|
||||
Required arguments:
|
||||
`database` -- The database to be queried
|
||||
|
||||
|
||||
The query parser returned will have stemming enabled, a boolean prefix
|
||||
for `django_ct`, and prefixes for all of the fields in the `self.schema`.
|
||||
"""
|
||||
|
|
@ -730,19 +744,19 @@ class SearchBackend(BaseSearchBackend):
|
|||
qp.add_boolean_prefix('django_ct', DOCUMENT_CT_TERM_PREFIX)
|
||||
for field_dict in self.schema:
|
||||
qp.add_prefix(
|
||||
field_dict['field_name'],
|
||||
field_dict['field_name'],
|
||||
DOCUMENT_CUSTOM_TERM_PREFIX + field_dict['field_name'].upper()
|
||||
)
|
||||
return qp
|
||||
|
||||
|
||||
def _enquire(self, database, query):
|
||||
"""
|
||||
Private method that returns a Xapian.Enquire instance for use with
|
||||
the specified `query`.
|
||||
|
||||
|
||||
Required Arguments:
|
||||
`query` -- The query to run
|
||||
|
||||
|
||||
Returns a xapian.Enquire instance
|
||||
"""
|
||||
enquire = xapian.Enquire(database)
|
||||
|
|
@ -750,14 +764,14 @@ class SearchBackend(BaseSearchBackend):
|
|||
enquire.set_docid_order(enquire.ASCENDING)
|
||||
|
||||
return enquire
|
||||
|
||||
|
||||
def _build_schema(self, fields):
|
||||
"""
|
||||
Private method to build a schema.
|
||||
|
||||
|
||||
Required arguments:
|
||||
``fields`` -- A list of fields in the index
|
||||
|
||||
|
||||
Returns a list of fields in dictionary format ready for inclusion in
|
||||
an indexed meta-data.
|
||||
"""
|
||||
|
|
@ -769,15 +783,15 @@ class SearchBackend(BaseSearchBackend):
|
|||
n += 1
|
||||
schema.append(field)
|
||||
return schema
|
||||
|
||||
|
||||
def _value_column(self, field):
|
||||
"""
|
||||
Private method that returns the column value slot in the database
|
||||
for a given field.
|
||||
|
||||
|
||||
Required arguments:
|
||||
`field` -- The field to lookup
|
||||
|
||||
|
||||
Returns an integer with the column location (0 indexed).
|
||||
"""
|
||||
for field_dict in self.schema:
|
||||
|
|
@ -790,55 +804,55 @@ class SearchQuery(BaseSearchQuery):
|
|||
"""
|
||||
`SearchQuery` is responsible for converting search queries into a format
|
||||
that Xapian can understand.
|
||||
|
||||
|
||||
Most of the work is done by the :method:`build_query`.
|
||||
"""
|
||||
def __init__(self, backend=None):
|
||||
"""
|
||||
Create a new instance of the SearchQuery setting the backend as
|
||||
specified. If no backend is set, will use the Xapian `SearchBackend`.
|
||||
|
||||
|
||||
Optional arguments:
|
||||
`backend` -- The `SearchBackend` to use (default = None)
|
||||
"""
|
||||
super(SearchQuery, self).__init__(backend=backend)
|
||||
self.backend = backend or SearchBackend()
|
||||
|
||||
|
||||
def build_query(self):
|
||||
"""
|
||||
Builds a search query from previously set values, returning a query
|
||||
string in a format ready for use by the Xapian `SearchBackend`.
|
||||
|
||||
|
||||
Returns:
|
||||
A query string suitable for parsing by Xapian.
|
||||
"""
|
||||
query = ''
|
||||
|
||||
|
||||
if not self.query_filters:
|
||||
query = '*'
|
||||
else:
|
||||
query_chunks = []
|
||||
|
||||
|
||||
for the_filter in self.query_filters:
|
||||
if the_filter.is_and():
|
||||
query_chunks.append('AND')
|
||||
|
||||
if the_filter.is_not():
|
||||
query_chunks.append('NOT')
|
||||
|
||||
|
||||
if the_filter.is_or():
|
||||
query_chunks.append('OR')
|
||||
|
||||
|
||||
value = the_filter.value
|
||||
|
||||
|
||||
if not isinstance(value, (list, tuple)):
|
||||
# Convert whatever we find to what xapian wants.
|
||||
value = self.backend._marshal_value(value)
|
||||
|
||||
|
||||
# Check to see if it's a phrase for an exact match.
|
||||
if ' ' in value:
|
||||
value = '"%s"' % value
|
||||
|
||||
|
||||
# 'content' is a special reserved word, much like 'pk' in
|
||||
# Django's ORM layer. It indicates 'no special field'.
|
||||
if the_filter.field == 'content':
|
||||
|
|
@ -852,33 +866,33 @@ class SearchQuery(BaseSearchQuery):
|
|||
'lt': "NOT %s:%s..*",
|
||||
'startswith': "%s:%s*",
|
||||
}
|
||||
|
||||
|
||||
if the_filter.filter_type != 'in':
|
||||
query_chunks.append(filter_types[the_filter.filter_type] % (the_filter.field, value))
|
||||
else:
|
||||
in_options = []
|
||||
|
||||
|
||||
for possible_value in value:
|
||||
in_options.append("%s:%s" % (the_filter.field, possible_value))
|
||||
|
||||
|
||||
query_chunks.append("(%s)" % " OR ".join(in_options))
|
||||
|
||||
|
||||
if query_chunks[0] in ('AND', 'OR'):
|
||||
# Pull off an undesirable leading "AND" or "OR".
|
||||
del(query_chunks[0])
|
||||
|
||||
|
||||
query = " ".join(query_chunks)
|
||||
|
||||
|
||||
if len(self.models):
|
||||
models = ['django_ct:%s.%s' % (model._meta.app_label, model._meta.module_name) for model in self.models]
|
||||
models_clause = ' '.join(models)
|
||||
final_query = '(%s) %s' % (query, models_clause)
|
||||
|
||||
|
||||
else:
|
||||
final_query = query
|
||||
|
||||
return final_query
|
||||
|
||||
return final_query
|
||||
|
||||
def run(self):
|
||||
"""
|
||||
Builds and executes the query. Returns a list of search results.
|
||||
|
|
@ -887,52 +901,52 @@ class SearchQuery(BaseSearchQuery):
|
|||
kwargs = {
|
||||
'start_offset': self.start_offset,
|
||||
}
|
||||
|
||||
|
||||
if self.order_by:
|
||||
kwargs['sort_by'] = self.order_by
|
||||
|
||||
|
||||
if self.end_offset is not None:
|
||||
kwargs['end_offset'] = self.end_offset - self.start_offset
|
||||
|
||||
|
||||
if self.highlight:
|
||||
kwargs['highlight'] = self.highlight
|
||||
|
||||
|
||||
if self.facets:
|
||||
kwargs['facets'] = list(self.facets)
|
||||
|
||||
|
||||
if self.date_facets:
|
||||
kwargs['date_facets'] = self.date_facets
|
||||
|
||||
|
||||
if self.query_facets:
|
||||
kwargs['query_facets'] = self.query_facets
|
||||
|
||||
|
||||
if self.narrow_queries:
|
||||
kwargs['narrow_queries'] = self.narrow_queries
|
||||
|
||||
if self.boost:
|
||||
kwargs['boost'] = self.boost
|
||||
|
||||
|
||||
results = self.backend.search(final_query, **kwargs)
|
||||
self._results = results.get('results', [])
|
||||
self._hit_count = results.get('hits', 0)
|
||||
self._facet_counts = results.get('facets', {})
|
||||
self._spelling_suggestion = results.get('spelling_suggestion', None)
|
||||
|
||||
|
||||
def run_mlt(self):
|
||||
"""
|
||||
Builds and executes the query. Returns a list of search results.
|
||||
"""
|
||||
if self._more_like_this is False or self._mlt_instance is None:
|
||||
raise MoreLikeThisError("No instance was provided to determine 'More Like This' results.")
|
||||
|
||||
|
||||
additional_query_string = self.build_query()
|
||||
kwargs = {
|
||||
'start_offset': self.start_offset,
|
||||
}
|
||||
|
||||
|
||||
if self.end_offset is not None:
|
||||
kwargs['end_offset'] = self.end_offset - self.start_offset
|
||||
|
||||
|
||||
results = self.backend.more_like_this(self._mlt_instance, additional_query_string, **kwargs)
|
||||
self._results = results.get('results', [])
|
||||
self._hit_count = results.get('hits', 0)
|
||||
|
|
|
|||
Loading…
Reference in a new issue