diff --git a/tests/xapian_backend.py b/tests/xapian_backend.py index c226984..4884f4e 100644 --- a/tests/xapian_backend.py +++ b/tests/xapian_backend.py @@ -187,10 +187,24 @@ class XapianSearchBackendTestCase(TestCase): self.sb.update(self.msi, self.sample_objs) self.assertEqual(len(self.xapian_search('')), 3) - self.assertEqual(self.sb.search('', date_facets={'pub_date': {'start_date': datetime.date(2008, 2, 26), 'end_date': datetime.date(2008, 2, 26), 'gap': 'month=1'}}), {'hits': 0, 'results': []}) - results = self.sb.search('index', date_facets={'pub_date': {'start_date': datetime.date(2008, 2, 26), 'end_date': datetime.date(2008, 2, 26), 'gap': 'month=1'}}) + self.assertEqual(self.sb.search('', date_facets={'pub_date': {'start_date': datetime.datetime(2008, 2, 26), 'end_date': datetime.datetime(2009, 2, 26), 'gap': 'month=1'}}), {'hits': 0, 'results': []}) + results = self.sb.search('index', date_facets={'pub_date': {'start_date': datetime.datetime(2008, 2, 26), 'end_date': datetime.datetime(2009, 2, 26), 'gap': 'month=1'}}) self.assertEqual(results['hits'], 3) - self.assertEqual(results['facets']['dates']['pub_date'], {'end': '2009-02-22T00:00:00', 'gap': 'month=1'}) + self.assertEqual(results['facets']['dates']['pub_date'], [ + ('2008-02-26T00:00:00', 0), + ('2008-03-26T00:00:00', 0), + ('2008-04-26T00:00:00', 0), + ('2008-05-26T00:00:00', 0), + ('2008-06-26T00:00:00', 0), + ('2008-07-26T00:00:00', 0), + ('2008-08-26T00:00:00', 0), + ('2008-09-26T00:00:00', 0), + ('2008-10-26T00:00:00', 0), + ('2008-11-26T00:00:00', 0), + ('2008-12-26T00:00:00', 0), + ('2009-01-26T00:00:00', 0), + ('2009-02-26T00:00:00', 3), + ]) # def test_query_facets(self): # self.assertEqual(self.sb.search('', query_facets={'name': '[* TO e]'}), {'hits': 0, 'results': []}) diff --git a/xapian_backend.py b/xapian_backend.py index 454280a..5254d11 100644 --- a/xapian_backend.py +++ b/xapian_backend.py @@ -328,10 +328,6 @@ class SearchBackend(BaseSearchBackend): facets_dict['fields'] = 
def _do_date_facets(self, results, date_facets):
    """
    Private method that facets search results by date ranges

    Required arguments:
        `results` -- A list of SearchResults to facet
        `date_facets` -- A dictionary containing facet parameters:
            {'field': {'start_date': ..., 'end_date': ..., 'gap': '...'}}
            nb., gap must be matched by the module-level `gap_re`: a unit
            (year, month, day, hour, minute or second) optionally followed
            by `=` and a positive integer, eg. 'month=1'.  A missing
            number means 1.

    For each date facet field in `date_facets`, generates the list of
    range starts (from `start_date` up to and including `end_date`, by
    `gap`), then tallies every result into the single range that contains
    its date: range start <= date < next range start.  The last range is
    one full `gap` wide.  Results dated outside every range, or with an
    empty date, are not counted.

    `start_date`, `end_date` and the result values may be either
    `datetime.datetime` or `datetime.date`; plain dates are treated as
    midnight.  Month and year steps are always computed from `start_date`,
    clamping the day to the length of the target month, so a start on the
    31st yields 31, 28/29, 31, 30, ... rather than raising or drifting.

    Returns a dictionary of date facets (fields) containing a list with
    one (ISO formatted range start, count) entry per range, oldest first.

    eg. {
        'pub_date': [
            ('2009-01-01T00:00:00', 5),
            ('2009-02-01T00:00:00', 0),
            ('2009-03-01T00:00:00', 0),
            ('2009-04-01T00:00:00', 1),
            ('2009-05-01T00:00:00', 2),
        ],
    }

    Raises `SearchBackendError` when a gap string is not recognised or
    its value is zero (either would otherwise never terminate).
    """
    # Imported locally so the block does not depend on new file-level imports.
    import bisect
    import calendar

    def to_datetime(value):
        # Plain dates cannot be compared with datetimes; promote to midnight.
        if isinstance(value, datetime.datetime):
            return value
        return datetime.datetime(value.year, value.month, value.day)

    def shift_months(origin, months):
        # Calendar-aware addition: roll the year over and clamp the day.
        years, month_index = divmod(origin.month - 1 + months, 12)
        year = origin.year + years
        month = month_index + 1
        day = min(origin.day, calendar.monthrange(year, month)[1])
        return origin.replace(year=year, month=month, day=day)

    # Gap units that map directly onto a fixed-length timedelta.
    timedelta_units = {
        'day': 'days',
        'hour': 'hours',
        'minute': 'minutes',
        'second': 'seconds',
    }

    facet_dict = {}

    for date_facet, facet_params in date_facets.items():
        match = gap_re.search(facet_params['gap'])
        if match is None:
            raise SearchBackendError('Invalid gap type in date facet')
        gap = match.groupdict()
        gap_type = gap['type']
        # The regex captures the number as text and it may be empty.
        gap_value = int(gap.get('value') or 1)
        if gap_value < 1:
            raise SearchBackendError('Invalid gap value in date facet')

        months_per_step = 0
        step = None
        if gap_type == 'year':
            months_per_step = 12 * gap_value
        elif gap_type == 'month':
            months_per_step = gap_value
        elif gap_type in timedelta_units:
            step = datetime.timedelta(**{timedelta_units[gap_type]: gap_value})
        else:
            raise SearchBackendError('Invalid gap type in date facet')

        first = to_datetime(facet_params['start_date'])
        last = to_datetime(facet_params['end_date'])

        # Every range start that is <= end_date, plus one trailing edge
        # that closes the final range.
        boundaries = []
        step_index = 0
        while True:
            if months_per_step:
                edge = shift_months(first, step_index * months_per_step)
            else:
                edge = first + step_index * step
            boundaries.append(edge)
            if edge > last:
                break
            step_index += 1

        counts = [0] * (len(boundaries) - 1)
        for result in results:
            result_date = getattr(result, date_facet)
            if not result_date:
                continue
            # Index of the last boundary that is <= the result's date.
            slot = bisect.bisect_right(boundaries, to_datetime(result_date)) - 1
            if 0 <= slot < len(counts):
                counts[slot] += 1

        # zip() stops at `counts`, dropping the trailing edge.
        facet_dict[date_facet] = [
            (edge.isoformat(), count)
            for edge, count in zip(boundaries, counts)
        ]

    return facet_dict