From 45191f89442f856892fe9868cb63527648e91823 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jorge=20C=2E=20Leit=C3=A3o?= Date: Wed, 11 Nov 2015 23:38:30 +0100 Subject: [PATCH] Date facets now return datetimes instead of strings. Also added one more test to date facets. --- tests/xapian_tests/tests/test_backend.py | 36 ++++++--- xapian_backend.py | 93 ++++++++++++++---------- 2 files changed, 80 insertions(+), 49 deletions(-) diff --git a/tests/xapian_tests/tests/test_backend.py b/tests/xapian_tests/tests/test_backend.py index 6e30470..f7899ac 100644 --- a/tests/xapian_tests/tests/test_backend.py +++ b/tests/xapian_tests/tests/test_backend.py @@ -383,7 +383,7 @@ class BackendFeaturesTestCase(HaystackBackendTestCase, TestCase): """ self.assertRaises(InvalidIndexError, self.backend.search, xapian.Query(''), facets=['dsdas']) - def test_date_facets(self): + def test_date_facets_month(self): facets = {'datetime': {'start_date': datetime.datetime(2008, 10, 26), 'end_date': datetime.datetime(2009, 3, 26), 'gap_by': 'month'}} @@ -394,13 +394,31 @@ class BackendFeaturesTestCase(HaystackBackendTestCase, TestCase): results = self.backend.search(xapian.Query('indexed'), date_facets=facets) self.assertEqual(results['hits'], 3) self.assertEqual(results['facets']['dates']['datetime'], [ - (b'2009-02-26T00:00:00', 0), - (b'2009-01-26T00:00:00', 3), - (b'2008-12-26T00:00:00', 0), - (b'2008-11-26T00:00:00', 0), - (b'2008-10-26T00:00:00', 0), + (datetime.datetime(2009, 2, 26, 0, 0), 0), + (datetime.datetime(2009, 1, 26, 0, 0), 3), + (datetime.datetime(2008, 12, 26, 0, 0), 0), + (datetime.datetime(2008, 11, 26, 0, 0), 0), + (datetime.datetime(2008, 10, 26, 0, 0), 0), ]) + def test_date_facets_seconds(self): + facets = {'datetime': {'start_date': datetime.datetime(2009, 2, 25, 1, 0, 57), + 'end_date': datetime.datetime(2009, 2, 25, 1, 1, 1), + 'gap_by': 'second'}} + + self.assertEqual(self.backend.search(xapian.Query(), date_facets=facets), + {'hits': 0, 'results': []}) + + results = 
self.backend.search(xapian.Query('indexed'), date_facets=facets) + self.assertEqual(results['hits'], 3) + self.assertEqual(results['facets']['dates']['datetime'], [ + (datetime.datetime(2009, 2, 25, 1, 1, 0), 0), + (datetime.datetime(2009, 2, 25, 1, 0, 59), 1), + (datetime.datetime(2009, 2, 25, 1, 0, 58), 1), + (datetime.datetime(2009, 2, 25, 1, 0, 57), 1), + ]) + + def test_date_facets_days(self): facets = {'date': {'start_date': datetime.datetime(2009, 2, 1), 'end_date': datetime.datetime(2009, 3, 15), 'gap_by': 'day', @@ -408,9 +426,9 @@ class BackendFeaturesTestCase(HaystackBackendTestCase, TestCase): results = self.backend.search(xapian.Query('indexed'), date_facets=facets) self.assertEqual(results['hits'], 3) self.assertEqual(results['facets']['dates']['date'], [ - (b'2009-03-03T00:00:00', 0), - (b'2009-02-16T00:00:00', 3), - (b'2009-02-01T00:00:00', 0) + (datetime.datetime(2009, 3, 3, 0, 0), 0), + (datetime.datetime(2009, 2, 16, 0, 0), 3), + (datetime.datetime(2009, 2, 1, 0, 0), 0) ]) def test_query_facets(self): diff --git a/xapian_backend.py b/xapian_backend.py index 5a6e55a..4083f4f 100755 --- a/xapian_backend.py +++ b/xapian_backend.py @@ -1,6 +1,5 @@ from __future__ import unicode_literals -import time import datetime import pickle import os @@ -1010,61 +1009,75 @@ class XapianSearchBackend(BaseSearchBackend): eg. 
{ 'pub_date': [ - ('2009-01-01T00:00:00Z', 5), - ('2009-02-01T00:00:00Z', 0), - ('2009-03-01T00:00:00Z', 0), - ('2009-04-01T00:00:00Z', 1), - ('2009-05-01T00:00:00Z', 2), + (datetime.datetime(2009, 1, 1, 0, 0), 5), + (datetime.datetime(2009, 2, 1, 0, 0), 0), + (datetime.datetime(2009, 3, 1, 0, 0), 0), + (datetime.datetime(2009, 4, 1, 0, 0), 1), + (datetime.datetime(2009, 5, 1, 0, 0), 2), ], } """ + def next_datetime(previous, gap_value, gap_type): # start of the bin that follows `previous` + year = previous.year + month = previous.month + + if gap_type == 'year': + next = previous.replace(year=year + gap_value) + elif gap_type == 'month': + if month + gap_value <= 12: + next = previous.replace(month=month + gap_value) + else: # roll over into a later year, keeping month within 1..12 + next = previous.replace( + month=((month + gap_value - 1) % 12 + 1), + year=(year + (month + gap_value - 1) // 12) + ) + elif gap_type == 'day': + next = previous + datetime.timedelta(days=gap_value) + elif gap_type == 'hour': + next = previous + datetime.timedelta(hours=gap_value) + elif gap_type == 'minute': + next = previous + datetime.timedelta(minutes=gap_value) + elif gap_type == 'second': + next = previous + datetime.timedelta(seconds=gap_value) + else: + raise TypeError('\'gap_by\' must be ' + '{second, minute, hour, day, month, year}') + return next + facet_dict = {} for date_facet, facet_params in list(date_facets.items()): gap_type = facet_params.get('gap_by') gap_value = facet_params.get('gap_amount', 1) date_range = facet_params['start_date'] + + # construct the bins of the histogram facet_list = [] while date_range < facet_params['end_date']: - facet_list.append((date_range.isoformat(), 0)) - if gap_type == 'year': - date_range = date_range.replace( - year=date_range.year + int(gap_value) - ) - elif gap_type == 'month': - if date_range.month + int(gap_value) > 12: - date_range = date_range.replace( - month=((date_range.month + int(gap_value)) % 12), - year=(date_range.year + (date_range.month + int(gap_value)) / 12) - ) - else: - date_range = date_range.replace( - month=date_range.month + 
int(gap_value) - ) - elif gap_type == 'day': - date_range += datetime.timedelta(days=int(gap_value)) - elif gap_type == 'hour': - date_range += datetime.timedelta(hours=int(gap_value)) - elif gap_type == 'minute': - date_range += datetime.timedelta(minutes=int(gap_value)) - elif gap_type == 'second': - date_range += datetime.timedelta(seconds=int(gap_value)) + facet_list.append((date_range, 0)) + date_range = next_datetime(date_range, gap_value, gap_type) facet_list = sorted(facet_list, key=lambda x: x[0], reverse=True) for result in results: result_date = getattr(result, date_facet) - if result_date: - if not isinstance(result_date, datetime.datetime): - result_date = datetime.datetime( - year=result_date.year, - month=result_date.month, - day=result_date.day, - ) - for n, facet_date in enumerate(facet_list): - if result_date > datetime.datetime(*(time.strptime(facet_date[0], '%Y-%m-%dT%H:%M:%S')[0:6])): - facet_list[n] = (facet_list[n][0], (facet_list[n][1] + 1)) - break + + # convert date to datetime + if not isinstance(result_date, datetime.datetime): + result_date = datetime.datetime(result_date.year, + result_date.month, + result_date.day) + + # ignore results outside the boundaries. + if facet_list[0][0] < result_date < facet_list[-1][0]: + continue + + # populate the histogram by putting the result on the right bin. + for n, facet_date in enumerate(facet_list): + if result_date > facet_date[0]: + # equal to facet_list[n][1] += 1, but for a tuple + facet_list[n] = (facet_list[n][0], (facet_list[n][1] + 1)) + break # bin found; go to next result facet_dict[date_facet] = facet_list