From 2b6f470fa7c125beb3cddd37da3619f1cba2ecc1 Mon Sep 17 00:00:00 2001 From: Iwo Herka Date: Fri, 18 May 2018 13:18:24 +0200 Subject: [PATCH] Add Q-expr rewriter/fix relational algebra (#1) * Add debug Q-expr print function * Change old __unicode__ methods to __str__ format * Move __unicode__ methods to __str__ * Rewrite AND Q-expressions to safe form * Make Q-expr debug print method use nicer format * Document rewrite_q_expr function * Rewrite/improve queries' test * Document some more code * Reorganize and document queryset module * Add more tests for queries * Use generic relation attribute instead instead of 'eav_values' * Update docstring * Fix a typo * Move up a variable * Rename a function * Check if 'in' filter rhs is not a QuerySet * Add newline to the end of file * Fix typo; tweak docstring --- eav/decorators.py | 129 ++-------------------- eav/models.py | 10 +- eav/queryset.py | 274 ++++++++++++++++++++++++++++++++++++++++++---- eav/utils.py | 21 ++++ tests/models.py | 12 +- tests/queries.py | 116 ++++++++++++-------- 6 files changed, 373 insertions(+), 189 deletions(-) create mode 100644 eav/utils.py diff --git a/eav/decorators.py b/eav/decorators.py index a6ac4d6..e4fd772 100644 --- a/eav/decorators.py +++ b/eav/decorators.py @@ -1,19 +1,19 @@ -from functools import wraps - -from django.db import models - -from .models import Attribute, Value +''' +Decorators module. +This module contains pure wrapper functions used as decorators. +Functions in this module should be simple and not involve complex logic. +''' def register_eav(**kwargs): - """ + ''' Registers the given model(s) classes and wrapped Model class with - django-eav: + django-eav:: - @register_eav - class Author(models.Model): - pass - """ + @register_eav + class Author(models.Model): + pass + ''' from . import register from django.db.models import Model @@ -24,110 +24,3 @@ def register_eav(**kwargs): return model_class return _model_eav_wrapper - - -def eav_filter(func): - ''' - Decorator used to wrap filter and exclude methods. Passes args through - expand_q_filters and kwargs through expand_eav_filter. Returns the - called function (filter or exclude). - ''' - - @wraps(func) - def wrapper(self, *args, **kwargs): - nargs = [] - nkwargs = {} - - for arg in args: - if isinstance(arg, models.Q): - # Modify Q objects (warning: recursion ahead). - # import pdb; pdb.set_trace() - arg = expand_q_filters(arg, self.model) - nargs.append(arg) - - o = self.model.objects.all().first() - fn = self.model.objects.filter - # print(args, kwargs) - - for key, value in kwargs.items(): - # Modify kwargs (warning: recursion ahead). - nkey, nval = expand_eav_filter(self.model, key, value) - - if nkey in nkwargs: - # Apply AND to both querysets. - nkwargs[nkey] = (nkwargs[nkey] & nval).distinct() - else: - nkwargs.update({nkey: nval}) - - return func(self, *nargs, **nkwargs) - - return wrapper - - -def expand_q_filters(q, root_cls): - ''' - Takes a Q object and a model class. - Recursivley passes each filter / value in the Q object tree leaf nodes - through expand_eav_filter - ''' - new_children = [] - - for qi in q.children: - if type(qi) is tuple: - # this child is a leaf node: in Q this is a 2-tuple of: - # (filter parameter, value) - key, value = expand_eav_filter(root_cls, *qi) - new_children.append(models.Q(**{key: value})) - else: - # this child is another Q node: recursify! - new_children.append(expand_q_filters(qi, root_cls)) - - _q = models.Q() - _q.children = new_children - _q.connector = q.connector - _q.negated = q.negated - return _q - - -def expand_eav_filter(model_cls, key, value): - ''' - Accepts a model class and a key, value. - Recurisively replaces any eav filter with a subquery. - - For example:: - - key = 'eav__height' - value = 5 - - Would return:: - - key = 'eav_values__in' - value = Values.objects.filter(value_int=5, attribute__slug='height') - ''' - fields = key.split('__') - config_cls = getattr(model_cls, '_eav_config_cls', None) - - if len(fields) > 1 and config_cls and \ - fields[0] == config_cls.eav_attr: - slug = fields[1] - gr_name = config_cls.generic_relation_attr - datatype = Attribute.objects.get(slug=slug).datatype - - lookup = '__%s' % fields[2] if len(fields) > 2 else '' - kwargs = {'value_%s%s' % (datatype, lookup): value, - 'attribute__slug': slug} - value = Value.objects.filter(**kwargs) - - return '%s__in' % gr_name, value - - try: - field = model_cls._meta.get_field(fields[0]) - except models.FieldDoesNotExist: - return key, value - - if not field.auto_created or field.concrete: - return key, value - else: - sub_key = '__'.join(fields[1:]) - key, value = expand_eav_filter(field.model, sub_key, value) - return '%s__%s' % (fields[0], key), value diff --git a/eav/models.py b/eav/models.py index cd8c2fd..66e1f42 100644 --- a/eav/models.py +++ b/eav/models.py @@ -79,7 +79,7 @@ class EnumValue(models.Model): value = models.CharField(_(u"value"), db_index=True, unique=True, max_length=50) - def __unicode__(self): + def __str__(self): return self.value @@ -406,9 +406,11 @@ class Value(models.Model): value = property(_get_value, _set_value) - def __unicode__(self): - return u"%s - %s: \"%s\"" % (self.entity, self.attribute.name, - self.value) + def __str__(self): + return '{}: "{}" ({})'.format(self.attribute.name, self.value, self.entity) + + def __repr__(self): + return '{}: "{}" ({})'.format(self.attribute.name, self.value, self.entity.pk) class Entity(object): diff --git a/eav/queryset.py b/eav/queryset.py index be71470..4ac3cac 100644 --- a/eav/queryset.py +++ b/eav/queryset.py @@ -1,31 +1,263 @@ -#!/usr/bin/env python -# -# This software is derived from EAV-Django originally written and -# copyrighted by Andrey Mikhaylenko -# -# This is free software: you can redistribute it and/or modify -# it under the terms of the GNU Lesser General Public License as published -# by the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This software is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public License -# along with EAV-Django. If not, see . +''' +Queryset module. +This module contains custom EavQuerySet class used for overriding +relational operators and pure functions for rewriting Q-expressions. +Q-expressions need to be rewritten for two reasons: + + 1. In order to hide implementation from the user and provide easy to use + syntax sugar, i.e.:: + + Supplier.objects.filter(eav__city__startswith='New') + + instead of:: + + city_values = Value.objects.filter(value__text__startswith='New') + Supplier.objects.filter(eav_values__in=city_values) + + For details see: ``eav_filter``. + + 2. To ensure that Q-expression tree is compiled to valid SQL. + For details see: ``rewrite_q_expr``. +''' + +import sys +from functools import reduce, wraps + +from django.db import models +from django.db.models import Q from django.db.models.query import QuerySet -from .decorators import eav_filter +from .models import Attribute, Value + + +def is_eav_and_leaf(expr, gr_name): + ''' + Checks whether Q-expression is an EAV AND leaf. + + Args: + expr (Q | tuple): Q-expression to be checked. + gr_name (str): Generic relation attribute name, by default 'eav_values' + + Returns: + bool + ''' + return (getattr(expr, 'connector', None) == 'AND' and + len(expr.children) == 1 and + expr.children[0][0] in ['pk__in', '{}__in'.format(gr_name)]) + + +def rewrite_q_expr(model_cls, expr): + ''' + Rewrites Q-expression to safe form, in order to ensure that + generated SQL is valid. + + Suppose we have the following Q-expression: + + └── OR + ├── AND + │ └── eav_values__in [1, 2, 3] + └── AND (1) + ├── AND + │ └── eav_values__in [4, 5] + └── AND + └── eav_values__in [6, 7, 8] + + All EAV values are stored in a single table. Therefore, INNER JOIN + generated for the AND-expression (1) will always fail, i.e. + single row in a eav_values table cannot be both in two disjoint sets at + the same time (and the whole point of using AND, usually, is two have + two different sets). Therefore, we must paritially rewrite the + expression so that the generated SQL is valid:: + + └── OR + ├── AND + │ └── eav_values__in [1, 2, 3] + └── AND + └── pk__in [1, 2] + + This is done by merging dangerous AND's and substituting them with + explicit ``pk__in`` filter, where pks are taken from evaluted + Q-expr branch. + + Args: + model_cls (Model class): model class used to construct QuerySet + from leaf attribute-value expression. + expr: (Q | tuple): Q-expression (or attr-val leaf) to be rewritten. + + Returns: + Q | tuple + ''' + # Node in a Q-expr can be a Q or an attribute-value tuple (leaf). + # We are only interested in Qs. + + if type(expr) == Q: + config_cls = getattr(model_cls, '_eav_config_cls', None) + assert config_cls + gr_name = config_cls.generic_relation_attr + + # Recurively check child nodes. + expr.children = [rewrite_q_expr(model_cls, c) for c in expr.children] + # Check which ones need a rewrite. + rewritable = [c for c in expr.children if is_eav_and_leaf(c, gr_name)] + + # Conflict occurs only with two or more AND-expressions. + # If there is only one we can ignore it. + + if len(rewritable) > 1: + q = None + # Save nodes which shouldn't be merged (non-EAV). + other = [c for c in expr.children if not c in rewritable] + + for child in rewritable: + assert child.children and len(child.children) == 1 + # Child to be merged is always a terminal Q node, + # i.e. it's an AND expression with attribute-value tuple child. + attrval = child.children[0] + assert type(attrval) == tuple + + fname = '{}__in'.format(gr_name) + + # Child can be either a 'eav_values__in' or 'pk__in' query. + # If it's the former then transform it into the latter. + # Otherwise, check if the value is valid for the '__in' query. + # If so, reverse it back to QuerySet so that set operators + # can be applied. + + if attrval[0] == fname or hasattr(attrval[1], '__contains__'): + # Create model queryset. + _q = model_cls.objects.filter(**{fname: attrval[1]}) + else: + # Second val in tuple is a queryset. + _q = attrval[1] + + # Explicitly check for None. 'or' doesn't work here + # as empty QuerySet, which is valid, is falsy. + q = q if q != None else _q + + if expr.connector == 'AND': + q &= _q + else: + q |= _q + + # If any two children were merged, + # update parent expression. + if q != None: + expr.children = other + [('pk__in', q)] + + return expr + + +def eav_filter(func): + ''' + Decorator used to wrap filter and exclude methods. Passes args through + expand_q_filters and kwargs through expand_eav_filter. Returns the + called function (filter or exclude). + ''' + @wraps(func) + def wrapper(self, *args, **kwargs): + nargs = [] + nkwargs = {} + + for arg in args: + if isinstance(arg, Q): + # Modify Q objects (warning: recursion ahead). + arg = expand_q_filters(arg, self.model) + # Rewrite Q-expression to safeform. + arg = rewrite_q_expr(self.model, arg) + nargs.append(arg) + + for key, value in kwargs.items(): + # Modify kwargs (warning: recursion ahead). + nkey, nval = expand_eav_filter(self.model, key, value) + + if nkey in nkwargs: + # Apply AND to both querysets. + nkwargs[nkey] = (nkwargs[nkey] & nval).distinct() + else: + nkwargs.update({nkey: nval}) + + return func(self, *nargs, **nkwargs) + + return wrapper + + +def expand_q_filters(q, root_cls): + ''' + Takes a Q object and a model class. + Recursively passes each filter / value in the Q object tree leaf nodes + through expand_eav_filter + ''' + new_children = [] + + for qi in q.children: + if type(qi) is tuple: + # This child is a leaf node: in Q this is a 2-tuple of: + # (filter parameter, value). + key, value = expand_eav_filter(root_cls, *qi) + new_children.append(Q(**{key: value})) + else: + # This child is another Q node: recursify! + new_children.append(expand_q_filters(qi, root_cls)) + + q.children = new_children + return q + + +def expand_eav_filter(model_cls, key, value): + ''' + Accepts a model class and a key, value. + Recurisively replaces any eav filter with a subquery. + + For example:: + + key = 'eav__height' + value = 5 + + Would return:: + + key = 'eav_values__in' + value = Values.objects.filter(value_int=5, attribute__slug='height') + ''' + fields = key.split('__') + config_cls = getattr(model_cls, '_eav_config_cls', None) + + if len(fields) > 1 and config_cls and fields[0] == config_cls.eav_attr: + slug = fields[1] + gr_name = config_cls.generic_relation_attr + datatype = Attribute.objects.get(slug=slug).datatype + + lookup = '__%s' % fields[2] if len(fields) > 2 else '' + kwargs = { + 'value_{}{}'.format(datatype, lookup): value, + 'attribute__slug': slug + } + value = Value.objects.filter(**kwargs) + + return '%s__in' % gr_name, value + + try: + field = model_cls._meta.get_field(fields[0]) + except models.FieldDoesNotExist: + return key, value + + if not field.auto_created or field.concrete: + return key, value + else: + sub_key = '__'.join(fields[1:]) + key, value = expand_eav_filter(field.model, sub_key, value) + return '{}__{}'.format(ields[0], key), value class EavQuerySet(QuerySet): + ''' + Overrides relational operators for EAV models. + ''' + @eav_filter def filter(self, *args, **kwargs): ''' - Pass *args* and *kwargs* through :func:`eav_filter`, then pass to + Pass *args* and *kwargs* through ``eav_filter``, then pass to the ``models.Manager`` filter method. ''' return super().filter(*args, **kwargs).distinct() @@ -33,7 +265,7 @@ class EavQuerySet(QuerySet): @eav_filter def exclude(self, *args, **kwargs): ''' - Pass *args* and *kwargs* through :func:`eav_filter`, then pass to + Pass *args* and *kwargs* through ``eav_filter``, then pass to the ``models.Manager`` exclude method. ''' return super().exclude(*args, **kwargs).distinct() @@ -41,7 +273,7 @@ class EavQuerySet(QuerySet): @eav_filter def get(self, *args, **kwargs): ''' - Pass *args* and *kwargs* through :func:`eav_filter`, then pass to + Pass *args* and *kwargs* through ``eav_filter``, then pass to the ``models.Manager`` get method. ''' return super().get(*args, **kwargs) diff --git a/eav/utils.py b/eav/utils.py new file mode 100644 index 0000000..b3e8367 --- /dev/null +++ b/eav/utils.py @@ -0,0 +1,21 @@ +import sys +from django.db.models import Q + + +def print_q_expr(expr, indent="", is_tail=True): + ''' + Simple print method for debugging Q-expressions' trees. + ''' + sys.stdout.write(indent) + sa, sb = (' └── ', ' ') if is_tail else (' ├── ', ' │ ') + + if type(expr) == Q: + sys.stdout.write('{}{}\n'.format(sa, expr.connector)) + for child in expr.children: + print_q_expr(child, indent + sb, expr.children[-1] == child) + else: + try: + queryset = ', '.join(repr(v) for v in expr[1]) + except TypeError: + queryset = repr(expr[1]) + sys.stdout.write(' └── {} {}\n'.format(expr[0], queryset)) diff --git a/tests/models.py b/tests/models.py index a42fd66..5072f42 100644 --- a/tests/models.py +++ b/tests/models.py @@ -5,16 +5,24 @@ from eav.decorators import register_eav class Patient(models.Model): name = models.CharField(max_length=12) - def __unicode__(self): + def __str__(self): return self.name + def __repr__(self): + return self.name + + class Encounter(models.Model): num = models.PositiveSmallIntegerField() patient = models.ForeignKey(Patient, on_delete=models.PROTECT) - def __unicode__(self): + def __str__(self): return '%s: encounter num %d' % (self.patient, self.num) + def __repr__(self): + return self.name + + @register_eav() class ExampleModel(models.Model): name = models.CharField(max_length=12) diff --git a/tests/queries.py b/tests/queries.py index 19d79e9..81c7e65 100644 --- a/tests/queries.py +++ b/tests/queries.py @@ -1,13 +1,13 @@ -from django.test import TestCase -from django.db.models import Q from django.contrib.auth.models import User from django.core.exceptions import MultipleObjectsReturned +from django.db.models import Q +from django.test import TestCase import eav +from eav.models import Attribute, EnumGroup, EnumValue, Value from eav.registry import EavConfig -from eav.models import EnumValue, EnumGroup, Attribute, Value -from .models import Patient, Encounter, ExampleModel +from .models import Encounter, ExampleModel, Patient class Queries(TestCase): @@ -23,13 +23,13 @@ class Queries(TestCase): self.yes = EnumValue.objects.create(value='yes') self.no = EnumValue.objects.create(value='no') - self.unkown = EnumValue.objects.create(value='unkown') - + self.unknown = EnumValue.objects.create(value='unknown') + ynu = EnumGroup.objects.create(name='Yes / No / Unknown') ynu.enums.add(self.yes) ynu.enums.add(self.no) - ynu.enums.add(self.unkown) - + ynu.enums.add(self.unknown) + Attribute.objects.create(name='fever', datatype=Attribute.TYPE_ENUM, enum_group=ynu) def tearDown(self): @@ -37,7 +37,6 @@ class Queries(TestCase): eav.unregister(Patient) def test_get_or_create_with_eav(self): - return p = Patient.objects.get_or_create(name='Bob', eav__age=5) self.assertEqual(Patient.objects.count(), 1) self.assertEqual(Value.objects.count(), 1) @@ -49,24 +48,25 @@ class Queries(TestCase): self.assertEqual(Value.objects.count(), 2) def test_get_with_eav(self): - return p1, _ = Patient.objects.get_or_create(name='Bob', eav__age=6) self.assertEqual(Patient.objects.get(eav__age=6), p1) - + p2, _ = Patient.objects.get_or_create(name='Fred', eav__age=6) self.assertRaises(MultipleObjectsReturned, lambda: Patient.objects.get(eav__age=6)) - def test_filtering_on_normal_and_eav_fields(self): + def test_filtering_on_normal_and_eav_fields(self): yes = self.yes no = self.no + data = [ - ['3_New_York_USA', 3, no, 'New York', 'USA'], - ['15_Bamako_Mali', 15, no, 'Bamako', 'Mali'], - ['15_Kisumu_Kenya', 15, yes, 'Kisumu', 'Kenya'], - ['3_Nice_France', 3, no, 'Nice', 'France'], - ['2_France_Nice', 2, yes, 'France', 'Nice'] + # Name, age, fever, city, country. + ['Anne', 3, no, 'New York', 'USA'], + ['Bob', 15, no, 'Bamako', 'Mali'], + ['Cyrill', 15, yes, 'Kisumu', 'Kenya'], + ['Daniel', 3, no, 'Nice', 'France'], + ['Eugene', 2, yes, 'France', 'Nice'] ] - + for row in data: Patient.objects.create( name=row[0], @@ -76,38 +76,66 @@ class Queries(TestCase): eav__country=row[4] ) - #1 = Q(eav__city__contains='Y') & Q(eav__fever=no) - #2 = Q(eav__age=3) - #3 = (q1 & q2) - #rint(vars(q3)) - - # = Patient.objects.filter(q3) - #print(q) - - # return + # Check number of objects in DB. self.assertEqual(Patient.objects.count(), 5) self.assertEqual(Value.objects.count(), 20) - self.assertEqual(Patient.objects.filter(eav__city__contains='Y').count(), 1) - self.assertEqual(Patient.objects.exclude(eav__city__contains='Y').count(), 4) + # Nobody + q1 = Q(eav__fever=yes) & Q(eav__fever=no) + p = Patient.objects.filter(q1) + self.assertEqual(p.count(), 0) - # Bob - self.assertEqual(Patient.objects.filter(Q(eav__city__contains='Y')).count(), 1) + # Anne, Daniel + q1 = Q(eav__age__gte=3) # Everyone except Eugene + q2 = Q(eav__age__lt=15) # Anne, Daniel, Eugene + p = Patient.objects.filter(q2 & q1) + self.assertEqual(p.count(), 2) - # Everyone except Bob - #self.assertEqual(Patient.objects.exclude(Q(eav__city__contains='Y')).count(), 4) - - # Bob, Fred, Joe - q1 = Q(eav__city__contains='Y') | Q(eav__fever=no) - self.assertEqual(Patient.objects.filter(q1).count(), 3) - - # Joe, Bob + # Anne + q1 = Q(eav__city__contains='Y') & Q(eav__fever=no) q2 = Q(eav__age=3) - self.assertEqual(Patient.objects.filter(q2).count(), 2) + p = Patient.objects.filter(q1 & q2) + self.assertEqual(p.count(), 1) - # Joe, Bob - #elf.assertEqual(Patient.objects.filter(q1 & q2).count(), 1) + # Anne, Daniel + q1 = Q(eav__city__contains='Y', eav__fever=no) + q2 = Q(eav__city='Nice') + q3 = Q(eav__age=3) + p = Patient.objects.filter((q1 | q2) & q3) + self.assertEqual(p.count(), 2) - # Jose - self.assertEqual(Patient.objects.filter(name__contains='F', eav__fever=yes).count(), 1) + # Everyone + q1 = Q(eav__fever=no) | Q(eav__fever=yes) + p = Patient.objects.filter(q1) + self.assertEqual(p.count(), 5) + # Anne, Bob, Daniel + q1 = Q(eav__fever=no) # Anne, Bob, Daniel + q2 = Q(eav__fever=yes) # Cyrill, Eugene + q3 = Q(eav__country__contains='e') # Cyrill, Daniel, Eugene + q4 = q2 & q3 # Cyrill, Daniel, Eugene + q5 = (q1 | q4) & q1 # Anne, Bob, Daniel + p = Patient.objects.filter(q5) + self.assertEqual(p.count(), 3) + + # Everyone except Anne + q1 = Q(eav__city__contains='Y') + p = Patient.objects.exclude(q1) + self.assertEqual(p.count(), 4) + + # Anne, Bob, Daniel + q1 = Q(eav__city__contains='Y') + q2 = Q(eav__fever=no) + q3 = q1 | q2 + p = Patient.objects.filter(q3) + self.assertEqual(p.count(), 3) + + # Anne, Daniel + q1 = Q(eav__age=3) + p = Patient.objects.filter(q1) + self.assertEqual(p.count(), 2) + + # Eugene + q1 = Q(name__contains='E', eav__fever=yes) + p = Patient.objects.filter(q1) + self.assertEqual(p.count(), 1)