Add Q-expr rewriter/fix relational algebra (#1)

* Add debug Q-expr print function * Change old __unicode__ methods to __str__ format * Move __unicode__ methods to __str__ * Rewrite AND Q-expressions to safe form * Make Q-expr debug print method use nicer format * Document rewrite_q_expr function * Rewrite/improve queries' test * Document some more code * Reorganize and document queryset module * Add more tests for queries * Use generic relation attribute instead instead of 'eav_values' * Update docstring * Fix a typo * Move up a variable * Rename a function * Check if 'in' filter rhs is not a QuerySet * Add newline to the end of file * Fix typo; tweak docstring
2026-05-08 23:54:48 +00:00 · 2018-05-18 13:18:24 +02:00 · 2018-05-18 13:18:24 +02:00 · 2b6f470fa7
commit 2b6f470fa7
parent bcd129a03d
6 changed files with 373 additions and 189 deletions
--- a/eav/decorators.py
+++ b/eav/decorators.py
@ -1,19 +1,19 @@
-from functools import wraps
-
-from django.db import models
-
-from .models import Attribute, Value
+'''
+Decorators module.

+This module contains pure wrapper functions used as decorators.
+Functions in this module should be simple and not involve complex logic.
+'''

 def register_eav(**kwargs):
-    """
+    '''
    Registers the given model(s) classes and wrapped Model class with
-    django-eav:
+    django-eav::

-    @register_eav
-    class Author(models.Model):
-        pass
-    """
+        @register_eav
+        class Author(models.Model):
+            pass
+    '''
    from . import register
    from django.db.models import Model

@ -24,110 +24,3 @@ def register_eav(**kwargs):
        return model_class

    return _model_eav_wrapper
-
-
-def eav_filter(func):
-    '''
-    Decorator used to wrap filter and exclude methods. Passes args through
-    expand_q_filters and kwargs through expand_eav_filter. Returns the
-    called function (filter or exclude).
-    '''
-
-    @wraps(func)
-    def wrapper(self, *args, **kwargs):
-        nargs = []
-        nkwargs = {}
-
-        for arg in args:
-            if isinstance(arg, models.Q):
-                # Modify Q objects (warning: recursion ahead).
-                # import pdb; pdb.set_trace()
-                arg = expand_q_filters(arg, self.model)
-            nargs.append(arg)
-
-        o = self.model.objects.all().first()
-        fn = self.model.objects.filter
-        # print(args, kwargs)
-
-        for key, value in kwargs.items():
-            # Modify kwargs (warning: recursion ahead).
-            nkey, nval = expand_eav_filter(self.model, key, value)
-
-            if nkey in nkwargs:
-                # Apply AND to both querysets.
-                nkwargs[nkey] = (nkwargs[nkey] & nval).distinct()
-            else:
-                nkwargs.update({nkey: nval})
-
-        return func(self, *nargs, **nkwargs)
-
-    return wrapper
-
-
-def expand_q_filters(q, root_cls):
-    '''
-    Takes a Q object and a model class.
-    Recursivley passes each filter / value in the Q object tree leaf nodes
-    through expand_eav_filter
-    '''
-    new_children = []
-
-    for qi in q.children:
-        if type(qi) is tuple:
-            # this child is a leaf node: in Q this is a 2-tuple of:
-            # (filter parameter, value)
-            key, value = expand_eav_filter(root_cls, *qi)
-            new_children.append(models.Q(**{key: value}))
-        else:
-            # this child is another Q node: recursify!
-            new_children.append(expand_q_filters(qi, root_cls))
-
-    _q = models.Q()
-    _q.children = new_children
-    _q.connector = q.connector
-    _q.negated = q.negated
-    return _q
-
-
-def expand_eav_filter(model_cls, key, value):
-    '''
-    Accepts a model class and a key, value.
-    Recurisively replaces any eav filter with a subquery.
-
-    For example::
-
-        key = 'eav__height'
-        value = 5
-
-    Would return::
-
-        key = 'eav_values__in'
-        value = Values.objects.filter(value_int=5, attribute__slug='height')
-    '''
-    fields = key.split('__')
-    config_cls = getattr(model_cls, '_eav_config_cls', None)
-
-    if len(fields) > 1 and config_cls and \
-       fields[0] == config_cls.eav_attr:
-        slug = fields[1]
-        gr_name = config_cls.generic_relation_attr
-        datatype = Attribute.objects.get(slug=slug).datatype
-
-        lookup = '__%s' % fields[2] if len(fields) > 2 else ''
-        kwargs = {'value_%s%s' % (datatype, lookup): value,
-                  'attribute__slug': slug}
-        value = Value.objects.filter(**kwargs)
-
-        return '%s__in' % gr_name, value
-
-    try:
-        field = model_cls._meta.get_field(fields[0])
-    except models.FieldDoesNotExist:
-        return key, value
-
-    if not field.auto_created or field.concrete:
-        return key, value
-    else:
-        sub_key = '__'.join(fields[1:])
-        key, value = expand_eav_filter(field.model, sub_key, value)
-        return '%s__%s' % (fields[0], key), value
--- a/eav/models.py
+++ b/eav/models.py
@ -79,7 +79,7 @@ class EnumValue(models.Model):
    value = models.CharField(_(u"value"), db_index=True,
                             unique=True, max_length=50)

-    def __unicode__(self):
+    def __str__(self):
        return self.value


@ -406,9 +406,11 @@ class Value(models.Model):

    value = property(_get_value, _set_value)

-    def __unicode__(self):
-        return u"%s - %s: \"%s\"" % (self.entity, self.attribute.name,
-                                     self.value)
+    def __str__(self):
+        return '{}: "{}" ({})'.format(self.attribute.name, self.value, self.entity)
+
+    def __repr__(self):
+        return '{}: "{}" ({})'.format(self.attribute.name, self.value, self.entity.pk)


 class Entity(object):
--- a/eav/queryset.py
+++ b/eav/queryset.py
@ -1,31 +1,263 @@
-#!/usr/bin/env python
-#
-#    This software is derived from EAV-Django originally written and
-#    copyrighted by Andrey Mikhaylenko <http://pypi.python.org/pypi/eav-django>
-#
-#    This is free software: you can redistribute it and/or modify
-#    it under the terms of the GNU Lesser General Public License as published
-#    by the Free Software Foundation, either version 3 of the License, or
-#    (at your option) any later version.
-#
-#    This software is distributed in the hope that it will be useful,
-#    but WITHOUT ANY WARRANTY; without even the implied warranty of
-#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#    GNU Lesser General Public License for more details.
-#
-#    You should have received a copy of the GNU Lesser General Public License
-#    along with EAV-Django.  If not, see <http://gnu.org/licenses/>.
+'''
+Queryset module.

+This module contains custom EavQuerySet class used for overriding
+relational operators and pure functions for rewriting Q-expressions.
+Q-expressions need to be rewritten for two reasons:
+
+    1. In order to hide implementation from the user and provide easy to use
+       syntax sugar, i.e.::
+
+           Supplier.objects.filter(eav__city__startswith='New')
+
+       instead of::
+
+           city_values = Value.objects.filter(value__text__startswith='New')
+           Supplier.objects.filter(eav_values__in=city_values)
+
+       For details see: ``eav_filter``.
+
+    2. To ensure that Q-expression tree is compiled to valid SQL.
+       For details see: ``rewrite_q_expr``.
+'''
+
+import sys
+from functools import reduce, wraps
+
+from django.db import models
+from django.db.models import Q
 from django.db.models.query import QuerySet

-from .decorators import eav_filter
+from .models import Attribute, Value
+
+
+def is_eav_and_leaf(expr, gr_name):
+    '''
+    Checks whether Q-expression is an EAV AND leaf.
+
+    Args:
+        expr (Q | tuple): Q-expression to be checked.
+        gr_name (str): Generic relation attribute name, by default 'eav_values'
+
+    Returns:
+        bool
+    '''
+    return (getattr(expr, 'connector', None) == 'AND' and
+            len(expr.children) == 1 and
+            expr.children[0][0] in ['pk__in', '{}__in'.format(gr_name)])
+
+
+def rewrite_q_expr(model_cls, expr):
+    '''
+    Rewrites Q-expression to safe form, in order to ensure that
+    generated SQL is valid.
+
+    Suppose we have the following Q-expression:
+
+    └── OR
+         ├── AND
+         │    └── eav_values__in [1, 2, 3]
+         └── AND (1)
+              ├── AND
+              │    └── eav_values__in [4, 5]
+              └── AND
+                   └── eav_values__in [6, 7, 8]
+
+    All EAV values are stored in a single table. Therefore, INNER JOIN
+    generated for the AND-expression (1) will always fail, i.e.
+    single row in a eav_values table cannot be both in two disjoint sets at
+    the same time (and the whole point of using AND, usually, is two have
+    two different sets). Therefore, we must paritially rewrite the
+    expression so that the generated SQL is valid::
+
+    └── OR
+         ├── AND
+         │    └── eav_values__in [1, 2, 3]
+         └── AND
+              └── pk__in [1, 2]
+
+    This is done by merging dangerous AND's and substituting them with
+    explicit ``pk__in`` filter, where pks are taken from evaluted
+    Q-expr branch.
+
+    Args:
+        model_cls (Model class): model class used to construct QuerySet
+        from leaf attribute-value expression.
+        expr: (Q | tuple): Q-expression (or attr-val leaf) to be rewritten.
+
+    Returns:
+        Q | tuple
+    '''
+    # Node in a Q-expr can be a Q or an attribute-value tuple (leaf).
+    # We are only interested in Qs.
+
+    if type(expr) == Q:
+        config_cls = getattr(model_cls, '_eav_config_cls', None)
+        assert config_cls
+        gr_name = config_cls.generic_relation_attr
+
+        # Recurively check child nodes.
+        expr.children = [rewrite_q_expr(model_cls, c) for c in expr.children]
+        # Check which ones need a rewrite.
+        rewritable = [c for c in expr.children if is_eav_and_leaf(c, gr_name)]
+
+        # Conflict occurs only with two or more AND-expressions.
+        # If there is only one we can ignore it.
+
+        if len(rewritable) > 1:
+            q = None
+            # Save nodes which shouldn't be merged (non-EAV).
+            other = [c for c in expr.children if not c in rewritable]
+
+            for child in rewritable:
+                assert child.children and len(child.children) == 1
+                # Child to be merged is always a terminal Q node,
+                # i.e. it's an AND expression with attribute-value tuple child.
+                attrval = child.children[0]
+                assert type(attrval) == tuple
+
+                fname = '{}__in'.format(gr_name)
+
+                # Child can be either a 'eav_values__in' or 'pk__in' query.
+                # If it's the former then transform it into the latter.
+                # Otherwise, check if the value is valid for the '__in' query.
+                # If so, reverse it back to QuerySet so that set operators
+                # can be applied.
+
+                if attrval[0] == fname or hasattr(attrval[1], '__contains__'):
+                    # Create model queryset.
+                    _q = model_cls.objects.filter(**{fname: attrval[1]})
+                else:
+                    # Second val in tuple is a queryset.
+                    _q = attrval[1]
+
+                # Explicitly check for None. 'or' doesn't work here
+                # as empty QuerySet, which is valid, is falsy.
+                q = q if q != None else _q
+
+                if expr.connector == 'AND':
+                    q &= _q
+                else:
+                    q |= _q
+
+            # If any two children were merged,
+            # update parent expression.
+            if q != None:
+                expr.children = other + [('pk__in', q)]
+
+    return expr
+
+
+def eav_filter(func):
+    '''
+    Decorator used to wrap filter and exclude methods. Passes args through
+    expand_q_filters and kwargs through expand_eav_filter. Returns the
+    called function (filter or exclude).
+    '''
+    @wraps(func)
+    def wrapper(self, *args, **kwargs):
+        nargs = []
+        nkwargs = {}
+
+        for arg in args:
+            if isinstance(arg, Q):
+                # Modify Q objects (warning: recursion ahead).
+                arg = expand_q_filters(arg, self.model)
+                # Rewrite Q-expression to safeform.
+                arg = rewrite_q_expr(self.model, arg)
+            nargs.append(arg)
+
+        for key, value in kwargs.items():
+            # Modify kwargs (warning: recursion ahead).
+            nkey, nval = expand_eav_filter(self.model, key, value)
+
+            if nkey in nkwargs:
+                # Apply AND to both querysets.
+                nkwargs[nkey] = (nkwargs[nkey] & nval).distinct()
+            else:
+                nkwargs.update({nkey: nval})
+
+        return func(self, *nargs, **nkwargs)
+
+    return wrapper
+
+
+def expand_q_filters(q, root_cls):
+    '''
+    Takes a Q object and a model class.
+    Recursively passes each filter / value in the Q object tree leaf nodes
+    through expand_eav_filter
+    '''
+    new_children = []
+
+    for qi in q.children:
+        if type(qi) is tuple:
+            # This child is a leaf node: in Q this is a 2-tuple of:
+            # (filter parameter, value).
+            key, value = expand_eav_filter(root_cls, *qi)
+            new_children.append(Q(**{key: value}))
+        else:
+            # This child is another Q node: recursify!
+            new_children.append(expand_q_filters(qi, root_cls))
+
+    q.children = new_children
+    return q
+
+
+def expand_eav_filter(model_cls, key, value):
+    '''
+    Accepts a model class and a key, value.
+    Recurisively replaces any eav filter with a subquery.
+
+    For example::
+
+        key = 'eav__height'
+        value = 5
+
+    Would return::
+
+        key = 'eav_values__in'
+        value = Values.objects.filter(value_int=5, attribute__slug='height')
+    '''
+    fields = key.split('__')
+    config_cls = getattr(model_cls, '_eav_config_cls', None)
+
+    if len(fields) > 1 and config_cls and fields[0] == config_cls.eav_attr:
+        slug = fields[1]
+        gr_name = config_cls.generic_relation_attr
+        datatype = Attribute.objects.get(slug=slug).datatype
+
+        lookup = '__%s' % fields[2] if len(fields) > 2 else ''
+        kwargs = {
+            'value_{}{}'.format(datatype, lookup): value,
+            'attribute__slug': slug
+        }
+        value = Value.objects.filter(**kwargs)
+
+        return '%s__in' % gr_name, value
+
+    try:
+        field = model_cls._meta.get_field(fields[0])
+    except models.FieldDoesNotExist:
+        return key, value
+
+    if not field.auto_created or field.concrete:
+        return key, value
+    else:
+        sub_key = '__'.join(fields[1:])
+        key, value = expand_eav_filter(field.model, sub_key, value)
+        return '{}__{}'.format(ields[0], key), value


 class EavQuerySet(QuerySet):
+    '''
+    Overrides relational operators for EAV models.
+    '''
+
    @eav_filter
    def filter(self, *args, **kwargs):
        '''
-        Pass *args* and *kwargs* through :func:`eav_filter`, then pass to
+        Pass *args* and *kwargs* through ``eav_filter``, then pass to
        the ``models.Manager`` filter method.
        '''
        return super().filter(*args, **kwargs).distinct()
@ -33,7 +265,7 @@ class EavQuerySet(QuerySet):
    @eav_filter
    def exclude(self, *args, **kwargs):
        '''
-        Pass *args* and *kwargs* through :func:`eav_filter`, then pass to
+        Pass *args* and *kwargs* through ``eav_filter``, then pass to
        the ``models.Manager`` exclude method.
        '''
        return super().exclude(*args, **kwargs).distinct()
@ -41,7 +273,7 @@ class EavQuerySet(QuerySet):
    @eav_filter
    def get(self, *args, **kwargs):
        '''
-        Pass *args* and *kwargs* through :func:`eav_filter`, then pass to
+        Pass *args* and *kwargs* through ``eav_filter``, then pass to
        the ``models.Manager`` get method.
        '''
        return super().get(*args, **kwargs)
--- a/eav/utils.py
+++ b/eav/utils.py
@ -0,0 +1,21 @@
+import sys
+from django.db.models import Q
+
+
+def print_q_expr(expr, indent="", is_tail=True):
+    '''
+    Simple print method for debugging Q-expressions' trees.
+    '''
+    sys.stdout.write(indent)
+    sa, sb = (' └── ', '    ') if is_tail else (' ├── ', ' │   ')
+
+    if type(expr) == Q:
+        sys.stdout.write('{}{}\n'.format(sa, expr.connector))
+        for child in expr.children:
+            print_q_expr(child, indent + sb, expr.children[-1] == child)
+    else:
+        try:
+            queryset = ', '.join(repr(v) for v in expr[1])
+        except TypeError:
+            queryset = repr(expr[1])
+        sys.stdout.write(' └── {} {}\n'.format(expr[0], queryset))
--- a/tests/models.py
+++ b/tests/models.py
@ -5,16 +5,24 @@ from eav.decorators import register_eav
 class Patient(models.Model):
    name = models.CharField(max_length=12)

-    def __unicode__(self):
+    def __str__(self):
        return self.name

+    def __repr__(self):
+        return self.name
+
+
 class Encounter(models.Model):
    num = models.PositiveSmallIntegerField()
    patient = models.ForeignKey(Patient, on_delete=models.PROTECT)

-    def __unicode__(self):
+    def __str__(self):
        return '%s: encounter num %d' % (self.patient, self.num)

+    def __repr__(self):
+        return self.name
+
+
@register_eav()
 class ExampleModel(models.Model):
    name = models.CharField(max_length=12)
--- a/tests/queries.py
+++ b/tests/queries.py
@ -1,13 +1,13 @@
-from django.test import TestCase
-from django.db.models import Q
 from django.contrib.auth.models import User
 from django.core.exceptions import MultipleObjectsReturned
+from django.db.models import Q
+from django.test import TestCase

 import eav
+from eav.models import Attribute, EnumGroup, EnumValue, Value
 from eav.registry import EavConfig
-from eav.models import EnumValue, EnumGroup, Attribute, Value

-from .models import Patient, Encounter, ExampleModel
+from .models import Encounter, ExampleModel, Patient


 class Queries(TestCase):
@ -23,13 +23,13 @@ class Queries(TestCase):

        self.yes = EnumValue.objects.create(value='yes')
        self.no = EnumValue.objects.create(value='no')
-        self.unkown = EnumValue.objects.create(value='unkown')
-        
+        self.unknown = EnumValue.objects.create(value='unknown')
+
        ynu = EnumGroup.objects.create(name='Yes / No / Unknown')
        ynu.enums.add(self.yes)
        ynu.enums.add(self.no)
-        ynu.enums.add(self.unkown)
-        
+        ynu.enums.add(self.unknown)
+
        Attribute.objects.create(name='fever', datatype=Attribute.TYPE_ENUM, enum_group=ynu)

    def tearDown(self):
@ -37,7 +37,6 @@ class Queries(TestCase):
        eav.unregister(Patient)

    def test_get_or_create_with_eav(self):
-        return
        p = Patient.objects.get_or_create(name='Bob', eav__age=5)
        self.assertEqual(Patient.objects.count(), 1)
        self.assertEqual(Value.objects.count(), 1)
@ -49,24 +48,25 @@ class Queries(TestCase):
        self.assertEqual(Value.objects.count(), 2)

    def test_get_with_eav(self):
-        return
        p1, _ = Patient.objects.get_or_create(name='Bob', eav__age=6)
        self.assertEqual(Patient.objects.get(eav__age=6), p1)
-        
+
        p2, _ = Patient.objects.get_or_create(name='Fred', eav__age=6)
        self.assertRaises(MultipleObjectsReturned, lambda: Patient.objects.get(eav__age=6))

-    def test_filtering_on_normal_and_eav_fields(self):       
+    def test_filtering_on_normal_and_eav_fields(self):
        yes = self.yes
        no = self.no
+
        data = [
-            ['3_New_York_USA', 3, no, 'New York', 'USA'],
-            ['15_Bamako_Mali', 15, no, 'Bamako', 'Mali'],
-            ['15_Kisumu_Kenya', 15, yes, 'Kisumu', 'Kenya'],
-            ['3_Nice_France', 3, no, 'Nice', 'France'],
-            ['2_France_Nice', 2, yes, 'France', 'Nice']
+            # Name,    age, fever, city,       country.
+            ['Anne',   3,   no,    'New York', 'USA'],
+            ['Bob',    15,  no,    'Bamako',   'Mali'],
+            ['Cyrill', 15,  yes,   'Kisumu',   'Kenya'],
+            ['Daniel', 3,   no,    'Nice',     'France'],
+            ['Eugene', 2,   yes,   'France',   'Nice']
        ]
-        
+
        for row in data:
            Patient.objects.create(
                name=row[0],
@ -76,38 +76,66 @@ class Queries(TestCase):
                eav__country=row[4]
            )

-        #1 = Q(eav__city__contains='Y') & Q(eav__fever=no)
-        #2 = Q(eav__age=3)
-        #3 = (q1 & q2)
-        #rint(vars(q3))
-        
-        # = Patient.objects.filter(q3)
-        #print(q)
-            
-        # return
+        # Check number of objects in DB.
        self.assertEqual(Patient.objects.count(), 5)
        self.assertEqual(Value.objects.count(), 20)

-        self.assertEqual(Patient.objects.filter(eav__city__contains='Y').count(), 1)
-        self.assertEqual(Patient.objects.exclude(eav__city__contains='Y').count(), 4)
+        # Nobody
+        q1 = Q(eav__fever=yes) & Q(eav__fever=no)
+        p = Patient.objects.filter(q1)
+        self.assertEqual(p.count(), 0)

-        # Bob
-        self.assertEqual(Patient.objects.filter(Q(eav__city__contains='Y')).count(), 1)
+        # Anne, Daniel
+        q1 = Q(eav__age__gte=3)        # Everyone except Eugene
+        q2 = Q(eav__age__lt=15)        # Anne, Daniel, Eugene
+        p = Patient.objects.filter(q2 & q1)
+        self.assertEqual(p.count(), 2)

-        # Everyone except Bob
-        #self.assertEqual(Patient.objects.exclude(Q(eav__city__contains='Y')).count(), 4)
-
-        # Bob, Fred, Joe
-        q1 = Q(eav__city__contains='Y') |  Q(eav__fever=no)
-        self.assertEqual(Patient.objects.filter(q1).count(), 3)
-
-        # Joe, Bob
+        # Anne
+        q1 = Q(eav__city__contains='Y') & Q(eav__fever=no)
        q2 = Q(eav__age=3)
-        self.assertEqual(Patient.objects.filter(q2).count(), 2)
+        p = Patient.objects.filter(q1 & q2)
+        self.assertEqual(p.count(), 1)

-        # Joe, Bob
-        #elf.assertEqual(Patient.objects.filter(q1 & q2).count(), 1)
+        # Anne, Daniel
+        q1 = Q(eav__city__contains='Y', eav__fever=no)
+        q2 = Q(eav__city='Nice')
+        q3 = Q(eav__age=3)
+        p = Patient.objects.filter((q1 | q2) & q3)
+        self.assertEqual(p.count(), 2)

-        # Jose
-        self.assertEqual(Patient.objects.filter(name__contains='F', eav__fever=yes).count(), 1)
+        # Everyone
+        q1 = Q(eav__fever=no) | Q(eav__fever=yes)
+        p = Patient.objects.filter(q1)
+        self.assertEqual(p.count(), 5)

+        # Anne, Bob, Daniel
+        q1 = Q(eav__fever=no)              # Anne, Bob, Daniel
+        q2 = Q(eav__fever=yes)             # Cyrill, Eugene
+        q3 = Q(eav__country__contains='e') # Cyrill, Daniel, Eugene
+        q4 = q2 & q3                       # Cyrill, Daniel, Eugene
+        q5 = (q1 | q4) & q1                # Anne, Bob, Daniel
+        p = Patient.objects.filter(q5)
+        self.assertEqual(p.count(), 3)
+
+        # Everyone except Anne
+        q1 = Q(eav__city__contains='Y')
+        p = Patient.objects.exclude(q1)
+        self.assertEqual(p.count(), 4)
+
+        # Anne, Bob, Daniel
+        q1 = Q(eav__city__contains='Y')
+        q2 = Q(eav__fever=no)
+        q3 = q1 | q2
+        p = Patient.objects.filter(q3)
+        self.assertEqual(p.count(), 3)
+
+        # Anne, Daniel
+        q1 = Q(eav__age=3)
+        p = Patient.objects.filter(q1)
+        self.assertEqual(p.count(), 2)
+
+        # Eugene
+        q1 = Q(name__contains='E', eav__fever=yes)
+        p = Patient.objects.filter(q1)
+        self.assertEqual(p.count(), 1)