Add Q-expr rewriter/fix relational algebra (#1)

* Add debug Q-expr print function

* Change old __unicode__ methods to __str__ format

* Move __unicode__ methods to __str__

* Rewrite AND Q-expressions to safe form

* Make Q-expr debug print method use nicer format

* Document rewrite_q_expr function

* Rewrite/improve queries' test

* Document some more code

* Reorganize and document queryset module

* Add more tests for queries

* Use generic relation attribute instead instead of 'eav_values'

* Update docstring

* Fix a typo

* Move up a variable

* Rename a function

* Check if 'in' filter rhs is not a QuerySet

* Add newline to the end of file

* Fix typo; tweak docstring
This commit is contained in:
Iwo Herka 2018-05-18 13:18:24 +02:00 committed by GitHub
parent bcd129a03d
commit 2b6f470fa7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 373 additions and 189 deletions

View file

@ -1,19 +1,19 @@
from functools import wraps
from django.db import models
from .models import Attribute, Value
'''
Decorators module.
This module contains pure wrapper functions used as decorators.
Functions in this module should be simple and not involve complex logic.
'''
def register_eav(**kwargs):
"""
'''
Registers the given model(s) classes and wrapped Model class with
django-eav:
django-eav::
@register_eav
class Author(models.Model):
pass
"""
@register_eav
class Author(models.Model):
pass
'''
from . import register
from django.db.models import Model
@ -24,110 +24,3 @@ def register_eav(**kwargs):
return model_class
return _model_eav_wrapper
def eav_filter(func):
'''
Decorator used to wrap filter and exclude methods. Passes args through
expand_q_filters and kwargs through expand_eav_filter. Returns the
called function (filter or exclude).
'''
@wraps(func)
def wrapper(self, *args, **kwargs):
nargs = []
nkwargs = {}
for arg in args:
if isinstance(arg, models.Q):
# Modify Q objects (warning: recursion ahead).
# import pdb; pdb.set_trace()
arg = expand_q_filters(arg, self.model)
nargs.append(arg)
o = self.model.objects.all().first()
fn = self.model.objects.filter
# print(args, kwargs)
for key, value in kwargs.items():
# Modify kwargs (warning: recursion ahead).
nkey, nval = expand_eav_filter(self.model, key, value)
if nkey in nkwargs:
# Apply AND to both querysets.
nkwargs[nkey] = (nkwargs[nkey] & nval).distinct()
else:
nkwargs.update({nkey: nval})
return func(self, *nargs, **nkwargs)
return wrapper
def expand_q_filters(q, root_cls):
'''
Takes a Q object and a model class.
Recursivley passes each filter / value in the Q object tree leaf nodes
through expand_eav_filter
'''
new_children = []
for qi in q.children:
if type(qi) is tuple:
# this child is a leaf node: in Q this is a 2-tuple of:
# (filter parameter, value)
key, value = expand_eav_filter(root_cls, *qi)
new_children.append(models.Q(**{key: value}))
else:
# this child is another Q node: recursify!
new_children.append(expand_q_filters(qi, root_cls))
_q = models.Q()
_q.children = new_children
_q.connector = q.connector
_q.negated = q.negated
return _q
def expand_eav_filter(model_cls, key, value):
'''
Accepts a model class and a key, value.
Recurisively replaces any eav filter with a subquery.
For example::
key = 'eav__height'
value = 5
Would return::
key = 'eav_values__in'
value = Values.objects.filter(value_int=5, attribute__slug='height')
'''
fields = key.split('__')
config_cls = getattr(model_cls, '_eav_config_cls', None)
if len(fields) > 1 and config_cls and \
fields[0] == config_cls.eav_attr:
slug = fields[1]
gr_name = config_cls.generic_relation_attr
datatype = Attribute.objects.get(slug=slug).datatype
lookup = '__%s' % fields[2] if len(fields) > 2 else ''
kwargs = {'value_%s%s' % (datatype, lookup): value,
'attribute__slug': slug}
value = Value.objects.filter(**kwargs)
return '%s__in' % gr_name, value
try:
field = model_cls._meta.get_field(fields[0])
except models.FieldDoesNotExist:
return key, value
if not field.auto_created or field.concrete:
return key, value
else:
sub_key = '__'.join(fields[1:])
key, value = expand_eav_filter(field.model, sub_key, value)
return '%s__%s' % (fields[0], key), value

View file

@ -79,7 +79,7 @@ class EnumValue(models.Model):
value = models.CharField(_(u"value"), db_index=True,
unique=True, max_length=50)
def __unicode__(self):
def __str__(self):
return self.value
@ -406,9 +406,11 @@ class Value(models.Model):
value = property(_get_value, _set_value)
def __unicode__(self):
return u"%s - %s: \"%s\"" % (self.entity, self.attribute.name,
self.value)
def __str__(self):
return '{}: "{}" ({})'.format(self.attribute.name, self.value, self.entity)
def __repr__(self):
return '{}: "{}" ({})'.format(self.attribute.name, self.value, self.entity.pk)
class Entity(object):

View file

@ -1,31 +1,263 @@
#!/usr/bin/env python
#
# This software is derived from EAV-Django originally written and
# copyrighted by Andrey Mikhaylenko <http://pypi.python.org/pypi/eav-django>
#
# This is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This software is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with EAV-Django. If not, see <http://gnu.org/licenses/>.
'''
Queryset module.
This module contains custom EavQuerySet class used for overriding
relational operators and pure functions for rewriting Q-expressions.
Q-expressions need to be rewritten for two reasons:
1. In order to hide implementation from the user and provide easy to use
syntax sugar, i.e.::
Supplier.objects.filter(eav__city__startswith='New')
instead of::
city_values = Value.objects.filter(value__text__startswith='New')
Supplier.objects.filter(eav_values__in=city_values)
For details see: ``eav_filter``.
2. To ensure that Q-expression tree is compiled to valid SQL.
For details see: ``rewrite_q_expr``.
'''
import sys
from functools import reduce, wraps
from django.db import models
from django.db.models import Q
from django.db.models.query import QuerySet
from .decorators import eav_filter
from .models import Attribute, Value
def is_eav_and_leaf(expr, gr_name):
'''
Checks whether Q-expression is an EAV AND leaf.
Args:
expr (Q | tuple): Q-expression to be checked.
gr_name (str): Generic relation attribute name, by default 'eav_values'
Returns:
bool
'''
return (getattr(expr, 'connector', None) == 'AND' and
len(expr.children) == 1 and
expr.children[0][0] in ['pk__in', '{}__in'.format(gr_name)])
def rewrite_q_expr(model_cls, expr):
'''
Rewrites Q-expression to safe form, in order to ensure that
generated SQL is valid.
Suppose we have the following Q-expression:
OR
AND
eav_values__in [1, 2, 3]
AND (1)
AND
eav_values__in [4, 5]
AND
eav_values__in [6, 7, 8]
All EAV values are stored in a single table. Therefore, INNER JOIN
generated for the AND-expression (1) will always fail, i.e.
single row in a eav_values table cannot be both in two disjoint sets at
the same time (and the whole point of using AND, usually, is two have
two different sets). Therefore, we must paritially rewrite the
expression so that the generated SQL is valid::
OR
AND
eav_values__in [1, 2, 3]
AND
pk__in [1, 2]
This is done by merging dangerous AND's and substituting them with
explicit ``pk__in`` filter, where pks are taken from evaluted
Q-expr branch.
Args:
model_cls (Model class): model class used to construct QuerySet
from leaf attribute-value expression.
expr: (Q | tuple): Q-expression (or attr-val leaf) to be rewritten.
Returns:
Q | tuple
'''
# Node in a Q-expr can be a Q or an attribute-value tuple (leaf).
# We are only interested in Qs.
if type(expr) == Q:
config_cls = getattr(model_cls, '_eav_config_cls', None)
assert config_cls
gr_name = config_cls.generic_relation_attr
# Recurively check child nodes.
expr.children = [rewrite_q_expr(model_cls, c) for c in expr.children]
# Check which ones need a rewrite.
rewritable = [c for c in expr.children if is_eav_and_leaf(c, gr_name)]
# Conflict occurs only with two or more AND-expressions.
# If there is only one we can ignore it.
if len(rewritable) > 1:
q = None
# Save nodes which shouldn't be merged (non-EAV).
other = [c for c in expr.children if not c in rewritable]
for child in rewritable:
assert child.children and len(child.children) == 1
# Child to be merged is always a terminal Q node,
# i.e. it's an AND expression with attribute-value tuple child.
attrval = child.children[0]
assert type(attrval) == tuple
fname = '{}__in'.format(gr_name)
# Child can be either a 'eav_values__in' or 'pk__in' query.
# If it's the former then transform it into the latter.
# Otherwise, check if the value is valid for the '__in' query.
# If so, reverse it back to QuerySet so that set operators
# can be applied.
if attrval[0] == fname or hasattr(attrval[1], '__contains__'):
# Create model queryset.
_q = model_cls.objects.filter(**{fname: attrval[1]})
else:
# Second val in tuple is a queryset.
_q = attrval[1]
# Explicitly check for None. 'or' doesn't work here
# as empty QuerySet, which is valid, is falsy.
q = q if q != None else _q
if expr.connector == 'AND':
q &= _q
else:
q |= _q
# If any two children were merged,
# update parent expression.
if q != None:
expr.children = other + [('pk__in', q)]
return expr
def eav_filter(func):
'''
Decorator used to wrap filter and exclude methods. Passes args through
expand_q_filters and kwargs through expand_eav_filter. Returns the
called function (filter or exclude).
'''
@wraps(func)
def wrapper(self, *args, **kwargs):
nargs = []
nkwargs = {}
for arg in args:
if isinstance(arg, Q):
# Modify Q objects (warning: recursion ahead).
arg = expand_q_filters(arg, self.model)
# Rewrite Q-expression to safeform.
arg = rewrite_q_expr(self.model, arg)
nargs.append(arg)
for key, value in kwargs.items():
# Modify kwargs (warning: recursion ahead).
nkey, nval = expand_eav_filter(self.model, key, value)
if nkey in nkwargs:
# Apply AND to both querysets.
nkwargs[nkey] = (nkwargs[nkey] & nval).distinct()
else:
nkwargs.update({nkey: nval})
return func(self, *nargs, **nkwargs)
return wrapper
def expand_q_filters(q, root_cls):
'''
Takes a Q object and a model class.
Recursively passes each filter / value in the Q object tree leaf nodes
through expand_eav_filter
'''
new_children = []
for qi in q.children:
if type(qi) is tuple:
# This child is a leaf node: in Q this is a 2-tuple of:
# (filter parameter, value).
key, value = expand_eav_filter(root_cls, *qi)
new_children.append(Q(**{key: value}))
else:
# This child is another Q node: recursify!
new_children.append(expand_q_filters(qi, root_cls))
q.children = new_children
return q
def expand_eav_filter(model_cls, key, value):
'''
Accepts a model class and a key, value.
Recurisively replaces any eav filter with a subquery.
For example::
key = 'eav__height'
value = 5
Would return::
key = 'eav_values__in'
value = Values.objects.filter(value_int=5, attribute__slug='height')
'''
fields = key.split('__')
config_cls = getattr(model_cls, '_eav_config_cls', None)
if len(fields) > 1 and config_cls and fields[0] == config_cls.eav_attr:
slug = fields[1]
gr_name = config_cls.generic_relation_attr
datatype = Attribute.objects.get(slug=slug).datatype
lookup = '__%s' % fields[2] if len(fields) > 2 else ''
kwargs = {
'value_{}{}'.format(datatype, lookup): value,
'attribute__slug': slug
}
value = Value.objects.filter(**kwargs)
return '%s__in' % gr_name, value
try:
field = model_cls._meta.get_field(fields[0])
except models.FieldDoesNotExist:
return key, value
if not field.auto_created or field.concrete:
return key, value
else:
sub_key = '__'.join(fields[1:])
key, value = expand_eav_filter(field.model, sub_key, value)
return '{}__{}'.format(ields[0], key), value
class EavQuerySet(QuerySet):
'''
Overrides relational operators for EAV models.
'''
@eav_filter
def filter(self, *args, **kwargs):
'''
Pass *args* and *kwargs* through :func:`eav_filter`, then pass to
Pass *args* and *kwargs* through ``eav_filter``, then pass to
the ``models.Manager`` filter method.
'''
return super().filter(*args, **kwargs).distinct()
@ -33,7 +265,7 @@ class EavQuerySet(QuerySet):
@eav_filter
def exclude(self, *args, **kwargs):
'''
Pass *args* and *kwargs* through :func:`eav_filter`, then pass to
Pass *args* and *kwargs* through ``eav_filter``, then pass to
the ``models.Manager`` exclude method.
'''
return super().exclude(*args, **kwargs).distinct()
@ -41,7 +273,7 @@ class EavQuerySet(QuerySet):
@eav_filter
def get(self, *args, **kwargs):
'''
Pass *args* and *kwargs* through :func:`eav_filter`, then pass to
Pass *args* and *kwargs* through ``eav_filter``, then pass to
the ``models.Manager`` get method.
'''
return super().get(*args, **kwargs)

21
eav/utils.py Normal file
View file

@ -0,0 +1,21 @@
import sys
from django.db.models import Q
def print_q_expr(expr, indent="", is_tail=True):
'''
Simple print method for debugging Q-expressions' trees.
'''
sys.stdout.write(indent)
sa, sb = (' └── ', ' ') if is_tail else (' ├── ', '')
if type(expr) == Q:
sys.stdout.write('{}{}\n'.format(sa, expr.connector))
for child in expr.children:
print_q_expr(child, indent + sb, expr.children[-1] == child)
else:
try:
queryset = ', '.join(repr(v) for v in expr[1])
except TypeError:
queryset = repr(expr[1])
sys.stdout.write(' └── {} {}\n'.format(expr[0], queryset))

View file

@ -5,16 +5,24 @@ from eav.decorators import register_eav
class Patient(models.Model):
name = models.CharField(max_length=12)
def __unicode__(self):
def __str__(self):
return self.name
def __repr__(self):
return self.name
class Encounter(models.Model):
num = models.PositiveSmallIntegerField()
patient = models.ForeignKey(Patient, on_delete=models.PROTECT)
def __unicode__(self):
def __str__(self):
return '%s: encounter num %d' % (self.patient, self.num)
def __repr__(self):
return self.name
@register_eav()
class ExampleModel(models.Model):
name = models.CharField(max_length=12)

View file

@ -1,13 +1,13 @@
from django.test import TestCase
from django.db.models import Q
from django.contrib.auth.models import User
from django.core.exceptions import MultipleObjectsReturned
from django.db.models import Q
from django.test import TestCase
import eav
from eav.models import Attribute, EnumGroup, EnumValue, Value
from eav.registry import EavConfig
from eav.models import EnumValue, EnumGroup, Attribute, Value
from .models import Patient, Encounter, ExampleModel
from .models import Encounter, ExampleModel, Patient
class Queries(TestCase):
@ -23,13 +23,13 @@ class Queries(TestCase):
self.yes = EnumValue.objects.create(value='yes')
self.no = EnumValue.objects.create(value='no')
self.unkown = EnumValue.objects.create(value='unkown')
self.unknown = EnumValue.objects.create(value='unknown')
ynu = EnumGroup.objects.create(name='Yes / No / Unknown')
ynu.enums.add(self.yes)
ynu.enums.add(self.no)
ynu.enums.add(self.unkown)
ynu.enums.add(self.unknown)
Attribute.objects.create(name='fever', datatype=Attribute.TYPE_ENUM, enum_group=ynu)
def tearDown(self):
@ -37,7 +37,6 @@ class Queries(TestCase):
eav.unregister(Patient)
def test_get_or_create_with_eav(self):
return
p = Patient.objects.get_or_create(name='Bob', eav__age=5)
self.assertEqual(Patient.objects.count(), 1)
self.assertEqual(Value.objects.count(), 1)
@ -49,24 +48,25 @@ class Queries(TestCase):
self.assertEqual(Value.objects.count(), 2)
def test_get_with_eav(self):
return
p1, _ = Patient.objects.get_or_create(name='Bob', eav__age=6)
self.assertEqual(Patient.objects.get(eav__age=6), p1)
p2, _ = Patient.objects.get_or_create(name='Fred', eav__age=6)
self.assertRaises(MultipleObjectsReturned, lambda: Patient.objects.get(eav__age=6))
def test_filtering_on_normal_and_eav_fields(self):
def test_filtering_on_normal_and_eav_fields(self):
yes = self.yes
no = self.no
data = [
['3_New_York_USA', 3, no, 'New York', 'USA'],
['15_Bamako_Mali', 15, no, 'Bamako', 'Mali'],
['15_Kisumu_Kenya', 15, yes, 'Kisumu', 'Kenya'],
['3_Nice_France', 3, no, 'Nice', 'France'],
['2_France_Nice', 2, yes, 'France', 'Nice']
# Name, age, fever, city, country.
['Anne', 3, no, 'New York', 'USA'],
['Bob', 15, no, 'Bamako', 'Mali'],
['Cyrill', 15, yes, 'Kisumu', 'Kenya'],
['Daniel', 3, no, 'Nice', 'France'],
['Eugene', 2, yes, 'France', 'Nice']
]
for row in data:
Patient.objects.create(
name=row[0],
@ -76,38 +76,66 @@ class Queries(TestCase):
eav__country=row[4]
)
#1 = Q(eav__city__contains='Y') & Q(eav__fever=no)
#2 = Q(eav__age=3)
#3 = (q1 & q2)
#rint(vars(q3))
# = Patient.objects.filter(q3)
#print(q)
# return
# Check number of objects in DB.
self.assertEqual(Patient.objects.count(), 5)
self.assertEqual(Value.objects.count(), 20)
self.assertEqual(Patient.objects.filter(eav__city__contains='Y').count(), 1)
self.assertEqual(Patient.objects.exclude(eav__city__contains='Y').count(), 4)
# Nobody
q1 = Q(eav__fever=yes) & Q(eav__fever=no)
p = Patient.objects.filter(q1)
self.assertEqual(p.count(), 0)
# Bob
self.assertEqual(Patient.objects.filter(Q(eav__city__contains='Y')).count(), 1)
# Anne, Daniel
q1 = Q(eav__age__gte=3) # Everyone except Eugene
q2 = Q(eav__age__lt=15) # Anne, Daniel, Eugene
p = Patient.objects.filter(q2 & q1)
self.assertEqual(p.count(), 2)
# Everyone except Bob
#self.assertEqual(Patient.objects.exclude(Q(eav__city__contains='Y')).count(), 4)
# Bob, Fred, Joe
q1 = Q(eav__city__contains='Y') | Q(eav__fever=no)
self.assertEqual(Patient.objects.filter(q1).count(), 3)
# Joe, Bob
# Anne
q1 = Q(eav__city__contains='Y') & Q(eav__fever=no)
q2 = Q(eav__age=3)
self.assertEqual(Patient.objects.filter(q2).count(), 2)
p = Patient.objects.filter(q1 & q2)
self.assertEqual(p.count(), 1)
# Joe, Bob
#elf.assertEqual(Patient.objects.filter(q1 & q2).count(), 1)
# Anne, Daniel
q1 = Q(eav__city__contains='Y', eav__fever=no)
q2 = Q(eav__city='Nice')
q3 = Q(eav__age=3)
p = Patient.objects.filter((q1 | q2) & q3)
self.assertEqual(p.count(), 2)
# Jose
self.assertEqual(Patient.objects.filter(name__contains='F', eav__fever=yes).count(), 1)
# Everyone
q1 = Q(eav__fever=no) | Q(eav__fever=yes)
p = Patient.objects.filter(q1)
self.assertEqual(p.count(), 5)
# Anne, Bob, Daniel
q1 = Q(eav__fever=no) # Anne, Bob, Daniel
q2 = Q(eav__fever=yes) # Cyrill, Eugene
q3 = Q(eav__country__contains='e') # Cyrill, Daniel, Eugene
q4 = q2 & q3 # Cyrill, Daniel, Eugene
q5 = (q1 | q4) & q1 # Anne, Bob, Daniel
p = Patient.objects.filter(q5)
self.assertEqual(p.count(), 3)
# Everyone except Anne
q1 = Q(eav__city__contains='Y')
p = Patient.objects.exclude(q1)
self.assertEqual(p.count(), 4)
# Anne, Bob, Daniel
q1 = Q(eav__city__contains='Y')
q2 = Q(eav__fever=no)
q3 = q1 | q2
p = Patient.objects.filter(q3)
self.assertEqual(p.count(), 3)
# Anne, Daniel
q1 = Q(eav__age=3)
p = Patient.objects.filter(q1)
self.assertEqual(p.count(), 2)
# Eugene
q1 = Q(name__contains='E', eav__fever=yes)
p = Patient.objects.filter(q1)
self.assertEqual(p.count(), 1)