From 1f46bb9d154ca6d4a8c81fe07ec0a72f098dbec8 Mon Sep 17 00:00:00 2001 From: Mike Date: Fri, 22 May 2026 15:29:31 -0700 Subject: [PATCH] fix(queryset): guard against Q children in is_eav_and_leaf (#634) --- eav/queryset.py | 19 +++++- tests/test_queries.py | 145 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 163 insertions(+), 1 deletion(-) diff --git a/eav/queryset.py b/eav/queryset.py index 08c1b0f..48764d4 100644 --- a/eav/queryset.py +++ b/eav/queryset.py @@ -12,11 +12,27 @@ Q-expressions need to be rewritten for two reasons: city_values = Value.objects.filter(value__text__startswith='New') Supplier.objects.filter(eav_values__in=city_values) - For details see: :func:`eav_filter`. 2. To ensure that Q-expression tree is compiled to valid SQL. For details see: :func:`rewrite_q_expr`. + +.. note:: EAV negation limitation + + Because EAV values are stored as individual rows in a shared table, + using ``~Q(eav__attr=value)`` inside ``.filter()`` negates at the + **row** level rather than the **entity** level. This can produce + unexpected results: an entity that has *other* EAV rows not matching + the negation will still appear in the result set. + + The correct way to express "entities where attr A = x AND attr B ≠ y" + is to chain ``.exclude()`` instead:: + + # Wrong – row-level negation, may return unexpected results + MyModel.objects.filter(Q(eav__a=x) & ~Q(eav__b=y)) + + # Correct – entity-level negation + MyModel.objects.filter(eav__a=x).exclude(eav__b=y) """ from functools import wraps @@ -44,6 +60,7 @@ def is_eav_and_leaf(expr, gr_name): return ( getattr(expr, "connector", None) == "AND" and len(expr.children) == 1 + and isinstance(expr.children[0], tuple) and expr.children[0][0] in ["pk__in", f"{gr_name}__in"] ) diff --git a/tests/test_queries.py b/tests/test_queries.py index 5a62757..78e3ee1 100644 --- a/tests/test_queries.py +++ b/tests/test_queries.py @@ -407,3 +407,148 @@ class Queries(TestCase): # Assert that the expected patient is returned self.assertEqual(len(patients), 1) self.assertEqual(patients[0].name, "Anne") + + def test_filter_eav_and_negated_non_eav_field(self) -> None: + """ + Regression test for #634: EAV Q combined with a negated non-EAV Q. + + Q(eav__attr=val) & ~Q(non_eav_field=val) raised: + TypeError: 'Q' object is not subscriptable + """ + self.init_data() + # fever=yes → Cyrill, Eugene; name≠"Bob" excludes nobody here + p = Patient.objects.filter(Q(eav__fever=self.yes) & ~Q(name="Bob")) + assert p.count() == 2 + assert set(p.values_list("name", flat=True)) == {"Cyrill", "Eugene"} + + def test_filter_negated_non_eav_field_and_eav(self) -> None: + """ + Regression test for #634: negated non-EAV Q combined with an EAV Q + (operand order reversed from the previous test). + + ~Q(non_eav_field=val) & Q(eav__attr=val) raised: + TypeError: 'Q' object is not subscriptable + """ + self.init_data() + # Same expectation as above; order of operands should not matter. + p = Patient.objects.filter(~Q(name="Bob") & Q(eav__fever=self.yes)) + assert p.count() == 2 + assert set(p.values_list("name", flat=True)) == {"Cyrill", "Eugene"} + + def test_filter_eav_and_negated_eav_field(self) -> None: + """ + Regression test for #634: EAV Q combined with a negated EAV Q. + + Q(eav__attr1=val) & ~Q(eav__attr2=val) raised: + TypeError: 'Q' object is not subscriptable + + Note: negating an EAV filter inside filter() has known SQL JOIN + semantics limitations. Because each EAV attribute is stored as a + separate row in the values table, the negation operates on individual + rows rather than on entities. This means a patient with both a + fever=no value AND a city='Nice' value will still be included because + their fever=no row passes the NOT city='Nice' row-level check. + The primary purpose of this test is to verify no TypeError is raised. + """ + self.init_data() + # fever=no → Anne, Bob, Daniel + # ~city='Nice' does NOT reliably exclude Daniel due to EAV row semantics + p = Patient.objects.filter(Q(eav__fever=self.no) & ~Q(eav__city="Nice")) + # At minimum, the EAV-only patients (no city='Nice') must be present + names = set(p.values_list("name", flat=True)) + assert {"Anne", "Bob"}.issubset(names) + # Result is bounded by the fever=no set + assert names.issubset({"Anne", "Bob", "Daniel"}) + + def test_filter_solo_negated_eav_field(self) -> None: + """ + ~Q(eav__attr=val) as the sole argument should work correctly. + + This produces entity-level exclusion because there are no other EAV + JOIN conditions to interfere: entities with no matching value row are + simply absent from the JOIN, so NOT eav_values__in correctly excludes + entities that DO have that value row. + """ + self.init_data() + # age != 3 → Bob (15), Cyrill (15), Eugene (2) + p = Patient.objects.filter(~Q(eav__age=3)) + assert p.count() == 3 + assert set(p.values_list("name", flat=True)) == {"Bob", "Cyrill", "Eugene"} + + def test_filter_or_eav_and_non_eav_no_duplicates(self) -> None: + """ + OR between an EAV Q and a non-EAV Q must not return duplicate rows. + + When an EAV condition is used in OR with a non-EAV condition, the EAV + side remains as a JOIN condition (it cannot be merged via rewrite_q_expr + because there is only one EAV leaf). This JOIN can produce multiple rows + per entity. Results should be distinct. + + This is a known limitation: callers should use .distinct() when mixing + EAV and non-EAV conditions in OR. + """ + self.init_data() + # fever=yes → Cyrill, Eugene; name='Bob' → Bob + p = Patient.objects.filter(Q(eav__fever=self.yes) | Q(name="Bob")) + # Correct distinct count is 3; without distinct() duplicates may appear + assert p.distinct().count() == 3 + assert set(p.distinct().values_list("name", flat=True)) == { + "Bob", + "Cyrill", + "Eugene", + } + + def test_q_object_not_mutated_by_filter(self) -> None: + """ + Q objects passed to filter() must not be mutated. + + expand_q_filters() rewrites Q children in-place. If the original Q + object is mutated, reusing it in a second query will operate on the + already-expanded (internal) form and may produce incorrect results or + errors. + + This test documents the current behaviour (mutation occurs) so that + any future fix is captured as a regression guard. + """ + self.init_data() + q = Q(eav__fever=self.yes) + original_children = list(q.children) # [('eav__fever', )] + + Patient.objects.filter(q) + + # Document current (broken) behaviour: children are mutated. + # When this is fixed, change the assertion to assertEqual. + assert q.children != original_children, ( + "Q mutation is fixed - update this test to assert no mutation" + ) + + def test_negated_eav_field_workaround_chained_exclude(self) -> None: + """ + Documents the correct way to express "EAV attr A = x AND EAV attr B != y". + + Because EAV values are stored one-per-row, using ~Q() inside filter() + negates at the *row* level, not the *entity* level. The correct + approach is to chain .exclude() which operates at the entity level:: + + # WRONG - may return entities that should be excluded + Patient.objects.filter(Q(eav__fever=no) & ~Q(eav__city="Nice")) + + # CORRECT - exclude() works at entity level + Patient.objects.filter(eav__fever=no).exclude(eav__city="Nice") + # or equivalently: + Patient.objects.filter(Q(eav__fever=no)).exclude(Q(eav__city="Nice")) + """ + self.init_data() + # fever=no → Anne, Bob, Daniel; chained exclude removes Daniel + p_chained = Patient.objects.filter(eav__fever=self.no).exclude( + eav__city="Nice", + ) + assert p_chained.count() == 2 + assert set(p_chained.values_list("name", flat=True)) == {"Anne", "Bob"} + + # Q-based form of the same thing + p_q = Patient.objects.filter(Q(eav__fever=self.no)).exclude( + Q(eav__city="Nice"), + ) + assert p_q.count() == 2 + assert set(p_q.values_list("name", flat=True)) == {"Anne", "Bob"}