Add a JoinManager that helps with performance (#351)

* Add the join manager + tests

* Documentation for join manager

* Use order_by for consistent tests

* Use postgres instead sqlite for tests for better reliability

* Fix coverage

* Drop django 1.8
This commit is contained in:
Jonathan Sundqvist 2018-11-25 15:18:23 +01:00 committed by Asif Saif Uddin
parent 08161456d5
commit 2cb773372d
12 changed files with 247 additions and 36 deletions

View file

@ -1,5 +1,2 @@
[run]
source = model_utils
omit = .*
tests/*
*/_*
include = model_utils/*.py

View file

@ -9,6 +9,8 @@ python:
install: pip install tox-travis codecov
# positional args ({posargs}) to pass into tox.ini
script: tox -- --cov --cov-append
services:
- postgresql
after_success: codecov
deploy:
provider: pypi

View file

@ -22,6 +22,7 @@
| Jarek Glowacki <github.com/jarekwg>
| Javier García Sogo <jgsogo@gmail.com>
| Jeff Elmore <jeffelmore.org>
| Jonathan Sundqvist <jonathan@argpar.se>
| Keryn Knight <kerynknight.com>
| Martey Dodoo <martey+django-model-utils@mobolic.com>
| Matthew Schinckel <matt@schinckel.net>

View file

@ -28,6 +28,15 @@ Getting Help
Documentation for django-model-utils is available
https://django-model-utils.readthedocs.io/
Run tests
---------
.. code-block
pip install -e .
py.test
Contributing
============

View file

@ -86,6 +86,33 @@ it's safe to use as your default manager for the model.
.. _contributed by Jeff Elmore: http://jeffelmore.org/2010/11/11/automatic-downcasting-of-inherited-models-in-django/
JoinManager
-----------
The ``JoinManager`` will create a temporary table of your current queryset
and join that temporary table with the model of your current queryset. This can
be advantageous if you have to page through your entire DB and using django's
slice mechanism to do that. ``LIMIT .. OFFSET ..`` becomes slower the bigger
offset you use.
.. code-block:: python
sliced_qs = Place.objects.all()[2000:2010]
qs = sliced_qs.join()
# qs contains 10 objects, and there will be a much smaller performance hit
# for paging through all of first 2000 objects.
Alternatively, you can give it a queryset and the manager will create a temporary
table and join that to your current queryset. This can work as a more performant
alternative to using django's ``__in`` as described in the following
(`StackExchange answer`_).
.. code-block:: python
big_qs = Restaurant.objects.filter(menu='vegetarian')
qs = Country.objects.filter(country_code='SE').join(big_qs)
.. _StackExchange answer: https://dba.stackexchange.com/questions/91247/optimizing-a-postgres-query-with-a-large-in
.. _QueryManager:

View file

@ -3,16 +3,15 @@ import django
from django.db import models
from django.db.models.fields.related import OneToOneField, OneToOneRel
from django.db.models.query import QuerySet
try:
from django.db.models.query import BaseIterable, ModelIterable
except ImportError:
# Django 1.8 does not have iterable classes
BaseIterable, ModelIterable = object, object
from django.db.models.query import ModelIterable
from django.core.exceptions import ObjectDoesNotExist
from django.db.models.constants import LOOKUP_SEP
from django.utils.six import string_types
from django.db import connection
from django.db.models.sql.datastructures import Join
class InheritanceIterable(ModelIterable):
def __iter__(self):
@ -104,10 +103,6 @@ class InheritanceQuerySetMixin(object):
if hasattr(self, name):
kwargs[name] = getattr(self, name)
if django.VERSION < (1, 9):
kwargs['klass'] = klass
kwargs['setup'] = setup
return super(InheritanceQuerySetMixin, self)._clone(**kwargs)
def annotate(self, *args, **kwargs):
@ -189,10 +184,7 @@ class InheritanceQuerySetMixin(object):
if levels:
levels -= 1
while parent_link is not None:
if django.VERSION < (1, 9):
related = parent_link.rel
else:
related = parent_link.remote_field
related = parent_link.remote_field
ancestry.insert(0, related.get_accessor_name())
if levels or levels is None:
parent_model = related.model
@ -308,3 +300,111 @@ class SoftDeletableManagerMixin(object):
class SoftDeletableManager(SoftDeletableManagerMixin, models.Manager):
pass
class JoinQueryset(models.QuerySet):
def get_quoted_query(self, query):
query, params = query.sql_with_params()
# Put additional quotes around string.
params = [
'\'{}\''.format(p)
if isinstance(p, str) else p
for p in params
]
# Cast list of parameters to tuple because I got
# "not enough format characters" otherwise.
params = tuple(params)
return query % params
def join(self, qs=None):
'''
Join one queryset together with another using a temporary table. If
no queryset is used, it will use the current queryset and join that
to itself.
`Join` either uses the current queryset and effectively does a self-join to
create a new limited queryset OR it uses a querset given by the user.
The model of a given queryset needs to contain a valid foreign key to
the current queryset to perform a join. A new queryset is then created.
'''
to_field = 'id'
if qs:
fk = [
fk for fk in qs.model._meta.fields
if getattr(fk, 'related_model', None) == self.model
]
fk = fk[0] if fk else None
model_set = '{}_set'.format(self.model.__name__.lower())
key = fk or getattr(qs.model, model_set, None)
if not key:
raise ValueError('QuerySet is not related to current model')
try:
fk_column = key.column
except AttributeError:
fk_column = 'id'
to_field = key.field.column
qs = qs.only(fk_column)
# if we give a qs we need to keep the model qs to not lose anything
new_qs = self
else:
fk_column = 'id'
qs = self.only(fk_column)
new_qs = self.model.objects.all()
TABLE_NAME = 'temp_stuff'
query = self.get_quoted_query(qs.query)
sql = '''
DROP TABLE IF EXISTS {table_name};
DROP INDEX IF EXISTS {table_name}_id;
CREATE TEMPORARY TABLE {table_name} AS {query};
CREATE INDEX {table_name}_{fk_column} ON {table_name} ({fk_column});
'''.format(table_name=TABLE_NAME, fk_column=fk_column, query=str(query))
with connection.cursor() as cursor:
cursor.execute(sql)
class TempModel(models.Model):
temp_key = models.ForeignKey(
self.model,
on_delete=models.DO_NOTHING,
db_column=fk_column,
to_field=to_field
)
class Meta:
managed = False
db_table = TABLE_NAME
conn = Join(
table_name=TempModel._meta.db_table,
parent_alias=new_qs.query.get_initial_alias(),
table_alias=None,
join_type='INNER JOIN',
join_field=self.model.tempmodel_set.rel,
nullable=False
)
new_qs.query.join(conn, reuse=None)
return new_qs
class JoinManagerMixin(object):
"""
Manager that adds a method join. This method allows you to join two
querysets together.
"""
_queryset_class = JoinQueryset
def get_queryset(self):
return self._queryset_class(model=self.model, using=self._db)
class JoinManager(JoinManagerMixin, models.Manager):
pass

View file

@ -1,2 +1,3 @@
pytest==3.3.1
pytest-django==3.1.2
psycopg2==2.7.6.1

View file

@ -9,7 +9,11 @@ from django.utils.translation import ugettext_lazy as _
from model_utils import Choices
from model_utils.fields import SplitField, MonitorField, StatusField
from model_utils.managers import QueryManager, InheritanceManager
from model_utils.managers import (
QueryManager,
InheritanceManager,
JoinManagerMixin
)
from model_utils.models import (
SoftDeletableModel,
StatusModel,
@ -370,3 +374,22 @@ class ModelWithCustomDescriptor(models.Model):
tracked_regular_field = models.IntegerField()
tracker = FieldTracker(fields=['tracked_custom_field', 'tracked_regular_field'])
class JoinManager(JoinManagerMixin, models.Manager):
pass
class BoxJoinModel(models.Model):
name = models.CharField(max_length=32)
objects = JoinManager()
class JoinItemForeignKey(models.Model):
weight = models.IntegerField()
belonging = models.ForeignKey(
BoxJoinModel,
null=True,
on_delete=models.CASCADE
)
objects = JoinManager()

View file

@ -1,11 +1,16 @@
import os
INSTALLED_APPS = (
'model_utils',
'tests',
)
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.sqlite3'
}
"default": {
"ENGINE": "django.db.backends.postgresql_psycopg2",
"NAME": os.environ.get("DJANGO_DATABASE_NAME_POSTGRES", "modelutils"),
"USER": os.environ.get("DJANGO_DATABASE_USER_POSTGRES", 'postgres'),
"PASSWORD": os.environ.get("DJANGO_DATABASE_PASSWORD_POSTGRES", ""),
},
}
SECRET_KEY = 'dummy'

View file

@ -123,10 +123,16 @@ class InheritanceManagerTests(TestCase):
ensure that the relation names and subclasses are obtained correctly.
"""
child3 = InheritanceManagerTestChild3.objects.create()
results = InheritanceManagerTestParent.objects.all().select_subclasses()
qs = InheritanceManagerTestParent.objects.all()
results = qs.select_subclasses().order_by('pk')
expected_objs = [self.child1, self.child2, self.grandchild1,
self.grandchild1_2, child3]
expected_objs = [
self.child1,
self.child2,
self.grandchild1,
self.grandchild1_2,
child3
]
self.assertEqual(list(results), expected_objs)
expected_related_names = [
@ -146,7 +152,8 @@ class InheritanceManagerTests(TestCase):
"""
related_name = 'manual_onetoone'
child3 = InheritanceManagerTestChild3.objects.create()
results = InheritanceManagerTestParent.objects.all().select_subclasses(related_name)
qs = InheritanceManagerTestParent.objects.all()
results = qs.select_subclasses(related_name).order_by('pk')
expected_objs = [InheritanceManagerTestParent(pk=self.child1.pk),
InheritanceManagerTestParent(pk=self.child2.pk),
@ -389,14 +396,16 @@ class InheritanceManagerUsingModelsTests(TestCase):
"""
child3 = InheritanceManagerTestChild3.objects.create()
results = InheritanceManagerTestParent.objects.all().select_subclasses(
InheritanceManagerTestChild3)
InheritanceManagerTestChild3).order_by('pk')
expected_objs = [InheritanceManagerTestParent(pk=self.parent1.pk),
InheritanceManagerTestParent(pk=self.child1.pk),
InheritanceManagerTestParent(pk=self.child2.pk),
InheritanceManagerTestParent(pk=self.grandchild1.pk),
InheritanceManagerTestParent(pk=self.grandchild1_2.pk),
child3]
expected_objs = [
InheritanceManagerTestParent(pk=self.parent1.pk),
InheritanceManagerTestParent(pk=self.child1.pk),
InheritanceManagerTestParent(pk=self.child2.pk),
InheritanceManagerTestParent(pk=self.grandchild1.pk),
InheritanceManagerTestParent(pk=self.grandchild1_2.pk),
child3
]
self.assertEqual(list(results), expected_objs)
expected_related_names = ['manual_onetoone']

View file

@ -0,0 +1,38 @@
from django.test import TestCase
from tests.models import JoinItemForeignKey, BoxJoinModel
class JoinManagerTest(TestCase):
def setUp(self):
for i in range(20):
BoxJoinModel.objects.create(name='name_{i}'.format(i=i))
JoinItemForeignKey.objects.create(
weight=10, belonging=BoxJoinModel.objects.get(name='name_1')
)
JoinItemForeignKey.objects.create(weight=20)
def test_self_join(self):
a_slice = BoxJoinModel.objects.all()[0:10]
with self.assertNumQueries(1):
result = a_slice.join()
self.assertEquals(result.count(), 10)
def test_self_join_with_where_statement(self):
qs = BoxJoinModel.objects.filter(name='name_1')
result = qs.join()
self.assertEquals(result.count(), 1)
def test_join_with_other_qs(self):
item_qs = JoinItemForeignKey.objects.filter(weight=10)
boxes = BoxJoinModel.objects.all().join(qs=item_qs)
self.assertEquals(boxes.count(), 1)
self.assertEquals(boxes[0].name, 'name_1')
def test_reverse_join(self):
box_qs = BoxJoinModel.objects.filter(name='name_1')
items = JoinItemForeignKey.objects.all().join(box_qs)
self.assertEquals(items.count(), 1)
self.assertEquals(items[0].weight, 10)

View file

@ -1,14 +1,13 @@
[tox]
envlist =
py27-django{18,19,110,111}
py34-django{18,19,110,111,200}
py35-django{18,19,110,111,200,201,trunk}
py27-django{19,110,111}
py34-django{19,110,111,200}
py35-django{19,110,111,200,201,trunk}
py36-django{111,200,201,trunk}
flake8
[testenv]
deps =
django18: Django>=1.8,<1.9
django19: Django>=1.9,<1.10
django110: Django>=1.10,<1.11
django111: Django>=1.11,<1.12