Fix performance regression: avoid pickling the whole instance when deepcopying fields on Django 3.1+ (#500)

* test: Add failing test for #gh-498

* fix: Fix performance regression on FileTracker for FileField on Django 3.1+
This commit is contained in:
Matthieu Rigal 2021-10-09 17:30:12 +02:00 committed by GitHub
parent dd0f62bdba
commit 6d4112e8ce
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 61 additions and 5 deletions

View file

@ -51,6 +51,7 @@
| Lucas Wiman <lucaswiman@counsyl.com>
| Martey Dodoo <martey@mobolic.com>
| Matthew Schinckel <matt@schinckel.net>
| Matthieu Rigal <matt.rigal@gmail.com>
| Michael van Tellingen <m.vantellingen@lukkien.com>
| Mike Bryant <m@ocado.com>
| Mikhail Silonov <m.silonov@corp.mail.ru>

View file

@ -8,6 +8,8 @@ Unreleased
- Added urlsafe token field.
- Introduce context manager for FieldTracker state reset (GH-#491)
- Fix performance regression of FieldTracker on FileField subclasses on Django 3.1+
(GH-#498)
4.1.1 (2020-12-01)
------------------

View file

@ -3,10 +3,43 @@ from functools import wraps
from django.core.exceptions import FieldError
from django.db import models
from django.db.models.fields.files import FileDescriptor
from django.db.models.fields.files import FieldFile, FileDescriptor
from django.db.models.query_utils import DeferredAttribute
class LightStateFieldFile(FieldFile):
"""
FieldFile subclass with the only aim to remove the instance from the state.
The change introduced in Django 3.1 on FieldFile subclasses results in pickling the
whole instance for every field tracked.
As this is done on the initialization of objects, a simple queryset evaluation on
Django 3.1+ can make the app unusable, as CPU and memory usage gets easily
multiplied by magnitudes.
"""
def __getstate__(self):
"""
We don't need to deepcopy the instance, so nullify if provided.
"""
state = super().__getstate__()
if 'instance' in state:
state['instance'] = None
return state
def lightweight_deepcopy(value):
"""
Use our lightweight class to avoid copying the instance on a FieldFile deepcopy.
"""
if isinstance(value, FieldFile):
value = LightStateFieldFile(
instance=value.instance,
field=value.field,
name=value.name,
)
return deepcopy(value)
class DescriptorMixin:
tracker_instance = None
@ -20,7 +53,7 @@ class DescriptorMixin:
was_deferred = True
value = super().__get__(instance, owner)
if was_deferred:
self.tracker_instance.saved_data[field_name] = deepcopy(value)
self.tracker_instance.saved_data[field_name] = lightweight_deepcopy(value)
return value
def _get_field_name(self):
@ -44,7 +77,7 @@ class DescriptorWrapper:
value = self.descriptor
if was_deferred:
tracker_instance = getattr(instance, self.tracker_attname)
tracker_instance.saved_data[self.field_name] = deepcopy(value)
tracker_instance.saved_data[self.field_name] = lightweight_deepcopy(value)
return value
def __set__(self, instance, value):
@ -184,7 +217,7 @@ class FieldInstanceTracker:
# preventing mutable fields side effects
for field, field_value in self.saved_data.items():
self.saved_data[field] = deepcopy(field_value)
self.saved_data[field] = lightweight_deepcopy(field_value)
def current(self, fields=None):
"""Returns dict of current values for all tracked fields"""
@ -225,7 +258,7 @@ class FieldInstanceTracker:
else:
current_value = self.get_field_value(field)
self.instance.refresh_from_db(fields=[field])
self.saved_data[field] = deepcopy(self.get_field_value(field))
self.saved_data[field] = lightweight_deepcopy(self.get_field_value(field))
setattr(self.instance, self.field_map[field], current_value)
return self.saved_data.get(field)

View file

@ -2,6 +2,7 @@ from unittest import skip
from django.core.cache import cache
from django.core.exceptions import FieldError
from django.db.models.fields.files import FieldFile
from django.test import TestCase
from model_utils import FieldTracker
@ -586,6 +587,25 @@ class FieldTrackerFileFieldTests(FieldTrackerTestCase):
self.some_file = 'something.txt'
self.another_file = 'another.txt'
def test_saved_data_without_instance(self):
"""
Tests that instance won't get copied by the Field Tracker.
This change was introduced in Django 3.1 with
https://github.com/django/django/pull/12055
It results in a dramatic CPU and memory usage of FieldTracker on FileField and
its subclasses.
The pickling/deepcopying the instance is useless in the context of FieldTracker
thus we are skipping it.
"""
self.assertEqual(self.tracker.saved_data, {})
self.update_instance(some_file=self.some_file)
field_file_copy = self.tracker.saved_data.get('some_file')
self.assertIsNotNone(field_file_copy)
self.assertEqual(field_file_copy.__getstate__().get('instance'), None)
self.assertEqual(self.instance.some_file.instance, self.instance)
self.assertIsInstance(self.instance.some_file, FieldFile)
def test_pre_save_changed(self):
self.assertChanged(some_file=None)
self.instance.some_file = self.some_file