From 6d4112e8ce07c10ee1f61c17fe7358fb270e8b6c Mon Sep 17 00:00:00 2001 From: Matthieu Rigal Date: Sat, 9 Oct 2021 17:30:12 +0200 Subject: [PATCH] Fix performance regression: avoid pickling the whole instance when deepcopying fields on Django 3.1+ (#500) * test: Add failing test for #gh-498 * fix: Fix performance regression on FileTracker for FileField on Django 3.1+ --- AUTHORS.rst | 1 + CHANGES.rst | 2 ++ model_utils/tracker.py | 43 ++++++++++++++++++++++--- tests/test_fields/test_field_tracker.py | 20 ++++++++++++ 4 files changed, 61 insertions(+), 5 deletions(-) diff --git a/AUTHORS.rst b/AUTHORS.rst index 7a4391a..f50fc91 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -51,6 +51,7 @@ | Lucas Wiman | Martey Dodoo | Matthew Schinckel +| Matthieu Rigal | Michael van Tellingen | Mike Bryant | Mikhail Silonov diff --git a/CHANGES.rst b/CHANGES.rst index 89ffe1a..34f7028 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,6 +8,8 @@ Unreleased - Added urlsafe token field. - Introduce context manager for FieldTracker state reset (GH-#491) +- Fix performance regression of FieldTracker on FileField subclasses on Django 3.1+ + (GH-#498) 4.1.1 (2020-12-01) ------------------ diff --git a/model_utils/tracker.py b/model_utils/tracker.py index d19c8c4..682ed29 100644 --- a/model_utils/tracker.py +++ b/model_utils/tracker.py @@ -3,10 +3,43 @@ from functools import wraps from django.core.exceptions import FieldError from django.db import models -from django.db.models.fields.files import FileDescriptor +from django.db.models.fields.files import FieldFile, FileDescriptor from django.db.models.query_utils import DeferredAttribute +class LightStateFieldFile(FieldFile): + """ + FieldFile subclass with the only aim to remove the instance from the state. + + The change introduced in Django 3.1 on FieldFile subclasses results in pickling the + whole instance for every field tracked. + As this is done on the initialization of objects, a simple queryset evaluation on + Django 3.1+ can make the app unusable, as CPU and memory usage gets easily + multiplied by magnitudes. + """ + def __getstate__(self): + """ + We don't need to deepcopy the instance, so nullify if provided. + """ + state = super().__getstate__() + if 'instance' in state: + state['instance'] = None + return state + + +def lightweight_deepcopy(value): + """ + Use our lightweight class to avoid copying the instance on a FieldFile deepcopy. + """ + if isinstance(value, FieldFile): + value = LightStateFieldFile( + instance=value.instance, + field=value.field, + name=value.name, + ) + return deepcopy(value) + + class DescriptorMixin: tracker_instance = None @@ -20,7 +53,7 @@ class DescriptorMixin: was_deferred = True value = super().__get__(instance, owner) if was_deferred: - self.tracker_instance.saved_data[field_name] = deepcopy(value) + self.tracker_instance.saved_data[field_name] = lightweight_deepcopy(value) return value def _get_field_name(self): @@ -44,7 +77,7 @@ class DescriptorWrapper: value = self.descriptor if was_deferred: tracker_instance = getattr(instance, self.tracker_attname) - tracker_instance.saved_data[self.field_name] = deepcopy(value) + tracker_instance.saved_data[self.field_name] = lightweight_deepcopy(value) return value def __set__(self, instance, value): @@ -184,7 +217,7 @@ class FieldInstanceTracker: # preventing mutable fields side effects for field, field_value in self.saved_data.items(): - self.saved_data[field] = deepcopy(field_value) + self.saved_data[field] = lightweight_deepcopy(field_value) def current(self, fields=None): """Returns dict of current values for all tracked fields""" @@ -225,7 +258,7 @@ class FieldInstanceTracker: else: current_value = self.get_field_value(field) self.instance.refresh_from_db(fields=[field]) - self.saved_data[field] = deepcopy(self.get_field_value(field)) + self.saved_data[field] = lightweight_deepcopy(self.get_field_value(field)) setattr(self.instance, self.field_map[field], current_value) return self.saved_data.get(field) diff --git a/tests/test_fields/test_field_tracker.py b/tests/test_fields/test_field_tracker.py index cd9b485..670e587 100644 --- a/tests/test_fields/test_field_tracker.py +++ b/tests/test_fields/test_field_tracker.py @@ -2,6 +2,7 @@ from unittest import skip from django.core.cache import cache from django.core.exceptions import FieldError +from django.db.models.fields.files import FieldFile from django.test import TestCase from model_utils import FieldTracker @@ -586,6 +587,25 @@ class FieldTrackerFileFieldTests(FieldTrackerTestCase): self.some_file = 'something.txt' self.another_file = 'another.txt' + def test_saved_data_without_instance(self): + """ + Tests that instance won't get copied by the Field Tracker. + + This change was introduced in Django 3.1 with + https://github.com/django/django/pull/12055 + It results in a dramatic CPU and memory usage of FieldTracker on FileField and + its subclasses. + The pickling/deepcopying the instance is useless in the context of FieldTracker + thus we are skipping it. + """ + self.assertEqual(self.tracker.saved_data, {}) + self.update_instance(some_file=self.some_file) + field_file_copy = self.tracker.saved_data.get('some_file') + self.assertIsNotNone(field_file_copy) + self.assertEqual(field_file_copy.__getstate__().get('instance'), None) + self.assertEqual(self.instance.some_file.instance, self.instance) + self.assertIsInstance(self.instance.some_file, FieldFile) + def test_pre_save_changed(self): self.assertChanged(some_file=None) self.instance.some_file = self.some_file