feat: give users the option to run the json migration asyncly (#495)

This commit is contained in:
Abdullah Alaqeel 2023-08-13 12:38:21 +03:00 committed by GitHub
parent f2591e420b
commit 134ef73723
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 361 additions and 14 deletions

View file

@ -4,7 +4,7 @@
#### Breaking Changes
- feat: Change `LogEntry.change` field type to `JSONField` rather than `TextField`. This change include a migration that may take time to run depending on the number of records on your `LogEntry` table ([#407](https://github.com/jazzband/django-auditlog/pull/407))
- feat: Change `LogEntry.change` field type to `JSONField` rather than `TextField`. This change include a migration that may take time to run depending on the number of records on your `LogEntry` table ([#407](https://github.com/jazzband/django-auditlog/pull/407))([#495](https://github.com/jazzband/django-auditlog/pull/495))
- Python: Drop support for Python 3.7 ([#546](https://github.com/jazzband/django-auditlog/pull/546))
#### Improvements

View file

@ -11,3 +11,7 @@ class AuditlogConfig(AppConfig):
from auditlog.registry import auditlog
auditlog.register_from_settings()
from auditlog import models
models.changes_func = models._changes_func()

View file

@ -32,3 +32,11 @@ settings.AUDITLOG_CID_HEADER = getattr(
settings, "AUDITLOG_CID_HEADER", "x-correlation-id"
)
settings.AUDITLOG_CID_GETTER = getattr(settings, "AUDITLOG_CID_GETTER", None)
# migration
settings.AUDITLOG_TWO_STEP_MIGRATION = getattr(
settings, "AUDITLOG_TWO_STEP_MIGRATION", False
)
settings.AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT = getattr(
settings, "AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT", False
)

View file

@ -0,0 +1,114 @@
from math import ceil
from django.conf import settings
from django.core.management.base import BaseCommand
from auditlog.models import LogEntry
class Command(BaseCommand):
help = "Migrates changes from changes_text to json changes."
def add_arguments(self, parser):
parser.add_argument(
"-d",
"--database",
default=None,
help="If provided, the script will use native db operations. "
"Otherwise, it will use LogEntry.objects.bulk_create",
dest="db",
type=str,
choices=["postgres", "mysql", "oracle"],
)
parser.add_argument(
"-b",
"--bactch-size",
default=500,
help="Split the migration into multiple batches. If 0, then no batching will be done. "
"When passing a -d/database, the batch value will be ignored.",
dest="batch_size",
type=int,
)
def handle(self, *args, **options):
database = options["db"]
batch_size = options["batch_size"]
if not self.check_logs():
return
if database:
result = self.migrate_using_sql(database)
self.stdout.write(
f"Updated {result} records using native database operations."
)
else:
result = self.migrate_using_django(batch_size)
self.stdout.write(f"Updated {result} records using django operations.")
self.check_logs()
def check_logs(self):
count = self.get_logs().count()
if count:
self.stdout.write(f"There are {count} records that needs migration.")
return True
self.stdout.write("All records are have been migrated.")
if settings.AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT:
self.stdout.write(
"You can now set AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT to False."
)
return False
def get_logs(self):
return LogEntry.objects.filter(
changes_text__isnull=False, changes__isnull=True
).exclude(changes_text__exact="")
def migrate_using_django(self, batch_size):
def _apply_django_migration(_logs) -> int:
import json
updated = []
for log in _logs:
try:
log.changes = json.loads(log.changes_text)
except ValueError:
self.stderr.write(
f"ValueError was raised while migrating the log with id {log.id}."
)
else:
updated.append(log)
LogEntry.objects.bulk_update(updated, fields=["changes"])
return len(updated)
logs = self.get_logs()
if not batch_size:
return _apply_django_migration(logs)
total_updated = 0
for _ in range(ceil(logs.count() / batch_size)):
total_updated += _apply_django_migration(self.get_logs()[:batch_size])
return total_updated
def migrate_using_sql(self, database):
from django.db import connection
def postgres():
with connection.cursor() as cursor:
cursor.execute(
'UPDATE auditlog_logentry SET changes="changes_text"::jsonb'
)
return cursor.cursor.rowcount
if database == "postgres":
return postgres()
else:
self.stderr.write(
"Not yet implemented. Run this management command without passing a -d/--database argument."
)
return 0

View file

@ -1,17 +1,42 @@
# Generated by Django 4.0 on 2022-08-04 15:41
from typing import List
from django.conf import settings
from django.db import migrations, models
def two_step_migrations() -> List:
if settings.AUDITLOG_TWO_STEP_MIGRATION:
return [
migrations.RenameField(
model_name="logentry",
old_name="changes",
new_name="changes_text",
),
migrations.AddField(
model_name="logentry",
name="changes",
field=models.JSONField(null=True, verbose_name="change message"),
),
]
return [
migrations.AddField(
model_name="logentry",
name="changes_text",
field=models.TextField(blank=True, verbose_name="text change message"),
),
migrations.AlterField(
model_name="logentry",
name="changes",
field=models.JSONField(null=True, verbose_name="change message"),
),
]
class Migration(migrations.Migration):
dependencies = [
("auditlog", "0014_logentry_cid"),
]
operations = [
migrations.AlterField(
model_name="logentry",
name="changes",
field=models.JSONField(null=True, verbose_name="change message"),
),
]
operations = [*two_step_migrations()]

View file

@ -1,8 +1,9 @@
import ast
import contextlib
import json
from copy import deepcopy
from datetime import timezone
from typing import Any, Dict, List, Union
from typing import Any, Callable, Dict, List, Union
from dateutil import parser
from dateutil.tz import gettz
@ -66,7 +67,7 @@ class LogEntryManager(models.Manager):
kwargs.setdefault("additional_data", get_additional_data())
# Delete log entries with the same pk as a newly created model.
# This should only be necessary when an pk is used twice.
# This should only be necessary when a pk is used twice.
if kwargs.get("action", None) is LogEntry.Action.CREATE:
if (
kwargs.get("object_id", None) is not None
@ -225,7 +226,7 @@ class LogEntryManager(models.Manager):
pk_field = instance._meta.pk.name
pk = getattr(instance, pk_field, None)
# Check to make sure that we got an pk not a model object.
# Check to make sure that we got a pk not a model object.
if isinstance(pk, models.Model):
pk = self._get_pk_value(pk)
return pk
@ -358,6 +359,7 @@ class LogEntry(models.Model):
action = models.PositiveSmallIntegerField(
choices=Action.choices, verbose_name=_("action"), db_index=True
)
changes_text = models.TextField(blank=True, verbose_name=_("change message"))
changes = models.JSONField(null=True, verbose_name=_("change message"))
actor = models.ForeignKey(
to=settings.AUTH_USER_MODEL,
@ -411,7 +413,7 @@ class LogEntry(models.Model):
"""
:return: The changes recorded in this log entry as a dictionary object.
"""
return self.changes or {}
return changes_func(self)
@property
def changes_str(self, colon=": ", arrow=" \u2192 ", separator="; "):
@ -459,7 +461,7 @@ class LogEntry(models.Model):
changes_display_dict[field_name] = values
continue
values_display = []
# handle choices fields and Postgres ArrayField to get human readable version
# handle choices fields and Postgres ArrayField to get human-readable version
choices_dict = None
if getattr(field, "choices", []):
choices_dict = dict(field.choices)
@ -546,7 +548,7 @@ class AuditlogHistoryField(GenericRelation):
A subclass of py:class:`django.contrib.contenttypes.fields.GenericRelation` that sets some default
variables. This makes it easier to access Auditlog's log entries, for example in templates.
By default this field will assume that your primary keys are numeric, simply because this is the most
By default, this field will assume that your primary keys are numeric, simply because this is the most
common case. However, if you have a non-integer primary key, you can simply pass ``pk_indexable=False``
to the constructor, and Auditlog will fall back to using a non-indexed text based field for this model.
@ -585,3 +587,24 @@ class AuditlogHistoryField(GenericRelation):
# method. However, because we don't want to delete these related
# objects, we simply return an empty list.
return []
# should I add a signal receiver for setting_changed?
changes_func = None
def _changes_func() -> Callable[[LogEntry], Dict]:
def json_then_text(instance: LogEntry) -> Dict:
if instance.changes:
return instance.changes
elif instance.changes_text:
with contextlib.suppress(ValueError):
return json.loads(instance.changes_text)
return {}
def default(instance: LogEntry) -> Dict:
return instance.changes or {}
if settings.AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT:
return json_then_text
return default

View file

@ -0,0 +1,151 @@
import json
from io import StringIO
from unittest.mock import patch
from django.core.management import call_command
from django.test import TestCase, override_settings
from auditlog.models import LogEntry
from auditlog_tests.models import SimpleModel
class TwoStepMigrationTest(TestCase):
def test_use_text_changes_first(self):
text_obj = '{"field": "changes_text"}'
json_obj = {"field": "changes"}
_params = [
(True, None, text_obj, {"field": "changes_text"}),
(True, json_obj, text_obj, json_obj),
(True, None, "not json", {}),
(False, json_obj, text_obj, json_obj),
]
for setting_value, changes_value, changes_text_value, expected in _params:
with self.subTest():
entry = LogEntry(changes=changes_value, changes_text=changes_text_value)
with self.settings(
AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT=setting_value
):
from auditlog import models
changes_dict = models._changes_func()(entry)
self.assertEqual(changes_dict, expected)
class AuditlogMigrateJsonTest(TestCase):
def make_logentry(self):
model = SimpleModel.objects.create(text="I am a simple model.")
log_entry: LogEntry = model.history.first()
log_entry.changes_text = json.dumps(log_entry.changes)
log_entry.changes = None
log_entry.save()
return log_entry
def call_command(self, *args, **kwargs):
outbuf = StringIO()
errbuf = StringIO()
call_command(
"auditlogmigratejson", *args, stdout=outbuf, stderr=errbuf, **kwargs
)
return outbuf.getvalue().strip(), errbuf.getvalue().strip()
def test_nothing_to_migrate(self):
outbuf, errbuf = self.call_command()
msg = "All records are have been migrated."
self.assertEqual(outbuf, msg)
@override_settings(AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT=True)
def test_nothing_to_migrate_with_conf_true(self):
outbuf, errbuf = self.call_command()
msg = (
"All records are have been migrated.\n"
"You can now set AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT to False."
)
self.assertEqual(outbuf, msg)
def test_using_django(self):
# Arrange
log_entry = self.make_logentry()
# Act
outbuf, errbuf = self.call_command("-b=0")
log_entry.refresh_from_db()
# Assert
self.assertEqual(errbuf, "")
self.assertIsNotNone(log_entry.changes)
def test_using_django_batched(self):
# Arrange
log_entry_1 = self.make_logentry()
log_entry_2 = self.make_logentry()
# Act
outbuf, errbuf = self.call_command("-b=1")
log_entry_1.refresh_from_db()
log_entry_2.refresh_from_db()
# Assert
self.assertEqual(errbuf, "")
self.assertIsNotNone(log_entry_1.changes)
self.assertIsNotNone(log_entry_2.changes)
def test_using_django_batched_call_count(self):
"""
This is split into a different test because I couldn't figure out how to properly patch bulk_update.
For some reason, then I
"""
# Arrange
self.make_logentry()
self.make_logentry()
# Act
with patch("auditlog.models.LogEntry.objects.bulk_update") as bulk_update:
outbuf, errbuf = self.call_command("-b=1")
call_count = bulk_update.call_count
# Assert
self.assertEqual(call_count, 2)
def test_native_postgres(self):
# Arrange
log_entry = self.make_logentry()
# Act
outbuf, errbuf = self.call_command("-d=postgres")
log_entry.refresh_from_db()
# Assert
self.assertEqual(errbuf, "")
self.assertIsNotNone(log_entry.changes)
def test_native_unsupported(self):
# Arrange
log_entry = self.make_logentry()
# Act
outbuf, errbuf = self.call_command("-d=oracle")
log_entry.refresh_from_db()
# Assert
msg = "Not yet implemented. Run this management command without passing a -d/--database argument."
self.assertEqual(errbuf, msg)
self.assertIsNone(log_entry.changes)
def test_using_django_with_error(self):
# Arrange
log_entry = self.make_logentry()
log_entry.changes_text = "not json"
log_entry.save()
# Act
outbuf, errbuf = self.call_command()
log_entry.refresh_from_db()
# Assert
msg = f"ValueError was raised while migrating the log with id {log_entry.id}."
self.assertEqual(errbuf, msg)
self.assertIsNone(log_entry.changes)

View file

@ -21,6 +21,7 @@ Contents
installation
usage
upgrade
internals

21
docs/source/upgrade.rst Normal file
View file

@ -0,0 +1,21 @@
Upgrading to version 3
======================
Version 3.0.0 introduces breaking changes. Please review the migration guide below before upgrading.
If you're new to django-auditlog, you can ignore this part.
The major change in the version is that we're finally storing changes as json instead of json-text.
To convert the existing records, this version has a database migration that does just that.
However, this migration will take a long time if you have a huge amount of records,
causing your database and application to be out of sync until the migration is complete.
To avoid this, follow these steps:
1. Before upgrading the package, add these two variables to ``settings.py``:
* ``AUDITLOG_TWO_STEP_MIGRATION = True``
* ``AUDITLOG_USE_TEXT_CHANGES_IF_JSON_IS_NOT_PRESENT = True``
2. Upgrade the package. Your app will now start storing new records as JSON, but the old records will accessible via ``LogEntry.changes_text``.
3. Use the newly added ``auditlogmigratejson`` command to migrate your records. Run ``django-admin auditlogmigratejson --help`` to get more information.
4. Once all records are migrated, remove the variables listed above, or set their values to ``False``.