diff --git a/CHANGELOG.md b/CHANGELOG.md index f28da62..594eaeb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changes +#### Improvements + +- feat: Add `serialized_data` field on `LogEntry` model. ([#412](https://github.com/jazzband/django-auditlog/pull/412)) + #### Fixes - fix: Display `created` timestamp in server timezone ([#404](https://github.com/jazzband/django-auditlog/pull/404)) diff --git a/auditlog/migrations/0011_logentry_serialized_data.py b/auditlog/migrations/0011_logentry_serialized_data.py new file mode 100644 index 0000000..39b9d65 --- /dev/null +++ b/auditlog/migrations/0011_logentry_serialized_data.py @@ -0,0 +1,18 @@ +# Generated by Django 4.0 on 2022-08-05 19:55 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("auditlog", "0010_alter_logentry_timestamp"), + ] + + operations = [ + migrations.AddField( + model_name="logentry", + name="serialized_data", + field=models.JSONField(null=True), + ), + ] diff --git a/auditlog/models.py b/auditlog/models.py index 537c21b..63c154f 100644 --- a/auditlog/models.py +++ b/auditlog/models.py @@ -1,11 +1,14 @@ import ast import json +from copy import deepcopy +from typing import Any, Dict, List from dateutil import parser from dateutil.tz import gettz from django.conf import settings from django.contrib.contenttypes.fields import GenericRelation from django.contrib.contenttypes.models import ContentType +from django.core import serializers from django.core.exceptions import FieldDoesNotExist from django.db import DEFAULT_DB_ALIAS, models from django.db.models import Q, QuerySet @@ -13,6 +16,8 @@ from django.utils import formats, timezone from django.utils.encoding import smart_str from django.utils.translation import gettext_lazy as _ +from auditlog.diff import mask_str + class LogEntryManager(models.Manager): """ @@ -39,6 +44,9 @@ class LogEntryManager(models.Manager): ) kwargs.setdefault("object_pk", pk) 
kwargs.setdefault("object_repr", smart_str(instance)) + kwargs.setdefault( + "serialized_data", self._get_serialized_data_or_none(instance) + ) if isinstance(pk, int): kwargs.setdefault("object_id", pk) @@ -208,6 +216,79 @@ class LogEntryManager(models.Manager): pk = self._get_pk_value(pk) return pk + def _get_serialized_data_or_none(self, instance): + from auditlog.registry import auditlog + + opts = auditlog.get_serialize_options(instance.__class__) + if not opts["serialize_data"]: + return None + + model_fields = auditlog.get_model_fields(instance.__class__) + kwargs = opts.get("serialize_kwargs", {}) + + if opts["serialize_auditlog_fields_only"]: + kwargs.setdefault( + "fields", self._get_applicable_model_fields(instance, model_fields) + ) + + instance_copy = self._get_copy_with_python_typed_fields(instance) + data = dict( + json.loads(serializers.serialize("json", (instance_copy,), **kwargs))[0] + ) + + mask_fields = model_fields["mask_fields"] + if mask_fields: + data = self._mask_serialized_fields(data, mask_fields) + + return data + + def _get_copy_with_python_typed_fields(self, instance): + """ + Attempt to create copy of instance and coerce types on instance fields + + The Django core serializer assumes that the values on object fields are + correctly typed to their respective fields. Updates made to an object's + in-memory state may not meet this assumption. To prevent this violation, values + are typed by calling `to_python` from the field object, the result is set on a + copy of the instance and the copy is sent to the serializer. 
+ """ + try: + instance_copy = deepcopy(instance) + except TypeError: + instance_copy = instance + for field in instance_copy._meta.fields: + if not field.is_relation: + value = getattr(instance_copy, field.name) + setattr(instance_copy, field.name, field.to_python(value)) + return instance_copy + + def _get_applicable_model_fields( + self, instance, model_fields: Dict[str, List[str]] + ) -> List[str]: + include_fields = model_fields["include_fields"] + exclude_fields = model_fields["exclude_fields"] + all_field_names = [field.name for field in instance._meta.fields] + + if not include_fields and not exclude_fields: + return all_field_names + + return list(set(include_fields or all_field_names).difference(exclude_fields)) + + def _mask_serialized_fields( + self, data: Dict[str, Any], mask_fields: List[str] + ) -> Dict[str, Any]: + all_field_data = data.pop("fields") + + masked_field_data = {} + for key, value in all_field_data.items(): + if isinstance(value, str) and key in mask_fields: + masked_field_data[key] = mask_str(value) + else: + masked_field_data[key] = value + + data["fields"] = masked_field_data + return data + class LogEntry(models.Model): """ @@ -253,6 +334,7 @@ class LogEntry(models.Model): blank=True, db_index=True, null=True, verbose_name=_("object id") ) object_repr = models.TextField(verbose_name=_("object representation")) + serialized_data = models.JSONField(null=True) action = models.PositiveSmallIntegerField( choices=Action.choices, verbose_name=_("action"), db_index=True ) diff --git a/auditlog/registry.py b/auditlog/registry.py index 9275251..d2ab0b0 100644 --- a/auditlog/registry.py +++ b/auditlog/registry.py @@ -28,6 +28,10 @@ from auditlog.conf import settings DispatchUID = Tuple[int, int, int] +class AuditLogRegistrationError(Exception): + pass + + class AuditlogModelRegistry: """ A registry that keeps track of the models that use Auditlog to track changes. 
@@ -68,6 +72,9 @@ class AuditlogModelRegistry: mapping_fields: Optional[Dict[str, str]] = None, mask_fields: Optional[List[str]] = None, m2m_fields: Optional[Collection[str]] = None, + serialize_data: bool = False, + serialize_kwargs: Optional[Dict[str, Any]] = None, + serialize_auditlog_fields_only: bool = False, ): """ Register a model with auditlog. Auditlog will then track mutations on this model's instances. @@ -78,7 +85,9 @@ class AuditlogModelRegistry: :param mapping_fields: Mapping from field names to strings in diff. :param mask_fields: The fields to mask for sensitive info. :param m2m_fields: The fields to handle as many to many. - + :param serialize_data: Option to include a dictionary of the objects state in the auditlog. + :param serialize_kwargs: Optional kwargs to pass to Django serializer + :param serialize_auditlog_fields_only: Only fields being considered in changes will be serialized. """ if include_fields is None: @@ -91,6 +100,14 @@ class AuditlogModelRegistry: mask_fields = [] if m2m_fields is None: m2m_fields = set() + if serialize_kwargs is None: + serialize_kwargs = {} + + if (serialize_kwargs or serialize_auditlog_fields_only) and not serialize_data: + raise AuditLogRegistrationError( + "Serializer options were given but the 'serialize_data' option is not " + "set. Did you forget to set serialized_data to True?" 
+ ) def registrar(cls): """Register models for a given class.""" @@ -103,6 +120,9 @@ class AuditlogModelRegistry: "mapping_fields": mapping_fields, "mask_fields": mask_fields, "m2m_fields": m2m_fields, + "serialize_data": serialize_data, + "serialize_kwargs": serialize_kwargs, + "serialize_auditlog_fields_only": serialize_auditlog_fields_only, } self._connect_signals(cls) @@ -153,6 +173,15 @@ class AuditlogModelRegistry: "mask_fields": list(self._registry[model]["mask_fields"]), } + def get_serialize_options(self, model: ModelBase): + return { + "serialize_data": bool(self._registry[model]["serialize_data"]), + "serialize_kwargs": dict(self._registry[model]["serialize_kwargs"]), + "serialize_auditlog_fields_only": bool( + self._registry[model]["serialize_auditlog_fields_only"] + ), + } + def _connect_signals(self, model): """ Connect signals for the model. diff --git a/auditlog_tests/models.py b/auditlog_tests/models.py index 0e6af49..1a6c5a2 100644 --- a/auditlog_tests/models.py +++ b/auditlog_tests/models.py @@ -262,6 +262,44 @@ class JSONModel(models.Model): history = AuditlogHistoryField(delete_related=False) +class SerializeThisModel(models.Model): + label = models.CharField(max_length=24, unique=True) + timestamp = models.DateTimeField() + nullable = models.IntegerField(null=True) + nested = models.JSONField() + mask_me = models.CharField(max_length=255, null=True) + code = models.UUIDField(null=True) + date = models.DateField(null=True) + + history = AuditlogHistoryField(delete_related=False) + + def natural_key(self): + return self.label + + +class SerializeOnlySomeOfThisModel(models.Model): + this = models.CharField(max_length=24) + not_this = models.CharField(max_length=24) + + history = AuditlogHistoryField(delete_related=False) + + +class SerializePrimaryKeyRelatedModel(models.Model): + serialize_this = models.ForeignKey(to=SerializeThisModel, on_delete=models.CASCADE) + subheading = models.CharField(max_length=255) + value = models.IntegerField() + + 
history = AuditlogHistoryField(delete_related=False) + + +class SerializeNaturalKeyRelatedModel(models.Model): + serialize_this = models.ForeignKey(to=SerializeThisModel, on_delete=models.CASCADE) + subheading = models.CharField(max_length=255) + value = models.IntegerField() + + history = AuditlogHistoryField(delete_related=False) + + auditlog.register(AltPrimaryKeyModel) auditlog.register(UUIDPrimaryKeyModel) auditlog.register(ProxyModel) @@ -278,3 +316,20 @@ auditlog.register(CharfieldTextfieldModel) auditlog.register(PostgresArrayFieldModel) auditlog.register(NoDeleteHistoryModel) auditlog.register(JSONModel) +auditlog.register( + SerializeThisModel, + serialize_data=True, + mask_fields=["mask_me"], +) +auditlog.register( + SerializeOnlySomeOfThisModel, + serialize_data=True, + serialize_auditlog_fields_only=True, + exclude_fields=["not_this"], +) +auditlog.register(SerializePrimaryKeyRelatedModel, serialize_data=True) +auditlog.register( + SerializeNaturalKeyRelatedModel, + serialize_data=True, + serialize_kwargs={"use_natural_foreign_keys": True}, +) diff --git a/auditlog_tests/tests.py b/auditlog_tests/tests.py index d2b84f6..0cb1e7a 100644 --- a/auditlog_tests/tests.py +++ b/auditlog_tests/tests.py @@ -21,7 +21,7 @@ from auditlog.context import set_actor from auditlog.diff import model_instance_diff from auditlog.middleware import AuditlogMiddleware from auditlog.models import LogEntry -from auditlog.registry import AuditlogModelRegistry, auditlog +from auditlog.registry import AuditlogModelRegistry, AuditLogRegistrationError, auditlog from auditlog_tests.models import ( AdditionalDataIncludedModel, AltPrimaryKeyModel, @@ -35,6 +35,10 @@ from auditlog_tests.models import ( PostgresArrayFieldModel, ProxyModel, RelatedModel, + SerializeNaturalKeyRelatedModel, + SerializeOnlySomeOfThisModel, + SerializePrimaryKeyRelatedModel, + SerializeThisModel, SimpleExcludeModel, SimpleIncludeModel, SimpleMappingModel, @@ -1000,7 +1004,7 @@ class 
RegisterModelSettingsTest(TestCase): self.assertTrue(self.test_auditlog.contains(SimpleExcludeModel)) self.assertTrue(self.test_auditlog.contains(ChoicesFieldModel)) - self.assertEqual(len(self.test_auditlog.get_models()), 19) + self.assertEqual(len(self.test_auditlog.get_models()), 23) def test_register_models_register_model_with_attrs(self): self.test_auditlog._register_models( @@ -1117,6 +1121,17 @@ class RegisterModelSettingsTest(TestCase): self.assertEqual(fields["include_fields"], ["label"]) self.assertEqual(fields["exclude_fields"], ["text"]) + def test_registration_error_if_bad_serialize_params(self): + with self.assertRaisesMessage( + AuditLogRegistrationError, + "Serializer options were given but the 'serialize_data' option is not " + "set. Did you forget to set serialized_data to True?", + ): + register = AuditlogModelRegistry() + register.register( + SimpleModel, serialize_kwargs={"fields": ["text", "integer"]} + ) + class ChoicesFieldModelTest(TestCase): def setUp(self): @@ -1534,3 +1549,225 @@ class ModelInstanceDiffTest(TestCase): {"boolean": ("True", "False")}, msg="ObjectDoesNotExist should be handled", ) + + +class TestModelSerialization(TestCase): + def setUp(self): + super().setUp() + self.test_date = datetime.datetime(2022, 1, 1, 12, tzinfo=datetime.timezone.utc) + self.test_date_string = datetime.datetime.strftime( + self.test_date, "%Y-%m-%dT%XZ" + ) + + def test_does_not_serialize_data_when_not_configured(self): + instance = SimpleModel.objects.create( + text="sample text here", boolean=True, integer=4 + ) + + log = instance.history.first() + self.assertIsNone(log.serialized_data) + + def test_serializes_data_on_create(self): + with freezegun.freeze_time(self.test_date): + instance = SerializeThisModel.objects.create( + label="test label", + timestamp=self.test_date, + nullable=4, + nested={"foo": True, "bar": False}, + ) + + log = instance.history.first() + self.assertTrue(isinstance(log, LogEntry)) + self.assertEqual(log.action, 0) + 
self.assertDictEqual( + log.serialized_data["fields"], + { + "label": "test label", + "timestamp": self.test_date_string, + "nullable": 4, + "nested": {"foo": True, "bar": False}, + "mask_me": None, + "date": None, + "code": None, + }, + ) + + def test_serializes_data_on_update(self): + with freezegun.freeze_time(self.test_date): + instance = SerializeThisModel.objects.create( + label="test label", + timestamp=self.test_date, + nullable=4, + nested={"foo": True, "bar": False}, + ) + + update_date = self.test_date + datetime.timedelta(days=4) + with freezegun.freeze_time(update_date): + instance.label = "test label change" + instance.save() + + log = instance.history.filter(timestamp=update_date).first() + self.assertTrue(isinstance(log, LogEntry)) + self.assertEqual(log.action, 1) + self.assertDictEqual( + log.serialized_data["fields"], + { + "label": "test label change", + "timestamp": self.test_date_string, + "nullable": 4, + "nested": {"foo": True, "bar": False}, + "mask_me": None, + "date": None, + "code": None, + }, + ) + + def test_serializes_data_on_delete(self): + with freezegun.freeze_time(self.test_date): + instance = SerializeThisModel.objects.create( + label="test label", + timestamp=self.test_date, + nullable=4, + nested={"foo": True, "bar": False}, + ) + + obj_id = int(instance.id) + delete_date = self.test_date + datetime.timedelta(days=4) + with freezegun.freeze_time(delete_date): + instance.delete() + + log = LogEntry.objects.filter(object_id=obj_id, timestamp=delete_date).first() + self.assertTrue(isinstance(log, LogEntry)) + self.assertEqual(log.action, 2) + self.assertDictEqual( + log.serialized_data["fields"], + { + "label": "test label", + "timestamp": self.test_date_string, + "nullable": 4, + "nested": {"foo": True, "bar": False}, + "mask_me": None, + "date": None, + "code": None, + }, + ) + + def test_serialize_string_representations(self): + with freezegun.freeze_time(self.test_date): + instance = SerializeThisModel.objects.create( + 
label="test label", + nullable=4, + nested={"foo": 10, "bar": False}, + timestamp="2022-03-01T12:00Z", + date="2022-04-05", + code="e82d5e53-ca80-4037-af55-b90752326460", + ) + + log = instance.history.first() + self.assertTrue(isinstance(log, LogEntry)) + self.assertEqual(log.action, 0) + self.assertDictEqual( + log.serialized_data["fields"], + { + "label": "test label", + "timestamp": "2022-03-01T12:00:00Z", + "date": "2022-04-05", + "code": "e82d5e53-ca80-4037-af55-b90752326460", + "nullable": 4, + "nested": {"foo": 10, "bar": False}, + "mask_me": None, + }, + ) + + def test_serialize_mask_fields(self): + with freezegun.freeze_time(self.test_date): + instance = SerializeThisModel.objects.create( + label="test label", + nullable=4, + timestamp=self.test_date, + nested={"foo": 10, "bar": False}, + mask_me="confidential", + ) + + log = instance.history.first() + self.assertTrue(isinstance(log, LogEntry)) + self.assertEqual(log.action, 0) + self.assertDictEqual( + log.serialized_data["fields"], + { + "label": "test label", + "timestamp": self.test_date_string, + "nullable": 4, + "nested": {"foo": 10, "bar": False}, + "mask_me": "******ential", + "date": None, + "code": None, + }, + ) + + def test_serialize_only_auditlog_fields(self): + with freezegun.freeze_time(self.test_date): + instance = SerializeOnlySomeOfThisModel.objects.create( + this="this should be there", not_this="leave this bit out" + ) + + log = instance.history.first() + self.assertTrue(isinstance(log, LogEntry)) + self.assertEqual(log.action, 0) + self.assertDictEqual( + log.serialized_data["fields"], {"this": "this should be there"} + ) + self.assertDictEqual( + log.changes_dict, + {"this": ["None", "this should be there"], "id": ["None", "1"]}, + ) + + def test_serialize_related(self): + with freezegun.freeze_time(self.test_date): + serialize_this = SerializeThisModel.objects.create( + label="test label", + nested={"foo": "bar"}, + timestamp=self.test_date, + ) + instance = 
SerializePrimaryKeyRelatedModel.objects.create( + serialize_this=serialize_this, + subheading="use a primary key for this serialization, please.", + value=10, + ) + + log = instance.history.first() + self.assertTrue(isinstance(log, LogEntry)) + self.assertEqual(log.action, 0) + self.assertDictEqual( + log.serialized_data["fields"], + { + "serialize_this": serialize_this.id, + "subheading": "use a primary key for this serialization, please.", + "value": 10, + }, + ) + + def test_serialize_related_with_kwargs(self): + with freezegun.freeze_time(self.test_date): + serialize_this = SerializeThisModel.objects.create( + label="test label", + nested={"foo": "bar"}, + timestamp=self.test_date, + ) + instance = SerializeNaturalKeyRelatedModel.objects.create( + serialize_this=serialize_this, + subheading="use a natural key for this serialization, please.", + value=11, + ) + + log = instance.history.first() + self.assertTrue(isinstance(log, LogEntry)) + self.assertEqual(log.action, 0) + self.assertDictEqual( + log.serialized_data["fields"], + { + "serialize_this": "test label", + "subheading": "use a natural key for this serialization, please.", + "value": 11, + }, + ) diff --git a/docs/source/usage.rst b/docs/source/usage.rst index 0fb0f38..00abffd 100644 --- a/docs/source/usage.rst +++ b/docs/source/usage.rst @@ -115,6 +115,38 @@ Note that when the user changes multiple many-to-many fields on the same object .. versionadded:: 2.1.0 +**Serialized Data** + +The state of an object following a change action may be optionally serialized and persisted in the ``LogEntry.serialized_data`` JSONField. To enable this feature for a registered model, add ``serialize_data=True`` to the kwargs on the ``auditlog.register(...)`` method. Object serialization will not occur unless this kwarg is set. + +.. code-block:: python + + auditlog.register(MyModel, serialize_data=True) + +Objects are serialized using the Django core serializer. 
Keyword arguments may be passed to the serializer through ``serialize_kwargs``. + +.. code-block:: python + + auditlog.register( + MyModel, + serialize_data=True, + serialize_kwargs={"fields": ["foo", "bar", "biz", "baz"]} + ) + +Note that all fields on the object will be serialized unless restricted with one or more configurations. The ``serialize_kwargs`` option contains a ``fields`` argument and this may be given an inclusive list of field names to serialize (as shown above). Alternatively, one may set ``serialize_auditlog_fields_only`` to ``True`` when registering a model with ``exclude_fields`` and ``include_fields`` set (as shown below). This will cause the data persisted in ``LogEntry.serialized_data`` to be limited to the same scope that is persisted within the ``LogEntry.changes`` field. + +.. code-block:: python + + auditlog.register( + MyModel, + exclude_fields=["ssn", "confidential"], + serialize_data=True, + serialize_auditlog_fields_only=True + ) + +Field masking is supported in object serialization. Any value belonging to a field whose name is found in the ``mask_fields`` list will be masked in the serialized object data. Masked values are obfuscated with asterisks in the same way as they are in the ``LogEntry.changes`` field. + + Settings --------