diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 8c463a68a..e8debde67 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -10,6 +10,7 @@ Changelog * Added new "Welcome to your Wagtail site" Starter Page when using wagtail start command (Timothy Allen, Scott Cranfill) * Added ability to run individual tests through tox (Benjamin Bach) * Collection listings are now ordered by name (Seb Brown) + * Added `file_hash` field to documents (Karl Hobley, Dan Braghis) * Fix: Query objects returned from `PageQuerySet.type_q` can now be merged with `|` (Brady Moe) * Fix: Add `rel="noopener noreferrer"` to target blank links (Anselm Bradford) * Fix: Additional fields on custom document models now show on the multiple document upload view (Robert Rollins, Sergey Fedoseev) diff --git a/docs/releases/2.4.rst b/docs/releases/2.4.rst index fe6619320..cd89ba6de 100644 --- a/docs/releases/2.4.rst +++ b/docs/releases/2.4.rst @@ -28,6 +28,7 @@ Other features * New ``image_url`` template tag allows to generate dynamic image URLs, so image renditions are being created outside the main request which improves performance. Requires extra configuration, see :doc:`/advanced_topics/images/image_serve_view` (Yannick Chabbert, Dan Braghis). 
import hashlib
from contextlib import contextmanager


# The three definitions below are methods of ``AbstractDocument``
# (wagtail/documents/models.py). They are shown at top level because the
# enclosing class body lies outside this span; each takes ``self`` and is
# meant to live inside that class.


@contextmanager
def open_file(self):
    """Yield the document's file, opened for binary reading and rewound.

    If ``self.file`` is already open it is reused and left open when the
    ``with`` block exits. If it is closed it is opened here -- in place
    when ``is_stored_locally()`` is true, otherwise through a fresh handle
    obtained from the ``file`` field's storage -- and closed again on exit.
    """
    f = self.file
    # Only close what this call opened; an already-open file is left alone.
    close_file = False

    if f.closed:
        if self.is_stored_locally():
            f.open('rb')
        else:
            # Some external storage backends don't allow reopening
            # the file. Get a fresh file instance. #1397
            storage = self._meta.get_field('file').storage
            f = storage.open(f.name, 'rb')

        close_file = True

    try:
        # Rewind inside the ``try`` so a handle opened above is still
        # closed if seeking fails.
        f.seek(0)
        yield f
    finally:
        if close_file:
            f.close()


def _set_file_hash(self, file_contents):
    """Store the SHA-1 hex digest of ``file_contents`` in ``self.file_hash``.

    ``file_contents`` may be either a bytes-like object, or a binary
    file-like object exposing ``read(size)``. A file-like object is consumed
    from its current position in fixed-size chunks, so large documents are
    never held in memory in one piece. The model is not saved here.
    """
    chunk_size = 64 * 1024
    digest = hashlib.sha1()

    read = getattr(file_contents, 'read', None)
    if read is None:
        # Plain bytes: hash them in one go, as before.
        digest.update(file_contents)
    else:
        while True:
            chunk = read(chunk_size)
            if not chunk:
                break
            digest.update(chunk)

    self.file_hash = digest.hexdigest()


def get_file_hash(self):
    """Return the SHA-1 hex digest of the document's file.

    When ``file_hash`` is still the empty string, the hash is computed from
    the file, stored on the instance and persisted with
    ``save(update_fields=['file_hash'])``. Otherwise the stored value is
    returned without touching the file.
    """
    if self.file_hash == '':
        with self.open_file() as f:
            # Hand over the open file rather than ``f.read()`` so the
            # contents are hashed chunk by chunk.
            self._set_file_hash(f)

        self.save(update_fields=['file_hash'])

    return self.file_hash
check that it is in the 'evil plans' collection doc = models.get_document_model().objects.get(title='test.png') diff --git a/wagtail/documents/views/chooser.py b/wagtail/documents/views/chooser.py index 628aa3625..a7607eb5a 100644 --- a/wagtail/documents/views/chooser.py +++ b/wagtail/documents/views/chooser.py @@ -129,6 +129,11 @@ def chooser_upload(request): if form.is_valid(): document.file_size = document.file.size + # Set new document file hash + document.file.seek(0) + document._set_file_hash(document.file.read()) + document.file.seek(0) + form.save() # Reindex the document to make sure all tags are indexed diff --git a/wagtail/documents/views/documents.py b/wagtail/documents/views/documents.py index 0c27434e5..e8eae2d9d 100644 --- a/wagtail/documents/views/documents.py +++ b/wagtail/documents/views/documents.py @@ -100,6 +100,11 @@ def add(request): if form.is_valid(): doc.file_size = doc.file.size + # Set new document file hash + doc.file.seek(0) + doc._set_file_hash(doc.file.read()) + doc.file.seek(0) + form.save() # Reindex the document to make sure all tags are indexed @@ -137,6 +142,11 @@ def edit(request, document_id): if 'file' in form.changed_data: doc.file_size = doc.file.size + # Set new document file hash + doc.file.seek(0) + doc._set_file_hash(doc.file.read()) + doc.file.seek(0) + # if providing a new document file, delete the old one. # NB Doing this via original_file.delete() clears the file field, # which definitely isn't what we want... diff --git a/wagtail/documents/views/multiple.py b/wagtail/documents/views/multiple.py index ec843f378..204804f22 100644 --- a/wagtail/documents/views/multiple.py +++ b/wagtail/documents/views/multiple.py @@ -51,6 +51,12 @@ def add(request): doc = form.save(commit=False) doc.uploaded_by_user = request.user doc.file_size = doc.file.size + + # Set new document file hash + doc.file.seek(0) + doc._set_file_hash(doc.file.read()) + doc.file.seek(0) + doc.save() # Success! 
# Generated by Django 2.1.2 on 2018-10-28 11:33

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``file_hash`` field to the test app's ``customdocument`` model."""

    # Applies on top of the migration that added ``description``.
    dependencies = [('tests', '0039_customdocument_description')]

    operations = [
        migrations.AddField(
            model_name='customdocument',
            name='file_hash',
            # 40 characters; blank by default and not editable through forms.
            field=models.CharField(
                max_length=40,
                blank=True,
                editable=False,
            ),
        ),
    ]