From f5b171979be58732fb28dce0295168ef9ab30494 Mon Sep 17 00:00:00 2001
From: Matthew Tretter
Date: Thu, 25 Apr 2013 02:29:27 -0400
Subject: [PATCH] Canonicalize sets and dicts before hashing

---
Notes:
    hashers.CanonicalizingPickler overrides how `set` and `dict` objects
    are pickled: set elements and dict items are written in sorted order,
    so the pickle stream does not depend on container iteration order.
    hashers.pickle() returns the MD5 hex digest of the protocol-0 pickle
    produced that way, and ImageSpec.get_hash() now calls it instead of
    md5(pickle.dumps(...)).hexdigest().

    As written the new module is Python 2 only: it relies on
    types.DictionaryType and dict.iteritems().

 imagekit/hashers.py        | 31 +++++++++++++++++++++++++++++++
 imagekit/specs/__init__.py |  7 +++----
 2 files changed, 34 insertions(+), 4 deletions(-)
 create mode 100644 imagekit/hashers.py

diff --git a/imagekit/hashers.py b/imagekit/hashers.py
new file mode 100644
index 0000000..4231fa5
--- /dev/null
+++ b/imagekit/hashers.py
@@ -0,0 +1,31 @@
+from copy import copy
+from hashlib import md5
+from pickle import Pickler, MARK, DICT
+from types import DictionaryType
+from .lib import StringIO
+
+
+class CanonicalizingPickler(Pickler):
+    dispatch = copy(Pickler.dispatch)
+
+    def save_set(self, obj):
+        rv = obj.__reduce_ex__(0)
+        rv = (rv[0], (sorted(rv[1][0]),), rv[2])
+        self.save_reduce(obj=obj, *rv)
+
+    dispatch[set] = save_set
+
+    def save_dict(self, obj):
+        write = self.write
+        write(MARK + DICT)
+
+        self.memoize(obj)
+        self._batch_setitems(sorted(obj.iteritems()))
+
+    dispatch[DictionaryType] = save_dict
+
+
+def pickle(obj):
+    file = StringIO()
+    CanonicalizingPickler(file, 0).dump(obj)
+    return md5(file.getvalue()).hexdigest()
diff --git a/imagekit/specs/__init__.py b/imagekit/specs/__init__.py
index dbe56ed..2f6dc51 100644
--- a/imagekit/specs/__init__.py
+++ b/imagekit/specs/__init__.py
@@ -1,9 +1,8 @@
 from django.conf import settings
 from django.db.models.fields.files import ImageFieldFile
-from hashlib import md5
-import pickle
 from ..cachefiles.backends import get_default_cachefile_backend
 from ..cachefiles.strategies import StrategyWrapper
+from .. import hashers
 from ..exceptions import AlreadyRegistered, MissingSource
 from ..processors import ProcessorPipeline
 from ..utils import open_image, img_to_fobj, get_by_qname
@@ -115,13 +114,13 @@ class ImageSpec(BaseImageSpec):
             self.source = getattr(field_data['instance'], field_data['attname'])

     def get_hash(self):
-        return md5(pickle.dumps([
+        return hashers.pickle([
             self.source.name,
             self.processors,
             self.format,
             self.options,
             self.autoconvert,
-        ])).hexdigest()
+        ])

     def generate(self):
         if not self.source: