From f2d5a92b99ff7545e47029f7234682c38d2cb02d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Beno=C3=AEt=20Bryon?= Date: Mon, 18 Nov 2013 12:12:02 +0100 Subject: [PATCH] Refs #7 - Urlencoded non US-ASCII characters in Content-Disposition header (file name). --- CHANGELOG | 5 ++- django_downloadview/response.py | 60 ++++++++++++++++++++++++++- django_downloadview/test.py | 23 ++++++++-- django_downloadview/tests/response.py | 19 +++++++++ 4 files changed, 101 insertions(+), 6 deletions(-) create mode 100644 django_downloadview/tests/response.py diff --git a/CHANGELOG b/CHANGELOG index f42a953..35cb1e5 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -8,7 +8,10 @@ future releases, check `milestones`_ and :doc:`/about/vision`. 1.4 (unreleased) ---------------- -- Feature #10 - Registered `django-downloadview` on djangopackages.com. +- Bugfix #7 - Special characters in file names (``Content-Disposition`` header) + are urlencoded. A US-ASCII fallback is also provided. + +- Feature #10 - `django-downloadview` is registered on djangopackages.com. 1.3 (2013-11-08) diff --git a/django_downloadview/response.py b/django_downloadview/response.py index e592d8f..6a88952 100644 --- a/django_downloadview/response.py +++ b/django_downloadview/response.py @@ -2,9 +2,58 @@ """:py:class:`django.http.HttpResponse` subclasses.""" import os import mimetypes +import re +import unicodedata +import urllib from django.conf import settings from django.http import HttpResponse, StreamingHttpResponse +from django.utils.encoding import force_str + + +def encode_basename_ascii(value): + """Return US-ASCII encoded ``value`` for use in Content-Disposition header. + + >>> encode_basename_ascii(unicode('éà', 'utf-8')) + u'ea' + + Spaces are converted to underscores. + + >>> encode_basename_ascii(' ') + u'_' + + Text with non US-ASCII characters is expected to be unicode. + + >>> encode_basename_ascii('éà') # doctest: +ELLIPSIS + Traceback (most recent call last): + ... 
+ UnicodeDecodeError: \'ascii\' codec can\'t decode byte ... + + Of course, ASCII values are not modified. + + >>> encode_basename_ascii('ea') + u'ea' + + """ + ascii_basename = unicode(value) + ascii_basename = unicodedata.normalize('NFKD', ascii_basename) + ascii_basename = ascii_basename.encode('ascii', 'ignore') + ascii_basename = ascii_basename.decode('ascii') + ascii_basename = re.sub(r'[\s]', '_', ascii_basename) + return ascii_basename + + +def encode_basename_utf8(value): + """Return UTF-8 encoded ``value`` for use in Content-Disposition header. + + >>> encode_basename_utf8(u' .txt') + '%20.txt' + + >>> encode_basename_utf8(unicode('éà', 'utf-8')) + '%C3%A9%C3%A0' + + """ + return urllib.quote(force_str(value)) class DownloadResponse(StreamingHttpResponse): @@ -86,6 +135,10 @@ class DownloadResponse(StreamingHttpResponse): Uses an internal ``_default_headers`` cache. Default values are computed if only cache hasn't been set. + ``Content-Disposition`` header is encoded according to `RFC 5987 + <http://tools.ietf.org/html/rfc5987>`_. See also + http://stackoverflow.com/questions/93551/how-to-encode-the-filename-parameter-of-content-disposition-header-in-http. + """ try: return self._default_headers @@ -97,8 +150,11 @@ class DownloadResponse(StreamingHttpResponse): except (AttributeError, NotImplementedError): pass # Generated files. 
if self.attachment: - headers['Content-Disposition'] = 'attachment; filename=%s' \ - % self.get_basename() + basename = self.get_basename() + headers['Content-Disposition'] = \ + "attachment; filename={ascii}; filename*=UTF-8''{utf8}" \ + .format(ascii=encode_basename_ascii(basename), + utf8=encode_basename_utf8(basename)) self._default_headers = headers return self._default_headers diff --git a/django_downloadview/test.py b/django_downloadview/test.py index 31482b3..50622f9 100644 --- a/django_downloadview/test.py +++ b/django_downloadview/test.py @@ -6,6 +6,8 @@ from django.conf import settings from django.test.utils import override_settings from django_downloadview.middlewares import is_download_response +from django_downloadview.response import (encode_basename_ascii, + encode_basename_utf8) def setup_view(view, request, *args, **kwargs): @@ -108,9 +110,24 @@ class DownloadResponseValidator(object): def assert_basename(self, test_case, response, value): """Implies ``attachement is True``.""" - test_case.assertTrue( - response['Content-Disposition'].endswith( - 'filename={name}'.format(name=value))) + ascii_name = encode_basename_ascii(value) + utf8_name = encode_basename_utf8(value) + if ascii_name == utf8_name: # Only ASCII characters. 
+ check_ascii = True + check_utf8 = ( # Optional: checked only if present. + "filename*=" in response['Content-Disposition']) + else: + check_utf8 = True + check_ascii = ( # Optional: checked only if present. + "filename=" in response['Content-Disposition']) + if check_ascii: + test_case.assertIn('filename={name}'.format( + name=ascii_name), + response['Content-Disposition']) + if check_utf8: + test_case.assertIn( + "filename*=UTF-8''{name}".format(name=utf8_name), + response['Content-Disposition']) def assert_content_type(self, test_case, response, value): test_case.assertEqual(response['Content-Type'], value) diff --git a/django_downloadview/tests/response.py b/django_downloadview/tests/response.py new file mode 100644 index 0000000..067d7c1 --- /dev/null +++ b/django_downloadview/tests/response.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- +"""Unit tests around responses.""" +import unittest + +from django_downloadview.response import DownloadResponse + + +class DownloadResponseTestCase(unittest.TestCase): + """Tests around :class:`django_downloadview.response.DownloadResponse`.""" + def test_content_disposition_encoding(self): + """Content-Disposition header is encoded.""" + response = DownloadResponse('fake file', + attachment=True, + basename=u'espacé .txt',) + headers = response.default_headers + self.assertIn("filename=espace_.txt", + headers['Content-Disposition']) + self.assertIn("filename*=UTF-8''espac%C3%A9%20.txt", + headers['Content-Disposition'])