From 1e3cd5116367a15e2c9855dced062de885e1f0f0 Mon Sep 17 00:00:00 2001 From: Tim Graham Date: Fri, 12 Oct 2018 12:15:26 -0400 Subject: [PATCH] Simplified django.utils.html.urlize(). --- django/utils/html.py | 36 +++++++++++++----------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/django/utils/html.py b/django/utils/html.py index 24754553b6..44a3f16459 100644 --- a/django/utils/html.py +++ b/django/utils/html.py @@ -259,23 +259,14 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False): return x return '%s…' % x[:max(0, limit - 1)] - def unescape(text, trail): + def unescape(text): """ If input URL is HTML-escaped, unescape it so that it can be safely fed to smart_urlquote. For example: http://example.com?x=1&amp;y=&lt;2&gt; => http://example.com?x=1&y=<2> """ - unescaped = (text + trail).replace( - '&amp;', '&').replace('&lt;', '<').replace( + return text.replace('&amp;', '&').replace('&lt;', '<').replace( '&gt;', '>').replace('&quot;', '"').replace('&#39;', "'") - if trail and unescaped.endswith(trail): - # Remove trail for unescaped if it was not consumed by unescape - unescaped = unescaped[:-len(trail)] - elif trail == ';': - # Trail was consumed by unescape (as end-of-entity marker), move it to text - text += trail - trail = '' - return text, unescaped, trail def trim_punctuation(lead, middle, trail): """ @@ -286,14 +277,6 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False): trimmed_something = True while trimmed_something: trimmed_something = False - - # Trim trailing punctuation. - stripped = middle.rstrip(TRAILING_PUNCTUATION_CHARS) - if middle != stripped: - trail = middle[len(stripped):] + trail - middle = stripped - trimmed_something = True - # Trim wrapping punctuation.
for opening, closing in WRAPPING_PUNCTUATION: if middle.startswith(opening): @@ -306,6 +289,15 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False): middle = middle[:-len(closing)] trail = closing + trail trimmed_something = True + # Trim trailing punctuation (after trimming wrapping punctuation, + # as encoded entities contain ';'). Unescape entities to avoid + # breaking them by removing ';'. + middle_unescaped = unescape(middle) + stripped = middle_unescaped.rstrip(TRAILING_PUNCTUATION_CHARS) + if middle_unescaped != stripped: + trail = middle[len(stripped):] + trail + middle = middle[:len(stripped) - len(middle_unescaped)] + trimmed_something = True return lead, middle, trail def is_email_simple(value): @@ -337,11 +329,9 @@ def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False): url = None nofollow_attr = ' rel="nofollow"' if nofollow else '' if simple_url_re.match(middle): - middle, middle_unescaped, trail = unescape(middle, trail) - url = smart_urlquote(middle_unescaped) + url = smart_urlquote(unescape(middle)) elif simple_url_2_re.match(middle): - middle, middle_unescaped, trail = unescape(middle, trail) - url = smart_urlquote('http://%s' % middle_unescaped) + url = smart_urlquote('http://%s' % unescape(middle)) elif ':' not in middle and is_email_simple(middle): local, domain = middle.rsplit('@', 1) try: