mirror of
https://github.com/Hopiu/wagtail.git
synced 2026-05-02 04:24:46 +00:00
Merge branch 'takeflight-bug/url-check'
This commit is contained in:
commit
8bb1a8a459
2 changed files with 27 additions and 6 deletions
|
|
@ -17,6 +17,13 @@ class TestCheckUrl(TestCase):
|
|||
def test_disallowed_url_scheme(self):
    """A URL with a scheme that is not whitelisted must yield a falsy result."""
    checked = check_url("invalid://url")
    self.assertFalse(bool(checked))
|
||||
|
||||
def test_crafty_disallowed_url_scheme(self):
    """
    Some URL parsers do not parse 'jav\tascript:' as a valid scheme.
    Browsers, however, do. The checker needs to catch these crafty schemes.
    """
    crafty_url = "jav\tascript:alert('XSS')"
    checked = check_url(crafty_url)
    self.assertFalse(bool(checked))
|
||||
|
||||
|
||||
class TestAttributeRule(TestCase):
|
||||
def setUp(self):
|
||||
|
|
|
|||
|
|
@ -2,19 +2,33 @@
|
|||
A generic HTML whitelisting engine, designed to accommodate subclassing to override
|
||||
specific rules.
|
||||
"""
|
||||
from six.moves.urllib.parse import urlparse
|
||||
import re
|
||||
|
||||
|
||||
from bs4 import BeautifulSoup, NavigableString, Tag
|
||||
|
||||
|
||||
# Schemes a URL may explicitly carry. URLs with no scheme at all (relative
# links, fragments, ...) are accepted separately by check_url, so the empty
# string does not need to be listed here.
ALLOWED_URL_SCHEMES = ['http', 'https', 'ftp', 'mailto', 'tel']

# Matches a leading "<scheme>:" on an already lower-cased, sanitised URL.
PROTOCOL_RE = re.compile(r"^[a-z0-9][-+.a-z0-9]*:")

# Backtick, NUL..space, DEL..NBSP and any other whitespace: characters that
# browsers may silently drop when working out a URL's scheme.
_IGNORED_URL_CHARS_RE = re.compile(r"[`\x00-\x20\x7f-\xa0\s]+")


def check_url(url_string):
    """
    Return ``url_string`` unchanged if it is safe to emit, otherwise ``None``.

    A URL is rejected only when, after sanitising, it starts with an explicit
    scheme that is not listed in ``ALLOWED_URL_SCHEMES``. URLs without a
    recognisable scheme are passed through untouched.
    """
    # Remove control characters and other disallowed characters.
    # Browsers sometimes ignore these, so that 'jav\tascript:alert("XSS")'
    # is treated as a valid javascript: link. The sanitised copy is used for
    # the scheme check only; the caller always gets the original string back.
    unescaped = url_string.lower()
    unescaped = unescaped.replace("&lt;", "<")
    unescaped = unescaped.replace("&gt;", ">")
    unescaped = unescaped.replace("&amp;", "&")
    unescaped = _IGNORED_URL_CHARS_RE.sub('', unescaped)
    unescaped = unescaped.replace("\ufffd", "")

    if PROTOCOL_RE.match(unescaped):
        protocol = unescaped.split(':', 1)[0]
        if protocol not in ALLOWED_URL_SCHEMES:
            return None
    return url_string
|
||||
|
||||
|
||||
def attribute_rule(allowed_attrs):
|
||||
|
|
|
|||
Loading…
Reference in a new issue