From df857aab8d8ea35d2676e1b760519cd460faf310 Mon Sep 17 00:00:00 2001 From: calvin Date: Mon, 21 Apr 2008 09:18:58 +0000 Subject: [PATCH] Intern patterns now accept URLs with and without "www." prefixes by default. This allows checking sites that use both variants. git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@3714 e7d03fd6-7b0d-0410-9947-9c21f3af8025 --- ChangeLog | 5 +++++ linkcheck/checker/internpaturl.py | 2 ++ 2 files changed, 7 insertions(+) diff --git a/ChangeLog b/ChangeLog index 701c76de..4a48781a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -40,6 +40,11 @@ Changed: linkcheck/director/__init__.py Closes: SF bug #1720104 + * Intern patterns now accept URLs with and without "www." prefixes + by default. This allows checking sites that use both variants. + Type: feature + Changed: linkcheck/checker/internpaturl.py + 4.8 "Hallam Foe" (released 16.12.2007) * Fix message typo for not disclosing information. diff --git a/linkcheck/checker/internpaturl.py b/linkcheck/checker/internpaturl.py index 02dcf81e..da7034c7 100644 --- a/linkcheck/checker/internpaturl.py +++ b/linkcheck/checker/internpaturl.py @@ -51,4 +51,6 @@ class InternPatternUrl (urlbase.UrlBase): args = list(re.escape(x) for x in (scheme, domain, path)) if args[0] in ('http', 'https'): args[0] = 'https?' + if args[1].startswith('www\\.'): + args[1] = r"(www\.|)%s" % args[1][5:] return "%s://%s%s" % tuple(args)