From f2bd10e31bfaa5590aa823a4bdf0722cd4094917 Mon Sep 17 00:00:00 2001 From: calvin Date: Sat, 28 Oct 2000 16:15:56 +0000 Subject: [PATCH] fix tag parsing git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@180 e7d03fd6-7b0d-0410-9947-9c21f3af8025 --- FAQ | 4 ++-- debian/changelog | 6 ++++-- linkcheck/UrlData.py | 32 ++++++++++++++++---------------- 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/FAQ b/FAQ index 8fce930d..dcf63a80 100644 --- a/FAQ +++ b/FAQ @@ -1,11 +1,11 @@ Q: The link "mailto:john@company.com?subject=Hello John" is reported as an error. A: You have to quote special characters (e.g. spaces) in the subject field. - The correct link should be "mailto:...?subject=Hello%20John!" + The correct link should be "mailto:...?subject=Hello%20John" Unfortunately browsers like IE and Netscape do not enforce this. Q: I have a pretty large site to check. How can I restrict link checking - to only check my own pages? + to check only my own pages? A: Look at the options --intern, --extern, --strict and --recursion-level. 
diff --git a/debian/changelog b/debian/changelog index f51329f4..d66e20d9 100644 --- a/debian/changelog +++ b/debian/changelog @@ -3,11 +3,13 @@ linkchecker (1.2.6) unstable; urgency=low * made a FAQ * configuration changes: distutils are now required; because of that we have no more .tmpl files - * correct db name in create.sql + * fix db name in create.sql * added timeoutsocket.py to supply a timeout for socket.connect() calls + * fix tag parsing when a quoted tag attribute value contains a > + character - -- Bastian Kleineidam Mon, 16 Oct 2000 14:55:51 +0200 + -- Bastian Kleineidam Sat, 28 Oct 2000 17:52:53 +0200 linkchecker (1.2.5) unstable; urgency=low diff --git a/linkcheck/UrlData.py b/linkcheck/UrlData.py index 7644d301..ed67b1ca 100644 --- a/linkcheck/UrlData.py +++ b/linkcheck/UrlData.py @@ -33,22 +33,22 @@ except ImportError: pass _linkMatcher = r""" - (?i) # case insensitive - < # open tag - \s* # whitespace - %s # tag name - \s+ # whitespace - [^>]*? # skip leading attributes - %s # attrib name - \s* # whitespace - = # equal sign - \s* # whitespace - (?P<value> # attribute value - ".*?" | # in double quotes - '.*?' | # in single quotes - [^\s>]+) # unquoted - [^>]* # skip trailing attributes - > # close tag + (?i) # case insensitive + < # open tag + \s* # whitespace + %s # tag name + \s+ # whitespace + [^>]*? # skip leading attributes + %s # attrib name + \s* # whitespace + = # equal sign + \s* # whitespace + (?P<value> # attribute value + ".*?" | # in double quotes + '.*?' | # in single quotes + [^\s>]+) # unquoted + ([^">]|".*?")* # skip trailing attributes + > # close tag """ LinkPatterns = (