From 40000306a64bc312db19dd2654992965a5af6458 Mon Sep 17 00:00:00 2001
From: calvin
Date: Fri, 7 Jun 2002 19:50:56 +0000
Subject: [PATCH] fix linkname regexs

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@459 e7d03fd6-7b0d-0410-9947-9c21f3af8025
---
 linkcheck/linkname.py | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/linkcheck/linkname.py b/linkcheck/linkname.py
index 9f1e47bf..8c59a4df 100644
--- a/linkcheck/linkname.py
+++ b/linkcheck/linkname.py
@@ -16,9 +16,9 @@
 
 import re, StringUtil
 
-imgtag_re = re.compile("\s+alt\s*=\s*(?P(\".*?\"|'.*?'|[^\s>]+))", re.I)
-img_re = re.compile("<\s*img\s+.*>", re.DOTALL|re.I)
-href_re = re.compile("(?P.*?)", re.I)
+imgtag_re = re.compile(r"""(?i)\s+alt\s*=\s*(?P("[^"\n]*"|'[^'\n]*'|[^\s>]+))""")
+img_re = re.compile(r"""(?i)<\s*img\s+("[^"\n]*"|'[^'\n]*'|[^>]+)+>""")
+href_re = re.compile(r"""(?i)(?P("[^"\n]*"|'[^'\n]*'|[^<]+|<(?!/a\s*>))*)""")
 
 def image_name(txt):
     name = ""
@@ -44,3 +44,20 @@ def href_name(txt):
         name = StringUtil.unhtmlify(name)
     #print "NAME:", `name`
     return name
+
+_tests = (
+    "",
+    "abc",
+    "guru guru",
+    "a\njo",
+    "test<",
+    "test",
+    "test",
+)
+
+def _test ():
+    for t in _tests:
+        print repr(href_name(t))
+
+if __name__=='__main__':
+    _test()