diff --git a/linkcheck/linkname.py b/linkcheck/linkname.py
index 9f1e47bf..8c59a4df 100644
--- a/linkcheck/linkname.py
+++ b/linkcheck/linkname.py
@@ -16,9 +16,9 @@
 
 import re, StringUtil
 
-imgtag_re = re.compile("\s+alt\s*=\s*(?P(\".*?\"|'.*?'|[^\s>]+))", re.I)
-img_re = re.compile("<\s*img\s+.*>", re.DOTALL|re.I)
-href_re = re.compile("(?P.*?)", re.I)
+imgtag_re = re.compile(r"""(?i)\s+alt\s*=\s*(?P("[^"\n]*"|'[^'\n]*'|[^\s>]+))""")
+img_re = re.compile(r"""(?i)<\s*img\s+("[^"\n]*"|'[^'\n]*'|[^>]+)+>""")
+href_re = re.compile(r"""(?i)(?P("[^"\n]*"|'[^'\n]*'|[^<]+|<(?!/a\s*>))*)""")
 
 def image_name(txt):
     name = ""
@@ -44,3 +44,20 @@ def href_name(txt):
     name = StringUtil.unhtmlify(name)
     #print "NAME:", `name`
     return name
+
+_tests = (
+    "",
+    "abc",
+    "guru guru",
+    "a\njo",
+    "test<",
+    "test",
+    "test",
+)
+
+def _test ():
+    for t in _tests:
+        print repr(href_name(t))
+
+if __name__=='__main__':
+    _test()