mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-26 18:50:32 +00:00
remove comments from CSS files before parsing for links
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@3601 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
dc11717bce
commit
a1d911127b
6 changed files with 26 additions and 3 deletions
|
|
@ -31,6 +31,11 @@
|
|||
Type: documentation
|
||||
Changed: doc/{en,de}/linkchecker.1
|
||||
|
||||
* Remove comments from CSS content before searching for links.
|
||||
Type: bugfix
|
||||
Changed: linkcheck/linkparse.py, linkcheck/checker/urlbase.py
|
||||
Closes: SF bug #1831900
|
||||
|
||||
4.7 "300" (released 17.6.2007)
|
||||
|
||||
* Mention in the documentation that --anchors enables logging of
|
||||
|
|
|
|||
2
TODO
2
TODO
|
|
@ -1,7 +1,5 @@
|
|||
- [OPTIMIZATION] Don't store content in TagFinder, only in LinkFinder
|
||||
|
||||
- [BUGFIX] Ignore links in commented-out CSS data (SF Bug #1831900)
|
||||
|
||||
- [BUG REPORT] Running on Windows XP with threads and a local HTTP Server
|
||||
yields a lot of (10061 'Connection Refused') errors. Running without threads (-t0)
|
||||
gets rid of these errors. Is it the server?
|
||||
|
|
|
|||
|
|
@ -2,3 +2,4 @@
|
|||
src:url(file.html)
|
||||
}
|
||||
background-image:url(file.html)
|
||||
/*background-image:url(broken.html)*/
|
||||
|
|
|
|||
|
|
@ -726,7 +726,8 @@ class UrlBase (object):
|
|||
"Parsing CSS %s", self)
|
||||
lineno = 0
|
||||
linkfinder = linkcheck.linkparse.css_url_re.finditer
|
||||
for line in self.get_content().splitlines():
|
||||
strip_comments = linkcheck.linkparse.strip_c_comments
|
||||
for line in strip_comments(self.get_content()).splitlines():
|
||||
lineno += 1
|
||||
for mo in linkfinder(line):
|
||||
column = mo.start("url")
|
||||
|
|
|
|||
|
|
@ -63,6 +63,13 @@ LinkTags = {
|
|||
refresh_re = re.compile(ur"(?i)^\d+;\s*url=(?P<url>.+)$")
|
||||
_quoted_pat = ur"('[^']+'|\"[^\"]+\"|[^\)\s]+)"
|
||||
css_url_re = re.compile(ur"url\(\s*(?P<url>%s)\s*\)" % _quoted_pat)
|
||||
c_comment_re = re.compile(ur"/\*.*?\*/", re.DOTALL)
|
||||
|
||||
def strip_c_comments (text):
    """Remove C/CSS-style comments from text, keeping their line breaks.

    Every /* ... */ comment is replaced by as many newlines as the
    comment itself spanned, so the result has the same number of lines
    as the input and line numbers computed on the stripped text (for
    example by iterating over splitlines()) still refer to the original
    content. Comments that do not span multiple lines are removed
    without a trace.

    Note that this method also deliberately removes comments inside of
    strings.

    @param text: the C or CSS content to clean
    @return: text with all terminated comments removed
    """
    def keep_line_breaks (match):
        # A matched comment always ends with "*/", never with a line
        # break, so it covers exactly len(lines) - 1 line breaks.
        return "\n" * (len(match.group(0).splitlines()) - 1)
    return c_comment_re.sub(keep_line_breaks, text)
|
||||
|
||||
|
||||
class TagFinder (object):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -77,6 +77,17 @@ class TestLinkparser (unittest.TestCase):
|
|||
content = u"<table style='background: url( \"%s\") no-repeat' >"
|
||||
self._test_one_link(content % url, url)
|
||||
|
||||
def test_comment_stripping (self):
    """Check strip_c_comments() with single, adjacent, multi-line,
    absent and unterminated comments."""
    strip = linkcheck.linkparse.strip_c_comments
    content = "/* url('http://imadoofus.org')*/"
    self.assertEqual(strip(content), "")
    content = "/* * * **/"
    self.assertEqual(strip(content), "")
    # the first "*/" ends a comment, a second "/*" inside it is ignored
    content = "/* * /* * **//* */"
    self.assertEqual(strip(content), "")
    content = "a/* */b/* */c"
    self.assertEqual(strip(content), "abc")
    # a comment spanning several lines leaves no link text behind
    content = "/* url(a.html)\n url(b.html) */"
    self.assertEqual(strip(content).strip(), "")
    # content without comments passes through unchanged
    content = "background-image:url(file.html)"
    self.assertEqual(strip(content), content)
    # an unterminated comment is not a comment and is left alone
    content = "/* url(file.html)"
    self.assertEqual(strip(content), content)
|
||||
|
||||
|
||||
def test_suite ():
|
||||
"""
|
||||
|
|
|
|||
Loading…
Reference in a new issue