mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-19 13:51:01 +00:00
Clean up the code and add some documentation
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@3595 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
0571d32663
commit
370749cafb
1 changed file with 15 additions and 12 deletions
|
|
@ -650,18 +650,21 @@ class UrlBase (object):
|
|||
"""
|
||||
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK,
|
||||
"Parsing HTML %s", self)
|
||||
h = linkcheck.linkparse.LinkFinder(self.get_content())
|
||||
p = linkcheck.HtmlParser.htmlsax.parser(h)
|
||||
h.parser = p
|
||||
p.feed(self.get_content())
|
||||
p.flush()
|
||||
h.parser = None
|
||||
p.handler = None
|
||||
for url, line, column, name, codebase in h.urls:
|
||||
# construct parser object
|
||||
handler = linkcheck.linkparse.LinkFinder(self.get_content())
|
||||
parser = linkcheck.HtmlParser.htmlsax.parser(handler)
|
||||
handler.parser = parser
|
||||
# parse HTML
|
||||
parser.feed(self.get_content())
|
||||
parser.flush()
|
||||
# break cyclic dependencies
|
||||
handler.parser = None
|
||||
parser.handler = None
|
||||
for url, line, column, name, codebase in handler.urls:
|
||||
if codebase:
|
||||
base_ref = codebase
|
||||
else:
|
||||
base_ref = h.base_ref
|
||||
base_ref = handler.base_ref
|
||||
base_ref = linkcheck.url.url_norm(base_ref)[0]
|
||||
url_data = linkcheck.checker.get_url_from(url,
|
||||
self.recursion_level+1, self.aggregate, parent_url=self.url,
|
||||
|
|
@ -677,8 +680,7 @@ class UrlBase (object):
|
|||
"Parsing Opera bookmarks %s", self)
|
||||
name = ""
|
||||
lineno = 0
|
||||
lines = self.get_content().splitlines()
|
||||
for line in lines:
|
||||
for line in self.get_content().splitlines():
|
||||
lineno += 1
|
||||
line = line.strip()
|
||||
if line.startswith("NAME="):
|
||||
|
|
@ -719,9 +721,10 @@ class UrlBase (object):
|
|||
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK,
|
||||
"Parsing CSS %s", self)
|
||||
lineno = 0
|
||||
linkfinder = linkcheck.linkparse.css_url_re.finditer
|
||||
for line in self.get_content().splitlines():
|
||||
lineno += 1
|
||||
for mo in linkcheck.linkparse.css_url_re.finditer(line):
|
||||
for mo in linkfinder(line):
|
||||
column = mo.start("url")
|
||||
url = linkcheck.strformat.unquote(mo.group("url").strip())
|
||||
url_data = linkcheck.checker.get_url_from(url,
|
||||
|
|
|
|||
Loading…
Reference in a new issue