mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-02 03:44:43 +00:00
allow empty relative URLs
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@2704 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
9eb96bc520
commit
2e207be127
17 changed files with 114 additions and 41 deletions
|
|
@ -1,5 +1,12 @@
|
|||
3.0 "" (released xx.xx.xxxx)
|
||||
|
||||
* Allow empty relative URLs. Note that a completely missing URL is
|
||||
still an error (i.e. <a href=""> is valid, <a href> is an error).
|
||||
Type: bugfix
|
||||
Closes: SF bug #
|
||||
Changed: linkcheck/linkparse.py, linkcheck/logger/*.py,
|
||||
linkcheck/checker/urlbase.py
|
||||
|
||||
* Added checks for more <meta> URL entries; in particular, a favicon check
|
||||
was added.
|
||||
Type: feature
|
||||
|
|
|
|||
3
TODO
3
TODO
|
|
@ -1,8 +1,5 @@
|
|||
Possible improvements people could work on:
|
||||
|
||||
- [BUGFIX] Accept empty (relative) URIs. Solution: adjust check_syntax()
|
||||
Note: the empty string ('') seems to be already in the cache (?)
|
||||
|
||||
- [FEATURE] Assign numbers to warnings and let the user configure which to
|
||||
ignore.
|
||||
|
||||
|
|
|
|||
50
debian/control.gui
vendored
Normal file
50
debian/control.gui
vendored
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
Source: linkchecker
|
||||
Section: web
|
||||
Priority: optional
|
||||
Maintainer: Bastian Kleineidam <calvin@debian.org>
|
||||
Build-Depends: python2.4-dev, python, debhelper (>= 4.1.51), gettext, cdbs
|
||||
Standards-Version: 3.6.1
|
||||
|
||||
Package: linkchecker
|
||||
Architecture: any
|
||||
Depends: python2.4
|
||||
Conflicts: linkchecker-ssl
|
||||
Suggests: apache | httpd, python2.4-psyco, python2.4-optcomplete, python2.4-profiler
|
||||
Description: check websites and HTML documents for broken links
|
||||
Provides a command line program and web interface to check links
|
||||
of websites and HTML documents.
|
||||
Users preferring a graphical interface can install the linkchecker-gui
|
||||
package.
|
||||
Features:
|
||||
o recursive checking
|
||||
o multithreaded
|
||||
o output in colored or normal text, HTML, SQL, CSV or a sitemap
|
||||
graph in DOT, GML or XML
|
||||
o HTTP/1.1, HTTPS, FTP, mailto:, news:, nntp:, Gopher, Telnet and local
|
||||
file links support
|
||||
o restrict link checking with regular expression filters for URLs
|
||||
o proxy support
|
||||
o username/password authorization for HTTP, FTP and Telnet
|
||||
o robots.txt exclusion protocol support
|
||||
o Cookie support
|
||||
o i18n support
|
||||
|
||||
Package: linkchecker-gui
|
||||
Architecture: all
|
||||
Depends: linkchecker, python2.4-glade2
|
||||
Description: check websites and HTML documents for broken links (GUI version)
|
||||
Provides a graphical interface "glinkchecker" to check links of websites
|
||||
and HTML documents.
|
||||
Features:
|
||||
o recursive checking
|
||||
o multithreaded
|
||||
o output in colored or normal text, HTML, SQL, CSV or a sitemap
|
||||
graph in DOT, GML or XML
|
||||
o HTTP/1.1, HTTPS, FTP, mailto:, news:, nntp:, Gopher, Telnet and local
|
||||
file links support
|
||||
o restrict link checking with regular expression filters for URLs
|
||||
o proxy support
|
||||
o username/password authorization for HTTP, FTP and Telnet
|
||||
o robots.txt exclusion protocol support
|
||||
o Cookie support
|
||||
o i18n support
|
||||
|
|
@ -206,7 +206,7 @@ def get_url_from (base_url, recursion_level, consumer,
|
|||
Get url data from given base data.
|
||||
|
||||
@param base_url: base url from a link tag
|
||||
@type base_url: string
|
||||
@type base_url: string or None
|
||||
@param recursion_level: current recursion level
|
||||
@type recursion_level: number
|
||||
@param consumer: consumer object
|
||||
|
|
@ -222,7 +222,8 @@ def get_url_from (base_url, recursion_level, consumer,
|
|||
@param name: link name
|
||||
@type name: string
|
||||
"""
|
||||
base_url = linkcheck.strformat.unicode_safe(base_url)
|
||||
if base_url is not None:
|
||||
base_url = linkcheck.strformat.unicode_safe(base_url)
|
||||
if parent_url is not None:
|
||||
parent_url = linkcheck.strformat.unicode_safe(parent_url)
|
||||
if base_ref is not None:
|
||||
|
|
|
|||
|
|
@ -93,6 +93,8 @@ class FileUrl (urlbase.UrlBase):
|
|||
"""
|
||||
super(FileUrl, self).init(base_ref, base_url, parent_url,
|
||||
recursion_level, consumer, line, column, name)
|
||||
if self.base_url is None:
|
||||
return
|
||||
base_url = self.base_url
|
||||
if not (parent_url or base_ref or base_url.startswith("file:")):
|
||||
base_url = os.path.expanduser(base_url)
|
||||
|
|
|
|||
|
|
@ -286,7 +286,8 @@ class UrlBase (object):
|
|||
@rtype: bool
|
||||
"""
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, "checking syntax")
|
||||
if not self.base_url:
|
||||
if (self.base_url is None) or \
|
||||
(not self.base_url and not self.parent_url):
|
||||
self.set_result(_("URL is empty"), valid=False)
|
||||
return
|
||||
try:
|
||||
|
|
@ -326,7 +327,11 @@ class UrlBase (object):
|
|||
self.scheme)
|
||||
self.url = urljoin(self.base_ref, base_url, self.scheme)
|
||||
elif self.parent_url:
|
||||
self.url = urljoin(self.parent_url, base_url, self.scheme)
|
||||
# strip the parent url query and anchor
|
||||
urlparts = list(urlparse.urlsplit(self.parent_url))
|
||||
urlparts[3] = urlparts[4] = ""
|
||||
parent_url = urlparse.urlunsplit(urlparts)
|
||||
self.url = urljoin(parent_url, base_url, self.scheme)
|
||||
else:
|
||||
self.url = base_url
|
||||
# note: urljoin can unnorm the url path, so norm it again
|
||||
|
|
@ -704,7 +709,8 @@ class UrlBase (object):
|
|||
Return serialized url check data as unicode string.
|
||||
"""
|
||||
sep = linkcheck.strformat.unicode_safe(os.linesep)
|
||||
assert isinstance(self.base_url, unicode), self
|
||||
if self.base_url is not None:
|
||||
assert isinstance(self.base_url, unicode), self
|
||||
if self.parent_url is not None:
|
||||
assert isinstance(self.parent_url, unicode), self
|
||||
if self.base_ref is not None:
|
||||
|
|
|
|||
|
|
@ -61,7 +61,7 @@ class TestLogger (linkcheck.logger.Logger):
|
|||
Append logger output to self.result.
|
||||
"""
|
||||
if self.has_part('url'):
|
||||
url = u"url %s" % url_data.base_url
|
||||
url = u"url %s" % (url_data.base_url or u"")
|
||||
if url_data.cached:
|
||||
url += u" (cached)"
|
||||
self.result.append(url)
|
||||
|
|
|
|||
|
|
@ -19,6 +19,10 @@ Just some HTTP links
|
|||
<a href=""></a>
|
||||
-->
|
||||
|
||||
<!-- empty form URL -->
|
||||
<form action="" method="GET">
|
||||
</form>
|
||||
|
||||
<!-- multiple links in one tag -->
|
||||
<applet archive="http.html" src="http.html">
|
||||
<!-- css urls -->
|
||||
|
|
|
|||
|
|
@ -95,6 +95,11 @@ name html entities
|
|||
warning Base URL is not properly normed. Normed URL is http://localhost:8001/?quoted=%%FC.
|
||||
valid
|
||||
|
||||
url (cached)
|
||||
cache key http://localhost:8001/linkcheck/ftests/data/http.html
|
||||
real url http://localhost:8001/linkcheck/ftests/data/http.html
|
||||
valid
|
||||
|
||||
url http.html (cached)
|
||||
cache key http://localhost:8001/linkcheck/ftests/data/http.html
|
||||
real url http://localhost:8001/linkcheck/ftests/data/http.html
|
||||
|
|
|
|||
|
|
@ -207,7 +207,8 @@ class LinkFinder (TagFinder):
|
|||
codebase = unquote(attrs.get_true('codebase', u''))
|
||||
else:
|
||||
codebase = u''
|
||||
value = unquote(attrs.get_true(attr, u''))
|
||||
# note: value can be None
|
||||
value = unquote(attrs.get(attr))
|
||||
# add link to url list
|
||||
self.add_link(tag, attr, value, name, codebase)
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK,
|
||||
|
|
@ -242,7 +243,7 @@ class LinkFinder (TagFinder):
|
|||
assert isinstance(attr, unicode), repr(attr)
|
||||
assert isinstance(name, unicode), repr(name)
|
||||
assert isinstance(base, unicode), repr(base)
|
||||
assert isinstance(url, unicode), repr(url)
|
||||
assert isinstance(url, unicode) or url is None, repr(url)
|
||||
urls = []
|
||||
# look for meta refresh
|
||||
if tag == u'meta':
|
||||
|
|
@ -261,7 +262,7 @@ class LinkFinder (TagFinder):
|
|||
# no url found
|
||||
return
|
||||
for u in urls:
|
||||
assert isinstance(u, unicode), repr(u)
|
||||
assert isinstance(u, unicode) or u is None, repr(u)
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK,
|
||||
u"LinkParser add link %s %s %s %s %s", tag, attr, u, name, base)
|
||||
self.urls.append((u, self.parser.last_lineno(),
|
||||
|
|
|
|||
|
|
@ -99,12 +99,12 @@ class CSVLogger (linkcheck.logger.Logger):
|
|||
if self.fd is None:
|
||||
return
|
||||
row = []
|
||||
for s in [url_data.base_url, url_data.recursion_level,
|
||||
url_data.parent_url or "", url_data.base_ref or "",
|
||||
for s in [url_data.base_url or u"", url_data.recursion_level,
|
||||
url_data.parent_url or u"", url_data.base_ref or u"",
|
||||
url_data.result,
|
||||
os.linesep.join(url_data.warning),
|
||||
os.linesep.join(url_data.info),
|
||||
url_data.valid, url_data.url or "",
|
||||
url_data.valid, url_data.url or u"",
|
||||
url_data.line, url_data.column,
|
||||
url_data.name, url_data.dltime,
|
||||
url_data.dlsize, url_data.checktime,
|
||||
|
|
|
|||
|
|
@ -100,8 +100,8 @@ class GMLLogger (linkcheck.logger.Logger):
|
|||
for node in self.nodes.values():
|
||||
if self.nodes.has_key(node.parent_url):
|
||||
self.writeln(u" edge [")
|
||||
self.writeln(u' label "%s"' % node.base_url)
|
||||
if self.has_part("parenturl"):
|
||||
self.writeln(u' label "%s"' % (node.base_url or u""))
|
||||
if self.has_part("parenturl") and node.parent_url:
|
||||
self.writeln(u" source %d" % \
|
||||
self.nodes[node.parent_url].id)
|
||||
self.writeln(u" target %d" % node.id)
|
||||
|
|
|
|||
|
|
@ -169,7 +169,7 @@ class HtmlLogger (linkcheck.logger.Logger):
|
|||
self.writeln(u"<td bgcolor=\""+self.colorurl+u"\">"+
|
||||
self.part("url")+u"</td>")
|
||||
self.write(u"<td bgcolor=\""+self.colorurl+u"\">"+
|
||||
cgi.escape(repr(url_data.base_url)[1:]))
|
||||
cgi.escape(repr(url_data.base_url or u"")[1:]))
|
||||
if url_data.cached:
|
||||
self.write(_(" (cached)"))
|
||||
self.writeln(u"</td></tr>")
|
||||
|
|
|
|||
|
|
@ -116,15 +116,15 @@ class SQLLogger (linkcheck.logger.Logger):
|
|||
"%(cached)d"
|
||||
")%(separator)s" % \
|
||||
{'table': self.dbname,
|
||||
'base_url': sqlify(url_data.base_url),
|
||||
'base_url': sqlify(url_data.base_url or u""),
|
||||
'recursion_level': url_data.recursion_level,
|
||||
'url_parent': sqlify((url_data.parent_url or "")),
|
||||
'base_ref': sqlify((url_data.base_ref or "")),
|
||||
'url_parent': sqlify((url_data.parent_url or u"")),
|
||||
'base_ref': sqlify((url_data.base_ref or u"")),
|
||||
'valid': intify(url_data.valid),
|
||||
'result': sqlify(url_data.result),
|
||||
'warning': sqlify(os.linesep.join(url_data.warning)),
|
||||
'info': sqlify(os.linesep.join(url_data.info)),
|
||||
'url': sqlify(linkcheck.url.url_quote(url_data.url or "")),
|
||||
'url': sqlify(linkcheck.url.url_quote(url_data.url or u"")),
|
||||
'line': url_data.line,
|
||||
'column': url_data.column,
|
||||
'name': sqlify(url_data.name),
|
||||
|
|
|
|||
|
|
@ -141,7 +141,7 @@ class TextLogger (linkcheck.logger.Logger):
|
|||
"""
|
||||
self.writeln()
|
||||
self.write(self.part('url') + self.spaces('url'))
|
||||
txt = unicode(repr(url_data.base_url)[1:])
|
||||
txt = unicode(repr(url_data.base_url or u"")[1:])
|
||||
if url_data.cached:
|
||||
txt += _(" (cached)")
|
||||
self.writeln(txt, color=self.colorurl)
|
||||
|
|
|
|||
|
|
@ -148,7 +148,7 @@ class XMLLogger (linkcheck.logger.Logger):
|
|||
self.writeln(u' target="%d">' % node.id)
|
||||
if self.has_part("url"):
|
||||
self.writeln(u" <label>%s</label>" % \
|
||||
xmlquote(node.base_url))
|
||||
xmlquote(node.base_url or u""))
|
||||
self.writeln(u" <data>")
|
||||
if self.has_part("result"):
|
||||
self.writeln(u" <valid>%d</valid>" % \
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ msgid ""
|
|||
msgstr ""
|
||||
"Project-Id-Version: PACKAGE VERSION\n"
|
||||
"Report-Msgid-Bugs-To: calvin@users.sourceforge.net\n"
|
||||
"POT-Creation-Date: 2005-07-04 01:39+0200\n"
|
||||
"POT-Creation-Date: 2005-07-04 22:27+0200\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language-Team: LANGUAGE <LL@li.org>\n"
|
||||
|
|
@ -59,15 +59,15 @@ msgstr ""
|
|||
msgid "%s URL ignored."
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/fileurl.py:117
|
||||
#: ../linkcheck/checker/fileurl.py:119
|
||||
msgid "Added trailing slash to directory."
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/fileurl.py:127
|
||||
#: ../linkcheck/checker/fileurl.py:129
|
||||
msgid "directory"
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/fileurl.py:143
|
||||
#: ../linkcheck/checker/fileurl.py:145
|
||||
#, python-format
|
||||
msgid ""
|
||||
"The URL path %r is not the same as the system path %r. You should always use "
|
||||
|
|
@ -109,16 +109,16 @@ msgstr ""
|
|||
msgid "Python %s on %s"
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:290
|
||||
#: ../linkcheck/checker/urlbase.py:291
|
||||
msgid "URL is empty"
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:297
|
||||
#: ../linkcheck/checker/urlbase.py:298
|
||||
#, python-format
|
||||
msgid "Effective URL %r."
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:313
|
||||
#: ../linkcheck/checker/urlbase.py:314
|
||||
#, python-format
|
||||
msgid ""
|
||||
"URL %r has a unicode domain name which\n"
|
||||
|
|
@ -126,51 +126,51 @@ msgid ""
|
|||
" the URL %r instead."
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:318
|
||||
#: ../linkcheck/checker/urlbase.py:319
|
||||
#, python-format
|
||||
msgid "Base URL is not properly normed. Normed URL is %(url)s."
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:354
|
||||
#: ../linkcheck/checker/urlbase.py:359
|
||||
#, python-format
|
||||
msgid "URL has invalid port %r"
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:388
|
||||
#: ../linkcheck/checker/urlbase.py:393
|
||||
#, python-format
|
||||
msgid "URL is located in %s."
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:403 ../linkcheck/checker/httpurl.py:289
|
||||
#: ../linkcheck/checker/urlbase.py:408 ../linkcheck/checker/httpurl.py:289
|
||||
#: ../linkcheck/checker/ignoredurl.py:34
|
||||
msgid "Outside of domain filter, checked only syntax."
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:419
|
||||
#: ../linkcheck/checker/urlbase.py:424
|
||||
msgid "Hostname not found"
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:422
|
||||
#: ../linkcheck/checker/urlbase.py:427
|
||||
#, python-format
|
||||
msgid "Bad HTTP response %r"
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:453
|
||||
#: ../linkcheck/checker/urlbase.py:458
|
||||
#, python-format
|
||||
msgid "could not parse content: %r"
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:537
|
||||
#: ../linkcheck/checker/urlbase.py:542
|
||||
#, python-format
|
||||
msgid "Anchor #%s not found."
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:589
|
||||
#: ../linkcheck/checker/urlbase.py:594
|
||||
#, python-format
|
||||
msgid "Found %r in link contents."
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/checker/urlbase.py:598
|
||||
#: ../linkcheck/checker/urlbase.py:603
|
||||
#, python-format
|
||||
msgid "Content size %s is larger than %s."
|
||||
msgstr ""
|
||||
|
|
|
|||
Loading…
Reference in a new issue