git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@2900 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2005-10-13 22:26:12 +00:00
parent 90ba7af9e4
commit a2e422ce0d
13 changed files with 8 additions and 20 deletions

View file

@ -33,14 +33,14 @@ Fast HTML parser module written in C with the following features:
not need the bison error recovery.
Incomplete data is rescanned the next time the parser calls yylex() or
when it is being flush()ed.
The following syntax errors will be recognized correctly:
1. missing quotes around attribute values
2. "</...>" end tags in script mode
3. missing ">" in tags
4. invalid tag names
5. invalid characters inside tags or tag attributes
Additionally the parser has the following features:
1. NULL bytes are changed into spaces
2. <!-- ... --> inside a <script> or <style> are not treated as
@ -234,4 +234,3 @@ def set_doctype (parsobj, doctype):
"""
if u"XHTML" in doctype:
parsobj.doctype = "XHTML"

View file

@ -385,4 +385,3 @@ def get_index_html (urls):
lines.append('<a href="%s">%s</a>' % (url, name))
lines.extend(["</body>", "</html>"])
return os.linesep.join(lines)

View file

@ -248,4 +248,3 @@ class Cache (object):
if val:
cookievals.append(val)
return cookievals

View file

@ -41,4 +41,3 @@ class ErrorUrl (urlbase.UrlBase):
Cache key is forbidden.
"""
raise NotImplementedError, "cache keys are forbidden"

View file

@ -277,4 +277,3 @@ class FileUrl (urlbase.UrlBase):
segments = segments[:-1]
path = "/".join(segments)
return "file://%s" % re.escape(path)

View file

@ -97,4 +97,3 @@ def get_content_encoding (headers):
@rtype: string
"""
return headers.get("Content-Encoding", "").strip()

View file

@ -49,7 +49,7 @@ _is_amazon = re.compile(r'^www\.amazon\.(com|de|ca|fr|co\.(uk|jp))').search
httpresponses = {
100: 'Continue',
101: 'Switching Protocols',
200: 'OK',
201: 'Created',
202: 'Accepted',
@ -57,7 +57,7 @@ httpresponses = {
204: 'No Content',
205: 'Reset Content',
206: 'Partial Content',
300: 'Multiple Choices',
301: 'Moved Permanently',
302: 'Found',
@ -66,7 +66,7 @@ httpresponses = {
305: 'Use Proxy',
306: '(Unused)',
307: 'Temporary Redirect',
400: 'Bad Request',
401: 'Unauthorized',
402: 'Payment Required',
@ -85,7 +85,7 @@ httpresponses = {
415: 'Unsupported Media Type',
416: 'Requested Range Not Satisfiable',
417: 'Expectation Failed',
500: 'Internal Server Error',
501: 'Not Implemented',
502: 'Bad Gateway',
@ -310,7 +310,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
redirected)
urlparts = linkcheck.strformat.url_unicode_split(redirected)
# check if we still have the same scheme type, it could be a
# different one
# different one
if urlparts[0] != self.scheme:
self.add_warning(
_("Redirection to different URL type encountered; "
@ -621,4 +621,3 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
# ignore close errors
pass
self.url_connection = None

View file

@ -479,4 +479,3 @@ class Configuration (dict):
"""
fp.write("[filtering]%s" % os.linesep)
# XXX todo

View file

@ -184,4 +184,3 @@ Errors are logged.
</blockquote>
</body>
</html>""") % why)

View file

@ -151,7 +151,7 @@ def is_meta_url (attr, attrs):
res = False
if attr == "content":
equiv = attrs.get_true('http-equiv', u'').lower()
scheme = attrs.get_true('scheme', u'').lower()
scheme = attrs.get_true('scheme', u'').lower()
res = equiv in (u'refresh',) or scheme in (u'dcterms.uri',)
if attr == "href":
rel = attrs.get_true('rel', u'').lower()

View file

@ -55,4 +55,3 @@ class AssertLock (lock_klass):
See if this lock is owned.
"""
return self._is_owned()

View file

@ -146,4 +146,3 @@ class Threader (object):
"""
return "Threader with %d threads (max %d)" % \
(self.active_threads(), self.threads_max)

View file

@ -478,4 +478,3 @@ def url_split (url):
host = host.lower()
host, port = urllib.splitnport(host, port)
return scheme, host, port, document