import fixes

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1386 e7d03fd6-7b0d-0410-9947-9c21f3af8025
calvin 2004-07-22 13:19:18 +00:00
parent 82d2ec5d51
commit 6942fccb50
23 changed files with 76 additions and 94 deletions
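Most hunks below follow one pattern: catch-all or implicit-relative imports (import bk, from UrlData import UrlData, from urllib import splittype, ...) are replaced by explicit absolute imports of the submodules actually used (import bk.log, import linkcheck.checker.UrlData, import urllib), and call sites are qualified with the module name. A minimal before/after sketch of that style; the helper function is illustrative only, the import names are taken from the hunks below:

# before: broad package import and unqualified call sites
import bk
from urllib import splittype

def scheme_of (url):
    return splittype(url)[0]

# after: explicit submodule imports, call sites name their module
import bk.log
import urllib

def scheme_of (url):
    return urllib.splittype(url)[0]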

@ -24,7 +24,7 @@ import Cookie
import sets
import urllib
import _linkchecker_configdata
import bk
import bk.log
import bk.containers
import linkcheck
import linkcheck.Threader

@ -91,14 +91,12 @@ class FileUrlData (linkcheck.checker.UrlData.UrlData):
self.urlName = re.sub(r"^file://(/?)([a-zA-Z]):", r"file:///\2|",
self.urlName)
def buildUrl (self):
super(FileUrlData, self).buildUrl()
# ignore query and fragment url parts for filesystem urls
self.urlparts[3] = self.urlparts[4] = ''
self.url = urlparse.urlunsplit(self.urlparts)
def getCacheKeys (self):
# the host in urlparts is lowercase()d
if self.urlparts:
@ -108,7 +106,6 @@ class FileUrlData (linkcheck.checker.UrlData.UrlData):
return [key]
return []
def isHtml (self):
if linkcheck.extensions['html'].search(self.url):
return True
@ -116,11 +113,9 @@ class FileUrlData (linkcheck.checker.UrlData.UrlData):
return True
return False
def isFile (self):
return True
def isParseable (self):
# guess by extension
for ro in linkcheck.extensions.values():
@ -135,7 +130,6 @@ class FileUrlData (linkcheck.checker.UrlData.UrlData):
pass
return False
def parseUrl (self):
for key, ro in linkcheck.extensions.items():
if ro.search(self.url):

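Several of the checkers in this commit index into self.urlparts; the indices come from urlparse.urlsplit(), which splits a URL into (scheme, netloc, path, query, fragment). A short interactive sketch of what FileUrlData.buildUrl does with it (the URL is illustrative):

>>> import urlparse
>>> parts = list(urlparse.urlsplit("file:///tmp/index.html?q=1#top"))
>>> parts
['file', '', '/tmp/index.html', 'q=1', 'top']
>>> parts[3] = parts[4] = ''    # drop query and fragment, as buildUrl does
>>> urlparse.urlunsplit(parts)
'file:///tmp/index.html'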
@ -17,13 +17,18 @@
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import ftplib
import urllib
import bk.i18n
import linkcheck
import linkcheck.checker.ProxyUrlData
import linkcheck.checker.HttpUrlData
class FtpUrlData (linkcheck.checker.ProxyUrlData.ProxyUrlData):
"""
Url link with ftp scheme.
"""
def checkConnection (self):
# proxy support (we support only http)
self.setProxy(self.config["proxy"].get(self.scheme))
@ -52,27 +57,23 @@ class FtpUrlData (linkcheck.checker.ProxyUrlData.ProxyUrlData):
self.retrieve(filename)
return None
def isHtml (self):
if linkcheck.extensions['html'].search(self.url):
return True
return False
def isParseable (self):
for ro in linkcheck.extensions.values():
if ro.search(self.url):
return True
return False
def parseUrl (self):
for key,ro in linkcheck.extensions.items():
if ro.search(self.url):
return getattr(self, "parse_"+key)()
return None
def login (self, _user, _password):
"""log into ftp server and check the welcome message"""
# ready to connect
@ -89,7 +90,6 @@ class FtpUrlData (linkcheck.checker.ProxyUrlData.ProxyUrlData):
# dont set info anymore, this may change every time we logged in
#self.setInfo(info)
def cwd (self):
"""change directory to given path"""
# leeched from webcheck
@ -100,7 +100,6 @@ class FtpUrlData (linkcheck.checker.ProxyUrlData.ProxyUrlData):
self.urlConnection.cwd(d)
return filename
def retrieve (self, filename):
"""initiate download of given filename"""
# it could be a directory if the trailing slash was forgotten
@ -119,7 +118,6 @@ class FtpUrlData (linkcheck.checker.ProxyUrlData.ProxyUrlData):
#else:
# page = conn.makefile().read()
def closeConnection (self):
try: self.urlConnection.close()
except: pass

@ -16,7 +16,7 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import linkcheck
import linkcheck.checker.UrlData
class GopherUrlData (linkcheck.checker.UrlData.UrlData):
"Url link with gopher scheme"

@ -18,7 +18,9 @@
import socket
import urllib
import linkcheck
import bk.i18n
import linkcheck.checker.UrlData
class HostCheckingUrlData (linkcheck.checker.UrlData.UrlData):
"Url link for which we have to connect to a specific host"
@ -31,16 +33,13 @@ class HostCheckingUrlData (linkcheck.checker.UrlData.UrlData):
self.host = None
self.url = urllib.unquote(self.urlName)
def buildUrl (self):
# to avoid anchor checking
self.urlparts = None
def getCacheKeys (self):
return ["%s:%s" % (self.scheme, self.host)]
def checkConnection (self):
ip = socket.gethostbyname(self.host)
self.setValid(self.host+"("+ip+") "+bk.i18n._("found"))
self.setValid(bk.i18n._("Host %s (%s) found") % (self.host, ip))

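The setValid change above is a typical i18n fix: rather than concatenating a translated fragment onto untranslated pieces, the whole sentence goes through bk.i18n._() as one format string, so translators see the complete message and can reorder the substituted values. A two-line sketch of the difference (host and ip are illustrative locals):

# old style: only "found" is a translation unit, word order is fixed
msg = host + "(" + ip + ") " + bk.i18n._("found")
# new style: the full sentence is the translation unit
msg = bk.i18n._("Host %s (%s) found") % (host, ip)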
@ -24,7 +24,13 @@ import zlib
import gzip
import socket
import cStringIO as StringIO
import bk.url
import bk.i18n
import linkcheck
import linkcheck.robotparser2
import linkcheck.httplib2
import linkcheck.checker.ProxyUrlData
supportHttps = hasattr(linkcheck.httplib2, "HTTPSConnection") and \
hasattr(socket, "ssl")
@ -47,7 +53,6 @@ class HttpUrlData (linkcheck.checker.ProxyUrlData.ProxyUrlData):
self.has301status = False
self.no_anchor = False # remove anchor in request url
def buildUrl (self):
super(HttpUrlData, self).buildUrl()
# encode userinfo
@ -58,7 +63,6 @@ class HttpUrlData (linkcheck.checker.ProxyUrlData.ProxyUrlData):
self.urlparts[2] = '/'
self.url = urlparse.urlunsplit(self.urlparts)
def checkConnection (self):
"""
Check a URL with HTTP protocol.
@ -195,7 +199,6 @@ class HttpUrlData (linkcheck.checker.ProxyUrlData.ProxyUrlData):
# check response
self.checkResponse(response, fallback_GET)
def followRedirections (self, response, redirectCache):
"""follow all redirections of http response"""
redirected = self.url
@ -244,7 +247,7 @@ class HttpUrlData (linkcheck.checker.ProxyUrlData.ProxyUrlData):
self.setWarning(bk.i18n._("HTTP redirection to non-http url encountered; "
"the original url was %r.")%self.url)
# make new UrlData object
newobj = linkcheck.UrlData.GetUrlDataFrom(redirected, self.recursionLevel, self.config,
newobj = linkcheck.checker.GetUrlDataFrom(redirected, self.recursionLevel, self.config,
parentName=self.parentName, baseRef=self.baseRef,
line=self.line, column=self.column, name=self.name)
newobj.warningString = self.warningString
@ -261,7 +264,6 @@ class HttpUrlData (linkcheck.checker.ProxyUrlData.ProxyUrlData):
tries += 1
return tries, response
def checkResponse (self, response, fallback_GET):
"""check final result"""
if response.status >= 400:
@ -293,13 +295,11 @@ class HttpUrlData (linkcheck.checker.ProxyUrlData.ProxyUrlData):
if modified:
self.setInfo(bk.i18n._("Last modified %s") % modified)
def getCacheKeys (self):
keys = super(HttpUrlData, self).getCacheKeys()
keys.extend(self.aliases)
return keys
def _getHttpResponse (self):
"""Put request and return (status code, status text, mime object).
host can be host:port format
@ -347,7 +347,6 @@ class HttpUrlData (linkcheck.checker.ProxyUrlData.ProxyUrlData):
self.urlConnection.endheaders()
return self.urlConnection.getresponse()
def getHTTPObject (self, host, scheme):
if scheme=="http":
h = linkcheck.httplib2.HTTPConnection(host)
@ -359,7 +358,6 @@ class HttpUrlData (linkcheck.checker.ProxyUrlData.ProxyUrlData):
h.connect()
return h
def getContent (self):
if not self.has_content:
self.method = "GET"
@ -382,7 +380,6 @@ class HttpUrlData (linkcheck.checker.ProxyUrlData.ProxyUrlData):
self.downloadtime = time.time() - t
return self.data
def isHtml (self):
if not (self.valid and self.headers):
return False
@ -395,18 +392,15 @@ class HttpUrlData (linkcheck.checker.ProxyUrlData.ProxyUrlData):
return False
return True
def isHttp (self):
return True
def getContentType (self):
ptype = self.headers.get('Content-Type', 'application/octet-stream')
if ";" in ptype:
ptype = ptype.split(';')[0]
return ptype
def isParseable (self):
if not (self.valid and self.headers):
return False
@ -419,7 +413,6 @@ class HttpUrlData (linkcheck.checker.ProxyUrlData.ProxyUrlData):
return False
return True
def parseUrl (self):
ptype = self.getContentType()
if ptype=="text/html":
@ -428,11 +421,9 @@ class HttpUrlData (linkcheck.checker.ProxyUrlData.ProxyUrlData):
self.parse_css()
return None
def getRobotsTxtUrl (self):
return "%s://%s/robots.txt"%tuple(self.urlparts[0:2])
def robotsTxtAllowsUrl (self):
roboturl = self.getRobotsTxtUrl()
debug(HURT_ME_PLENTY, "robots.txt url", roboturl)

@ -16,7 +16,8 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import linkcheck
import linkcheck.checker.HttpUrlData
import bk.i18n
class HttpsUrlData (linkcheck.checker.HttpUrlData.HttpUrlData):

@ -17,7 +17,8 @@
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import re
import linkcheck
import linkcheck.checker.UrlData
import bk.i18n
ignored_schemes = r"""^(
acap # application configuration access protocol

@ -23,7 +23,9 @@ import urllib
import smtplib
import rfc822
import linkcheck
import linkcheck.checker.HostCheckingUrlData
import bk.log
import bk.i18n
import bk.net.dns.lazy
# regular expression for RFC2368 compliant mailto: scanning
@ -43,7 +45,6 @@ class MailtoUrlData (linkcheck.checker.HostCheckingUrlData.HostCheckingUrlData):
self.adresses.extend(rfc822.AddressList(a).addresslist)
bk.log.debug(BRING_IT_ON, "adresses: ", self.adresses)
def _cutout_adresses (self):
mo = headers_re.search(self.urlName)
if mo:
@ -54,7 +55,6 @@ class MailtoUrlData (linkcheck.checker.HostCheckingUrlData.HostCheckingUrlData):
return self.urlName[7:mo.start()]
return self.urlName[7:]
def checkConnection (self):
"""Verify a list of email adresses. If one adress fails,
the whole list will fail.
@ -106,7 +106,6 @@ class MailtoUrlData (linkcheck.checker.HostCheckingUrlData.HostCheckingUrlData):
mxrecord = mxrecord[1]
self.setValid(bk.i18n._("found MX mail host %s") % mxrecord)
def _split_adress (self, adress):
split = adress.split("@", 1)
if len(split)==2:
@ -117,17 +116,14 @@ class MailtoUrlData (linkcheck.checker.HostCheckingUrlData.HostCheckingUrlData):
return (split[0], "localhost")
raise linkcheck.LinkCheckerError(bk.i18n._("could not split the mail adress"))
def closeConnection (self):
try: self.urlConnection.quit()
except: pass
self.urlConnection = None
def getCacheKeys (self):
return ["%s:%s" % (self.scheme, str(self.adresses))]
def hasContent (self):
return False

@ -23,7 +23,9 @@ import nntplib
import urlparse
import random
import linkcheck
import linkcheck.checker.UrlData
import bk.log
import bk.i18n
random.seed()
@ -41,7 +43,6 @@ class NntpUrlData (linkcheck.checker.UrlData.UrlData):
self.urlparts = urlparse.urlsplit(self.url)
bk.log.debug(BRING_IT_ON, self.urlparts)
def checkConnection (self):
nntpserver = self.urlparts[1] or self.config["nntpserver"]
if not nntpserver:
@ -67,7 +68,6 @@ class NntpUrlData (linkcheck.checker.UrlData.UrlData):
# group name is the empty string
self.setWarning(bk.i18n._("No newsgroup specified in NNTP URL"))
def _connectNntp (self, nntpserver):
"""This is done only once per checking task. Also, the newly
introduced error codes 504 and 505 (both inclining "Too busy, retry
@ -90,10 +90,8 @@ class NntpUrlData (linkcheck.checker.UrlData.UrlData):
self.setWarning(bk.i18n._("NNTP busy: %s")%str(value))
return nntp
def getCacheKeys (self):
return [self.url]
def hasContent (self):
return False

@ -14,23 +14,23 @@
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
from UrlData import UrlData
from urllib import splittype, splithost, splituser
import linkcheck.checker.UrlData
import urllib
class ProxyUrlData (linkcheck.checker.UrlData.UrlData):
"""urldata with ability for proxying and for urls with user:pass@host
setting"""
def setProxy (self, proxy):
self.proxy = proxy
self.proxyauth = None
if self.proxy:
if self.proxy[:7].lower() != "http://":
self.proxy = "http://"+self.proxy
self.proxy = splittype(self.proxy)[1]
self.proxy = splithost(self.proxy)[0]
self.proxyauth, self.proxy = splituser(self.proxy)
self.proxy = urllib.splittype(self.proxy)[1]
self.proxy = urllib.splithost(self.proxy)[0]
self.proxyauth, self.proxy = urllib.splituser(self.proxy)
if self.proxyauth is not None:
if ":" not in self.proxyauth: self.proxyauth += ":"
import base64

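For reference, the three urllib helpers that setProxy now calls under their qualified names peel a proxy URL apart step by step. A short interactive sketch (the proxy host is a placeholder):

>>> import urllib
>>> urllib.splittype("http://user:pass@proxy.example.com:8080")
('http', '//user:pass@proxy.example.com:8080')
>>> urllib.splithost("//user:pass@proxy.example.com:8080")
('user:pass@proxy.example.com:8080', '')
>>> urllib.splituser("user:pass@proxy.example.com:8080")
('user:pass', 'proxy.example.com:8080')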
@ -20,6 +20,7 @@ import telnetlib
import urlparse
import urllib
import linkcheck
import linkcheck.checker.HostCheckingUrlData
class TelnetUrlData (linkcheck.checker.HostCheckingUrlData.HostCheckingUrlData):
@ -31,6 +32,7 @@ class TelnetUrlData (linkcheck.checker.HostCheckingUrlData.HostCheckingUrlData):
userinfo, self.host = urllib.splituser(parts[1])
self.host, self.port = urllib.splitport(self.host)
if self.port is not None:
# XXX is_valid_port move?
if not linkcheck.UrlData.is_valid_port(self.port):
raise linkcheck.LinkCheckerError(bk.i18n._("URL has invalid port number %s")\
% self.port)
@ -42,7 +44,6 @@ class TelnetUrlData (linkcheck.checker.HostCheckingUrlData.HostCheckingUrlData):
else:
self.user, self.password = self.getUserPassword()
def checkConnection (self):
super(TelnetUrlData, self).checkConnection()
self.urlConnection = telnetlib.Telnet()
@ -57,7 +58,6 @@ class TelnetUrlData (linkcheck.checker.HostCheckingUrlData.HostCheckingUrlData):
# XXX how to tell if we are logged in??
self.urlConnection.write("exit\n")
def hasContent (self):
return False

@ -17,6 +17,7 @@
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import sys
import os
import re
import urlparse
import urllib2
@ -27,6 +28,7 @@ import socket
import select
import linkcheck
import bk.log
import bk.i18n
ws_at_start_or_end = re.compile(r"(^\s+)|(\s+$)").search
@ -55,7 +57,6 @@ I can work with ;).
def print_app_info ():
import os
print >>sys.stderr, bk.i18n._("System info:")
print >>sys.stderr, linkcheck.Config.App
print >>sys.stderr, "Python %s on %s" % (sys.version, sys.platform)

@ -25,6 +25,7 @@ import nntplib
import ftplib
import linkcheck.httplib2
import bk.net.dns.Base
import bk.i18n
# we catch these exceptions, all other exceptions are internal

@ -18,6 +18,7 @@
import time
import csv
import bk.i18n
import bk.url
import linkcheck.logger.StandardLogger
import linkcheck.logger.Logger
@ -63,7 +64,6 @@ class CSVLogger (linkcheck.logger.StandardLogger.StandardLogger):
self.flush()
self.writer = csv.writer(self.fd, dialect='excel', delimiter=self.separator, lineterminator=self.lineterminator)
def newUrl (self, urlData):
if self.fd is None:
return
@ -79,7 +79,6 @@ class CSVLogger (linkcheck.logger.StandardLogger.StandardLogger):
self.writer.writerow(row)
self.flush()
def endOfOutput (self, linknumber=-1):
if self.fd is None:
return

@ -16,6 +16,7 @@
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import bk.i18n
import bk.url
import bk.ansicolor
import linkcheck.logger.StandardLogger

@ -16,7 +16,9 @@
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import time
import linkcheck
import linkcheck.logger.StandardLogger
import bk.i18n
import bk.url
class GMLLogger (linkcheck.logger.StandardLogger.StandardLogger):

@ -16,7 +16,9 @@
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import time
import linkcheck
import linkcheck.logger.StandardLogger
import bk.i18n
import bk.url
HTML_HEADER = """<!DOCTYPE html PUBLIC "-//W3C//DTD html 4.01//EN">

@ -18,6 +18,7 @@
import time
import linkcheck
import bk.i18n
import bk.url
import linkcheck.logger.StandardLogger
import linkcheck.logger.Logger

@ -18,6 +18,7 @@
import sys
import time
import bk.i18n
import bk.url
import linkcheck.logger.Logger

@ -17,7 +17,9 @@
import time
import xml.sax.saxutils
import linkcheck
import linkcheck.logger.StandardLogger
import bk.i18n
import bk.url
xmlattr_entities = {

@ -15,4 +15,3 @@
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

@ -21,17 +21,15 @@ import time
import rotor
import types
_curses = None
_color = 0
try:
from ncurses import curses
_curses = curses
except ImportError:
try:
import curses
_curses = curses
except ImportError:
pass
print "Sorry, this operating system can not wash clothes."
sys.exit(1)
_bs = [
['\023\335\233\203\2323\016',
@ -70,11 +68,8 @@ _2 = '\035\177\271uC\203\016\306h\2016OHT\352Gw\3770\202fl\013S\021\016\370'
_3 = '\236\177\246\304\351F\203(\005z\375\220\324)\201\266z*j\342\344l\323\0325\374:Z\313\212hD\256\334?a\034\274\315\004r\012a\334\237$\203w\037'
_4 = '\222\360P\277\330\300\246\3670\256\303\223\036\311['
def abbuzze():
if not _curses:
print "Sorry, this operating system can not wash clothes."
return
w = _curses.initscr() # initialize the curses library
def abbuzze ():
w = curses.initscr() # initialize the curses library
config_curses()
my,mx = w.getmaxyx()
b = w.subwin(my-2, mx, 0, 0)
@ -94,29 +89,29 @@ def abbuzze():
abspann(curses.newwin(8, 30, 0, 0))
w.erase()
w.refresh()
_curses.endwin()
curses.endwin()
def config_curses():
def config_curses ():
global _color
_curses.nonl() # tell curses not to do NL->CR/NL on output
_curses.noecho() # don't echo input
_curses.cbreak() # take input chars one at a time, no wait for \n
if hasattr(_curses, "start_color") and hasattr(_curses, "set_color"):
curses.nonl() # tell curses not to do NL->CR/NL on output
curses.noecho() # don't echo input
curses.cbreak() # take input chars one at a time, no wait for \n
if hasattr(curses, "start_color") and hasattr(curses, "set_color"):
_color = 1
_curses.start_color() # start the colour system
if _curses.has_colors():
if _curses.can_change_color():
curses.start_color() # start the colour system
if curses.has_colors():
if curses.can_change_color():
pass
else:
_curses.init_pair(1, curses.COLOR_MAGENTA, curses.COLOR_BLACK)
_curses.init_pair(2, curses.COLOR_MAGENTA, curses.COLOR_BLACK)
_curses.init_pair(3, curses.COLOR_MAGENTA, curses.COLOR_BLACK)
_curses.init_pair(4, curses.COLOR_MAGENTA, curses.COLOR_BLACK)
curses.init_pair(1, curses.COLOR_MAGENTA, curses.COLOR_BLACK)
curses.init_pair(2, curses.COLOR_MAGENTA, curses.COLOR_BLACK)
curses.init_pair(3, curses.COLOR_MAGENTA, curses.COLOR_BLACK)
curses.init_pair(4, curses.COLOR_MAGENTA, curses.COLOR_BLACK)
def waddemol(f):
def waddemol (f):
time.sleep(float(f))
def nassmache(henne):
def nassmache (henne):
if type(henne) == types.StringType:
return rotor.newrotor('ramdoesiger Malaker').decrypt(henne)
hase = []
@ -124,7 +119,7 @@ def nassmache(henne):
hase.append(nassmache(ei))
return hase
def allahopp(w, s, y=2):
def allahopp (w, s, y=2):
w.erase()
w.move(0,y)
for i in range(len(s)):
@ -133,7 +128,7 @@ def allahopp(w, s, y=2):
w.refresh()
waddemol(0.7)
def tadaaa(w, l):
def tadaaa (w, l):
w.erase()
my,mx = w.getmaxyx()
for p in range(mx/2):
@ -141,13 +136,13 @@ def tadaaa(w, l):
w.refresh()
waddemol(0.15)
def hotzenplotz(w,y,x,l):
def hotzenplotz (w,y,x,l):
for li in l:
w.move(y,x)
w.addstr(li)
y += 1
def wischi(w, ls):
def wischi (w, ls):
my,mx = w.getmaxyx()
f = 0.2
i=0
@ -163,10 +158,10 @@ def wischi(w, ls):
w.refresh()
waddemol(f)
def waschi(w, l):
def waschi (w, l):
wischi(w,l)
def abspann(w):
def abspann (w):
w.erase()
w.border(0, 0, 0, 0, 0, 0, 0, 0)
w.refresh()
@ -184,6 +179,6 @@ if __name__=='__main__':
try:
abbuzze()
except:
_curses.endwin()
curses.endwin()
print sys.exc_info()[:2]
print "Sorry, your washing machine is broken!"