import fixes

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1399 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2004-07-26 13:47:19 +00:00
parent 96dd6ef4b8
commit 1f6670e8cd
10 changed files with 31 additions and 21 deletions

View file

@ -21,10 +21,6 @@ import sys
markup_re = re.compile("<.*?>", re.DOTALL)
SQLTable = [
("'","''")
]
def stripQuotes (s):
"Strip optional quotes"

View file

@ -49,15 +49,6 @@ def getLinkPat (arg, strict=False):
}
def printStatus (config, curtime, start_time):
    """Write a one-line progress summary to stderr.

    Reports the number of queued urls, checked links, active checker
    threads, and the elapsed runtime since start_time.
    """
    queued = len(config.urls)
    checked = config['linknumber']
    threads = config.threader.active_threads()
    elapsed = bk.strtime.strduration(curtime - start_time)
    # keep the format string identical so existing translations still match
    fmt = bk.i18n._("%5d urls queued, %4d links checked, %2d active threads, runtime %s")
    print >>sys.stderr, fmt % (queued, checked, threads, elapsed)
import linkcheck.logger.StandardLogger
import linkcheck.logger.HtmlLogger
import linkcheck.logger.ColoredLogger

View file

@ -79,7 +79,8 @@ class FtpUrlData (ProxyUrlData.ProxyUrlData):
# ready to connect
try:
self.urlConnection = ftplib.FTP()
self.urlConnection.set_debuglevel(get_debuglevel())
if self.config.get("debug"):
self.urlConnection.set_debuglevel(1)
self.urlConnection.connect(self.urlparts[1])
self.urlConnection.login(_user, _password)
except EOFError:

View file

@ -24,7 +24,7 @@ class HttpsUrlData (HttpUrlData.HttpUrlData):
"""Url link with https scheme"""
def _check (self):
if linkcheck.checker.HttpUrlData.supportHttps:
if HttpUrlData.supportHttps:
super(HttpsUrlData, self)._check()
else:
self.setWarning(bk.i18n._("%s url ignored")%self.scheme.capitalize())

View file

@ -77,7 +77,7 @@ class MailtoUrlData (HostCheckingUrlData.HostCheckingUrlData):
bk.log.debug(linkcheck.LOG_CHECK, "splitting address")
user,host = self._split_adress(mail)
bk.log.debug(linkcheck.LOG_CHECK, "looking up MX mailhost")
mxrecords = bk.net.dns.lazy.mxlookup(host, config.dnsconfig)
mxrecords = bk.net.dns.lazy.mxlookup(host, self.config.dnsconfig)
bk.log.debug(linkcheck.LOG_CHECK, "found mailhosts", mxrecords)
if not len(mxrecords):
self.setWarning(bk.i18n._("No MX mail host for %s found")%host)

View file

@ -21,6 +21,7 @@ import urlparse
import urllib
import linkcheck
import HostCheckingUrlData
import bk.i18n
class TelnetUrlData (HostCheckingUrlData.HostCheckingUrlData):
@ -47,7 +48,8 @@ class TelnetUrlData (HostCheckingUrlData.HostCheckingUrlData):
def checkConnection (self):
super(TelnetUrlData, self).checkConnection()
self.urlConnection = telnetlib.Telnet()
self.urlConnection.set_debuglevel(get_debuglevel())
if self.config.get("debug"):
self.urlConnection.set_debuglevel(1)
self.urlConnection.open(self.host, self.port)
if self.user:
self.urlConnection.read_until("login: ", 10)

View file

@ -17,6 +17,7 @@
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import time
import sys
import socket
import select
import re
@ -94,6 +95,15 @@ ignored_schemes_re = re.compile(ignored_schemes, re.VERBOSE)
def printStatus (config, curtime, start_time):
    """Write a one-line progress summary to stderr.

    Reports the number of queued urls, checked links, active checker
    threads, and the elapsed runtime since start_time.
    """
    queued = len(config.urls)
    checked = config['linknumber']
    threads = config.threader.active_threads()
    elapsed = bk.strtime.strduration(curtime - start_time)
    # keep the format string identical so existing translations still match
    fmt = bk.i18n._("%5d urls queued, %4d links checked, %2d active threads, runtime %s")
    print >>sys.stderr, fmt % (queued, checked, threads, elapsed)
# main check function
def checkUrls (config):
""" checkUrls gets a complete configuration object as parameter where all
@ -156,7 +166,7 @@ def set_intern_url (url, klass, config):
recursion level is zero (ie url given on the command line)"""
if klass == linkcheck.checker.FileUrlData.FileUrlData:
bk.log.debug(linkcheck.LOG_CHECK, "Add intern pattern ^file:")
config['internlinks'].append(getLinkPat("^file:"))
config['internlinks'].append(linkcheck.getLinkPat("^file:"))
elif klass in [linkcheck.checker.HttpUrlData.HttpUrlData,
linkcheck.checker.HttpsUrlData.HttpsUrlData,
linkcheck.checker.FtpUrlData.FtpUrlData]:
@ -165,7 +175,7 @@ def set_intern_url (url, klass, config):
domain = "://%s"%re.escape(domain)
bk.log.debug(linkcheck.LOG_CHECK, "Add intern domain", domain)
# add scheme colon to link pattern
config['internlinks'].append(getLinkPat(domain))
config['internlinks'].append(linkcheck.getLinkPat(domain))
def get_absolute_url (urlName, baseRef, parentName):

View file

@ -23,6 +23,10 @@ import time
import urlparse
import types
import linkcheck
import linkcheck.checker
import bk.url
import bk.i18n
import bk.strtime
_logfile = None
_supported_langs = ('de', 'fr', 'nl', 'C')
@ -68,7 +72,7 @@ def checklink (out=sys.stdout, form={}, env=os.environ):
pat = bk.url.safe_url_pattern
config["internlinks"].append(linkcheck.getLinkPat("^%s$" % pat))
# avoid checking of local files or other nasty stuff
config["externlinks"].append(linkcheck.getLinkPat("^%s$" % safe_url_pattern))
config["externlinks"].append(linkcheck.getLinkPat("^%s$" % bk.url.safe_url_pattern))
config["externlinks"].append(linkcheck.getLinkPat(".*", strict=True))
# start checking
config.appendUrl(linkcheck.checker.getUrlDataFrom(form["url"].value, 0, config))

View file

@ -29,6 +29,11 @@ def applyTable (table, s):
return s
SQLTable = [
("'","''")
]
def sqlify (s):
"Escape special SQL chars and strings"
if not s:

View file

@ -20,6 +20,7 @@ import zlib
import gzip
import cStringIO as StringIO
import linkcheck
import linkcheck.httplib2
__all__ = ["RobotFileParser"]
@ -297,7 +298,7 @@ def decode (page):
else:
fp = gzip.GzipFile('', 'rb', 9, StringIO.StringIO(content))
except zlib.error, msg:
# assuming non-compressed content
warn("uncompressing had error %s, assuming non-compressed content"%str(msg))
fp = StringIO.StringIO(content)
# remove content-encoding header
headers = {}