debugging improvements

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@349 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2002-01-04 18:27:14 +00:00
parent 0d6c16a83a
commit 4824c59f05
6 changed files with 30 additions and 13 deletions

6
debian/changelog vendored
View file

@ -1,3 +1,9 @@
linkchecker (1.3.14) unstable; urgency=low
* better debugging output
-- Bastian Kleineidam <calvin@debian.org> Fri, 4 Jan 2002 19:13:50 +0100
linkchecker (1.3.13) unstable; urgency=low
* remove deleted source files from po/Makefile

View file

@ -449,8 +449,7 @@ class Configuration(UserDict.UserDict):
self["recursionlevel"] = num
except ConfigParser.Error: pass
try:
self["robotstxt"] = cfgparser.getboolean(section,
"robotstxt")
self["robotstxt"] = cfgparser.getboolean(section, "robotstxt")
except ConfigParser.Error: pass
try: self["strict"] = cfgparser.getboolean(section, "strict")
except ConfigParser.Error: pass

View file

@ -17,6 +17,7 @@
import httplib, urlparse, sys, time, re
import Config, StringUtil, robotparser
robotparser.debug = 1
from UrlData import UrlData
from urllib import splittype, splithost, splituser, splitpasswd
from linkcheck import _
@ -219,12 +220,14 @@ class HttpUrlData(UrlData):
self.urlConnection.endheaders()
return self.urlConnection.getreply()
def _getHTTPObject(self, host):
h = httplib.HTTP()
h.set_debuglevel(Config.DebugLevel)
h.connect(host)
return h
def getContent(self):
if not self.has_content:
self.has_content = 1
@ -238,7 +241,7 @@ class HttpUrlData(UrlData):
Config.debug(HURT_ME_PLENTY, "comment spans", self.html_comments)
return self.data
def isHtml(self):
if not (self.valid and self.mime):
return 0
@ -246,9 +249,12 @@ class HttpUrlData(UrlData):
def robotsTxtAllowsUrl(self, config):
roboturl="%s://%s/robots.txt" % self.urlTuple[0:2]
roboturl = "%s://%s/robots.txt" % self.urlTuple[0:2]
Config.debug(HURT_ME_PLENTY, "robots.txt url", roboturl)
Config.debug(HURT_ME_PLENTY, "url", self.url)
if not config.robotsTxtCache_has_key(roboturl):
rp = robotparser.RobotFileParser(roboturl)
rp = robotparser.RobotFileParser()
rp.set_url(roboturl)
rp.read()
config.robotsTxtCache_set(roboturl, rp)
rp = config.robotsTxtCache_get(roboturl)

View file

@ -21,7 +21,8 @@ import sys
if sys.version[:5] < "2.0":
raise SystemExit, "This program requires Python 2.0 or later."
import getopt, re, os, urlparse, linkcheck
import getopt, re, os, urlparse, pprint, linkcheck
from linkcheck.debuglevels import *
from linkcheck import _,StringUtil
if os.name!='nt':
from linkcheck import timeoutsocket
@ -189,6 +190,11 @@ except getopt.error:
type, value = sys.exc_info()[:2]
printUsage(value)
# set debug level as early as possible
for opt,arg in options:
if opt=="-D" or opt=="--debug":
linkcheck.Config.DebugLevel += 1
linkcheck.Config.debug(BRING_IT_ON, "Python", sys.version, "on", sys.platform)
# apply configuration
config = linkcheck.Config.Configuration()
configfiles = []
@ -196,7 +202,9 @@ for opt,arg in options:
if opt=="-f" or opt=="--config":
configfiles.append(arg)
config.read(configfiles)
# disable threading for debugging
if linkcheck.Config.DebugLevel > 0:
config.disableThreading()
# apply options and arguments
_user = "anonymous"
_password = "guest@"
@ -205,10 +213,6 @@ for opt,arg in options:
if opt=="-a" or opt=="--anchors":
config["anchors"] = 1
elif opt=="-D" or opt=="--debug":
linkcheck.Config.DebugLevel += 1
config.disableThreading()
elif opt=="-e" or opt=="--extern":
config["externlinks"].append((re.compile(arg), 0))
@ -320,6 +324,8 @@ if config["log"].__class__ == linkcheck.Logging.BlacklistLogger and \
os.path.exists(config['log'].filename):
args = open(config['log'].filename).readlines()
linkcheck.Config.debug(HURT_ME_PLENTY, pprint.pformat(config.data))
if len(args)==0:
if config['interactive']:
urls = raw_input(_("enter one or more urls, separated by white-space\n--> "))

View file

@ -99,7 +99,7 @@ threads=5
anchors=0
recursionlevel=1
# obey robots.txt exclusion?
robotstxt=0
robotstxt=1
# overall strict checking. You can specify for each extern URL
# separately if it's strict or not. See the [filtering] section
strict=0

View file

@ -129,7 +129,7 @@ myname = "Bastian Kleineidam"
myemail = "calvin@users.sourceforge.net"
setup (name = "linkchecker",
version = "1.3.13",
version = "1.3.14",
description = "check HTML documents for broken links",
author = myname,
author_email = myemail,