mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-01 03:24:43 +00:00
debugging improvements
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@349 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
0d6c16a83a
commit
4824c59f05
6 changed files with 30 additions and 13 deletions
6
debian/changelog
vendored
6
debian/changelog
vendored
|
|
@ -1,3 +1,9 @@
|
|||
linkchecker (1.3.14) unstable; urgency=low
|
||||
|
||||
* better debugging output
|
||||
|
||||
-- Bastian Kleineidam <calvin@debian.org> Fri, 4 Jan 2002 19:13:50 +0100
|
||||
|
||||
linkchecker (1.3.13) unstable; urgency=low
|
||||
|
||||
* remove deleted source files from po/Makefile
|
||||
|
|
|
|||
|
|
@ -449,8 +449,7 @@ class Configuration(UserDict.UserDict):
|
|||
self["recursionlevel"] = num
|
||||
except ConfigParser.Error: pass
|
||||
try:
|
||||
self["robotstxt"] = cfgparser.getboolean(section,
|
||||
"robotstxt")
|
||||
self["robotstxt"] = cfgparser.getboolean(section, "robotstxt")
|
||||
except ConfigParser.Error: pass
|
||||
try: self["strict"] = cfgparser.getboolean(section, "strict")
|
||||
except ConfigParser.Error: pass
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
import httplib, urlparse, sys, time, re
|
||||
import Config, StringUtil, robotparser
|
||||
robotparser.debug = 1
|
||||
from UrlData import UrlData
|
||||
from urllib import splittype, splithost, splituser, splitpasswd
|
||||
from linkcheck import _
|
||||
|
|
@ -219,12 +220,14 @@ class HttpUrlData(UrlData):
|
|||
self.urlConnection.endheaders()
|
||||
return self.urlConnection.getreply()
|
||||
|
||||
|
||||
def _getHTTPObject(self, host):
|
||||
h = httplib.HTTP()
|
||||
h.set_debuglevel(Config.DebugLevel)
|
||||
h.connect(host)
|
||||
return h
|
||||
|
||||
|
||||
def getContent(self):
|
||||
if not self.has_content:
|
||||
self.has_content = 1
|
||||
|
|
@ -238,7 +241,7 @@ class HttpUrlData(UrlData):
|
|||
Config.debug(HURT_ME_PLENTY, "comment spans", self.html_comments)
|
||||
return self.data
|
||||
|
||||
|
||||
|
||||
def isHtml(self):
|
||||
if not (self.valid and self.mime):
|
||||
return 0
|
||||
|
|
@ -246,9 +249,12 @@ class HttpUrlData(UrlData):
|
|||
|
||||
|
||||
def robotsTxtAllowsUrl(self, config):
|
||||
roboturl="%s://%s/robots.txt" % self.urlTuple[0:2]
|
||||
roboturl = "%s://%s/robots.txt" % self.urlTuple[0:2]
|
||||
Config.debug(HURT_ME_PLENTY, "robots.txt url", roboturl)
|
||||
Config.debug(HURT_ME_PLENTY, "url", self.url)
|
||||
if not config.robotsTxtCache_has_key(roboturl):
|
||||
rp = robotparser.RobotFileParser(roboturl)
|
||||
rp = robotparser.RobotFileParser()
|
||||
rp.set_url(roboturl)
|
||||
rp.read()
|
||||
config.robotsTxtCache_set(roboturl, rp)
|
||||
rp = config.robotsTxtCache_get(roboturl)
|
||||
|
|
|
|||
18
linkchecker
18
linkchecker
|
|
@ -21,7 +21,8 @@ import sys
|
|||
if sys.version[:5] < "2.0":
|
||||
raise SystemExit, "This program requires Python 2.0 or later."
|
||||
|
||||
import getopt, re, os, urlparse, linkcheck
|
||||
import getopt, re, os, urlparse, pprint, linkcheck
|
||||
from linkcheck.debuglevels import *
|
||||
from linkcheck import _,StringUtil
|
||||
if os.name!='nt':
|
||||
from linkcheck import timeoutsocket
|
||||
|
|
@ -189,6 +190,11 @@ except getopt.error:
|
|||
type, value = sys.exc_info()[:2]
|
||||
printUsage(value)
|
||||
|
||||
# set debug level as early as possible
|
||||
for opt,arg in options:
|
||||
if opt=="-D" or opt=="--debug":
|
||||
linkcheck.Config.DebugLevel += 1
|
||||
linkcheck.Config.debug(BRING_IT_ON, "Python", sys.version, "on", sys.platform)
|
||||
# apply configuration
|
||||
config = linkcheck.Config.Configuration()
|
||||
configfiles = []
|
||||
|
|
@ -196,7 +202,9 @@ for opt,arg in options:
|
|||
if opt=="-f" or opt=="--config":
|
||||
configfiles.append(arg)
|
||||
config.read(configfiles)
|
||||
|
||||
# disable threading for debugging
|
||||
if linkcheck.Config.DebugLevel > 0:
|
||||
config.disableThreading()
|
||||
# apply options and arguments
|
||||
_user = "anonymous"
|
||||
_password = "guest@"
|
||||
|
|
@ -205,10 +213,6 @@ for opt,arg in options:
|
|||
if opt=="-a" or opt=="--anchors":
|
||||
config["anchors"] = 1
|
||||
|
||||
elif opt=="-D" or opt=="--debug":
|
||||
linkcheck.Config.DebugLevel += 1
|
||||
config.disableThreading()
|
||||
|
||||
elif opt=="-e" or opt=="--extern":
|
||||
config["externlinks"].append((re.compile(arg), 0))
|
||||
|
||||
|
|
@ -320,6 +324,8 @@ if config["log"].__class__ == linkcheck.Logging.BlacklistLogger and \
|
|||
os.path.exists(config['log'].filename):
|
||||
args = open(config['log'].filename).readlines()
|
||||
|
||||
linkcheck.Config.debug(HURT_ME_PLENTY, pprint.pformat(config.data))
|
||||
|
||||
if len(args)==0:
|
||||
if config['interactive']:
|
||||
urls = raw_input(_("enter one or more urls, separated by white-space\n--> "))
|
||||
|
|
|
|||
|
|
@ -99,7 +99,7 @@ threads=5
|
|||
anchors=0
|
||||
recursionlevel=1
|
||||
# obey robots.txt exclusion?
|
||||
robotstxt=0
|
||||
robotstxt=1
|
||||
# overall strict checking. You can specify for each extern URL
|
||||
# separately if it's strict or not. See the [filtering] section
|
||||
strict=0
|
||||
|
|
|
|||
2
setup.py
2
setup.py
|
|
@ -129,7 +129,7 @@ myname = "Bastian Kleineidam"
|
|||
myemail = "calvin@users.sourceforge.net"
|
||||
|
||||
setup (name = "linkchecker",
|
||||
version = "1.3.13",
|
||||
version = "1.3.14",
|
||||
description = "check HTML documents for broken links",
|
||||
author = myname,
|
||||
author_email = myemail,
|
||||
|
|
|
|||
Loading…
Reference in a new issue