mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-19 05:41:00 +00:00
exception and log handling
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@103 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
91b8c8a897
commit
f82ecca558
17 changed files with 85 additions and 60 deletions
5
Makefile
5
Makefile
|
|
@ -44,10 +44,9 @@ clean:
|
|||
rm -rf $(ALLPACKAGES) $(PACKAGE)-out.*
|
||||
|
||||
dist:
|
||||
python setup.py sdist
|
||||
python setup.py bdist_rpm
|
||||
python setup.py sdist bdist_rpm
|
||||
fakeroot debian/rules binary
|
||||
|
||||
|
||||
files:
|
||||
./$(PACKAGE) $(LCOPTS) $(PROXY) -i$(HOST) http://$(HOST)/~calvin/
|
||||
|
||||
|
|
|
|||
16
README
16
README
|
|
@ -61,6 +61,7 @@ For german output execute "export LC_MESSAGES=de" in bash and
|
|||
"setenv LC_MESSAGES de" in tcsh.
|
||||
Under Windows, execute "set LC_MESSAGES=de".
|
||||
|
||||
|
||||
Code design
|
||||
-----------
|
||||
Only if you want to hack on the code.
|
||||
|
|
@ -71,8 +72,10 @@ commandline options and stores them in a Config object.
|
|||
(2) Which leads us directly to the Config class. This class stores all
|
||||
options and works a little magic: it tries to find out if your platform
|
||||
supports threads. If so, they are enabled. If not, they are disabled.
|
||||
Note: several functions are replaced with their non-threaded
|
||||
Several functions are replaced with their non-threaded
|
||||
equivalents if threading is disabled.
|
||||
Config files are another matter. A config object reads config file options
|
||||
on initialization so they get handled before any commandline options.
|
||||
|
||||
(3) The linkchecker script finally calls linkcheck.checkUrls(), which
|
||||
calls linkcheck.Config.checkUrl(), which calls
|
||||
|
|
@ -90,3 +93,14 @@ the subclasses define functions needed for their URL type.
|
|||
Each logger has functions init(), newUrl() and endOfOutput().
|
||||
You call init() once to initialize the Logger, newUrl() for each new URL
|
||||
we checked and endOfOutput() when all URLs are checked. Easy.
|
||||
New loggers are created with the Config.newLogger(name, fileoutput) function.
|
||||
|
||||
Nifty features you did not expect
|
||||
---------------------------------
|
||||
o Included brain enhancer. Just read Python code to gain intelligence.
|
||||
o Wash-O-matic. LinkChecker has a secret option which washes all your
|
||||
dirty clothes in a matter of seconds.
|
||||
o Y2K-Compatible(tm) guarantee. The fact that you can read this text
|
||||
in the Millennium age is proof enough!
|
||||
o Self destruction option (also called kamikaze option). Banzaaaiiii!
|
||||
o There is no spoon. Wake up already!
|
||||
|
|
|
|||
2
TODO
2
TODO
|
|
@ -14,5 +14,5 @@ o other translations
|
|||
|
||||
o Parse GML output and make a site map graphic (PNG format)
|
||||
Use an existing layout algorithm.
|
||||
|
||||
|
||||
Low priority
|
||||
|
|
|
|||
2
debian/changelog
vendored
2
debian/changelog
vendored
|
|
@ -17,6 +17,8 @@ linkchecker (1.2.3) unstable; urgency=low
|
|||
* fix for incorrect line number in logger output (reported by Michael
|
||||
Schmitz)
|
||||
* Debian package is now lintian clean
|
||||
* Only catch some exceptions in main check loop so the KeyboardInterrupt
|
||||
exception propagates through
|
||||
|
||||
-- Bastian Kleineidam <calvin@users.sourceforge.net> Sat, 27 May 2000 00:01:34 +0200
|
||||
|
||||
|
|
|
|||
4
lc.cgi
4
lc.cgi
|
|
@ -4,7 +4,7 @@ import re,cgi,sys,urlparse,time,os
|
|||
sys.stderr = sys.stdout
|
||||
|
||||
# begin user configuration
|
||||
dist_dir = "/home/calvin/linkchecker"
|
||||
dist_dir = "/home/calvin/projects/linkchecker"
|
||||
cgi.logfile = "linkchecker.log" # must be an existing file
|
||||
# end user configuration
|
||||
|
||||
|
|
@ -29,7 +29,7 @@ if not linkcheck.lc_cgi.checkform(form):
|
|||
sys.exit(0)
|
||||
config = linkcheck.Config.Configuration()
|
||||
config["recursionlevel"] = int(form["level"].value)
|
||||
config["log"] = linkcheck.Logging.HtmlLogger()
|
||||
config.newLogger('html')
|
||||
if form.has_key("anchors"): config["anchors"] = 1
|
||||
if not form.has_key("errors"): config["verbose"] = 1
|
||||
if form.has_key("intern"):
|
||||
|
|
|
|||
2
lc.fcgi
2
lc.fcgi
|
|
@ -20,7 +20,7 @@ try:
|
|||
continue
|
||||
config = linkcheck.Config.Configuration()
|
||||
config["recursionlevel"] = int(form["level"].value)
|
||||
config["log"] = linkcheck.Logging.HtmlLogger(req.out)
|
||||
config["log"] = config.newLogger('html', {'fd':req.out})
|
||||
config.disableThreading()
|
||||
if form.has_key("anchors"): config["anchors"] = 1
|
||||
if not form.has_key("errors"): config["verbose"] = 1
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ def func(fcg, req):
|
|||
thread.exit()
|
||||
config = linkcheck.Config.Configuration()
|
||||
config["recursionlevel"] = int(form["level"].value)
|
||||
config["log"] = linkcheck.Logging.HtmlLogger(req.out)
|
||||
config["log"] = config.newLogger('html', {'fd':req.out})
|
||||
config.disableThreading()
|
||||
if form.has_key("anchors"): config["anchors"] = 1
|
||||
if not form.has_key("errors"): config["verbose"] = 1
|
||||
|
|
|
|||
|
|
@ -27,6 +27,14 @@ from types import StringType
|
|||
import Logging
|
||||
from linkcheck import _
|
||||
|
||||
def dictjoin(d1, d2):
|
||||
d = {}
|
||||
for key in d1.keys():
|
||||
d[key] = d1[key]
|
||||
for key in d2.keys():
|
||||
d[key] = d2[key]
|
||||
return d
|
||||
|
||||
Version = "1.2.3"
|
||||
AppName = "LinkChecker"
|
||||
App = AppName+" "+Version
|
||||
|
|
@ -107,7 +115,7 @@ class Configuration(UserDict.UserDict):
|
|||
}
|
||||
ESC="\x1b"
|
||||
self.data['colored'] = {
|
||||
"filename": "linkchecker-out.ansi",
|
||||
"filename": "linkchecker-out.ansi",
|
||||
'colorparent': ESC+"[37m", # white
|
||||
'colorurl': ESC+"[0m", # standard
|
||||
'colorreal': ESC+"[36m", # cyan
|
||||
|
|
@ -134,7 +142,7 @@ class Configuration(UserDict.UserDict):
|
|||
self.data['blacklist'] = {
|
||||
"filename": "~/.blacklist",
|
||||
}
|
||||
self.newLogger('text')
|
||||
self.data['log'] = self.newLogger('text')
|
||||
self.data["quiet"] = 0
|
||||
self.data["warningregex"] = None
|
||||
self.data["nntpserver"] = os.environ.get("NNTP_SERVER",None)
|
||||
|
|
@ -237,13 +245,8 @@ class Configuration(UserDict.UserDict):
|
|||
def robotsTxtCache_set_NoThreads(self, key, val):
|
||||
self.robotsTxtCache[key] = val
|
||||
|
||||
def newLogger(self, name, fileout=0):
|
||||
if fileout:
|
||||
self.data['fileoutput'].append(apply(Loggers[name], (fileout,),
|
||||
self.data[name]))
|
||||
else:
|
||||
self.data['log'] = apply(Loggers[name], (fileout,),
|
||||
self.data[name])
|
||||
def newLogger(self, name, dict={}):
|
||||
return apply(Loggers[name], (), dictjoin(self.data[name],dict))
|
||||
|
||||
def log_newUrl_NoThreads(self, url):
|
||||
if not self.data["quiet"]: self.data["log"].newUrl(url)
|
||||
|
|
@ -384,9 +387,9 @@ class Configuration(UserDict.UserDict):
|
|||
try:
|
||||
log = cfgparser.get(section, "log")
|
||||
if Loggers.has_key(log):
|
||||
self.newLogger(log)
|
||||
self.data['log'] = self.newLogger(log)
|
||||
else:
|
||||
self.warn("invalid log option "+log)
|
||||
self.warn(_("invalid log option '%s'") % log)
|
||||
except ConfigParser.Error: pass
|
||||
try:
|
||||
if cfgparser.getboolean(section, "verbose"):
|
||||
|
|
@ -402,7 +405,8 @@ class Configuration(UserDict.UserDict):
|
|||
for arg in filelist:
|
||||
# no file output for the blacklist Logger
|
||||
if Loggers.has_key(arg) and arg != "blacklist":
|
||||
self.newLogger(arg, 1)
|
||||
self.data['fileoutput'].append(
|
||||
self.newLogger(arg, {'fileoutput':1}))
|
||||
except ConfigParser.Error: pass
|
||||
for key in Loggers.keys():
|
||||
if cfgparser.has_section(key):
|
||||
|
|
@ -427,7 +431,7 @@ class Configuration(UserDict.UserDict):
|
|||
try:
|
||||
num = cfgparser.getint(section, "recursionlevel")
|
||||
if num<0:
|
||||
self.error("illegal recursionlevel number: "+`num`)
|
||||
self.error(_("illegal recursionlevel number %d") % num)
|
||||
self.data["recursionlevel"] = num
|
||||
except ConfigParser.Error: pass
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@
|
|||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
"""
|
||||
import ftplib
|
||||
import ftplib,linkcheck
|
||||
from UrlData import UrlData
|
||||
from linkcheck import _
|
||||
|
||||
|
|
@ -31,7 +31,7 @@ class FtpUrlData(UrlData):
|
|||
info = self.urlConnection.getwelcome()
|
||||
if not info:
|
||||
self.closeConnection()
|
||||
raise Exception, _("Got no answer from FTP server")
|
||||
raise linkcheck.error, _("Got no answer from FTP server")
|
||||
self.setInfo(info)
|
||||
|
||||
def closeConnection(self):
|
||||
|
|
|
|||
|
|
@ -30,8 +30,7 @@ endOfOutput(self)
|
|||
Called at the end of checking to close filehandles and such.
|
||||
|
||||
Passing parameters to the constructor:
|
||||
__init__(self, fileoutput=None, **args)
|
||||
The fileoutput flag specifies if output goes to a file.
|
||||
__init__(self, **args)
|
||||
The args dictionary is filled in Config.py. There you can specify
|
||||
default parameters. Adjust these parameters in the configuration
|
||||
files in the appropriate logger section.
|
||||
|
|
@ -74,12 +73,14 @@ class StandardLogger:
|
|||
Unknown keywords will be ignored.
|
||||
"""
|
||||
|
||||
def __init__(self, fileout=None, **args):
|
||||
def __init__(self, **args):
|
||||
self.errors=0
|
||||
self.warnings=0
|
||||
if fileout:
|
||||
if args.has_key('fileoutput'):
|
||||
self.fd = open(args['filename'], "w")
|
||||
else:
|
||||
elif args.has_key('fd'):
|
||||
self.fd = args['fd']
|
||||
else:
|
||||
self.fd = sys.stdout
|
||||
|
||||
|
||||
|
|
@ -157,8 +158,8 @@ class StandardLogger:
|
|||
class HtmlLogger(StandardLogger):
|
||||
"""Logger with HTML output"""
|
||||
|
||||
def __init__(self, fileout=None, **args):
|
||||
apply(StandardLogger.__init__, (self, fileout), args)
|
||||
def __init__(self, **args):
|
||||
StandardLogger.__init__(self, args)
|
||||
self.colorbackground = args['colorbackground']
|
||||
self.colorurl = args['colorurl']
|
||||
self.colorborder = args['colorborder']
|
||||
|
|
@ -266,8 +267,8 @@ class HtmlLogger(StandardLogger):
|
|||
class ColoredLogger(StandardLogger):
|
||||
"""ANSI colorized output"""
|
||||
|
||||
def __init__(self, fileout=None, **args):
|
||||
apply(StandardLogger.__init__, (self, fileout), args)
|
||||
def __init__(self, **args):
|
||||
StandardLogger.__init__(self, args)
|
||||
self.colorparent = args['colorparent']
|
||||
self.colorurl = args['colorurl']
|
||||
self.colorreal = args['colorreal']
|
||||
|
|
@ -373,8 +374,8 @@ class GMLLogger(StandardLogger):
|
|||
"""GML means Graph Modeling Language. Use a GML tool to see
|
||||
your sitemap graph.
|
||||
"""
|
||||
def __init__(self, fileout=None, **args):
|
||||
apply(StandardLogger.__init__, (self, fileout), args)
|
||||
def __init__(self, **args):
|
||||
StandardLogger.__init__(self, args)
|
||||
self.nodes = []
|
||||
|
||||
def init(self):
|
||||
|
|
@ -431,8 +432,8 @@ class GMLLogger(StandardLogger):
|
|||
|
||||
class SQLLogger(StandardLogger):
|
||||
""" SQL output for PostgreSQL, not tested"""
|
||||
def __init__(self, fileout=None, **args):
|
||||
apply(StandardLogger.__init__, (self, fileout), args)
|
||||
def __init__(self, **args):
|
||||
StandardLogger.__init__(self, args)
|
||||
self.dbname = args['dbname']
|
||||
self.separator = args['separator']
|
||||
|
||||
|
|
@ -481,7 +482,8 @@ class BlacklistLogger:
|
|||
is working (again), it is removed from the list. So after n days
|
||||
we have only links on the list which failed for n days.
|
||||
"""
|
||||
def __init__(self, fileout=None, **args):
|
||||
def __init__(self, **args):
|
||||
self.errors = 0
|
||||
self.blacklist = {}
|
||||
self.filename = args['filename']
|
||||
|
||||
|
|
@ -492,6 +494,7 @@ class BlacklistLogger:
|
|||
if urlData.valid:
|
||||
self.blacklist[urlData.getCacheKey()] = None
|
||||
elif not urlData.cached:
|
||||
self.errors = 1
|
||||
self.blacklist[urlData.getCacheKey()] = urlData
|
||||
|
||||
def endOfOutput(self):
|
||||
|
|
@ -506,8 +509,8 @@ class CSVLogger(StandardLogger):
|
|||
""" CSV output. CSV consists of one line per entry. Entries are
|
||||
separated by a semicolon.
|
||||
"""
|
||||
def __init__(self, fileout=None, **args):
|
||||
apply(StandardLogger.__init__, (self, fileout), args)
|
||||
def __init__(self, **args):
|
||||
StandardLogger.__init__(self, args)
|
||||
self.separator = args['separator']
|
||||
|
||||
def init(self):
|
||||
|
|
|
|||
|
|
@ -15,11 +15,10 @@
|
|||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
"""
|
||||
import re,string,DNS,sys,Config,cgi,urllib
|
||||
import re,string,DNS,sys,Config,cgi,urllib,linkcheck
|
||||
from rfc822 import AddressList
|
||||
from HostCheckingUrlData import HostCheckingUrlData
|
||||
from smtplib import SMTP
|
||||
from UrlData import LinkCheckerException
|
||||
from linkcheck import _
|
||||
|
||||
|
||||
|
|
@ -104,7 +103,7 @@ class MailtoUrlData(HostCheckingUrlData):
|
|||
return tuple(split)
|
||||
if len(split)==1:
|
||||
return (split[0], "localhost")
|
||||
raise LinkCheckerException, _("could not split the mail adress")
|
||||
raise linkcheck.error, _("could not split the mail adress")
|
||||
|
||||
|
||||
def closeConnection(self):
|
||||
|
|
|
|||
|
|
@ -15,9 +15,8 @@
|
|||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
"""
|
||||
import re,string,time,nntplib
|
||||
import re,string,time,nntplib,linkcheck
|
||||
from HostCheckingUrlData import HostCheckingUrlData
|
||||
from UrlData import LinkCheckerException
|
||||
from linkcheck import _
|
||||
|
||||
nntp_re = re.compile("^news:[\w.\-]+$")
|
||||
|
|
@ -28,7 +27,7 @@ class NntpUrlData(HostCheckingUrlData):
|
|||
def buildUrl(self):
|
||||
HostCheckingUrlData.buildUrl(self)
|
||||
if not nntp_re.match(self.urlName):
|
||||
raise LinkCheckerException, _("Illegal NNTP link syntax")
|
||||
raise linkcheck.error, _("Illegal NNTP link syntax")
|
||||
self.host = string.lower(self.urlName[5:])
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -15,9 +15,8 @@
|
|||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
"""
|
||||
import telnetlib,re,string
|
||||
import telnetlib,re,string,linkcheck
|
||||
from HostCheckingUrlData import HostCheckingUrlData
|
||||
from UrlData import LinkCheckerException
|
||||
from linkcheck import _
|
||||
|
||||
# regular expression for syntax checking
|
||||
|
|
@ -29,7 +28,7 @@ class TelnetUrlData(HostCheckingUrlData):
|
|||
def buildUrl(self):
|
||||
HostCheckingUrlData.buildUrl(self)
|
||||
if not telnet_re.match(self.urlName):
|
||||
raise LinkCheckerException, _("Illegal telnet link syntax")
|
||||
raise linkcheck.error, _("Illegal telnet link syntax")
|
||||
self.host = string.lower(self.urlName[7:])
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -16,9 +16,16 @@
|
|||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
"""
|
||||
import sys,re,string,urlparse,urllib,time
|
||||
import Config,StringUtil
|
||||
import Config,StringUtil,linkcheck
|
||||
from linkcheck import _
|
||||
|
||||
ExcList = [IOError, linkcheck.error]
|
||||
try:
|
||||
import socket
|
||||
ExcList.append(socket.error)
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
LinkTags = [("a", "href"),
|
||||
("img", "src"),
|
||||
("form", "action"),
|
||||
|
|
@ -30,9 +37,6 @@ LinkTags = [("a", "href"),
|
|||
("area", "href")]
|
||||
|
||||
|
||||
class LinkCheckerException(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class UrlData:
|
||||
"Representing a URL with additional information like validity etc"
|
||||
|
|
@ -128,7 +132,7 @@ class UrlData:
|
|||
try:
|
||||
self.buildUrl()
|
||||
self.extern = self._getExtern(config)
|
||||
except LinkCheckerException:
|
||||
except linkcheck.error:
|
||||
type, value = sys.exc_info()[:2]
|
||||
self.setError(str(value))
|
||||
self.logMe(config)
|
||||
|
|
@ -155,8 +159,7 @@ class UrlData:
|
|||
self.checkConnection(config)
|
||||
if self.urlTuple and config["anchors"]:
|
||||
self.checkAnchors(self.urlTuple[5])
|
||||
# XXX should only catch some exceptions, not all!
|
||||
except:
|
||||
except tuple(ExcList):
|
||||
type, value = sys.exc_info()[:2]
|
||||
self.setError(str(value))
|
||||
|
||||
|
|
|
|||
|
|
@ -20,6 +20,8 @@ Here we find the main function to call: checkUrls.
|
|||
This is the only entry point into the linkcheck module and is used
|
||||
of course by the linkchecker script.
|
||||
"""
|
||||
class error(Exception):
|
||||
pass
|
||||
|
||||
# i18n support
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -196,14 +196,15 @@ for opt,arg in options:
|
|||
|
||||
elif opt=="-o" or opt=="--output":
|
||||
if linkcheck.Config.Loggers.has_key(arg):
|
||||
config.newLogger(arg)
|
||||
config['log'] = config.newLogger(arg)
|
||||
else:
|
||||
printUsage((_("Illegal argument '%s' for option ") % arg) +\
|
||||
"'-o, --output'")
|
||||
|
||||
elif opt=="-F" or opt=="--file-output":
|
||||
if linkcheck.Config.Loggers.has_key(arg) and arg != "blacklist":
|
||||
config.newLogger(arg, 1)
|
||||
config['fileoutput'].append(
|
||||
config.newLogger(arg, {'fileoutput':1}))
|
||||
else:
|
||||
printUsage((_("Illegal argument '%s' for option ") % arg) +\
|
||||
"'-F, --file-output'")
|
||||
|
|
|
|||
4
setup.py
4
setup.py
|
|
@ -113,10 +113,10 @@ o internationalization support
|
|||
distclass = LCDistribution,
|
||||
packages = ['','DNS','linkcheck'],
|
||||
scripts = ['linkchecker'],
|
||||
data_files = [('locale/de/LC_MESSAGES',
|
||||
data_files = [('share/locale/de/LC_MESSAGES',
|
||||
['locale/de/LC_MESSAGES/linkcheck.mo',
|
||||
'locale/de/LC_MESSAGES/linkcheck.po']),
|
||||
('locale/fr/LC_MESSAGES',
|
||||
('share/locale/fr/LC_MESSAGES',
|
||||
['locale/fr/LC_MESSAGES/linkcheck.mo',
|
||||
'locale/fr/LC_MESSAGES/linkcheck.po']),
|
||||
],
|
||||
|
|
|
|||
Loading…
Reference in a new issue