exception and log handling

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@103 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2000-06-10 18:06:43 +00:00
parent 91b8c8a897
commit f82ecca558
17 changed files with 85 additions and 60 deletions

View file

@ -44,10 +44,9 @@ clean:
rm -rf $(ALLPACKAGES) $(PACKAGE)-out.*
dist:
python setup.py sdist
python setup.py bdist_rpm
python setup.py sdist bdist_rpm
fakeroot debian/rules binary
files:
./$(PACKAGE) $(LCOPTS) $(PROXY) -i$(HOST) http://$(HOST)/~calvin/

16
README
View file

@ -61,6 +61,7 @@ For German output execute "export LC_MESSAGES=de" in bash and
"setenv LC_MESSAGES de" in tcsh.
Under Windows, execute "set LC_MESSAGES=de".
Code design
-----------
Only if you want to hack on the code.
@ -71,8 +72,10 @@ commandline options and stores them in a Config object.
(2) Which leads us directly to the Config class. This class stores all
options and works a little magic: it tries to find out if your platform
supports threads. If so, they are enabled. If not, they are disabled.
Note: several functions are replaced with their non-threaded
Several functions are replaced with their non-threaded
equivalents if threading is disabled.
Config files are another matter. A config object reads config file options
on initialization so they get handled before any commandline options.
(3) The linkchecker script finally calls linkcheck.checkUrls(), which
calls linkcheck.Config.checkUrl(), which calls
@ -90,3 +93,14 @@ the subclasses define functions needed for their URL type.
Each logger has functions init(), newUrl() and endOfOutput().
You call init() once to initialize the Logger, newUrl() for each new URL
we checked and endOfOutput() when all URLs are checked. Easy.
New loggers are created with the Config.newLogger(name, fileoutput) function.
Nifty features you did not expect
---------------------------------
o Included brain enhancer. Just read Python code to gain intelligence.
o Wash-O-matic. LinkChecker has a secret option which washes all your
dirty clothes in a matter of seconds.
o Y2K-Compatible(tm) guarantee. The fact that you can read this text
in the Millennium age is proof enough!
o Self destruction option (also called kamikaze option). Banzaaaiiii!
o There is no spoon. Wake up already!

2
TODO
View file

@ -14,5 +14,5 @@ o other translations
o Parse GML output and make a site map graphic (PNG format)
Use an existing layout algorithm.
Low priority

2
debian/changelog vendored
View file

@ -17,6 +17,8 @@ linkchecker (1.2.3) unstable; urgency=low
* fix for incorrect line number in logger output (reported by Michael
Schmitz)
* Debian package is now lintian clean
* Only catch some exceptions in main check loop so the KeyboardInterrupt
exception propagates through
-- Bastian Kleineidam <calvin@users.sourceforge.net> Sat, 27 May 2000 00:01:34 +0200

4
lc.cgi
View file

@ -4,7 +4,7 @@ import re,cgi,sys,urlparse,time,os
sys.stderr = sys.stdout
# begin user configuration
dist_dir = "/home/calvin/linkchecker"
dist_dir = "/home/calvin/projects/linkchecker"
cgi.logfile = "linkchecker.log" # must be an existing file
# end user configuration
@ -29,7 +29,7 @@ if not linkcheck.lc_cgi.checkform(form):
sys.exit(0)
config = linkcheck.Config.Configuration()
config["recursionlevel"] = int(form["level"].value)
config["log"] = linkcheck.Logging.HtmlLogger()
config.newLogger('html')
if form.has_key("anchors"): config["anchors"] = 1
if not form.has_key("errors"): config["verbose"] = 1
if form.has_key("intern"):

View file

@ -20,7 +20,7 @@ try:
continue
config = linkcheck.Config.Configuration()
config["recursionlevel"] = int(form["level"].value)
config["log"] = linkcheck.Logging.HtmlLogger(req.out)
config["log"] = config.newLogger('html', {'fd':req.out})
config.disableThreading()
if form.has_key("anchors"): config["anchors"] = 1
if not form.has_key("errors"): config["verbose"] = 1

View file

@ -17,7 +17,7 @@ def func(fcg, req):
thread.exit()
config = linkcheck.Config.Configuration()
config["recursionlevel"] = int(form["level"].value)
config["log"] = linkcheck.Logging.HtmlLogger(req.out)
config["log"] = config.newLogger('html', {'fd':req.out})
config.disableThreading()
if form.has_key("anchors"): config["anchors"] = 1
if not form.has_key("errors"): config["verbose"] = 1

View file

@ -27,6 +27,14 @@ from types import StringType
import Logging
from linkcheck import _
def dictjoin(d1, d2):
    """Return a new dictionary holding the entries of d1 and d2.

    Entries are copied from d1 first and from d2 second, so for a key
    present in both the value from d2 wins. Neither argument is modified.
    """
    joined = {}
    # Later sources overwrite earlier ones; the order of the tuple matters.
    for source in (d1, d2):
        for key in source.keys():
            joined[key] = source[key]
    return joined
Version = "1.2.3"
AppName = "LinkChecker"
App = AppName+" "+Version
@ -107,7 +115,7 @@ class Configuration(UserDict.UserDict):
}
ESC="\x1b"
self.data['colored'] = {
"filename": "linkchecker-out.ansi",
"filename": "linkchecker-out.ansi",
'colorparent': ESC+"[37m", # white
'colorurl': ESC+"[0m", # standard
'colorreal': ESC+"[36m", # cyan
@ -134,7 +142,7 @@ class Configuration(UserDict.UserDict):
self.data['blacklist'] = {
"filename": "~/.blacklist",
}
self.newLogger('text')
self.data['log'] = self.newLogger('text')
self.data["quiet"] = 0
self.data["warningregex"] = None
self.data["nntpserver"] = os.environ.get("NNTP_SERVER",None)
@ -237,13 +245,8 @@ class Configuration(UserDict.UserDict):
def robotsTxtCache_set_NoThreads(self, key, val):
self.robotsTxtCache[key] = val
def newLogger(self, name, fileout=0):
if fileout:
self.data['fileoutput'].append(apply(Loggers[name], (fileout,),
self.data[name]))
else:
self.data['log'] = apply(Loggers[name], (fileout,),
self.data[name])
def newLogger(self, name, dict={}):
return apply(Loggers[name], (), dictjoin(self.data[name],dict))
def log_newUrl_NoThreads(self, url):
if not self.data["quiet"]: self.data["log"].newUrl(url)
@ -384,9 +387,9 @@ class Configuration(UserDict.UserDict):
try:
log = cfgparser.get(section, "log")
if Loggers.has_key(log):
self.newLogger(log)
self.data['log'] = self.newLogger(log)
else:
self.warn("invalid log option "+log)
self.warn(_("invalid log option '%s'") % log)
except ConfigParser.Error: pass
try:
if cfgparser.getboolean(section, "verbose"):
@ -402,7 +405,8 @@ class Configuration(UserDict.UserDict):
for arg in filelist:
# no file output for the blacklist Logger
if Loggers.has_key(arg) and arg != "blacklist":
self.newLogger(arg, 1)
self.data['fileoutput'].append(
self.newLogger(arg, {'fileoutput':1}))
except ConfigParser.Error: pass
for key in Loggers.keys():
if cfgparser.has_section(key):
@ -427,7 +431,7 @@ class Configuration(UserDict.UserDict):
try:
num = cfgparser.getint(section, "recursionlevel")
if num<0:
self.error("illegal recursionlevel number: "+`num`)
self.error(_("illegal recursionlevel number %d") % num)
self.data["recursionlevel"] = num
except ConfigParser.Error: pass
try:

View file

@ -15,7 +15,7 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
"""
import ftplib
import ftplib,linkcheck
from UrlData import UrlData
from linkcheck import _
@ -31,7 +31,7 @@ class FtpUrlData(UrlData):
info = self.urlConnection.getwelcome()
if not info:
self.closeConnection()
raise Exception, _("Got no answer from FTP server")
raise linkcheck.error, _("Got no answer from FTP server")
self.setInfo(info)
def closeConnection(self):

View file

@ -30,8 +30,7 @@ endOfOutput(self)
Called at the end of checking to close filehandles and such.
Passing parameters to the constructor:
__init__(self, fileoutput=None, **args)
The fileoutput flag specifies if output goes to a file.
__init__(self, **args)
The args dictionary is filled in Config.py. There you can specify
default parameters. Adjust these parameters in the configuration
files in the appropriate logger section.
@ -74,12 +73,14 @@ class StandardLogger:
Unknown keywords will be ignored.
"""
def __init__(self, fileout=None, **args):
def __init__(self, **args):
self.errors=0
self.warnings=0
if fileout:
if args.has_key('fileoutput'):
self.fd = open(args['filename'], "w")
else:
elif args.has_key('fd'):
self.fd = args['fd']
else:
self.fd = sys.stdout
@ -157,8 +158,8 @@ class StandardLogger:
class HtmlLogger(StandardLogger):
"""Logger with HTML output"""
def __init__(self, fileout=None, **args):
apply(StandardLogger.__init__, (self, fileout), args)
def __init__(self, **args):
StandardLogger.__init__(self, args)
self.colorbackground = args['colorbackground']
self.colorurl = args['colorurl']
self.colorborder = args['colorborder']
@ -266,8 +267,8 @@ class HtmlLogger(StandardLogger):
class ColoredLogger(StandardLogger):
"""ANSI colorized output"""
def __init__(self, fileout=None, **args):
apply(StandardLogger.__init__, (self, fileout), args)
def __init__(self, **args):
StandardLogger.__init__(self, args)
self.colorparent = args['colorparent']
self.colorurl = args['colorurl']
self.colorreal = args['colorreal']
@ -373,8 +374,8 @@ class GMLLogger(StandardLogger):
"""GML means Graph Modeling Language. Use a GML tool to see
your sitemap graph.
"""
def __init__(self, fileout=None, **args):
apply(StandardLogger.__init__, (self, fileout), args)
def __init__(self, **args):
    """Initialize the base logger state and start with an empty node list.

    All keyword arguments are forwarded unchanged to StandardLogger.
    """
    # StandardLogger.__init__ takes keyword arguments only, so the
    # dictionary must be expanded with apply(); passing it as a
    # positional argument raises a TypeError.
    apply(StandardLogger.__init__, (self,), args)
    self.nodes = []
def init(self):
@ -431,8 +432,8 @@ class GMLLogger(StandardLogger):
class SQLLogger(StandardLogger):
""" SQL output for PostgreSQL, not tested"""
def __init__(self, fileout=None, **args):
apply(StandardLogger.__init__, (self, fileout), args)
def __init__(self, **args):
StandardLogger.__init__(self, args)
self.dbname = args['dbname']
self.separator = args['separator']
@ -481,7 +482,8 @@ class BlacklistLogger:
is working (again), it is removed from the list. So after n days
we have only links on the list which failed for n days.
"""
def __init__(self, fileout=None, **args):
def __init__(self, **args):
self.errors = 0
self.blacklist = {}
self.filename = args['filename']
@ -492,6 +494,7 @@ class BlacklistLogger:
if urlData.valid:
self.blacklist[urlData.getCacheKey()] = None
elif not urlData.cached:
self.errors = 1
self.blacklist[urlData.getCacheKey()] = urlData
def endOfOutput(self):
@ -506,8 +509,8 @@ class CSVLogger(StandardLogger):
""" CSV output. CSV consists of one line per entry. Entries are
separated by a semicolon.
"""
def __init__(self, fileout=None, **args):
apply(StandardLogger.__init__, (self, fileout), args)
def __init__(self, **args):
    """Initialize the base logger state and remember the field separator.

    All keyword arguments are forwarded unchanged to StandardLogger.
    The 'separator' keyword is required; a KeyError is raised without it.
    """
    # StandardLogger.__init__ takes keyword arguments only, so the
    # dictionary must be expanded with apply(); passing it as a
    # positional argument raises a TypeError.
    apply(StandardLogger.__init__, (self,), args)
    self.separator = args['separator']
def init(self):

View file

@ -15,11 +15,10 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
"""
import re,string,DNS,sys,Config,cgi,urllib
import re,string,DNS,sys,Config,cgi,urllib,linkcheck
from rfc822 import AddressList
from HostCheckingUrlData import HostCheckingUrlData
from smtplib import SMTP
from UrlData import LinkCheckerException
from linkcheck import _
@ -104,7 +103,7 @@ class MailtoUrlData(HostCheckingUrlData):
return tuple(split)
if len(split)==1:
return (split[0], "localhost")
raise LinkCheckerException, _("could not split the mail adress")
raise linkcheck.error, _("could not split the mail adress")
def closeConnection(self):

View file

@ -15,9 +15,8 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
"""
import re,string,time,nntplib
import re,string,time,nntplib,linkcheck
from HostCheckingUrlData import HostCheckingUrlData
from UrlData import LinkCheckerException
from linkcheck import _
nntp_re = re.compile("^news:[\w.\-]+$")
@ -28,7 +27,7 @@ class NntpUrlData(HostCheckingUrlData):
def buildUrl(self):
HostCheckingUrlData.buildUrl(self)
if not nntp_re.match(self.urlName):
raise LinkCheckerException, _("Illegal NNTP link syntax")
raise linkcheck.error, _("Illegal NNTP link syntax")
self.host = string.lower(self.urlName[5:])

View file

@ -15,9 +15,8 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
"""
import telnetlib,re,string
import telnetlib,re,string,linkcheck
from HostCheckingUrlData import HostCheckingUrlData
from UrlData import LinkCheckerException
from linkcheck import _
# regular expression for syntax checking
@ -29,7 +28,7 @@ class TelnetUrlData(HostCheckingUrlData):
def buildUrl(self):
HostCheckingUrlData.buildUrl(self)
if not telnet_re.match(self.urlName):
raise LinkCheckerException, _("Illegal telnet link syntax")
raise linkcheck.error, _("Illegal telnet link syntax")
self.host = string.lower(self.urlName[7:])

View file

@ -16,9 +16,16 @@
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
"""
import sys,re,string,urlparse,urllib,time
import Config,StringUtil
import Config,StringUtil,linkcheck
from linkcheck import _
ExcList = [IOError, linkcheck.error]
try:
import socket
ExcList.append(socket.error)
except ImportError:
pass
LinkTags = [("a", "href"),
("img", "src"),
("form", "action"),
@ -30,9 +37,6 @@ LinkTags = [("a", "href"),
("area", "href")]
class LinkCheckerException(Exception):
pass
class UrlData:
"Representing a URL with additional information like validity etc"
@ -128,7 +132,7 @@ class UrlData:
try:
self.buildUrl()
self.extern = self._getExtern(config)
except LinkCheckerException:
except linkcheck.error:
type, value = sys.exc_info()[:2]
self.setError(str(value))
self.logMe(config)
@ -155,8 +159,7 @@ class UrlData:
self.checkConnection(config)
if self.urlTuple and config["anchors"]:
self.checkAnchors(self.urlTuple[5])
# XXX should only catch some exceptions, not all!
except:
except tuple(ExcList):
type, value = sys.exc_info()[:2]
self.setError(str(value))

View file

@ -20,6 +20,8 @@ Here we find the main function to call: checkUrls.
This is the only entry point into the linkcheck module and is used
of course by the linkchecker script.
"""
class error(Exception):
    """Exception raised by the URL checker classes.

    It is raised for link-specific failures such as illegal NNTP or
    telnet link syntax, an unsplittable mail address or a silent FTP
    server. UrlData catches it and stores the message as the error text
    of the URL.
    """
    pass
# i18n support
try:

View file

@ -196,14 +196,15 @@ for opt,arg in options:
elif opt=="-o" or opt=="--output":
if linkcheck.Config.Loggers.has_key(arg):
config.newLogger(arg)
config['log'] = config.newLogger(arg)
else:
printUsage((_("Illegal argument '%s' for option ") % arg) +\
"'-o, --output'")
elif opt=="-F" or opt=="--file-output":
if linkcheck.Config.Loggers.has_key(arg) and arg != "blacklist":
config.newLogger(arg, 1)
config['fileoutput'].append(
config.newLogger(arg, {'fileoutput':1}))
else:
printUsage((_("Illegal argument '%s' for option ") % arg) +\
"'-F, --file-output'")

View file

@ -113,10 +113,10 @@ o internationalization support
distclass = LCDistribution,
packages = ['','DNS','linkcheck'],
scripts = ['linkchecker'],
data_files = [('locale/de/LC_MESSAGES',
data_files = [('share/locale/de/LC_MESSAGES',
['locale/de/LC_MESSAGES/linkcheck.mo',
'locale/de/LC_MESSAGES/linkcheck.po']),
('locale/fr/LC_MESSAGES',
('share/locale/fr/LC_MESSAGES',
['locale/fr/LC_MESSAGES/linkcheck.mo',
'locale/fr/LC_MESSAGES/linkcheck.po']),
],