logger field configuration

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@232 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2001-02-20 09:32:36 +00:00
parent b9454bab2a
commit 6af5fad23d
13 changed files with 465 additions and 452 deletions

View file

@ -8,7 +8,7 @@ NAME = $(shell $(PYTHON) setup.py --name)
HOST=treasure.calvinsplayground.de
#LCOPTS=-ocolored -Ftext -Fhtml -Fgml -Fsql -Fcsv -Fxml -R -t0 -v -s
LCOPTS=-ocolored -Ftext -Fhtml -Fgml -Fsql -Fcsv -Fxml -R -t0 -v -s
DEBPACKAGE = $(PACKAGE)_$(VERSION)_i386.deb
DEBPACKAGE = $(PACKAGE)_$(VERSION)_all.deb $(PACKAGE)-ssl_$(VERSION)_i386.deb
PULLHOST=phoenix.net.uni-sb.de
PULLPATH=/home/calvin/temp/linkchecker
@ -28,7 +28,7 @@ distclean: clean cleandeb
rm -f $(PACKAGE)-out.* VERSION $(PACKAGE)Conf.py MANIFEST Packages.gz
cleandeb:
rm -rf debian/$(PACKAGE) debian/tmp
rm -rf debian/$(PACKAGE) debian/$(PACKAGE)-ssl debian/tmp
rm -f debian/*.debhelper debian/{files,substvars}
rm -f configure-stamp build-stamp
@ -36,11 +36,12 @@ dist: locale
# cleandeb because distutils choke on dangling symlinks
# (linkchecker.1 -> undocumented.1)
$(MAKE) cleandeb
fakeroot debian/rules binary
$(PYTHON) setup.py sdist --formats=gztar,zip bdist_rpm
# extra run without SSL compilation
python setup.py bdist_wininst
fakeroot dpkg-buildpackage -sgpg -pgpg
cp -f ../$(DEBPACKAGE) dist
#fakeroot dpkg-buildpackage -sgpg -pgpg
mv -f ../$(DEBPACKAGE) dist
package:
cd dist && dpkg-scanpackages . ../override.txt | gzip --best > Packages.gz

5
debian/changelog vendored
View file

@ -3,6 +3,11 @@ linkchecker (1.3.0) unstable; urgency=low
* require and use Python >= 2.0
* fix agent matching in robotparser2.py
* added more LinkPatterns (ripped from HTML::Tagset.pm)
* fix ignored configfile settings for loggers
* optional filename argument for -F (patch from
Jamie Heilman <jamie@audible.transient.net>)
* config file option to control which fields the loggers should print
out
-- Bastian Kleineidam <calvin@users.sourceforge.net> Fri, 9 Feb 2001 10:51:24 +0100

16
debian/rules vendored
View file

@ -44,22 +44,16 @@ install: build
# remove example files, we install them with dh_installexamples
rm -rf debian/$(PACKAGE)/usr/share/linkchecker
# install additional doc files
install -d -m 755 $(DOCDIR)/examples
install -c -m 644 DNS/README $(DOCDIR)/README_DNS.txt
install -d -m 755 $(DOCDIR)/tests/linkcheck
install -c -m 644 test/*.html test/*.py test/robots.txt $(DOCDIR)/tests/linkcheck
install -d -m 755 $(DOCDIR)/tests/dns
install -c -m 644 tests/*.py $(DOCDIR)/tests/dns
install -c -m 644 test/*.py $(DOCDIR)/test
install -c -m 644 test/html/*.html $(DOCDIR)/test/html
install -c -m 644 test/output/test_* $(DOCDIR)/test/output
# install system wide configuration file in etc
install -c -m 644 linkcheckerrc debian/$(PACKAGE)/etc
# Build architecture-independent files here.
binary-indep: build install
# We have nothing to do by default.
# Build architecture-dependent files here.
binary-arch: build install
dh_testdir
dh_testroot
# dh_installdebconf
@ -73,6 +67,7 @@ binary-arch: build install
dh_installman linkchecker.1
# dh_installinfo
# dh_undocumented linkchecker.1
dh_movefiles -p"linkchecker-ssl" --sourcedir debian/linkchecker
dh_installchangelogs
# dh_link
dh_strip
@ -86,5 +81,8 @@ binary-arch: build install
dh_md5sums
dh_builddeb
# Build architecture-dependent files here.
binary-arch: build install
binary: binary-indep binary-arch
.PHONY: build clean binary-indep binary-arch binary install configure

View file

@ -106,7 +106,6 @@ class Configuration(UserDict.UserDict):
self["robotstxt"] = 1
self["strict"] = 0
self["fileoutput"] = []
self["loggingfields"] = "all"
# Logger configurations
self["text"] = {
"filename": "linkchecker-out.txt",
@ -155,7 +154,6 @@ class Configuration(UserDict.UserDict):
"filename": "linkchecker-out.xml",
}
self['test'] = {} # no args for test logger
# default values
self['log'] = self.newLogger('text')
self["quiet"] = 0
self["warningregex"] = None
@ -424,22 +422,24 @@ class Configuration(UserDict.UserDict):
self.newLogger(arg, {'fileoutput':1}))
except ConfigParser.Error: pass
for key in Loggers.keys():
debug(key+"\n")
if cfgparser.has_section(key):
for opt in cfgparser.options(key):
try: self[key][opt] = cfgparser.get(key, opt)
except ConfigParser.Error: pass
try:
self['loggingfields'] = map(string.strip, string.split(
cfgparser.get(section, 'loggingfields'), ","))
except ConfigParser.Error: pass
except ConfigParser.Error, msg: debug(str(msg))
try:
self[key]['fields'] = map(string.strip,
string.split(cfgparser.get(key, 'fields'), ','))
debug("fields %s"%str(self[key]['fields']))
except ConfigParser.Error, msg: debug(str(msg))
section="checking"
try:
num = cfgparser.getint(section, "threads")
if num<=0:
self.disableThreads()
self.disableThreading()
else:
self.enableThreads(num)
self.enableThreading(num)
except ConfigParser.Error: pass
try: self["anchors"] = cfgparser.getboolean(section, "anchors")
except ConfigParser.Error: pass
@ -455,9 +455,10 @@ class Configuration(UserDict.UserDict):
except ConfigParser.Error: pass
try: self["strict"] = cfgparser.getboolean(section, "strict")
except ConfigParser.Error: pass
try:
self["warningregex"] = re.compile(cfgparser.get(section,
"warningregex"))
try:
warn = cfgparser.get(section, "warningregex")
if warn:
self["warningregex"] = re.compile(warn)
except ConfigParser.Error: pass
try:
self["nntpserver"] = cfgparser.get(section, "nntpserver")
@ -488,3 +489,4 @@ class Configuration(UserDict.UserDict):
except ConfigParser.Error: pass
try: self["denyallow"] = cfgparser.getboolean(section, "denyallow")
except ConfigParser.Error: pass
self['log'] = self.newLogger('text')

View file

@ -40,6 +40,7 @@ __init__(self, **args)
files in the appropriate logger section.
"""
import sys,time,string
from types import ListType
import Config, StringUtil
import linkcheck
_ = linkcheck._
@ -52,7 +53,7 @@ LogFields = {
"parenturl": "Parent URL",
"info": "Info",
"warning": "Warning",
"downloadtime": "D/L Time",
"dltime": "D/L Time",
"checktime": "Check Time",
"url": "URL",
}
@ -100,20 +101,23 @@ class StandardLogger:
else:
self.fd = sys.stdout
self.logfields = None # all fields
if args.has_key('logfields'):
if type(args['logfields']) == ListType:
self.logfields = args
if args.has_key('fields'):
if type(args['fields']) == ListType:
self.logfields = args['fields']
def logfield(self, name):
return self.logfields and name in self.logfields
if self.logfields is None:
return 1
return name in self.logfields
def init(self):
self.starttime = time.time()
self.fd.write("%s\n%s\n" % (Config.AppInfo, Config.Freeware))
self.fd.write(_("Get the newest version at %s\n") % Config.Url)
self.fd.write(_("Write comments and bugs to %s\n\n") % Config.Email)
self.fd.write(_("Start checking at %s\n") % _strtime(self.starttime))
self.fd.flush()
if self.logfield('intro'):
self.starttime = time.time()
self.fd.write("%s\n%s\n" % (Config.AppInfo, Config.Freeware))
self.fd.write(_("Get the newest version at %s\n") % Config.Url)
self.fd.write(_("Write comments and bugs to %s\n\n") % Config.Email)
self.fd.write(_("Start checking at %s\n") % _strtime(self.starttime))
self.fd.flush()
def newUrl(self, urlData):
@ -125,69 +129,70 @@ class StandardLogger:
self.fd.write("\n")
if urlData.name and self.logfield('name'):
self.fd.write(_(LogFields["name"])+Spaces["name"]+urlData.name+"\n")
if urlData.parentName and self.logfield('parentname'):
self.fd.write(_("Parent URL")+Spaces["Parent URL"]+
if urlData.parentName and self.logfield('parenturl'):
self.fd.write(_(LogFields['parenturl'])+Spaces["parenturl"]+
urlData.parentName+_(", line ")+
str(urlData.line)+"\n")
if urlData.baseRef:
self.fd.write(_("Base")+Spaces["Base"]+urlData.baseRef+"\n")
if urlData.url:
self.fd.write(_("Real URL")+Spaces["Real URL"]+urlData.url+"\n")
if urlData.downloadtime:
self.fd.write(_("D/L Time")+Spaces["D/L Time"]+
if urlData.baseRef and self.logfield('base'):
self.fd.write(_(LogFields["base"])+Spaces["base"]+urlData.baseRef+"\n")
if urlData.url and self.logfield('realurl'):
self.fd.write(_(LogFields["realurl"])+Spaces["realurl"]+urlData.url+"\n")
if urlData.downloadtime and self.logfield('dltime'):
self.fd.write(_(LogFields["dltime"])+Spaces["dltime"]+
_("%.3f seconds\n") % urlData.downloadtime)
if urlData.checktime:
self.fd.write(_("Check Time")+Spaces["Check Time"]+
if urlData.checktime and self.logfield('checktime'):
self.fd.write(_(LogFields["checktime"])+Spaces["checktime"]+
_("%.3f seconds\n") % urlData.checktime)
if urlData.infoString:
self.fd.write(_("Info")+Spaces["Info"]+
if urlData.infoString and self.logfield('info'):
self.fd.write(_(LogFields["info"])+Spaces["info"]+
StringUtil.indent(
StringUtil.blocktext(urlData.infoString, 65),
MaxIndent)+"\n")
if urlData.warningString:
if urlData.warningString and self.logfield('warning'):
self.warnings += 1
self.fd.write(_("Warning")+Spaces["Warning"]+
self.fd.write(_(LogFields["warning"])+Spaces["warning"]+
StringUtil.indent(
StringUtil.blocktext(urlData.warningString, 65),
MaxIndent)+"\n")
self.fd.write(_("Result")+Spaces["Result"])
if urlData.valid:
self.fd.write(urlData.validString+"\n")
else:
self.errors += 1
self.fd.write(urlData.errorString+"\n")
if self.logfield('result'):
self.fd.write(_(LogFields["result"])+Spaces["result"])
if urlData.valid:
self.fd.write(urlData.validString+"\n")
else:
self.errors += 1
self.fd.write(urlData.errorString+"\n")
self.fd.flush()
def endOfOutput(self, linknumber=-1):
self.fd.write(_("\nThats it. "))
if self.warnings==1:
self.fd.write(_("1 warning, "))
else:
self.fd.write(str(self.warnings)+_(" warnings, "))
if self.errors==1:
self.fd.write(_("1 error"))
else:
self.fd.write(str(self.errors)+_(" errors"))
if linknumber >= 0:
if linknumber == 1:
self.fd.write(_(" in 1 link"))
if self.logfield('outro'):
self.fd.write(_("\nThats it. "))
if self.warnings==1:
self.fd.write(_("1 warning, "))
else:
self.fd.write(_(" in %d links") % linknumber)
self.fd.write(_(" found\n"))
self.stoptime = time.time()
duration = self.stoptime - self.starttime
name = _("seconds")
self.fd.write(_("Stopped checking at %s") % _strtime(self.stoptime))
if duration > 60:
duration = duration / 60
name = _("minutes")
if duration > 60:
duration = duration / 60
name = _("hours")
self.fd.write(" (%.3f %s)\n" % (duration, name))
self.fd.write(str(self.warnings)+_(" warnings, "))
if self.errors==1:
self.fd.write(_("1 error"))
else:
self.fd.write(str(self.errors)+_(" errors"))
if linknumber >= 0:
if linknumber == 1:
self.fd.write(_(" in 1 link"))
else:
self.fd.write(_(" in %d links") % linknumber)
self.fd.write(_(" found\n"))
self.stoptime = time.time()
duration = self.stoptime - self.starttime
name = _("seconds")
self.fd.write(_("Stopped checking at %s") % _strtime(self.stoptime))
if duration > 60:
duration = duration / 60
name = _("minutes")
if duration > 60:
duration = duration / 60
name = _("hours")
self.fd.write(" (%.3f %s)\n" % (duration, name))
self.fd.flush()
self.fd = None

View file

@ -239,7 +239,10 @@ class UrlData:
warningregex = config["warningregex"]
if warningregex and self.valid:
debug("DEBUG: checking content\n")
self.checkContent(warningregex)
try: self.checkContent(warningregex)
except tuple(ExcList):
type, value = sys.exc_info()[:2]
self.setError(str(value))
self.checktime = time.time() - t
# check recursion

View file

@ -98,7 +98,7 @@ For single-letter option arguments the space is not a necessity. So
message, for example 'This page has moved' or 'Oracle
Application Server error'.
This option implies -w.\n") % linkcheck.Config.LoggerKeys
"""
""")
Notes = _("""NOTES
o LinkChecker assumes an http:// resp. ftp:// link when a commandline URL

View file

@ -5,7 +5,7 @@
[output]
# turn on/off debug messages
debug=0
# use the color logger
# use the text logger
log=text
# turn on/off --verbose
verbose=0
@ -15,34 +15,46 @@ warnings=0
quiet=0
# additional file output
fileoutput=
#fileoutput = text, colored, html, gml, sql XXX
# what fields should each logger print out?
fields = all
# field = url, parent url, base url
# fields names: XXX
#fileoutput = text, colored, html, gml, sql
# fields names:
# all (for all fields)
# realurl
# result
# base (base href=...)
# name (<a href=...>name</a>)
# parenturl
# info
# warning
# dltime
# checktime
# url
# parent url
# base url
# intro (the blurb at the beginning, "starting at ...")
# outro (the blurb at the end, "found x errors ...")
# each Logger can have separate configuration parameters
# standard text logger
[text]
filename=linkchecker-out.txt
fields=url,result
# GML logger
[gml]
filename=linkchecker-out.gml
fields=all
# CSV logger
[csv]
filename=linkchecker-out.csv
separator=;
fields=all
# SQL logger
[sql]
filename=linkchecker-out.sql
dbname=linksdb
commandsep=;
fields=all
# HTML logger
[html]
@ -55,6 +67,7 @@ colorlink="#191c83"
tablewarning=<td bgcolor="#e0954e">
tableerror=<td bgcolor="#db4930">
tableok=<td bgcolor="#3ba557">
fields=all
# ANSI color logger
[colored]
@ -71,6 +84,7 @@ colorinfo="\x1b[0m"
colorwarning="\x1b[1;33m"
colordltime="\x1b[0m"
colorreset="\x1b[0m"
fields=all
# blacklist logger
[blacklist]

298
po/de.po
View file

@ -63,12 +63,165 @@ msgstr "%.3f Sekunden"
msgid "Effective URL %s"
msgstr "Effektive URL %s"
#, fuzzy
msgid ""
"USAGE\tlinkchecker [options] file-or-url...\n"
"\n"
"OPTIONS\n"
"For single-letter option arguments the space is not a necessity. So\n"
"'-o colored' is the same as '-ocolored'.\n"
"-a, --anchors\n"
" Check anchor references. Default is don't check anchors.\n"
"-d, --denyallow\n"
" Swap checking order to extern/intern. Default checking order\n"
" is intern/extern.\n"
"-D, --debug\n"
" Print additional debugging information.\n"
"-e regex, --extern=regex\n"
" Assume urls that match the given expression as extern.\n"
" Only intern HTML links are checked recursively.\n"
"-f file, --config=file\n"
" Use file as configuration file. LinkChecker first searches\n"
" ~/.linkcheckerrc and then /etc/linkcheckerrc\n"
" (under Windows <path-to-program>\\linkcheckerrc).\n"
"-F type[/filename], --file-output=type[/filename]\n"
" Same as output, but write to a file linkchecker-out.<type>\n"
" or <filename> if specified. If the file already exists, it\n"
" is overwritten. You can specify this option more than once.\n"
" There is no file output for the blacklist logger. Default is\n"
" no file output.\n"
"-i regex, --intern=regex\n"
" Assume URLs that match the given expression as intern.\n"
" LinkChecker descends recursively only to intern URLs, not to "
"extern.\n"
"-h, --help\n"
" Help me! Print usage information for this program.\n"
"-N server, --nntp-server=server\n"
" Specify an NNTP server for 'news:...' links. Default is the\n"
" environment variable NNTP_SERVER. If no host is given,\n"
" only the syntax of the link is checked.\n"
"-o type, --output=type\n"
" Specify output type as %s.\n"
" Default type is text.\n"
"-p pwd, --password=pwd\n"
" Try password pwd for HTML and FTP authorization.\n"
" Default password is 'joe@'. See also -u.\n"
"-q, --quiet\n"
" Quiet operation. This is only useful with -F.\n"
"-r depth, --recursion-level=depth\n"
" Check recursively all links up to given depth (depth >= 0).\n"
" Default depth is 1.\n"
"-R, --robots-txt\n"
" Obey the robots exclusion standard.\n"
"-s, --strict\n"
" Check only syntax of extern links, do not try to connect to them.\n"
"-t num, --threads=num\n"
" Generate no more than num threads. Default number of threads is 5.\n"
" To disable threading specify a non-positive number.\n"
"-u name, --user=name\n"
" Try username name for HTML and FTP authorization.\n"
" Default is 'anonymous'. See also -p.\n"
"-V, --version\n"
" Print version and exit.\n"
"-v, --verbose\n"
" Log all checked URLs (implies -w). Default is to log only invalid\n"
" URLs.\n"
"-w, --warnings\n"
" Log warnings.\n"
"-W regex, --warning-regex=regex\n"
" Define a regular expression which prints a warning if it matches\n"
" any content of the checked link.\n"
" This applies of course only to pages which are valid, so we can\n"
" get their content.\n"
" Use this to check for pages that contain some form of error\n"
" message, for example 'This page has moved' or 'Oracle\n"
" Application Server error'.\n"
" This option implies -w.\n"
"\") % linkcheck.Config.LoggerKeys\n"
msgstr ""
"BENUTZUNG\tlinkchecker [options] datei_oder_url...\n"
"\n"
"OPTIONEN\n"
"-a, --anchors\n"
" Prüfe interne URLs. Standard ist keine Prüfung.\n"
"-d, --denyallow\n"
" Tausche die Prüfreihenfolge zu extern/intern. Standardmäßige\n"
" Reihenfolge ist intern/extern.\n"
"-D, --debug\n"
" Drucke zusätzlich Debug Information.\n"
"-e regex, --extern=regex\n"
" Behandle URLs welche diesen Ausdruck matchen als extern.\n"
" Nur interne HTTP Links werden rekursiv geprüft.\n"
"-f file, --config=file\n"
" Benutze file als Konfigurationsdatei. LinkChecker sucht zuerst\n"
" ~/.linkcheckerrc und dann /etc/linkcheckerrc\n"
" (unter Windows <Pfad-zum-Programm>\\linkcheckerrc).\n"
"-F type, --file-output=type\n"
" Wie --output, aber schreibe in eine Datei (Standard ist\n"
" linkchecker-out.<type>)\n"
" Falls die Datei bereits existiert wird sie überschrieben.\n"
" Sie können diese Option mehr als einmal verwenden. Es gibt keine\n"
" Ausgabedatei für den blacklist Logger. Standard ist keine Ausgabe\n"
" in eine Datei.\n"
"-i regex, --intern=regex\n"
" Behandle URLs welche diese Ausdruck matchen als intern.\n"
"-h, --help\n"
" Hilf mir! Druche Nutzungsinformation für dieses Programm.\n"
"-N, --nntp-server\n"
" Gibt ein NNTP Rechner für 'news:...' Links. Standard ist die\n"
" Umgebungsvariable NNTP_SERVER. Falls kein Rechner angegeben ist,\n"
" wird lediglich auf korrekte Syntax des Links geprüft.\n"
"-o type, --output=type\n"
" Verwende die Ausgabe als %s.\n"
" Standard Ausgabe ist text.\n"
"-p pwd, --password=pwd\n"
" Verwende das angegebene Passwort für HTML und FTP Authorisation.\n"
" Standard ist 'guest@'. Siehe -u.\n"
"-q, --quiet\n"
" Keine Ausgabe. Dies ist nur in Verbindung mit -F nützlich.\n"
"-r depth, --recursion-level=depth\n"
" Prüfe rekursiv alle URLs bis zu der angegebenen Tiefe\n"
" (depth >= 0). Standard Tiefe ist 1.\n"
"-R, --robots-txt\n"
" Befolge den Robots Exclusion Standard.\n"
"-s, --strict\n"
" Prüfe lediglich die Syntax von externen URLs. Es wird keine.\n"
" Verbindung zu diesen Rechner aufgebaut.\n"
"-t num, --threads=num\n"
" Generiere nicht mehr als num Threads. Standard Anzahl von Threads\n"
" ist 5. Um Threading auszuschalten geben Sie eine nichtpositive\n"
" Anzahl an.\n"
"-u name, --user=name\n"
" Verwende den angegebenen Benutzernamen für HTML und FTP\n"
" Authorisation. Standard ist 'anonymous'. Siehe -p.\n"
"-V, --version\n"
" Drucke die Version und beende das Programm.\n"
"-v, --verbose\n"
" Logge alle geprüften URLs (impliziert -w). Standard ist es, nur\n"
" fehlerhafte URLs zu loggen.\n"
"-w, --warnings\n"
" Logge Warnungen.\n"
"-W regex, --warning-regex=regex\n"
" Definieren Sie einen regulären Ausdruck, der eine Warnung ausdruckt\n"
" falls er den Inhalt einer geprüften URL matcht.\n"
" Dies gilt natürlich nur für gültige Seiten deren Inhalt wir\n"
" bekommen können.\n"
" Sie können dies verwenden, um Seiten mit Fehlermeldungen wie z.B.\n"
" 'Diese Seite ist umgezogen' oder 'Oracle Server Fehler'.\n"
" Diese Option impliziert -w.\n"
msgid ", line "
msgstr ", Zeile "
msgid "url"
msgstr ""
msgid "None of the mail hosts for %s accepts an SMTP connection: %s"
msgstr "Keiner der Mail Hosts für %s akzeptiert eine SMTP Verbindung: %s"
msgid "name"
msgstr ""
msgid "%.3f seconds\n"
msgstr "%.3f Sekunden\n"
@ -182,151 +335,6 @@ msgstr "Mail host %s gefunden"
msgid "Javascript url ignored"
msgstr "Javascript url ignoriert"
#, fuzzy
msgid ""
"USAGE\tlinkchecker [options] file-or-url...\n"
"\n"
"OPTIONS\n"
"For single-letter option arguments the space is not a necessity. So\n"
"'-o colored' is the same as '-ocolored'.\n"
"-a, --anchors\n"
" Check anchor references. Default is don't check anchors.\n"
"-d, --denyallow\n"
" Swap checking order to extern/intern. Default checking order\n"
" is intern/extern.\n"
"-D, --debug\n"
" Print additional debugging information.\n"
"-e regex, --extern=regex\n"
" Assume urls that match the given expression as extern.\n"
" Only intern HTML links are checked recursively.\n"
"-f file, --config=file\n"
" Use file as configuration file. LinkChecker first searches\n"
" ~/.linkcheckerrc and then /etc/linkcheckerrc\n"
" (under Windows <path-to-program>\\linkcheckerrc).\n"
"-F type, --file-output=type\n"
" Same as output, but write to a file linkchecker-out.<type>.\n"
" If the file already exists, it is overwritten. You can specify\n"
" this option more than once. There is no file output for the\n"
" blacklist logger. Default is no file output.\n"
"-i regex, --intern=regex\n"
" Assume URLs that match the given expression as intern.\n"
" LinkChecker descends recursively only to intern URLs, not to "
"extern.\n"
"-h, --help\n"
" Help me! Print usage information for this program.\n"
"-N server, --nntp-server=server\n"
" Specify an NNTP server for 'news:...' links. Default is the\n"
" environment variable NNTP_SERVER. If no host is given,\n"
" only the syntax of the link is checked.\n"
"-o type, --output=type\n"
" Specify output type as %s.\n"
" Default type is text.\n"
"-p pwd, --password=pwd\n"
" Try password pwd for HTML and FTP authorization.\n"
" Default password is 'joe@'. See also -u.\n"
"-q, --quiet\n"
" Quiet operation. This is only useful with -F.\n"
"-r depth, --recursion-level=depth\n"
" Check recursively all links up to given depth (depth >= 0).\n"
" Default depth is 1.\n"
"-R, --robots-txt\n"
" Obey the robots exclusion standard.\n"
"-s, --strict\n"
" Check only syntax of extern links, do not try to connect to them.\n"
"-t num, --threads=num\n"
" Generate no more than num threads. Default number of threads is 5.\n"
" To disable threading specify a non-positive number.\n"
"-u name, --user=name\n"
" Try username name for HTML and FTP authorization.\n"
" Default is 'anonymous'. See also -p.\n"
"-V, --version\n"
" Print version and exit.\n"
"-v, --verbose\n"
" Log all checked URLs (implies -w). Default is to log only invalid\n"
" URLs.\n"
"-w, --warnings\n"
" Log warnings.\n"
"-W regex, --warning-regex=regex\n"
" Define a regular expression which prints a warning if it matches\n"
" any content of the checked link.\n"
" This applies of course only to pages which are valid, so we can\n"
" get their content.\n"
" Use this to check for pages that contain some form of error\n"
" message, for example 'This page has moved' or 'Oracle\n"
" Application Server error'.\n"
" This option implies -w.\n"
msgstr ""
"BENUTZUNG\tlinkchecker [options] datei_oder_url...\n"
"\n"
"OPTIONEN\n"
"-a, --anchors\n"
" Prüfe interne URLs. Standard ist keine Prüfung.\n"
"-d, --denyallow\n"
" Tausche die Prüfreihenfolge zu extern/intern. Standardmäßige\n"
" Reihenfolge ist intern/extern.\n"
"-D, --debug\n"
" Drucke zusätzlich Debug Information.\n"
"-e regex, --extern=regex\n"
" Behandle URLs welche diesen Ausdruck matchen als extern.\n"
" Nur interne HTTP Links werden rekursiv geprüft.\n"
"-f file, --config=file\n"
" Benutze file als Konfigurationsdatei. LinkChecker sucht zuerst\n"
" ~/.linkcheckerrc und dann /etc/linkcheckerrc\n"
" (unter Windows <Pfad-zum-Programm>\\linkcheckerrc).\n"
"-F type, --file-output=type\n"
" Wie --output, aber schreibe in eine Datei (Standard ist\n"
" linkchecker-out.<type>)\n"
" Falls die Datei bereits existiert wird sie überschrieben.\n"
" Sie können diese Option mehr als einmal verwenden. Es gibt keine\n"
" Ausgabedatei für den blacklist Logger. Standard ist keine Ausgabe\n"
" in eine Datei.\n"
"-i regex, --intern=regex\n"
" Behandle URLs welche diese Ausdruck matchen als intern.\n"
"-h, --help\n"
" Hilf mir! Druche Nutzungsinformation für dieses Programm.\n"
"-N, --nntp-server\n"
" Gibt ein NNTP Rechner für 'news:...' Links. Standard ist die\n"
" Umgebungsvariable NNTP_SERVER. Falls kein Rechner angegeben ist,\n"
" wird lediglich auf korrekte Syntax des Links geprüft.\n"
"-o type, --output=type\n"
" Verwende die Ausgabe als %s.\n"
" Standard Ausgabe ist text.\n"
"-p pwd, --password=pwd\n"
" Verwende das angegebene Passwort für HTML und FTP Authorisation.\n"
" Standard ist 'guest@'. Siehe -u.\n"
"-q, --quiet\n"
" Keine Ausgabe. Dies ist nur in Verbindung mit -F nützlich.\n"
"-r depth, --recursion-level=depth\n"
" Prüfe rekursiv alle URLs bis zu der angegebenen Tiefe\n"
" (depth >= 0). Standard Tiefe ist 1.\n"
"-R, --robots-txt\n"
" Befolge den Robots Exclusion Standard.\n"
"-s, --strict\n"
" Prüfe lediglich die Syntax von externen URLs. Es wird keine.\n"
" Verbindung zu diesen Rechner aufgebaut.\n"
"-t num, --threads=num\n"
" Generiere nicht mehr als num Threads. Standard Anzahl von Threads\n"
" ist 5. Um Threading auszuschalten geben Sie eine nichtpositive\n"
" Anzahl an.\n"
"-u name, --user=name\n"
" Verwende den angegebenen Benutzernamen für HTML und FTP\n"
" Authorisation. Standard ist 'anonymous'. Siehe -p.\n"
"-V, --version\n"
" Drucke die Version und beende das Programm.\n"
"-v, --verbose\n"
" Logge alle geprüften URLs (impliziert -w). Standard ist es, nur\n"
" fehlerhafte URLs zu loggen.\n"
"-w, --warnings\n"
" Logge Warnungen.\n"
"-W regex, --warning-regex=regex\n"
" Definieren Sie einen regulären Ausdruck, der eine Warnung ausdruckt\n"
" falls er den Inhalt einer geprüften URL matcht.\n"
" Dies gilt natürlich nur für gültige Seiten deren Inhalt wir\n"
" bekommen können.\n"
" Sie können dies verwenden, um Seiten mit Fehlermeldungen wie z.B.\n"
" 'Diese Seite ist umgezogen' oder 'Oracle Server Fehler'.\n"
" Diese Option impliziert -w.\n"
msgid "could not split the mail adress"
msgstr "konnte Mail Adresse nicht splitten"

306
po/fr.po
View file

@ -63,12 +63,169 @@ msgstr "%.3f secondes"
msgid "Effective URL %s"
msgstr "URL effective %s"
#, fuzzy
msgid ""
"USAGE\tlinkchecker [options] file-or-url...\n"
"\n"
"OPTIONS\n"
"For single-letter option arguments the space is not a necessity. So\n"
"'-o colored' is the same as '-ocolored'.\n"
"-a, --anchors\n"
" Check anchor references. Default is don't check anchors.\n"
"-d, --denyallow\n"
" Swap checking order to extern/intern. Default checking order\n"
" is intern/extern.\n"
"-D, --debug\n"
" Print additional debugging information.\n"
"-e regex, --extern=regex\n"
" Assume urls that match the given expression as extern.\n"
" Only intern HTML links are checked recursively.\n"
"-f file, --config=file\n"
" Use file as configuration file. LinkChecker first searches\n"
" ~/.linkcheckerrc and then /etc/linkcheckerrc\n"
" (under Windows <path-to-program>\\linkcheckerrc).\n"
"-F type[/filename], --file-output=type[/filename]\n"
" Same as output, but write to a file linkchecker-out.<type>\n"
" or <filename> if specified. If the file already exists, it\n"
" is overwritten. You can specify this option more than once.\n"
" There is no file output for the blacklist logger. Default is\n"
" no file output.\n"
"-i regex, --intern=regex\n"
" Assume URLs that match the given expression as intern.\n"
" LinkChecker descends recursively only to intern URLs, not to "
"extern.\n"
"-h, --help\n"
" Help me! Print usage information for this program.\n"
"-N server, --nntp-server=server\n"
" Specify an NNTP server for 'news:...' links. Default is the\n"
" environment variable NNTP_SERVER. If no host is given,\n"
" only the syntax of the link is checked.\n"
"-o type, --output=type\n"
" Specify output type as %s.\n"
" Default type is text.\n"
"-p pwd, --password=pwd\n"
" Try password pwd for HTML and FTP authorization.\n"
" Default password is 'joe@'. See also -u.\n"
"-q, --quiet\n"
" Quiet operation. This is only useful with -F.\n"
"-r depth, --recursion-level=depth\n"
" Check recursively all links up to given depth (depth >= 0).\n"
" Default depth is 1.\n"
"-R, --robots-txt\n"
" Obey the robots exclusion standard.\n"
"-s, --strict\n"
" Check only syntax of extern links, do not try to connect to them.\n"
"-t num, --threads=num\n"
" Generate no more than num threads. Default number of threads is 5.\n"
" To disable threading specify a non-positive number.\n"
"-u name, --user=name\n"
" Try username name for HTML and FTP authorization.\n"
" Default is 'anonymous'. See also -p.\n"
"-V, --version\n"
" Print version and exit.\n"
"-v, --verbose\n"
" Log all checked URLs (implies -w). Default is to log only invalid\n"
" URLs.\n"
"-w, --warnings\n"
" Log warnings.\n"
"-W regex, --warning-regex=regex\n"
" Define a regular expression which prints a warning if it matches\n"
" any content of the checked link.\n"
" This applies of course only to pages which are valid, so we can\n"
" get their content.\n"
" Use this to check for pages that contain some form of error\n"
" message, for example 'This page has moved' or 'Oracle\n"
" Application Server error'.\n"
" This option implies -w.\n"
"\") % linkcheck.Config.LoggerKeys\n"
msgstr ""
"USAGE\tlinkchecker [options] fichier_ou_url...\n"
"\n"
"OPTIONS\n"
"-a, --anchors\n"
" Contrôle les références ancrées. Par défaut, il ne les contrôle "
"pas.\n"
"-d, --denyallow\n"
" Swap checking order to extern/intern. Default checking order\n"
" is intern/extern.\n"
"-D, --debug\n"
" Affiche des informations de débugage supplémentaires.\n"
"-e regex, --extern=regex\n"
" Assume urls that match the given expression as extern.\n"
" Only intern HTTP links are checked recursively.\n"
"-f file, --config=file\n"
" Utilise le fichier comme fichier de configuration. LinkChecker "
"recherche d'abord\n"
" ~/.linkcheckerrc puis /etc/linkcheckerrc\n"
" (sous Windows <chemin-vers-le-programe>\\linkcheckerrc).\n"
"-F name, --file-output=name\n"
" Identique à output, mais écrit dans un fichier "
"linkchecker-out.<nom>.\n"
" Si le fichier existe, il sera écrasé. Vous pouvez spécifier\n"
" cette option plus d'une fois. Il n'y a pas de fichier de sotie pour "
"les\n"
" logs de la liste noire. Par défaut, il n'y a pas de fichier de "
"sortie.\n"
"-i regex, --intern=regex\n"
" Assume urls that match the given expression as intern.\n"
"-h, --help\n"
" Aide moi! Affiche les informations d'utilisation pour ce programme.\n"
"-N, --nntp-server\n"
" Specify an NNTP server for 'news:...' links. Default is the\n"
" environment variable NNTP_SERVER. If no host is given,\n"
" only the syntax of the link is checked.\n"
"-o name, --output=name\n"
" Specify output as %s.\n"
" Default is text.\n"
"-p pwd, --password=pwd\n"
" Try given password for HTML and FTP authorization.\n"
" Default is 'guest@'. See -u.\n"
"-q, --quiet\n"
" Quiet operation. This is only useful with -F.\n"
"-r depth, --recursion-level=depth\n"
" Check recursively all links up to given depth (depth >= 0).\n"
" Default depth is 1.\n"
"-R, --robots-txt\n"
" Obey the robots exclusion standard.\n"
"-s, --strict\n"
" Contrôle seulement la syntaxe des liens externes, et ne pas essayer\n"
" de s'y connecter.\n"
"-t num, --threads=num\n"
" Generate no more than num threads. Default number of threads is 5.\n"
" To disable threading specify a non-positive number.\n"
"-u name, --user=name\n"
" Essayer le nom d'utilisateur donné pour l'autorisation HTTP et FTP.\n"
" La valeur par défaut est 'anonymous'. Regarder à -p.\n"
"-V, --version\n"
" Affiche la version et quitte.\n"
"-v, --verbose\n"
" Logger toutes les URLs contôlées (suppose -w). Par défaut, seulement "
"les URLS\n"
" invalides sont logguées.\n"
"-w, --warnings\n"
" Logger les avertissements.\n"
"-W regex, --warning-regex=regex\n"
" Define a regular expression which prints a warning if it matches\n"
" any content of the checked link.\n"
" This applies of course only to pages which are valid, so we can\n"
" get their content.\n"
" You can use this to check for pages that contain some form of\n"
" error message, for example 'This page has moved' or\n"
" 'Oracle Application Server error'.\n"
" This option implies -w.\n"
msgid ", line "
msgstr ", ligne "
msgid "url"
msgstr ""
msgid "None of the mail hosts for %s accepts an SMTP connection: %s"
msgstr "Aucun des hôtes de messagerie pour %s n'accepte de connection SMTP: %s"
msgid "name"
msgstr ""
msgid "%.3f seconds\n"
msgstr "%.3f secondes\n"
@ -173,155 +330,6 @@ msgstr "trouv
msgid "Javascript url ignored"
msgstr "Url Javascript ignorée"
#, fuzzy
msgid ""
"USAGE\tlinkchecker [options] file-or-url...\n"
"\n"
"OPTIONS\n"
"For single-letter option arguments the space is not a necessity. So\n"
"'-o colored' is the same as '-ocolored'.\n"
"-a, --anchors\n"
" Check anchor references. Default is don't check anchors.\n"
"-d, --denyallow\n"
" Swap checking order to extern/intern. Default checking order\n"
" is intern/extern.\n"
"-D, --debug\n"
" Print additional debugging information.\n"
"-e regex, --extern=regex\n"
" Assume urls that match the given expression as extern.\n"
" Only intern HTML links are checked recursively.\n"
"-f file, --config=file\n"
" Use file as configuration file. LinkChecker first searches\n"
" ~/.linkcheckerrc and then /etc/linkcheckerrc\n"
" (under Windows <path-to-program>\\linkcheckerrc).\n"
"-F type, --file-output=type\n"
" Same as output, but write to a file linkchecker-out.<type>.\n"
" If the file already exists, it is overwritten. You can specify\n"
" this option more than once. There is no file output for the\n"
" blacklist logger. Default is no file output.\n"
"-i regex, --intern=regex\n"
" Assume URLs that match the given expression as intern.\n"
" LinkChecker descends recursively only to intern URLs, not to "
"extern.\n"
"-h, --help\n"
" Help me! Print usage information for this program.\n"
"-N server, --nntp-server=server\n"
" Specify an NNTP server for 'news:...' links. Default is the\n"
" environment variable NNTP_SERVER. If no host is given,\n"
" only the syntax of the link is checked.\n"
"-o type, --output=type\n"
" Specify output type as %s.\n"
" Default type is text.\n"
"-p pwd, --password=pwd\n"
" Try password pwd for HTML and FTP authorization.\n"
" Default password is 'joe@'. See also -u.\n"
"-q, --quiet\n"
" Quiet operation. This is only useful with -F.\n"
"-r depth, --recursion-level=depth\n"
" Check recursively all links up to given depth (depth >= 0).\n"
" Default depth is 1.\n"
"-R, --robots-txt\n"
" Obey the robots exclusion standard.\n"
"-s, --strict\n"
" Check only syntax of extern links, do not try to connect to them.\n"
"-t num, --threads=num\n"
" Generate no more than num threads. Default number of threads is 5.\n"
" To disable threading specify a non-positive number.\n"
"-u name, --user=name\n"
" Try username name for HTML and FTP authorization.\n"
" Default is 'anonymous'. See also -p.\n"
"-V, --version\n"
" Print version and exit.\n"
"-v, --verbose\n"
" Log all checked URLs (implies -w). Default is to log only invalid\n"
" URLs.\n"
"-w, --warnings\n"
" Log warnings.\n"
"-W regex, --warning-regex=regex\n"
" Define a regular expression which prints a warning if it matches\n"
" any content of the checked link.\n"
" This applies of course only to pages which are valid, so we can\n"
" get their content.\n"
" Use this to check for pages that contain some form of error\n"
" message, for example 'This page has moved' or 'Oracle\n"
" Application Server error'.\n"
" This option implies -w.\n"
msgstr ""
"USAGE\tlinkchecker [options] fichier_ou_url...\n"
"\n"
"OPTIONS\n"
"-a, --anchors\n"
" Contrôle les références ancrées. Par défaut, il ne les contrôle "
"pas.\n"
"-d, --denyallow\n"
" Swap checking order to extern/intern. Default checking order\n"
" is intern/extern.\n"
"-D, --debug\n"
" Affiche des informations de débugage supplémentaires.\n"
"-e regex, --extern=regex\n"
" Assume urls that match the given expression as extern.\n"
" Only intern HTTP links are checked recursively.\n"
"-f file, --config=file\n"
" Utilise le fichier comme fichier de configuration. LinkChecker "
"recherche d'abord\n"
" ~/.linkcheckerrc puis /etc/linkcheckerrc\n"
"    (sous Windows <chemin-vers-le-programme>\\linkcheckerrc).\n"
"-F name, --file-output=name\n"
" Identique à output, mais écrit dans un fichier "
"linkchecker-out.<nom>.\n"
" Si le fichier existe, il sera écrasé. Vous pouvez spécifier\n"
"    cette option plus d'une fois. Il n'y a pas de fichier de sortie pour "
"les\n"
" logs de la liste noire. Par défaut, il n'y a pas de fichier de "
"sortie.\n"
"-i regex, --intern=regex\n"
" Assume urls that match the given expression as intern.\n"
"-h, --help\n"
"    Aide-moi! Affiche les informations d'utilisation pour ce programme.\n"
"-N, --nntp-server\n"
" Specify an NNTP server for 'news:...' links. Default is the\n"
" environment variable NNTP_SERVER. If no host is given,\n"
" only the syntax of the link is checked.\n"
"-o name, --output=name\n"
" Specify output as %s.\n"
" Default is text.\n"
"-p pwd, --password=pwd\n"
" Try given password for HTML and FTP authorization.\n"
" Default is 'guest@'. See -u.\n"
"-q, --quiet\n"
" Quiet operation. This is only useful with -F.\n"
"-r depth, --recursion-level=depth\n"
" Check recursively all links up to given depth (depth >= 0).\n"
" Default depth is 1.\n"
"-R, --robots-txt\n"
" Obey the robots exclusion standard.\n"
"-s, --strict\n"
"    Contrôle seulement la syntaxe des liens externes, sans essayer\n"
" de s'y connecter.\n"
"-t num, --threads=num\n"
" Generate no more than num threads. Default number of threads is 5.\n"
" To disable threading specify a non-positive number.\n"
"-u name, --user=name\n"
" Essayer le nom d'utilisateur donné pour l'autorisation HTTP et FTP.\n"
" La valeur par défaut est 'anonymous'. Regarder à -p.\n"
"-V, --version\n"
" Affiche la version et quitte.\n"
"-v, --verbose\n"
"    Logger toutes les URLs contrôlées (suppose -w). Par défaut, seulement "
"les URLS\n"
" invalides sont logguées.\n"
"-w, --warnings\n"
" Logger les avertissements.\n"
"-W regex, --warning-regex=regex\n"
" Define a regular expression which prints a warning if it matches\n"
" any content of the checked link.\n"
" This applies of course only to pages which are valid, so we can\n"
" get their content.\n"
" You can use this to check for pages that contain some form of\n"
" error message, for example 'This page has moved' or\n"
" 'Oracle Application Server error'.\n"
" This option implies -w.\n"
msgid "could not split the mail adress"
msgstr "impossible de séparer l'adresse e-mail"

View file

@ -323,7 +323,7 @@ def main():
opts, args = getopt.getopt(
sys.argv[1:],
'ad:Ehk:Kno:p:S:Vvw:x:',
['extract-all', 'default-domain', 'escape', 'help',
['extract-all', 'default-domain=', 'escape', 'help',
'keyword=', 'no-default-keywords',
'add-location', 'no-location', 'output=', 'output-dir=',
'style=', 'verbose', 'version', 'width=', 'exclude-file=',

View file

@ -1,43 +0,0 @@
"""
Copyright (C) 2000 Bastian Kleineidam
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
"""
import sys,StringIO,LinkChecker
def linkcheck(urls):
"Check a list of http://, file:// etc. urls"
config = LinkChecker.Config.Configuration()
config["verbose"]=1
config["warnings"]=1
# no more options, use defaults
# add urls
for url in urls:
config.appendUrl(LinkChecker.UrlData.GetUrlDataFrom(url, 0))
# check it
LinkChecker.checkUrls(config)
old_stdout = sys.stdout
sys.stdout = StringIO.StringIO()
linkcheck(['http://fsinfo.cs.uni-sb.de/~calvin'])
sys.stdout.seek(0)
reader = LinkChecker.OutputReader.OutputReader()
old_stdout.write(sys.stdout.getvalue())
result = reader.parse(sys.stdout)
sys.stdout = old_stdout
for url in result:
print str(url)

View file

@ -3,7 +3,40 @@
# Skip Montanaro (skip@mojam.com)
# - adapted from the robots.txt file at http://web.nexor.co.uk/
# disallow a bunch of ill-behaved user agents (doubt this will deter them...)
User-agent: ExtractorPro
Disallow: /
User-agent: EmailSiphon
Disallow: /
User-agent: EmailWolf
Disallow: /
User-agent: CherryPickerSE/1.0
Disallow: /
User-agent: CherryPickerElite/1.0
Disallow: /
User-agent: EmailCollector/1.0
Disallow: /
User-agent: EmailWolf 1.00
Disallow: /
User-agent: Crescent Internet ToolPak HTTP OLE Control v.1.0
Disallow: /
User-agent: EmailSiphon
Disallow: /
User-agent: Mozilla/2.0 (compatible; NEWT ActiveX; Win32)
Disallow: /
# by default
User-agent: *
Disallow: /ccrd # not useful to spiders
Disallow: /click # not useful to spiders
@ -23,24 +56,3 @@ Disallow: /musician # defunct
Disallow: /~skip/volkswagen # defunct
Disallow: /%7Eskip/volkswagen # defunct
# disallow a bunch of ill-behaved user agents (doubt this will deter them...)
User-agent: ExtractorPro
Disallow: /
User-agent: EmailSiphon
Disallow: /
User-agent: EmailWolf
Disallow: /
User-agent: CherryPickerSE/1.0
Disallow: /
User-agent: CherryPickerElite/1.0
Disallow: /
User-agent: EmailCollector/1.0
Disallow: /
User-agent: EmailWolf 1.00
Disallow: /
User-agent: Crescent Internet ToolPak HTTP OLE Control v.1.0
Disallow: /
User-agent: EmailSiphon
Disallow: /
User-agent: Mozilla/2.0 (compatible; NEWT ActiveX; Win32)
Disallow: /