diff --git a/Makefile b/Makefile index a4aca3d2..9ccd6e62 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ NAME = $(shell $(PYTHON) setup.py --name) HOST=treasure.calvinsplayground.de #LCOPTS=-ocolored -Ftext -Fhtml -Fgml -Fsql -Fcsv -Fxml -R -t0 -v -s LCOPTS=-ocolored -Ftext -Fhtml -Fgml -Fsql -Fcsv -Fxml -R -t0 -v -s -DEBPACKAGE = $(PACKAGE)_$(VERSION)_i386.deb +DEBPACKAGE = $(PACKAGE)_$(VERSION)_all.deb $(PACKAGE)-ssl_$(VERSION)_i386.deb PULLHOST=phoenix.net.uni-sb.de PULLPATH=/home/calvin/temp/linkchecker @@ -28,7 +28,7 @@ distclean: clean cleandeb rm -f $(PACKAGE)-out.* VERSION $(PACKAGE)Conf.py MANIFEST Packages.gz cleandeb: - rm -rf debian/$(PACKAGE) debian/tmp + rm -rf debian/$(PACKAGE) debian/$(PACKAGE)-ssl debian/tmp rm -f debian/*.debhelper debian/{files,substvars} rm -f configure-stamp build-stamp @@ -36,11 +36,12 @@ dist: locale # cleandeb because distutils choke on dangling symlinks # (linkchecker.1 -> undocumented.1) $(MAKE) cleandeb + fakeroot debian/rules binary $(PYTHON) setup.py sdist --formats=gztar,zip bdist_rpm # extra run without SSL compilation python setup.py bdist_wininst - fakeroot dpkg-buildpackage -sgpg -pgpg - cp -f ../$(DEBPACKAGE) dist + #fakeroot dpkg-buildpackage -sgpg -pgpg + mv -f ../$(DEBPACKAGE) dist package: cd dist && dpkg-scanpackages . ../override.txt | gzip --best > Packages.gz diff --git a/debian/changelog b/debian/changelog index 06023e06..1af121e3 100644 --- a/debian/changelog +++ b/debian/changelog @@ -3,6 +3,11 @@ linkchecker (1.3.0) unstable; urgency=low * require and use Python >= 2.0 * fix agent matching in robotparser2.py * added more LinkPatterns (ripped from HTML::Tagset.pm) + * fix ignored configfile settings for loggers + * optional filename argument for -F (patch from + Jamie Heilman ) + * config file option to control which fields the loggers should print + out -- Bastian Kleineidam Fri, 9 Feb 2001 10:51:24 +0100 diff --git a/debian/rules b/debian/rules index 080f3f5a..4ff2e06c 100755 --- a/debian/rules +++ b/debian/rules @@ -44,22 +44,16 @@ install: build # remove example files, we install them with dh_installexamples rm -rf debian/$(PACKAGE)/usr/share/linkchecker # install additional doc files - install -d -m 755 $(DOCDIR)/examples install -c -m 644 DNS/README $(DOCDIR)/README_DNS.txt - install -d -m 755 $(DOCDIR)/tests/linkcheck - install -c -m 644 test/*.html test/*.py test/robots.txt $(DOCDIR)/tests/linkcheck - install -d -m 755 $(DOCDIR)/tests/dns - install -c -m 644 tests/*.py $(DOCDIR)/tests/dns + install -c -m 644 test/*.py $(DOCDIR)/test + install -c -m 644 test/html/*.html $(DOCDIR)/test/html + install -c -m 644 test/output/test_* $(DOCDIR)/test/output # install system wide configuration file in etc install -c -m 644 linkcheckerrc debian/$(PACKAGE)/etc # Build architecture-independent files here. binary-indep: build install -# We have nothing to do by default. - -# Build architecture-dependent files here. -binary-arch: build install dh_testdir dh_testroot # dh_installdebconf @@ -73,6 +67,7 @@ binary-arch: build install dh_installman linkchecker.1 # dh_installinfo # dh_undocumented linkchecker.1 + dh_movefiles -p"linkchecker-ssl" --sourcedir debian/linkchecker dh_installchangelogs # dh_link dh_strip @@ -86,5 +81,8 @@ binary-arch: build install dh_md5sums dh_builddeb +# Build architecture-dependent files here. +binary-arch: build install + binary: binary-indep binary-arch .PHONY: build clean binary-indep binary-arch binary install configure diff --git a/linkcheck/Config.py b/linkcheck/Config.py index 270ebc38..a6932a9f 100644 --- a/linkcheck/Config.py +++ b/linkcheck/Config.py @@ -106,7 +106,6 @@ class Configuration(UserDict.UserDict): self["robotstxt"] = 1 self["strict"] = 0 self["fileoutput"] = [] - self["loggingfields"] = "all" # Logger configurations self["text"] = { "filename": "linkchecker-out.txt", @@ -155,7 +154,6 @@ class Configuration(UserDict.UserDict): "filename": "linkchecker-out.xml", } self['test'] = {} # no args for test logger - # default values self['log'] = self.newLogger('text') self["quiet"] = 0 self["warningregex"] = None @@ -424,22 +422,24 @@ class Configuration(UserDict.UserDict): self.newLogger(arg, {'fileoutput':1})) except ConfigParser.Error: pass for key in Loggers.keys(): + debug(key+"\n") if cfgparser.has_section(key): for opt in cfgparser.options(key): try: self[key][opt] = cfgparser.get(key, opt) - except ConfigParser.Error: pass - try: - self['loggingfields'] = map(string.strip, string.split( - cfgparser.get(section, 'loggingfields'), ",")) - except ConfigParser.Error: pass + except ConfigParser.Error, msg: debug(str(msg)) + try: + self[key]['fields'] = map(string.strip, + string.split(cfgparser.get(key, 'fields'), ',')) + debug("fields %s"%str(self[key]['fields'])) + except ConfigParser.Error, msg: debug(str(msg)) section="checking" try: num = cfgparser.getint(section, "threads") if num<=0: - self.disableThreads() + self.disableThreading() else: - self.enableThreads(num) + self.enableThreading(num) except ConfigParser.Error: pass try: self["anchors"] = cfgparser.getboolean(section, "anchors") except ConfigParser.Error: pass @@ -455,9 +455,10 @@ class Configuration(UserDict.UserDict): except ConfigParser.Error: pass try: self["strict"] = cfgparser.getboolean(section, "strict") except ConfigParser.Error: pass - try: - self["warningregex"] = re.compile(cfgparser.get(section, - "warningregex")) + try: + warn = cfgparser.get(section, "warningregex") + if warn: + self["warningregex"] = re.compile(warn) except ConfigParser.Error: pass try: self["nntpserver"] = cfgparser.get(section, "nntpserver") @@ -488,3 +489,4 @@ class Configuration(UserDict.UserDict): except ConfigParser.Error: pass try: self["denyallow"] = cfgparser.getboolean(section, "denyallow") except ConfigParser.Error: pass + self['log'] = self.newLogger('text') diff --git a/linkcheck/Logging.py b/linkcheck/Logging.py index 2821f343..ed9bd30c 100644 --- a/linkcheck/Logging.py +++ b/linkcheck/Logging.py @@ -40,6 +40,7 @@ __init__(self, **args) files in the appropriate logger section. """ import sys,time,string +from types import ListType import Config, StringUtil import linkcheck _ = linkcheck._ @@ -52,7 +53,7 @@ LogFields = { "parenturl": "Parent URL", "info": "Info", "warning": "Warning", - "downloadtime": "D/L Time", + "dltime": "D/L Time", "checktime": "Check Time", "url": "URL", } @@ -100,20 +101,23 @@ class StandardLogger: else: self.fd = sys.stdout self.logfields = None # all fields - if args.has_key('logfields'): - if type(args['logfields']) == ListType: - self.logfields = args + if args.has_key('fields'): + if type(args['fields']) == ListType: + self.logfields = args['fields'] def logfield(self, name): - return self.logfields and name in self.logfields + if self.logfields is None: + return 1 + return name in self.logfields def init(self): - self.starttime = time.time() - self.fd.write("%s\n%s\n" % (Config.AppInfo, Config.Freeware)) - self.fd.write(_("Get the newest version at %s\n") % Config.Url) - self.fd.write(_("Write comments and bugs to %s\n\n") % Config.Email) - self.fd.write(_("Start checking at %s\n") % _strtime(self.starttime)) - self.fd.flush() + if self.logfield('intro'): + self.starttime = time.time() + self.fd.write("%s\n%s\n" % (Config.AppInfo, Config.Freeware)) + self.fd.write(_("Get the newest version at %s\n") % Config.Url) + self.fd.write(_("Write comments and bugs to %s\n\n") % Config.Email) + self.fd.write(_("Start checking at %s\n") % _strtime(self.starttime)) + self.fd.flush() def newUrl(self, urlData): @@ -125,69 +129,70 @@ class StandardLogger: self.fd.write("\n") if urlData.name and self.logfield('name'): self.fd.write(_(LogFields["name"])+Spaces["name"]+urlData.name+"\n") - if urlData.parentName and self.logfield('parentname'): - self.fd.write(_("Parent URL")+Spaces["Parent URL"]+ + if urlData.parentName and self.logfield('parenturl'): + self.fd.write(_(LogFields['parenturl'])+Spaces["parenturl"]+ urlData.parentName+_(", line ")+ str(urlData.line)+"\n") - if urlData.baseRef: - self.fd.write(_("Base")+Spaces["Base"]+urlData.baseRef+"\n") - if urlData.url: - self.fd.write(_("Real URL")+Spaces["Real URL"]+urlData.url+"\n") - if urlData.downloadtime: - self.fd.write(_("D/L Time")+Spaces["D/L Time"]+ + if urlData.baseRef and self.logfield('base'): + self.fd.write(_(LogFields["base"])+Spaces["base"]+urlData.baseRef+"\n") + if urlData.url and self.logfield('realurl'): + self.fd.write(_(LogFields["realurl"])+Spaces["realurl"]+urlData.url+"\n") + if urlData.downloadtime and self.logfield('dltime'): + self.fd.write(_(LogFields["dltime"])+Spaces["dltime"]+ _("%.3f seconds\n") % urlData.downloadtime) - if urlData.checktime: - self.fd.write(_("Check Time")+Spaces["Check Time"]+ + if urlData.checktime and self.logfield('checktime'): + self.fd.write(_(LogFields["checktime"])+Spaces["checktime"]+ _("%.3f seconds\n") % urlData.checktime) - if urlData.infoString: - self.fd.write(_("Info")+Spaces["Info"]+ + if urlData.infoString and self.logfield('info'): + self.fd.write(_(LogFields["info"])+Spaces["info"]+ StringUtil.indent( StringUtil.blocktext(urlData.infoString, 65), MaxIndent)+"\n") - if urlData.warningString: + if urlData.warningString and self.logfield('warning'): self.warnings += 1 - self.fd.write(_("Warning")+Spaces["Warning"]+ + self.fd.write(_(LogFields["warning"])+Spaces["warning"]+ StringUtil.indent( StringUtil.blocktext(urlData.warningString, 65), MaxIndent)+"\n") - - self.fd.write(_("Result")+Spaces["Result"]) - if urlData.valid: - self.fd.write(urlData.validString+"\n") - else: - self.errors += 1 - self.fd.write(urlData.errorString+"\n") + + if self.logfield('result'): + self.fd.write(_(LogFields["result"])+Spaces["result"]) + if urlData.valid: + self.fd.write(urlData.validString+"\n") + else: + self.errors += 1 + self.fd.write(urlData.errorString+"\n") self.fd.flush() def endOfOutput(self, linknumber=-1): - self.fd.write(_("\nThats it. ")) - - if self.warnings==1: - self.fd.write(_("1 warning, ")) - else: - self.fd.write(str(self.warnings)+_(" warnings, ")) - if self.errors==1: - self.fd.write(_("1 error")) - else: - self.fd.write(str(self.errors)+_(" errors")) - if linknumber >= 0: - if linknumber == 1: - self.fd.write(_(" in 1 link")) + if self.logfield('outro'): + self.fd.write(_("\nThats it. ")) + if self.warnings==1: + self.fd.write(_("1 warning, ")) else: - self.fd.write(_(" in %d links") % linknumber) - self.fd.write(_(" found\n")) - self.stoptime = time.time() - duration = self.stoptime - self.starttime - name = _("seconds") - self.fd.write(_("Stopped checking at %s") % _strtime(self.stoptime)) - if duration > 60: - duration = duration / 60 - name = _("minutes") - if duration > 60: - duration = duration / 60 - name = _("hours") - self.fd.write(" (%.3f %s)\n" % (duration, name)) + self.fd.write(str(self.warnings)+_(" warnings, ")) + if self.errors==1: + self.fd.write(_("1 error")) + else: + self.fd.write(str(self.errors)+_(" errors")) + if linknumber >= 0: + if linknumber == 1: + self.fd.write(_(" in 1 link")) + else: + self.fd.write(_(" in %d links") % linknumber) + self.fd.write(_(" found\n")) + self.stoptime = time.time() + duration = self.stoptime - self.starttime + name = _("seconds") + self.fd.write(_("Stopped checking at %s") % _strtime(self.stoptime)) + if duration > 60: + duration = duration / 60 + name = _("minutes") + if duration > 60: + duration = duration / 60 + name = _("hours") + self.fd.write(" (%.3f %s)\n" % (duration, name)) self.fd.flush() self.fd = None diff --git a/linkcheck/UrlData.py b/linkcheck/UrlData.py index 9cea90f5..91316be6 100644 --- a/linkcheck/UrlData.py +++ b/linkcheck/UrlData.py @@ -239,7 +239,10 @@ class UrlData: warningregex = config["warningregex"] if warningregex and self.valid: debug("DEBUG: checking content\n") - self.checkContent(warningregex) + try: self.checkContent(warningregex) + except tuple(ExcList): + type, value = sys.exc_info()[:2] + self.setError(str(value)) self.checktime = time.time() - t # check recursion diff --git a/linkchecker b/linkchecker index 0077d092..3de53b0c 100755 --- a/linkchecker +++ b/linkchecker @@ -98,7 +98,7 @@ For single-letter option arguments the space is not a necessity. So message, for example 'This page has moved' or 'Oracle Application Server error'. This option implies -w.\n") % linkcheck.Config.LoggerKeys -""" +""") Notes = _("""NOTES o LinkChecker assumes an http:// resp. ftp:// link when a commandline URL diff --git a/linkcheckerrc b/linkcheckerrc index 370ab78e..4d1f9cf2 100644 --- a/linkcheckerrc +++ b/linkcheckerrc @@ -5,7 +5,7 @@ [output] # turn on/off debug messages debug=0 -# use the color logger +# use the text logger log=text # turn on/off --verbose verbose=0 @@ -15,34 +15,46 @@ warnings=0 quiet=0 # additional file output fileoutput= -#fileoutput = text, colored, html, gml, sql XXX -# what fields should each logger print out? -fields = all -# field = url, parent url, base url -# fields names: XXX +#fileoutput = text, colored, html, gml, sql + +# fields names: +# all (for all fields) +# realurl +# result +# base (base href=...) +# name (name) +# parenturl +# info +# warning +# dltime +# checktime # url -# parent url -# base url +# intro (the blurb at the beginning, "starting at ...") +# outro (the blurb at the end, "found x errors ...") # each Logger can have separate configuration parameters # standard text logger [text] filename=linkchecker-out.txt +fields=url,result # GML logger [gml] filename=linkchecker-out.gml +fields=all # CSV logger [csv] filename=linkchecker-out.csv separator=; +fields=all # SQL logger [sql] filename=linkchecker-out.sql dbname=linksdb commandsep=; +fields=all # HTML logger [html] @@ -55,6 +67,7 @@ colorlink="#191c83" tablewarning= tableerror= tableok= +fields=all # ANSI color logger [colored] @@ -71,6 +84,7 @@ colorinfo="\x1b[0m" colorwarning="\x1b[1;33m" colordltime="\x1b[0m" colorreset="\x1b[0m" +fields=all # blacklist logger [blacklist] diff --git a/po/de.po b/po/de.po index 1ced1397..8551e698 100644 --- a/po/de.po +++ b/po/de.po @@ -63,12 +63,165 @@ msgstr "%.3f Sekunden" msgid "Effective URL %s" msgstr "Effektive URL %s" +#, fuzzy +msgid "" +"USAGE\tlinkchecker [options] file-or-url...\n" +"\n" +"OPTIONS\n" +"For single-letter option arguments the space is not a necessity. So\n" +"'-o colored' is the same as '-ocolored'.\n" +"-a, --anchors\n" +" Check anchor references. Default is don't check anchors.\n" +"-d, --denyallow\n" +" Swap checking order to extern/intern. Default checking order\n" +" is intern/extern.\n" +"-D, --debug\n" +" Print additional debugging information.\n" +"-e regex, --extern=regex\n" +" Assume urls that match the given expression as extern.\n" +" Only intern HTML links are checked recursively.\n" +"-f file, --config=file\n" +" Use file as configuration file. LinkChecker first searches\n" +" ~/.linkcheckerrc and then /etc/linkcheckerrc\n" +" (under Windows \\linkcheckerrc).\n" +"-F type[/filename], --file-output=type[/filename]\n" +" Same as output, but write to a file linkchecker-out.\n" +" or if specified. If the file already exists, it\n" +" is overwritten. You can specify this option more than once.\n" +" There is no file output for the blacklist logger. Default is\n" +" no file output.\n" +"-i regex, --intern=regex\n" +" Assume URLs that match the given expression as intern.\n" +" LinkChecker descends recursively only to intern URLs, not to " +"extern.\n" +"-h, --help\n" +" Help me! Print usage information for this program.\n" +"-N server, --nntp-server=server\n" +" Specify an NNTP server for 'news:...' links. Default is the\n" +" environment variable NNTP_SERVER. If no host is given,\n" +" only the syntax of the link is checked.\n" +"-o type, --output=type\n" +" Specify output type as %s.\n" +" Default type is text.\n" +"-p pwd, --password=pwd\n" +" Try password pwd for HTML and FTP authorization.\n" +" Default password is 'joe@'. See also -u.\n" +"-q, --quiet\n" +" Quiet operation. This is only useful with -F.\n" +"-r depth, --recursion-level=depth\n" +" Check recursively all links up to given depth (depth >= 0).\n" +" Default depth is 1.\n" +"-R, --robots-txt\n" +" Obey the robots exclusion standard.\n" +"-s, --strict\n" +" Check only syntax of extern links, do not try to connect to them.\n" +"-t num, --threads=num\n" +" Generate no more than num threads. Default number of threads is 5.\n" +" To disable threading specify a non-positive number.\n" +"-u name, --user=name\n" +" Try username name for HTML and FTP authorization.\n" +" Default is 'anonymous'. See also -p.\n" +"-V, --version\n" +" Print version and exit.\n" +"-v, --verbose\n" +" Log all checked URLs (implies -w). Default is to log only invalid\n" +" URLs.\n" +"-w, --warnings\n" +" Log warnings.\n" +"-W regex, --warning-regex=regex\n" +" Define a regular expression which prints a warning if it matches\n" +" any content of the checked link.\n" +" This applies of course only to pages which are valid, so we can\n" +" get their content.\n" +" Use this to check for pages that contain some form of error\n" +" message, for example 'This page has moved' or 'Oracle\n" +" Application Server error'.\n" +" This option implies -w.\n" +"\") % linkcheck.Config.LoggerKeys\n" +msgstr "" +"BENUTZUNG\tlinkchecker [options] datei_oder_url...\n" +"\n" +"OPTIONEN\n" +"-a, --anchors\n" +" Prüfe interne URLs. Standard ist keine Prüfung.\n" +"-d, --denyallow\n" +" Tausche die Prüfreihenfolge zu extern/intern. Standardmäßige\n" +" Reihenfolge ist intern/extern.\n" +"-D, --debug\n" +" Drucke zusätzlich Debug Information.\n" +"-e regex, --extern=regex\n" +" Behandle URLs welche diesen Ausdruck matchen als extern.\n" +" Nur interne HTTP Links werden rekursiv geprüft.\n" +"-f file, --config=file\n" +" Benutze file als Konfigurationsdatei. LinkChecker sucht zuerst\n" +" ~/.linkcheckerrc und dann /etc/linkcheckerrc\n" +" (unter Windows \\linkcheckerrc).\n" +"-F type, --file-output=type\n" +" Wie --output, aber schreibe in eine Datei (Standard ist\n" +" linkchecker-out.)\n" +" Falls die Datei bereits existiert wird sie überschrieben.\n" +" Sie können diese Option mehr als einmal verwenden. Es gibt keine\n" +" Ausgabedatei für den blacklist Logger. Standard ist keine Ausgabe\n" +" in eine Datei.\n" +"-i regex, --intern=regex\n" +" Behandle URLs welche diese Ausdruck matchen als intern.\n" +"-h, --help\n" +" Hilf mir! Druche Nutzungsinformation für dieses Programm.\n" +"-N, --nntp-server\n" +" Gibt ein NNTP Rechner für 'news:...' Links. Standard ist die\n" +" Umgebungsvariable NNTP_SERVER. Falls kein Rechner angegeben ist,\n" +" wird lediglich auf korrekte Syntax des Links geprüft.\n" +"-o type, --output=type\n" +" Verwende die Ausgabe als %s.\n" +" Standard Ausgabe ist text.\n" +"-p pwd, --password=pwd\n" +" Verwende das angegebene Passwort für HTML und FTP Authorisation.\n" +" Standard ist 'guest@'. Siehe -u.\n" +"-q, --quiet\n" +" Keine Ausgabe. Dies ist nur in Verbindung mit -F nützlich.\n" +"-r depth, --recursion-level=depth\n" +" Prüfe rekursiv alle URLs bis zu der angegebenen Tiefe\n" +" (depth >= 0). Standard Tiefe ist 1.\n" +"-R, --robots-txt\n" +" Befolge den Robots Exclusion Standard.\n" +"-s, --strict\n" +" Prüfe lediglich die Syntax von externen URLs. Es wird keine.\n" +" Verbindung zu diesen Rechner aufgebaut.\n" +"-t num, --threads=num\n" +" Generiere nicht mehr als num Threads. Standard Anzahl von Threads\n" +" ist 5. Um Threading auszuschalten geben Sie eine nichtpositive\n" +" Anzahl an.\n" +"-u name, --user=name\n" +" Verwende den angegebenen Benutzernamen für HTML und FTP\n" +" Authorisation. Standard ist 'anonymous'. Siehe -p.\n" +"-V, --version\n" +" Drucke die Version und beende das Programm.\n" +"-v, --verbose\n" +" Logge alle geprüften URLs (impliziert -w). Standard ist es, nur\n" +" fehlerhafte URLs zu loggen.\n" +"-w, --warnings\n" +" Logge Warnungen.\n" +"-W regex, --warning-regex=regex\n" +" Definieren Sie einen regulären Ausdruck, der eine Warnung ausdruckt\n" +" falls er den Inhalt einer geprüften URL matcht.\n" +" Dies gilt natürlich nur für gültige Seiten deren Inhalt wir\n" +" bekommen können.\n" +" Sie können dies verwenden, um Seiten mit Fehlermeldungen wie z.B.\n" +" 'Diese Seite ist umgezogen' oder 'Oracle Server Fehler'.\n" +" Diese Option impliziert -w.\n" + msgid ", line " msgstr ", Zeile " +msgid "url" +msgstr "" + msgid "None of the mail hosts for %s accepts an SMTP connection: %s" msgstr "Keiner der Mail Hosts für %s akzeptiert eine SMTP Verbindung: %s" +msgid "name" +msgstr "" + msgid "%.3f seconds\n" msgstr "%.3f Sekunden\n" @@ -182,151 +335,6 @@ msgstr "Mail host %s gefunden" msgid "Javascript url ignored" msgstr "Javascript url ignoriert" -#, fuzzy -msgid "" -"USAGE\tlinkchecker [options] file-or-url...\n" -"\n" -"OPTIONS\n" -"For single-letter option arguments the space is not a necessity. So\n" -"'-o colored' is the same as '-ocolored'.\n" -"-a, --anchors\n" -" Check anchor references. Default is don't check anchors.\n" -"-d, --denyallow\n" -" Swap checking order to extern/intern. Default checking order\n" -" is intern/extern.\n" -"-D, --debug\n" -" Print additional debugging information.\n" -"-e regex, --extern=regex\n" -" Assume urls that match the given expression as extern.\n" -" Only intern HTML links are checked recursively.\n" -"-f file, --config=file\n" -" Use file as configuration file. LinkChecker first searches\n" -" ~/.linkcheckerrc and then /etc/linkcheckerrc\n" -" (under Windows \\linkcheckerrc).\n" -"-F type, --file-output=type\n" -" Same as output, but write to a file linkchecker-out..\n" -" If the file already exists, it is overwritten. You can specify\n" -" this option more than once. There is no file output for the\n" -" blacklist logger. Default is no file output.\n" -"-i regex, --intern=regex\n" -" Assume URLs that match the given expression as intern.\n" -" LinkChecker descends recursively only to intern URLs, not to " -"extern.\n" -"-h, --help\n" -" Help me! Print usage information for this program.\n" -"-N server, --nntp-server=server\n" -" Specify an NNTP server for 'news:...' links. Default is the\n" -" environment variable NNTP_SERVER. If no host is given,\n" -" only the syntax of the link is checked.\n" -"-o type, --output=type\n" -" Specify output type as %s.\n" -" Default type is text.\n" -"-p pwd, --password=pwd\n" -" Try password pwd for HTML and FTP authorization.\n" -" Default password is 'joe@'. See also -u.\n" -"-q, --quiet\n" -" Quiet operation. This is only useful with -F.\n" -"-r depth, --recursion-level=depth\n" -" Check recursively all links up to given depth (depth >= 0).\n" -" Default depth is 1.\n" -"-R, --robots-txt\n" -" Obey the robots exclusion standard.\n" -"-s, --strict\n" -" Check only syntax of extern links, do not try to connect to them.\n" -"-t num, --threads=num\n" -" Generate no more than num threads. Default number of threads is 5.\n" -" To disable threading specify a non-positive number.\n" -"-u name, --user=name\n" -" Try username name for HTML and FTP authorization.\n" -" Default is 'anonymous'. See also -p.\n" -"-V, --version\n" -" Print version and exit.\n" -"-v, --verbose\n" -" Log all checked URLs (implies -w). Default is to log only invalid\n" -" URLs.\n" -"-w, --warnings\n" -" Log warnings.\n" -"-W regex, --warning-regex=regex\n" -" Define a regular expression which prints a warning if it matches\n" -" any content of the checked link.\n" -" This applies of course only to pages which are valid, so we can\n" -" get their content.\n" -" Use this to check for pages that contain some form of error\n" -" message, for example 'This page has moved' or 'Oracle\n" -" Application Server error'.\n" -" This option implies -w.\n" -msgstr "" -"BENUTZUNG\tlinkchecker [options] datei_oder_url...\n" -"\n" -"OPTIONEN\n" -"-a, --anchors\n" -" Prüfe interne URLs. Standard ist keine Prüfung.\n" -"-d, --denyallow\n" -" Tausche die Prüfreihenfolge zu extern/intern. Standardmäßige\n" -" Reihenfolge ist intern/extern.\n" -"-D, --debug\n" -" Drucke zusätzlich Debug Information.\n" -"-e regex, --extern=regex\n" -" Behandle URLs welche diesen Ausdruck matchen als extern.\n" -" Nur interne HTTP Links werden rekursiv geprüft.\n" -"-f file, --config=file\n" -" Benutze file als Konfigurationsdatei. LinkChecker sucht zuerst\n" -" ~/.linkcheckerrc und dann /etc/linkcheckerrc\n" -" (unter Windows \\linkcheckerrc).\n" -"-F type, --file-output=type\n" -" Wie --output, aber schreibe in eine Datei (Standard ist\n" -" linkchecker-out.)\n" -" Falls die Datei bereits existiert wird sie überschrieben.\n" -" Sie können diese Option mehr als einmal verwenden. Es gibt keine\n" -" Ausgabedatei für den blacklist Logger. Standard ist keine Ausgabe\n" -" in eine Datei.\n" -"-i regex, --intern=regex\n" -" Behandle URLs welche diese Ausdruck matchen als intern.\n" -"-h, --help\n" -" Hilf mir! Druche Nutzungsinformation für dieses Programm.\n" -"-N, --nntp-server\n" -" Gibt ein NNTP Rechner für 'news:...' Links. Standard ist die\n" -" Umgebungsvariable NNTP_SERVER. Falls kein Rechner angegeben ist,\n" -" wird lediglich auf korrekte Syntax des Links geprüft.\n" -"-o type, --output=type\n" -" Verwende die Ausgabe als %s.\n" -" Standard Ausgabe ist text.\n" -"-p pwd, --password=pwd\n" -" Verwende das angegebene Passwort für HTML und FTP Authorisation.\n" -" Standard ist 'guest@'. Siehe -u.\n" -"-q, --quiet\n" -" Keine Ausgabe. Dies ist nur in Verbindung mit -F nützlich.\n" -"-r depth, --recursion-level=depth\n" -" Prüfe rekursiv alle URLs bis zu der angegebenen Tiefe\n" -" (depth >= 0). Standard Tiefe ist 1.\n" -"-R, --robots-txt\n" -" Befolge den Robots Exclusion Standard.\n" -"-s, --strict\n" -" Prüfe lediglich die Syntax von externen URLs. Es wird keine.\n" -" Verbindung zu diesen Rechner aufgebaut.\n" -"-t num, --threads=num\n" -" Generiere nicht mehr als num Threads. Standard Anzahl von Threads\n" -" ist 5. Um Threading auszuschalten geben Sie eine nichtpositive\n" -" Anzahl an.\n" -"-u name, --user=name\n" -" Verwende den angegebenen Benutzernamen für HTML und FTP\n" -" Authorisation. Standard ist 'anonymous'. Siehe -p.\n" -"-V, --version\n" -" Drucke die Version und beende das Programm.\n" -"-v, --verbose\n" -" Logge alle geprüften URLs (impliziert -w). Standard ist es, nur\n" -" fehlerhafte URLs zu loggen.\n" -"-w, --warnings\n" -" Logge Warnungen.\n" -"-W regex, --warning-regex=regex\n" -" Definieren Sie einen regulären Ausdruck, der eine Warnung ausdruckt\n" -" falls er den Inhalt einer geprüften URL matcht.\n" -" Dies gilt natürlich nur für gültige Seiten deren Inhalt wir\n" -" bekommen können.\n" -" Sie können dies verwenden, um Seiten mit Fehlermeldungen wie z.B.\n" -" 'Diese Seite ist umgezogen' oder 'Oracle Server Fehler'.\n" -" Diese Option impliziert -w.\n" - msgid "could not split the mail adress" msgstr "konnte Mail Adresse nicht splitten" diff --git a/po/fr.po b/po/fr.po index c6a793fe..f1747b49 100644 --- a/po/fr.po +++ b/po/fr.po @@ -63,12 +63,169 @@ msgstr "%.3f secondes" msgid "Effective URL %s" msgstr "URL effective %s" +#, fuzzy +msgid "" +"USAGE\tlinkchecker [options] file-or-url...\n" +"\n" +"OPTIONS\n" +"For single-letter option arguments the space is not a necessity. So\n" +"'-o colored' is the same as '-ocolored'.\n" +"-a, --anchors\n" +" Check anchor references. Default is don't check anchors.\n" +"-d, --denyallow\n" +" Swap checking order to extern/intern. Default checking order\n" +" is intern/extern.\n" +"-D, --debug\n" +" Print additional debugging information.\n" +"-e regex, --extern=regex\n" +" Assume urls that match the given expression as extern.\n" +" Only intern HTML links are checked recursively.\n" +"-f file, --config=file\n" +" Use file as configuration file. LinkChecker first searches\n" +" ~/.linkcheckerrc and then /etc/linkcheckerrc\n" +" (under Windows \\linkcheckerrc).\n" +"-F type[/filename], --file-output=type[/filename]\n" +" Same as output, but write to a file linkchecker-out.\n" +" or if specified. If the file already exists, it\n" +" is overwritten. You can specify this option more than once.\n" +" There is no file output for the blacklist logger. Default is\n" +" no file output.\n" +"-i regex, --intern=regex\n" +" Assume URLs that match the given expression as intern.\n" +" LinkChecker descends recursively only to intern URLs, not to " +"extern.\n" +"-h, --help\n" +" Help me! Print usage information for this program.\n" +"-N server, --nntp-server=server\n" +" Specify an NNTP server for 'news:...' links. Default is the\n" +" environment variable NNTP_SERVER. If no host is given,\n" +" only the syntax of the link is checked.\n" +"-o type, --output=type\n" +" Specify output type as %s.\n" +" Default type is text.\n" +"-p pwd, --password=pwd\n" +" Try password pwd for HTML and FTP authorization.\n" +" Default password is 'joe@'. See also -u.\n" +"-q, --quiet\n" +" Quiet operation. This is only useful with -F.\n" +"-r depth, --recursion-level=depth\n" +" Check recursively all links up to given depth (depth >= 0).\n" +" Default depth is 1.\n" +"-R, --robots-txt\n" +" Obey the robots exclusion standard.\n" +"-s, --strict\n" +" Check only syntax of extern links, do not try to connect to them.\n" +"-t num, --threads=num\n" +" Generate no more than num threads. Default number of threads is 5.\n" +" To disable threading specify a non-positive number.\n" +"-u name, --user=name\n" +" Try username name for HTML and FTP authorization.\n" +" Default is 'anonymous'. See also -p.\n" +"-V, --version\n" +" Print version and exit.\n" +"-v, --verbose\n" +" Log all checked URLs (implies -w). Default is to log only invalid\n" +" URLs.\n" +"-w, --warnings\n" +" Log warnings.\n" +"-W regex, --warning-regex=regex\n" +" Define a regular expression which prints a warning if it matches\n" +" any content of the checked link.\n" +" This applies of course only to pages which are valid, so we can\n" +" get their content.\n" +" Use this to check for pages that contain some form of error\n" +" message, for example 'This page has moved' or 'Oracle\n" +" Application Server error'.\n" +" This option implies -w.\n" +"\") % linkcheck.Config.LoggerKeys\n" +msgstr "" +"USAGE\tlinkchecker [options] fichier_ou_url...\n" +"\n" +"OPTIONS\n" +"-a, --anchors\n" +" Contrôle les références ancrées. Par défaut, il ne les contrôle " +"pas.\n" +"-d, --denyallow\n" +" Swap checking order to extern/intern. Default checking order\n" +" is intern/extern.\n" +"-D, --debug\n" +" Affiche des informations de débugage supplémentaires.\n" +"-e regex, --extern=regex\n" +" Assume urls that match the given expression as extern.\n" +" Only intern HTTP links are checked recursively.\n" +"-f file, --config=file\n" +" Utilise le fichier comme fichier de configuration. LinkChecker " +"recherche d'abord\n" +" ~/.linkcheckerrc puis /etc/linkcheckerrc\n" +" (sous Windows \\linkcheckerrc).\n" +"-F name, --file-output=name\n" +" Identique à output, mais écrit dans un fichier " +"linkchecker-out..\n" +" Si le fichier existe, il sera écrasé. Vous pouvez spécifier\n" +" cette option plus d'une fois. Il n'y a pas de fichier de sotie pour " +"les\n" +" logs de la liste noire. Par défaut, il n'y a pas de fichier de " +"sortie.\n" +"-i regex, --intern=regex\n" +" Assume urls that match the given expression as intern.\n" +"-h, --help\n" +" Aide moi! Affiche les informations d'utilisation pour ce programme.\n" +"-N, --nntp-server\n" +" Specify an NNTP server for 'news:...' links. Default is the\n" +" environment variable NNTP_SERVER. If no host is given,\n" +" only the syntax of the link is checked.\n" +"-o name, --output=name\n" +" Specify output as %s.\n" +" Default is text.\n" +"-p pwd, --password=pwd\n" +" Try given password for HTML and FTP authorization.\n" +" Default is 'guest@'. See -u.\n" +"-q, --quiet\n" +" Quiet operation. This is only useful with -F.\n" +"-r depth, --recursion-level=depth\n" +" Check recursively all links up to given depth (depth >= 0).\n" +" Default depth is 1.\n" +"-R, --robots-txt\n" +" Obey the robots exclusion standard.\n" +"-s, --strict\n" +" Contrôle seulement la syntaxe des liens externes, et ne pas essayer\n" +" de s'y connecter.\n" +"-t num, --threads=num\n" +" Generate no more than num threads. Default number of threads is 5.\n" +" To disable threading specify a non-positive number.\n" +"-u name, --user=name\n" +" Essayer le nom d'utilisateur donné pour l'autorisation HTTP et FTP.\n" +" La valeur par défaut est 'anonymous'. Regarder à -p.\n" +"-V, --version\n" +" Affiche la version et quitte.\n" +"-v, --verbose\n" +" Logger toutes les URLs contôlées (suppose -w). Par défaut, seulement " +"les URLS\n" +" invalides sont logguées.\n" +"-w, --warnings\n" +" Logger les avertissements.\n" +"-W regex, --warning-regex=regex\n" +" Define a regular expression which prints a warning if it matches\n" +" any content of the checked link.\n" +" This applies of course only to pages which are valid, so we can\n" +" get their content.\n" +" You can use this to check for pages that contain some form of\n" +" error message, for example 'This page has moved' or\n" +" 'Oracle Application Server error'.\n" +" This option implies -w.\n" + msgid ", line " msgstr ", ligne " +msgid "url" +msgstr "" + msgid "None of the mail hosts for %s accepts an SMTP connection: %s" msgstr "Aucun des hôtes de messagerie pour %s n'accepte de connection SMTP: %s" +msgid "name" +msgstr "" + msgid "%.3f seconds\n" msgstr "%.3f secondes\n" @@ -173,155 +330,6 @@ msgstr "trouv msgid "Javascript url ignored" msgstr "Url Javascript ignorée" -#, fuzzy -msgid "" -"USAGE\tlinkchecker [options] file-or-url...\n" -"\n" -"OPTIONS\n" -"For single-letter option arguments the space is not a necessity. So\n" -"'-o colored' is the same as '-ocolored'.\n" -"-a, --anchors\n" -" Check anchor references. Default is don't check anchors.\n" -"-d, --denyallow\n" -" Swap checking order to extern/intern. Default checking order\n" -" is intern/extern.\n" -"-D, --debug\n" -" Print additional debugging information.\n" -"-e regex, --extern=regex\n" -" Assume urls that match the given expression as extern.\n" -" Only intern HTML links are checked recursively.\n" -"-f file, --config=file\n" -" Use file as configuration file. LinkChecker first searches\n" -" ~/.linkcheckerrc and then /etc/linkcheckerrc\n" -" (under Windows \\linkcheckerrc).\n" -"-F type, --file-output=type\n" -" Same as output, but write to a file linkchecker-out..\n" -" If the file already exists, it is overwritten. You can specify\n" -" this option more than once. There is no file output for the\n" -" blacklist logger. Default is no file output.\n" -"-i regex, --intern=regex\n" -" Assume URLs that match the given expression as intern.\n" -" LinkChecker descends recursively only to intern URLs, not to " -"extern.\n" -"-h, --help\n" -" Help me! Print usage information for this program.\n" -"-N server, --nntp-server=server\n" -" Specify an NNTP server for 'news:...' links. Default is the\n" -" environment variable NNTP_SERVER. If no host is given,\n" -" only the syntax of the link is checked.\n" -"-o type, --output=type\n" -" Specify output type as %s.\n" -" Default type is text.\n" -"-p pwd, --password=pwd\n" -" Try password pwd for HTML and FTP authorization.\n" -" Default password is 'joe@'. See also -u.\n" -"-q, --quiet\n" -" Quiet operation. This is only useful with -F.\n" -"-r depth, --recursion-level=depth\n" -" Check recursively all links up to given depth (depth >= 0).\n" -" Default depth is 1.\n" -"-R, --robots-txt\n" -" Obey the robots exclusion standard.\n" -"-s, --strict\n" -" Check only syntax of extern links, do not try to connect to them.\n" -"-t num, --threads=num\n" -" Generate no more than num threads. Default number of threads is 5.\n" -" To disable threading specify a non-positive number.\n" -"-u name, --user=name\n" -" Try username name for HTML and FTP authorization.\n" -" Default is 'anonymous'. See also -p.\n" -"-V, --version\n" -" Print version and exit.\n" -"-v, --verbose\n" -" Log all checked URLs (implies -w). Default is to log only invalid\n" -" URLs.\n" -"-w, --warnings\n" -" Log warnings.\n" -"-W regex, --warning-regex=regex\n" -" Define a regular expression which prints a warning if it matches\n" -" any content of the checked link.\n" -" This applies of course only to pages which are valid, so we can\n" -" get their content.\n" -" Use this to check for pages that contain some form of error\n" -" message, for example 'This page has moved' or 'Oracle\n" -" Application Server error'.\n" -" This option implies -w.\n" -msgstr "" -"USAGE\tlinkchecker [options] fichier_ou_url...\n" -"\n" -"OPTIONS\n" -"-a, --anchors\n" -" Contrôle les références ancrées. Par défaut, il ne les contrôle " -"pas.\n" -"-d, --denyallow\n" -" Swap checking order to extern/intern. Default checking order\n" -" is intern/extern.\n" -"-D, --debug\n" -" Affiche des informations de débugage supplémentaires.\n" -"-e regex, --extern=regex\n" -" Assume urls that match the given expression as extern.\n" -" Only intern HTTP links are checked recursively.\n" -"-f file, --config=file\n" -" Utilise le fichier comme fichier de configuration. LinkChecker " -"recherche d'abord\n" -" ~/.linkcheckerrc puis /etc/linkcheckerrc\n" -" (sous Windows \\linkcheckerrc).\n" -"-F name, --file-output=name\n" -" Identique à output, mais écrit dans un fichier " -"linkchecker-out..\n" -" Si le fichier existe, il sera écrasé. Vous pouvez spécifier\n" -" cette option plus d'une fois. Il n'y a pas de fichier de sotie pour " -"les\n" -" logs de la liste noire. Par défaut, il n'y a pas de fichier de " -"sortie.\n" -"-i regex, --intern=regex\n" -" Assume urls that match the given expression as intern.\n" -"-h, --help\n" -" Aide moi! Affiche les informations d'utilisation pour ce programme.\n" -"-N, --nntp-server\n" -" Specify an NNTP server for 'news:...' links. Default is the\n" -" environment variable NNTP_SERVER. If no host is given,\n" -" only the syntax of the link is checked.\n" -"-o name, --output=name\n" -" Specify output as %s.\n" -" Default is text.\n" -"-p pwd, --password=pwd\n" -" Try given password for HTML and FTP authorization.\n" -" Default is 'guest@'. See -u.\n" -"-q, --quiet\n" -" Quiet operation. This is only useful with -F.\n" -"-r depth, --recursion-level=depth\n" -" Check recursively all links up to given depth (depth >= 0).\n" -" Default depth is 1.\n" -"-R, --robots-txt\n" -" Obey the robots exclusion standard.\n" -"-s, --strict\n" -" Contrôle seulement la syntaxe des liens externes, et ne pas essayer\n" -" de s'y connecter.\n" -"-t num, --threads=num\n" -" Generate no more than num threads. Default number of threads is 5.\n" -" To disable threading specify a non-positive number.\n" -"-u name, --user=name\n" -" Essayer le nom d'utilisateur donné pour l'autorisation HTTP et FTP.\n" -" La valeur par défaut est 'anonymous'. Regarder à -p.\n" -"-V, --version\n" -" Affiche la version et quitte.\n" -"-v, --verbose\n" -" Logger toutes les URLs contôlées (suppose -w). Par défaut, seulement " -"les URLS\n" -" invalides sont logguées.\n" -"-w, --warnings\n" -" Logger les avertissements.\n" -"-W regex, --warning-regex=regex\n" -" Define a regular expression which prints a warning if it matches\n" -" any content of the checked link.\n" -" This applies of course only to pages which are valid, so we can\n" -" get their content.\n" -" You can use this to check for pages that contain some form of\n" -" error message, for example 'This page has moved' or\n" -" 'Oracle Application Server error'.\n" -" This option implies -w.\n" - msgid "could not split the mail adress" msgstr "impossible de partager l'adresse e-mail" diff --git a/po/pygettext.py b/po/pygettext.py index ab97f03c..6dfa43fa 100755 --- a/po/pygettext.py +++ b/po/pygettext.py @@ -323,7 +323,7 @@ def main(): opts, args = getopt.getopt( sys.argv[1:], 'ad:Ehk:Kno:p:S:Vvw:x:', - ['extract-all', 'default-domain', 'escape', 'help', + ['extract-all', 'default-domain=', 'escape', 'help', 'keyword=', 'no-default-keywords', 'add-location', 'no-location', 'output=', 'output-dir=', 'style=', 'verbose', 'version', 'width=', 'exclude-file=', diff --git a/test/parsetest.py b/test/parsetest.py deleted file mode 100644 index ca6c7831..00000000 --- a/test/parsetest.py +++ /dev/null @@ -1,43 +0,0 @@ -""" - Copyright (C) 2000 Bastian Kleineidam - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -""" -import sys,StringIO,LinkChecker - -def linkcheck(urls): - "Check a list of http://, file:// etc. urls" - config = LinkChecker.Config.Configuration() - config["verbose"]=1 - config["warnings"]=1 - # no more options, use defaults - - # add urls - for url in urls: - config.appendUrl(LinkChecker.UrlData.GetUrlDataFrom(url, 0)) - - # check it - LinkChecker.checkUrls(config) - -old_stdout = sys.stdout -sys.stdout = StringIO.StringIO() -linkcheck(['http://fsinfo.cs.uni-sb.de/~calvin']) -sys.stdout.seek(0) -reader = LinkChecker.OutputReader.OutputReader() -old_stdout.write(sys.stdout.getvalue()) -result = reader.parse(sys.stdout) -sys.stdout = old_stdout -for url in result: - print str(url) diff --git a/test/robots.txt b/test/robots.txt index 3a9d1075..570eb195 100644 --- a/test/robots.txt +++ b/test/robots.txt @@ -3,7 +3,40 @@ # Skip Montanaro (skip@mojam.com) # - adapted from the robots.txt file at http://web.nexor.co.uk/ +# disallow a bunch of ill-behaved user agents (doubt this will deter them...) + +User-agent: ExtractorPro +Disallow: / + +User-agent: EmailSiphon +Disallow: / + +User-agent: EmailWolf +Disallow: / + +User-agent: CherryPickerSE/1.0 +Disallow: / + +User-agent: CherryPickerElite/1.0 +Disallow: / + +User-agent: EmailCollector/1.0 +Disallow: / + +User-agent: EmailWolf 1.00 +Disallow: / + +User-agent: Crescent Internet ToolPak HTTP OLE Control v.1.0 +Disallow: / + +User-agent: EmailSiphon +Disallow: / + +User-agent: Mozilla/2.0 (compatible; NEWT ActiveX; Win32) +Disallow: / + # by default + User-agent: * Disallow: /ccrd # not useful to spiders Disallow: /click # not useful to spiders @@ -23,24 +56,3 @@ Disallow: /musician # defunct Disallow: /~skip/volkswagen # defunct Disallow: /%7Eskip/volkswagen # defunct -# disallow a bunch of ill-behaved user agents (doubt this will deter them...) -User-agent: ExtractorPro -Disallow: / -User-agent: EmailSiphon -Disallow: / -User-agent: EmailWolf -Disallow: / -User-agent: CherryPickerSE/1.0 -Disallow: / -User-agent: CherryPickerElite/1.0 -Disallow: / -User-agent: EmailCollector/1.0 -Disallow: / -User-agent: EmailWolf 1.00 -Disallow: / -User-agent: Crescent Internet ToolPak HTTP OLE Control v.1.0 -Disallow: / -User-agent: EmailSiphon -Disallow: / -User-agent: Mozilla/2.0 (compatible; NEWT ActiveX; Win32) -Disallow: / \ No newline at end of file