diff --git a/INSTALL b/INSTALL index 89f8b422..d2877f3d 100644 --- a/INSTALL +++ b/INSTALL @@ -13,17 +13,49 @@ Python! Setup ----- -Run "python setup.py build" to build. -Run "python setup.py install" to install. -Run "python setup.py --help" for help. - After installation, the system wide configuration file is in /share/linkchecker/linkcheckerrc. -The local configuration file is ~/.linkcheckerrc on Unix platforms. -To run the program type "linkchecker" followed by your URLs you want -to check. -Type "linkchecker -h" for help. +o Unix platforms + - open a commandline window and change to the linkchecker-x.x.x + directory + - run "python setup.py install" to install + + For help on setup.py options, run "python setup.py --help". + The local configuration file is $HOME/.linkcheckerrc + + To run the program type "linkchecker" followed by your URLs you want + to check. + Type "linkchecker -h" for help. + + +o Windows platforms + - make sure that python.exe is in your PATH + - open a commandline window (cmd.exe) and change to the + linkchecker-x.x.x directory + - run "python.exe setup.py install" to install + + For help on setup.py options, run "python.exe setup.py --help". + + To run the program, change to the scripts directory of your python + installation and type "linkchecker.bat" followed by your URLs you + want to check. + Type "linkchecker.bat -h" for help. + + +o MacOS 9.x platforms + - open the Python IDE + - open the setup.py file + - run it (click on "Run all") + - in the popup window, select the "install" command and click "Add" + - click "Ok"; this will copy files into the Python folder + + Read the MacOS Python documentation to find out about passing + commandline options to Python scripts. + + +o MacOS X platforms + - not tested (Fast)CGI web interface diff --git a/debian/changelog b/debian/changelog index a960e424..7aea4adb 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,13 @@ +linkchecker (1.3.11) unstable; urgency=low + + * setup.py: use os.getcwd(), not "." which breaks on MacOS 9.x + * added platform-specific install instructions + * use Pythons internal gettext module, get rid of fintl.py + * use Pythons internal robot.txt parser, get rid of + robotparser2.py + + -- Bastian Kleineidam Wed, 28 Nov 2001 17:57:20 +0100 + linkchecker (1.3.10) unstable; urgency=low * use Pythons builtin HTTPS support diff --git a/linkcheck/HttpUrlData.py b/linkcheck/HttpUrlData.py index 81958a2c..5404fb20 100644 --- a/linkcheck/HttpUrlData.py +++ b/linkcheck/HttpUrlData.py @@ -16,7 +16,7 @@ # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. import httplib, urlparse, sys, time, re -import Config, StringUtil, robotparser2 +import Config, StringUtil, robotparser from UrlData import UrlData from urllib import splittype, splithost, splituser, splitpasswd from linkcheck import _ @@ -251,7 +251,7 @@ class HttpUrlData(UrlData): def robotsTxtAllowsUrl(self, config): roboturl="%s://%s/robots.txt" % self.urlTuple[0:2] if not config.robotsTxtCache_has_key(roboturl): - rp = robotparser2.RobotFileParser(roboturl) + rp = robotparser.RobotFileParser(roboturl) rp.read() config.robotsTxtCache_set(roboturl, rp) rp = config.robotsTxtCache_get(roboturl) diff --git a/linkcheck/__init__.py b/linkcheck/__init__.py index a257a439..084cd6f7 100644 --- a/linkcheck/__init__.py +++ b/linkcheck/__init__.py @@ -19,28 +19,15 @@ class error(Exception): pass # i18n suppport -LANG="EN" # default language (used for HTML output) import _linkchecker_configdata try: - import fintl,os,string - gettext = fintl.gettext + import gettext domain = 'linkcheck' localedir = os.path.join(_linkchecker_configdata.install_data, 'locale') - fintl.bindtextdomain(domain, localedir) - fintl.textdomain(domain) - languages = [] - for envvar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'): - if os.environ.has_key(envvar): - languages = string.split(os.environ[envvar], ':') - break - if languages: - LANG=string.upper(languages[0]) - -except ImportError: - def gettext(msg): - return msg -# set _ as an alias for gettext -_ = gettext + t = gettext.translation(domain, localedir) + _ = t.gettext +except IOError: + _ = lambda s: s #import timeoutsocket #timeoutsocket.setDefaultSocketTimeout(20) diff --git a/linkcheck/fintl.py b/linkcheck/fintl.py deleted file mode 100644 index 6f3f9a0a..00000000 --- a/linkcheck/fintl.py +++ /dev/null @@ -1,208 +0,0 @@ -## vim:ts=4:et:nowrap -"""i18n (multiple language) support. Reads .mo files from GNU gettext msgfmt - -If you want to prepare your Python programs for i18n you could simply -add the following lines to the top of a BASIC_MAIN module of your py-program: - try: - import fintl - gettext = fintl.gettext - fintl.bindtextdomain(YOUR_PROGRAM, YOUR_LOCALEDIR) - fintl.textdomain(YOUR_PROGRAM) - except ImportError: - def gettext(msg): - return msg - _ = gettext -and/or also add the following to the top of any module containing messages: - import BASIC_MAIN - _ = BASIC_MAIN.gettext - -Now you could use _("....") everywhere instead of "...." for message texts. - -Once you have written your internationalized program, you can use -the suite of utility programs contained in the GNU gettext package to aid -the translation into other languages. - -You ARE NOT REQUIRED to release the sourcecode of your program, since -linking of your program against GPL code is avoided by this module. -Although it is possible to use the GNU gettext library by using the -*intl.so* module written by Martin von Löwis if this is available. But it is -not required to use it in the first place. -""" -# Copyright 1999 by (Peter Funk) -# -# All Rights Reserved -# -# Permission to use, copy, modify, and distribute this software and its -# documentation for any purpose and without fee is hereby granted, -# provided that the above copyright notice appear in all copies. - -# ArtCom GmbH AND Peter Funk DISCLAIMS ALL WARRANTIES WITH REGARD TO -# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY -# AND FITNESS, IN NO EVENT SHALL ArtCom GmBH or Peter Funk BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING -# OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - -_default_localedir = '/usr/share/locale' -_default_domain = 'python' - -# check out, if Martin v. Löwis 'intl' module interface to the GNU gettext -# library is available and use it only, if it is available: -try: - from intl import * -except ImportError: - # now do what the gettext library provides in pure Python: - error = 'fintl.error' - # some globals preserving state: - _languages = [] - _default_mo = None # This is default message outfile used by 'gettext' - _loaded_mos = {} # This is a dictionary of loaded message output files - - # some small little helper routines: - def _check_env(): - """examine language enviroment variables and return list of languages""" - # TODO: This should somehow try to find out locale information on - # Non-unix platforms like WinXX and MacOS. Suggestions welcome! - languages = [] - import os, string - for envvar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'): - if os.environ.has_key(envvar): - languages = string.split(os.environ[envvar], ':') - break - # use locale 'C' as default fallback: - if 'C' not in _languages: - languages.append('C') - return languages - - # Utility function used to decode binary .mo file header and seek tables: - def _decode_Word(bin): - # This assumes little endian (intel, vax) byte order. - return ord(bin[0]) + (ord(bin[1]) << 8) + \ - (ord(bin[2]) << 16) + (ord(bin[3]) << 24) - - # Now the methods designed to be used from outside: - - def gettext(message): - """return localized version of a 'message' string""" - if _default_mo is None: - textdomain() - return _default_mo.gettext(message) - - _ = gettext - - def dgettext(domain, message): - """like gettext but looks up 'message' in a special 'domain'""" - # This may useful for larger software systems - if not _loaded_mos.has_key(domain): - raise error, "No '" + domain + "' message domain" - return _loaded_mos[domain].gettext(message) - - class _MoDict: - """read a .mo file into a python dictionary""" - __MO_MAGIC = 0x950412de # Magic number of .mo files - def __init__(self, domain=_default_domain, localedir=_default_localedir): - global _languages - self.catalog = {} - self.domain = domain - self.localedir = localedir - # delayed access to environment variables: - if not _languages: - _languages = _check_env() - for self.lang in _languages: - if self.lang == 'C': - return - mo_filename = "%s//%s/LC_MESSAGES/%s.mo" % ( - localedir, self.lang, domain) - try: - buffer = open(mo_filename, "rb").read() - break - except IOError: - pass - else: - return # assume C locale - # Decode the header of the .mo file (5 little endian 32 bit words): - if _decode_Word(buffer[:4]) != self.__MO_MAGIC : - raise error, '%s seems not be a valid .mo file' % mo_filename - self.mo_version = _decode_Word(buffer[4:8]) - num_messages = _decode_Word(buffer[8:12]) - master_index = _decode_Word(buffer[12:16]) - transl_index = _decode_Word(buffer[16:20]) - buf_len = len(buffer) - # now put all messages from the .mo file buffer in the catalog dict: - for i in xrange(0, num_messages): - start_master= _decode_Word(buffer[master_index+4:master_index+8]) - end_master = start_master + \ - _decode_Word(buffer[master_index:master_index+4]) - start_transl= _decode_Word(buffer[transl_index+4:transl_index+8]) - end_transl = start_transl + \ - _decode_Word(buffer[transl_index:transl_index+4]) - if end_master <= buf_len and end_transl <= buf_len: - self.catalog[buffer[start_master:end_master]]=\ - buffer[start_transl:end_transl] - else: - raise error, ".mo file '%s' is corrupt" % mo_filename - # advance to the next entry in seek tables: - master_index += 8 - transl_index += 8 - - def gettext(self, message): - """return the translation of a given message""" - try: - return self.catalog[message] - except KeyError: - return message - # _MoDict instances may be also accessed using mo[msg] or mo(msg): - __getitem = gettext - __call__ = gettext - - def textdomain(domain=_default_domain): - """Sets the 'domain' to be used by this program. Defaults to 'python'""" - global _default_mo - if not _loaded_mos.has_key(domain): - _loaded_mos[domain] = _MoDict(domain) - _default_mo = _loaded_mos[domain] - - def bindtextdomain(domain, localedir=_default_localedir): - global _default_mo - if not _loaded_mos.has_key(domain): - _loaded_mos[domain] = _MoDict(domain, localedir) - if _default_mo is not None: - _default_mo = _loaded_mos[domain] - - def translator(domain=_default_domain, localedir=_default_localedir): - """returns a gettext compatible function object - - which is bound to the domain given as parameter""" - pass # TODO implement this - -def _testdriver(argv): - message = "" - domain = _default_domain - localedir = _default_localedir - if len(argv) > 1: - message = argv[1] - if len(argv) > 2: - domain = argv[2] - if len(argv) > 3: - localedir = argv[3] - # now perform some testing of this module: - bindtextdomain(domain, localedir) - textdomain(domain) - info = gettext('') # this is where special info is often stored - if info: - print ".mo file for domain %s in %s contains:" % (domain, localedir) - print info - else: - print ".mo file contains no info" - if message: - print "Translation of '"+ message+ "' is '"+ _(message)+ "'" - else: - for msg in ("Cancel", "No", "OK", "Quit", "Yes"): - print "Translation of '"+ msg + "' is '"+ _(msg)+ "'" - -if __name__ == '__main__': - import sys - if len(sys.argv) > 1 and (sys.argv[1] == "-h" or sys.argv[1] == "-?"): - print "Usage :", sys.argv[0], "[ MESSAGE [ DOMAIN [ LOCALEDIR ]]]" - _testdriver(sys.argv) diff --git a/linkcheck/robotparser2.py b/linkcheck/robotparser2.py deleted file mode 100755 index 40403cb4..00000000 --- a/linkcheck/robotparser2.py +++ /dev/null @@ -1,250 +0,0 @@ -""" implements the robots.txt inclusion protocol - - Copyright (C) 2000 Bastian Kleineidam - - You can choose between two licenses when using this package: - 1) GNU GPLv2 - 2) PYTHON 2.0 OPEN SOURCE LICENSE - - The robots.txt Exclusion Protocol is implemented as specified in - http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html -""" -import re,string,urlparse,urllib - -__all__ = ['RobotFileParser'] - -debug = 0 - -def _debug(msg): - if debug: print msg - - -class RobotFileParser: - def __init__(self, url=''): - self.entries = [] - self.disallow_all = 0 - self.allow_all = 0 - self.set_url(url) - self.last_checked = 0 - - def mtime(self): - return self.last_checked - - def modified(self): - import time - self.last_checked = time.time() - - def set_url(self, url): - self.url = url - self.host, self.path = urlparse.urlparse(url)[1:3] - - def read(self): - import httplib - tries = 0 - # limit number of redirections to 5 - while tries<5: - connection = httplib.HTTP(self.host) - connection.putrequest("GET", self.path) - connection.putheader("Host", self.host) - connection.endheaders() - status, text, mime = connection.getreply() - if status in [301,302] and mime: - tries = tries + 1 - newurl = mime.get("Location", mime.get("Uri", "")) - newurl = urlparse.urljoin(self.url, newurl) - self.set_url(newurl) - else: - break - if status==401 or status==403: - self.disallow_all = 1 - elif status>=400: - self.allow_all = 1 - else: - # status < 400 - self.parse(connection.getfile().readlines()) - - def parse(self, lines): - """parse the input lines from a robot.txt file. - We allow that a user-agent: line is not preceded by - one or more blank lines.""" - state = 0 - linenumber = 0 - entry = Entry() - - for line in lines: - line = string.strip(line) - linenumber = linenumber + 1 - if not line: - if state==1: - _debug("line %d: warning: you should insert" - " allow: or disallow: directives below any" - " user-agent: line" % linenumber) - entry = Entry() - state = 0 - elif state==2: - self.entries.append(entry) - entry = Entry() - state = 0 - # remove optional comment and strip line - i = string.find(line, '#') - if i>=0: - line = line[:i] - line = string.strip(line) - if not line: - continue - line = string.split(line, ':', 1) - if len(line) == 2: - line[0] = string.lower(string.strip(line[0])) - line[1] = string.strip(line[1]) - if line[0] == "user-agent": - if state==2: - _debug("line %d: warning: you should insert a blank" - " line before any user-agent" - " directive" % linenumber) - self.entries.append(entry) - entry = Entry() - entry.useragents.append(line[1]) - state = 1 - elif line[0] == "disallow": - if state==0: - _debug("line %d: error: you must insert a user-agent:" - " directive before this line" % linenumber) - else: - entry.rulelines.append(RuleLine(line[1], 0)) - state = 2 - elif line[0] == "allow": - if state==0: - _debug("line %d: error: you must insert a user-agent:" - " directive before this line" % linenumber) - else: - entry.rulelines.append(RuleLine(line[1], 1)) - else: - _debug("line %d: warning: unknown key %s" % (linenumber, - line[0])) - else: - _debug("line %d: error: malformed line %s"%(linenumber, line)) - if state==2: - self.entries.append(entry) - _debug("Parsed rules:\n%s" % str(self)) - - - def can_fetch(self, useragent, url): - """using the parsed robots.txt decide if useragent can fetch url""" - _debug("Checking robot.txt allowance for\n%s\n%s" % (useragent, url)) - if self.disallow_all: - return 0 - if self.allow_all: - return 1 - # search for given user agent matches - # the first match counts - url = urllib.quote(urlparse.urlparse(url)[2]) or "/" - for entry in self.entries: - if entry.applies_to(useragent): - return entry.allowance(url) - # agent not found ==> access granted - return 1 - - - def __str__(self): - ret = "" - for entry in self.entries: - ret = ret + str(entry) + "\n" - return ret - - -class RuleLine: - """A rule line is a single "Allow:" (allowance==1) or "Disallow:" - (allowance==0) followed by a path.""" - def __init__(self, path, allowance): - self.path = urllib.quote(path) - self.allowance = allowance - - def applies_to(self, filename): - return self.path=="*" or re.match(self.path, filename) - - def __str__(self): - return (self.allowance and "Allow" or "Disallow")+": "+self.path - - -class Entry: - """An entry has one or more user-agents and zero or more rulelines""" - def __init__(self): - self.useragents = [] - self.rulelines = [] - - def __str__(self): - ret = "" - for agent in self.useragents: - ret = ret + "User-agent: "+agent+"\n" - for line in self.rulelines: - ret = ret + str(line) + "\n" - return ret - - def applies_to(self, useragent): - """check if this entry applies to the specified agent""" - # split the name token and make it lower case - useragent = string.lower(string.split(useragent,"/")[0]) - for agent in self.useragents: - if agent=='*': - # we have the catch-all agent - return 1 - agent = string.lower(agent) - # don't forget to re.escape - if re.search(re.escape(useragent), agent): - return 1 - return 0 - - def allowance(self, filename): - """Preconditions: - - our agent applies to this entry - - filename is URL decoded""" - for line in self.rulelines: - _debug((filename, str(line), line.allowance)) - if line.applies_to(filename): - return line.allowance - return 1 - -def _check(a,b): - if a!=b: - print "failed\n" - else: - print "ok\n" - -def _test(): - global debug - import sys - rp = RobotFileParser() - debug = 1 - if len(sys.argv) <= 1: - rp.set_url('http://www.musi-cal.com/robots.txt') - rp.read() - else: - rp.parse(open(sys.argv[1]).readlines()) - # test for re.escape - _check(rp.can_fetch('*', 'http://www.musi-cal.com/'), 1) - # empty url path - _check(rp.can_fetch('*', 'http://www.musi-cal.com'), 1) - # this should match the first rule, which is a disallow - _check(rp.can_fetch('', 'http://www.musi-cal.com/'), 0) - # various cherry pickers - _check(rp.can_fetch('CherryPickerSE', - 'http://www.musi-cal.com/cgi-bin/event-search' - '?city=San+Francisco'), 0) - _check(rp.can_fetch('CherryPickerSE/1.0', - 'http://www.musi-cal.com/cgi-bin/event-search' - '?city=San+Francisco'), 0) - _check(rp.can_fetch('CherryPickerSE/1.5', - 'http://www.musi-cal.com/cgi-bin/event-search' - '?city=San+Francisco'), 0) - # case sensitivity - _check(rp.can_fetch('ExtractorPro', 'http://www.musi-cal.com/blubba'), 0) - _check(rp.can_fetch('extractorpro', 'http://www.musi-cal.com/blubba'), 0) - # substring test - _check(rp.can_fetch('toolpak/1.1', 'http://www.musi-cal.com/blubba'), 0) - # tests for catch-all * agent - _check(rp.can_fetch('spam', 'http://www.musi-cal.com/musician/me'), 0) - _check(rp.can_fetch('spam', 'http://www.musi-cal.com/Musician/me'), 1) - _check(rp.can_fetch('spam', 'http://www.musi-cal.com/'), 1) - -if __name__ == '__main__': - _test() diff --git a/linkchecker.bat b/linkchecker.bat index 4469ca3e..76bd58eb 100644 --- a/linkchecker.bat +++ b/linkchecker.bat @@ -19,4 +19,9 @@ rem uncomment the next line to enable german output rem set LC_MESSAGES=de rem uncomment the next line to enable french output rem set LC_MESSAGES=fr + +rem If you see $python or $install_scripts on the next line, then you +rem are looking at a skeleton .bat file suited only for installation. +rem Look in c:\python21\scripts or wherever Python is installed for +rem the executable .bat file. $python -O linkchecker --interactive %* diff --git a/po/Makefile b/po/Makefile index 7373541d..779f4e07 100644 --- a/po/Makefile +++ b/po/Makefile @@ -6,8 +6,10 @@ MSGFMT=$(PYTHON) $(I18NTOOLS)/msgfmt.py #MSGFMT=msgfmt MSGMERGE=msgmerge SOURCES=\ +../linkcheck/ChromeUrlData.py \ ../linkcheck/Config.py \ ../linkcheck/FileUrlData.py \ +../linkcheck/FindUrlData.py \ ../linkcheck/FtpUrlData.py \ ../linkcheck/GopherUrlData.py \ ../linkcheck/HostCheckingUrlData.py \ diff --git a/po/msgfmt.py b/po/msgfmt.py index 6744b7c7..a758f755 100755 --- a/po/msgfmt.py +++ b/po/msgfmt.py @@ -1,4 +1,4 @@ -#! /usr/bin/env python2 +#!/usr/bin/python # Written by Martin v. Löwis @@ -11,6 +11,11 @@ GNU msgfmt program, however, it is a simpler implementation. Usage: msgfmt.py [OPTIONS] filename.po Options: + -o file + --output-file=file + Specify the output file to write to. If omitted, output will go to a + file named filename.mo (based off the input file name). + -h --help Print this message and exit. @@ -18,15 +23,15 @@ Options: -V --version Display version information and exit. - """ import sys +import os import getopt import struct import array -__version__ = "1.0" +__version__ = "1.1" MESSAGES = {} @@ -91,17 +96,18 @@ def generate(): -def make(filename): +def make(filename, outfile): ID = 1 STR = 2 - # Compute .mo name from .po name + # Compute .mo name from .po name and arguments if filename.endswith('.po'): infile = filename - outfile = filename[:-2] + 'mo' else: infile = filename + '.po' - outfile = filename + '.mo' + if outfile is None: + outfile = os.path.splitext(infile)[0] + '.mo' + try: lines = open(infile).readlines() except IOError, msg: @@ -159,7 +165,6 @@ def make(filename): # Compute output output = generate() - # Save output try: open(outfile,"wb").write(output) except IOError,msg: @@ -169,10 +174,12 @@ def make(filename): def main(): try: - opts, args = getopt.getopt(sys.argv[1:], 'hV', ['help','version']) + opts, args = getopt.getopt(sys.argv[1:], 'hVo:', + ['help', 'version', 'output-file=']) except getopt.error, msg: usage(1, msg) + outfile = None # parse options for opt, arg in opts: if opt in ('-h', '--help'): @@ -180,6 +187,8 @@ def main(): elif opt in ('-V', '--version'): print >> sys.stderr, "msgfmt.py", __version__ sys.exit(0) + elif opt in ('-o', '--output-file'): + outfile = arg # do it if not args: print >> sys.stderr, 'No input file given' @@ -187,7 +196,7 @@ def main(): return for filename in args: - make(filename) + make(filename, outfile) if __name__ == '__main__': diff --git a/po/pygettext.py b/po/pygettext.py index 6dfa43fa..5d1f099a 100755 --- a/po/pygettext.py +++ b/po/pygettext.py @@ -1,18 +1,13 @@ -#! /usr/bin/env python2 +#!/usr/bin/python # Originally written by Barry Warsaw # # minimally patched to make it even more xgettext compatible # by Peter Funk +# +# even more minimalistic patched to fix the default-domain= option +# by Bastian Kleineidam -# for selftesting -try: - import fintl - _ = fintl.gettext -except ImportError: - def _(s): return s - - -__doc__ = _("""pygettext -- Python equivalent of xgettext(1) +"""pygettext -- Python equivalent of xgettext(1) Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the internationalization of C programs. Most of these tools are independent of @@ -65,7 +60,13 @@ Options: -E --escape - replace non-ASCII characters with octal escape sequences. + Replace non-ASCII characters with octal escape sequences. + + -D + --docstrings + Extract module, class, method, and function docstrings. These do not + need to be wrapped in _() markers, and in fact cannot be for Python to + consider them docstrings. -h --help @@ -93,6 +94,15 @@ Options: each msgid. The style of comments is controlled by the -S/--style option. This is the default. + -o filename + --output=filename + Rename the default output file from messages.pot to filename. If + filename is `-' then the output is sent to standard out. + + -p dir + --output-dir=dir + Output files will be placed in directory dir. + -S stylename --style stylename Specify which style to use for location comments. Two styles are @@ -103,15 +113,6 @@ Options: The style name is case insensitive. GNU style is the default. - -o filename - --output=filename - Rename the default output file from messages.pot to filename. If - filename is `-' then the output is sent to standard out. - - -p dir - --output-dir=dir - Output files will be placed in directory dir. - -v --verbose Print the names of the files being processed. @@ -132,7 +133,7 @@ Options: If `inputfile' is -, standard input is read. -""") +""" import os import sys @@ -140,7 +141,14 @@ import time import getopt import tokenize -__version__ = '1.1' +# for selftesting +try: + import fintl + _ = fintl.gettext +except ImportError: + def _(s): return s + +__version__ = '1.3' default_keywords = ['_'] DEFAULTKEYWORDS = ', '.join(default_keywords) @@ -159,7 +167,8 @@ pot_header = _('''\ msgid "" msgstr "" "Project-Id-Version: PACKAGE VERSION\\n" -"PO-Revision-Date: %(time)s\\n" +"POT-Creation-Date: %(time)s\\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n" "Last-Translator: FULL NAME \\n" "Language-Team: LANGUAGE \\n" "MIME-Version: 1.0\\n" @@ -171,9 +180,9 @@ msgstr "" def usage(code, msg=''): - print __doc__ % globals() + print >> sys.stderr, _(__doc__) % globals() if msg: - print msg + print >> sys.stderr, msg sys.exit(code) @@ -239,15 +248,48 @@ class TokenEater: self.__state = self.__waiting self.__data = [] self.__lineno = -1 + self.__freshmodule = 1 def __call__(self, ttype, tstring, stup, etup, line): # dispatch +## import token +## print >> sys.stderr, 'ttype:', token.tok_name[ttype], \ +## 'tstring:', tstring self.__state(ttype, tstring, stup[0]) def __waiting(self, ttype, tstring, lineno): + # Do docstring extractions, if enabled + if self.__options.docstrings: + # module docstring? + if self.__freshmodule: + if ttype == tokenize.STRING: + self.__addentry(safe_eval(tstring), lineno) + self.__freshmodule = 0 + elif ttype not in (tokenize.COMMENT, tokenize.NL): + self.__freshmodule = 0 + return + # class docstring? + if ttype == tokenize.NAME and tstring in ('class', 'def'): + self.__state = self.__suiteseen + return if ttype == tokenize.NAME and tstring in self.__options.keywords: self.__state = self.__keywordseen + def __suiteseen(self, ttype, tstring, lineno): + # ignore anything until we see the colon + if ttype == tokenize.OP and tstring == ':': + self.__state = self.__suitedocstring + + def __suitedocstring(self, ttype, tstring, lineno): + # ignore any intervening noise + if ttype == tokenize.STRING: + self.__addentry(safe_eval(tstring), lineno) + self.__state = self.__waiting + elif ttype not in (tokenize.NEWLINE, tokenize.INDENT, + tokenize.COMMENT): + # there was no class docstring + self.__state = self.__waiting + def __keywordseen(self, ttype, tstring, lineno): if ttype == tokenize.OP and tstring == '(': self.__data = [] @@ -263,58 +305,54 @@ class TokenEater: # of messages seen. Reset state for the next batch. If there # were no strings inside _(), then just ignore this entry. if self.__data: - msg = EMPTYSTRING.join(self.__data) - if not msg in self.__options.toexclude: - entry = (self.__curfile, self.__lineno) - linenos = self.__messages.get(msg) - if linenos is None: - self.__messages[msg] = [entry] - else: - linenos.append(entry) + self.__addentry(EMPTYSTRING.join(self.__data)) self.__state = self.__waiting elif ttype == tokenize.STRING: self.__data.append(safe_eval(tstring)) # TBD: should we warn if we seen anything else? + def __addentry(self, msg, lineno=None): + if lineno is None: + lineno = self.__lineno + if not msg in self.__options.toexclude: + entry = (self.__curfile, lineno) + self.__messages.setdefault(msg, []).append(entry) + def set_filename(self, filename): self.__curfile = filename def write(self, fp): options = self.__options timestamp = time.ctime(time.time()) - # common header - try: - sys.stdout = fp - # The time stamp in the header doesn't have the same format - # as that generated by xgettext... - print pot_header % {'time': timestamp, 'version': __version__} - for k, v in self.__messages.items(): - if not options.writelocations: - pass - # location comments are different b/w Solaris and GNU: - elif options.locationstyle == options.SOLARIS: - for filename, lineno in v: - d = {'filename': filename, 'lineno': lineno} - print _('# File: %(filename)s, line: %(lineno)d') % d - elif options.locationstyle == options.GNU: - # fit as many locations on one line, as long as the - # resulting line length doesn't exceeds 'options.width' - locline = '#:' - for filename, lineno in v: - d = {'filename': filename, 'lineno': lineno} - s = _(' %(filename)s:%(lineno)d') % d - if len(locline) + len(s) <= options.width: - locline = locline + s - else: - print locline - locline = "#:" + s - if len(locline) > 2: - print locline - # TBD: sorting, normalizing - print 'msgid', normalize(k) - print 'msgstr ""\n' - finally: - sys.stdout = sys.__stdout__ + # The time stamp in the header doesn't have the same format as that + # generated by xgettext... + print >> fp, pot_header % {'time': timestamp, 'version': __version__} + for k, v in self.__messages.items(): + if not options.writelocations: + pass + # location comments are different b/w Solaris and GNU: + elif options.locationstyle == options.SOLARIS: + for filename, lineno in v: + d = {'filename': filename, 'lineno': lineno} + print >>fp, _('# File: %(filename)s, line: %(lineno)d') % d + elif options.locationstyle == options.GNU: + # fit as many locations on one line, as long as the + # resulting line length doesn't exceeds 'options.width' + locline = '#:' + for filename, lineno in v: + d = {'filename': filename, 'lineno': lineno} + s = _(' %(filename)s:%(lineno)d') % d + if len(locline) + len(s) <= options.width: + locline = locline + s + else: + print >> fp, locline + locline = "#:" + s + if len(locline) > 2: + print >> fp, locline + # TBD: sorting, normalizing + print >> fp, 'msgid', normalize(k) + print >> fp, 'msgstr ""\n' + def main(): @@ -322,11 +360,12 @@ def main(): try: opts, args = getopt.getopt( sys.argv[1:], - 'ad:Ehk:Kno:p:S:Vvw:x:', + 'ad:DEhk:Kno:p:S:Vvw:x:', ['extract-all', 'default-domain=', 'escape', 'help', 'keyword=', 'no-default-keywords', 'add-location', 'no-location', 'output=', 'output-dir=', 'style=', 'verbose', 'version', 'width=', 'exclude-file=', + 'docstrings', ]) except getopt.error, msg: usage(1, msg) @@ -347,6 +386,7 @@ def main(): verbose = 0 width = 78 excludefilename = '' + docstrings = 0 options = Options() locations = {'gnu' : options.GNU, @@ -363,6 +403,8 @@ def main(): options.outfile = arg + '.pot' elif opt in ('-E', '--escape'): options.escape = 1 + elif opt in ('-D', '--docstrings'): + options.docstrings = 1 elif opt in ('-k', '--keyword'): options.keywords.append(arg) elif opt in ('-K', '--no-default-keywords'): @@ -426,7 +468,11 @@ def main(): closep = 1 try: eater.set_filename(filename) - tokenize.tokenize(fp.readline, eater) + try: + tokenize.tokenize(fp.readline, eater) + except tokenize.TokenError, e: + sys.stderr.write('%s: %s, line %d, column %d\n' % + (e[0], filename, e[1][0], e[1][1])) finally: if closep: fp.close() diff --git a/setup.py b/setup.py index bc038455..6e0adf8b 100755 --- a/setup.py +++ b/setup.py @@ -94,12 +94,14 @@ class MyDistribution(Distribution): data = [] data.append('config_dir = %s' % `os.path.join(cwd, "config")`) data.append("install_data = %s" % `cwd`) - self.create_conf_file(".", data) + self.create_conf_file("", data) Distribution.run_commands(self) def create_conf_file(self, directory, data=[]): data.insert(0, "# this file is automatically created by setup.py") + if not directory: + directory = os.getcwd() filename = os.path.join(directory, self.config_file) # add metadata metanames = ("name", "version", "author", "author_email", @@ -127,7 +129,7 @@ myname = "Bastian Kleineidam" myemail = "calvin@users.sourceforge.net" setup (name = "linkchecker", - version = "1.3.10", + version = "1.3.11", description = "check HTML documents for broken links", author = myname, author_email = myemail, diff --git a/test/output/test_http b/test/output/test_http index db16b7b7..6bf1229f 100644 --- a/test/output/test_http +++ b/test/output/test_http @@ -3,9 +3,11 @@ url file:///home/calvin/projects/linkchecker/test/html/http.html valid url http://www.garantiertnixgutt.bla name bad url +warning Missing '/' at end of URL error url http://www.heise.de name ok +warning Missing '/' at end of URL valid url http:/www.heise.de name one slash @@ -38,10 +40,12 @@ valid url HtTP://WWW.hEIsE.DE cached name should be cached +warning Missing '/' at end of URL valid url HTTP://WWW.HEISE.DE cached name should be cached +warning Missing '/' at end of URL valid url http://www.heise.de/?quoted=ü name html entities