From ba3b531dc7cd1c1113a03754ed7baa0556158896 Mon Sep 17 00:00:00 2001 From: calvin Date: Sun, 18 Feb 2001 22:10:18 +0000 Subject: [PATCH] dist git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@230 e7d03fd6-7b0d-0410-9947-9c21f3af8025 --- FAQ | 11 + debian/control | 21 +- po/msgfmt.py | 194 ------------------ po/pygettext.py | 453 ------------------------------------------ test/output/test_base | 34 +++- 5 files changed, 50 insertions(+), 663 deletions(-) delete mode 100755 po/msgfmt.py delete mode 100755 po/pygettext.py diff --git a/FAQ b/FAQ index 75992fd7..e29a0b0c 100644 --- a/FAQ +++ b/FAQ @@ -1,3 +1,14 @@ +Q: LinkChecker produced an error, but my web page is ok with + Netscape/IE/Opera/... + Is this a bug in LinkChecker? +A: Please check your web pages first. Are they really ok? Use + a syntax highlighting Editor!! Use HTML Tidy!!! + Ah, this is a good idea, I will not debug ANY web pages + which produce any output with the official HTML validator + "HTML Tidy" from www.w3c.org. + Check if the web server is accepting HEAD requests as well. + + Q: The link "mailto:john@company.com?subject=Hello John" is reported as an error. A: You have to quote special characters (e.g. spaces) in the subject field. diff --git a/debian/control b/debian/control index 1dc0a14b..d672b8e9 100644 --- a/debian/control +++ b/debian/control @@ -1,22 +1,22 @@ Source: linkchecker Section: web Priority: optional -Maintainer: Bastian Kleineidam -Build-Depends: python2-base (>= 2.0), python2-base (<= 2.0), python2-dev (>= 1.5.2), python2-dev (<= 2.0), debhelper (>= 3.0.0) +Maintainer: Bastian Kleineidam +Build-Depends: python2-base (>= 2.0), python2-dev (>= 2.0), debhelper (>= 3.0.0), libssl096-dev Build-Depends-Indep: gettext -Standards-Version: 3.2.1 +Standards-Version: 3.5.1 Package: linkchecker -Architecture: any -Depends: python2-base (>= 2.0), python-base (<= 2.0) -Suggests: libssl09|libssl095a, httpd +Architecture: all +Depends: python2-base (>= 2.0) +Suggests: linkchecker-ssl Description: check HTML documents for broken links Features: o recursive checking o multithreaded o output can be colored or normal text, HTML, SQL, CSV or a sitemap graph in GML or XML - o HTTP/1.1, HTTPS, FTP, mailto:, nntp:, news:, Gopher, Telnet and local + o HTTP/1.1, FTP, mailto:, nntp:, news:, Gopher, Telnet and local file links are supported o restrict link checking with regular expression filters for URLs o proxy support @@ -25,3 +25,10 @@ Description: check HTML documents for broken links o i18n support o command line interface o (Fast)CGI web interface (requires HTTP server) + +Package: linkchecker-ssl +Architecture: any +Depends: linkchecker (>= 1.3.0) +Description: HTTPS support for LinkChecker + Includes the Python modules 'ssl' and 'httpslib' to support https:// + links. diff --git a/po/msgfmt.py b/po/msgfmt.py deleted file mode 100755 index 6a396497..00000000 --- a/po/msgfmt.py +++ /dev/null @@ -1,194 +0,0 @@ -#!/usr/bin/env python2 - -# Written by Martin v. Löwis - -"""Generate binary message catalog from textual translation description. - -This program converts a textual Uniforum-style message catalog (.po file) into -a binary GNU catalog (.mo file). This is essentially the same function as the -GNU msgfmt program, however, it is a simpler implementation. - -Usage: msgfmt.py [OPTIONS] filename.po - -Options: - -h - --help - Print this message and exit. - - -V - --version - Display version information and exit. - -""" - -import sys -import getopt -import struct -import array - -__version__ = "1.0" - -MESSAGES = {} - - - -def usage(code, msg=''): - print >> sys.stderr, __doc__ - if msg: - print >> sys.stderr, msg - sys.exit(code) - - - -def add(id, str, fuzzy): - "Add a non-fuzzy translation to the dictionary." - global MESSAGES - if not fuzzy and str: - MESSAGES[id] = str - - - -def generate(): - "Return the generated output." - global MESSAGES - keys = MESSAGES.keys() - # the keys are sorted in the .mo file - keys.sort() - offsets = [] - ids = strs = '' - for id in keys: - # For each string, we need size and file offset. Each string is NUL - # terminated; the NUL does not count into the size. - offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id]))) - ids += id + '\0' - strs += MESSAGES[id] + '\0' - output = '' - # The header is 7 32-bit unsigned integers. We don't use hash tables, so - # the keys start right after the index tables. - # translated string. - keystart = 7*4+16*len(keys) - # and the values start after the keys - valuestart = keystart + len(ids) - koffsets = [] - voffsets = [] - # The string table first has the list of keys, then the list of values. - # Each entry has first the size of the string, then the file offset. - for o1, l1, o2, l2 in offsets: - koffsets += [l1, o1+keystart] - voffsets += [l2, o2+valuestart] - offsets = koffsets + voffsets - output = struct.pack("iiiiiii", - 0x950412de, # Magic - 0, # Version - len(keys), # # of entries - 7*4, # start of key index - 7*4+len(keys)*8, # start of value index - 0, 0) # size and offset of hash table - output += array.array("i", offsets).tostring() - output += ids - output += strs - return output - - - -def make(filename): - ID = 1 - STR = 2 - - # Compute .mo name from .po name - if filename.endswith('.po'): - infile = filename - outfile = filename[:-2] + 'mo' - else: - infile = filename + '.po' - outfile = filename + '.mo' - try: - lines = open(infile).readlines() - except IOError, msg: - print >> sys.stderr, msg - sys.exit(1) - - section = None - fuzzy = 0 - - # Parse the catalog - lno = 0 - for l in lines: - lno += 1 - # If we get a comment line after a msgstr, this is a new entry - if l[0] == '#' and section == STR: - add(msgid, msgstr, fuzzy) - section = None - fuzzy = 0 - # Record a fuzzy mark - if l[:2] == '#,' and l.find('fuzzy'): - fuzzy = 1 - # Skip comments - if l[0] == '#': - continue - # Now we are in a msgid section, output previous section - if l.startswith('msgid'): - if section == STR: - add(msgid, msgstr, fuzzy) - section = ID - l = l[5:] - msgid = msgstr = '' - # Now we are in a msgstr section - elif l.startswith('msgstr'): - section = STR - l = l[6:] - # Skip empty lines - l = l.strip() - if not l: - continue - # XXX: Does this always follow Python escape semantics? - l = eval(l) - if section == ID: - msgid += l - elif section == STR: - msgstr += l - else: - print >> sys.stderr, 'Syntax error on %s:%d' % (infile, lno), \ - 'before:' - print >> sys.stderr, l - sys.exit(1) - # Add last entry - if section == STR: - add(msgid, msgstr, fuzzy) - - # Compute output - output = generate() - - # Save output - try: - open(outfile,"wb").write(output) - except IOError,msg: - print >> sys.stderr, msg - - - -def main(): - try: - opts, args = getopt.getopt(sys.argv[1:], 'hV', ['help','version']) - except getopt.error, msg: - usage(1, msg) - - # parse options - for opt, arg in opts: - if opt in ('-h', '--help'): - usage(0) - elif opt in ('-V', '--version'): - print >> sys.stderr, "msgfmt.py", __version__ - sys.exit(0) - # do it - if not args: - print >> sys.stderr, 'No input file given' - print >> sys.stderr, "Try `msgfmt --help' for more information." - return - - for filename in args: - make(filename) - - -if __name__ == '__main__': - main() diff --git a/po/pygettext.py b/po/pygettext.py deleted file mode 100755 index 6dfa43fa..00000000 --- a/po/pygettext.py +++ /dev/null @@ -1,453 +0,0 @@ -#! /usr/bin/env python2 -# Originally written by Barry Warsaw -# -# minimally patched to make it even more xgettext compatible -# by Peter Funk - -# for selftesting -try: - import fintl - _ = fintl.gettext -except ImportError: - def _(s): return s - - -__doc__ = _("""pygettext -- Python equivalent of xgettext(1) - -Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the -internationalization of C programs. Most of these tools are independent of -the programming language and can be used from within Python programs. Martin -von Loewis' work[1] helps considerably in this regard. - -There's one problem though; xgettext is the program that scans source code -looking for message strings, but it groks only C (or C++). Python introduces -a few wrinkles, such as dual quoting characters, triple quoted strings, and -raw strings. xgettext understands none of this. - -Enter pygettext, which uses Python's standard tokenize module to scan Python -source code, generating .pot files identical to what GNU xgettext[2] generates -for C and C++ code. From there, the standard GNU tools can be used. - -A word about marking Python strings as candidates for translation. GNU -xgettext recognizes the following keywords: gettext, dgettext, dcgettext, and -gettext_noop. But those can be a lot of text to include all over your code. -C and C++ have a trick: they use the C preprocessor. Most internationalized C -source includes a #define for gettext() to _() so that what has to be written -in the source is much less. Thus these are both translatable strings: - - gettext("Translatable String") - _("Translatable String") - -Python of course has no preprocessor so this doesn't work so well. Thus, -pygettext searches only for _() by default, but see the -k/--keyword flag -below for how to augment this. - - [1] http://www.python.org/workshops/1997-10/proceedings/loewis.html - [2] http://www.gnu.org/software/gettext/gettext.html - -NOTE: pygettext attempts to be option and feature compatible with GNU xgettext -where ever possible. However some options are still missing or are not fully -implemented. Also, xgettext's use of command line switches with option -arguments is broken, and in these cases, pygettext just defines additional -switches. - -Usage: pygettext [options] inputfile ... - -Options: - - -a - --extract-all - Extract all strings - - -d name - --default-domain=name - Rename the default output file from messages.pot to name.pot - - -E - --escape - replace non-ASCII characters with octal escape sequences. - - -h - --help - print this help message and exit - - -k word - --keyword=word - Keywords to look for in addition to the default set, which are: - %(DEFAULTKEYWORDS)s - - You can have multiple -k flags on the command line. - - -K - --no-default-keywords - Disable the default set of keywords (see above). Any keywords - explicitly added with the -k/--keyword option are still recognized. - - --no-location - Do not write filename/lineno location comments. - - -n - --add-location - Write filename/lineno location comments indicating where each - extracted string is found in the source. These lines appear before - each msgid. The style of comments is controlled by the -S/--style - option. This is the default. - - -S stylename - --style stylename - Specify which style to use for location comments. Two styles are - supported: - - Solaris # File: filename, line: line-number - GNU #: filename:line - - The style name is case insensitive. GNU style is the default. - - -o filename - --output=filename - Rename the default output file from messages.pot to filename. If - filename is `-' then the output is sent to standard out. - - -p dir - --output-dir=dir - Output files will be placed in directory dir. - - -v - --verbose - Print the names of the files being processed. - - -V - --version - Print the version of pygettext and exit. - - -w columns - --width=columns - Set width of output to columns. - - -x filename - --exclude-file=filename - Specify a file that contains a list of strings that are not be - extracted from the input files. Each string to be excluded must - appear on a line by itself in the file. - -If `inputfile' is -, standard input is read. - -""") - -import os -import sys -import time -import getopt -import tokenize - -__version__ = '1.1' - -default_keywords = ['_'] -DEFAULTKEYWORDS = ', '.join(default_keywords) - -EMPTYSTRING = '' - - - -# The normal pot-file header. msgmerge and EMACS' po-mode work better if -# it's there. -pot_header = _('''\ -# SOME DESCRIPTIVE TITLE. -# Copyright (C) YEAR ORGANIZATION -# FIRST AUTHOR , YEAR. -# -msgid "" -msgstr "" -"Project-Id-Version: PACKAGE VERSION\\n" -"PO-Revision-Date: %(time)s\\n" -"Last-Translator: FULL NAME \\n" -"Language-Team: LANGUAGE \\n" -"MIME-Version: 1.0\\n" -"Content-Type: text/plain; charset=CHARSET\\n" -"Content-Transfer-Encoding: ENCODING\\n" -"Generated-By: pygettext.py %(version)s\\n" - -''') - - -def usage(code, msg=''): - print __doc__ % globals() - if msg: - print msg - sys.exit(code) - - - -escapes = [] - -def make_escapes(pass_iso8859): - global escapes - if pass_iso8859: - # Allow iso-8859 characters to pass through so that e.g. 'msgid - # "Höhe"' would result not result in 'msgid "H\366he"'. Otherwise we - # escape any character outside the 32..126 range. - mod = 128 - else: - mod = 256 - for i in range(256): - if 32 <= (i % mod) <= 126: - escapes.append(chr(i)) - else: - escapes.append("\\%03o" % i) - escapes[ord('\\')] = '\\\\' - escapes[ord('\t')] = '\\t' - escapes[ord('\r')] = '\\r' - escapes[ord('\n')] = '\\n' - escapes[ord('\"')] = '\\"' - - -def escape(s): - global escapes - s = list(s) - for i in range(len(s)): - s[i] = escapes[ord(s[i])] - return EMPTYSTRING.join(s) - - -def safe_eval(s): - # unwrap quotes, safely - return eval(s, {'__builtins__':{}}, {}) - - -def normalize(s): - # This converts the various Python string types into a format that is - # appropriate for .po files, namely much closer to C style. - lines = s.split('\n') - if len(lines) == 1: - s = '"' + escape(s) + '"' - else: - if not lines[-1]: - del lines[-1] - lines[-1] = lines[-1] + '\n' - for i in range(len(lines)): - lines[i] = escape(lines[i]) - lineterm = '\\n"\n"' - s = '""\n"' + lineterm.join(lines) + '"' - return s - - - -class TokenEater: - def __init__(self, options): - self.__options = options - self.__messages = {} - self.__state = self.__waiting - self.__data = [] - self.__lineno = -1 - - def __call__(self, ttype, tstring, stup, etup, line): - # dispatch - self.__state(ttype, tstring, stup[0]) - - def __waiting(self, ttype, tstring, lineno): - if ttype == tokenize.NAME and tstring in self.__options.keywords: - self.__state = self.__keywordseen - - def __keywordseen(self, ttype, tstring, lineno): - if ttype == tokenize.OP and tstring == '(': - self.__data = [] - self.__lineno = lineno - self.__state = self.__openseen - else: - self.__state = self.__waiting - - def __openseen(self, ttype, tstring, lineno): - if ttype == tokenize.OP and tstring == ')': - # We've seen the last of the translatable strings. Record the - # line number of the first line of the strings and update the list - # of messages seen. Reset state for the next batch. If there - # were no strings inside _(), then just ignore this entry. - if self.__data: - msg = EMPTYSTRING.join(self.__data) - if not msg in self.__options.toexclude: - entry = (self.__curfile, self.__lineno) - linenos = self.__messages.get(msg) - if linenos is None: - self.__messages[msg] = [entry] - else: - linenos.append(entry) - self.__state = self.__waiting - elif ttype == tokenize.STRING: - self.__data.append(safe_eval(tstring)) - # TBD: should we warn if we seen anything else? - - def set_filename(self, filename): - self.__curfile = filename - - def write(self, fp): - options = self.__options - timestamp = time.ctime(time.time()) - # common header - try: - sys.stdout = fp - # The time stamp in the header doesn't have the same format - # as that generated by xgettext... - print pot_header % {'time': timestamp, 'version': __version__} - for k, v in self.__messages.items(): - if not options.writelocations: - pass - # location comments are different b/w Solaris and GNU: - elif options.locationstyle == options.SOLARIS: - for filename, lineno in v: - d = {'filename': filename, 'lineno': lineno} - print _('# File: %(filename)s, line: %(lineno)d') % d - elif options.locationstyle == options.GNU: - # fit as many locations on one line, as long as the - # resulting line length doesn't exceeds 'options.width' - locline = '#:' - for filename, lineno in v: - d = {'filename': filename, 'lineno': lineno} - s = _(' %(filename)s:%(lineno)d') % d - if len(locline) + len(s) <= options.width: - locline = locline + s - else: - print locline - locline = "#:" + s - if len(locline) > 2: - print locline - # TBD: sorting, normalizing - print 'msgid', normalize(k) - print 'msgstr ""\n' - finally: - sys.stdout = sys.__stdout__ - - -def main(): - global default_keywords - try: - opts, args = getopt.getopt( - sys.argv[1:], - 'ad:Ehk:Kno:p:S:Vvw:x:', - ['extract-all', 'default-domain=', 'escape', 'help', - 'keyword=', 'no-default-keywords', - 'add-location', 'no-location', 'output=', 'output-dir=', - 'style=', 'verbose', 'version', 'width=', 'exclude-file=', - ]) - except getopt.error, msg: - usage(1, msg) - - # for holding option values - class Options: - # constants - GNU = 1 - SOLARIS = 2 - # defaults - extractall = 0 # FIXME: currently this option has no effect at all. - escape = 0 - keywords = [] - outpath = '' - outfile = 'messages.pot' - writelocations = 1 - locationstyle = GNU - verbose = 0 - width = 78 - excludefilename = '' - - options = Options() - locations = {'gnu' : options.GNU, - 'solaris' : options.SOLARIS, - } - - # parse options - for opt, arg in opts: - if opt in ('-h', '--help'): - usage(0) - elif opt in ('-a', '--extract-all'): - options.extractall = 1 - elif opt in ('-d', '--default-domain'): - options.outfile = arg + '.pot' - elif opt in ('-E', '--escape'): - options.escape = 1 - elif opt in ('-k', '--keyword'): - options.keywords.append(arg) - elif opt in ('-K', '--no-default-keywords'): - default_keywords = [] - elif opt in ('-n', '--add-location'): - options.writelocations = 1 - elif opt in ('--no-location',): - options.writelocations = 0 - elif opt in ('-S', '--style'): - options.locationstyle = locations.get(arg.lower()) - if options.locationstyle is None: - usage(1, _('Invalid value for --style: %s') % arg) - elif opt in ('-o', '--output'): - options.outfile = arg - elif opt in ('-p', '--output-dir'): - options.outpath = arg - elif opt in ('-v', '--verbose'): - options.verbose = 1 - elif opt in ('-V', '--version'): - print _('pygettext.py (xgettext for Python) %s') % __version__ - sys.exit(0) - elif opt in ('-w', '--width'): - try: - options.width = int(arg) - except ValueError: - usage(1, _('--width argument must be an integer: %s') % arg) - elif opt in ('-x', '--exclude-file'): - options.excludefilename = arg - - # calculate escapes - make_escapes(options.escape) - - # calculate all keywords - options.keywords.extend(default_keywords) - - # initialize list of strings to exclude - if options.excludefilename: - try: - fp = open(options.excludefilename) - options.toexclude = fp.readlines() - fp.close() - except IOError: - sys.stderr.write(_("Can't read --exclude-file: %s") % - options.excludefilename) - sys.exit(1) - else: - options.toexclude = [] - - # slurp through all the files - eater = TokenEater(options) - for filename in args: - if filename == '-': - if options.verbose: - print _('Reading standard input') - fp = sys.stdin - closep = 0 - else: - if options.verbose: - print _('Working on %s') % filename - fp = open(filename) - closep = 1 - try: - eater.set_filename(filename) - tokenize.tokenize(fp.readline, eater) - finally: - if closep: - fp.close() - - # write the output - if options.outfile == '-': - fp = sys.stdout - closep = 0 - else: - if options.outpath: - options.outfile = os.path.join(options.outpath, options.outfile) - fp = open(options.outfile, 'w') - closep = 1 - try: - eater.write(fp) - finally: - if closep: - fp.close() - - -if __name__ == '__main__': - main() - # some more test strings - _(u'a unicode string') diff --git a/test/output/test_base b/test/output/test_base index 316df216..01583d8f 100644 --- a/test/output/test_base +++ b/test/output/test_base @@ -1,10 +1,26 @@ test_base -url file:///home/calvin/projects/linkchecker/test/output/base1.html -realurl file:/home/calvin/projects/linkchecker/test/output/base1.html -error Error: [Errno 2] No such file or directory: '/home/calvin/projects/linkchecker/test/output/base1.html' -url file:///home/calvin/projects/linkchecker/test/output/base2.html -realurl file:/home/calvin/projects/linkchecker/test/output/base2.html -error Error: [Errno 2] No such file or directory: '/home/calvin/projects/linkchecker/test/output/base2.html' -url file:///home/calvin/projects/linkchecker/test/output/base3.html -realurl file:/home/calvin/projects/linkchecker/test/output/base3.html -error Error: [Errno 2] No such file or directory: '/home/calvin/projects/linkchecker/test/output/base3.html' +url file:///home/calvin/projects/linkchecker/test/html/base1.html +valid Valid +url file:///home/calvin/projects/linkchecker/test/html/base2.html +valid Valid +url file:///home/calvin/projects/linkchecker/test/html/base3.html +valid Valid +url file:/etc +parenturl file:/home/calvin/projects/linkchecker/test/html/base1.html +line 6 +valid Valid +url http://www.calvinandhobbes.com/ +parenturl file:/home/calvin/projects/linkchecker/test/html/base1.html +line 4 +warning Effective URL http://www.ucomics.com/calvinandhobbes/ +valid Valid: 200 OK +url passwd +parenturl file:/home/calvin/projects/linkchecker/test/html/base2.html +line 4 +baseurl file:/etc/ +valid Valid +url blubba.shtml +parenturl file:/home/calvin/projects/linkchecker/test/html/base3.html +line 4 +baseurl http://treasure.calvinsplayground.de/~calvin/ +error Error: 404 Not Found