updated tests

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@324 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2001-11-29 13:49:52 +00:00
parent 63ec8723e2
commit 14c9cbc4c4
12 changed files with 205 additions and 566 deletions

48
INSTALL
View file

@ -13,17 +13,49 @@ Python!
Setup
-----
Run "python setup.py build" to build.
Run "python setup.py install" to install.
Run "python setup.py --help" for help.
After installation, the system wide configuration file is in
<prefix>/share/linkchecker/linkcheckerrc.
The local configuration file is ~/.linkcheckerrc on Unix platforms.
To run the program, type "linkchecker" followed by the URLs you want
to check.
Type "linkchecker -h" for help.
o Unix platforms
- open a commandline window and change to the linkchecker-x.x.x
directory
- run "python setup.py install" to install
For help on setup.py options, run "python setup.py --help".
The local configuration file is $HOME/.linkcheckerrc
To run the program, type "linkchecker" followed by the URLs you want
to check.
Type "linkchecker -h" for help.
o Windows platforms
- make sure that python.exe is in your PATH
- open a commandline window (cmd.exe) and change to the
linkchecker-x.x.x directory
- run "python.exe setup.py install" to install
For help on setup.py options, run "python.exe setup.py --help".
To run the program, change to the scripts directory of your Python
installation and type "linkchecker.bat" followed by the URLs you
want to check.
Type "linkchecker.bat -h" for help.
o MacOS 9.x platforms
- open the Python IDE
- open the setup.py file
- run it (click on "Run all")
- in the popup window, select the "install" command and click "Add"
- click "Ok"; this will copy files into the Python folder
Read the MacOS Python documentation to find out about passing
commandline options to Python scripts.
o MacOS X platforms
- not tested
(Fast)CGI web interface

10
debian/changelog vendored
View file

@ -1,3 +1,13 @@
linkchecker (1.3.11) unstable; urgency=low
* setup.py: use os.getcwd(), not "." which breaks on MacOS 9.x
* added platform-specific install instructions
* use Python's internal gettext module, get rid of fintl.py
* use Python's internal robots.txt parser, get rid of
robotparser2.py
-- Bastian Kleineidam <calvin@debian.org> Wed, 28 Nov 2001 17:57:20 +0100
linkchecker (1.3.10) unstable; urgency=low
* use Pythons builtin HTTPS support

View file

@ -16,7 +16,7 @@
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import httplib, urlparse, sys, time, re
import Config, StringUtil, robotparser2
import Config, StringUtil, robotparser
from UrlData import UrlData
from urllib import splittype, splithost, splituser, splitpasswd
from linkcheck import _
@ -251,7 +251,7 @@ class HttpUrlData(UrlData):
def robotsTxtAllowsUrl(self, config):
roboturl="%s://%s/robots.txt" % self.urlTuple[0:2]
if not config.robotsTxtCache_has_key(roboturl):
rp = robotparser2.RobotFileParser(roboturl)
rp = robotparser.RobotFileParser(roboturl)
rp.read()
config.robotsTxtCache_set(roboturl, rp)
rp = config.robotsTxtCache_get(roboturl)

View file

@ -19,28 +19,15 @@ class error(Exception):
pass
# i18n support
LANG="EN" # default language (used for HTML output)
import _linkchecker_configdata
try:
import fintl,os,string
gettext = fintl.gettext
import gettext
domain = 'linkcheck'
localedir = os.path.join(_linkchecker_configdata.install_data, 'locale')
fintl.bindtextdomain(domain, localedir)
fintl.textdomain(domain)
languages = []
for envvar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'):
if os.environ.has_key(envvar):
languages = string.split(os.environ[envvar], ':')
break
if languages:
LANG=string.upper(languages[0])
except ImportError:
def gettext(msg):
return msg
# set _ as an alias for gettext
_ = gettext
t = gettext.translation(domain, localedir)
_ = t.gettext
except IOError:
_ = lambda s: s
#import timeoutsocket
#timeoutsocket.setDefaultSocketTimeout(20)

View file

@ -1,208 +0,0 @@
## vim:ts=4:et:nowrap
"""i18n (multiple language) support. Reads .mo files from GNU gettext msgfmt
If you want to prepare your Python programs for i18n you could simply
add the following lines to the top of a BASIC_MAIN module of your py-program:
try:
import fintl
gettext = fintl.gettext
fintl.bindtextdomain(YOUR_PROGRAM, YOUR_LOCALEDIR)
fintl.textdomain(YOUR_PROGRAM)
except ImportError:
def gettext(msg):
return msg
_ = gettext
and/or also add the following to the top of any module containing messages:
import BASIC_MAIN
_ = BASIC_MAIN.gettext
Now you could use _("....") everywhere instead of "...." for message texts.
Once you have written your internationalized program, you can use
the suite of utility programs contained in the GNU gettext package to aid
the translation into other languages.
You ARE NOT REQUIRED to release the sourcecode of your program, since
linking of your program against GPL code is avoided by this module.
It is possible to use the GNU gettext library through the *intl.so*
module written by Martin von Löwis if that module is available, but it is
not required to use it in the first place.
"""
# Copyright 1999 by <mailto: pf@artcom-gmbh.de> (Peter Funk)
#
# All Rights Reserved
#
# Permission to use, copy, modify, and distribute this software and its
# documentation for any purpose and without fee is hereby granted,
# provided that the above copyright notice appear in all copies.
# ArtCom GmbH AND Peter Funk DISCLAIMS ALL WARRANTIES WITH REGARD TO
# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS, IN NO EVENT SHALL ArtCom GmBH or Peter Funk BE LIABLE
# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
# OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
_default_localedir = '/usr/share/locale'
_default_domain = 'python'
# check out, if Martin v. Löwis 'intl' module interface to the GNU gettext
# library is available and use it only, if it is available:
try:
from intl import *
except ImportError:
# now do what the gettext library provides in pure Python:
error = 'fintl.error'
# some globals preserving state:
_languages = []
_default_mo = None # This is default message outfile used by 'gettext'
_loaded_mos = {} # This is a dictionary of loaded message output files
# some small little helper routines:
def _check_env():
"""examine language enviroment variables and return list of languages"""
# TODO: This should somehow try to find out locale information on
# Non-unix platforms like WinXX and MacOS. Suggestions welcome!
languages = []
import os, string
for envvar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'):
if os.environ.has_key(envvar):
languages = string.split(os.environ[envvar], ':')
break
# use locale 'C' as default fallback:
if 'C' not in _languages:
languages.append('C')
return languages
# Utility function used to decode binary .mo file header and seek tables:
def _decode_Word(bin):
# This assumes little endian (intel, vax) byte order.
return ord(bin[0]) + (ord(bin[1]) << 8) + \
(ord(bin[2]) << 16) + (ord(bin[3]) << 24)
# Now the methods designed to be used from outside:
def gettext(message):
    """Return the localized version of a 'message' string.

    The lookup goes through the default message catalog; if none has been
    selected yet, textdomain() is called first to select one.
    """
    catalog = _default_mo
    if catalog is None:
        textdomain()
        # textdomain() rebinds the module-level default catalog
        catalog = _default_mo
    return catalog.gettext(message)

_ = gettext
def dgettext(domain, message):
"""like gettext but looks up 'message' in a special 'domain'"""
# This may useful for larger software systems
if not _loaded_mos.has_key(domain):
raise error, "No '" + domain + "' message domain"
return _loaded_mos[domain].gettext(message)
class _MoDict:
"""read a .mo file into a python dictionary"""
__MO_MAGIC = 0x950412de # Magic number of .mo files
def __init__(self, domain=_default_domain, localedir=_default_localedir):
global _languages
self.catalog = {}
self.domain = domain
self.localedir = localedir
# delayed access to environment variables:
if not _languages:
_languages = _check_env()
for self.lang in _languages:
if self.lang == 'C':
return
mo_filename = "%s//%s/LC_MESSAGES/%s.mo" % (
localedir, self.lang, domain)
try:
buffer = open(mo_filename, "rb").read()
break
except IOError:
pass
else:
return # assume C locale
# Decode the header of the .mo file (5 little endian 32 bit words):
if _decode_Word(buffer[:4]) != self.__MO_MAGIC :
raise error, '%s seems not be a valid .mo file' % mo_filename
self.mo_version = _decode_Word(buffer[4:8])
num_messages = _decode_Word(buffer[8:12])
master_index = _decode_Word(buffer[12:16])
transl_index = _decode_Word(buffer[16:20])
buf_len = len(buffer)
# now put all messages from the .mo file buffer in the catalog dict:
for i in xrange(0, num_messages):
start_master= _decode_Word(buffer[master_index+4:master_index+8])
end_master = start_master + \
_decode_Word(buffer[master_index:master_index+4])
start_transl= _decode_Word(buffer[transl_index+4:transl_index+8])
end_transl = start_transl + \
_decode_Word(buffer[transl_index:transl_index+4])
if end_master <= buf_len and end_transl <= buf_len:
self.catalog[buffer[start_master:end_master]]=\
buffer[start_transl:end_transl]
else:
raise error, ".mo file '%s' is corrupt" % mo_filename
# advance to the next entry in seek tables:
master_index += 8
transl_index += 8
def gettext(self, message):
"""return the translation of a given message"""
try:
return self.catalog[message]
except KeyError:
return message
# _MoDict instances may be also accessed using mo[msg] or mo(msg):
__getitem = gettext
__call__ = gettext
def textdomain(domain=_default_domain):
    """Select 'domain' as the default message domain of this program.

    The catalog is loaded (from the default locale directory) on first use
    and reused afterwards.  The domain defaults to 'python'.
    """
    global _default_mo
    try:
        catalog = _loaded_mos[domain]
    except KeyError:
        catalog = _MoDict(domain)
        _loaded_mos[domain] = catalog
    _default_mo = catalog
def bindtextdomain(domain, localedir=_default_localedir):
    """Load the catalog of 'domain' from 'localedir' unless already loaded.

    If a default catalog has been selected before, the catalog of 'domain'
    becomes the new default.
    """
    global _default_mo
    try:
        catalog = _loaded_mos[domain]
    except KeyError:
        catalog = _MoDict(domain, localedir)
        _loaded_mos[domain] = catalog
    if _default_mo is not None:
        _default_mo = catalog
def translator(domain=_default_domain, localedir=_default_localedir):
    """Return a gettext compatible function object
    which is bound to the domain given as parameter.

    A fresh catalog is read for 'domain' from 'localedir'; the module-level
    default domain and the cache of loaded domains are left untouched.
    The returned callable takes a message and returns its translation, or
    the message itself if the catalog has no entry for it.
    """
    return _MoDict(domain, localedir).gettext
def _testdriver(argv):
    """Exercise the module from the command line.

    argv may carry up to three optional arguments after the program name:
    MESSAGE, DOMAIN and LOCALEDIR.  The catalog information stored under
    the empty message is printed, followed by the translation of MESSAGE
    or, if none was given, of a few common button labels.
    """
    settings = ["", _default_domain, _default_localedir]
    given = argv[1:4]
    # overwrite as many defaults as there are arguments
    settings[:len(given)] = given
    message, domain, localedir = settings
    # now perform some testing of this module:
    bindtextdomain(domain, localedir)
    textdomain(domain)
    info = gettext('') # this is where special info is often stored
    if info:
        print(".mo file for domain %s in %s contains:" % (domain, localedir))
        print(info)
    else:
        print(".mo file contains no info")
    if message:
        samples = (message,)
    else:
        samples = ("Cancel", "No", "OK", "Quit", "Yes")
    for msg in samples:
        print("Translation of '" + msg + "' is '" + _(msg) + "'")
if __name__ == '__main__':
    import sys
    if len(sys.argv) > 1 and sys.argv[1] in ("-h", "-?"):
        # Help was requested: print the usage line only.  Running the test
        # driver as well would look up "-h" as if it were a message.
        print("Usage : %s [ MESSAGE [ DOMAIN [ LOCALEDIR ]]]" % sys.argv[0])
    else:
        _testdriver(sys.argv)

View file

@ -1,250 +0,0 @@
""" implements the robots.txt exclusion protocol
Copyright (C) 2000 Bastian Kleineidam
You can choose between two licenses when using this package:
1) GNU GPLv2
2) PYTHON 2.0 OPEN SOURCE LICENSE
The robots.txt Exclusion Protocol is implemented as specified in
http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html
"""
import re,string,urlparse,urllib
__all__ = ['RobotFileParser']
debug = 0
def _debug(msg):
if debug: print msg
class RobotFileParser:
    """Fetch and parse a robots.txt file and answer can_fetch() queries.

    Attributes:
      entries       -- list of parsed Entry objects, in file order
      disallow_all  -- set to 1 by read() on HTTP status 401 or 403
      allow_all     -- set to 1 by read() on any other status >= 400
      last_checked  -- time stamp stored by modified(), 0 initially
    """

    def __init__(self, url=''):
        self.entries = []
        self.disallow_all = 0
        self.allow_all = 0
        self.set_url(url)
        self.last_checked = 0

    def mtime(self):
        """Return the time stamp stored by the last modified() call."""
        return self.last_checked

    def modified(self):
        """Store the current time as the time of the last check."""
        import time
        self.last_checked = time.time()

    def set_url(self, url):
        """Remember url and split it into self.host and self.path."""
        self.url = url
        self.host, self.path = urlparse.urlparse(url)[1:3]

    def read(self):
        """Request the robots.txt URL via HTTP and evaluate the answer.

        Up to five 301/302 redirections are followed.  Status 401 or 403
        sets disallow_all, any other status >= 400 sets allow_all, and
        everything else has its body handed to parse().
        """
        import httplib
        tries = 0
        # limit number of redirections to 5
        while tries < 5:
            connection = httplib.HTTP(self.host)
            connection.putrequest("GET", self.path)
            connection.putheader("Host", self.host)
            connection.endheaders()
            status, text, mime = connection.getreply()
            if status in [301, 302] and mime:
                tries = tries + 1
                newurl = mime.get("Location", mime.get("Uri", ""))
                newurl = urlparse.urljoin(self.url, newurl)
                self.set_url(newurl)
            else:
                break
        if status == 401 or status == 403:
            self.disallow_all = 1
        elif status >= 400:
            self.allow_all = 1
        else:
            # status < 400
            self.parse(connection.getfile().readlines())

    def parse(self, lines):
        """Parse the input lines from a robots.txt file.

        We allow that a user-agent: line is not preceded by
        one or more blank lines.

        The parser is a small state machine:
          0 -- outside of a record
          1 -- user-agent: line(s) seen, no rule yet
          2 -- at least one allow:/disallow: rule seen
        A record is appended to self.entries when it ends in state 2.
        """
        state = 0
        linenumber = 0
        entry = Entry()
        for line in lines:
            line = line.strip()
            linenumber = linenumber + 1
            if not line:
                if state == 1:
                    _debug("line %d: warning: you should insert"
                           " allow: or disallow: directives below any"
                           " user-agent: line" % linenumber)
                    entry = Entry()
                    state = 0
                elif state == 2:
                    self.entries.append(entry)
                    entry = Entry()
                    state = 0
            # remove optional comment and strip line
            i = line.find('#')
            if i >= 0:
                line = line[:i]
            line = line.strip()
            if not line:
                continue
            line = line.split(':', 1)
            if len(line) == 2:
                line[0] = line[0].strip().lower()
                line[1] = line[1].strip()
                if line[0] == "user-agent":
                    if state == 2:
                        _debug("line %d: warning: you should insert a blank"
                               " line before any user-agent"
                               " directive" % linenumber)
                        self.entries.append(entry)
                        entry = Entry()
                    entry.useragents.append(line[1])
                    state = 1
                elif line[0] == "disallow":
                    if state == 0:
                        _debug("line %d: error: you must insert a user-agent:"
                               " directive before this line" % linenumber)
                    else:
                        entry.rulelines.append(RuleLine(line[1], 0))
                        state = 2
                elif line[0] == "allow":
                    if state == 0:
                        _debug("line %d: error: you must insert a user-agent:"
                               " directive before this line" % linenumber)
                    else:
                        entry.rulelines.append(RuleLine(line[1], 1))
                        # An allow rule completes a record just like a
                        # disallow rule; without this a record made of
                        # allow lines only would be thrown away.
                        state = 2
                else:
                    _debug("line %d: warning: unknown key %s" % (linenumber,
                           line[0]))
            else:
                _debug("line %d: error: malformed line %s" % (linenumber, line))
        if state == 2:
            self.entries.append(entry)
        _debug("Parsed rules:\n%s" % str(self))

    def can_fetch(self, useragent, url):
        """using the parsed robots.txt decide if useragent can fetch url"""
        _debug("Checking robot.txt allowance for\n%s\n%s" % (useragent, url))
        if self.disallow_all:
            return 0
        if self.allow_all:
            return 1
        # search for given user agent matches
        # the first match counts
        url = urllib.quote(urlparse.urlparse(url)[2]) or "/"
        for entry in self.entries:
            if entry.applies_to(useragent):
                return entry.allowance(url)
        # agent not found ==> access granted
        return 1

    def __str__(self):
        ret = ""
        for entry in self.entries:
            ret = ret + str(entry) + "\n"
        return ret
class RuleLine:
    """A rule line is a single "Allow:" (allowance==1) or "Disallow:"
    (allowance==0) followed by a path.

    The path is stored URL-quoted and matched as a plain prefix of the
    file name passed to applies_to().
    """

    def __init__(self, path, allowance):
        try:
            from urllib import quote
        except ImportError:
            # newer Python versions keep quote() in urllib.parse
            from urllib.parse import quote
        if path == '' and not allowance:
            # an empty "Disallow:" value means that everything is allowed
            allowance = 1
        self.path = quote(path)
        self.allowance = allowance

    def applies_to(self, filename):
        """Return a true value if this rule covers filename."""
        # Compare literally: robots.txt paths are prefixes, not regular
        # expressions, so characters like "." or "+" must not be
        # interpreted by the re module.
        return self.path == "*" or filename.startswith(self.path)

    def __str__(self):
        return (self.allowance and "Allow" or "Disallow")+": "+self.path
class Entry:
    """One robots.txt record: one or more user-agents plus zero or more
    rule lines."""

    def __init__(self):
        self.useragents = []
        self.rulelines = []

    def __str__(self):
        parts = []
        for agent in self.useragents:
            parts.append("User-agent: " + agent + "\n")
        for line in self.rulelines:
            parts.append(str(line) + "\n")
        return "".join(parts)

    def applies_to(self, useragent):
        """Return 1 if this entry applies to the specified agent, else 0."""
        # only the lower-cased name token in front of "/" is compared
        wanted = useragent.split("/")[0].lower()
        for agent in self.useragents:
            # '*' is the catch-all agent; otherwise a literal substring
            # match of the name token is sufficient
            if agent == '*' or agent.lower().find(wanted) >= 0:
                return 1
        return 0

    def allowance(self, filename):
        """Return the allowance of the first rule line matching filename.

        Preconditions:
        - our agent applies to this entry
        - filename is URL decoded

        If no rule line matches, access is allowed (1).
        """
        for rule in self.rulelines:
            _debug((filename, str(rule), rule.allowance))
            if rule.applies_to(filename):
                return rule.allowance
        return 1
def _check(a,b):
if a!=b:
print "failed\n"
else:
print "ok\n"
def _test():
    """Self test: load a robots.txt file and check some well-known answers.

    Without a command line argument the robots.txt file of
    www.musi-cal.com is fetched; otherwise the file named by the first
    argument is parsed.  Debug output is switched on.
    """
    global debug
    import sys
    rp = RobotFileParser()
    debug = 1
    if len(sys.argv) <= 1:
        rp.set_url('http://www.musi-cal.com/robots.txt')
        rp.read()
    else:
        rp.parse(open(sys.argv[1]).readlines())

    base = 'http://www.musi-cal.com'
    search = base + '/cgi-bin/event-search?city=San+Francisco'
    # (user agent, url, expected can_fetch() result)
    cases = (
        # test for re.escape
        ('*', base + '/', 1),
        # empty url path
        ('*', base, 1),
        # this should match the first rule, which is a disallow
        ('', base + '/', 0),
        # various cherry pickers
        ('CherryPickerSE', search, 0),
        ('CherryPickerSE/1.0', search, 0),
        ('CherryPickerSE/1.5', search, 0),
        # case sensitivity
        ('ExtractorPro', base + '/blubba', 0),
        ('extractorpro', base + '/blubba', 0),
        # substring test
        ('toolpak/1.1', base + '/blubba', 0),
        # tests for catch-all * agent
        ('spam', base + '/musician/me', 0),
        ('spam', base + '/Musician/me', 1),
        ('spam', base + '/', 1),
    )
    for agent, url, expected in cases:
        _check(rp.can_fetch(agent, url), expected)

if __name__ == '__main__':
    _test()

View file

@ -19,4 +19,9 @@ rem uncomment the next line to enable german output
rem set LC_MESSAGES=de
rem uncomment the next line to enable french output
rem set LC_MESSAGES=fr
rem If you see $python or $install_scripts on the next line, then you
rem are looking at a skeleton .bat file suited only for installation.
rem Look in c:\python21\scripts or wherever Python is installed for
rem the executable .bat file.
$python -O linkchecker --interactive %*

View file

@ -6,8 +6,10 @@ MSGFMT=$(PYTHON) $(I18NTOOLS)/msgfmt.py
#MSGFMT=msgfmt
MSGMERGE=msgmerge
SOURCES=\
../linkcheck/ChromeUrlData.py \
../linkcheck/Config.py \
../linkcheck/FileUrlData.py \
../linkcheck/FindUrlData.py \
../linkcheck/FtpUrlData.py \
../linkcheck/GopherUrlData.py \
../linkcheck/HostCheckingUrlData.py \

View file

@ -1,4 +1,4 @@
#! /usr/bin/env python2
#!/usr/bin/python
# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
@ -11,6 +11,11 @@ GNU msgfmt program, however, it is a simpler implementation.
Usage: msgfmt.py [OPTIONS] filename.po
Options:
-o file
--output-file=file
Specify the output file to write to. If omitted, output will go to a
file named filename.mo (based off the input file name).
-h
--help
Print this message and exit.
@ -18,15 +23,15 @@ Options:
-V
--version
Display version information and exit.
"""
import sys
import os
import getopt
import struct
import array
__version__ = "1.0"
__version__ = "1.1"
MESSAGES = {}
@ -91,17 +96,18 @@ def generate():
def make(filename):
def make(filename, outfile):
ID = 1
STR = 2
# Compute .mo name from .po name
# Compute .mo name from .po name and arguments
if filename.endswith('.po'):
infile = filename
outfile = filename[:-2] + 'mo'
else:
infile = filename + '.po'
outfile = filename + '.mo'
if outfile is None:
outfile = os.path.splitext(infile)[0] + '.mo'
try:
lines = open(infile).readlines()
except IOError, msg:
@ -159,7 +165,6 @@ def make(filename):
# Compute output
output = generate()
# Save output
try:
open(outfile,"wb").write(output)
except IOError,msg:
@ -169,10 +174,12 @@ def make(filename):
def main():
try:
opts, args = getopt.getopt(sys.argv[1:], 'hV', ['help','version'])
opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
['help', 'version', 'output-file='])
except getopt.error, msg:
usage(1, msg)
outfile = None
# parse options
for opt, arg in opts:
if opt in ('-h', '--help'):
@ -180,6 +187,8 @@ def main():
elif opt in ('-V', '--version'):
print >> sys.stderr, "msgfmt.py", __version__
sys.exit(0)
elif opt in ('-o', '--output-file'):
outfile = arg
# do it
if not args:
print >> sys.stderr, 'No input file given'
@ -187,7 +196,7 @@ def main():
return
for filename in args:
make(filename)
make(filename, outfile)
if __name__ == '__main__':

View file

@ -1,18 +1,13 @@
#! /usr/bin/env python2
#!/usr/bin/python
# Originally written by Barry Warsaw <bwarsaw@python.org>
#
# minimally patched to make it even more xgettext compatible
# by Peter Funk <pf@artcom-gmbh.de>
#
# even more minimalistic patched to fix the default-domain= option
# by Bastian Kleineidam <calvin@users.sourceforge.net>
# for selftesting
try:
import fintl
_ = fintl.gettext
except ImportError:
def _(s): return s
__doc__ = _("""pygettext -- Python equivalent of xgettext(1)
"""pygettext -- Python equivalent of xgettext(1)
Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
internationalization of C programs. Most of these tools are independent of
@ -65,7 +60,13 @@ Options:
-E
--escape
replace non-ASCII characters with octal escape sequences.
Replace non-ASCII characters with octal escape sequences.
-D
--docstrings
Extract module, class, method, and function docstrings. These do not
need to be wrapped in _() markers, and in fact cannot be for Python to
consider them docstrings.
-h
--help
@ -93,6 +94,15 @@ Options:
each msgid. The style of comments is controlled by the -S/--style
option. This is the default.
-o filename
--output=filename
Rename the default output file from messages.pot to filename. If
filename is `-' then the output is sent to standard out.
-p dir
--output-dir=dir
Output files will be placed in directory dir.
-S stylename
--style stylename
Specify which style to use for location comments. Two styles are
@ -103,15 +113,6 @@ Options:
The style name is case insensitive. GNU style is the default.
-o filename
--output=filename
Rename the default output file from messages.pot to filename. If
filename is `-' then the output is sent to standard out.
-p dir
--output-dir=dir
Output files will be placed in directory dir.
-v
--verbose
Print the names of the files being processed.
@ -132,7 +133,7 @@ Options:
If `inputfile' is -, standard input is read.
""")
"""
import os
import sys
@ -140,7 +141,14 @@ import time
import getopt
import tokenize
__version__ = '1.1'
# for selftesting
try:
import fintl
_ = fintl.gettext
except ImportError:
def _(s): return s
__version__ = '1.3'
default_keywords = ['_']
DEFAULTKEYWORDS = ', '.join(default_keywords)
@ -159,7 +167,8 @@ pot_header = _('''\
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\\n"
"PO-Revision-Date: %(time)s\\n"
"POT-Creation-Date: %(time)s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
@ -171,9 +180,9 @@ msgstr ""
def usage(code, msg=''):
print __doc__ % globals()
print >> sys.stderr, _(__doc__) % globals()
if msg:
print msg
print >> sys.stderr, msg
sys.exit(code)
@ -239,15 +248,48 @@ class TokenEater:
self.__state = self.__waiting
self.__data = []
self.__lineno = -1
self.__freshmodule = 1
def __call__(self, ttype, tstring, stup, etup, line):
# dispatch
## import token
## print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
## 'tstring:', tstring
self.__state(ttype, tstring, stup[0])
def __waiting(self, ttype, tstring, lineno):
# Do docstring extractions, if enabled
if self.__options.docstrings:
# module docstring?
if self.__freshmodule:
if ttype == tokenize.STRING:
self.__addentry(safe_eval(tstring), lineno)
self.__freshmodule = 0
elif ttype not in (tokenize.COMMENT, tokenize.NL):
self.__freshmodule = 0
return
# class docstring?
if ttype == tokenize.NAME and tstring in ('class', 'def'):
self.__state = self.__suiteseen
return
if ttype == tokenize.NAME and tstring in self.__options.keywords:
self.__state = self.__keywordseen
def __suiteseen(self, ttype, tstring, lineno):
# ignore anything until we see the colon
if ttype == tokenize.OP and tstring == ':':
self.__state = self.__suitedocstring
def __suitedocstring(self, ttype, tstring, lineno):
# ignore any intervening noise
if ttype == tokenize.STRING:
self.__addentry(safe_eval(tstring), lineno)
self.__state = self.__waiting
elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
tokenize.COMMENT):
# there was no class docstring
self.__state = self.__waiting
def __keywordseen(self, ttype, tstring, lineno):
if ttype == tokenize.OP and tstring == '(':
self.__data = []
@ -263,58 +305,54 @@ class TokenEater:
# of messages seen. Reset state for the next batch. If there
# were no strings inside _(), then just ignore this entry.
if self.__data:
msg = EMPTYSTRING.join(self.__data)
if not msg in self.__options.toexclude:
entry = (self.__curfile, self.__lineno)
linenos = self.__messages.get(msg)
if linenos is None:
self.__messages[msg] = [entry]
else:
linenos.append(entry)
self.__addentry(EMPTYSTRING.join(self.__data))
self.__state = self.__waiting
elif ttype == tokenize.STRING:
self.__data.append(safe_eval(tstring))
# TBD: should we warn if we seen anything else?
def __addentry(self, msg, lineno=None):
if lineno is None:
lineno = self.__lineno
if not msg in self.__options.toexclude:
entry = (self.__curfile, lineno)
self.__messages.setdefault(msg, []).append(entry)
def set_filename(self, filename):
self.__curfile = filename
def write(self, fp):
options = self.__options
timestamp = time.ctime(time.time())
# common header
try:
sys.stdout = fp
# The time stamp in the header doesn't have the same format
# as that generated by xgettext...
print pot_header % {'time': timestamp, 'version': __version__}
for k, v in self.__messages.items():
if not options.writelocations:
pass
# location comments are different b/w Solaris and GNU:
elif options.locationstyle == options.SOLARIS:
for filename, lineno in v:
d = {'filename': filename, 'lineno': lineno}
print _('# File: %(filename)s, line: %(lineno)d') % d
elif options.locationstyle == options.GNU:
# fit as many locations on one line, as long as the
# resulting line length doesn't exceeds 'options.width'
locline = '#:'
for filename, lineno in v:
d = {'filename': filename, 'lineno': lineno}
s = _(' %(filename)s:%(lineno)d') % d
if len(locline) + len(s) <= options.width:
locline = locline + s
else:
print locline
locline = "#:" + s
if len(locline) > 2:
print locline
# TBD: sorting, normalizing
print 'msgid', normalize(k)
print 'msgstr ""\n'
finally:
sys.stdout = sys.__stdout__
# The time stamp in the header doesn't have the same format as that
# generated by xgettext...
print >> fp, pot_header % {'time': timestamp, 'version': __version__}
for k, v in self.__messages.items():
if not options.writelocations:
pass
# location comments are different b/w Solaris and GNU:
elif options.locationstyle == options.SOLARIS:
for filename, lineno in v:
d = {'filename': filename, 'lineno': lineno}
print >>fp, _('# File: %(filename)s, line: %(lineno)d') % d
elif options.locationstyle == options.GNU:
# fit as many locations on one line, as long as the
# resulting line length doesn't exceeds 'options.width'
locline = '#:'
for filename, lineno in v:
d = {'filename': filename, 'lineno': lineno}
s = _(' %(filename)s:%(lineno)d') % d
if len(locline) + len(s) <= options.width:
locline = locline + s
else:
print >> fp, locline
locline = "#:" + s
if len(locline) > 2:
print >> fp, locline
# TBD: sorting, normalizing
print >> fp, 'msgid', normalize(k)
print >> fp, 'msgstr ""\n'
def main():
@ -322,11 +360,12 @@ def main():
try:
opts, args = getopt.getopt(
sys.argv[1:],
'ad:Ehk:Kno:p:S:Vvw:x:',
'ad:DEhk:Kno:p:S:Vvw:x:',
['extract-all', 'default-domain=', 'escape', 'help',
'keyword=', 'no-default-keywords',
'add-location', 'no-location', 'output=', 'output-dir=',
'style=', 'verbose', 'version', 'width=', 'exclude-file=',
'docstrings',
])
except getopt.error, msg:
usage(1, msg)
@ -347,6 +386,7 @@ def main():
verbose = 0
width = 78
excludefilename = ''
docstrings = 0
options = Options()
locations = {'gnu' : options.GNU,
@ -363,6 +403,8 @@ def main():
options.outfile = arg + '.pot'
elif opt in ('-E', '--escape'):
options.escape = 1
elif opt in ('-D', '--docstrings'):
options.docstrings = 1
elif opt in ('-k', '--keyword'):
options.keywords.append(arg)
elif opt in ('-K', '--no-default-keywords'):
@ -426,7 +468,11 @@ def main():
closep = 1
try:
eater.set_filename(filename)
tokenize.tokenize(fp.readline, eater)
try:
tokenize.tokenize(fp.readline, eater)
except tokenize.TokenError, e:
sys.stderr.write('%s: %s, line %d, column %d\n' %
(e[0], filename, e[1][0], e[1][1]))
finally:
if closep:
fp.close()

View file

@ -94,12 +94,14 @@ class MyDistribution(Distribution):
data = []
data.append('config_dir = %s' % `os.path.join(cwd, "config")`)
data.append("install_data = %s" % `cwd`)
self.create_conf_file(".", data)
self.create_conf_file("", data)
Distribution.run_commands(self)
def create_conf_file(self, directory, data=[]):
data.insert(0, "# this file is automatically created by setup.py")
if not directory:
directory = os.getcwd()
filename = os.path.join(directory, self.config_file)
# add metadata
metanames = ("name", "version", "author", "author_email",
@ -127,7 +129,7 @@ myname = "Bastian Kleineidam"
myemail = "calvin@users.sourceforge.net"
setup (name = "linkchecker",
version = "1.3.10",
version = "1.3.11",
description = "check HTML documents for broken links",
author = myname,
author_email = myemail,

View file

@ -3,9 +3,11 @@ url file:///home/calvin/projects/linkchecker/test/html/http.html
valid
url http://www.garantiertnixgutt.bla
name bad url
warning Missing '/' at end of URL
error
url http://www.heise.de
name ok
warning Missing '/' at end of URL
valid
url http:/www.heise.de
name one slash
@ -38,10 +40,12 @@ valid
url HtTP://WWW.hEIsE.DE
cached
name should be cached
warning Missing '/' at end of URL
valid
url HTTP://WWW.HEISE.DE
cached
name should be cached
warning Missing '/' at end of URL
valid
url http://www.heise.de/?quoted=ü
name html entities