mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-17 19:11:06 +00:00
updated tests
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@324 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
63ec8723e2
commit
14c9cbc4c4
12 changed files with 205 additions and 566 deletions
48
INSTALL
48
INSTALL
|
|
@ -13,17 +13,49 @@ Python!
|
|||
|
||||
Setup
|
||||
-----
|
||||
Run "python setup.py build" to build.
|
||||
Run "python setup.py install" to install.
|
||||
Run "python setup.py --help" for help.
|
||||
|
||||
After installation, the system wide configuration file is in
|
||||
<prefix>/share/linkchecker/linkcheckerrc.
|
||||
The local configuration file is ~/.linkcheckerrc on Unix platforms.
|
||||
|
||||
To run the program type "linkchecker" followed by the URLs you want
|
||||
to check.
|
||||
Type "linkchecker -h" for help.
|
||||
o Unix platforms
|
||||
- open a commandline window and change to the linkchecker-x.x.x
|
||||
directory
|
||||
- run "python setup.py install" to install
|
||||
|
||||
For help on setup.py options, run "python setup.py --help".
|
||||
The local configuration file is $HOME/.linkcheckerrc
|
||||
|
||||
To run the program type "linkchecker" followed by the URLs you want
|
||||
to check.
|
||||
Type "linkchecker -h" for help.
|
||||
|
||||
|
||||
o Windows platforms
|
||||
- make sure that python.exe is in your PATH
|
||||
- open a commandline window (cmd.exe) and change to the
|
||||
linkchecker-x.x.x directory
|
||||
- run "python.exe setup.py install" to install
|
||||
|
||||
For help on setup.py options, run "python.exe setup.py --help".
|
||||
|
||||
To run the program, change to the scripts directory of your python
|
||||
installation and type "linkchecker.bat" followed by the URLs you
|
||||
want to check.
|
||||
Type "linkchecker.bat -h" for help.
|
||||
|
||||
|
||||
o MacOS 9.x platforms
|
||||
- open the Python IDE
|
||||
- open the setup.py file
|
||||
- run it (click on "Run all")
|
||||
- in the popup window, select the "install" command and click "Add"
|
||||
- click "Ok"; this will copy files into the Python folder
|
||||
|
||||
Read the MacOS Python documentation to find out about passing
|
||||
commandline options to Python scripts.
|
||||
|
||||
|
||||
o MacOS X platforms
|
||||
- not tested
|
||||
|
||||
|
||||
(Fast)CGI web interface
|
||||
|
|
|
|||
10
debian/changelog
vendored
10
debian/changelog
vendored
|
|
@ -1,3 +1,13 @@
|
|||
linkchecker (1.3.11) unstable; urgency=low
|
||||
|
||||
* setup.py: use os.getcwd(), not "." which breaks on MacOS 9.x
|
||||
* added platform-specific install instructions
|
||||
* use Python's internal gettext module, get rid of fintl.py
|
||||
* use Python's internal robots.txt parser, get rid of
|
||||
robotparser2.py
|
||||
|
||||
-- Bastian Kleineidam <calvin@debian.org> Wed, 28 Nov 2001 17:57:20 +0100
|
||||
|
||||
linkchecker (1.3.10) unstable; urgency=low
|
||||
|
||||
* use Python's builtin HTTPS support
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@
|
|||
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
import httplib, urlparse, sys, time, re
|
||||
import Config, StringUtil, robotparser2
|
||||
import Config, StringUtil, robotparser
|
||||
from UrlData import UrlData
|
||||
from urllib import splittype, splithost, splituser, splitpasswd
|
||||
from linkcheck import _
|
||||
|
|
@ -251,7 +251,7 @@ class HttpUrlData(UrlData):
|
|||
def robotsTxtAllowsUrl(self, config):
|
||||
roboturl="%s://%s/robots.txt" % self.urlTuple[0:2]
|
||||
if not config.robotsTxtCache_has_key(roboturl):
|
||||
rp = robotparser2.RobotFileParser(roboturl)
|
||||
rp = robotparser.RobotFileParser(roboturl)
|
||||
rp.read()
|
||||
config.robotsTxtCache_set(roboturl, rp)
|
||||
rp = config.robotsTxtCache_get(roboturl)
|
||||
|
|
|
|||
|
|
@ -19,28 +19,15 @@ class error(Exception):
|
|||
pass
|
||||
|
||||
# i18n support
|
||||
LANG="EN" # default language (used for HTML output)
|
||||
import _linkchecker_configdata
|
||||
try:
|
||||
import fintl,os,string
|
||||
gettext = fintl.gettext
|
||||
import gettext
|
||||
domain = 'linkcheck'
|
||||
localedir = os.path.join(_linkchecker_configdata.install_data, 'locale')
|
||||
fintl.bindtextdomain(domain, localedir)
|
||||
fintl.textdomain(domain)
|
||||
languages = []
|
||||
for envvar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'):
|
||||
if os.environ.has_key(envvar):
|
||||
languages = string.split(os.environ[envvar], ':')
|
||||
break
|
||||
if languages:
|
||||
LANG=string.upper(languages[0])
|
||||
|
||||
except ImportError:
|
||||
def gettext(msg):
|
||||
return msg
|
||||
# set _ as an alias for gettext
|
||||
_ = gettext
|
||||
t = gettext.translation(domain, localedir)
|
||||
_ = t.gettext
|
||||
except IOError:
|
||||
_ = lambda s: s
|
||||
|
||||
#import timeoutsocket
|
||||
#timeoutsocket.setDefaultSocketTimeout(20)
|
||||
|
|
|
|||
|
|
@ -1,208 +0,0 @@
|
|||
## vim:ts=4:et:nowrap
|
||||
"""i18n (multiple language) support. Reads .mo files from GNU gettext msgfmt
|
||||
|
||||
If you want to prepare your Python programs for i18n you could simply
|
||||
add the following lines to the top of a BASIC_MAIN module of your py-program:
|
||||
try:
|
||||
import fintl
|
||||
gettext = fintl.gettext
|
||||
fintl.bindtextdomain(YOUR_PROGRAM, YOUR_LOCALEDIR)
|
||||
fintl.textdomain(YOUR_PROGRAM)
|
||||
except ImportError:
|
||||
def gettext(msg):
|
||||
return msg
|
||||
_ = gettext
|
||||
and/or also add the following to the top of any module containing messages:
|
||||
import BASIC_MAIN
|
||||
_ = BASIC_MAIN.gettext
|
||||
|
||||
Now you could use _("....") everywhere instead of "...." for message texts.
|
||||
|
||||
Once you have written your internationalized program, you can use
|
||||
the suite of utility programs contained in the GNU gettext package to aid
|
||||
the translation into other languages.
|
||||
|
||||
You ARE NOT REQUIRED to release the sourcecode of your program, since
|
||||
linking of your program against GPL code is avoided by this module.
|
||||
Although it is possible to use the GNU gettext library by using the
|
||||
*intl.so* module written by Martin von Löwis if this is available. But it is
|
||||
not required to use it in the first place.
|
||||
"""
|
||||
# Copyright 1999 by <mailto: pf@artcom-gmbh.de> (Peter Funk)
|
||||
#
|
||||
# All Rights Reserved
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and its
|
||||
# documentation for any purpose and without fee is hereby granted,
|
||||
# provided that the above copyright notice appear in all copies.
|
||||
|
||||
# ArtCom GmbH AND Peter Funk DISCLAIMS ALL WARRANTIES WITH REGARD TO
|
||||
# THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
# AND FITNESS, IN NO EVENT SHALL ArtCom GmBH or Peter Funk BE LIABLE
|
||||
# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
|
||||
# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
|
||||
# OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
|
||||
_default_localedir = '/usr/share/locale'
|
||||
_default_domain = 'python'
|
||||
|
||||
# check out, if Martin v. Löwis 'intl' module interface to the GNU gettext
|
||||
# library is available and use it only, if it is available:
|
||||
try:
|
||||
from intl import *
|
||||
except ImportError:
|
||||
# now do what the gettext library provides in pure Python:
|
||||
error = 'fintl.error'
|
||||
# some globals preserving state:
|
||||
_languages = []
|
||||
_default_mo = None # This is default message outfile used by 'gettext'
|
||||
_loaded_mos = {} # This is a dictionary of loaded message output files
|
||||
|
||||
# some small little helper routines:
|
||||
def _check_env():
|
||||
"""examine language enviroment variables and return list of languages"""
|
||||
# TODO: This should somehow try to find out locale information on
|
||||
# Non-unix platforms like WinXX and MacOS. Suggestions welcome!
|
||||
languages = []
|
||||
import os, string
|
||||
for envvar in ('LANGUAGE', 'LC_ALL', 'LC_MESSAGES', 'LANG'):
|
||||
if os.environ.has_key(envvar):
|
||||
languages = string.split(os.environ[envvar], ':')
|
||||
break
|
||||
# use locale 'C' as default fallback:
|
||||
if 'C' not in _languages:
|
||||
languages.append('C')
|
||||
return languages
|
||||
|
||||
# Utility function used to decode binary .mo file header and seek tables:
|
||||
def _decode_Word(bin):
|
||||
# This assumes little endian (intel, vax) byte order.
|
||||
return ord(bin[0]) + (ord(bin[1]) << 8) + \
|
||||
(ord(bin[2]) << 16) + (ord(bin[3]) << 24)
|
||||
|
||||
# Now the methods designed to be used from outside:
|
||||
|
||||
def gettext(message):
|
||||
"""return localized version of a 'message' string"""
|
||||
if _default_mo is None:
|
||||
textdomain()
|
||||
return _default_mo.gettext(message)
|
||||
|
||||
_ = gettext
|
||||
|
||||
def dgettext(domain, message):
|
||||
"""like gettext but looks up 'message' in a special 'domain'"""
|
||||
# This may be useful for larger software systems
|
||||
if not _loaded_mos.has_key(domain):
|
||||
raise error, "No '" + domain + "' message domain"
|
||||
return _loaded_mos[domain].gettext(message)
|
||||
|
||||
class _MoDict:
|
||||
"""read a .mo file into a python dictionary"""
|
||||
__MO_MAGIC = 0x950412de # Magic number of .mo files
|
||||
def __init__(self, domain=_default_domain, localedir=_default_localedir):
|
||||
global _languages
|
||||
self.catalog = {}
|
||||
self.domain = domain
|
||||
self.localedir = localedir
|
||||
# delayed access to environment variables:
|
||||
if not _languages:
|
||||
_languages = _check_env()
|
||||
for self.lang in _languages:
|
||||
if self.lang == 'C':
|
||||
return
|
||||
mo_filename = "%s//%s/LC_MESSAGES/%s.mo" % (
|
||||
localedir, self.lang, domain)
|
||||
try:
|
||||
buffer = open(mo_filename, "rb").read()
|
||||
break
|
||||
except IOError:
|
||||
pass
|
||||
else:
|
||||
return # assume C locale
|
||||
# Decode the header of the .mo file (5 little endian 32 bit words):
|
||||
if _decode_Word(buffer[:4]) != self.__MO_MAGIC :
|
||||
raise error, '%s seems not be a valid .mo file' % mo_filename
|
||||
self.mo_version = _decode_Word(buffer[4:8])
|
||||
num_messages = _decode_Word(buffer[8:12])
|
||||
master_index = _decode_Word(buffer[12:16])
|
||||
transl_index = _decode_Word(buffer[16:20])
|
||||
buf_len = len(buffer)
|
||||
# now put all messages from the .mo file buffer in the catalog dict:
|
||||
for i in xrange(0, num_messages):
|
||||
start_master= _decode_Word(buffer[master_index+4:master_index+8])
|
||||
end_master = start_master + \
|
||||
_decode_Word(buffer[master_index:master_index+4])
|
||||
start_transl= _decode_Word(buffer[transl_index+4:transl_index+8])
|
||||
end_transl = start_transl + \
|
||||
_decode_Word(buffer[transl_index:transl_index+4])
|
||||
if end_master <= buf_len and end_transl <= buf_len:
|
||||
self.catalog[buffer[start_master:end_master]]=\
|
||||
buffer[start_transl:end_transl]
|
||||
else:
|
||||
raise error, ".mo file '%s' is corrupt" % mo_filename
|
||||
# advance to the next entry in seek tables:
|
||||
master_index += 8
|
||||
transl_index += 8
|
||||
|
||||
def gettext(self, message):
|
||||
"""return the translation of a given message"""
|
||||
try:
|
||||
return self.catalog[message]
|
||||
except KeyError:
|
||||
return message
|
||||
# _MoDict instances may be also accessed using mo[msg] or mo(msg):
|
||||
__getitem = gettext
|
||||
__call__ = gettext
|
||||
|
||||
def textdomain(domain=_default_domain):
|
||||
"""Sets the 'domain' to be used by this program. Defaults to 'python'"""
|
||||
global _default_mo
|
||||
if not _loaded_mos.has_key(domain):
|
||||
_loaded_mos[domain] = _MoDict(domain)
|
||||
_default_mo = _loaded_mos[domain]
|
||||
|
||||
def bindtextdomain(domain, localedir=_default_localedir):
|
||||
global _default_mo
|
||||
if not _loaded_mos.has_key(domain):
|
||||
_loaded_mos[domain] = _MoDict(domain, localedir)
|
||||
if _default_mo is not None:
|
||||
_default_mo = _loaded_mos[domain]
|
||||
|
||||
def translator(domain=_default_domain, localedir=_default_localedir):
|
||||
"""returns a gettext compatible function object
|
||||
|
||||
which is bound to the domain given as parameter"""
|
||||
pass # TODO implement this
|
||||
|
||||
def _testdriver(argv):
|
||||
message = ""
|
||||
domain = _default_domain
|
||||
localedir = _default_localedir
|
||||
if len(argv) > 1:
|
||||
message = argv[1]
|
||||
if len(argv) > 2:
|
||||
domain = argv[2]
|
||||
if len(argv) > 3:
|
||||
localedir = argv[3]
|
||||
# now perform some testing of this module:
|
||||
bindtextdomain(domain, localedir)
|
||||
textdomain(domain)
|
||||
info = gettext('') # this is where special info is often stored
|
||||
if info:
|
||||
print ".mo file for domain %s in %s contains:" % (domain, localedir)
|
||||
print info
|
||||
else:
|
||||
print ".mo file contains no info"
|
||||
if message:
|
||||
print "Translation of '"+ message+ "' is '"+ _(message)+ "'"
|
||||
else:
|
||||
for msg in ("Cancel", "No", "OK", "Quit", "Yes"):
|
||||
print "Translation of '"+ msg + "' is '"+ _(msg)+ "'"
|
||||
|
||||
if __name__ == '__main__':
|
||||
import sys
|
||||
if len(sys.argv) > 1 and (sys.argv[1] == "-h" or sys.argv[1] == "-?"):
|
||||
print "Usage :", sys.argv[0], "[ MESSAGE [ DOMAIN [ LOCALEDIR ]]]"
|
||||
_testdriver(sys.argv)
|
||||
|
|
@ -1,250 +0,0 @@
|
|||
""" implements the robots.txt exclusion protocol
|
||||
|
||||
Copyright (C) 2000 Bastian Kleineidam
|
||||
|
||||
You can choose between two licenses when using this package:
|
||||
1) GNU GPLv2
|
||||
2) PYTHON 2.0 OPEN SOURCE LICENSE
|
||||
|
||||
The robots.txt Exclusion Protocol is implemented as specified in
|
||||
http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html
|
||||
"""
|
||||
import re,string,urlparse,urllib
|
||||
|
||||
__all__ = ['RobotFileParser']
|
||||
|
||||
debug = 0
|
||||
|
||||
def _debug(msg):
|
||||
if debug: print msg
|
||||
|
||||
|
||||
class RobotFileParser:
|
||||
def __init__(self, url=''):
|
||||
self.entries = []
|
||||
self.disallow_all = 0
|
||||
self.allow_all = 0
|
||||
self.set_url(url)
|
||||
self.last_checked = 0
|
||||
|
||||
def mtime(self):
|
||||
return self.last_checked
|
||||
|
||||
def modified(self):
|
||||
import time
|
||||
self.last_checked = time.time()
|
||||
|
||||
def set_url(self, url):
|
||||
self.url = url
|
||||
self.host, self.path = urlparse.urlparse(url)[1:3]
|
||||
|
||||
def read(self):
|
||||
import httplib
|
||||
tries = 0
|
||||
# limit number of redirections to 5
|
||||
while tries<5:
|
||||
connection = httplib.HTTP(self.host)
|
||||
connection.putrequest("GET", self.path)
|
||||
connection.putheader("Host", self.host)
|
||||
connection.endheaders()
|
||||
status, text, mime = connection.getreply()
|
||||
if status in [301,302] and mime:
|
||||
tries = tries + 1
|
||||
newurl = mime.get("Location", mime.get("Uri", ""))
|
||||
newurl = urlparse.urljoin(self.url, newurl)
|
||||
self.set_url(newurl)
|
||||
else:
|
||||
break
|
||||
if status==401 or status==403:
|
||||
self.disallow_all = 1
|
||||
elif status>=400:
|
||||
self.allow_all = 1
|
||||
else:
|
||||
# status < 400
|
||||
self.parse(connection.getfile().readlines())
|
||||
|
||||
def parse(self, lines):
|
||||
"""parse the input lines from a robot.txt file.
|
||||
We allow that a user-agent: line is not preceded by
|
||||
one or more blank lines."""
|
||||
state = 0
|
||||
linenumber = 0
|
||||
entry = Entry()
|
||||
|
||||
for line in lines:
|
||||
line = string.strip(line)
|
||||
linenumber = linenumber + 1
|
||||
if not line:
|
||||
if state==1:
|
||||
_debug("line %d: warning: you should insert"
|
||||
" allow: or disallow: directives below any"
|
||||
" user-agent: line" % linenumber)
|
||||
entry = Entry()
|
||||
state = 0
|
||||
elif state==2:
|
||||
self.entries.append(entry)
|
||||
entry = Entry()
|
||||
state = 0
|
||||
# remove optional comment and strip line
|
||||
i = string.find(line, '#')
|
||||
if i>=0:
|
||||
line = line[:i]
|
||||
line = string.strip(line)
|
||||
if not line:
|
||||
continue
|
||||
line = string.split(line, ':', 1)
|
||||
if len(line) == 2:
|
||||
line[0] = string.lower(string.strip(line[0]))
|
||||
line[1] = string.strip(line[1])
|
||||
if line[0] == "user-agent":
|
||||
if state==2:
|
||||
_debug("line %d: warning: you should insert a blank"
|
||||
" line before any user-agent"
|
||||
" directive" % linenumber)
|
||||
self.entries.append(entry)
|
||||
entry = Entry()
|
||||
entry.useragents.append(line[1])
|
||||
state = 1
|
||||
elif line[0] == "disallow":
|
||||
if state==0:
|
||||
_debug("line %d: error: you must insert a user-agent:"
|
||||
" directive before this line" % linenumber)
|
||||
else:
|
||||
entry.rulelines.append(RuleLine(line[1], 0))
|
||||
state = 2
|
||||
elif line[0] == "allow":
|
||||
if state==0:
|
||||
_debug("line %d: error: you must insert a user-agent:"
|
||||
" directive before this line" % linenumber)
|
||||
else:
|
||||
entry.rulelines.append(RuleLine(line[1], 1))
|
||||
else:
|
||||
_debug("line %d: warning: unknown key %s" % (linenumber,
|
||||
line[0]))
|
||||
else:
|
||||
_debug("line %d: error: malformed line %s"%(linenumber, line))
|
||||
if state==2:
|
||||
self.entries.append(entry)
|
||||
_debug("Parsed rules:\n%s" % str(self))
|
||||
|
||||
|
||||
def can_fetch(self, useragent, url):
|
||||
"""using the parsed robots.txt decide if useragent can fetch url"""
|
||||
_debug("Checking robot.txt allowance for\n%s\n%s" % (useragent, url))
|
||||
if self.disallow_all:
|
||||
return 0
|
||||
if self.allow_all:
|
||||
return 1
|
||||
# search for given user agent matches
|
||||
# the first match counts
|
||||
url = urllib.quote(urlparse.urlparse(url)[2]) or "/"
|
||||
for entry in self.entries:
|
||||
if entry.applies_to(useragent):
|
||||
return entry.allowance(url)
|
||||
# agent not found ==> access granted
|
||||
return 1
|
||||
|
||||
|
||||
def __str__(self):
|
||||
ret = ""
|
||||
for entry in self.entries:
|
||||
ret = ret + str(entry) + "\n"
|
||||
return ret
|
||||
|
||||
|
||||
class RuleLine:
|
||||
"""A rule line is a single "Allow:" (allowance==1) or "Disallow:"
|
||||
(allowance==0) followed by a path."""
|
||||
def __init__(self, path, allowance):
|
||||
self.path = urllib.quote(path)
|
||||
self.allowance = allowance
|
||||
|
||||
def applies_to(self, filename):
|
||||
return self.path=="*" or re.match(self.path, filename)
|
||||
|
||||
def __str__(self):
|
||||
return (self.allowance and "Allow" or "Disallow")+": "+self.path
|
||||
|
||||
|
||||
class Entry:
|
||||
"""An entry has one or more user-agents and zero or more rulelines"""
|
||||
def __init__(self):
|
||||
self.useragents = []
|
||||
self.rulelines = []
|
||||
|
||||
def __str__(self):
|
||||
ret = ""
|
||||
for agent in self.useragents:
|
||||
ret = ret + "User-agent: "+agent+"\n"
|
||||
for line in self.rulelines:
|
||||
ret = ret + str(line) + "\n"
|
||||
return ret
|
||||
|
||||
def applies_to(self, useragent):
|
||||
"""check if this entry applies to the specified agent"""
|
||||
# split the name token and make it lower case
|
||||
useragent = string.lower(string.split(useragent,"/")[0])
|
||||
for agent in self.useragents:
|
||||
if agent=='*':
|
||||
# we have the catch-all agent
|
||||
return 1
|
||||
agent = string.lower(agent)
|
||||
# don't forget to re.escape
|
||||
if re.search(re.escape(useragent), agent):
|
||||
return 1
|
||||
return 0
|
||||
|
||||
def allowance(self, filename):
|
||||
"""Preconditions:
|
||||
- our agent applies to this entry
|
||||
- filename is URL decoded"""
|
||||
for line in self.rulelines:
|
||||
_debug((filename, str(line), line.allowance))
|
||||
if line.applies_to(filename):
|
||||
return line.allowance
|
||||
return 1
|
||||
|
||||
def _check(a,b):
|
||||
if a!=b:
|
||||
print "failed\n"
|
||||
else:
|
||||
print "ok\n"
|
||||
|
||||
def _test():
|
||||
global debug
|
||||
import sys
|
||||
rp = RobotFileParser()
|
||||
debug = 1
|
||||
if len(sys.argv) <= 1:
|
||||
rp.set_url('http://www.musi-cal.com/robots.txt')
|
||||
rp.read()
|
||||
else:
|
||||
rp.parse(open(sys.argv[1]).readlines())
|
||||
# test for re.escape
|
||||
_check(rp.can_fetch('*', 'http://www.musi-cal.com/'), 1)
|
||||
# empty url path
|
||||
_check(rp.can_fetch('*', 'http://www.musi-cal.com'), 1)
|
||||
# this should match the first rule, which is a disallow
|
||||
_check(rp.can_fetch('', 'http://www.musi-cal.com/'), 0)
|
||||
# various cherry pickers
|
||||
_check(rp.can_fetch('CherryPickerSE',
|
||||
'http://www.musi-cal.com/cgi-bin/event-search'
|
||||
'?city=San+Francisco'), 0)
|
||||
_check(rp.can_fetch('CherryPickerSE/1.0',
|
||||
'http://www.musi-cal.com/cgi-bin/event-search'
|
||||
'?city=San+Francisco'), 0)
|
||||
_check(rp.can_fetch('CherryPickerSE/1.5',
|
||||
'http://www.musi-cal.com/cgi-bin/event-search'
|
||||
'?city=San+Francisco'), 0)
|
||||
# case sensitivity
|
||||
_check(rp.can_fetch('ExtractorPro', 'http://www.musi-cal.com/blubba'), 0)
|
||||
_check(rp.can_fetch('extractorpro', 'http://www.musi-cal.com/blubba'), 0)
|
||||
# substring test
|
||||
_check(rp.can_fetch('toolpak/1.1', 'http://www.musi-cal.com/blubba'), 0)
|
||||
# tests for catch-all * agent
|
||||
_check(rp.can_fetch('spam', 'http://www.musi-cal.com/musician/me'), 0)
|
||||
_check(rp.can_fetch('spam', 'http://www.musi-cal.com/Musician/me'), 1)
|
||||
_check(rp.can_fetch('spam', 'http://www.musi-cal.com/'), 1)
|
||||
|
||||
if __name__ == '__main__':
|
||||
_test()
|
||||
|
|
@ -19,4 +19,9 @@ rem uncomment the next line to enable german output
|
|||
rem set LC_MESSAGES=de
|
||||
rem uncomment the next line to enable french output
|
||||
rem set LC_MESSAGES=fr
|
||||
|
||||
rem If you see $python or $install_scripts on the next line, then you
|
||||
rem are looking at a skeleton .bat file suited only for installation.
|
||||
rem Look in c:\python21\scripts or wherever Python is installed for
|
||||
rem the executable .bat file.
|
||||
$python -O linkchecker --interactive %*
|
||||
|
|
|
|||
|
|
@ -6,8 +6,10 @@ MSGFMT=$(PYTHON) $(I18NTOOLS)/msgfmt.py
|
|||
#MSGFMT=msgfmt
|
||||
MSGMERGE=msgmerge
|
||||
SOURCES=\
|
||||
../linkcheck/ChromeUrlData.py \
|
||||
../linkcheck/Config.py \
|
||||
../linkcheck/FileUrlData.py \
|
||||
../linkcheck/FindUrlData.py \
|
||||
../linkcheck/FtpUrlData.py \
|
||||
../linkcheck/GopherUrlData.py \
|
||||
../linkcheck/HostCheckingUrlData.py \
|
||||
|
|
|
|||
29
po/msgfmt.py
29
po/msgfmt.py
|
|
@ -1,4 +1,4 @@
|
|||
#! /usr/bin/env python2
|
||||
#!/usr/bin/python
|
||||
|
||||
# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
|
||||
|
||||
|
|
@ -11,6 +11,11 @@ GNU msgfmt program, however, it is a simpler implementation.
|
|||
Usage: msgfmt.py [OPTIONS] filename.po
|
||||
|
||||
Options:
|
||||
-o file
|
||||
--output-file=file
|
||||
Specify the output file to write to. If omitted, output will go to a
|
||||
file named filename.mo (based off the input file name).
|
||||
|
||||
-h
|
||||
--help
|
||||
Print this message and exit.
|
||||
|
|
@ -18,15 +23,15 @@ Options:
|
|||
-V
|
||||
--version
|
||||
Display version information and exit.
|
||||
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import getopt
|
||||
import struct
|
||||
import array
|
||||
|
||||
__version__ = "1.0"
|
||||
__version__ = "1.1"
|
||||
|
||||
MESSAGES = {}
|
||||
|
||||
|
|
@ -91,17 +96,18 @@ def generate():
|
|||
|
||||
|
||||
|
||||
def make(filename):
|
||||
def make(filename, outfile):
|
||||
ID = 1
|
||||
STR = 2
|
||||
|
||||
# Compute .mo name from .po name
|
||||
# Compute .mo name from .po name and arguments
|
||||
if filename.endswith('.po'):
|
||||
infile = filename
|
||||
outfile = filename[:-2] + 'mo'
|
||||
else:
|
||||
infile = filename + '.po'
|
||||
outfile = filename + '.mo'
|
||||
if outfile is None:
|
||||
outfile = os.path.splitext(infile)[0] + '.mo'
|
||||
|
||||
try:
|
||||
lines = open(infile).readlines()
|
||||
except IOError, msg:
|
||||
|
|
@ -159,7 +165,6 @@ def make(filename):
|
|||
# Compute output
|
||||
output = generate()
|
||||
|
||||
# Save output
|
||||
try:
|
||||
open(outfile,"wb").write(output)
|
||||
except IOError,msg:
|
||||
|
|
@ -169,10 +174,12 @@ def make(filename):
|
|||
|
||||
def main():
|
||||
try:
|
||||
opts, args = getopt.getopt(sys.argv[1:], 'hV', ['help','version'])
|
||||
opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
|
||||
['help', 'version', 'output-file='])
|
||||
except getopt.error, msg:
|
||||
usage(1, msg)
|
||||
|
||||
outfile = None
|
||||
# parse options
|
||||
for opt, arg in opts:
|
||||
if opt in ('-h', '--help'):
|
||||
|
|
@ -180,6 +187,8 @@ def main():
|
|||
elif opt in ('-V', '--version'):
|
||||
print >> sys.stderr, "msgfmt.py", __version__
|
||||
sys.exit(0)
|
||||
elif opt in ('-o', '--output-file'):
|
||||
outfile = arg
|
||||
# do it
|
||||
if not args:
|
||||
print >> sys.stderr, 'No input file given'
|
||||
|
|
@ -187,7 +196,7 @@ def main():
|
|||
return
|
||||
|
||||
for filename in args:
|
||||
make(filename)
|
||||
make(filename, outfile)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
|||
182
po/pygettext.py
182
po/pygettext.py
|
|
@ -1,18 +1,13 @@
|
|||
#! /usr/bin/env python2
|
||||
#!/usr/bin/python
|
||||
# Originally written by Barry Warsaw <bwarsaw@python.org>
|
||||
#
|
||||
# minimally patched to make it even more xgettext compatible
|
||||
# by Peter Funk <pf@artcom-gmbh.de>
|
||||
#
|
||||
# even more minimalistic patched to fix the default-domain= option
|
||||
# by Bastian Kleineidam <calvin@users.sourceforge.net>
|
||||
|
||||
# for selftesting
|
||||
try:
|
||||
import fintl
|
||||
_ = fintl.gettext
|
||||
except ImportError:
|
||||
def _(s): return s
|
||||
|
||||
|
||||
__doc__ = _("""pygettext -- Python equivalent of xgettext(1)
|
||||
"""pygettext -- Python equivalent of xgettext(1)
|
||||
|
||||
Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
|
||||
internationalization of C programs. Most of these tools are independent of
|
||||
|
|
@ -65,7 +60,13 @@ Options:
|
|||
|
||||
-E
|
||||
--escape
|
||||
replace non-ASCII characters with octal escape sequences.
|
||||
Replace non-ASCII characters with octal escape sequences.
|
||||
|
||||
-D
|
||||
--docstrings
|
||||
Extract module, class, method, and function docstrings. These do not
|
||||
need to be wrapped in _() markers, and in fact cannot be for Python to
|
||||
consider them docstrings.
|
||||
|
||||
-h
|
||||
--help
|
||||
|
|
@ -93,6 +94,15 @@ Options:
|
|||
each msgid. The style of comments is controlled by the -S/--style
|
||||
option. This is the default.
|
||||
|
||||
-o filename
|
||||
--output=filename
|
||||
Rename the default output file from messages.pot to filename. If
|
||||
filename is `-' then the output is sent to standard out.
|
||||
|
||||
-p dir
|
||||
--output-dir=dir
|
||||
Output files will be placed in directory dir.
|
||||
|
||||
-S stylename
|
||||
--style stylename
|
||||
Specify which style to use for location comments. Two styles are
|
||||
|
|
@ -103,15 +113,6 @@ Options:
|
|||
|
||||
The style name is case insensitive. GNU style is the default.
|
||||
|
||||
-o filename
|
||||
--output=filename
|
||||
Rename the default output file from messages.pot to filename. If
|
||||
filename is `-' then the output is sent to standard out.
|
||||
|
||||
-p dir
|
||||
--output-dir=dir
|
||||
Output files will be placed in directory dir.
|
||||
|
||||
-v
|
||||
--verbose
|
||||
Print the names of the files being processed.
|
||||
|
|
@ -132,7 +133,7 @@ Options:
|
|||
|
||||
If `inputfile' is -, standard input is read.
|
||||
|
||||
""")
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
|
@ -140,7 +141,14 @@ import time
|
|||
import getopt
|
||||
import tokenize
|
||||
|
||||
__version__ = '1.1'
|
||||
# for selftesting
|
||||
try:
|
||||
import fintl
|
||||
_ = fintl.gettext
|
||||
except ImportError:
|
||||
def _(s): return s
|
||||
|
||||
__version__ = '1.3'
|
||||
|
||||
default_keywords = ['_']
|
||||
DEFAULTKEYWORDS = ', '.join(default_keywords)
|
||||
|
|
@ -159,7 +167,8 @@ pot_header = _('''\
|
|||
msgid ""
|
||||
msgstr ""
|
||||
"Project-Id-Version: PACKAGE VERSION\\n"
|
||||
"PO-Revision-Date: %(time)s\\n"
|
||||
"POT-Creation-Date: %(time)s\\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
|
||||
"Language-Team: LANGUAGE <LL@li.org>\\n"
|
||||
"MIME-Version: 1.0\\n"
|
||||
|
|
@ -171,9 +180,9 @@ msgstr ""
|
|||
|
||||
|
||||
def usage(code, msg=''):
|
||||
print __doc__ % globals()
|
||||
print >> sys.stderr, _(__doc__) % globals()
|
||||
if msg:
|
||||
print msg
|
||||
print >> sys.stderr, msg
|
||||
sys.exit(code)
|
||||
|
||||
|
||||
|
|
@ -239,15 +248,48 @@ class TokenEater:
|
|||
self.__state = self.__waiting
|
||||
self.__data = []
|
||||
self.__lineno = -1
|
||||
self.__freshmodule = 1
|
||||
|
||||
def __call__(self, ttype, tstring, stup, etup, line):
|
||||
# dispatch
|
||||
## import token
|
||||
## print >> sys.stderr, 'ttype:', token.tok_name[ttype], \
|
||||
## 'tstring:', tstring
|
||||
self.__state(ttype, tstring, stup[0])
|
||||
|
||||
def __waiting(self, ttype, tstring, lineno):
|
||||
# Do docstring extractions, if enabled
|
||||
if self.__options.docstrings:
|
||||
# module docstring?
|
||||
if self.__freshmodule:
|
||||
if ttype == tokenize.STRING:
|
||||
self.__addentry(safe_eval(tstring), lineno)
|
||||
self.__freshmodule = 0
|
||||
elif ttype not in (tokenize.COMMENT, tokenize.NL):
|
||||
self.__freshmodule = 0
|
||||
return
|
||||
# class docstring?
|
||||
if ttype == tokenize.NAME and tstring in ('class', 'def'):
|
||||
self.__state = self.__suiteseen
|
||||
return
|
||||
if ttype == tokenize.NAME and tstring in self.__options.keywords:
|
||||
self.__state = self.__keywordseen
|
||||
|
||||
def __suiteseen(self, ttype, tstring, lineno):
|
||||
# ignore anything until we see the colon
|
||||
if ttype == tokenize.OP and tstring == ':':
|
||||
self.__state = self.__suitedocstring
|
||||
|
||||
def __suitedocstring(self, ttype, tstring, lineno):
|
||||
# ignore any intervening noise
|
||||
if ttype == tokenize.STRING:
|
||||
self.__addentry(safe_eval(tstring), lineno)
|
||||
self.__state = self.__waiting
|
||||
elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
|
||||
tokenize.COMMENT):
|
||||
# there was no class docstring
|
||||
self.__state = self.__waiting
|
||||
|
||||
def __keywordseen(self, ttype, tstring, lineno):
|
||||
if ttype == tokenize.OP and tstring == '(':
|
||||
self.__data = []
|
||||
|
|
@ -263,58 +305,54 @@ class TokenEater:
|
|||
# of messages seen. Reset state for the next batch. If there
|
||||
# were no strings inside _(), then just ignore this entry.
|
||||
if self.__data:
|
||||
msg = EMPTYSTRING.join(self.__data)
|
||||
if not msg in self.__options.toexclude:
|
||||
entry = (self.__curfile, self.__lineno)
|
||||
linenos = self.__messages.get(msg)
|
||||
if linenos is None:
|
||||
self.__messages[msg] = [entry]
|
||||
else:
|
||||
linenos.append(entry)
|
||||
self.__addentry(EMPTYSTRING.join(self.__data))
|
||||
self.__state = self.__waiting
|
||||
elif ttype == tokenize.STRING:
|
||||
self.__data.append(safe_eval(tstring))
|
||||
# TBD: should we warn if we see anything else?
|
||||
|
||||
def __addentry(self, msg, lineno=None):
|
||||
if lineno is None:
|
||||
lineno = self.__lineno
|
||||
if not msg in self.__options.toexclude:
|
||||
entry = (self.__curfile, lineno)
|
||||
self.__messages.setdefault(msg, []).append(entry)
|
||||
|
||||
def set_filename(self, filename):
|
||||
self.__curfile = filename
|
||||
|
||||
def write(self, fp):
    """Write the collected messages to fp in .pot template form.

    fp -- writable file object receiving the output

    Output is pot_header (filled with the current time and __version__)
    followed, for every collected message, by its optional location
    comments, a 'msgid' line and an empty 'msgstr'.  Everything is
    written directly to fp; sys.stdout is never redirected, so the
    catalog is emitted exactly once and the caller's stdout is left
    untouched.
    """
    options = self.__options
    timestamp = time.ctime(time.time())
    # The time stamp in the header doesn't have the same format as that
    # generated by xgettext...
    print >> fp, pot_header % {'time': timestamp, 'version': __version__}
    for k, v in self.__messages.items():
        if not options.writelocations:
            pass
        # location comments are different b/w Solaris and GNU:
        elif options.locationstyle == options.SOLARIS:
            for filename, lineno in v:
                d = {'filename': filename, 'lineno': lineno}
                print >>fp, _('# File: %(filename)s, line: %(lineno)d') % d
        elif options.locationstyle == options.GNU:
            # fit as many locations on one line, as long as the
            # resulting line length doesn't exceed 'options.width'
            locline = '#:'
            for filename, lineno in v:
                d = {'filename': filename, 'lineno': lineno}
                s = _(' %(filename)s:%(lineno)d') % d
                if len(locline) + len(s) <= options.width:
                    locline = locline + s
                else:
                    # current line is full: flush it and start a new one
                    print >> fp, locline
                    locline = "#:" + s
            # flush the last, partially filled location line
            if len(locline) > 2:
                print >> fp, locline
        # TBD: sorting, normalizing
        print >> fp, 'msgid', normalize(k)
        print >> fp, 'msgstr ""\n'
|
||||
|
||||
|
||||
|
||||
def main():
|
||||
|
|
@ -322,11 +360,12 @@ def main():
|
|||
try:
|
||||
opts, args = getopt.getopt(
|
||||
sys.argv[1:],
|
||||
'ad:Ehk:Kno:p:S:Vvw:x:',
|
||||
'ad:DEhk:Kno:p:S:Vvw:x:',
|
||||
['extract-all', 'default-domain=', 'escape', 'help',
|
||||
'keyword=', 'no-default-keywords',
|
||||
'add-location', 'no-location', 'output=', 'output-dir=',
|
||||
'style=', 'verbose', 'version', 'width=', 'exclude-file=',
|
||||
'docstrings',
|
||||
])
|
||||
except getopt.error, msg:
|
||||
usage(1, msg)
|
||||
|
|
@ -347,6 +386,7 @@ def main():
|
|||
verbose = 0
|
||||
width = 78
|
||||
excludefilename = ''
|
||||
docstrings = 0
|
||||
|
||||
options = Options()
|
||||
locations = {'gnu' : options.GNU,
|
||||
|
|
@ -363,6 +403,8 @@ def main():
|
|||
options.outfile = arg + '.pot'
|
||||
elif opt in ('-E', '--escape'):
|
||||
options.escape = 1
|
||||
elif opt in ('-D', '--docstrings'):
|
||||
options.docstrings = 1
|
||||
elif opt in ('-k', '--keyword'):
|
||||
options.keywords.append(arg)
|
||||
elif opt in ('-K', '--no-default-keywords'):
|
||||
|
|
@ -426,7 +468,11 @@ def main():
|
|||
closep = 1
|
||||
try:
|
||||
eater.set_filename(filename)
|
||||
tokenize.tokenize(fp.readline, eater)
|
||||
try:
|
||||
tokenize.tokenize(fp.readline, eater)
|
||||
except tokenize.TokenError, e:
|
||||
sys.stderr.write('%s: %s, line %d, column %d\n' %
|
||||
(e[0], filename, e[1][0], e[1][1]))
|
||||
finally:
|
||||
if closep:
|
||||
fp.close()
|
||||
|
|
|
|||
6
setup.py
6
setup.py
|
|
@ -94,12 +94,14 @@ class MyDistribution(Distribution):
|
|||
data = []
|
||||
data.append('config_dir = %s' % `os.path.join(cwd, "config")`)
|
||||
data.append("install_data = %s" % `cwd`)
|
||||
self.create_conf_file(".", data)
|
||||
self.create_conf_file("", data)
|
||||
Distribution.run_commands(self)
|
||||
|
||||
|
||||
def create_conf_file(self, directory, data=[]):
|
||||
data.insert(0, "# this file is automatically created by setup.py")
|
||||
if not directory:
|
||||
directory = os.getcwd()
|
||||
filename = os.path.join(directory, self.config_file)
|
||||
# add metadata
|
||||
metanames = ("name", "version", "author", "author_email",
|
||||
|
|
@ -127,7 +129,7 @@ myname = "Bastian Kleineidam"
|
|||
myemail = "calvin@users.sourceforge.net"
|
||||
|
||||
setup (name = "linkchecker",
|
||||
version = "1.3.10",
|
||||
version = "1.3.11",
|
||||
description = "check HTML documents for broken links",
|
||||
author = myname,
|
||||
author_email = myemail,
|
||||
|
|
|
|||
|
|
@ -3,9 +3,11 @@ url file:///home/calvin/projects/linkchecker/test/html/http.html
|
|||
valid
|
||||
url http://www.garantiertnixgutt.bla
|
||||
name bad url
|
||||
warning Missing '/' at end of URL
|
||||
error
|
||||
url http://www.heise.de
|
||||
name ok
|
||||
warning Missing '/' at end of URL
|
||||
valid
|
||||
url http:/www.heise.de
|
||||
name one slash
|
||||
|
|
@ -38,10 +40,12 @@ valid
|
|||
url HtTP://WWW.hEIsE.DE
|
||||
cached
|
||||
name should be cached
|
||||
warning Missing '/' at end of URL
|
||||
valid
|
||||
url HTTP://WWW.HEISE.DE
|
||||
cached
|
||||
name should be cached
|
||||
warning Missing '/' at end of URL
|
||||
valid
|
||||
url http://www.heise.de/?quoted=ü
|
||||
name html entities
|
||||
|
|
|
|||
Loading…
Reference in a new issue