Makefile cleans and documentation

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@191 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2000-11-09 12:02:38 +00:00
parent d907866847
commit 27ae52dd56
29 changed files with 327 additions and 159 deletions

36
INSTALL
View file

@ -3,10 +3,25 @@
Requirements
------------
Python >= 2.0 from http://www.python.org/
Python >= 1.5.2 from http://www.python.org/
Distutils >= 0.9.1 from http://www.python.org/sigs/distutils-sig/
Python 1.6 includes the Distutils 0.9.1,
Python 2.0 includes the Distutils 1.0.1
Optional packages
-----------------
OpenSSL from http://www.openssl.org/
You will need Perl for Win32 (available from
http://www.activestate.com/ActivePerl) if you want to install OpenSSL
on Windows!
Setup
-----
Run "python setup.py config" to configure.
Linux users should run "python setup.py config -lcrypto" to use the SSL
module.
Run "python setup.py install" to install.
Run "python setup.py --help" for help.
Debian users can build the .deb package with "debian/rules binary" as
@ -21,9 +36,26 @@ to check.
Type "linkchecker -h" for help.
Note
----
If you want to make your own distribution with "python setup.py sdist",
you will need Distutils >= 0.9.4. Older versions hang when
they try to parse the MANIFEST.in file.
(Fast)CGI web interface
-----------------------
The *cgi files are three CGI script which you can use to run LinkChecker
The *cgi files are three CGI scripts which you can use to run LinkChecker
with a nice graphical web interface.
You can use and adjust the example HTML files in the lconline directory
to run the script.
1) Choose a CGI script. The simplest is lc.cgi and you need a web server
with CGI support.
The scripts lc.fcgi (I tested this a while ago) and lc.sz_fcgi
(untested) need a web server with FastCGI support.
2) Copy the script of your choice into the CGI directory.
3) Adjust the "action=..." parameter in lconline/lc_cgi.html
to point to your CGI script.
4) Load the lconline/index.html file, enter a URL and click on the
check button.
If something goes wrong, check the error log of your web server.

View file

@ -5,10 +5,11 @@ include lc.cgi lc.fcgi lc.sz_fcgi
include Makefile
include create.sql
include debian/rules debian/changelog debian/copyright debian/control
include debian/dirs debian/docs debian/links
include debian/dirs debian/docs debian/links debian/postinst
include debian/prerm
include DNS/README
include test/viewprof.py test/profiletest.py test/*.html
recursive-include locale *.po
recursive-include locale *.mo
recursive-include po *
recursive-include lconline *
recursive-include tests *.py

View file

@ -1,55 +1,81 @@
# This Makefile is only used by developers.
# You will need a Debian Linux system to use this Makefile!
VERSION=$(shell python setup.py --version)
PACKAGE=linkchecker
NAME=$(shell python setup.py --name)
HOST=treasure.calvinsplayground.de
LCOPTS=-ocolored -Ftext -Fhtml -Fgml -Fsql -Fcsv -R -t0 -v -itreasure.calvinsplayground.de -s
DEBPACKAGE=$(PACKAGE)_$(VERSION)_i386.deb
PACKAGE = linkchecker
NAME = $(shell python setup.py --name)
HOST=fsinfo.cs.uni-sb.de
LCOPTS=-ocolored -Ftext -Fhtml -Fgml -Fsql -Fcsv -R -t0 -v
DEBPACKAGE = $(PACKAGE)_$(VERSION)_i386.deb
SOURCES = \
linkcheck/Config.py \
linkcheck/FileUrlData.py \
linkcheck/FtpUrlData.py \
linkcheck/GopherUrlData.py \
linkcheck/HostCheckingUrlData.py \
linkcheck/HttpUrlData.py \
linkcheck/HttpsUrlData.py \
linkcheck/JavascriptUrlData.py \
linkcheck/Logging.py \
linkcheck/MailtoUrlData.py \
linkcheck/NntpUrlData.py \
linkcheck/TelnetUrlData.py \
linkcheck/Threader.py \
linkcheck/UrlData.py \
linkcheck/__init__.py \
linkcheck/lc_cgi.py \
linkchecker
DESTDIR=/.
.PHONY: test clean files upload dist install all
.PHONY: test clean distclean package files upload dist locale all
all:
@echo "Read the file INSTALL to see how to build and install"
clean:
fakeroot debian/rules clean
rm -f .time.po
-python setup.py clean --all
$(MAKE) -C po clean
distclean: clean
distclean: clean cleandeb
rm -rf dist
rm -f $(PACKAGE)-out.* VERSION
rm -f $(PACKAGE)-out.* VERSION LinkCheckerConf.py* MANIFEST
.time.po:
$(MAKE) -C po
touch .time.po
cleandeb:
rm -rf debian/$(PACKAGE) debian/tmp
rm -f debian/*.debhelper debian/{files,substvars}
rm -f configure-stamp build-stamp
dist: .time.po
rm -rf debian/tmp
python setup.py sdist --formats=gztar,zip bdist_rpm bdist_wininst
dist: locale
fakeroot debian/rules binary
# cleandeb because distutils choke on dangling symlinks
# (linkchecker.1 -> undocumented.1)
$(MAKE) cleandeb
python setup.py sdist --formats=gztar,zip bdist_rpm
# extra run without SSL compilation
python setup.py bdist_wininst
mv -f ../$(DEBPACKAGE) dist
package:
cd dist && dpkg-scanpackages . ../override.txt | gzip --best > Packages.gz
files: .time.po
files: locale
./$(PACKAGE) $(LCOPTS) -i$(HOST) http://$(HOST)/~calvin/
VERSION:
echo $(VERSION) > VERSION
upload: dist package files VERSION
upload: distclean dist package files VERSION
scp debian/changelog shell1.sourceforge.net:/home/groups/$(PACKAGE)/htdocs/changes.txt
scp linkchecker-out.* shell1.sourceforge.net:/home/groups/$(PACKAGE)/htdocs
scp VERSION shell1.sourceforge.net:/home/groups/$(PACKAGE)/htdocs/raw/
scp dist/* shell1.sourceforge.net:/home/groups/ftp/pub/$(PACKAGE)/
ssh -C -t shell1.sourceforge.net "cd /home/groups/$(PACKAGE) && make"
test: .time.po
test:
rm -f test/*.result
@for i in test/*.html; do \
echo "Testing $$i. Results are in $$i.result"; \
./$(PACKAGE) -r1 -o text -N"news.rz.uni-sb.de" -v -a $$i > $$i.result 2>&1; \
done
locale:
$(MAKE) -C po

10
README
View file

@ -11,13 +11,14 @@ o output can be colored or normal text, HTML, SQL, CSV or a GML sitemap
graph
o HTTP/1.1, HTTPS, FTP, mailto:, news:, nntp:, Gopher, Telnet and local
file links are supported.
Javascript links are currently ignored
o restrict link checking with regular expression filters for URLs
o proxy support
o give username/password for HTTP and FTP authorization
o robots.txt exclusion protocol support
o i18n support
o command line interface
o (Fast)CGI web interface
o (Fast)CGI web interface (requires HTTP server)
Installing, Requirements, Running
@ -31,7 +32,8 @@ LinkChecker is licensed under the GNU Public License.
Credits go to Guido van Rossum for making Python. His hovercraft is
full of eels!
As this program is directly derived from my Java link checker, additional
credits go to Robert Forsman (the author of JCheckLinks).
credits go to Robert Forsman (the author of JCheckLinks) and his
robots.txt parse algorithm.
I want to thank everybody who gave me feedback, bug reports and
suggestions.
@ -48,10 +50,14 @@ So for example 1.1.5 is the fifth release of the 1.1 development package.
Included packages
-----------------
httplib from http://www.lyra.org/greg/python/
httpslib from http://home.att.net/~nvsoft1/ssl_wrapper.html
DNS see DNS/README
fcgi.py and sz_fcgi.py from http://saarland.sz-sb.de/~ajung/sz_fcgi/
fintl.py from http://sourceforge.net/snippet/detail.php?type=snippet&id=100059
Note that the following packages are modified by me:
httplib.py (renamed to http11lib.py and a bug fixed)
fcgi.py (implemented streamed output)
sz_fcgi.py (simplified the code)
DNS/Lib.py:566 fixed rdlength name error

7
TODO
View file

@ -1,11 +1,6 @@
High priority
o Proxy geht nicht:
- getrennter http/https/ftp proxy
- environment Variablen werden bei RobotParser benutzt, also muß ich
das auch machen.
o Robot parser testen
o Use Python 2.0 features
o I want to be able to supply a "break" command even when multiple
threads are running.

33
debian/changelog vendored
View file

@ -1,13 +1,30 @@
linkchecker (1.3.0) unstable; urgency=low
linkchecker (1.2.8) unstable; urgency=low
* require Python 2.0 so we can get rid of the robots.txt parser
and use the one provided within the Python library
* added <script src=> urls for link testing. Thanks to Tomas Cox
<cox@idecnet.com> for the suggestion
* we get now all proxy configuration values from $http_proxy,
$https_proxy on Unix,Windows and from Internet Config on the Mac
* INSTALL: more documentation for the CGI scripts
* Makefile: better cleaning (clean, cleandeb, distclean)
-- Bastian Kleineidam <calvin@users.sourceforge.net> Thu, 2 Nov 2000 11:17:16 +0100
-- Bastian Kleineidam <calvin@users.sourceforge.net> Thu, 9 Nov 2000 12:35:03 +0100
linkchecker (1.2.7) unstable; urgency=low
* new robot.txt parser module which is interface compatible with the
one in Python 2.0
* debian/control: new fields Build-Depends, Build-Depends-Indep
* debian/control: Architecture is any, not i386
* debian/control: Standards version 3.1.1,
* debian/control: depend on Python 1.5.2
* debian/copyright: first line was too long
* debian/postinst: compile .py files
* debian/rules: new configuration target
* debian/prerm: new file, delete compiled .py files
* use Python tools for i18n (backported from Python 2.0)
* proxy configuration is now detected automatically from system
(environment) variables. NOTE: this means the --proxy options
are gone!
* add <script src=> to url list
* include ssl.c again (was missing in 1.2.6)
-- Bastian Kleineidam <calvin@users.sourceforge.net> Sat, 4 Nov 2000 10:45:08 +0100
linkchecker (1.2.6) unstable; urgency=low

21
debian/control vendored
View file

@ -2,24 +2,27 @@ Source: linkchecker
Section: web
Priority: optional
Maintainer: Bastian Kleineidam <calvin@users.sourceforge.net>
Standards-Version: 3.0.1
Build-Depends: python-base (>= 1.5.2), python-base (<< 1.6), python-dev (>= 1.5.2), python-dev (<< 1.6), python-distutils (>= 0.9.4), debhelper
Build-Depends-Indep: gettext
Standards-Version: 3.1.1
Package: linkchecker
Architecture: i386
Depends: python-base (= 1.5.2)
Suggests: libssl09|libssl095a
Architecture: any
Depends: python-base (>= 1.5.2), python-base (<< 1.6)
Suggests: libssl09|libssl095a, httpd
Description: check/validate/test your HTML documents for broken links/URLs
Features:
o recursive checking
o multithreaded
o output can be colored or normal text, HTML, SQL, CSV or a GML sitemap
graph
o HTTP/1.1, HTTPS, FTP, mailto:, news:, Gopher, Telnet and local file links
are supported
o HTTP/1.1, HTTPS, FTP, mailto:, nntp:, news:, Gopher, Telnet and local
file links are supported
Javascript links are currently ignored
o restrict link checking with regular expression filters for URLs
o HTTP proxy support
o proxy support
o give username/password for HTTP and FTP authorization
o robots.txt exclusion protocol support
o internationalization support
o (Fast)CGI web interface
o i18n support
o command line interface
o (Fast)CGI web interface (requires HTTP server)

4
debian/copyright vendored
View file

@ -1,5 +1,5 @@
This is linkchecker, written and maintained by Bastian Kleineidam <calvin@users.sourceforge.net>
on Sat, 8 Jan 2000 11:00:35 +0100.
This is LinkChecker, written and maintained by Bastian Kleineidam
<calvin@users.sourceforge.net> on Sat, 8 Jan 2000 11:00:35 +0100.
The original source can always be found at:
http://linkchecker.sourceforge.net

47
debian/rules vendored
View file

@ -1,47 +1,50 @@
#!/usr/bin/make -f
# Sample debian/rules that uses debhelper.
# GNU copyright 1997 to 1999 by Joey Hess.
DOC_DIR = debian/tmp/usr/share/doc/linkchecker
PACKAGE=linkchecker
DOCDIR = debian/$(PACKAGE)/usr/share/doc/$(PACKAGE)
# Uncomment this to turn on verbose mode.
#export DH_VERBOSE=1
# This is the debhelper compatibility version to use.
export DH_COMPAT=1
export DH_COMPAT=2
build: build-stamp
# This has to be exported to make some magic below work.
export DH_OPTIONS
configure: configure-stamp
configure-stamp:
dh_testdir
python setup.py config -lcrypto
touch configure-stamp
build: configure-stamp build-stamp
build-stamp:
dh_testdir
# Add here commands to compile the package.
rm -rf debian/tmp
rm -rf debian/$(PACKAGE)
python setup.py build
touch build-stamp
clean:
dh_testdir
dh_testroot
rm -f build-stamp
# Add here commands to clean up after the build process.
python setup.py clean --all
rm -f build-stamp configure-stamp
$(MAKE) clean
dh_clean
install: build
dh_testdir
dh_testroot
dh_clean -k
dh_installdirs
# ha! the root option finally made it into distutils
python setup.py install --root=`pwd`/debian/tmp
$(MAKE) locale
python setup.py install --root=`pwd`/debian/$(PACKAGE) --no-compile
# install additional doc files
install -c -m 644 DNS/README $(DOC_DIR)/README_DNS.txt
install -d -m 755 $(DOC_DIR)/test
install -c -m 644 test/*.html test/*.py $(DOC_DIR)/test
install -d -m 755 $(DOC_DIR)/tests
install -c -m 644 tests/*.py $(DOC_DIR)/tests
install -c -m 644 DNS/README $(DOCDIR)/README_DNS.txt
install -d -m 755 $(DOCDIR)/test
install -c -m 644 test/*.html test/*.py $(DOCDIR)/test
install -d -m 755 $(DOCDIR)/tests
install -c -m 644 tests/*.py $(DOCDIR)/tests
# Build architecture-independent files here.
@ -80,4 +83,4 @@ binary-arch: build install
dh_builddeb
binary: binary-indep binary-arch
.PHONY: build clean binary-indep binary-arch binary install
.PHONY: build clean binary-indep binary-arch binary install configure

View file

@ -1,4 +1,3 @@
<!doctype html public "-//w3c//dtd html 4.0//en">
<html><head>
<title>LinkChecker Online</title>
</head>

View file

@ -1,4 +1,3 @@
<!doctype html public "-//w3c//dtd html 4.0//en">
<html><head>
<title>LinkChecker Online</title>
</head>

View file

@ -1,4 +1,3 @@
<!doctype html public "-//w3c//dtd html 4.0//en">
<html><head>
<title>Empty</title>
</head>

View file

@ -21,8 +21,8 @@ This module stores
* Other configuration options
"""
import ConfigParser,sys,os,re,UserDict,string,time
import Logging,LinkCheckerConf
import ConfigParser, sys, os, re, UserDict, string, time
import Logging, LinkCheckerConf
from os.path import expanduser,normpath,normcase,join,isfile
from types import StringType
from urllib import getproxies
@ -377,8 +377,11 @@ class Configuration(UserDict.UserDict):
used in the linkchecker module.
"""
debug("DEBUG: reading configuration from %s\n" % files)
cfgparser = ConfigParser.ConfigParser()
cfgparser.read(files)
try:
cfgparser = ConfigParser.ConfigParser()
cfgparser.read(files)
except ConfigParser.Error:
return
section="output"
try:
@ -387,16 +390,16 @@ class Configuration(UserDict.UserDict):
self.data['log'] = self.newLogger(log)
else:
self.warn(_("invalid log option '%s'") % log)
except ConfigParser.NoOptionError: pass
except ConfigParser.Error: pass
try:
if cfgparser.getboolean(section, "verbose"):
self.data["verbose"] = 1
self.data["warnings"] = 1
except ConfigParser.NoOptionError: pass
except ConfigParser.Error: pass
try: self.data["quiet"] = cfgparser.getboolean(section, "quiet")
except ConfigParser.NoOptionError: pass
except ConfigParser.Error: pass
try: self.data["warnings"] = cfgparser.getboolean(section, "warnings")
except ConfigParser.NoOptionError: pass
except ConfigParser.Error: pass
try:
filelist = string.split(cfgparser.get(section, "fileoutput"))
for arg in filelist:
@ -404,12 +407,12 @@ class Configuration(UserDict.UserDict):
if Loggers.has_key(arg) and arg != "blacklist":
self.data['fileoutput'].append(
self.newLogger(arg, {'fileoutput':1}))
except ConfigParser.NoOptionError: pass
except ConfigParser.Error: pass
for key in Loggers.keys():
if cfgparser.has_section(key):
for opt in cfgparser.options(key):
try: self.data[key][opt] = cfgparser.get(key, opt)
except ConfigParser.NoOptionError: pass
except ConfigParser.Error: pass
section="checking"
try:
@ -418,28 +421,28 @@ class Configuration(UserDict.UserDict):
self.disableThreads()
else:
self.enableThreads(num)
except ConfigParser.NoOptionError: pass
except ConfigParser.Error: pass
try: self.data["anchors"] = cfgparser.getboolean(section, "anchors")
except ConfigParser.NoOptionError: pass
except ConfigParser.Error: pass
try:
num = cfgparser.getint(section, "recursionlevel")
if num<0:
self.error(_("illegal recursionlevel number %d") % num)
self.data["recursionlevel"] = num
except ConfigParser.NoOptionError: pass
except ConfigParser.Error: pass
try:
self.data["robotstxt"] = cfgparser.getboolean(section,
"robotstxt")
except ConfigParser.NoOptionError: pass
except ConfigParser.Error: pass
try: self.data["strict"] = cfgparser.getboolean(section, "strict")
except ConfigParser.NoOptionError: pass
except ConfigParser.Error: pass
try:
self.data["warningregex"] = re.compile(cfgparser.get(section,
"warningregex"))
except ConfigParser.NoOptionError: pass
except ConfigParser.Error: pass
try:
self.data["nntpserver"] = cfgparser.get(section, "nntpserver")
except ConfigParser.NoOptionError: pass
except ConfigParser.Error: pass
section = "authentication"
try:
@ -450,7 +453,7 @@ class Configuration(UserDict.UserDict):
tuple[0] = re.compile(tuple[0])
self.data["authentication"].append(tuple)
i = i + 1
except ConfigParser.NoOptionError: pass
except ConfigParser.Error: pass
section = "filtering"
try:
@ -461,9 +464,9 @@ class Configuration(UserDict.UserDict):
self.data["externlinks"].append((re.compile(tuple[0]),
int(tuple[1])))
i = i + 1
except ConfigParser.NoOptionError: pass
except ConfigParser.Error: pass
try: self.data["internlinks"].append(re.compile(cfgparser.get(section, "internlinks")))
except ConfigParser.NoOptionError: pass
except ConfigParser.Error: pass
try: self.data["allowdeny"] = cfgparser.getboolean(section, "allowdeny")
except ConfigParser.NoOptionError: pass
except ConfigParser.Error: pass

View file

@ -27,7 +27,9 @@ ExcList.extend([
])
class FtpUrlData(UrlData):
"""Url link with ftp scheme."""
"""
Url link with ftp scheme.
"""
def checkConnection(self, config):
_user, _password = self._getUserPassword(config)

View file

@ -15,7 +15,7 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
"""
import httplib,urlparse,sys,time,re
import http11lib,urlparse,sys,time,re
import Config,StringUtil,robotparser2
from UrlData import UrlData
from urllib import splittype, splithost
@ -67,7 +67,8 @@ class HttpUrlData(UrlData):
| "503" ; Service Unavailable
| extension-code
"""
self.proxy = config['proxy'].get(self.get_scheme(), None)
self.proxy = config["proxy"].get(self.get_scheme(), None)
if self.proxy:
self.proxy = splittype(self.proxy)[1]
self.proxy = splithost(self.proxy)[0]
@ -176,7 +177,7 @@ class HttpUrlData(UrlData):
return self.urlConnection.getreply()
def _getHTTPObject(self, host):
return httplib.HTTP(host)
return http11lib.HTTP(host)
def getContent(self):
if not self.data:
@ -193,13 +194,12 @@ class HttpUrlData(UrlData):
def isHtml(self):
if not (self.valid and self.mime):
return 0
return self.mime.gettype()=="text/html"
return self.mime.gettype()[:9]=="text/html"
def robotsTxtAllowsUrl(self, config):
roboturl="%s://%s/robots.txt" % self.urlTuple[0:2]
if not config.robotsTxtCache_has_key(roboturl):
rp = robotparser2.RobotFileParser(roboturl)
robotparser2.debug=1
rp.read()
config.robotsTxtCache_set(roboturl, rp)
rp = config.robotsTxtCache_get(roboturl)

View file

@ -18,20 +18,21 @@
from UrlData import UrlData
from HttpUrlData import HttpUrlData
from linkcheck import _
import httplib,socket
_supportHttps=hasattr(socket, 'ssl')
_supportHttps=1
try: import httpslib
except ImportError: _supportHttps=0
class HttpsUrlData(HttpUrlData):
"""Url link with https scheme"""
def _getHTTPObject(self, host):
return httplib.HTTPS(host)
return httpslib.HTTPS(host)
def check(self, config):
if _supportHttps:
HttpUrlData.check(self, config)
else:
self.setWarning(_("HTTPS not supported"))
self.setWarning(_("HTTPS url ignored"))
self.logMe(config)
def get_scheme(self):

View file

@ -18,7 +18,7 @@
import os,re,string,DNS,sys,Config,cgi,urllib,linkcheck
from rfc822 import AddressList
from HostCheckingUrlData import HostCheckingUrlData
from smtplib import SMTP,SMTPException
from smtplib import SMTP
from linkcheck import _
# regular expression for RFC2368 compliant mailto: scanning
@ -32,10 +32,7 @@ if os.name=='posix':
class MailtoUrlData(HostCheckingUrlData):
"Url link with mailto scheme"
def get_scheme(self):
return "mailto"
def buildUrl(self):
HostCheckingUrlData.buildUrl(self)
self.headers = {}
@ -88,8 +85,9 @@ class MailtoUrlData(HostCheckingUrlData):
info = self.urlConnection.verify(user)
if info[0]==250:
self.setInfo("Verified adress: "+info[1])
except SMTPException:
pass
except:
type, value = sys.exc_info()[:2]
#print type,value
if smtpconnect: break
if not smtpconnect:
@ -120,3 +118,7 @@ class MailtoUrlData(HostCheckingUrlData):
def getCacheKey(self):
return self.get_scheme()+":"+str(self.adresses)
def get_scheme(self):
return "mailto"

View file

@ -28,7 +28,7 @@ ExcList.extend([nntplib.error_reply,
class NntpUrlData(UrlData):
"Url link with NNTP scheme"
def get_scheme(self):
return "nntp"

View file

@ -37,8 +37,8 @@ _linkMatcher = r"""
< # open tag
\s* # whitespace
%s # tag name
[^>]*? # skip leading attributes
\s+ # whitespace
[^>]*? # skip leading attributes
%s # attrib name
\s* # whitespace
= # equal sign
@ -91,7 +91,8 @@ class UrlData:
self.extern = 1
self.data = None
self.html_comments = []
def setError(self, s):
self.valid=0
self.errorString = _("Error")+": "+s
@ -233,6 +234,7 @@ class UrlData:
def allowsRecursion(self, config):
Config.debug("extern: %s\n" % str(self.extern))
return self.valid and \
self.isHtml() and \
not self.cached and \

View file

@ -26,12 +26,12 @@ class error(Exception):
# i18n support
import LinkCheckerConf
try:
import os
from gettext import gettext, bindtextdomain, textdomain
import fintl,os
gettext = fintl.gettext
domain = 'linkcheck'
localedir = os.path.join(LinkCheckerConf.install_data, 'locale')
bindtextdomain(domain, localedir)
textdomain(domain)
fintl.bindtextdomain(domain, localedir)
fintl.textdomain(domain)
except ImportError:
def gettext(msg):
return msg

View file

@ -2,8 +2,8 @@
# imports and checks
import sys
if (not hasattr(sys, 'version_info')) or sys.version_info < (2,0,0,'final',0):
raise SystemExit, "This program requires Python 2.0 or later."
if sys.version[:5] < "1.5.2":
raise SystemExit, "This program requires Python 1.5.2 or later."
import getopt,re,string,os,urlparse
# 90 seconds timeout for all connections
#import timeoutsocket

View file

@ -113,4 +113,4 @@
# At the moment, authentication is used/needed for http[s] and ftp links.
[authentication]
#entry1=^http://treasure\.calvinsplayground\.de/~calvin/isnichmehr/ lebowski lebowski
#entry2=^ftp://void.cs.uni-sb.de calvin schnuckl
#entry2=^ftp://void.cs.uni-sb.de calvin hutzli

View file

@ -1,7 +1,7 @@
# we use the scripts in Tools/i18n of the Python distribution
I18NTOOLS=/usr/local/src/Python-2.0/Tools/i18n
GETTEXT=$(I18NTOOLS)/pygettext.py
MSGFMT=$(I18NTOOLS)/msgfmt.py
# we use the scripts in Tools/i18n of the Python 2.0 distribution
I18NTOOLS=.
GETTEXT=python $(I18NTOOLS)/pygettext.py
MSGFMT=python $(I18NTOOLS)/msgfmt.py
MSGMERGE=msgmerge
SOURCES=\
../linkcheck/Config.py \

View file

@ -95,8 +95,9 @@ msgstr "Ung
msgid "hours"
msgstr "Stunden"
msgid "HTTPS not supported"
msgstr "HTTPS nicht unterstützt"
#, fuzzy
msgid "HTTPS url ignored"
msgstr "Javascript url ignoriert"
msgid "Start checking at %s\n"
msgstr "Beginne Prüfen am %s\n"
@ -413,6 +414,9 @@ msgstr "Kein NNTP Server angegeben; pr
msgid "Parent URL"
msgstr "Vater URL"
#~ msgid "HTTPS not supported"
#~ msgstr "HTTPS nicht unterstützt"
#~ msgid " (%.3f seconds)"
#~ msgstr " (%.3f Sekunden)"

View file

@ -87,8 +87,9 @@ msgstr "Argumen ill
msgid "hours"
msgstr "heures"
msgid "HTTPS not supported"
msgstr "HTTPS not supported"
#, fuzzy
msgid "HTTPS url ignored"
msgstr "Url Javascript ignorée"
msgid "Start checking at %s\n"
msgstr "Démarrage du controle à %s\n"
@ -409,5 +410,8 @@ msgstr "Auncun serveur NNTP sp
msgid "Parent URL"
msgstr "URL Parente"
#~ msgid "HTTPS not supported"
#~ msgstr "HTTPS not supported"
#~ msgid "Illegal NNTP link syntax"
#~ msgstr "Syntaxe illégale du lien NNTP"

View file

@ -36,7 +36,7 @@ class RobotFileParser:
connection.endheaders()
status, text, mime = connection.getreply()
if status in [301,302]:
tries += 1
tries = tries + 1
self.set_url(mime.getheader("Location"))
else:
break
@ -56,7 +56,7 @@ class RobotFileParser:
for line in lines:
line = string.strip(line)
linenumber += 1
linenumber = linenumber + 1
if not line:
if state==1:
_debug("line %d: no rules found" % linenumber)
@ -128,7 +128,7 @@ class RobotFileParser:
def __str__(self):
ret = ""
for entry in self.entries:
ret += str(entry) + "\n"
ret = ret + str(entry) + "\n"
return ret
class RuleLine:
@ -151,9 +151,9 @@ class Entry:
def __str__(self):
ret = ""
for agent in self.useragents:
ret += "User-agent: "+agent+"\n"
ret = ret + "User-agent: "+agent+"\n"
for line in self.rulelines:
ret += str(line) + "\n"
ret = ret + str(line) + "\n"
return ret
def applies_to(self, useragent):

107
setup.py
View file

@ -24,7 +24,7 @@ from distutils.command.install import install
from distutils.command.config import config
from distutils import util
from distutils.file_util import write_file
import os,string
import os
class LCInstall(install):
@ -44,12 +44,66 @@ class LCInstall(install):
data.append("%s = %s" % (attr, `val`))
from pprint import pformat
data.append('outputs = %s' % pformat(self.get_outputs()))
self.distribution.create_conf_file(self.install_lib, data)
self.distribution.create_conf_file(self.install_lib, data)
class LCConfig(config):
    """Distutils ``config`` command that additionally probes for SSL.

    Adds the ``--ssl-include-dirs`` and ``--ssl-library-dirs`` options,
    tries to link a test program against the ``ssl`` library and writes
    the outcome into the distribution's configuration file in the
    current directory.
    """

    user_options = config.user_options + [
        ('ssl-include-dirs=', None,
         "directories to search for SSL header files"),
        ('ssl-library-dirs=', None,
         "directories to search for SSL library files"),
        ]

    def initialize_options (self):
        """Reset the inherited options and the two SSL options to None."""
        config.initialize_options(self)
        self.ssl_include_dirs = None
        self.ssl_library_dirs = None

    def finalize_options(self):
        """Normalize the inherited options and fill in SSL defaults."""
        # we have some default include and library directories
        self.basic_finalize_options()
        if self.ssl_include_dirs is None:
            self.ssl_include_dirs = ['/usr/include/openssl',
                                     '/usr/local/include/openssl']
        if self.ssl_library_dirs is None:
            self.ssl_library_dirs = ['/usr/lib',
                                     '/usr/local/lib']

    def basic_finalize_options(self):
        """fix up types of option values

        Turns ``include_dirs``, ``libraries`` and ``library_dirs`` into
        lists, whether they are unset (None) or were given as a single
        string.
        """
        # this should be in config.finalize_options
        # I submitted a patch
        #
        # Imported locally because the module only imports ``os``; the
        # string type is taken from a literal so that no ``types`` import
        # is needed (the original referenced an undefined ``StringType``).
        import string
        string_type = type("")
        if self.include_dirs is None:
            self.include_dirs = self.distribution.include_dirs or []
        elif type(self.include_dirs) is string_type:
            self.include_dirs = string.split(self.include_dirs, os.pathsep)
        if self.libraries is None:
            self.libraries = []
        elif type(self.libraries) is string_type:
            self.libraries = [self.libraries]
        if self.library_dirs is None:
            self.library_dirs = []
        elif type(self.library_dirs) is string_type:
            self.library_dirs = [self.library_dirs]

    def run (self):
        """Run the stock config command, probe for SSL and save the result."""
        # try to compile a test program with SSL
        config.run(self)
        have_ssl = self.check_lib("ssl",
                                  library_dirs = self.ssl_library_dirs,
                                  include_dirs = self.ssl_include_dirs,
                                  headers = ["ssl.h"])
        # write the result in the configuration file
        data = [
            "have_ssl = %d" % (have_ssl),
            "ssl_library_dirs = %s" % repr(self.ssl_library_dirs),
            "ssl_include_dirs = %s" % repr(self.ssl_include_dirs),
            "libraries = %s" % repr(['ssl', 'crypto']),
            "install_data = %s" % repr(os.getcwd()),
            ]
        self.distribution.create_conf_file(".", data)
@ -59,10 +113,32 @@ class LCDistribution(Distribution):
self.config_file = self.get_name()+"Conf.py"
def run_commands(self):
    """Run the requested commands, checking the SSL setup first.

    The SSL check is skipped when "config" is among the requested
    commands.
    """
    config_requested = "config" in self.commands
    if not config_requested:
        self.check_ssl()
    Distribution.run_commands(self)
def check_ssl(self):
    """Enable the ``ssl`` extension module if the saved configuration allows.

    Exits with a hint to run the config command when the configuration
    file does not exist. Otherwise the generated LinkCheckerConf module
    is imported; SSL compilation is skipped (with an announcement) for
    bdist_wininst on a non-Windows platform, and enabled when the
    configuration recorded ``have_ssl``.
    """
    if not os.path.exists(self.config_file):
        raise SystemExit("please run 'python setup.py config'")
    import LinkCheckerConf
    wininst_off_windows = ('bdist_wininst' in self.commands
                           and os.name != 'nt')
    if wininst_off_windows:
        self.announce("bdist_wininst command found on non-Windows "
                      "platform. Disabling SSL compilation")
        return
    if not LinkCheckerConf.have_ssl:
        return
    ssl_extension = Extension(
        'ssl', ['ssl.c'],
        include_dirs=LinkCheckerConf.ssl_include_dirs,
        library_dirs=LinkCheckerConf.ssl_library_dirs,
        libraries=LinkCheckerConf.libraries)
    self.ext_modules = [ssl_extension]
def create_conf_file(self, directory, data=[]):
data.insert(0, "# this file is automatically created by setup.py")
filename = os.path.join(directory, self.config_file)
# metadata
# add metadata
metanames = dir(self.metadata) + \
['fullname', 'contact', 'contact_email']
for name in metanames:
@ -73,12 +149,16 @@ class LCDistribution(Distribution):
util.execute(write_file, (filename, data),
"creating %s" % filename, self.verbose>=1, self.dry_run)
myname = "Bastian Kleineidam"
myemail = "calvin@users.sourceforge.net"
setup (name = "LinkChecker",
version = "1.3.0",
version = "1.2.8",
description = "check links of HTML pages",
author = "Bastian Kleineidam",
author_email = "calvin@users.sourceforge.net",
author = myname,
author_email = myemail,
maintainer = myname,
maintainer_email = myemail,
url = "http://linkchecker.sourceforge.net/",
licence = "GPL",
long_description =
@ -94,22 +174,21 @@ o HTTP/1.1, HTTPS, FTP, mailto:, news:, nntp:, Gopher, Telnet and local
file links are supported.
Javascript links are currently ignored
o restrict link checking with regular expression filters for URLs
o HTTP proxy support
o proxy support
o give username/password for HTTP and FTP authorization
o robots.txt exclusion protocol support
o internationalization support
o (Fast)CGI web interface
o i18n support
o command line interface
o (Fast)CGI web interface (requires HTTP server)
""",
distclass = LCDistribution,
cmdclass = {'config': LCConfig, 'install': LCInstall},
packages = ['','DNS','linkcheck'],
scripts = ['linkchecker'],
data_files = [('share/locale/de/LC_MESSAGES',
['locale/de/LC_MESSAGES/linkcheck.mo',
'locale/de/LC_MESSAGES/linkcheck.po']),
['locale/de/LC_MESSAGES/linkcheck.mo']),
('share/locale/fr/LC_MESSAGES',
['locale/fr/LC_MESSAGES/linkcheck.mo',
'locale/fr/LC_MESSAGES/linkcheck.po']),
['locale/fr/LC_MESSAGES/linkcheck.mo']),
('share/linkchecker',['linkchecker.bat',
'linkcheckerrc',]),
],

View file

@ -6,7 +6,6 @@ Just some HTTP links
<a href="http://">
<a href="http:/">
<a href="http:">
<a href="http://localhost:/">
<a href="http://www.blubb.de/stalter&sohn">
<a name="iswas"> <!-- anchor for test2.html -->
<a href=http://slashdot.org/>

View file

@ -16,11 +16,3 @@
<a href="ftp://treasure.calvinsplayground.de//pub">
<a href="ftp://treasure.calvinsplayground.de////////pub">
<a href="ftp:///treasure.calvinsplayground.de/pub">
< img src="blubb_image">
< img lowsrc="blubb_lowimage">
< link href="blubb_link">
< script src="blubb_script">
< area href="blubb_area">
< body background="blubb_body">
< area href="blubb_href">
< form action="blubb_action">