mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-09 07:04:44 +00:00
Makefile cleans and documentation
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@191 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
d907866847
commit
27ae52dd56
29 changed files with 327 additions and 159 deletions
36
INSTALL
36
INSTALL
|
|
@ -3,10 +3,25 @@
|
|||
|
||||
Requirements
|
||||
------------
|
||||
Python >= 2.0 from http://www.python.org/
|
||||
Python >= 1.5.2 from http://www.python.org/
|
||||
Distutils >= 0.9.1 from http://www.python.org/sigs/distutils-sig/
|
||||
Python 1.6 includes the Distutils 0.9.1,
|
||||
Python 2.0 includes the Distutils 1.0.1
|
||||
|
||||
|
||||
Optional packages
|
||||
-----------------
|
||||
OpenSSL from http://www.openssl.org/
|
||||
You will need Perl for Win32 (available from
|
||||
http://www.activestate.com/ActivePerl) if you want to install OpenSSL
|
||||
on Windows!
|
||||
|
||||
|
||||
Setup
|
||||
-----
|
||||
Run "python setup.py config" to configure.
|
||||
Linux users should run "python setup.py config -lcrypto" to use the SSL
|
||||
module.
|
||||
Run "python setup.py install" to install.
|
||||
Run "python setup.py --help" for help.
|
||||
Debian users can build the .deb package with "debian/rules binary" as
|
||||
|
|
@ -21,9 +36,26 @@ to check.
|
|||
Type "linkchecker -h" for help.
|
||||
|
||||
|
||||
Note
|
||||
----
|
||||
If you want to make your own distribution with "python setup.py sdist",
|
||||
you will need Distutils >= 0.9.4. Older versions hang when
|
||||
they try to parse the MANIFEST.in file.
|
||||
|
||||
|
||||
(Fast)CGI web interface
|
||||
-----------------------
|
||||
The *cgi files are three CGI script which you can use to run LinkChecker
|
||||
The *cgi files are three CGI scripts which you can use to run LinkChecker
|
||||
with a nice graphical web interface.
|
||||
You can use and adjust the example HTML files in the lconline directory
|
||||
to run the script.
|
||||
1) Choose a CGI script. The simplest is lc.cgi and you need a web server
|
||||
with CGI support.
|
||||
The scripts lc.fcgi (I tested this a while ago) and lc.sz_fcgi
|
||||
(untested) need a web server with FastCGI support.
|
||||
2) Copy the script of your choice into the CGI directory.
|
||||
3) Adjust the "action=..." parameter in lconline/lc_cgi.html
|
||||
to point to your CGI script.
|
||||
4) Load the lconline/index.html file, enter a URL and click on the
|
||||
check button
|
||||
If something goes wrong, check the error log of your web server.
|
||||
|
|
|
|||
|
|
@ -5,10 +5,11 @@ include lc.cgi lc.fcgi lc.sz_fcgi
|
|||
include Makefile
|
||||
include create.sql
|
||||
include debian/rules debian/changelog debian/copyright debian/control
|
||||
include debian/dirs debian/docs debian/links
|
||||
include debian/dirs debian/docs debian/links debian/postinst
|
||||
include debian/prerm
|
||||
include DNS/README
|
||||
include test/viewprof.py test/profiletest.py test/*.html
|
||||
recursive-include locale *.po
|
||||
recursive-include locale *.mo
|
||||
recursive-include po *
|
||||
recursive-include lconline *
|
||||
recursive-include tests *.py
|
||||
|
|
|
|||
64
Makefile
64
Makefile
|
|
@ -1,55 +1,81 @@
|
|||
# This Makefile is only used by developers.
|
||||
# You will need a Debian Linux system to use this Makefile!
|
||||
VERSION=$(shell python setup.py --version)
|
||||
PACKAGE=linkchecker
|
||||
NAME=$(shell python setup.py --name)
|
||||
HOST=treasure.calvinsplayground.de
|
||||
LCOPTS=-ocolored -Ftext -Fhtml -Fgml -Fsql -Fcsv -R -t0 -v -itreasure.calvinsplayground.de -s
|
||||
DEBPACKAGE=$(PACKAGE)_$(VERSION)_i386.deb
|
||||
PACKAGE = linkchecker
|
||||
NAME = $(shell python setup.py --name)
|
||||
HOST=fsinfo.cs.uni-sb.de
|
||||
LCOPTS=-ocolored -Ftext -Fhtml -Fgml -Fsql -Fcsv -R -t0 -v
|
||||
DEBPACKAGE = $(PACKAGE)_$(VERSION)_i386.deb
|
||||
SOURCES = \
|
||||
linkcheck/Config.py \
|
||||
linkcheck/FileUrlData.py \
|
||||
linkcheck/FtpUrlData.py \
|
||||
linkcheck/GopherUrlData.py \
|
||||
linkcheck/HostCheckingUrlData.py \
|
||||
linkcheck/HttpUrlData.py \
|
||||
linkcheck/HttpsUrlData.py \
|
||||
linkcheck/JavascriptUrlData.py \
|
||||
linkcheck/Logging.py \
|
||||
linkcheck/MailtoUrlData.py \
|
||||
linkcheck/NntpUrlData.py \
|
||||
linkcheck/TelnetUrlData.py \
|
||||
linkcheck/Threader.py \
|
||||
linkcheck/UrlData.py \
|
||||
linkcheck/__init__.py \
|
||||
linkcheck/lc_cgi.py \
|
||||
linkchecker
|
||||
|
||||
DESTDIR=/.
|
||||
.PHONY: test clean files upload dist install all
|
||||
.PHONY: test clean distclean package files upload dist locale all
|
||||
|
||||
all:
|
||||
@echo "Read the file INSTALL to see how to build and install"
|
||||
|
||||
clean:
|
||||
fakeroot debian/rules clean
|
||||
rm -f .time.po
|
||||
-python setup.py clean --all
|
||||
$(MAKE) -C po clean
|
||||
|
||||
distclean: clean
|
||||
distclean: clean cleandeb
|
||||
rm -rf dist
|
||||
rm -f $(PACKAGE)-out.* VERSION
|
||||
rm -f $(PACKAGE)-out.* VERSION LinkCheckerConf.py* MANIFEST
|
||||
|
||||
.time.po:
|
||||
$(MAKE) -C po
|
||||
touch .time.po
|
||||
cleandeb:
|
||||
rm -rf debian/$(PACKAGE) debian/tmp
|
||||
rm -f debian/*.debhelper debian/{files,substvars}
|
||||
rm -f configure-stamp build-stamp
|
||||
|
||||
dist: .time.po
|
||||
rm -rf debian/tmp
|
||||
python setup.py sdist --formats=gztar,zip bdist_rpm bdist_wininst
|
||||
dist: locale
|
||||
fakeroot debian/rules binary
|
||||
# cleandeb because distutils choke on dangling symlinks
|
||||
# (linkchecker.1 -> undocumented.1)
|
||||
$(MAKE) cleandeb
|
||||
python setup.py sdist --formats=gztar,zip bdist_rpm
|
||||
# extra run without SSL compilation
|
||||
python setup.py bdist_wininst
|
||||
mv -f ../$(DEBPACKAGE) dist
|
||||
|
||||
package:
|
||||
cd dist && dpkg-scanpackages . ../override.txt | gzip --best > Packages.gz
|
||||
|
||||
files: .time.po
|
||||
files: locale
|
||||
./$(PACKAGE) $(LCOPTS) -i$(HOST) http://$(HOST)/~calvin/
|
||||
|
||||
VERSION:
|
||||
echo $(VERSION) > VERSION
|
||||
|
||||
upload: dist package files VERSION
|
||||
upload: distclean dist package files VERSION
|
||||
scp debian/changelog shell1.sourceforge.net:/home/groups/$(PACKAGE)/htdocs/changes.txt
|
||||
scp linkchecker-out.* shell1.sourceforge.net:/home/groups/$(PACKAGE)/htdocs
|
||||
scp VERSION shell1.sourceforge.net:/home/groups/$(PACKAGE)/htdocs/raw/
|
||||
scp dist/* shell1.sourceforge.net:/home/groups/ftp/pub/$(PACKAGE)/
|
||||
ssh -C -t shell1.sourceforge.net "cd /home/groups/$(PACKAGE) && make"
|
||||
|
||||
test: .time.po
|
||||
test:
|
||||
rm -f test/*.result
|
||||
@for i in test/*.html; do \
|
||||
echo "Testing $$i. Results are in $$i.result"; \
|
||||
./$(PACKAGE) -r1 -o text -N"news.rz.uni-sb.de" -v -a $$i > $$i.result 2>&1; \
|
||||
done
|
||||
|
||||
locale:
|
||||
$(MAKE) -C po
|
||||
|
|
|
|||
10
README
10
README
|
|
@ -11,13 +11,14 @@ o output can be colored or normal text, HTML, SQL, CSV or a GML sitemap
|
|||
graph
|
||||
o HTTP/1.1, HTTPS, FTP, mailto:, news:, nntp:, Gopher, Telnet and local
|
||||
file links are supported.
|
||||
Javascript links are currently ignored
|
||||
o restrict link checking with regular expression filters for URLs
|
||||
o proxy support
|
||||
o give username/password for HTTP and FTP authorization
|
||||
o robots.txt exclusion protocol support
|
||||
o i18n support
|
||||
o command line interface
|
||||
o (Fast)CGI web interface
|
||||
o (Fast)CGI web interface (requires HTTP server)
|
||||
|
||||
|
||||
Installing, Requirements, Running
|
||||
|
|
@ -31,7 +32,8 @@ LinkChecker is licensed under the GNU Public License.
|
|||
Credits go to Guido van Rossum for making Python. His hovercraft is
|
||||
full of eels!
|
||||
As this program is directly derived from my Java link checker, additional
|
||||
credits go to Robert Forsman (the author of JCheckLinks).
|
||||
credits go to Robert Forsman (the author of JCheckLinks) and his
|
||||
robots.txt parse algorithm.
|
||||
I want to thank everybody who gave me feedback, bug reports and
|
||||
suggestions.
|
||||
|
||||
|
|
@ -48,10 +50,14 @@ So for example 1.1.5 is the fifth release of the 1.1 development package.
|
|||
|
||||
Included packages
|
||||
-----------------
|
||||
httplib from http://www.lyra.org/greg/python/
|
||||
httpslib from http://home.att.net/~nvsoft1/ssl_wrapper.html
|
||||
DNS see DNS/README
|
||||
fcgi.py and sz_fcgi.py from http://saarland.sz-sb.de/~ajung/sz_fcgi/
|
||||
fintl.py from http://sourceforge.net/snippet/detail.php?type=snippet&id=100059
|
||||
|
||||
Note that the following packages are modified by me:
|
||||
httplib.py (renamed to http11lib.py and a bug fixed)
|
||||
fcgi.py (implemented streamed output)
|
||||
sz_fcgi.py (simplified the code)
|
||||
DNS/Lib.py:566 fixed rdlength name error
|
||||
|
|
|
|||
7
TODO
7
TODO
|
|
@ -1,11 +1,6 @@
|
|||
High priority
|
||||
|
||||
o Proxy geht nicht:
|
||||
- getrennter http/https/ftp proxy
|
||||
- environment Variablen werden bei RobotParser benutzt, also muß ich
|
||||
das auch machen.
|
||||
|
||||
o Robot parser testen
|
||||
o Use Python 2.0 features
|
||||
|
||||
o I want to be able to supply a "break" command even when multiple
|
||||
threads are running.
|
||||
|
|
|
|||
33
debian/changelog
vendored
33
debian/changelog
vendored
|
|
@ -1,13 +1,30 @@
|
|||
linkchecker (1.3.0) unstable; urgency=low
|
||||
linkchecker (1.2.8) unstable; urgency=low
|
||||
|
||||
* require Python 2.0 so we can get rid of the robots.txt parser
|
||||
and use the one provided within the Python library
|
||||
* added <script src=> urls for link testing. Thanks to Tomas Cox
|
||||
<cox@idecnet.com> for the suggestion
|
||||
* we get now all proxy configuration values from $http_proxy,
|
||||
$https_proxy on Unix,Windows and from Internet Config on the Mac
|
||||
* INSTALL: more documentation for the CGI scripts
|
||||
* Makefile: better cleaning (clean, cleandeb, distclean)
|
||||
|
||||
-- Bastian Kleineidam <calvin@users.sourceforge.net> Thu, 2 Nov 2000 11:17:16 +0100
|
||||
-- Bastian Kleineidam <calvin@users.sourceforge.net> Thu, 9 Nov 2000 12:35:03 +0100
|
||||
|
||||
linkchecker (1.2.7) unstable; urgency=low
|
||||
|
||||
* new robot.txt parser module which is interface compatible with the
|
||||
one in Python 2.0
|
||||
* debian/control: new fields Build-Depends, Build-Depends-Indep
|
||||
* debian/control: Architecture is any, not i386
|
||||
* debian/control: Standards version 3.1.1,
|
||||
* debian/control: depend on Python 1.5.2
|
||||
* debian/copyright: first line was too long
|
||||
* debian/postinst: compile .py files
|
||||
* debian/rules: new configuration target
|
||||
* debian/prerm: new file, delete compiled .py files
|
||||
* use Python tools for i18n (backported from Python 2.0)
|
||||
* proxy configuration is now detected automatically from system
|
||||
(environment) variables. NOTE: this means the --proxy options
|
||||
are gone!
|
||||
* add <script src=> to url list
|
||||
* include ssl.c again (was missing in 1.2.6)
|
||||
|
||||
-- Bastian Kleineidam <calvin@users.sourceforge.net> Sat, 4 Nov 2000 10:45:08 +0100
|
||||
|
||||
linkchecker (1.2.6) unstable; urgency=low
|
||||
|
||||
|
|
|
|||
21
debian/control
vendored
21
debian/control
vendored
|
|
@ -2,24 +2,27 @@ Source: linkchecker
|
|||
Section: web
|
||||
Priority: optional
|
||||
Maintainer: Bastian Kleineidam <calvin@users.sourceforge.net>
|
||||
Standards-Version: 3.0.1
|
||||
Build-Depends: python-base (>= 1.5.2), python-base (<< 1.6), python-dev (>= 1.5.2), python-dev (<< 1.6), python-distutils (>= 0.9.4), debhelper
|
||||
Build-Depends-Indep: gettext
|
||||
Standards-Version: 3.1.1
|
||||
|
||||
Package: linkchecker
|
||||
Architecture: i386
|
||||
Depends: python-base (= 1.5.2)
|
||||
Suggests: libssl09|libssl095a
|
||||
Architecture: any
|
||||
Depends: python-base (>= 1.5.2), python-base (<< 1.6)
|
||||
Suggests: libssl09|libssl095a, httpd
|
||||
Description: check/validate/test your HTML documents for broken links/URLs
|
||||
Features:
|
||||
o recursive checking
|
||||
o multithreaded
|
||||
o output can be colored or normal text, HTML, SQL, CSV or a GML sitemap
|
||||
graph
|
||||
o HTTP/1.1, HTTPS, FTP, mailto:, news:, Gopher, Telnet and local file links
|
||||
are supported
|
||||
o HTTP/1.1, HTTPS, FTP, mailto:, nntp:, news:, Gopher, Telnet and local
|
||||
file links are supported
|
||||
Javascript links are currently ignored
|
||||
o restrict link checking with regular expression filters for URLs
|
||||
o HTTP proxy support
|
||||
o proxy support
|
||||
o give username/password for HTTP and FTP authorization
|
||||
o robots.txt exclusion protocol support
|
||||
o internationalization support
|
||||
o (Fast)CGI web interface
|
||||
o i18n support
|
||||
o command line interface
|
||||
o (Fast)CGI web interface (requires HTTP server)
|
||||
|
|
|
|||
4
debian/copyright
vendored
4
debian/copyright
vendored
|
|
@ -1,5 +1,5 @@
|
|||
This is linkchecker, written and maintained by Bastian Kleineidam <calvin@users.sourceforge.net>
|
||||
on Sat, 8 Jan 2000 11:00:35 +0100.
|
||||
This is LinkChecker, written and maintained by Bastian Kleineidam
|
||||
<calvin@users.sourceforge.net> on Sat, 8 Jan 2000 11:00:35 +0100.
|
||||
|
||||
The original source can always be found at:
|
||||
http://linkchecker.sourceforge.net
|
||||
|
|
|
|||
47
debian/rules
vendored
47
debian/rules
vendored
|
|
@ -1,47 +1,50 @@
|
|||
#!/usr/bin/make -f
|
||||
# Sample debian/rules that uses debhelper.
|
||||
# GNU copyright 1997 to 1999 by Joey Hess.
|
||||
DOC_DIR = debian/tmp/usr/share/doc/linkchecker
|
||||
PACKAGE=linkchecker
|
||||
DOCDIR = debian/$(PACKAGE)/usr/share/doc/$(PACKAGE)
|
||||
|
||||
# Uncomment this to turn on verbose mode.
|
||||
#export DH_VERBOSE=1
|
||||
|
||||
# This is the debhelper compatibility version to use.
|
||||
export DH_COMPAT=1
|
||||
export DH_COMPAT=2
|
||||
|
||||
build: build-stamp
|
||||
# This has to be exported to make some magic below work.
|
||||
export DH_OPTIONS
|
||||
|
||||
configure: configure-stamp
|
||||
configure-stamp:
|
||||
dh_testdir
|
||||
python setup.py config -lcrypto
|
||||
touch configure-stamp
|
||||
|
||||
|
||||
build: configure-stamp build-stamp
|
||||
build-stamp:
|
||||
dh_testdir
|
||||
|
||||
# Add here commands to compile the package.
|
||||
rm -rf debian/tmp
|
||||
rm -rf debian/$(PACKAGE)
|
||||
python setup.py build
|
||||
|
||||
touch build-stamp
|
||||
|
||||
clean:
|
||||
dh_testdir
|
||||
dh_testroot
|
||||
rm -f build-stamp
|
||||
|
||||
# Add here commands to clean up after the build process.
|
||||
python setup.py clean --all
|
||||
|
||||
rm -f build-stamp configure-stamp
|
||||
$(MAKE) clean
|
||||
dh_clean
|
||||
|
||||
install: build
|
||||
dh_testdir
|
||||
dh_testroot
|
||||
dh_clean -k
|
||||
dh_installdirs
|
||||
# ha! the root option finally made it into distutils
|
||||
python setup.py install --root=`pwd`/debian/tmp
|
||||
$(MAKE) locale
|
||||
python setup.py install --root=`pwd`/debian/$(PACKAGE) --no-compile
|
||||
# install additional doc files
|
||||
install -c -m 644 DNS/README $(DOC_DIR)/README_DNS.txt
|
||||
install -d -m 755 $(DOC_DIR)/test
|
||||
install -c -m 644 test/*.html test/*.py $(DOC_DIR)/test
|
||||
install -d -m 755 $(DOC_DIR)/tests
|
||||
install -c -m 644 tests/*.py $(DOC_DIR)/tests
|
||||
install -c -m 644 DNS/README $(DOCDIR)/README_DNS.txt
|
||||
install -d -m 755 $(DOCDIR)/test
|
||||
install -c -m 644 test/*.html test/*.py $(DOCDIR)/test
|
||||
install -d -m 755 $(DOCDIR)/tests
|
||||
install -c -m 644 tests/*.py $(DOCDIR)/tests
|
||||
|
||||
|
||||
# Build architecture-independent files here.
|
||||
|
|
@ -80,4 +83,4 @@ binary-arch: build install
|
|||
dh_builddeb
|
||||
|
||||
binary: binary-indep binary-arch
|
||||
.PHONY: build clean binary-indep binary-arch binary install
|
||||
.PHONY: build clean binary-indep binary-arch binary install configure
|
||||
|
|
|
|||
|
|
@ -1,4 +1,3 @@
|
|||
<!doctype html public "-//w3c//dtd html 4.0//en">
|
||||
<html><head>
|
||||
<title>LinkChecker Online</title>
|
||||
</head>
|
||||
|
|
|
|||
|
|
@ -1,4 +1,3 @@
|
|||
<!doctype html public "-//w3c//dtd html 4.0//en">
|
||||
<html><head>
|
||||
<title>LinkChecker Online</title>
|
||||
</head>
|
||||
|
|
|
|||
|
|
@ -1,4 +1,3 @@
|
|||
<!doctype html public "-//w3c//dtd html 4.0//en">
|
||||
<html><head>
|
||||
<title>Empty</title>
|
||||
</head>
|
||||
|
|
|
|||
|
|
@ -21,8 +21,8 @@ This module stores
|
|||
* Other configuration options
|
||||
"""
|
||||
|
||||
import ConfigParser,sys,os,re,UserDict,string,time
|
||||
import Logging,LinkCheckerConf
|
||||
import ConfigParser, sys, os, re, UserDict, string, time
|
||||
import Logging, LinkCheckerConf
|
||||
from os.path import expanduser,normpath,normcase,join,isfile
|
||||
from types import StringType
|
||||
from urllib import getproxies
|
||||
|
|
@ -377,8 +377,11 @@ class Configuration(UserDict.UserDict):
|
|||
used in the linkchecker module.
|
||||
"""
|
||||
debug("DEBUG: reading configuration from %s\n" % files)
|
||||
cfgparser = ConfigParser.ConfigParser()
|
||||
cfgparser.read(files)
|
||||
try:
|
||||
cfgparser = ConfigParser.ConfigParser()
|
||||
cfgparser.read(files)
|
||||
except ConfigParser.Error:
|
||||
return
|
||||
|
||||
section="output"
|
||||
try:
|
||||
|
|
@ -387,16 +390,16 @@ class Configuration(UserDict.UserDict):
|
|||
self.data['log'] = self.newLogger(log)
|
||||
else:
|
||||
self.warn(_("invalid log option '%s'") % log)
|
||||
except ConfigParser.NoOptionError: pass
|
||||
except ConfigParser.Error: pass
|
||||
try:
|
||||
if cfgparser.getboolean(section, "verbose"):
|
||||
self.data["verbose"] = 1
|
||||
self.data["warnings"] = 1
|
||||
except ConfigParser.NoOptionError: pass
|
||||
except ConfigParser.Error: pass
|
||||
try: self.data["quiet"] = cfgparser.getboolean(section, "quiet")
|
||||
except ConfigParser.NoOptionError: pass
|
||||
except ConfigParser.Error: pass
|
||||
try: self.data["warnings"] = cfgparser.getboolean(section, "warnings")
|
||||
except ConfigParser.NoOptionError: pass
|
||||
except ConfigParser.Error: pass
|
||||
try:
|
||||
filelist = string.split(cfgparser.get(section, "fileoutput"))
|
||||
for arg in filelist:
|
||||
|
|
@ -404,12 +407,12 @@ class Configuration(UserDict.UserDict):
|
|||
if Loggers.has_key(arg) and arg != "blacklist":
|
||||
self.data['fileoutput'].append(
|
||||
self.newLogger(arg, {'fileoutput':1}))
|
||||
except ConfigParser.NoOptionError: pass
|
||||
except ConfigParser.Error: pass
|
||||
for key in Loggers.keys():
|
||||
if cfgparser.has_section(key):
|
||||
for opt in cfgparser.options(key):
|
||||
try: self.data[key][opt] = cfgparser.get(key, opt)
|
||||
except ConfigParser.NoOptionError: pass
|
||||
except ConfigParser.Error: pass
|
||||
|
||||
section="checking"
|
||||
try:
|
||||
|
|
@ -418,28 +421,28 @@ class Configuration(UserDict.UserDict):
|
|||
self.disableThreads()
|
||||
else:
|
||||
self.enableThreads(num)
|
||||
except ConfigParser.NoOptionError: pass
|
||||
except ConfigParser.Error: pass
|
||||
try: self.data["anchors"] = cfgparser.getboolean(section, "anchors")
|
||||
except ConfigParser.NoOptionError: pass
|
||||
except ConfigParser.Error: pass
|
||||
try:
|
||||
num = cfgparser.getint(section, "recursionlevel")
|
||||
if num<0:
|
||||
self.error(_("illegal recursionlevel number %d") % num)
|
||||
self.data["recursionlevel"] = num
|
||||
except ConfigParser.NoOptionError: pass
|
||||
except ConfigParser.Error: pass
|
||||
try:
|
||||
self.data["robotstxt"] = cfgparser.getboolean(section,
|
||||
"robotstxt")
|
||||
except ConfigParser.NoOptionError: pass
|
||||
except ConfigParser.Error: pass
|
||||
try: self.data["strict"] = cfgparser.getboolean(section, "strict")
|
||||
except ConfigParser.NoOptionError: pass
|
||||
except ConfigParser.Error: pass
|
||||
try:
|
||||
self.data["warningregex"] = re.compile(cfgparser.get(section,
|
||||
"warningregex"))
|
||||
except ConfigParser.NoOptionError: pass
|
||||
except ConfigParser.Error: pass
|
||||
try:
|
||||
self.data["nntpserver"] = cfgparser.get(section, "nntpserver")
|
||||
except ConfigParser.NoOptionError: pass
|
||||
except ConfigParser.Error: pass
|
||||
|
||||
section = "authentication"
|
||||
try:
|
||||
|
|
@ -450,7 +453,7 @@ class Configuration(UserDict.UserDict):
|
|||
tuple[0] = re.compile(tuple[0])
|
||||
self.data["authentication"].append(tuple)
|
||||
i = i + 1
|
||||
except ConfigParser.NoOptionError: pass
|
||||
except ConfigParser.Error: pass
|
||||
|
||||
section = "filtering"
|
||||
try:
|
||||
|
|
@ -461,9 +464,9 @@ class Configuration(UserDict.UserDict):
|
|||
self.data["externlinks"].append((re.compile(tuple[0]),
|
||||
int(tuple[1])))
|
||||
i = i + 1
|
||||
except ConfigParser.NoOptionError: pass
|
||||
except ConfigParser.Error: pass
|
||||
try: self.data["internlinks"].append(re.compile(cfgparser.get(section, "internlinks")))
|
||||
except ConfigParser.NoOptionError: pass
|
||||
except ConfigParser.Error: pass
|
||||
try: self.data["allowdeny"] = cfgparser.getboolean(section, "allowdeny")
|
||||
except ConfigParser.NoOptionError: pass
|
||||
except ConfigParser.Error: pass
|
||||
|
||||
|
|
|
|||
|
|
@ -27,7 +27,9 @@ ExcList.extend([
|
|||
])
|
||||
|
||||
class FtpUrlData(UrlData):
|
||||
"""Url link with ftp scheme."""
|
||||
"""
|
||||
Url link with ftp scheme.
|
||||
"""
|
||||
|
||||
def checkConnection(self, config):
|
||||
_user, _password = self._getUserPassword(config)
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@
|
|||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
"""
|
||||
import httplib,urlparse,sys,time,re
|
||||
import http11lib,urlparse,sys,time,re
|
||||
import Config,StringUtil,robotparser2
|
||||
from UrlData import UrlData
|
||||
from urllib import splittype, splithost
|
||||
|
|
@ -67,7 +67,8 @@ class HttpUrlData(UrlData):
|
|||
| "503" ; Service Unavailable
|
||||
| extension-code
|
||||
"""
|
||||
self.proxy = config['proxy'].get(self.get_scheme(), None)
|
||||
|
||||
self.proxy = config["proxy"].get(self.get_scheme(), None)
|
||||
if self.proxy:
|
||||
self.proxy = splittype(self.proxy)[1]
|
||||
self.proxy = splithost(self.proxy)[0]
|
||||
|
|
@ -176,7 +177,7 @@ class HttpUrlData(UrlData):
|
|||
return self.urlConnection.getreply()
|
||||
|
||||
def _getHTTPObject(self, host):
|
||||
return httplib.HTTP(host)
|
||||
return http11lib.HTTP(host)
|
||||
|
||||
def getContent(self):
|
||||
if not self.data:
|
||||
|
|
@ -193,13 +194,12 @@ class HttpUrlData(UrlData):
|
|||
def isHtml(self):
|
||||
if not (self.valid and self.mime):
|
||||
return 0
|
||||
return self.mime.gettype()=="text/html"
|
||||
return self.mime.gettype()[:9]=="text/html"
|
||||
|
||||
def robotsTxtAllowsUrl(self, config):
|
||||
roboturl="%s://%s/robots.txt" % self.urlTuple[0:2]
|
||||
if not config.robotsTxtCache_has_key(roboturl):
|
||||
rp = robotparser2.RobotFileParser(roboturl)
|
||||
robotparser2.debug=1
|
||||
rp.read()
|
||||
config.robotsTxtCache_set(roboturl, rp)
|
||||
rp = config.robotsTxtCache_get(roboturl)
|
||||
|
|
|
|||
|
|
@ -18,20 +18,21 @@
|
|||
from UrlData import UrlData
|
||||
from HttpUrlData import HttpUrlData
|
||||
from linkcheck import _
|
||||
import httplib,socket
|
||||
_supportHttps=hasattr(socket, 'ssl')
|
||||
_supportHttps=1
|
||||
try: import httpslib
|
||||
except ImportError: _supportHttps=0
|
||||
|
||||
class HttpsUrlData(HttpUrlData):
|
||||
"""Url link with https scheme"""
|
||||
|
||||
def _getHTTPObject(self, host):
|
||||
return httplib.HTTPS(host)
|
||||
return httpslib.HTTPS(host)
|
||||
|
||||
def check(self, config):
|
||||
if _supportHttps:
|
||||
HttpUrlData.check(self, config)
|
||||
else:
|
||||
self.setWarning(_("HTTPS not supported"))
|
||||
self.setWarning(_("HTTPS url ignored"))
|
||||
self.logMe(config)
|
||||
|
||||
def get_scheme(self):
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@
|
|||
import os,re,string,DNS,sys,Config,cgi,urllib,linkcheck
|
||||
from rfc822 import AddressList
|
||||
from HostCheckingUrlData import HostCheckingUrlData
|
||||
from smtplib import SMTP,SMTPException
|
||||
from smtplib import SMTP
|
||||
from linkcheck import _
|
||||
|
||||
# regular expression for RFC2368 compliant mailto: scanning
|
||||
|
|
@ -32,10 +32,7 @@ if os.name=='posix':
|
|||
|
||||
class MailtoUrlData(HostCheckingUrlData):
|
||||
"Url link with mailto scheme"
|
||||
|
||||
def get_scheme(self):
|
||||
return "mailto"
|
||||
|
||||
|
||||
def buildUrl(self):
|
||||
HostCheckingUrlData.buildUrl(self)
|
||||
self.headers = {}
|
||||
|
|
@ -88,8 +85,9 @@ class MailtoUrlData(HostCheckingUrlData):
|
|||
info = self.urlConnection.verify(user)
|
||||
if info[0]==250:
|
||||
self.setInfo("Verified adress: "+info[1])
|
||||
except SMTPException:
|
||||
pass
|
||||
except:
|
||||
type, value = sys.exc_info()[:2]
|
||||
#print type,value
|
||||
if smtpconnect: break
|
||||
|
||||
if not smtpconnect:
|
||||
|
|
@ -120,3 +118,7 @@ class MailtoUrlData(HostCheckingUrlData):
|
|||
|
||||
def getCacheKey(self):
|
||||
return self.get_scheme()+":"+str(self.adresses)
|
||||
|
||||
|
||||
def get_scheme(self):
|
||||
return "mailto"
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ ExcList.extend([nntplib.error_reply,
|
|||
|
||||
class NntpUrlData(UrlData):
|
||||
"Url link with NNTP scheme"
|
||||
|
||||
|
||||
def get_scheme(self):
|
||||
return "nntp"
|
||||
|
||||
|
|
|
|||
|
|
@ -37,8 +37,8 @@ _linkMatcher = r"""
|
|||
< # open tag
|
||||
\s* # whitespace
|
||||
%s # tag name
|
||||
[^>]*? # skip leading attributes
|
||||
\s+ # whitespace
|
||||
[^>]*? # skip leading attributes
|
||||
%s # attrib name
|
||||
\s* # whitespace
|
||||
= # equal sign
|
||||
|
|
@ -91,7 +91,8 @@ class UrlData:
|
|||
self.extern = 1
|
||||
self.data = None
|
||||
self.html_comments = []
|
||||
|
||||
|
||||
|
||||
def setError(self, s):
|
||||
self.valid=0
|
||||
self.errorString = _("Error")+": "+s
|
||||
|
|
@ -233,6 +234,7 @@ class UrlData:
|
|||
|
||||
|
||||
def allowsRecursion(self, config):
|
||||
Config.debug("extern: %s\n" % str(self.extern))
|
||||
return self.valid and \
|
||||
self.isHtml() and \
|
||||
not self.cached and \
|
||||
|
|
|
|||
|
|
@ -26,12 +26,12 @@ class error(Exception):
|
|||
# i18n support
|
||||
import LinkCheckerConf
|
||||
try:
|
||||
import os
|
||||
from gettext import gettext, bindtextdomain, textdomain
|
||||
import fintl,os
|
||||
gettext = fintl.gettext
|
||||
domain = 'linkcheck'
|
||||
localedir = os.path.join(LinkCheckerConf.install_data, 'locale')
|
||||
bindtextdomain(domain, localedir)
|
||||
textdomain(domain)
|
||||
fintl.bindtextdomain(domain, localedir)
|
||||
fintl.textdomain(domain)
|
||||
except ImportError:
|
||||
def gettext(msg):
|
||||
return msg
|
||||
|
|
|
|||
|
|
@ -2,8 +2,8 @@
|
|||
|
||||
# imports and checks
|
||||
import sys
|
||||
if (not hasattr(sys, 'version_info')) or sys.version_info < (2,0,0,'final',0):
|
||||
raise SystemExit, "This program requires Python 2.0 or later."
|
||||
if sys.version[:5] < "1.5.2":
|
||||
raise SystemExit, "This program requires Python 1.5.2 or later."
|
||||
import getopt,re,string,os,urlparse
|
||||
# 90 seconds timeout for all connections
|
||||
#import timeoutsocket
|
||||
|
|
|
|||
|
|
@ -113,4 +113,4 @@
|
|||
# At the moment, authentication is used/needed for http[s] and ftp links.
|
||||
[authentication]
|
||||
#entry1=^http://treasure\.calvinsplayground\.de/~calvin/isnichmehr/ lebowski lebowski
|
||||
#entry2=^ftp://void.cs.uni-sb.de calvin schnuckl
|
||||
#entry2=^ftp://void.cs.uni-sb.de calvin hutzli
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
# we use the scripts in Tools/i18n of the Python distribution
|
||||
I18NTOOLS=/usr/local/src/Python-2.0/Tools/i18n
|
||||
GETTEXT=$(I18NTOOLS)/pygettext.py
|
||||
MSGFMT=$(I18NTOOLS)/msgfmt.py
|
||||
# we use the scripts in Tools/i18n of the Python 2.0 distribution
|
||||
I18NTOOLS=.
|
||||
GETTEXT=python $(I18NTOOLS)/pygettext.py
|
||||
MSGFMT=python $(I18NTOOLS)/msgfmt.py
|
||||
MSGMERGE=msgmerge
|
||||
SOURCES=\
|
||||
../linkcheck/Config.py \
|
||||
|
|
|
|||
8
po/de.po
8
po/de.po
|
|
@ -95,8 +95,9 @@ msgstr "Ung
|
|||
msgid "hours"
|
||||
msgstr "Stunden"
|
||||
|
||||
msgid "HTTPS not supported"
|
||||
msgstr "HTTPS nicht unterstützt"
|
||||
#, fuzzy
|
||||
msgid "HTTPS url ignored"
|
||||
msgstr "Javascript url ignoriert"
|
||||
|
||||
msgid "Start checking at %s\n"
|
||||
msgstr "Beginne Prüfen am %s\n"
|
||||
|
|
@ -413,6 +414,9 @@ msgstr "Kein NNTP Server angegeben; pr
|
|||
msgid "Parent URL"
|
||||
msgstr "Vater URL"
|
||||
|
||||
#~ msgid "HTTPS not supported"
|
||||
#~ msgstr "HTTPS nicht unterstützt"
|
||||
|
||||
#~ msgid " (%.3f seconds)"
|
||||
#~ msgstr " (%.3f Sekunden)"
|
||||
|
||||
|
|
|
|||
8
po/fr.po
8
po/fr.po
|
|
@ -87,8 +87,9 @@ msgstr "Argumen ill
|
|||
msgid "hours"
|
||||
msgstr "heures"
|
||||
|
||||
msgid "HTTPS not supported"
|
||||
msgstr "HTTPS not supported"
|
||||
#, fuzzy
|
||||
msgid "HTTPS url ignored"
|
||||
msgstr "Url Javascript ignorée"
|
||||
|
||||
msgid "Start checking at %s\n"
|
||||
msgstr "Démarrage du controle à %s\n"
|
||||
|
|
@ -409,5 +410,8 @@ msgstr "Auncun serveur NNTP sp
|
|||
msgid "Parent URL"
|
||||
msgstr "URL Parente"
|
||||
|
||||
#~ msgid "HTTPS not supported"
|
||||
#~ msgstr "HTTPS not supported"
|
||||
|
||||
#~ msgid "Illegal NNTP link syntax"
|
||||
#~ msgstr "Syntaxe illégale du lien NNTP"
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ class RobotFileParser:
|
|||
connection.endheaders()
|
||||
status, text, mime = connection.getreply()
|
||||
if status in [301,302]:
|
||||
tries += 1
|
||||
tries = tries + 1
|
||||
self.set_url(mime.getheader("Location"))
|
||||
else:
|
||||
break
|
||||
|
|
@ -56,7 +56,7 @@ class RobotFileParser:
|
|||
|
||||
for line in lines:
|
||||
line = string.strip(line)
|
||||
linenumber += 1
|
||||
linenumber = linenumber + 1
|
||||
if not line:
|
||||
if state==1:
|
||||
_debug("line %d: no rules found" % linenumber)
|
||||
|
|
@ -128,7 +128,7 @@ class RobotFileParser:
|
|||
def __str__(self):
|
||||
ret = ""
|
||||
for entry in self.entries:
|
||||
ret += str(entry) + "\n"
|
||||
ret = ret + str(entry) + "\n"
|
||||
return ret
|
||||
|
||||
class RuleLine:
|
||||
|
|
@ -151,9 +151,9 @@ class Entry:
|
|||
def __str__(self):
|
||||
ret = ""
|
||||
for agent in self.useragents:
|
||||
ret += "User-agent: "+agent+"\n"
|
||||
ret = ret + "User-agent: "+agent+"\n"
|
||||
for line in self.rulelines:
|
||||
ret += str(line) + "\n"
|
||||
ret = ret + str(line) + "\n"
|
||||
return ret
|
||||
|
||||
def applies_to(self, useragent):
|
||||
|
|
|
|||
107
setup.py
107
setup.py
|
|
@ -24,7 +24,7 @@ from distutils.command.install import install
|
|||
from distutils.command.config import config
|
||||
from distutils import util
|
||||
from distutils.file_util import write_file
|
||||
import os,string
|
||||
import os
|
||||
|
||||
|
||||
class LCInstall(install):
|
||||
|
|
@ -44,12 +44,66 @@ class LCInstall(install):
|
|||
data.append("%s = %s" % (attr, `val`))
|
||||
from pprint import pformat
|
||||
data.append('outputs = %s' % pformat(self.get_outputs()))
|
||||
self.distribution.create_conf_file(self.install_lib, data)
|
||||
self.distribution.create_conf_file(self.install_lib, data)
|
||||
|
||||
|
||||
class LCConfig(config):
|
||||
user_options = config.user_options + [
|
||||
('ssl-include-dirs=', None,
|
||||
"directories to search for SSL header files"),
|
||||
('ssl-library-dirs=', None,
|
||||
"directories to search for SSL library files"),
|
||||
]
|
||||
|
||||
def initialize_options (self):
|
||||
config.initialize_options(self)
|
||||
self.ssl_include_dirs = None
|
||||
self.ssl_library_dirs = None
|
||||
|
||||
def finalize_options(self):
|
||||
# we have some default include and library directories
|
||||
self.basic_finalize_options()
|
||||
if self.ssl_include_dirs is None:
|
||||
self.ssl_include_dirs = ['/usr/include/openssl',
|
||||
'/usr/local/include/openssl']
|
||||
if self.ssl_library_dirs is None:
|
||||
self.ssl_library_dirs = ['/usr/lib',
|
||||
'/usr/local/lib']
|
||||
|
||||
def basic_finalize_options(self):
|
||||
"""fix up types of option values"""
|
||||
# this should be in config.finalize_options
|
||||
# I submitted a patch
|
||||
if self.include_dirs is None:
|
||||
self.include_dirs = self.distribution.include_dirs or []
|
||||
elif type(self.include_dirs) is StringType:
|
||||
self.include_dirs = string.split(self.include_dirs, os.pathsep)
|
||||
|
||||
if self.libraries is None:
|
||||
self.libraries = []
|
||||
elif type(self.libraries) is StringType:
|
||||
self.libraries = [self.libraries]
|
||||
|
||||
if self.library_dirs is None:
|
||||
self.library_dirs = []
|
||||
elif type(self.library_dirs) is StringType:
|
||||
self.library_dirs = [self.library_dirs]
|
||||
|
||||
|
||||
def run (self):
|
||||
data = ["install_data = %s" % `os.getcwd()`]
|
||||
# try to compile a test program with SSL
|
||||
config.run(self)
|
||||
have_ssl = self.check_lib("ssl",
|
||||
library_dirs = self.ssl_library_dirs,
|
||||
include_dirs = self.ssl_include_dirs,
|
||||
headers = ["ssl.h"])
|
||||
# write the result in the configuration file
|
||||
data = []
|
||||
data.append("have_ssl = %d" % (have_ssl))
|
||||
data.append("ssl_library_dirs = %s" % `self.ssl_library_dirs`)
|
||||
data.append("ssl_include_dirs = %s" % `self.ssl_include_dirs`)
|
||||
data.append("libraries = %s" % `['ssl', 'crypto']`)
|
||||
data.append("install_data = %s" % `os.getcwd()`)
|
||||
self.distribution.create_conf_file(".", data)
|
||||
|
||||
|
||||
|
|
@ -59,10 +113,32 @@ class LCDistribution(Distribution):
|
|||
self.config_file = self.get_name()+"Conf.py"
|
||||
|
||||
|
||||
def run_commands(self):
|
||||
if "config" not in self.commands:
|
||||
self.check_ssl()
|
||||
Distribution.run_commands(self)
|
||||
|
||||
|
||||
def check_ssl(self):
|
||||
if not os.path.exists(self.config_file):
|
||||
raise SystemExit, "please run 'python setup.py config'"
|
||||
#self.announce("generating default configuration")
|
||||
#self.run_command('config')
|
||||
import LinkCheckerConf
|
||||
if 'bdist_wininst' in self.commands and os.name!='nt':
|
||||
self.announce("bdist_wininst command found on non-Windows "
|
||||
"platform. Disabling SSL compilation")
|
||||
elif LinkCheckerConf.have_ssl:
|
||||
self.ext_modules = [Extension('ssl', ['ssl.c'],
|
||||
include_dirs=LinkCheckerConf.ssl_include_dirs,
|
||||
library_dirs=LinkCheckerConf.ssl_library_dirs,
|
||||
libraries=LinkCheckerConf.libraries)]
|
||||
|
||||
|
||||
def create_conf_file(self, directory, data=[]):
|
||||
data.insert(0, "# this file is automatically created by setup.py")
|
||||
filename = os.path.join(directory, self.config_file)
|
||||
# metadata
|
||||
# add metadata
|
||||
metanames = dir(self.metadata) + \
|
||||
['fullname', 'contact', 'contact_email']
|
||||
for name in metanames:
|
||||
|
|
@ -73,12 +149,16 @@ class LCDistribution(Distribution):
|
|||
util.execute(write_file, (filename, data),
|
||||
"creating %s" % filename, self.verbose>=1, self.dry_run)
|
||||
|
||||
myname = "Bastian Kleineidam"
|
||||
myemail = "calvin@users.sourceforge.net"
|
||||
|
||||
setup (name = "LinkChecker",
|
||||
version = "1.3.0",
|
||||
version = "1.2.8",
|
||||
description = "check links of HTML pages",
|
||||
author = "Bastian Kleineidam",
|
||||
author_email = "calvin@users.sourceforge.net",
|
||||
author = myname,
|
||||
author_email = myemail,
|
||||
maintainer = myname,
|
||||
maintainer_email = myemail,
|
||||
url = "http://linkchecker.sourceforge.net/",
|
||||
licence = "GPL",
|
||||
long_description =
|
||||
|
|
@ -94,22 +174,21 @@ o HTTP/1.1, HTTPS, FTP, mailto:, news:, nntp:, Gopher, Telnet and local
|
|||
file links are supported.
|
||||
Javascript links are currently ignored
|
||||
o restrict link checking with regular expression filters for URLs
|
||||
o HTTP proxy support
|
||||
o proxy support
|
||||
o give username/password for HTTP and FTP authorization
|
||||
o robots.txt exclusion protocol support
|
||||
o internationalization support
|
||||
o (Fast)CGI web interface
|
||||
o i18n support
|
||||
o command line interface
|
||||
o (Fast)CGI web interface (requires HTTP server)
|
||||
""",
|
||||
distclass = LCDistribution,
|
||||
cmdclass = {'config': LCConfig, 'install': LCInstall},
|
||||
packages = ['','DNS','linkcheck'],
|
||||
scripts = ['linkchecker'],
|
||||
data_files = [('share/locale/de/LC_MESSAGES',
|
||||
['locale/de/LC_MESSAGES/linkcheck.mo',
|
||||
'locale/de/LC_MESSAGES/linkcheck.po']),
|
||||
['locale/de/LC_MESSAGES/linkcheck.mo']),
|
||||
('share/locale/fr/LC_MESSAGES',
|
||||
['locale/fr/LC_MESSAGES/linkcheck.mo',
|
||||
'locale/fr/LC_MESSAGES/linkcheck.po']),
|
||||
['locale/fr/LC_MESSAGES/linkcheck.mo']),
|
||||
('share/linkchecker',['linkchecker.bat',
|
||||
'linkcheckerrc',]),
|
||||
],
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@ Just some HTTP links
|
|||
<a href="http://">
|
||||
<a href="http:/">
|
||||
<a href="http:">
|
||||
<a href="http://localhost:/">
|
||||
<a href="http://www.blubb.de/stalter&sohn">
|
||||
<a name="iswas"> <!-- anchor for test2.html -->
|
||||
<a href=http://slashdot.org/>
|
||||
|
|
|
|||
|
|
@ -16,11 +16,3 @@
|
|||
<a href="ftp://treasure.calvinsplayground.de//pub">
|
||||
<a href="ftp://treasure.calvinsplayground.de////////pub">
|
||||
<a href="ftp:///treasure.calvinsplayground.de/pub">
|
||||
< img src="blubb_image">
|
||||
< img lowsrc="blubb_lowimage">
|
||||
< link href="blubb_link">
|
||||
< script src="blubb_script">
|
||||
< area href="blubb_area">
|
||||
< body background="blubb_body">
|
||||
< area href="blubb_href">
|
||||
< form action="blubb_action">
|
||||
|
|
|
|||
Loading…
Reference in a new issue