mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-23 23:54:44 +00:00
See ChangeLog
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@30 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
83f1151459
commit
cb5cf83d7f
12 changed files with 212 additions and 175 deletions
|
|
@ -1,3 +1,11 @@
|
|||
19.3.2000
|
||||
* report the duration of checking a link (Check Time)
|
||||
* rename httplib.py to http11lib.py so it does not silently
|
||||
replace the standard library httplib.py
|
||||
|
||||
12.3.2000
|
||||
* first version of distutils setup.py
|
||||
|
||||
11.3.2000
|
||||
* Better debugging:
|
||||
-D implies -t0
|
||||
|
|
|
|||
45
INSTALL
45
INSTALL
|
|
@ -1,19 +1,16 @@
|
|||
LinkChecker installation
|
||||
==========================
|
||||
|
||||
First, decompress the archive.
|
||||
With linkchecker-x.x.x.tar.bz2 do "tar xIvf linkchecker-x.x.x.tar.bz2".
|
||||
With linkchecker-x.x.x.zip do "unzip linkchecker-x.x.x.zip" or use Winzip.
|
||||
With linkchecker-x.x.x.deb do "dpkg -i linkchecker-x.x.x.deb" as root and you
|
||||
are done.
|
||||
Requirements:
|
||||
You need Python >= 1.5.2
|
||||
You get Python from http://www.python.org
|
||||
|
||||
Unix Users:
|
||||
1. Edit the file linkchecker.
|
||||
Adjust the argument to sys.path.append to point to the distribution
|
||||
directory.
|
||||
2. HTTPS support (optional, you need SSLeay)
|
||||
Adjust the paths at the top of the Makefile
|
||||
Type "make" to produce the SSL module
|
||||
See below
|
||||
3. Copy linkchecker to a location in your PATH (or make a symlink).
|
||||
4. Check links happily by typing `linkchecker`.
|
||||
|
||||
|
|
@ -23,13 +20,39 @@ Windows Users:
|
|||
Adjust the argument to sys.path.append to point to the distribution
|
||||
directory.
|
||||
2. Edit the file linkchecker.bat.
|
||||
a) Adjust the PYHTON variable to point to python.exe.
|
||||
a) Adjust the PYTHON variable to point to python.exe.
|
||||
b) Adjust the LINKCHECKER variable to point to the distribution directory.
|
||||
3. HTTPS support (optional, you need SSLeay)
|
||||
Compile ssl.dll from ssl.c
|
||||
See below
|
||||
4. Add the distribution directory to your PATH.
|
||||
5. Check links happily by typing `linkchecker.bat`.
|
||||
|
||||
|
||||
You need Python >= 1.5.2
|
||||
You get Python from http://www.python.org
|
||||
HTTPS support:
|
||||
Run "python setup.py build_ext" to compile the ssl library.
|
||||
Here is the overall usage guide for setup.py:
|
||||
Global options:
|
||||
--verbose (-v) run verbosely (default)
|
||||
--quiet (-q) run quietly (turns verbosity off)
|
||||
--dry-run (-n) don't actually do anything
|
||||
--force (-f) skip dependency checking between files
|
||||
--help (-h) show this help message
|
||||
|
||||
Options for 'build_ext' command:
|
||||
--build-dir (-d) directory for compiled extension modules
|
||||
--include-dirs (-I) list of directories to search for header files
|
||||
--define (-D) C preprocessor macros to define
|
||||
--undef (-U) C preprocessor macros to undefine
|
||||
--libs (-l) external C libraries to link with
|
||||
--library-dirs (-L) directories to search for external C libraries
|
||||
--rpath (-R) directories to search for shared C libraries at runtime
|
||||
--link-objects (-O) extra explicit link objects to include in the link
|
||||
|
||||
usage: ./setup.py [global_opts] cmd1 [cmd1_opts] [cmd2 [cmd2_opts] ...]
|
||||
or: ./setup.py --help
|
||||
or: ./setup.py --help-commands
|
||||
or: ./setup.py cmd --help
|
||||
|
||||
For example, suppose your openssl headers are in /usr/local/include/openssl and
|
||||
the library is in /usr/local/lib:
|
||||
./setup.py build_ext -I/usr/local/include/openssl -L/usr/local/lib
|
||||
|
|
|
|||
69
Makefile
69
Makefile
|
|
@ -1,65 +1,38 @@
|
|||
PY_INCLDIR = -I/usr/include/python1.5
|
||||
PY_LIBDIR = -L/usr/lib
|
||||
SSL_INCLDIR = -I/usr/include/openssl
|
||||
SSL_LIBDIR = -L/usr/lib
|
||||
|
||||
CC = gcc
|
||||
CFLAGS = -O6 -Wall
|
||||
LDFLAGS = -shared $(SSL_LIBDIR) $(PY_LIBDIR)
|
||||
CPPFLAGS = $(SSL_INCLDIR) $(PY_INCLDIR)
|
||||
|
||||
VERSION=1.1.2
|
||||
#HOST=treasure.calvinsplayground.de
|
||||
HOST=fsinfo.cs.uni-sb.de
|
||||
PROXY=www-proxy.uni-sb.de:3128
|
||||
VERSION=$(shell ./setup.py -q version)
|
||||
HOST=treasure.calvinsplayground.de
|
||||
PROXY=treasure.calvinsplayground.de:5050 -s
|
||||
#HOST=fsinfo.cs.uni-sb.de
|
||||
#PROXY=www-proxy.uni-sb.de:3128
|
||||
PACKAGE = linkchecker
|
||||
BZ2PACKAGE = $(PACKAGE)-$(VERSION).tar.bz2
|
||||
DEBPACKAGE = $(PACKAGE)_$(VERSION)_i386.deb
|
||||
ZIPPACKAGE = $(PACKAGE)-$(VERSION).zip
|
||||
ALLPACKAGES = ../$(BZ2PACKAGE) ../$(DEBPACKAGE) ../$(ZIPPACKAGE)
|
||||
.PHONY: test clean files install all
|
||||
ALLPACKAGES = ../$(DEBPACKAGE)
|
||||
DESTDIR=/.
|
||||
.PHONY: test clean files homepage dist install all
|
||||
TAR = tar
|
||||
ZIP = zip
|
||||
|
||||
all: ssl.so
|
||||
|
||||
ssl.so: ssl.o
|
||||
$(CC) $(LDFLAGS) -o $@ $? -lssl -lcrypto -lpython1.5
|
||||
all:
|
||||
|
||||
clean:
|
||||
rm -f ssl.{so,o} $(ALLPACKAGES) $(PACKAGE)-out.*
|
||||
./setup.py clean --all
|
||||
rm -rf $(ALLPACKAGES) $(PACKAGE)-out.*
|
||||
|
||||
.files-stamp: all
|
||||
./$(PACKAGE) -Wtext -Whtml -Wgml -Wsql -R -r2 -v -P $(PROXY) -i $(HOST) http://$(HOST)/~calvin/
|
||||
@touch .files-stamp
|
||||
install:
|
||||
./setup.py install --destdir=$(DESTDIR)
|
||||
install -c 755 linkchecker $(DESTDIR)/usr/bin
|
||||
install -c 644 linkcheckerrc $(DESTDIR)/etc
|
||||
|
||||
install: install-dirs
|
||||
install -m644 linkcheck/*.py? $(DESTDIR)/usr/share/$(PACKAGE)/linkcheck
|
||||
install -m644 DNS/*.py? $(DESTDIR)/usr/share/$(PACKAGE)/DNS
|
||||
install -m644 ssl.so *.py? $(DESTDIR)/usr/share/$(PACKAGE)
|
||||
install -m755 $(PACKAGE) $(DESTDIR)/usr/bin
|
||||
install -m644 $(PACKAGE)rc $(DESTDIR)/etc
|
||||
|
||||
install-dirs:
|
||||
install -d -m755 \
|
||||
$(DESTDIR)/usr/share/$(PACKAGE)/linkcheck \
|
||||
$(DESTDIR)/usr/share/$(PACKAGE)/DNS \
|
||||
$(DESTDIR)/usr/bin \
|
||||
$(DESTDIR)/etc
|
||||
|
||||
dist: .files-stamp
|
||||
dh_clean
|
||||
cd .. && $(TAR) cIhf $(BZ2PACKAGE) $(PACKAGE)
|
||||
cd .. && $(ZIP) -r $(ZIPPACKAGE) $(PACKAGE)
|
||||
dist:
|
||||
./setup.py sdist
|
||||
fakeroot debian/rules binary
|
||||
|
||||
homepage: .files-stamp
|
||||
files:
|
||||
./$(PACKAGE) -Wtext -Whtml -Wgml -Wsql -R -t0 -v -P$(PROXY) -i$(HOST) http://$(HOST)/~calvin/
|
||||
|
||||
homepage: files
|
||||
scp *-out.* shell1.sourceforge.net:/home/groups/linkchecker/htdocs/
|
||||
scp ChangeLog shell1.sourceforge.net:/home/groups/linkchecker/htdocs/changes.txt
|
||||
|
||||
package:
|
||||
cd .. && $(TAR) cIhf $(BZ2PACKAGE) $(PACKAGE)
|
||||
|
||||
test:
|
||||
rm -f test/*.result
|
||||
@for i in test/*.html; do \
|
||||
|
|
|
|||
19
README
19
README
|
|
@ -20,11 +20,26 @@ full of eels!
|
|||
As this program is directly derived from my Java link checker, additional
|
||||
credits go to Robert Forsman (the author of JCheckLinks) and his
|
||||
robots.txt parse algorithm.
|
||||
|
||||
I want to thank everybody who gave me feedback, bug reports and
|
||||
suggestions.
|
||||
|
||||
Versioning:
|
||||
Version numbers have the same meaning as Linux Kernel version numbers.
|
||||
The first number is the major package version. The second number is
|
||||
the minor package version. An odd second number stands for development
|
||||
versions, an even number for stable versions. The third number is a
|
||||
package release sequence number.
|
||||
So, for example, 1.1.5 is the fifth release of the 1.1 development package.
|
||||
|
||||
Included packages:
|
||||
http11lib from http://www.lyra.org/greg/python/
|
||||
httpslib from http://home.att.net/~nvsoft1/ssl_wrapper.html
|
||||
PyLR parser from http://starship.python.net/crew/scott/PyLR.html
|
||||
PyLR parser generator from http://starship.python.net/crew/scott/PyLR.html
|
||||
DNS see README.dns
|
||||
distutils from http://www.python.org/sigs/distutils-sig/
|
||||
fcgi.py from ???
|
||||
sz_fcgi.py from ???
|
||||
|
||||
BEWARE: the PyLR and http11lib packages are modified by me!
|
||||
It seems that http11lib and distutils will be included in Python 1.6, but
|
||||
for now I provide them myself.
|
||||
|
|
|
|||
4
debian/dirs
vendored
4
debian/dirs
vendored
|
|
@ -0,0 +1,4 @@
|
|||
usr/lib/python1.5/site-packages/DNS
|
||||
usr/lib/python1.5/site-packages/linkcheck
|
||||
usr/bin
|
||||
etc
|
||||
3
debian/rules
vendored
3
debian/rules
vendored
|
|
@ -12,7 +12,6 @@ build: build-stamp
|
|||
build-stamp:
|
||||
dh_testdir
|
||||
|
||||
|
||||
# Add here commands to compile the package.
|
||||
$(MAKE)
|
||||
|
||||
|
|
@ -32,7 +31,7 @@ install: build
|
|||
dh_testdir
|
||||
# dh_testroot
|
||||
dh_clean -k
|
||||
# dh_installdirs
|
||||
dh_installdirs
|
||||
# Add here commands to install the package into debian/tmp.
|
||||
$(MAKE) install DESTDIR=`pwd`/debian/tmp
|
||||
|
||||
|
|
|
|||
|
|
@ -99,7 +99,6 @@ class HttpUrlData(UrlData):
|
|||
if self.urlConnection:
|
||||
self.closeConnection()
|
||||
self.urlConnection = self._getHTTPObject(host)
|
||||
print host
|
||||
if self.proxy:
|
||||
path = urlparse.urlunparse(self.urlTuple)
|
||||
else:
|
||||
|
|
@ -109,9 +108,7 @@ class HttpUrlData(UrlData):
|
|||
if self.urlTuple[4]:
|
||||
path = path + "?" + self.urlTuple[4]
|
||||
self.urlConnection.putrequest(method, path)
|
||||
print path
|
||||
self.urlConnection.putheader("Host", self.urlTuple[1])
|
||||
print self.urlTuple[1]
|
||||
if auth:
|
||||
self.urlConnection.putheader("Authorization", auth)
|
||||
self.urlConnection.putheader("User-agent", Config.UserAgent)
|
||||
|
|
|
|||
|
|
@ -24,9 +24,9 @@ TableOK="<td bgcolor=\"3ba557\">"
|
|||
RowEnd="</td></tr>\n"
|
||||
MyFont="<font face=\"Lucida,Verdana,Arial,sans-serif,Helvetica\">"
|
||||
|
||||
# return current time
|
||||
def _currentTime():
|
||||
return time.strftime("%d.%m.%Y %H:%M:%S", time.localtime(time.time()))
|
||||
# return formatted time
|
||||
def _strtime(t):
|
||||
return time.strftime("%d.%m.%Y %H:%M:%S", time.localtime(t))
|
||||
|
||||
class StandardLogger:
|
||||
"""Standard text logger.
|
||||
|
|
@ -43,6 +43,7 @@ class StandardLogger:
|
|||
Info
|
||||
Warning
|
||||
D/L Time
|
||||
Check Time
|
||||
|
||||
Unknown keywords will be ignored.
|
||||
"""
|
||||
|
|
@ -58,11 +59,12 @@ class StandardLogger:
|
|||
|
||||
|
||||
def init(self):
|
||||
self.fd.write(Config.AppInfo+"\n"+\
|
||||
Config.Freeware+"\n"+\
|
||||
"Get the newest version at "+Config.Url+"\n"+\
|
||||
"Write comments and bugs to "+Config.Email+"\n\n"+\
|
||||
"Start checking at "+_currentTime()+"\n")
|
||||
self.starttime = time.time()
|
||||
self.fd.write(Config.AppInfo+"\n"+
|
||||
Config.Freeware+"\n"+
|
||||
"Get the newest version at "+Config.Url+"\n"+
|
||||
"Write comments and bugs to "+Config.Email+"\n\n"+
|
||||
"Start checking at "+_strtime(self.starttime)+"\n")
|
||||
self.fd.flush()
|
||||
|
||||
|
||||
|
|
@ -73,15 +75,18 @@ class StandardLogger:
|
|||
else:
|
||||
self.fd.write("\n")
|
||||
if urldata.parentName:
|
||||
self.fd.write("Parent URL "+urldata.parentName+", line "+str(urldata.line)+"\n")
|
||||
self.fd.write("Parent URL "+urldata.parentName+", line "+
|
||||
str(urldata.line)+"\n")
|
||||
if urldata.baseRef:
|
||||
self.fd.write("Base "+urldata.baseRef+"\n")
|
||||
if urldata.url:
|
||||
self.fd.write("Real URL "+urldata.url+"\n")
|
||||
if urldata.time:
|
||||
self.fd.write("D/L Time %.3f seconds\n" % urldata.time)
|
||||
if urldata.downloadtime:
|
||||
self.fd.write("D/L Time %.3f seconds\n" % urldata.downloadtime)
|
||||
if urldata.checktime:
|
||||
self.fd.write("Check Time %.3f seconds\n" % urldata.checktime)
|
||||
if urldata.infoString:
|
||||
self.fd.write("Info "+StringUtil.indent(\
|
||||
self.fd.write("Info "+StringUtil.indent(
|
||||
StringUtil.blocktext(urldata.infoString, 65), 11)+"\n")
|
||||
if urldata.warningString:
|
||||
self.warnings = self.warnings+1
|
||||
|
|
@ -108,7 +113,9 @@ class StandardLogger:
|
|||
else:
|
||||
self.fd.write(str(self.errors)+" errors")
|
||||
self.fd.write(" found.\n")
|
||||
self.fd.write("Stopped checking at "+_currentTime()+"\n")
|
||||
self.stoptime = time.time()
|
||||
self.fd.write("Stopped checking at "+_strtime(self.stoptime)+
|
||||
(" (%.3f seconds)" % (self.stoptime - self.starttime)))
|
||||
self.fd.flush()
|
||||
self.close()
|
||||
|
||||
|
|
@ -121,57 +128,62 @@ class HtmlLogger(StandardLogger):
|
|||
"""Logger with HTML output"""
|
||||
|
||||
def init(self):
|
||||
self.fd.write("<html><head><title>"+Config.AppName+"</title></head>"+\
|
||||
"<body bgcolor="+ColorBackground+" link="+ColorLink+\
|
||||
" vlink="+ColorLink+" alink="+ColorLink+">"+\
|
||||
"<center><h2>"+MyFont+Config.AppName+"</font>"+\
|
||||
"</center></h2>"+\
|
||||
"<br><blockquote>"+Config.Freeware+"<br><br>"+\
|
||||
"Start checking at "+_currentTime()+"<br><br>")
|
||||
self.starttime = time.time()
|
||||
self.fd.write("<html><head><title>"+Config.AppName+"</title></head>"+
|
||||
"<body bgcolor="+ColorBackground+" link="+ColorLink+
|
||||
" vlink="+ColorLink+" alink="+ColorLink+">"+
|
||||
"<center><h2>"+MyFont+Config.AppName+"</font>"+
|
||||
"</center></h2>"+
|
||||
"<br><blockquote>"+Config.Freeware+"<br><br>"+
|
||||
"Start checking at "+_strtime(self.starttime)+"<br><br>")
|
||||
self.fd.flush()
|
||||
|
||||
|
||||
def newUrl(self, urlData):
|
||||
self.fd.write("<table align=left border=\"0\" cellspacing=\"0\""+\
|
||||
" cellpadding=\"1\" bgcolor="+ColorBorder+">"+\
|
||||
"<tr><td><table align=left border=\"0\" cellspacing=\"0\""+\
|
||||
" cellpadding=\"3\" bgcolor="+ColorBackground+">"+\
|
||||
"<tr><td bgcolor="+ColorUrl+">"+\
|
||||
MyFont+"URL</font></td><td bgcolor="+ColorUrl+">"+MyFont+\
|
||||
self.fd.write("<table align=left border=\"0\" cellspacing=\"0\""
|
||||
" cellpadding=\"1\" bgcolor="+ColorBorder+
|
||||
"><tr><td><table align=left border=\"0\" cellspacing=\"0\""
|
||||
" cellpadding=\"3\" bgcolor="+ColorBackground+
|
||||
"><tr><td bgcolor="+ColorUrl+">"+
|
||||
MyFont+"URL</font></td><td bgcolor="+ColorUrl+">"+MyFont+
|
||||
StringUtil.htmlify(urlData.urlName))
|
||||
if urlData.cached:
|
||||
self.fd.write("(cached)")
|
||||
self.fd.write("</font>"+RowEnd)
|
||||
|
||||
if urlData.parentName:
|
||||
self.fd.write("<tr><td>"+MyFont+"Parent URL</font></td><td>"+\
|
||||
MyFont+"<a href=\""+urlData.parentName+"\">"+\
|
||||
urlData.parentName+"</a> line "+str(urlData.line)+\
|
||||
self.fd.write("<tr><td>"+MyFont+"Parent URL</font></td><td>"+
|
||||
MyFont+"<a href=\""+urlData.parentName+"\">"+
|
||||
urlData.parentName+"</a> line "+str(urlData.line)+
|
||||
"</font>"+RowEnd)
|
||||
if urlData.baseRef:
|
||||
self.fd.write("<tr><td>"+MyFont+"Base</font></td><td>"+MyFont+\
|
||||
self.fd.write("<tr><td>"+MyFont+"Base</font></td><td>"+MyFont+
|
||||
urlData.baseRef+"</font>"+RowEnd)
|
||||
if urlData.url:
|
||||
self.fd.write("<tr><td>"+MyFont+"Real URL</font></td><td>"+MyFont+\
|
||||
"<a href=\""+StringUtil.htmlify(urlData.url)+"\">"+\
|
||||
self.fd.write("<tr><td>"+MyFont+"Real URL</font></td><td>"+MyFont+
|
||||
"<a href=\""+StringUtil.htmlify(urlData.url)+"\">"+
|
||||
urlData.url+"</a></font>"+RowEnd)
|
||||
if urlData.time:
|
||||
self.fd.write("<tr><td>"+MyFont+"D/L Time</font></td><td>"+MyFont+\
|
||||
("%.3f" % urlData.time)+" seconds</font>"+RowEnd)
|
||||
if urlData.downloadtime:
|
||||
self.fd.write("<tr><td>"+MyFont+"D/L Time</font></td><td>"+MyFont+
|
||||
("%.3f" % urlData.downloadtime)+" seconds</font>"+RowEnd)
|
||||
if urlData.checktime:
|
||||
self.fd.write("<tr><td>"+MyFont+"Check Time</font></td><td>"+
|
||||
MyFont+("%.3f" % urlData.checktime)+" seconds</font>"+
|
||||
RowEnd)
|
||||
if urlData.infoString:
|
||||
self.fd.write("<tr><td>"+MyFont+"Info</font></td><td>"+MyFont+\
|
||||
self.fd.write("<tr><td>"+MyFont+"Info</font></td><td>"+MyFont+
|
||||
StringUtil.htmlify(urlData.infoString)+"</font>"+RowEnd)
|
||||
if urlData.warningString:
|
||||
self.warnings = self.warnings+1
|
||||
self.fd.write("<tr>"+TableWarning+MyFont+"Warning</font></td>"+\
|
||||
TableWarning+MyFont+urlData.warningString+\
|
||||
self.fd.write("<tr>"+TableWarning+MyFont+"Warning</font></td>"+
|
||||
TableWarning+MyFont+urlData.warningString+
|
||||
"</font>"+RowEnd)
|
||||
if urlData.valid:
|
||||
self.fd.write("<tr>"+TableOK+MyFont+"Result</font></td>"+\
|
||||
self.fd.write("<tr>"+TableOK+MyFont+"Result</font></td>"+
|
||||
TableOK+MyFont+urlData.validString+"</font>"+RowEnd)
|
||||
else:
|
||||
self.errors = self.errors+1
|
||||
self.fd.write("<tr>"+TableError+MyFont+"Result</font></td>"+\
|
||||
self.fd.write("<tr>"+TableError+MyFont+"Result</font></td>"+
|
||||
TableError+MyFont+urlData.errorString+"</font>"+RowEnd)
|
||||
|
||||
self.fd.write("</table></td></tr></table><br clear=all><br>")
|
||||
|
|
@ -189,13 +201,15 @@ class HtmlLogger(StandardLogger):
|
|||
else:
|
||||
self.fd.write(str(self.errors)+" errors")
|
||||
self.fd.write(" found.<br>")
|
||||
self.fd.write("Stopped checking at"+_currentTime()+\
|
||||
"</font></blockquote><br><hr noshade size=1><small>"+\
|
||||
MyFont+Config.HtmlAppInfo+"<br>Get the newest version at "+\
|
||||
"<a href=\""+Config.Url+"\">"+Config.Url+"</a>.<br>"+\
|
||||
"Write comments and bugs to <a href=\"mailto:"+\
|
||||
Config.Email+"\">"+Config.Email+"</a>."+\
|
||||
"</font></small></body></html>")
|
||||
self.stoptime = time.time()
|
||||
self.fd.write("Stopped checking at"+_strtime(self.stoptime)+
|
||||
("(%.3f seconds)" % (self.stoptime - self.starttime))+
|
||||
"</font></blockquote><br><hr noshade size=1><small>"+
|
||||
MyFont+Config.HtmlAppInfo+"<br>Get the newest version at "
|
||||
"<a href=\""+Config.Url+"\">"+Config.Url+
|
||||
"</a>.<br>Write comments and bugs to <a href=\"mailto:"+
|
||||
Config.Email+"\">"+Config.Email+
|
||||
"</a>.</font></small></body></html>")
|
||||
self.fd.flush()
|
||||
self.close()
|
||||
|
||||
|
|
@ -213,7 +227,7 @@ class ColoredLogger(StandardLogger):
|
|||
if self.currentPage != urlData.parentName:
|
||||
if self.prefix:
|
||||
self.fd.write("o\n")
|
||||
self.fd.write("\nParent URL "+COL_PARENT+urlData.parentName+\
|
||||
self.fd.write("\nParent URL "+COL_PARENT+urlData.parentName+
|
||||
COL_RESET+"\n")
|
||||
self.currentPage = urlData.parentName
|
||||
self.prefix = 1
|
||||
|
|
@ -243,20 +257,25 @@ class ColoredLogger(StandardLogger):
|
|||
if self.prefix:
|
||||
self.fd.write("| ")
|
||||
self.fd.write("Real URL "+COL_REAL+urlData.url+COL_RESET+"\n")
|
||||
if urlData.time:
|
||||
if urlData.downloadtime:
|
||||
if self.prefix:
|
||||
self.fd.write("| ")
|
||||
self.fd.write("D/L Time "+COL_DLTIME+("%.3f" % urlData.time)+" seconds"+\
|
||||
COL_RESET+"\n")
|
||||
self.fd.write("D/L Time "+COL_DLTIME+
|
||||
("%.3f" % urlData.downloadtime)+" seconds"+COL_RESET+"\n")
|
||||
if urlData.checktime:
|
||||
if self.prefix:
|
||||
self.fd.write("| ")
|
||||
self.fd.write("Check Time "+COL_DLTIME+
|
||||
("%.3f" % urlData.checktime)+" seconds"+COL_RESET+"\n")
|
||||
|
||||
if urlData.infoString:
|
||||
if self.prefix:
|
||||
self.fd.write("| Info "+\
|
||||
StringUtil.indentWith(StringUtil.blocktext(\
|
||||
self.fd.write("| Info "+
|
||||
StringUtil.indentWith(StringUtil.blocktext(
|
||||
urlData.infoString, 65), "| "))
|
||||
else:
|
||||
self.fd.write("Info "+\
|
||||
StringUtil.indentWith(StringUtil.blocktext(\
|
||||
self.fd.write("Info "+
|
||||
StringUtil.indentWith(StringUtil.blocktext(
|
||||
urlData.infoString, 65), " "))
|
||||
self.fd.write(COL_RESET+"\n")
|
||||
|
||||
|
|
@ -264,7 +283,7 @@ class ColoredLogger(StandardLogger):
|
|||
self.warnings = self.warnings+1
|
||||
if self.prefix:
|
||||
self.fd.write("| ")
|
||||
self.fd.write("Warning "+COL_WARNING+urlData.warningString+\
|
||||
self.fd.write("Warning "+COL_WARNING+urlData.warningString+
|
||||
COL_RESET+"\n")
|
||||
|
||||
if self.prefix:
|
||||
|
|
@ -293,9 +312,10 @@ class GMLLogger(StandardLogger):
|
|||
self.nodes = []
|
||||
|
||||
def init(self):
|
||||
self.fd.write("# created by "+Config.AppName+" at "+_currentTime()+\
|
||||
"\n# you get "+Config.AppName+" at "+Config.Url+\
|
||||
"\n# write comments and bugs to "+Config.Email+\
|
||||
self.fd.write("# created by "+Config.AppName+" at "+
|
||||
_strtime(time.time())+
|
||||
"\n# you get "+Config.AppName+" at "+Config.Url+
|
||||
"\n# write comments and bugs to "+Config.Email+
|
||||
"\ngraph [\n directed 1\n")
|
||||
self.fd.flush()
|
||||
|
||||
|
|
@ -311,8 +331,10 @@ class GMLLogger(StandardLogger):
|
|||
self.fd.write(" node [\n")
|
||||
self.fd.write(" id "+`nodeid`+"\n")
|
||||
self.fd.write(' label "'+node.url+'"'+"\n")
|
||||
if node.time:
|
||||
self.fd.write(" dltime "+`node.time`+"\n")
|
||||
if node.downloadtime:
|
||||
self.fd.write(" dltime "+`node.downloadtime`+"\n")
|
||||
if node.checktime:
|
||||
self.fd.write(" checktime "+`node.checktime`+"\n")
|
||||
self.fd.write(" extern ")
|
||||
if node.extern: self.fd.write("1")
|
||||
else: self.fd.write("0")
|
||||
|
|
@ -324,7 +346,8 @@ class GMLLogger(StandardLogger):
|
|||
if node.url and node.parentName:
|
||||
self.fd.write(" edge [\n")
|
||||
self.fd.write(' label "'+node.urlName+'"\n')
|
||||
self.fd.write(" source "+`writtenNodes[node.parentName]`+"\n")
|
||||
self.fd.write(" source "+`writtenNodes[node.parentName]`+
|
||||
"\n")
|
||||
self.fd.write(" target "+`writtenNodes[node.url]`+"\n")
|
||||
self.fd.write(" valid ")
|
||||
if node.valid: self.fd.write("1")
|
||||
|
|
@ -339,36 +362,28 @@ class GMLLogger(StandardLogger):
|
|||
class SQLLogger(StandardLogger):
|
||||
""" SQL output for PostgreSQL, not tested"""
|
||||
def init(self):
|
||||
self.fd.write("-- created by "+Config.AppName+" at "+_currentTime()+\
|
||||
"\n-- you get "+Config.AppName+" at "+Config.Url+\
|
||||
self.fd.write("-- created by "+Config.AppName+" at "+
|
||||
_strtime(time.time())+
|
||||
"\n-- you get "+Config.AppName+" at "+Config.Url+
|
||||
"\n-- write comments and bugs to "+Config.Email+"\n\n")
|
||||
self.fd.flush()
|
||||
|
||||
def newUrl(self, urlData):
|
||||
self.fd.write("insert into linksdb(urlname,"+\
|
||||
"recursionlevel,"+\
|
||||
"parentname,"+\
|
||||
"baseref,"+\
|
||||
"errorstring,"+\
|
||||
"validstring,"+\
|
||||
"warningstring,"+\
|
||||
"infoString,"+\
|
||||
"valid,"+\
|
||||
"url,"+\
|
||||
"line,"+\
|
||||
"cached) values ")
|
||||
self.fd.write("'"+urlData.urlName+"',"+\
|
||||
`urlData.recursionLevel`+","+\
|
||||
StringUtil.sqlify(urlData.parentName)+","+\
|
||||
StringUtil.sqlify(urlData.baseRef)+","+\
|
||||
StringUtil.sqlify(urlData.errorString)+","+\
|
||||
StringUtil.sqlify(urlData.validString)+","+\
|
||||
StringUtil.sqlify(urlData.warningString)+","+\
|
||||
StringUtil.sqlify(urlData.infoString)+","+\
|
||||
`urlData.valid`+","+\
|
||||
StringUtil.sqlify(urlData.url)+","+\
|
||||
`urlData.line`+","+\
|
||||
`urlData.cached`+");\n")
|
||||
self.fd.write("insert into linksdb(urlname,recursionlevel,parentname,"
|
||||
"baseref,errorstring,validstring,warningstring,"
|
||||
"infoString,valid,url,line,cached) values '"+
|
||||
urlData.urlName+"',"+
|
||||
`urlData.recursionLevel`+","+
|
||||
StringUtil.sqlify(urlData.parentName)+","+
|
||||
StringUtil.sqlify(urlData.baseRef)+","+
|
||||
StringUtil.sqlify(urlData.errorString)+","+
|
||||
StringUtil.sqlify(urlData.validString)+","+
|
||||
StringUtil.sqlify(urlData.warningString)+","+
|
||||
StringUtil.sqlify(urlData.infoString)+","+
|
||||
`urlData.valid`+","+
|
||||
StringUtil.sqlify(urlData.url)+","+
|
||||
`urlData.line`+","+
|
||||
`urlData.cached`+");\n")
|
||||
self.fd.flush()
|
||||
|
||||
def endOfOutput(self):
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ class UrlData:
|
|||
recursionLevel,
|
||||
parentName = None,
|
||||
baseRef = None,
|
||||
line = 0, _time = 0):
|
||||
line = None, _time = None):
|
||||
self.urlName = urlName
|
||||
self.recursionLevel = recursionLevel
|
||||
self.parentName = parentName
|
||||
|
|
@ -30,7 +30,8 @@ class UrlData:
|
|||
self.valid = 1
|
||||
self.url = None
|
||||
self.line = line
|
||||
self.time = _time
|
||||
self.downloadtime = _time
|
||||
self.checktime = None
|
||||
self.cached = 0
|
||||
self.urlConnection = None
|
||||
self.extern = 1
|
||||
|
|
@ -65,7 +66,7 @@ class UrlData:
|
|||
self.warningString = urlData.warningString
|
||||
self.infoString = urlData.infoString
|
||||
self.valid = urlData.valid
|
||||
self.time = urlData.time
|
||||
self.downloadtime = urlData.downloadtime
|
||||
|
||||
def buildUrl(self):
|
||||
if self.baseRef:
|
||||
|
|
@ -89,6 +90,7 @@ class UrlData:
|
|||
def check(self, config):
|
||||
Config.debug(Config.DebugDelim+"Checking\n"+str(self)+"\n"+\
|
||||
Config.DebugDelim)
|
||||
t = time.time()
|
||||
# check syntax
|
||||
Config.debug("DEBUG: checking syntax\n")
|
||||
if not self.urlName or self.urlName=="":
|
||||
|
|
@ -128,7 +130,8 @@ class UrlData:
|
|||
except:
|
||||
type, value = sys.exc_info()[:2]
|
||||
self.setError(str(value))
|
||||
|
||||
|
||||
self.checktime = time.time() - t
|
||||
# check recursion
|
||||
Config.debug("DEBUG: checking recursion\n")
|
||||
if self.allowsRecursion(config):
|
||||
|
|
@ -203,7 +206,7 @@ class UrlData:
|
|||
"""
|
||||
t = time.time()
|
||||
data = StringUtil.stripHtmlComments(self.urlConnection.read())
|
||||
self.time = time.time() - t
|
||||
self.downloadtime = time.time() - t
|
||||
return data
|
||||
|
||||
def parseUrl(self, config):
|
||||
|
|
@ -216,8 +219,8 @@ class UrlData:
|
|||
baseRef = None
|
||||
if len(bases)>=1:
|
||||
baseRef = bases[0][0]
|
||||
if len(bases)>1:
|
||||
self.setWarning("more than one base tag found")
|
||||
if len(bases)>1:
|
||||
self.setWarning("more than one base tag found")
|
||||
|
||||
# search for tags and add found tags to URL queue
|
||||
for tag in LinkTags:
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ if sys.version[:5] < "1.5.2":
|
|||
sys.exit(1)
|
||||
|
||||
# add the path to linkcheck module
|
||||
sys.path.insert(0, "/usr/share/linkchecker")
|
||||
#sys.path.insert(0, "/home/calvin/projects/linkchecker")
|
||||
|
||||
import linkcheck
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
rem === adjust vars below ===
|
||||
set PYTHON=c:\progra~1\python\python.exe
|
||||
set LINKCHECKER=c:\progra~1\linkchecker-1.1.0
|
||||
set LINKCHECKER=c:\progra~1\linkchecker-1.1.2
|
||||
rem === end configure ===
|
||||
|
||||
%PYTHON% %LINKCHECKER%\linkchecker %1 %2 %3 %4 %5 %6 %7 %8 %9
|
||||
|
|
|
|||
|
|
@ -2,19 +2,19 @@
|
|||
# see linkchecker -h for help on these options
|
||||
|
||||
[output]
|
||||
debug=0
|
||||
log=text
|
||||
verbose=0
|
||||
warnings=0
|
||||
quiet=0
|
||||
#debug=0
|
||||
#log=text
|
||||
#verbose=0
|
||||
#warnings=0
|
||||
#quiet=0
|
||||
#fileoutput = text colored html gml sql
|
||||
|
||||
[checking]
|
||||
threads=5
|
||||
anchors=0
|
||||
recursionlevel=1
|
||||
robotstxt=0
|
||||
strict=0
|
||||
#threads=5
|
||||
#anchors=0
|
||||
#recursionlevel=1
|
||||
#robotstxt=0
|
||||
#strict=0
|
||||
#proxy=www-proxy.uni-sb.de
|
||||
#proxyport=3128
|
||||
|
||||
|
|
@ -23,7 +23,7 @@ strict=0
|
|||
# strict avoid checking of local files
|
||||
#extern1=^file:.* 1
|
||||
#internlinks=
|
||||
allowdeny=0
|
||||
#allowdeny=0
|
||||
|
||||
# You can provide different user/password pairs for different link types.
|
||||
# Entries are a triple with (link regular expression, username, password),
|
||||
|
|
@ -33,5 +33,5 @@ allowdeny=0
|
|||
# and therefore override the entries given here. The first match wins.
|
||||
# At the moment, authentication is used/needed for http[s] and ftp links.
|
||||
[authentication]
|
||||
entry1=^http://treasure\.calvinsplayground\.de/~calvin/isnichmehr/.* lebowski lebowski
|
||||
#entry1=^http://treasure\.calvinsplayground\.de/~calvin/isnichmehr/.* lebowski lebowski
|
||||
#entry2=^ftp://void.cs.uni-sb.de calvin hutzli
|
||||
|
|
|
|||
Loading…
Reference in a new issue