git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@101 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2000-06-03 17:09:10 +00:00
parent 2eb32965be
commit d9a64ffe8c
6 changed files with 35 additions and 31 deletions

View file

@@ -2,13 +2,17 @@
# call make.
VERSION=$(shell python setup.py --version)
HOST=treasure.calvinsplayground.de
#PROXY=
PROXY=-P$(HOST):5050
PROXY=--proxy= -itreasure.calvinsplayground.de -s
#PROXY=-P$(HOST):5050
#HOST=fsinfo.cs.uni-sb.de
#PROXY=-Pwww-proxy.uni-sb.de:3128
LCOPTS=-ocolored -Ftext -Fhtml -Fgml -Fsql -Fcsv -R -t0 -v
PACKAGE = linkchecker
DEBPACKAGE = $(PACKAGE)_$(VERSION)_i386.deb
ALLPACKAGES = ../$(DEBPACKAGE)
DEBPACKAGE = ../$(PACKAGE)_$(VERSION)_i386.deb
RPMPATH=build/bdist.linux2-i686/rpm
RPMPACKAGE=$(RPMPATH)/RPMS/i386/$(PACKAGE)-$(VERSION)-1.i386.rpm
SRPMPACKAGE=$(RPMPATH)/SRPMS/$(PACKAGE)-$(VERSION)-1.src.rpm
ALLPACKAGES = $(DEBPACKAGE) $(RPMPACKAGE) $(SRPMPACKAGE)
SOURCES = linkcheck/Config.py \
linkcheck/FileUrlData.py \
linkcheck/FtpUrlData.py \
@@ -40,11 +44,12 @@ clean:
rm -rf $(ALLPACKAGES) $(PACKAGE)-out.*
dist:
python setup.py sdist bdist_rpm
python setup.py sdist
python setup.py bdist_rpm
fakeroot debian/rules binary
files:
./$(PACKAGE) -ocolored -Ftext -Fhtml -Fgml -Fsql -Fcsv -R -t0 -v -D $(PROXY) -i$(HOST) http://$(HOST)/~calvin/
./$(PACKAGE) $(LCOPTS) $(PROXY) -i$(HOST) http://$(HOST)/~calvin/
homepage:
scp debian/changelog shell1.sourceforge.net:/home/groups/linkchecker/htdocs/changes.txt

View file

@@ -134,7 +134,7 @@ class Configuration(UserDict.UserDict):
self.data['blacklist'] = {
"filename": "~/.blacklist",
}
self.data["log"] = self.newLogger('text')
self.newLogger('text')
self.data["quiet"] = 0
self.data["warningregex"] = None
self.data["nntpserver"] = os.environ.get("NNTP_SERVER",None)
@@ -384,7 +384,7 @@ class Configuration(UserDict.UserDict):
try:
log = cfgparser.get(section, "log")
if Loggers.has_key(log):
self.data["log"] = self.newLogger(log)
self.newLogger(log)
else:
self.warn("invalid log option "+log)
except ConfigParser.Error: pass
@@ -402,7 +402,7 @@ class Configuration(UserDict.UserDict):
for arg in filelist:
# no file output for the blacklist Logger
if Loggers.has_key(arg) and arg != "blacklist":
self.data["fileoutput"].append(self.newLogger(arg, 1))
self.newLogger(arg, 1)
except ConfigParser.Error: pass
for key in Loggers.keys():
if cfgparser.has_section(key):

View file

@@ -88,7 +88,7 @@ class HttpUrlData(UrlData):
redirected = urlparse.urljoin(redirected, self.mime.getheader("Location"))
self.urlTuple = urlparse.urlparse(redirected)
status, statusText, self.mime = self._getHttpRequest()
Config.debug("\nRedirected\n"+str(self.mime))
Config.debug("DEBUG: Redirected\n"+str(self.mime))
tries = tries + 1
# authentication
@@ -99,7 +99,7 @@ class HttpUrlData(UrlData):
self.auth = "Basic "+\
string.strip(base64.encodestring(_user+":"+_password))
status, statusText, self.mime = self._getHttpRequest()
Config.debug("Authentication "+_user+"/"+_password+"\n")
Config.debug("DEBUG: Authentication "+_user+"/"+_password+"\n")
# Netscape Enterprise Server returns errors with HEAD
# request, but valid urls with GET request. Bummer!
@@ -107,7 +107,7 @@ class HttpUrlData(UrlData):
server = self.mime.getheader("Server")
if server and self.netscape_re.search(server):
status, statusText, self.mime = self._getHttpRequest("GET")
Config.debug("Netscape Enterprise Server detected\n")
Config.debug("DEBUG: Netscape Enterprise Server detected\n")
if status not in [301,302]: break
effectiveurl = urlparse.urlunparse(self.urlTuple)
@@ -169,6 +169,7 @@ class HttpUrlData(UrlData):
self.data = self.urlConnection.read()
self.downloadtime = time.time() - t
self._init_html_comments()
Config.debug("DEBUG: comment spans %s\n" % self.html_comments)
return self.data
def isHtml(self):

View file

@@ -172,7 +172,7 @@ class HtmlLogger(StandardLogger):
self.fd.write("<html><head><title>"+Config.App+"</title></head>"+
"<body bgcolor="+self.colorbackground+" link="+self.colorlink+
" vlink="+self.colorlink+" alink="+self.colorlink+">"+
"<center><h2>"+MyFont+Config.AppName+"</font>"+
"<center><h2>"+MyFont+Config.App+"</font>"+
"</center></h2>"+
"<br><blockquote>"+Config.Freeware+"<br><br>"+
(_("Start checking at %s\n") % _strtime(self.starttime))+
@@ -447,8 +447,8 @@ class SQLLogger(StandardLogger):
def newUrl(self, urlData):
self.fd.write("insert into %s(urlname,recursionlevel,parentname,"
"baseref,errorstring,validstring,warningstring,infoString,"
"valid,url,line,checktime,downloadtime,cached) values ('%s',"
"%d,'%s','%s','%s','%s','%s','%s',%d,'%s',%d,%d,%d,%d)%s\n" % \
"valid,url,line,checktime,downloadtime,cached) values "
"(%s,%d,%s,%s,%s,%s,%s,%s,%d,%s,%d,%d,%d,%d)%s\n" % \
(self.dbname,
StringUtil.sqlify(urlData.urlName),
urlData.recursionLevel,
@@ -472,6 +472,7 @@ class SQLLogger(StandardLogger):
self.fd.write(_("-- Stopped checking at %s (%.3f seconds)\n") %\
(_strtime(self.stoptime),
(self.stoptime - self.starttime)))
self.fd.flush()
self.fd = None
@@ -534,7 +535,7 @@ class CSVLogger(StandardLogger):
def newUrl(self, urlData):
self.fd.write(
"%s%s%d%s%s%s%s%s%s%s%s%s%s%s%s%s%d%s%s%s%d%%s%d%s%d%s%d\n" % (
"%s%s%d%s%s%s%s%s%s%s%s%s%s%s%s%s%d%s%s%s%d%s%d%s%d%s%d\n" % (
urlData.urlName, self.separator,
urlData.recursionLevel, self.separator,
urlData.parentName, self.separator,
@@ -556,5 +557,6 @@ class CSVLogger(StandardLogger):
self.fd.write(_("# Stopped checking at %s (%.3f seconds)\n") %\
(_strtime(self.stoptime),
(self.stoptime - self.starttime)))
self.fd.flush()
self.fd = None

View file

@@ -42,7 +42,7 @@ class UrlData:
recursionLevel,
parentName = None,
baseRef = None,
line = None):
line = 0):
self.urlName = urlName
self.recursionLevel = recursionLevel
self.parentName = parentName
@@ -54,8 +54,8 @@ class UrlData:
self.valid = 1
self.url = None
self.line = line
self.downloadtime = None
self.checktime = None
self.downloadtime = 0
self.checktime = 0
self.cached = 0
self.urlConnection = None
self.extern = 1
@@ -228,7 +228,6 @@ class UrlData:
if not (config["externlinks"] or config["internlinks"]):
return 0
# deny and allow external checking
Config.debug(self.url)
if config["allowdeny"]:
for pat in config["internlinks"]:
if pat.search(self.url):
@@ -253,13 +252,14 @@ class UrlData:
self.data = self.urlConnection.read()
self.downloadtime = time.time() - t
self._init_html_comments()
Config.debug("DEBUG: comment spans %s\n" % self.html_comments)
return self.data
def _init_html_comments(self):
# if we find an URL inside HTML comments we ignore it
# so build a list of intervalls which are HTML comments
pattern = re.compile("<!--.*?-->")
pattern = re.compile("<!--.*?-->", re.DOTALL)
index = 0
while 1:
match = pattern.search(self.data, index)

View file

@@ -8,7 +8,7 @@ if sys.version[:5] < "1.5.2":
sys.exit(1)
# add the path to linkcheck module if you do not install with distutils
#sys.path.append("/home/calvin/projects/linkchecker")
sys.path.append("/home/calvin/projects/linkchecker")
import getopt,re,string,os
import linkcheck,StringUtil
from linkcheck import _
@@ -160,15 +160,11 @@ except getopt.error:
# apply configuration
config = linkcheck.Config.Configuration()
try:
configfiles = []
for opt,arg in options:
if opt=="-f" or opt=="--config":
configfiles.append(arg)
config.read(configfiles)
except IOError:
type, value = sys.exc_info()[:2]
printUsage(value)
configfiles = []
for opt,arg in options:
if opt=="-f" or opt=="--config":
configfiles.append(arg)
config.read(configfiles)
# check for environment variables (currently only http_proxy)
if os.environ.has_key("http_proxy"):