mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-22 08:50:24 +00:00
fixes
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@101 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
2eb32965be
commit
d9a64ffe8c
6 changed files with 35 additions and 31 deletions
17
Makefile
17
Makefile
|
|
@ -2,13 +2,17 @@
|
|||
# call make.
|
||||
VERSION=$(shell python setup.py --version)
|
||||
HOST=treasure.calvinsplayground.de
|
||||
#PROXY=
|
||||
PROXY=-P$(HOST):5050
|
||||
PROXY=--proxy= -itreasure.calvinsplayground.de -s
|
||||
#PROXY=-P$(HOST):5050
|
||||
#HOST=fsinfo.cs.uni-sb.de
|
||||
#PROXY=-Pwww-proxy.uni-sb.de:3128
|
||||
LCOPTS=-ocolored -Ftext -Fhtml -Fgml -Fsql -Fcsv -R -t0 -v
|
||||
PACKAGE = linkchecker
|
||||
DEBPACKAGE = $(PACKAGE)_$(VERSION)_i386.deb
|
||||
ALLPACKAGES = ../$(DEBPACKAGE)
|
||||
DEBPACKAGE = ../$(PACKAGE)_$(VERSION)_i386.deb
|
||||
RPMPATH=build/bdist.linux2-i686/rpm
|
||||
RPMPACKAGE=$(RPMPATH)/RPMS/i386/$(PACKAGE)-$(VERSION)-1.i386.rpm
|
||||
SRPMPACKAGE=$(RPMPATH)/SRPMS/$(PACKAGE)-$(VERSION)-1.src.rpm
|
||||
ALLPACKAGES = $(DEBPACKAGE) $(RPMPACKAGE) $(SRPMPACKAGE)
|
||||
SOURCES = linkcheck/Config.py \
|
||||
linkcheck/FileUrlData.py \
|
||||
linkcheck/FtpUrlData.py \
|
||||
|
|
@ -40,11 +44,12 @@ clean:
|
|||
rm -rf $(ALLPACKAGES) $(PACKAGE)-out.*
|
||||
|
||||
dist:
|
||||
python setup.py sdist bdist_rpm
|
||||
python setup.py sdist
|
||||
python setup.py bdist_rpm
|
||||
fakeroot debian/rules binary
|
||||
|
||||
files:
|
||||
./$(PACKAGE) -ocolored -Ftext -Fhtml -Fgml -Fsql -Fcsv -R -t0 -v -D $(PROXY) -i$(HOST) http://$(HOST)/~calvin/
|
||||
./$(PACKAGE) $(LCOPTS) $(PROXY) -i$(HOST) http://$(HOST)/~calvin/
|
||||
|
||||
homepage:
|
||||
scp debian/changelog shell1.sourceforge.net:/home/groups/linkchecker/htdocs/changes.txt
|
||||
|
|
|
|||
|
|
@ -134,7 +134,7 @@ class Configuration(UserDict.UserDict):
|
|||
self.data['blacklist'] = {
|
||||
"filename": "~/.blacklist",
|
||||
}
|
||||
self.data["log"] = self.newLogger('text')
|
||||
self.newLogger('text')
|
||||
self.data["quiet"] = 0
|
||||
self.data["warningregex"] = None
|
||||
self.data["nntpserver"] = os.environ.get("NNTP_SERVER",None)
|
||||
|
|
@ -384,7 +384,7 @@ class Configuration(UserDict.UserDict):
|
|||
try:
|
||||
log = cfgparser.get(section, "log")
|
||||
if Loggers.has_key(log):
|
||||
self.data["log"] = self.newLogger(log)
|
||||
self.newLogger(log)
|
||||
else:
|
||||
self.warn("invalid log option "+log)
|
||||
except ConfigParser.Error: pass
|
||||
|
|
@ -402,7 +402,7 @@ class Configuration(UserDict.UserDict):
|
|||
for arg in filelist:
|
||||
# no file output for the blacklist Logger
|
||||
if Loggers.has_key(arg) and arg != "blacklist":
|
||||
self.data["fileoutput"].append(self.newLogger(arg, 1))
|
||||
self.newLogger(arg, 1)
|
||||
except ConfigParser.Error: pass
|
||||
for key in Loggers.keys():
|
||||
if cfgparser.has_section(key):
|
||||
|
|
|
|||
|
|
@ -88,7 +88,7 @@ class HttpUrlData(UrlData):
|
|||
redirected = urlparse.urljoin(redirected, self.mime.getheader("Location"))
|
||||
self.urlTuple = urlparse.urlparse(redirected)
|
||||
status, statusText, self.mime = self._getHttpRequest()
|
||||
Config.debug("\nRedirected\n"+str(self.mime))
|
||||
Config.debug("DEBUG: Redirected\n"+str(self.mime))
|
||||
tries = tries + 1
|
||||
|
||||
# authentication
|
||||
|
|
@ -99,7 +99,7 @@ class HttpUrlData(UrlData):
|
|||
self.auth = "Basic "+\
|
||||
string.strip(base64.encodestring(_user+":"+_password))
|
||||
status, statusText, self.mime = self._getHttpRequest()
|
||||
Config.debug("Authentication "+_user+"/"+_password+"\n")
|
||||
Config.debug("DEBUG: Authentication "+_user+"/"+_password+"\n")
|
||||
|
||||
# Netscape Enterprise Server returns errors with HEAD
|
||||
# request, but valid urls with GET request. Bummer!
|
||||
|
|
@ -107,7 +107,7 @@ class HttpUrlData(UrlData):
|
|||
server = self.mime.getheader("Server")
|
||||
if server and self.netscape_re.search(server):
|
||||
status, statusText, self.mime = self._getHttpRequest("GET")
|
||||
Config.debug("Netscape Enterprise Server detected\n")
|
||||
Config.debug("DEBUG: Netscape Enterprise Server detected\n")
|
||||
if status not in [301,302]: break
|
||||
|
||||
effectiveurl = urlparse.urlunparse(self.urlTuple)
|
||||
|
|
@ -169,6 +169,7 @@ class HttpUrlData(UrlData):
|
|||
self.data = self.urlConnection.read()
|
||||
self.downloadtime = time.time() - t
|
||||
self._init_html_comments()
|
||||
Config.debug("DEBUG: comment spans %s\n" % self.html_comments)
|
||||
return self.data
|
||||
|
||||
def isHtml(self):
|
||||
|
|
|
|||
|
|
@ -172,7 +172,7 @@ class HtmlLogger(StandardLogger):
|
|||
self.fd.write("<html><head><title>"+Config.App+"</title></head>"+
|
||||
"<body bgcolor="+self.colorbackground+" link="+self.colorlink+
|
||||
" vlink="+self.colorlink+" alink="+self.colorlink+">"+
|
||||
"<center><h2>"+MyFont+Config.AppName+"</font>"+
|
||||
"<center><h2>"+MyFont+Config.App+"</font>"+
|
||||
"</center></h2>"+
|
||||
"<br><blockquote>"+Config.Freeware+"<br><br>"+
|
||||
(_("Start checking at %s\n") % _strtime(self.starttime))+
|
||||
|
|
@ -447,8 +447,8 @@ class SQLLogger(StandardLogger):
|
|||
def newUrl(self, urlData):
|
||||
self.fd.write("insert into %s(urlname,recursionlevel,parentname,"
|
||||
"baseref,errorstring,validstring,warningstring,infoString,"
|
||||
"valid,url,line,checktime,downloadtime,cached) values ('%s',"
|
||||
"%d,'%s','%s','%s','%s','%s','%s',%d,'%s',%d,%d,%d,%d)%s\n" % \
|
||||
"valid,url,line,checktime,downloadtime,cached) values "
|
||||
"(%s,%d,%s,%s,%s,%s,%s,%s,%d,%s,%d,%d,%d,%d)%s\n" % \
|
||||
(self.dbname,
|
||||
StringUtil.sqlify(urlData.urlName),
|
||||
urlData.recursionLevel,
|
||||
|
|
@ -472,6 +472,7 @@ class SQLLogger(StandardLogger):
|
|||
self.fd.write(_("-- Stopped checking at %s (%.3f seconds)\n") %\
|
||||
(_strtime(self.stoptime),
|
||||
(self.stoptime - self.starttime)))
|
||||
self.fd.flush()
|
||||
self.fd = None
|
||||
|
||||
|
||||
|
|
@ -534,7 +535,7 @@ class CSVLogger(StandardLogger):
|
|||
|
||||
def newUrl(self, urlData):
|
||||
self.fd.write(
|
||||
"%s%s%d%s%s%s%s%s%s%s%s%s%s%s%s%s%d%s%s%s%d%%s%d%s%d%s%d\n" % (
|
||||
"%s%s%d%s%s%s%s%s%s%s%s%s%s%s%s%s%d%s%s%s%d%s%d%s%d%s%d\n" % (
|
||||
urlData.urlName, self.separator,
|
||||
urlData.recursionLevel, self.separator,
|
||||
urlData.parentName, self.separator,
|
||||
|
|
@ -556,5 +557,6 @@ class CSVLogger(StandardLogger):
|
|||
self.fd.write(_("# Stopped checking at %s (%.3f seconds)\n") %\
|
||||
(_strtime(self.stoptime),
|
||||
(self.stoptime - self.starttime)))
|
||||
self.fd.flush()
|
||||
self.fd = None
|
||||
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ class UrlData:
|
|||
recursionLevel,
|
||||
parentName = None,
|
||||
baseRef = None,
|
||||
line = None):
|
||||
line = 0):
|
||||
self.urlName = urlName
|
||||
self.recursionLevel = recursionLevel
|
||||
self.parentName = parentName
|
||||
|
|
@ -54,8 +54,8 @@ class UrlData:
|
|||
self.valid = 1
|
||||
self.url = None
|
||||
self.line = line
|
||||
self.downloadtime = None
|
||||
self.checktime = None
|
||||
self.downloadtime = 0
|
||||
self.checktime = 0
|
||||
self.cached = 0
|
||||
self.urlConnection = None
|
||||
self.extern = 1
|
||||
|
|
@ -228,7 +228,6 @@ class UrlData:
|
|||
if not (config["externlinks"] or config["internlinks"]):
|
||||
return 0
|
||||
# deny and allow external checking
|
||||
Config.debug(self.url)
|
||||
if config["allowdeny"]:
|
||||
for pat in config["internlinks"]:
|
||||
if pat.search(self.url):
|
||||
|
|
@ -253,13 +252,14 @@ class UrlData:
|
|||
self.data = self.urlConnection.read()
|
||||
self.downloadtime = time.time() - t
|
||||
self._init_html_comments()
|
||||
Config.debug("DEBUG: comment spans %s\n" % self.html_comments)
|
||||
return self.data
|
||||
|
||||
|
||||
def _init_html_comments(self):
|
||||
# if we find a URL inside HTML comments we ignore it
|
||||
# so build a list of intervals which are HTML comments
|
||||
pattern = re.compile("<!--.*?-->")
|
||||
pattern = re.compile("<!--.*?-->", re.DOTALL)
|
||||
index = 0
|
||||
while 1:
|
||||
match = pattern.search(self.data, index)
|
||||
|
|
|
|||
16
linkchecker
16
linkchecker
|
|
@ -8,7 +8,7 @@ if sys.version[:5] < "1.5.2":
|
|||
sys.exit(1)
|
||||
|
||||
# add the path to linkcheck module if you do not install with distutils
|
||||
#sys.path.append("/home/calvin/projects/linkchecker")
|
||||
sys.path.append("/home/calvin/projects/linkchecker")
|
||||
import getopt,re,string,os
|
||||
import linkcheck,StringUtil
|
||||
from linkcheck import _
|
||||
|
|
@ -160,15 +160,11 @@ except getopt.error:
|
|||
|
||||
# apply configuration
|
||||
config = linkcheck.Config.Configuration()
|
||||
try:
|
||||
configfiles = []
|
||||
for opt,arg in options:
|
||||
if opt=="-f" or opt=="--config":
|
||||
configfiles.append(arg)
|
||||
config.read(configfiles)
|
||||
except IOError:
|
||||
type, value = sys.exc_info()[:2]
|
||||
printUsage(value)
|
||||
configfiles = []
|
||||
for opt,arg in options:
|
||||
if opt=="-f" or opt=="--config":
|
||||
configfiles.append(arg)
|
||||
config.read(configfiles)
|
||||
|
||||
# check for environment variables (currently only http_proxy)
|
||||
if os.environ.has_key("http_proxy"):
|
||||
|
|
|
|||
Loading…
Reference in a new issue