diff --git a/Makefile b/Makefile index d33cbc82..227b76b5 100644 --- a/Makefile +++ b/Makefile @@ -2,13 +2,17 @@ # call make. VERSION=$(shell python setup.py --version) HOST=treasure.calvinsplayground.de -#PROXY= -PROXY=-P$(HOST):5050 +PROXY=--proxy= -itreasure.calvinsplayground.de -s +#PROXY=-P$(HOST):5050 #HOST=fsinfo.cs.uni-sb.de #PROXY=-Pwww-proxy.uni-sb.de:3128 +LCOPTS=-ocolored -Ftext -Fhtml -Fgml -Fsql -Fcsv -R -t0 -v PACKAGE = linkchecker -DEBPACKAGE = $(PACKAGE)_$(VERSION)_i386.deb -ALLPACKAGES = ../$(DEBPACKAGE) +DEBPACKAGE = ../$(PACKAGE)_$(VERSION)_i386.deb +RPMPATH=build/bdist.linux2-i686/rpm +RPMPACKAGE=$(RPMPATH)/RPMS/i386/$(PACKAGE)-$(VERSION)-1.i386.rpm +SRPMPACKAGE=$(RPMPATH)/SRPMS/$(PACKAGE)-$(VERSION)-1.src.rpm +ALLPACKAGES = $(DEBPACKAGE) $(RPMPACKAGE) $(SRPMPACKAGE) SOURCES = linkcheck/Config.py \ linkcheck/FileUrlData.py \ linkcheck/FtpUrlData.py \ @@ -40,11 +44,12 @@ clean: rm -rf $(ALLPACKAGES) $(PACKAGE)-out.* dist: - python setup.py sdist bdist_rpm + python setup.py sdist + python setup.py bdist_rpm fakeroot debian/rules binary files: - ./$(PACKAGE) -ocolored -Ftext -Fhtml -Fgml -Fsql -Fcsv -R -t0 -v -D $(PROXY) -i$(HOST) http://$(HOST)/~calvin/ + ./$(PACKAGE) $(LCOPTS) $(PROXY) -i$(HOST) http://$(HOST)/~calvin/ homepage: scp debian/changelog shell1.sourceforge.net:/home/groups/linkchecker/htdocs/changes.txt diff --git a/linkcheck/Config.py b/linkcheck/Config.py index d6dc68fa..706ce700 100644 --- a/linkcheck/Config.py +++ b/linkcheck/Config.py @@ -134,7 +134,7 @@ class Configuration(UserDict.UserDict): self.data['blacklist'] = { "filename": "~/.blacklist", } - self.data["log"] = self.newLogger('text') + self.newLogger('text') self.data["quiet"] = 0 self.data["warningregex"] = None self.data["nntpserver"] = os.environ.get("NNTP_SERVER",None) @@ -384,7 +384,7 @@ class Configuration(UserDict.UserDict): try: log = cfgparser.get(section, "log") if Loggers.has_key(log): - self.data["log"] = self.newLogger(log) + self.newLogger(log) else: self.warn("invalid log option "+log) except ConfigParser.Error: pass @@ -402,7 +402,7 @@ class Configuration(UserDict.UserDict): for arg in filelist: # no file output for the blacklist Logger if Loggers.has_key(arg) and arg != "blacklist": - self.data["fileoutput"].append(self.newLogger(arg, 1)) + self.newLogger(arg, 1) except ConfigParser.Error: pass for key in Loggers.keys(): if cfgparser.has_section(key): diff --git a/linkcheck/HttpUrlData.py b/linkcheck/HttpUrlData.py index 148308c4..473a00f5 100644 --- a/linkcheck/HttpUrlData.py +++ b/linkcheck/HttpUrlData.py @@ -88,7 +88,7 @@ class HttpUrlData(UrlData): redirected = urlparse.urljoin(redirected, self.mime.getheader("Location")) self.urlTuple = urlparse.urlparse(redirected) status, statusText, self.mime = self._getHttpRequest() - Config.debug("\nRedirected\n"+str(self.mime)) + Config.debug("DEBUG: Redirected\n"+str(self.mime)) tries = tries + 1 # authentication @@ -99,7 +99,7 @@ class HttpUrlData(UrlData): self.auth = "Basic "+\ string.strip(base64.encodestring(_user+":"+_password)) status, statusText, self.mime = self._getHttpRequest() - Config.debug("Authentication "+_user+"/"+_password+"\n") + Config.debug("DEBUG: Authentication "+_user+"/"+_password+"\n") # Netscape Enterprise Server returns errors with HEAD # request, but valid urls with GET request. Bummer! @@ -107,7 +107,7 @@ class HttpUrlData(UrlData): server = self.mime.getheader("Server") if server and self.netscape_re.search(server): status, statusText, self.mime = self._getHttpRequest("GET") - Config.debug("Netscape Enterprise Server detected\n") + Config.debug("DEBUG: Netscape Enterprise Server detected\n") if status not in [301,302]: break effectiveurl = urlparse.urlunparse(self.urlTuple) @@ -169,6 +169,7 @@ class HttpUrlData(UrlData): self.data = self.urlConnection.read() self.downloadtime = time.time() - t self._init_html_comments() + Config.debug("DEBUG: comment spans %s\n" % self.html_comments) return self.data def isHtml(self): diff --git a/linkcheck/Logging.py b/linkcheck/Logging.py index 524aac49..fbb9fdbe 100644 --- a/linkcheck/Logging.py +++ b/linkcheck/Logging.py @@ -172,7 +172,7 @@ class HtmlLogger(StandardLogger): self.fd.write(""+Config.App+""+ ""+ - "

"+MyFont+Config.AppName+""+ + "

"+MyFont+Config.App+""+ "

"+ "
"+Config.Freeware+"

"+ (_("Start checking at %s\n") % _strtime(self.starttime))+ @@ -447,8 +447,8 @@ class SQLLogger(StandardLogger): def newUrl(self, urlData): self.fd.write("insert into %s(urlname,recursionlevel,parentname," "baseref,errorstring,validstring,warningstring,infoString," - "valid,url,line,checktime,downloadtime,cached) values ('%s'," - "%d,'%s','%s','%s','%s','%s','%s',%d,'%s',%d,%d,%d,%d)%s\n" % \ + "valid,url,line,checktime,downloadtime,cached) values " + "(%s,%d,%s,%s,%s,%s,%s,%s,%d,%s,%d,%d,%d,%d)%s\n" % \ (self.dbname, StringUtil.sqlify(urlData.urlName), urlData.recursionLevel, @@ -472,6 +472,7 @@ class SQLLogger(StandardLogger): self.fd.write(_("-- Stopped checking at %s (%.3f seconds)\n") %\ (_strtime(self.stoptime), (self.stoptime - self.starttime))) + self.fd.flush() self.fd = None @@ -534,7 +535,7 @@ class CSVLogger(StandardLogger): def newUrl(self, urlData): self.fd.write( - "%s%s%d%s%s%s%s%s%s%s%s%s%s%s%s%s%d%s%s%s%d%%s%d%s%d%s%d\n" % ( + "%s%s%d%s%s%s%s%s%s%s%s%s%s%s%s%s%d%s%s%s%d%s%d%s%d%s%d\n" % ( urlData.urlName, self.separator, urlData.recursionLevel, self.separator, urlData.parentName, self.separator, @@ -556,5 +557,6 @@ class CSVLogger(StandardLogger): self.fd.write(_("# Stopped checking at %s (%.3f seconds)\n") %\ (_strtime(self.stoptime), (self.stoptime - self.starttime))) + self.fd.flush() self.fd = None diff --git a/linkcheck/UrlData.py b/linkcheck/UrlData.py index d8fecb1e..27707110 100644 --- a/linkcheck/UrlData.py +++ b/linkcheck/UrlData.py @@ -42,7 +42,7 @@ class UrlData: recursionLevel, parentName = None, baseRef = None, - line = None): + line = 0): self.urlName = urlName self.recursionLevel = recursionLevel self.parentName = parentName @@ -54,8 +54,8 @@ class UrlData: self.valid = 1 self.url = None self.line = line - self.downloadtime = None - self.checktime = None + self.downloadtime = 0 + self.checktime = 0 self.cached = 0 self.urlConnection = None self.extern = 1 @@ -228,7 +228,6 @@ class UrlData: if not (config["externlinks"] or config["internlinks"]): return 0 # deny and allow external checking - Config.debug(self.url) if config["allowdeny"]: for pat in config["internlinks"]: if pat.search(self.url): @@ -253,13 +252,14 @@ class UrlData: self.data = self.urlConnection.read() self.downloadtime = time.time() - t self._init_html_comments() + Config.debug("DEBUG: comment spans %s\n" % self.html_comments) return self.data def _init_html_comments(self): # if we find an URL inside HTML comments we ignore it # so build a list of intervalls which are HTML comments - pattern = re.compile("") + pattern = re.compile("", re.DOTALL) index = 0 while 1: match = pattern.search(self.data, index) diff --git a/linkchecker b/linkchecker index 43b1ecdc..7a3a4d33 100755 --- a/linkchecker +++ b/linkchecker @@ -8,7 +8,7 @@ if sys.version[:5] < "1.5.2": sys.exit(1) # add the path to linkcheck module if you do not install with distutils -#sys.path.append("/home/calvin/projects/linkchecker") +sys.path.append("/home/calvin/projects/linkchecker") import getopt,re,string,os import linkcheck,StringUtil from linkcheck import _ @@ -160,15 +160,11 @@ except getopt.error: # apply configuration config = linkcheck.Config.Configuration() -try: - configfiles = [] - for opt,arg in options: - if opt=="-f" or opt=="--config": - configfiles.append(arg) - config.read(configfiles) -except IOError: - type, value = sys.exc_info()[:2] - printUsage(value) +configfiles = [] +for opt,arg in options: + if opt=="-f" or opt=="--config": + configfiles.append(arg) +config.read(configfiles) # check for environment variables (currently only http_proxy) if os.environ.has_key("http_proxy"):