# Patch: support multiple user/password pairs (ChangeLog entry 7.3.2000).
#
# What the hunks below do:
#   * Config.py: the single "user"/"password" settings are replaced by
#     config["authentication"], a list of (compiled regex, user, password)
#     entries read from the [authentication] section (entry1, entry2, ...),
#     followed by a catch-all (".*", "anonymous", "guest@") entry.
#     externlinks/internlinks/allowdeny move to a new [filtering] section.
#     read() only adds the default rc files when no files were passed in.
#   * UrlData.py: new _getUserPassword() returns the user/password of the
#     first entry whose regex matches self.url; FtpUrlData and HttpUrlData
#     call it instead of reading global credentials.
#   * linkchecker: -u/-p fill _user/_password and, if either was given,
#     a match-everything entry is inserted at the front of the list.
#
# Fixed relative to the patch as first recorded:
#   * the -p/--password branch now sets `constructauth` (it set the old name
#     `constructAuth`, so -p alone never installed the credentials);
#   * `congif["authentication"]` is now `config["authentication"]`
#     (the misspelling raised NameError whenever -u was used).
#
# NOTE(review): the linkchecker hunk calls re.compile(); no `import re` for
#   that script is visible in this patch -- confirm it is already imported.
# NOTE(review): _getUserPassword() falls off the end (returns None) when no
#   entry matches, e.g. if readConfig() returns early before appending the
#   catch-all entry; callers unpack the result -- confirm or add a fallback.
# NOTE(review): line breaks/indentation in this patch text are collapsed;
#   it will not apply as-is and must be regenerated from the repository.
diff --git a/ChangeLog b/ChangeLog index c8775741..b6393194 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,6 @@ +7.3.2000 + * support for multiple user/password pairs + 3.3.2000 * ignore HTTP status replies < 200 because some pages return -1 and binary data (Gabor Liptak ) diff --git a/linkcheck/Config.py b/linkcheck/Config.py index 300b266a..65acee97 100644 --- a/linkcheck/Config.py +++ b/linkcheck/Config.py @@ -1,4 +1,4 @@ -import ConfigParser,sys,os,re,UserDict +import ConfigParser,sys,os,re,UserDict,string from os.path import expanduser,normpath,normcase,join,isfile import Logging @@ -47,8 +47,7 @@ class Configuration(UserDict.UserDict): self.data["externlinks"] = [] self.data["internlinks"] = [] self.data["allowdeny"] = 0 - self.data["user"] = "anonymous" - self.data["password"] = "joe@" + self.data["authentication"] = [] self.data["proxy"] = 0 self.data["proxyport"] = 8080 self.data["recursionlevel"] = 1 @@ -218,17 +217,17 @@ class Configuration(UserDict.UserDict): self.logLock.release() def read(self, files = []): - files.insert(0,_norm("~/.pylicerc")) - if sys.platform=="win32": - if not sys.path[0]: - path=os.getcwd() + if not files: + files.insert(0,_norm("~/.pylicerc")) + if sys.platform=="win32": + if not sys.path[0]: + path=os.getcwd() + else: + path=sys.path[0] else: - path=sys.path[0] - else: - path="/etc" - files.insert(0,_norm(join(path, "pylicerc"))) - if len(files): - self.readConfig(files) + path="/etc" + files.insert(0,_norm(join(path, "pylicerc"))) + self.readConfig(files) def warn(self, msg): self.message("Config: WARNING: "+msg) @@ -244,7 +243,8 @@ class Configuration(UserDict.UserDict): try: cfgparser = ConfigParser.ConfigParser() cfgparser.read(files) - except: return + except: + return section="output" try: @@ -274,16 +274,6 @@ class Configuration(UserDict.UserDict): except: pass try: self.data["anchors"] = cfgparser.getboolean(section, "anchors") except: pass - try: self.data["externlinks"].append(re.compile(cfgparser.get(section, 
"externlinks"))) - except: pass - try: self.data["internlinks"].append(re.compile(cfgparser.get(section, "internlinks"))) - except: pass - try: self.data["allowdeny"] = cfgparser.getboolean(section, "allowdeny") - except: pass - try: self.data["password"] = cfgparser.get(section, "password") - except: pass - try: self.data["user"] = cfgparser.get(section, "user") - except: pass try: self.data["proxy"] = cfgparser.get(section, "proxy") self.data["proxyport"] = cfgparser.getint(section, "proxyport") @@ -302,6 +292,26 @@ class Configuration(UserDict.UserDict): filelist = string.split(cfgparser.get(section, "fileoutput")) for arg in filelist: if Loggers.has_key(arg): - self.data["fileoutput"].append(Loggers[arg](open("pylice-out."+arg, "w"))) - except: - pass + self.data["fileoutput"].append(Loggers[arg](open("pylice-out."+arg, "w"))) + except: pass + + section = "authentication" + try: + i=1 + while 1: + tuple = string.split(cfgparser.get(section, "entry"+`i`)) + if len(tuple)!=3: break + tuple[0] = re.compile(tuple[0]) + self.data["authentication"].append(tuple) + i = i + 1 + except: pass + self.data["authentication"].append((re.compile(".*"), "anonymous", "guest@")) + + section = "filtering" + try: self.data["externlinks"].append(re.compile(cfgparser.get(section, "externlinks"))) + except: pass + try: self.data["internlinks"].append(re.compile(cfgparser.get(section, "internlinks"))) + except: pass + try: self.data["allowdeny"] = cfgparser.getboolean(section, "allowdeny") + except: pass + diff --git a/linkcheck/FtpUrlData.py b/linkcheck/FtpUrlData.py index 61b7b61f..0895a74d 100644 --- a/linkcheck/FtpUrlData.py +++ b/linkcheck/FtpUrlData.py @@ -7,8 +7,8 @@ class FtpUrlData(UrlData): """ def checkConnection(self, config): - self.urlConnection = ftplib.FTP(self.urlTuple[1], - config["user"], config["password"]) + _user, _password = self._getUserPassword(config) + self.urlConnection = ftplib.FTP(self.urlTuple[1], _user, _password) info = self.urlConnection.getwelcome() 
if not info: self.closeConnection() diff --git a/linkcheck/HttpUrlData.py b/linkcheck/HttpUrlData.py index 3092101b..b23e450f 100644 --- a/linkcheck/HttpUrlData.py +++ b/linkcheck/HttpUrlData.py @@ -56,7 +56,8 @@ class HttpUrlData(UrlData): status, statusText, self.mime = self._getHttpRequest() Config.debug(str(status)+", "+str(statusText)+", "+str(self.mime)+"\n") if status == 401: - self.auth = base64.encodestring(LinkChecker.User+":"+LinkChecker.Password) + _user, _password = self._getUserPassword(config) + self.auth = base64.encodestring(_user+":"+_password) status, statusText, self.mime = self._getHttpRequest() if status >= 400: self.setError(`status`+" "+statusText) diff --git a/linkcheck/UrlData.py b/linkcheck/UrlData.py index 30833636..fcdcf348 100644 --- a/linkcheck/UrlData.py +++ b/linkcheck/UrlData.py @@ -254,6 +254,12 @@ class UrlData: "\nrecursionLevel="+`self.recursionLevel`+\ "\nurlConnection="+str(self.urlConnection) + def _getUserPassword(self, config): + for rx, _user, _password in config["authentication"]: + if rx.match(self.url): + return _user, _password + + from FileUrlData import FileUrlData from FtpUrlData import FtpUrlData from GopherUrlData import GopherUrlData diff --git a/linkchecker b/linkchecker index abfcb67f..a6612af6 100755 --- a/linkchecker +++ b/linkchecker @@ -138,18 +138,20 @@ except getopt.error: # apply configuration config = linkcheck.Config.Configuration() try: - configfile = [] + configfiles = [] for opt,arg in options: if opt=="-f" or opt=="--config": - configfile.append(arg) - config.read(configfile) + configfiles.append(arg) + config.read(configfiles) except: type, value = sys.exc_info()[:2] printUsage(value) # apply options and arguments -constructAuth = 0 +_user = "anonymous" +_password = "guest@" +constructauth = 0 for opt,arg in options: if opt=="-a" or opt=="--anchors": config["anchors"] = 1 @@ -190,8 +192,8 @@ for opt,arg in options: config["proxy"] = arg elif opt=="-p" or opt=="--password": - 
config["password"]=arg - constructAuth=constructAuth+1 + _password=arg + constructauth = 1 elif opt=="-q" or opt=="--quiet": config["quiet"]=1 @@ -217,8 +219,8 @@ for opt,arg in options: config.disableThreading() elif opt=="-u" or opt=="--user": - config["user"] = arg - constructAuth=constructAuth+1 + _user = arg + constructauth = 1 elif opt=="-V" or opt=="--version": printVersion() @@ -230,8 +232,8 @@ for opt,arg in options: elif opt=="-w" or opt=="--warnings": config["warnings"] = 1 -if constructAuth and constructAuth!=2: - sys.stderr.write("Warning: try to give me both Username and Password\n") +if constructauth: + config["authentication"].insert(0, (re.compile(".*"), _user, _password)) if len(args)==0: printUsage("no files or urls given") diff --git a/linkcheckerrc b/linkcheckerrc index 2c303c39..81aed730 100644 --- a/linkcheckerrc +++ b/linkcheckerrc @@ -2,23 +2,34 @@ # see linkchecker -h for help on these options [output] -#debug=1 -#log=colored -#verbose=1 -#warnings=1 -#quiet=0 +debug=0 +log=text +verbose=0 +warnings=0 +quiet=0 +#fileoutput = text colored html gml sql [checking] -#threads=15 -#anchors=1 -#externlinks= -#internlinks= -#allowdeny=1 -#password=calvin@ -#user=anonymous -#recursionlevel=1 -#robotstxt=1 -#strict=1 -#proxy= -#proxyport=8080 +threads=5 +anchors=0 +recursionlevel=1 +robotstxt=0 +strict=0 +#proxy=www-proxy.uni-sb.de +#proxyport=3128 +[filtering] +externlinks= +internlinks= +allowdeny=0 + +# You can provide different user/password pairs for different link types. +# Entries are a triple with (link regular expression, username, password), +# separated by whitespace. +# If the regular expression matches, the given user/password pair is used +# for authentication. The commandline options -u,-p match every link +# and therefore override the entries given here. The first match wins. +# At the moment, authentication is used/needed for http[s] and ftp links. 
+[authentication] +entry1=^http://treasure\.calvinsplayground\.de/~calvin/isnichmehr/.* lebowski lebowski +#entry2=^ftp://void.cs.uni-sb.de calvin hutzli