rename urlTuple to urlparts

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@656 e7d03fd6-7b0d-0410-9947-9c21f3af8025
calvin 2002-12-06 01:36:19 +00:00
parent 9eaa6c737f
commit 5a9729c1b8
8 changed files with 56 additions and 54 deletions


@@ -1,3 +1,7 @@
1.8.1
* Add missing () to function call in proxy handling code
Changed files: FtpUrlData
1.8.0
* Require Python >= 2.2.1, remove httplib.
Changed files: setup.py, INSTALL, linkchecker


@@ -114,8 +114,8 @@ class FileUrlData (UrlData):
def buildUrl (self):
UrlData.buildUrl(self)
# cut off parameter, query and fragment
self.url = urlparse.urlunparse(self.urlTuple[:3] + ('','',''))
# ignore query and fragment url parts
self.urlparts[3] = self.urlparts[4] = ''
def adjustWinPath (self):
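
The hunk above swaps urlparse.urlparse(), which yields a 6-tuple (scheme, netloc, path, params, query, fragment), for urlsplit(), which yields 5 parts with no separate params slot, so the query moves to index 3 and the fragment to index 4. A minimal sketch of that index shift with a hypothetical URL; it uses Python 3's urllib.parse, where the Python 2 urlparse functions now live:

# Index shift between urlparse() (6 parts) and urlsplit() (5 parts);
# the URL is a made-up example.
from urllib.parse import urlparse, urlsplit, urlunsplit

url = "http://example.com/a/b;param?x=1#frag"
six = urlparse(url)    # (scheme, netloc, path, params, query, fragment)
five = urlsplit(url)   # (scheme, netloc, path, query, fragment)

assert six[4] == five[3] == "x=1"    # query: index 4 -> 3
assert six[5] == five[4] == "frag"   # fragment: index 5 -> 4

# what FileUrlData.buildUrl now does: blank query and fragment in place
parts = list(five)
parts[3] = parts[4] = ""
print(urlunsplit(parts))             # http://example.com/a/b;param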


@@ -50,7 +50,9 @@ class FtpUrlData (ProxyUrlData):
if _user is None or _password is None:
raise linkcheck.error, linkcheck._("No user or password found")
try:
self.urlConnection = ftplib.FTP(self.urlTuple[1], _user, _password)
self.urlConnection = ftplib.FTP()
self.urlConnection.connect(self.urlparts[1])
self.urlConnection.login(_user, _password)
except EOFError:
raise linkcheck.error, linkcheck._("Remote host has closed connection")
info = self.urlConnection.getwelcome()
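
Besides switching to urlparts[1], the hunk replaces the one-shot ftplib.FTP(host, user, password) constructor with an explicit connect()/login() sequence. A minimal sketch of that pattern; the host and credentials are placeholders:

# Explicit connect/login sequence, as in the new checkConnection();
# host and credentials are hypothetical.
import ftplib

host = "ftp.example.com"                      # would come from self.urlparts[1]
user, password = "anonymous", "linkchecker@"  # placeholder credentials

ftp = ftplib.FTP()             # no arguments: no connection is opened yet
try:
    ftp.connect(host)          # step 1: open the control connection
    ftp.login(user, password)  # step 2: authenticate
except EOFError:
    raise SystemExit("Remote host has closed connection")
print(ftp.getwelcome())        # server banner, as getwelcome() above
ftp.quit()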


@@ -31,7 +31,7 @@ class HostCheckingUrlData (UrlData):
def buildUrl (self):
# to avoid anchor checking
self.urlTuple=None
self.urlparts = None
def getCacheKey (self):
return "%s:%s" % (self.scheme, self.host)


@@ -15,7 +15,7 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import urlparse, sys, time, re
import urlparse, urllib, sys, time, re
import httplib
import Config, StringUtil, robotparser, linkcheck
if Config.DebugLevel > 0:
@@ -31,13 +31,11 @@ class HttpUrlData (ProxyUrlData):
def buildUrl (self):
ProxyUrlData.buildUrl(self)
if not self.urlTuple[2]:
# check for empty paths
if not self.urlparts[2]:
self.setWarning(linkcheck._("Path is empty"))
self.urlTuple = (self.urlTuple[0], self.urlTuple[1], "/",
self.urlTuple[3], self.urlTuple[4], self.urlTuple[5])
self.url = urlparse.urlunparse(self.urlTuple)
# resolve HTML entities
self.url = StringUtil.unhtmlify(self.url)
self.urlparts[2] = "/"
self.url = urlparse.urlunsplit(self.urlparts)
def checkConnection (self):
@@ -98,7 +96,6 @@ class HttpUrlData (ProxyUrlData):
Config.debug(BRING_IT_ON, response.status, response.reason, self.headers)
has301status = 0
while 1:
# proxy enforcement (overrides standard proxy)
if response.status == 305 and self.headers:
oldproxy = (self.proxy, self.proxyauth)
@@ -112,11 +109,11 @@
redirected = self.urlName
while response.status in [301,302] and self.headers and tries < 5:
has301status = (response.status==301)
newurl = self.headers.getheader("Location",
self.headers.getheader("Uri", ""))
self.headers.getheader("Uri", ""))
redirected = urlparse.urljoin(redirected, newurl)
self.urlTuple = urlparse.urlparse(redirected)
redirected = urllib.unquote(redirected)
self.urlparts = urlparse.urlsplit(redirected)
response = self._getHttpResponse()
self.headers = response.msg
Config.debug(BRING_IT_ON, "Redirected", self.headers)
@@ -168,7 +165,7 @@ class HttpUrlData (ProxyUrlData):
self.headers = response.msg
if response.status not in [301,302]: break
effectiveurl = urlparse.urlunparse(self.urlTuple)
effectiveurl = urlparse.urlunsplit(self.urlparts)
if self.url != effectiveurl:
self.setWarning(linkcheck._("Effective URL %s") % effectiveurl)
self.url = effectiveurl
@@ -193,7 +190,7 @@ class HttpUrlData (ProxyUrlData):
if self.config['cookies']:
for c in self.cookies:
self.setInfo("Cookie: %s"%c)
out = self.config.storeCookies(self.headers, self.urlTuple[1])
out = self.config.storeCookies(self.headers, self.urlparts[1])
for h in out:
self.setInfo(h)
if response.status >= 200:
@@ -208,16 +205,16 @@ class HttpUrlData (ProxyUrlData):
if self.proxy:
host = self.proxy
else:
host = self.urlTuple[1]
host = self.urlparts[1]
Config.debug(HURT_ME_PLENTY, "host", host)
if self.urlConnection:
self.closeConnection()
self.urlConnection = self._getHTTPObject(host)
if self.proxy:
path = urlparse.urlunparse(self.urlTuple)
path = urlparse.urlunsplit(self.urlparts)
else:
path = urlparse.urlunparse(('', '', self.urlTuple[2],
self.urlTuple[3], self.urlTuple[4], ''))
path = urlparse.urlunsplit(('', '', self.urlparts[2],
self.urlparts[3], self.urlparts[4]))
self.urlConnection.putrequest(method, path, skip_host=1)
self.urlConnection.putheader("Host", host)
if self.auth:
@@ -230,8 +227,8 @@ class HttpUrlData (ProxyUrlData):
self.urlConnection.putheader("User-Agent", Config.UserAgent)
self.urlConnection.putheader("Accept-Encoding", "gzip;q=1.0, deflate;q=0.9, identity;q=0.5")
if self.config['cookies']:
self.cookies = self.config.getCookies(self.urlTuple[1],
self.urlTuple[2])
self.cookies = self.config.getCookies(self.urlparts[1],
self.urlparts[2])
for c in self.cookies:
self.urlConnection.putheader("Cookie", c)
self.urlConnection.endheaders()
@@ -278,7 +275,7 @@ class HttpUrlData (ProxyUrlData):
return 1
def robotsTxtAllowsUrl (self):
roboturl = "%s://%s/robots.txt" % self.urlTuple[0:2]
roboturl = self.urlparts[0]+"://"+self.urlparts[1]+"/robots.txt"
Config.debug(HURT_ME_PLENTY, "robots.txt url", roboturl)
Config.debug(HURT_ME_PLENTY, "url", self.url)
if not self.config.robotsTxtCache_has_key(roboturl):
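
Two URL re-assembly points change in this file: redirect targets are now resolved with urljoin(), unquoted, and re-split into urlparts, and the request target passed to putrequest() is rebuilt with urlunsplit(), the absolute URL when a proxy is in use and only path plus query otherwise. A minimal sketch of both with hypothetical URLs, again using Python 3's urllib.parse:

# Redirect resolution and request-target construction; URLs are made up.
from urllib.parse import urljoin, urlsplit, urlunsplit, unquote

current = "http://example.com/dir/page.html"
location = "../other%20page.html?x=1"             # hypothetical Location header
redirected = unquote(urljoin(current, location))  # resolve, then unquote
urlparts = list(urlsplit(redirected))
print(redirected)          # http://example.com/other page.html?x=1

use_proxy = False
if use_proxy:
    # a proxy needs the absolute URL as the request target
    path = urlunsplit(urlparts)
else:
    # a direct connection only gets the path and query
    path = urlunsplit(("", "", urlparts[2], urlparts[3], urlparts[4]))
print(path)                # /other page.html?x=1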


@@ -39,16 +39,16 @@ class NntpUrlData (UrlData):
self.url = 'nntp'+self.urlName[4:]
else:
self.url = self.urlName
self.urlTuple = urlparse.urlparse(self.url)
debug(BRING_IT_ON, self.urlTuple)
self.urlparts = urlparse.urlsplit(self.url)
debug(BRING_IT_ON, self.urlparts)
def checkConnection (self):
nntpserver = self.urlTuple[1] or self.config["nntpserver"]
nntpserver = self.urlparts[1] or self.config["nntpserver"]
if not nntpserver:
self.setWarning(linkcheck._("No NNTP server specified, skipping this URL"))
return
nntp = self._connectNntp(nntpserver)
group = self.urlTuple[2]
group = self.urlparts[2]
while group[:1]=='/':
group = group[1:]
if '@' in group:
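
buildUrl() rewrites news: URLs to nntp: so that urlsplit() yields a proper netloc; checkConnection() then takes the server from urlparts[1] (falling back to the configured nntpserver) and the group or article from the path. A minimal sketch with a hypothetical URL and fallback server:

# Server and group extraction from an nntp URL; values are made-up examples.
from urllib.parse import urlsplit

urlparts = list(urlsplit("nntp://news.example.com/comp.lang.python"))

nntpserver = urlparts[1] or "news.fallback.example"  # config["nntpserver"] fallback
group = urlparts[2]
while group[:1] == "/":     # strip leading slashes, as the loop above does
    group = group[1:]

print(nntpserver)           # news.example.com
print(group)                # comp.lang.python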


@@ -2,7 +2,8 @@ from UrlData import UrlData
from urllib import splittype, splithost, splituser, splitpasswd
class ProxyUrlData (UrlData):
"""urldata with ability for proxying"""
"""urldata with ability for proxying and for urls with user:pass@host
setting"""
def setProxy (self, proxy):
self.proxy = proxy
@@ -19,3 +20,9 @@ class ProxyUrlData (UrlData):
self.proxyauth = base64.encodestring(self.proxyauth).strip()
self.proxyauth = "Basic "+self.proxyauth
def getUserPassword (self):
for auth in self.config["authentication"]:
if auth['pattern'].match(self.url):
return auth['user'], auth['password']
return None,None
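
The new getUserPassword() walks the configured authentication entries and returns the credentials of the first regex pattern that matches the URL. A minimal standalone sketch; the config layout below is only a guess at the shape the method expects:

# Pattern-based credential lookup; config entries are hypothetical.
import re

config = {"authentication": [
    {"pattern": re.compile(r"^https?://intranet\.example\.com"),
     "user": "alice", "password": "secret"},
]}

def get_user_password(url):
    for auth in config["authentication"]:
        if auth["pattern"].match(url):       # first match wins
            return auth["user"], auth["password"]
    return None, None

print(get_user_password("http://intranet.example.com/"))  # ('alice', 'secret')
print(get_user_password("http://www.example.com/"))       # (None, None)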


@@ -16,6 +16,7 @@
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import sys, re, urlparse, urllib, time, traceback, socket, select
from urllib import splituser, splithost, splitport
#try:
# from linkcheck import DNS
#except ImportError:
@@ -159,20 +160,18 @@ class UrlData:
self.url = urlparse.urljoin(self.parentName, self.urlName)
else:
self.url = self.urlName
self.urlTuple = urlparse.urlparse(self.url)
# make host lowercase
self.urlTuple = (self.urlTuple[0], self.urlTuple[1].lower(),
self.urlTuple[2], self.urlTuple[3], self.urlTuple[4],
self.urlTuple[5])
self.url = urlparse.urlunparse(self.urlTuple)
# resolve HTML entities
self.url = StringUtil.unhtmlify(self.url)
# check host:port syntax
host = self.urlTuple[1]
if ":" in host:
host,port = host.split(":", 1)
if not port_re.match(port):
raise linkcheck.error(linkcheck._("URL has invalid port number"))
# unquote url
self.url = urllib.unquote(self.url)
# split into (modifiable) list
self.urlparts = list(urlparse.urlsplit(self.url))
# check userinfo@host:port syntax
self.userinfo, host = splituser(self.urlparts[1])
x, port = splitport(host)
if port is not None and not port_re.match(port):
raise linkcheck.error(linkcheck._("URL has invalid port number %s")\
% str(port))
# set host lowercase and without userinfo
self.urlparts[1] = host.lower()
def logMe (self):
@@ -242,8 +241,8 @@ class UrlData:
debug(BRING_IT_ON, "checking connection")
try:
self.checkConnection()
if self.urlTuple and self.config["anchors"]:
self.checkAnchors(self.urlTuple[5])
if self.urlparts and self.config["anchors"]:
self.checkAnchors(self.urlparts[4])
except tuple(ExcList):
type, value, tb = sys.exc_info()
debug(HURT_ME_PLENTY, "exception", traceback.format_tb(tb))
@@ -291,8 +290,8 @@ class UrlData:
def getCacheKey (self):
if self.urlTuple:
return urlparse.urlunparse(self.urlTuple)
if self.urlparts:
return urlparse.urlunsplit(self.urlparts)
return None
@@ -411,13 +410,6 @@ class UrlData:
self.column, self.name))
def _getUserPassword (self):
for auth in self.config["authentication"]:
if auth['pattern'].match(self.url):
return auth['user'], auth['password']
return None,None
from FileUrlData import FileUrlData
from IgnoredUrlData import IgnoredUrlData, ignored_schemes_re
from FtpUrlData import FtpUrlData
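
The rewritten UrlData.buildUrl() unquotes the URL, splits it into a mutable 5-element list, separates userinfo from the netloc with splituser(), checks the port from splitport() against port_re, and stores the host lowercased without userinfo. A minimal sketch of that sequence with a hypothetical URL; it uses Python 3's urllib.parse, with string operations standing in for the Python 2 splituser/splitport helpers, and a guessed definition of port_re:

# userinfo/port handling as in the new buildUrl(); the URL and port_re
# definition are illustrative assumptions.
import re
from urllib.parse import unquote, urlsplit

port_re = re.compile(r"\d+$")          # assumed shape of the module's port_re

url = unquote("http://bob:pw@Example.COM:8080/a%20b.html?x=1#frag")
urlparts = list(urlsplit(url))

# userinfo@host:port lives in urlparts[1]
userinfo, _, host = urlparts[1].rpartition("@")  # splituser() equivalent
port = host.partition(":")[2]                    # splitport() equivalent, '' if absent
if port and not port_re.match(port):
    raise ValueError("URL has invalid port number %s" % port)

urlparts[1] = host.lower()             # host lowercased, userinfo stripped
print(userinfo)                        # bob:pw
print(urlparts[1])                     # example.com:8080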