Mirror of https://github.com/Hopiu/linkchecker.git, synced 2026-04-26 09:04:44 +00:00
rename urlTuple to urlparts
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@656 e7d03fd6-7b0d-0410-9947-9c21f3af8025
parent 9eaa6c737f
commit 5a9729c1b8
8 changed files with 56 additions and 54 deletions
@@ -1,3 +1,7 @@
+1.8.1
+* Add missing () to function call in proxy handling code
+Changed files: FtpUrlData
+
 1.8.0
 * Require Python >= 2.2.1, remove httplib.
 Changed files: setup.py, INSTALL, linkchecker
@@ -114,8 +114,8 @@ class FileUrlData (UrlData):

     def buildUrl (self):
         UrlData.buildUrl(self)
-        # cut off parameter, query and fragment
-        self.url = urlparse.urlunparse(self.urlTuple[:3] + ('','',''))
+        # ignore query and fragment url parts
+        self.urlparts[4] = self.urlparts[5] = ''


     def adjustWinPath (self):
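The rename is not purely mechanical: urlparse.urlparse() returns a 6-tuple (scheme, netloc, path, params, query, fragment), while urlparse.urlsplit() returns 5 parts without params, so query and fragment shift from indices 4 and 5 down to 3 and 4. A minimal Python 2 sketch of that index shift (the example URL is illustrative, not taken from the commit):

    import urlparse

    url = "http://example.com/path;params?q=1#frag"
    t = urlparse.urlparse(url)        # 6-tuple: scheme, netloc, path, params, query, fragment
    s = urlparse.urlsplit(url)        # 5-tuple: scheme, netloc, path, query, fragment
    assert t[4] == s[3] == "q=1"      # query moves from index 4 to 3
    assert t[5] == s[4] == "frag"     # fragment moves from index 5 to 4
    parts = list(s)                   # modifiable list, as the new buildUrl code uses
    parts[3] = parts[4] = ''          # drop query and fragment
    print urlparse.urlunsplit(parts)  # http://example.com/path;params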
@@ -50,7 +50,9 @@ class FtpUrlData (ProxyUrlData):
         if _user is None or _password is None:
             raise linkcheck.error, linkcheck._("No user or password found")
         try:
-            self.urlConnection = ftplib.FTP(self.urlTuple[1], _user, _password)
+            self.urlConnection = ftplib.FTP()
+            self.urlConnection.connect(self.urlparts[1])
+            self.urlConnection.login(_user, _password)
         except EOFError:
             raise linkcheck.error, linkcheck._("Remote host has closed connection")
         info = self.urlConnection.getwelcome()
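Splitting the one-shot ftplib.FTP(host, user, password) constructor into connect() and login() separates the TCP connection from authentication, so each step can fail and be reported on its own. A minimal standalone sketch of the same pattern; host and credentials are placeholders:

    import ftplib

    host, user, password = "ftp.example.com", "anonymous", "guest@example.com"  # placeholders
    ftp = ftplib.FTP()            # no arguments: nothing is contacted yet
    ftp.connect(host)             # open the control connection, read the greeting
    ftp.login(user, password)     # authenticate as a separate step
    print ftp.getwelcome()        # the welcome banner the checker records as info
    ftp.quit()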
@@ -31,7 +31,7 @@ class HostCheckingUrlData (UrlData):

     def buildUrl (self):
         # to avoid anchor checking
-        self.urlTuple=None
+        self.urlparts = None

     def getCacheKey (self):
         return "%s:%s" % (self.scheme, self.host)
@@ -15,7 +15,7 @@
 # along with this program; if not, write to the Free Software
 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

-import urlparse, sys, time, re
+import urlparse, urllib, sys, time, re
 import httplib
 import Config, StringUtil, robotparser, linkcheck
 if Config.DebugLevel > 0:
@@ -31,13 +31,11 @@ class HttpUrlData (ProxyUrlData):

     def buildUrl (self):
         ProxyUrlData.buildUrl(self)
-        if not self.urlTuple[2]:
+        # check for empty paths
+        if not self.urlparts[2]:
             self.setWarning(linkcheck._("Path is empty"))
-            self.urlTuple = (self.urlTuple[0], self.urlTuple[1], "/",
-                self.urlTuple[3], self.urlTuple[4], self.urlTuple[5])
-            self.url = urlparse.urlunparse(self.urlTuple)
-        # resolve HTML entities
-        self.url = StringUtil.unhtmlify(self.url)
+            self.urlparts[2] = "/"
+            self.url = urlparse.urlunsplit(self.urlparts)


     def checkConnection (self):
@@ -98,7 +96,6 @@ class HttpUrlData (ProxyUrlData):
         Config.debug(BRING_IT_ON, response.status, response.reason, self.headers)
-        has301status = 0
         while 1:

             # proxy enforcement (overrides standard proxy)
             if response.status == 305 and self.headers:
                 oldproxy = (self.proxy, self.proxyauth)
@@ -112,11 +109,11 @@ class HttpUrlData (ProxyUrlData):
             redirected = self.urlName
             while response.status in [301,302] and self.headers and tries < 5:
-                has301status = (response.status==301)

                 newurl = self.headers.getheader("Location",
                                self.headers.getheader("Uri", ""))
                 redirected = urlparse.urljoin(redirected, newurl)
-                self.urlTuple = urlparse.urlparse(redirected)
+                redirected = urllib.unquote(redirected)
+                self.urlparts = urlparse.urlsplit(redirected)
                 response = self._getHttpResponse()
                 self.headers = response.msg
                 Config.debug(BRING_IT_ON, "Redirected", self.headers)
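Redirect targets are now resolved against the current URL, unquoted, and re-split, so a Location header may be relative or percent-encoded and still end up in the same normalized urlparts form as any other URL. A small standalone sketch of that sequence (Python 2; follow_redirect and the URLs are illustrative, not part of the commit):

    import urlparse, urllib

    def follow_redirect (current_url, location_header):
        # resolve a possibly relative Location header against the current URL
        redirected = urlparse.urljoin(current_url, location_header)
        # unquote before splitting, matching how buildUrl now normalizes URLs
        redirected = urllib.unquote(redirected)
        return redirected, urlparse.urlsplit(redirected)

    url, parts = follow_redirect("http://example.com/docs/index.html", "../download.html?v=1%2E8")
    print url        # http://example.com/download.html?v=1.8
    print parts[1]   # example.com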
@@ -168,7 +165,7 @@ class HttpUrlData (ProxyUrlData):
             self.headers = response.msg
             if response.status not in [301,302]: break

-        effectiveurl = urlparse.urlunparse(self.urlTuple)
+        effectiveurl = urlparse.urlunsplit(self.urlparts)
         if self.url != effectiveurl:
             self.setWarning(linkcheck._("Effective URL %s") % effectiveurl)
             self.url = effectiveurl
@@ -193,7 +190,7 @@ class HttpUrlData (ProxyUrlData):
         if self.config['cookies']:
             for c in self.cookies:
                 self.setInfo("Cookie: %s"%c)
-            out = self.config.storeCookies(self.headers, self.urlTuple[1])
+            out = self.config.storeCookies(self.headers, self.urlparts[1])
             for h in out:
                 self.setInfo(h)
         if response.status >= 200:
@@ -208,16 +205,16 @@ class HttpUrlData (ProxyUrlData):
         if self.proxy:
             host = self.proxy
         else:
-            host = self.urlTuple[1]
+            host = self.urlparts[1]
         Config.debug(HURT_ME_PLENTY, "host", host)
         if self.urlConnection:
             self.closeConnection()
         self.urlConnection = self._getHTTPObject(host)
         if self.proxy:
-            path = urlparse.urlunparse(self.urlTuple)
+            path = urlparse.urlunsplit(self.urlparts)
         else:
-            path = urlparse.urlunparse(('', '', self.urlTuple[2],
-                self.urlTuple[3], self.urlTuple[4], ''))
+            path = urlparse.urlunsplit(('', '', self.urlparts[2],
+                self.urlparts[3], self.urlparts[4]))
         self.urlConnection.putrequest(method, path, skip_host=1)
         self.urlConnection.putheader("Host", host)
         if self.auth:
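The request line needs the absolute URL when the request goes through a proxy, but only the relative form (path plus query) for a direct request; both are now produced with urlunsplit. A quick illustrative comparison (example URL only, not from the commit):

    import urlparse

    urlparts = list(urlparse.urlsplit("http://example.com/dir/page.html?a=1"))
    proxy_path  = urlparse.urlunsplit(urlparts)                                        # absolute URL for a proxy
    direct_path = urlparse.urlunsplit(('', '', urlparts[2], urlparts[3], urlparts[4])) # relative form
    print proxy_path    # http://example.com/dir/page.html?a=1
    print direct_path   # /dir/page.html?a=1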
@@ -230,8 +227,8 @@ class HttpUrlData (ProxyUrlData):
         self.urlConnection.putheader("User-Agent", Config.UserAgent)
         self.urlConnection.putheader("Accept-Encoding", "gzip;q=1.0, deflate;q=0.9, identity;q=0.5")
         if self.config['cookies']:
-            self.cookies = self.config.getCookies(self.urlTuple[1],
-                self.urlTuple[2])
+            self.cookies = self.config.getCookies(self.urlparts[1],
+                self.urlparts[2])
             for c in self.cookies:
                 self.urlConnection.putheader("Cookie", c)
         self.urlConnection.endheaders()
@@ -278,7 +275,7 @@ class HttpUrlData (ProxyUrlData):
             return 1

     def robotsTxtAllowsUrl (self):
-        roboturl = "%s://%s/robots.txt" % self.urlTuple[0:2]
+        roboturl = self.urlparts[0]+"://"+self.urlparts[1]+"/robots.txt"
         Config.debug(HURT_ME_PLENTY, "robots.txt url", roboturl)
         Config.debug(HURT_ME_PLENTY, "url", self.url)
         if not self.config.robotsTxtCache_has_key(roboturl):
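The robots.txt URL is now built by string concatenation rather than %-formatting a tuple slice, since urlparts is a list and a list does not unpack in a format string. For reference, a sketch of the kind of check the cached robotparser then performs (Python 2 robotparser module; the user agent string is a placeholder standing in for Config.UserAgent, and reading fetches over the network):

    import robotparser

    roboturl = "http://example.com/robots.txt"          # built from urlparts[0] and urlparts[1]
    rp = robotparser.RobotFileParser()
    rp.set_url(roboturl)
    rp.read()                                           # fetch and parse robots.txt
    print rp.can_fetch("LinkChecker", "http://example.com/private/page.html")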
@@ -39,16 +39,16 @@ class NntpUrlData (UrlData):
             self.url = 'nntp'+self.urlName[4:]
         else:
             self.url = self.urlName
-        self.urlTuple = urlparse.urlparse(self.url)
-        debug(BRING_IT_ON, self.urlTuple)
+        self.urlparts = urlparse.urlsplit(self.url)
+        debug(BRING_IT_ON, self.urlparts)

     def checkConnection (self):
-        nntpserver = self.urlTuple[1] or self.config["nntpserver"]
+        nntpserver = self.urlparts[1] or self.config["nntpserver"]
         if not nntpserver:
             self.setWarning(linkcheck._("No NNTP server specified, skipping this URL"))
             return
         nntp = self._connectNntp(nntpserver)
-        group = self.urlTuple[2]
+        group = self.urlparts[2]
         while group[:1]=='/':
             group = group[1:]
         if '@' in group:
@@ -2,7 +2,8 @@ from UrlData import UrlData
 from urllib import splittype, splithost, splituser, splitpasswd

 class ProxyUrlData (UrlData):
-    """urldata with ability for proxying"""
+    """urldata with ability for proxying and for urls with user:pass@host
+    setting"""

     def setProxy (self, proxy):
         self.proxy = proxy
@@ -19,3 +20,9 @@
         self.proxyauth = base64.encodestring(self.proxyauth).strip()
         self.proxyauth = "Basic "+self.proxyauth

+    def getUserPassword (self):
+        for auth in self.config["authentication"]:
+            if auth['pattern'].match(self.url):
+                return auth['user'], auth['password']
+        return None,None
+
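This new getUserPassword mirrors the _getUserPassword helper removed from UrlData in the last hunk below: it scans the configured authentication entries for the first pattern matching the URL. An illustrative, self-contained version with a made-up configuration entry (the real entries come from self.config["authentication"]):

    import re

    authentication = [
        {'pattern': re.compile(r"^https?://intranet\.example\.com"),
         'user': "alice", 'password': "secret"},
    ]

    def getUserPassword (url):
        for auth in authentication:
            if auth['pattern'].match(url):
                return auth['user'], auth['password']
        return None, None

    print getUserPassword("http://intranet.example.com/doc.html")  # ('alice', 'secret')
    print getUserPassword("http://other.example.org/")             # (None, None)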
@@ -16,6 +16,7 @@
 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

 import sys, re, urlparse, urllib, time, traceback, socket, select
+from urllib import splituser, splithost, splitport
 #try:
 #    from linkcheck import DNS
 #except ImportError:
@@ -159,20 +160,18 @@ class UrlData:
             self.url = urlparse.urljoin(self.parentName, self.urlName)
         else:
             self.url = self.urlName
-        self.urlTuple = urlparse.urlparse(self.url)
-        # make host lowercase
-        self.urlTuple = (self.urlTuple[0], self.urlTuple[1].lower(),
-            self.urlTuple[2], self.urlTuple[3], self.urlTuple[4],
-            self.urlTuple[5])
-        self.url = urlparse.urlunparse(self.urlTuple)
-        # resolve HTML entities
-        self.url = StringUtil.unhtmlify(self.url)
-        # check host:port syntax
-        host = self.urlTuple[1]
-        if ":" in host:
-            host,port = host.split(":", 1)
-            if not port_re.match(port):
-                raise linkcheck.error(linkcheck._("URL has invalid port number"))
+        # unquote url
+        self.url = urllib.unquote(self.url)
+        # split into (modifiable) list
+        self.urlparts = list(urlparse.urlsplit(self.url))
+        # check userinfo@host:port syntax
+        self.userinfo, host = splituser(self.urlparts[1])
+        x, port = splitport(host)
+        if port is not None and not port_re.match(port):
+            raise linkcheck.error(linkcheck._("URL has invalid port number %s")\
+                % str(port))
+        # set host lowercase and without userinfo
+        self.urlparts[1] = host.lower()


     def logMe (self):
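The rewritten buildUrl unquotes the URL, splits it into a mutable list, strips any user:password@ prefix with splituser, and validates the port with splitport before lowercasing the host. A condensed standalone sketch of those steps (Python 2; port_re is assumed to match digits, and a plain ValueError stands in for linkcheck.error):

    import re, urlparse, urllib
    from urllib import splituser, splitport

    port_re = re.compile(r"\d+$")                 # assumed port pattern

    url = urllib.unquote("http://bob:secret@Example.COM:8080/a%20b.html")
    urlparts = list(urlparse.urlsplit(url))       # modifiable list of 5 parts
    userinfo, host = splituser(urlparts[1])       # ('bob:secret', 'Example.COM:8080')
    x, port = splitport(host)                     # ('Example.COM', '8080')
    if port is not None and not port_re.match(port):
        raise ValueError("URL has invalid port number %s" % port)
    urlparts[1] = host.lower()                    # host kept, userinfo dropped, lowercased
    print urlparse.urlunsplit(urlparts)           # http://example.com:8080/a b.html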
@@ -242,8 +241,8 @@ class UrlData:
         debug(BRING_IT_ON, "checking connection")
         try:
             self.checkConnection()
-            if self.urlTuple and self.config["anchors"]:
-                self.checkAnchors(self.urlTuple[5])
+            if self.urlparts and self.config["anchors"]:
+                self.checkAnchors(self.urlparts[4])
         except tuple(ExcList):
             type, value, tb = sys.exc_info()
             debug(HURT_ME_PLENTY, "exception", traceback.format_tb(tb))
@@ -291,8 +290,8 @@ class UrlData:


     def getCacheKey (self):
-        if self.urlTuple:
-            return urlparse.urlunparse(self.urlTuple)
+        if self.urlparts:
+            return urlparse.urlunsplit(self.urlparts)
         return None

@@ -411,13 +410,6 @@ class UrlData:
             self.column, self.name))


-    def _getUserPassword (self):
-        for auth in self.config["authentication"]:
-            if auth['pattern'].match(self.url):
-                return auth['user'], auth['password']
-        return None,None
-
-
 from FileUrlData import FileUrlData
 from IgnoredUrlData import IgnoredUrlData, ignored_schemes_re
 from FtpUrlData import FtpUrlData