robotparser2

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@218 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2001-01-04 23:38:06 +00:00
parent 29921eb3ef
commit c9b63453d1
4 changed files with 14 additions and 4 deletions

9
debian/changelog vendored
View file

@ -1,3 +1,12 @@
+linkchecker (1.2.13) unstable; urgency=low
+* linkcheck/HttpUrlData.py:
+- better redirection handling
+- really use host variable in "Host:" header
+* linkcheck/robotparser2.py: better redirection handling
+-- Bastian Kleineidam <calvin@users.sourceforge.net> Fri, 5 Jan 2001 00:34:21 +0100
linkchecker (1.2.12) unstable; urgency=low
* MANIFEST.in: include rpm_build_script in source distribution

View file

@ -171,7 +171,7 @@ class HttpUrlData(UrlData):
path = urlparse.urlunparse(('', '', self.urlTuple[2],
self.urlTuple[3], self.urlTuple[4], ''))
self.urlConnection.putrequest(method, path)
-self.urlConnection.putheader("Host", self.urlTuple[1])
+self.urlConnection.putheader("Host", host)
if self.auth:
self.urlConnection.putheader("Authorization", self.auth)
self.urlConnection.putheader("User-agent", Config.UserAgent)

View file

@ -53,6 +53,7 @@ class RobotFileParser:
_debug(self.host+self.path)
connection = httplib.HTTP(self.host)
connection.putrequest("GET", self.path)
+connection.putheader("Host", self.host)
connection.endheaders()
status, text, mime = connection.getreply()
if status in [301,302] and mime:
@ -208,8 +209,8 @@ def _test():
else:
rp.parse(open(sys.argv[1]).readlines())
print rp
-print rp.can_fetch('*', 'http://www.musi-cal.com.com/')
-print rp.can_fetch('Musi-Cal-Robot',
+print rp.can_fetch('*', 'http://www.musi-cal.com/')
+print rp.can_fetch('Musi-Cal-Robot/1.0',
'http://www.musi-cal.com/cgi-bin/event-search'
'?city=San+Francisco')

View file

@ -215,7 +215,7 @@ myname = "Bastian Kleineidam"
myemail = "calvin@users.sourceforge.net"
setup (name = "LinkChecker",
-version = "1.2.12",
+version = "1.2.13",
description = "check HTML documents for broken links",
author = myname,
author_email = myemail,