use urllib2

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@839 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2003-04-18 01:03:03 +00:00
parent e0f26a8994
commit f2d97ecdc3

View file

@@ -9,7 +9,8 @@
The robots.txt Exclusion Protocol is implemented as specified in
http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html
"""
import re,urlparse,urllib
import re, urlparse, urllib2
from urllib import quote
__all__ = ["RobotFileParser"]
@@ -39,7 +40,7 @@ class RobotFileParser:
self.host, self.path = urlparse.urlparse(url)[1:3]
def read(self):
opener = URLopener()
opener = urllib2.build_opener()
f = opener.open(self.url)
lines = []
line = f.readline()
@@ -131,7 +132,7 @@ class RobotFileParser:
return 1
# search for given user agent matches
# the first match counts
url = urllib.quote(urlparse.urlparse(url)[2]) or "/"
url = quote(urlparse.urlparse(url)[2]) or "/"
for entry in self.entries:
if entry.applies_to(useragent):
return entry.allowance(url)
@@ -150,7 +151,7 @@ class RuleLine:
"""A rule line is a single "Allow:" (allowance==1) or "Disallow:"
(allowance==0) followed by a path."""
def __init__(self, path, allowance):
self.path = urllib.quote(path)
self.path = quote(path)
self.allowance = allowance
def applies_to(self, filename):
@@ -198,15 +199,6 @@ class Entry:
return line.allowance
return 1
class URLopener(urllib.FancyURLopener):
def __init__(self, *args):
apply(urllib.FancyURLopener.__init__, (self,) + args)
self.errcode = 200
def http_error_default(self, url, fp, errcode, errmsg, headers):
self.errcode = errcode
return urllib.FancyURLopener.http_error_default(self, url, fp, errcode,
errmsg, headers)
def _check(a,b):
if not b: