From f2d97ecdc3145f15041cb8ddd20cdb10fbdad94d Mon Sep 17 00:00:00 2001 From: calvin Date: Fri, 18 Apr 2003 01:03:03 +0000 Subject: [PATCH] use urllib2 git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@839 e7d03fd6-7b0d-0410-9947-9c21f3af8025 --- linkcheck/robotparser.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/linkcheck/robotparser.py b/linkcheck/robotparser.py index ba4af189..2e7ebcf7 100644 --- a/linkcheck/robotparser.py +++ b/linkcheck/robotparser.py @@ -9,7 +9,8 @@ The robots.txt Exclusion Protocol is implemented as specified in http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html """ -import re,urlparse,urllib +import re, urlparse, urllib2 +from urllib import quote __all__ = ["RobotFileParser"] @@ -39,7 +40,7 @@ class RobotFileParser: self.host, self.path = urlparse.urlparse(url)[1:3] def read(self): - opener = URLopener() + opener = urllib2.build_opener() f = opener.open(self.url) lines = [] line = f.readline() @@ -131,7 +132,7 @@ class RobotFileParser: return 1 # search for given user agent matches # the first match counts - url = urllib.quote(urlparse.urlparse(url)[2]) or "/" + url = quote(urlparse.urlparse(url)[2]) or "/" for entry in self.entries: if entry.applies_to(useragent): return entry.allowance(url) @@ -150,7 +151,7 @@ class RuleLine: """A rule line is a single "Allow:" (allowance==1) or "Disallow:" (allowance==0) followed by a path.""" def __init__(self, path, allowance): - self.path = urllib.quote(path) + self.path = quote(path) self.allowance = allowance def applies_to(self, filename): @@ -198,15 +199,6 @@ class Entry: return line.allowance return 1 -class URLopener(urllib.FancyURLopener): - def __init__(self, *args): - apply(urllib.FancyURLopener.__init__, (self,) + args) - self.errcode = 200 - - def http_error_default(self, url, fp, errcode, errmsg, headers): - self.errcode = errcode - return urllib.FancyURLopener.http_error_default(self, url, fp, errcode, - errmsg, headers) def _check(a,b): if not b: