mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-30 04:30:28 +00:00
use urllib2
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@839 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
e0f26a8994
commit
f2d97ecdc3
1 changed file with 5 additions and 13 deletions
|
|
@@ -9,7 +9,8 @@
|
|||
The robots.txt Exclusion Protocol is implemented as specified in
|
||||
http://info.webcrawler.com/mak/projects/robots/norobots-rfc.html
|
||||
"""
|
||||
import re,urlparse,urllib
|
||||
import re, urlparse, urllib2
|
||||
from urllib import quote
|
||||
|
||||
__all__ = ["RobotFileParser"]
|
||||
|
||||
|
|
@@ -39,7 +40,7 @@ class RobotFileParser:
|
|||
self.host, self.path = urlparse.urlparse(url)[1:3]
|
||||
|
||||
def read(self):
|
||||
opener = URLopener()
|
||||
opener = urllib2.build_opener()
|
||||
f = opener.open(self.url)
|
||||
lines = []
|
||||
line = f.readline()
|
||||
|
|
@@ -131,7 +132,7 @@ class RobotFileParser:
|
|||
return 1
|
||||
# search for given user agent matches
|
||||
# the first match counts
|
||||
url = urllib.quote(urlparse.urlparse(url)[2]) or "/"
|
||||
url = quote(urlparse.urlparse(url)[2]) or "/"
|
||||
for entry in self.entries:
|
||||
if entry.applies_to(useragent):
|
||||
return entry.allowance(url)
|
||||
|
|
@@ -150,7 +151,7 @@ class RuleLine:
|
|||
"""A rule line is a single "Allow:" (allowance==1) or "Disallow:"
|
||||
(allowance==0) followed by a path."""
|
||||
def __init__(self, path, allowance):
|
||||
self.path = urllib.quote(path)
|
||||
self.path = quote(path)
|
||||
self.allowance = allowance
|
||||
|
||||
def applies_to(self, filename):
|
||||
|
|
@@ -198,15 +199,6 @@ class Entry:
|
|||
return line.allowance
|
||||
return 1
|
||||
|
||||
class URLopener(urllib.FancyURLopener):
|
||||
def __init__(self, *args):
|
||||
apply(urllib.FancyURLopener.__init__, (self,) + args)
|
||||
self.errcode = 200
|
||||
|
||||
def http_error_default(self, url, fp, errcode, errmsg, headers):
|
||||
self.errcode = errcode
|
||||
return urllib.FancyURLopener.http_error_default(self, url, fp, errcode,
|
||||
errmsg, headers)
|
||||
|
||||
def _check(a,b):
|
||||
if not b:
|
||||
|
|
|
|||
Loading…
Reference in a new issue