mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-30 02:54:42 +00:00
use boolean values, timeout changes
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@998 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
b2d6226b79
commit
8c1deec0c9
8 changed files with 54 additions and 898 deletions
418
linkcheck/CSV.py
418
linkcheck/CSV.py
|
|
@ -1,418 +0,0 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
"""read/write Comma Separated Values (CSV)"""
|
||||
# CSV 0.17 8 June 1999 Copyright ©Laurence Tratt 1998 - 1999
|
||||
# e-mail: tratt@dcs.kcl.ac.uk
|
||||
# home-page: http://eh.org/~laurie/comp/python/csv/index.html
|
||||
#
|
||||
#
|
||||
# CSV.py is copyright ©1998 - 1999 by Laurence Tratt
|
||||
#
|
||||
# All rights reserved
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software and its
|
||||
# documentation for any purpose and without fee is hereby granted, provided that
|
||||
# the above copyright notice appear in all copies and that both that copyright
|
||||
# notice and this permission notice appear in supporting documentation.
|
||||
#
|
||||
# THE AUTHOR - LAURENCE TRATT - DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
|
||||
# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
|
||||
# NO EVENT SHALL THE AUTHOR FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
|
||||
# ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
|
||||
# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR
|
||||
# IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
#
|
||||
|
||||
import string, types, UserList
|
||||
|
||||
|
||||
###################################################################################################
|
||||
#
|
||||
# CSV class
|
||||
#
|
||||
class CSV(UserList.UserList):
    """ Manage a CSV (comma separated values) file

        The data is held in a list (``self.data``); each element is one
        row.  Rows created by input()/load() are Entry instances.

        Methods:
          __init__()
          load()                   load from file
          save()                   save to file
          input()                  input from string
          output()                 save to string
          append()                 appends one entry
          field__append()          appends one computed field to every entry
          duplicates__eliminate()  sorts the entries and drops duplicates
          __str__()                printable representation
    """

    def __init__(self, separator = ','):
        """ Initialise CSV class instance.

            Arguments:
              separator : The field delimiter. Defaults to ','
        """
        UserList.UserList.__init__(self)
        self.separator = separator
        # Both stay None until input()/load() has been called.
        self.fields__title__have = self.fields__title = None

    def load(self, file__data__name, fields__title__have, convert_numbers = 0, separator = None, comments = None):
        """ Load up a CSV file

            Arguments:
              file__data__name    : The name of the CSV file
              fields__title__have : 0         : file has no title fields
                                    otherwise : file has title fields
              convert_numbers     : 0         : store everything as strings
                                    otherwise : store fields that can be converted
                                                to ints or floats as that Python
                                                type (defaults to 0)
              separator           : The field delimiter (optional)
              comments            : A list of strings and regular expressions
                                    to remove comments (defaults to ["#"])
        """
        file__data = open(file__data__name, 'r')
        try:
            # try/finally so the handle is released even if reading fails
            text = file__data.read(-1)
        finally:
            file__data.close()
        self.input(text, fields__title__have, convert_numbers,
                   separator or self.separator, comments or ["#"])

    def save(self, file__data__name, separator = None):
        """ Save data to CSV file.

            Arguments:
              file__data__name : The name of the CSV file to save to
              separator        : The field delimiter (optional)
        """
        # Render first: a failure in output() must not leave a truncated file.
        text = self.output(separator or self.separator)
        file__data = open(file__data__name, 'w')
        try:
            file__data.write(text)
        finally:
            file__data.close()

    def _convert_field(self, field):
        """ Return field as int or float if it looks numeric, else unchanged. """
        for char in field:
            if char not in "0123456789.-":
                return field
        try:
            if "." in field:
                return float(field)
            return int(field)
        except ValueError:
            # e.g. "", "-", "1.2.3": only made of numeric characters but
            # not a number, so keep the text
            return field

    def _line__process(self, line, convert_numbers, separator):
        """ Split one CSV line into a list of field values.

            Unquoted spaces directly around a separator are dropped, double
            quotes toggle quoting (and are removed from the value), and a
            line ending with the separator yields a final empty field.
        """
        fields = []
        end = len(line)
        sep_len = len(separator)
        pos = 0
        while pos < end:
            # Skip any space at the beginning of the field (if there should
            # be leading space, the field must be quoted in the CSV file)
            while pos < end and line[pos] == " ":
                pos = pos + 1

            chars = []
            quoted = 0
            while pos < end:
                if not quoted and line[pos] == " ":
                    # Look past a run of spaces: if it is followed by the
                    # separator or the end of the line it is trailing
                    # padding and does not belong to the field.
                    ahead = pos
                    while ahead < end and line[ahead] == " ":
                        ahead = ahead + 1
                    if ahead >= end:
                        # nothing but padding left on this line
                        pos = end
                        break
                    if line[ahead : ahead + sep_len] == separator:
                        pos = ahead
                if not quoted and line[pos : pos + sep_len] == separator:
                    break
                if line[pos] == "\"":
                    quoted = not quoted
                else:
                    chars.append(line[pos])
                pos = pos + 1
            pos = pos + sep_len

            field = "".join(chars)
            if convert_numbers:
                field = self._convert_field(field)
            fields.append(field)

        if line.endswith(separator):
            # A trailing separator means there is one more, empty, field.
            fields.append("")
        return fields

    def input(self, data, fields__title__have, convert_numbers = 0, separator = None, comments = None):
        """ Take wodge of CSV data & convert it into internal format.

            Arguments:
              data                : A string containing the CSV data
              fields__title__have : 0         : file has no title fields
                                    otherwise : file has title fields
              convert_numbers     : 0         : store everything as strings
                                    otherwise : store fields that can be
                                                converted to ints or
                                                floats as that Python type
                                                (defaults to 0)
              separator           : The field delimiter (optional)
              comments            : A list of strings and regular expressions
                                    to remove comments (defaults to ["#"]).
                                    A string removes every line starting with
                                    it; any object with a sub() method (e.g. a
                                    compiled regular expression) is applied to
                                    the whole input.

            Raises:
              TypeError if an element of comments is neither of those.
        """
        separator = separator or self.separator
        comments = comments or ["#"]

        self.fields__title__have = fields__title__have

        # Apply pattern comments to the raw text, collect string comments
        comments__strings = []
        for comment in comments:
            if isinstance(comment, str):
                comments__strings.append(comment)
            elif hasattr(comment, "sub"):
                data = comment.sub("", data)
            else:
                raise TypeError("Invalid comment type %r" % (comment,))

        # Strip every line and drop those starting with a string comment
        lines = []
        for line in data.splitlines():
            line = line.strip()
            for comment in comments__strings:
                if line.startswith(comment):
                    break
            else:
                lines.append(line)

        # Process the input data
        if fields__title__have:
            if lines:
                self.fields__title = self._line__process(lines[0], convert_numbers, separator)
            else:
                self.fields__title = []
            pos__start = 1
        else:
            self.fields__title = []
            pos__start = 0
        self.data = []
        for line in lines[pos__start : ]:
            if line != "":
                self.data.append(Entry(self._line__process(line, convert_numbers, separator), self.fields__title))

    def _line__make(self, entry, separator):
        """ Render one row (any iterable of fields) as a CSV line. """
        parts = []
        for field in entry:
            if not isinstance(field, str):
                field = repr(field)
            # Quote fields that would otherwise be split or lose padding
            if len(field) > 0 and (field.find(separator) != -1 or field[0] == " " or field[-1] == " "):
                field = '"%s"' % field
            parts.append(field)
        return separator.join(parts)

    def output(self, separator = None):
        """ Convert internal data into CSV string.

            Arguments:
              separator : The field delimiter (optional)

            Returns:
              String containing CSV data; the title line, if any, is
              followed by an empty line.
        """
        separator = separator or self.separator

        if self.fields__title__have:
            text = self._line__make(self.fields__title, separator) + "\n\n"
        else:
            text = ""
        rows = [self._line__make(entry, separator) for entry in self.data]
        return text + "\n".join(rows) + "\n"

    def append(self, entry):
        """ Add an entry. """
        if self.fields__title:
            entry.fields__title = self.fields__title
        self.data.append(entry)

    def field__append(self, func, field__title = None):
        """ Append a field with values specified by a function

            Arguments:
              func         : Function to be called func(entry) to get the value of the new field
              field__title : Name of new field (if applicable)
        """
        for entry in self.data:
            entry.append(func(entry))

        if self.fields__title__have:
            self.fields__title.append(field__title)

    def duplicates__eliminate(self):
        """ Eliminate duplicates (this may result in a reordering of the entries) """
        # Sorting puts equal entries next to each other, so a single pass
        # comparing each entry with the last one kept is enough.
        self.sort()
        if not self.data:
            return
        unique = [self.data[0]]
        for entry in self.data[1:]:
            if not (entry == unique[-1]):
                unique.append(entry)
        # Replace the contents in place so references to the list stay valid
        self.data[:] = unique

    def __str__(self):
        """ Construct a printable representation of the internal data. """

        def cell(value):
            # strings are shown as they are, everything else by its repr
            if isinstance(value, str):
                return value
            return repr(value)

        # The number of columns is taken from the first row (or, for an
        # empty table, from the titles).
        if self.data:
            columns = len(self.data[0])
        elif self.fields__title__have:
            columns = len(self.fields__title or [])
        else:
            columns = 0

        # Work out the maximum width of each column (measured on the repr,
        # so strings get two extra characters of room)
        columns__width = []
        for column in range(columns):
            if self.fields__title__have:
                width = len(repr(self.fields__title[column]))
            else:
                width = 0
            for entry in self.data:
                width__possible = len(repr(entry.data[column]))
                if width__possible > width:
                    width = width__possible
            columns__width.append(width)

        parts = []
        if self.fields__title__have:
            titles = [cell(title).ljust(width) for title, width in zip(self.fields__title, columns__width)]
            parts.append(" ".join(titles) + "\n\n")
        for entry in self.data:
            cells = [cell(value).ljust(width) for value, width in zip(entry.data, columns__width)]
            parts.append(" ".join(cells) + "\n")
        return "".join(parts)
|
||||
|
||||
|
||||
|
||||
###################################################################################################
|
||||
#
|
||||
# CSV data entry class
|
||||
#
|
||||
#
|
||||
|
||||
class Entry (UserList.UserList):
    """ CSV data entry, UserList subclass.

        Has the same properties as a list, but has a few dictionary
        like properties for easy access of fields if they have titles:
        an integer (or slice) subscript addresses the field by position,
        any other subscript is looked up in the list of field titles.

        Methods(Override):
          __init__
          __getitem__
          __setitem__
          __delitem__
          __str__
    """

    def __init__(self, fields, fields__title = None):
        """ Initialise with fields data and field title.

            Arguments:
              fields        : a list containing the data for each field
                              of this entry
              fields__title : a list with the titles of each field
                              (an empty list means there are no titles).
                              The list is stored by reference, not copied.
        """
        UserList.UserList.__init__(self, fields)
        if fields__title is None:
            fields__title = []
        self.fields__title = fields__title

    def _position(self, x):
        """ Translate a subscript into a position in self.data.

            Raises ValueError if x is a title that is not in fields__title.
        """
        if isinstance(x, (int, slice)):
            return x
        return self.fields__title.index(x)

    def __getitem__(self, x):
        """ Return the field at position x or with title x. """
        return self.data[self._position(x)]

    def __setitem__(self, x, item):
        """ Replace the field at position x or with title x. """
        self.data[self._position(x)] = item

    def __delitem__(self, x):
        """ Delete the field at position x or with title x.

            The title list itself is left unchanged.
        """
        del self.data[self._position(x)]

    def __str__(self):
        """ Return the repr of the underlying list of fields. """
        return repr(self.data)
|
||||
|
|
@ -121,7 +121,7 @@ class HttpUrlData (ProxyUrlData):
|
|||
response = self._getHttpResponse()
|
||||
self.headers = response.msg
|
||||
debug(BRING_IT_ON, response.status, response.reason, self.headers)
|
||||
has301status = 0
|
||||
has301status = False
|
||||
while 1:
|
||||
# proxy enforcement (overrides standard proxy)
|
||||
if response.status == 305 and self.headers:
|
||||
|
|
@ -157,13 +157,13 @@ class HttpUrlData (ProxyUrlData):
|
|||
self.setWarning(i18n._("A HTTP 301 redirection occured and the url has no "
|
||||
"trailing / at the end. All urls which point to (home) "
|
||||
"directories should end with a / to avoid redirection."))
|
||||
has301status = 1
|
||||
has301status = True
|
||||
self.aliases.append(redirected)
|
||||
# check cache again on possibly changed URL
|
||||
key = self.getCacheKey()
|
||||
if self.config.urlCache_has_key(key):
|
||||
self.copyFrom(self.config.urlCache_get(key))
|
||||
self.cached = 1
|
||||
self.cached = True
|
||||
self.logMe()
|
||||
return
|
||||
# check if we still have a http url, it could be another
|
||||
|
|
@ -180,7 +180,7 @@ class HttpUrlData (ProxyUrlData):
|
|||
# append new object to queue
|
||||
self.config.appendUrl(newobj)
|
||||
# pretend to be finished and logged
|
||||
self.cached = 1
|
||||
self.cached = True
|
||||
return
|
||||
# new response data
|
||||
response = self._getHttpResponse()
|
||||
|
|
@ -335,7 +335,7 @@ class HttpUrlData (ProxyUrlData):
|
|||
|
||||
def getContent (self):
|
||||
if not self.has_content:
|
||||
self.has_content = 1
|
||||
self.has_content = True
|
||||
self.closeConnection()
|
||||
t = time.time()
|
||||
response = self._getHttpResponse("GET")
|
||||
|
|
@ -357,16 +357,16 @@ class HttpUrlData (ProxyUrlData):
|
|||
|
||||
def isHtml (self):
|
||||
if not (self.valid and self.headers):
|
||||
return 0
|
||||
return False
|
||||
if self.headers.gettype()[:9]!="text/html":
|
||||
return 0
|
||||
return False
|
||||
encoding = self.headers.get("Content-Encoding")
|
||||
if encoding and encoding not in _supported_encodings and \
|
||||
encoding!='identity':
|
||||
self.setWarning(i18n._('Unsupported content encoding %s.')%\
|
||||
`encoding`)
|
||||
return 0
|
||||
return 1
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def getRobotsTxtUrl (self):
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ from urllib import splituser, splithost, splitport, unquote
|
|||
from linkcheck import DNS, LinkCheckerError, getLinkPat
|
||||
DNS.DiscoverNameServers()
|
||||
|
||||
import Config, StringUtil, linkname, test_support, timeoutsocket
|
||||
import Config, StringUtil, linkname, test_support
|
||||
from linkparse import LinkParser
|
||||
from debug import *
|
||||
|
||||
|
|
@ -84,7 +84,7 @@ ExcList = [
|
|||
ValueError, # from httplib.py
|
||||
LinkCheckerError,
|
||||
DNS.Error,
|
||||
timeoutsocket.Timeout,
|
||||
socket.timeout,
|
||||
socket.error,
|
||||
select.error,
|
||||
]
|
||||
|
|
@ -182,7 +182,7 @@ class UrlData:
|
|||
self.validString = i18n._("Valid")
|
||||
self.warningString = None
|
||||
self.infoString = None
|
||||
self.valid = 1
|
||||
self.valid = True
|
||||
self.url = None
|
||||
self.line = line
|
||||
self.column = column
|
||||
|
|
@ -190,28 +190,28 @@ class UrlData:
|
|||
self.dltime = -1
|
||||
self.dlsize = -1
|
||||
self.checktime = 0
|
||||
self.cached = 0
|
||||
self.cached = True
|
||||
self.urlConnection = None
|
||||
self.extern = (1, 0)
|
||||
self.data = None
|
||||
self.has_content = 0
|
||||
self.has_content = False
|
||||
url = get_absolute_url(self.urlName, self.baseRef, self.parentName)
|
||||
# assume file link if no scheme is found
|
||||
self.scheme = url.split(":", 1)[0] or "file"
|
||||
|
||||
|
||||
def setError (self, s):
|
||||
self.valid=0
|
||||
self.valid = False
|
||||
self.errorString = i18n._("Error")+": "+s
|
||||
|
||||
|
||||
def setValid (self, s):
|
||||
self.valid=1
|
||||
self.valid = True
|
||||
self.validString = i18n._("Valid")+": "+s
|
||||
|
||||
|
||||
def isHtml (self):
|
||||
return 0
|
||||
return False
|
||||
|
||||
|
||||
def setWarning (self, s):
|
||||
|
|
@ -318,7 +318,7 @@ class UrlData:
|
|||
for key in self.getCacheKeys():
|
||||
if self.config.urlCache_has_key(key):
|
||||
self.copyFrom(self.config.urlCache_get(key))
|
||||
self.cached = 1
|
||||
self.cached = True
|
||||
self.logMe()
|
||||
return
|
||||
|
||||
|
|
@ -384,7 +384,7 @@ class UrlData:
|
|||
if not self.cached:
|
||||
for key in self.getCacheKeys():
|
||||
self.config.urlCache_set(key, self)
|
||||
self.cached = 1
|
||||
self.cached = True
|
||||
|
||||
|
||||
def getCacheKeys (self):
|
||||
|
|
@ -470,13 +470,13 @@ class UrlData:
|
|||
|
||||
def hasContent (self):
|
||||
"""indicate wether url getContent() can be called"""
|
||||
return 1
|
||||
return True
|
||||
|
||||
|
||||
def getContent (self):
|
||||
"""Precondition: urlConnection is an opened URL."""
|
||||
if not self.has_content:
|
||||
self.has_content = 1
|
||||
self.has_content = True
|
||||
t = time.time()
|
||||
self.data = self.urlConnection.read()
|
||||
self.dltime = time.time() - t
|
||||
|
|
|
|||
|
|
@ -20,15 +20,15 @@ class LinkCheckerError (Exception):
|
|||
pass
|
||||
|
||||
import re, i18n
|
||||
def getLinkPat (arg, strict=0):
|
||||
def getLinkPat (arg, strict=False):
|
||||
"""get a link pattern matcher for intern/extern links"""
|
||||
debug(BRING_IT_ON, "Link pattern", `arg`)
|
||||
if arg[0:1] == '!':
|
||||
pattern = arg[1:]
|
||||
negate = 1
|
||||
negate = True
|
||||
else:
|
||||
pattern = arg
|
||||
negate = 0
|
||||
negate = False
|
||||
return {
|
||||
"pattern": re.compile(pattern),
|
||||
"negate": negate,
|
||||
|
|
|
|||
|
|
@ -42,10 +42,10 @@ def startoutput (out=sys.stdout):
|
|||
def checkaccess (out=sys.stdout, hosts=[], servers=[], env=os.environ):
|
||||
if os.environ.get('REMOTE_ADDR') in hosts and \
|
||||
os.environ.get('SERVER_ADDR') in servers:
|
||||
return 1
|
||||
return True
|
||||
logit({}, env)
|
||||
printError(out, "Access denied")
|
||||
return 0
|
||||
return False
|
||||
|
||||
|
||||
def checklink (out=sys.stdout, form={}, env=os.environ):
|
||||
|
|
@ -59,16 +59,16 @@ def checklink (out=sys.stdout, form={}, env=os.environ):
|
|||
config["recursionlevel"] = int(form["level"].value)
|
||||
config["log"] = config.newLogger('html', {'fd': out})
|
||||
config.disableThreading()
|
||||
if form.has_key('strict'): config['strict'] = 1
|
||||
if form.has_key("anchors"): config["anchors"] = 1
|
||||
if not form.has_key("errors"): config["verbose"] = 1
|
||||
if form.has_key('strict'): config['strict'] = True
|
||||
if form.has_key("anchors"): config["anchors"] = True
|
||||
if not form.has_key("errors"): config["verbose"] = True
|
||||
if form.has_key("intern"):
|
||||
pat = "^(ftp|https?)://"+re.escape(getHostName(form))
|
||||
else:
|
||||
pat = ".+"
|
||||
config["internlinks"].append(getLinkPat(pat))
|
||||
# avoid checking of local files
|
||||
config["externlinks"].append(getLinkPat("^file:", strict=1))
|
||||
config["externlinks"].append(getLinkPat("^file:", strict=True))
|
||||
# start checking
|
||||
config.appendUrl(GetUrlDataFrom(form["url"].value, 0, config))
|
||||
checkUrls(config)
|
||||
|
|
|
|||
|
|
@ -1,425 +0,0 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
|
||||
####
|
||||
# Copyright 2000,2001 by Timothy O'Malley <timo@alum.mit.edu>
|
||||
#
|
||||
# All Rights Reserved
|
||||
#
|
||||
# Permission to use, copy, modify, and distribute this software
|
||||
# and its documentation for any purpose and without fee is hereby
|
||||
# granted, provided that the above copyright notice appear in all
|
||||
# copies and that both that copyright notice and this permission
|
||||
# notice appear in supporting documentation, and that the name of
|
||||
# Timothy O'Malley not be used in advertising or publicity
|
||||
# pertaining to distribution of the software without specific, written
|
||||
# prior permission.
|
||||
#
|
||||
# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
|
||||
# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR
|
||||
# ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
||||
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
||||
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
# PERFORMANCE OF THIS SOFTWARE.
|
||||
#
|
||||
####
|
||||
|
||||
"""Timeout Socket
|
||||
|
||||
This module enables a timeout mechanism on all TCP connections. It
|
||||
does this by inserting a shim into the socket module. After this module
|
||||
has been imported, all socket creation goes through this shim. As a
|
||||
result, every TCP connection will support a timeout.
|
||||
|
||||
The beauty of this method is that it immediately and transparently
|
||||
enables the entire python library to support timeouts on TCP sockets.
|
||||
As an example, if you wanted to SMTP connections to have a 20 second
|
||||
timeout:
|
||||
|
||||
import timeoutsocket
|
||||
import smtplib
|
||||
timeoutsocket.setDefaultSocketTimeout(20)
|
||||
|
||||
|
||||
The timeout applies to the socket functions that normally block on
|
||||
execution: read, write, connect, and accept. If any of these
|
||||
operations exceeds the specified timeout, the exception Timeout
|
||||
will be raised.
|
||||
|
||||
The default timeout value is set to None. As a result, importing
|
||||
this module does not change the default behavior of a socket. The
|
||||
timeout mechanism only activates when the timeout has been set to
|
||||
a numeric value. (This behavior mimics the behavior of the
|
||||
select.select() function.)
|
||||
|
||||
This module implements two classes: TimeoutSocket and TimeoutFile.
|
||||
|
||||
The TimeoutSocket class defines a socket-like object that attempts to
|
||||
avoid the condition where a socket may block indefinitely. The
|
||||
TimeoutSocket class raises a Timeout exception whenever the
|
||||
current operation delays too long.
|
||||
|
||||
The TimeoutFile class defines a file-like object that uses the TimeoutSocket
|
||||
class. When the makefile() method of TimeoutSocket is called, it returns
|
||||
an instance of a TimeoutFile.
|
||||
|
||||
Each of these objects adds two methods to manage the timeout value:
|
||||
|
||||
get_timeout() --> returns the timeout of the socket or file
|
||||
set_timeout() --> sets the timeout of the socket or file
|
||||
|
||||
|
||||
As an example, one might use the timeout feature to create httplib
|
||||
connections that will timeout after 30 seconds:
|
||||
|
||||
import timeoutsocket
|
||||
import httplib
|
||||
H = httplib.HTTP("www.python.org")
|
||||
H.sock.set_timeout(30)
|
||||
|
||||
Note: When used in this manner, the connect() routine may still
|
||||
block because it happens before the timeout is set. To avoid
|
||||
this, use the 'timeoutsocket.setDefaultSocketTimeout()' function.
|
||||
|
||||
Good Luck!
|
||||
|
||||
"""
|
||||
|
||||
__version__ = "$Revision$"
|
||||
__author__ = "Timothy O'Malley <timo@alum.mit.edu>"
|
||||
|
||||
#
|
||||
# Imports
|
||||
#
|
||||
import select, string
|
||||
import socket
|
||||
# Remember the genuine socket constructor.  If this module has already
# installed its shim, the original constructor was saved on the socket
# module as "_no_timeoutsocket"; otherwise socket.socket is still the
# real thing.
_socket = getattr(socket, "_no_timeoutsocket", socket.socket)
|
||||
|
||||
|
||||
#
|
||||
# Set up constants to test for Connected and Blocking operations.
|
||||
# We delete 'os' and 'errno' to keep our namespace clean(er).
|
||||
# Thanks to Alex Martelli and G. Li for the Windows error codes.
|
||||
#
|
||||
import os
if os.name != "nt":
    # Everywhere but Windows the symbolic errno names are used.
    import errno
    _IsConnected = ( errno.EISCONN, )
    _ConnectBusy = ( errno.EINPROGRESS, errno.EALREADY, errno.EWOULDBLOCK )
    _AcceptBusy  = ( errno.EAGAIN, errno.EWOULDBLOCK )
    del errno
else:
    # Numeric error codes used on Windows ("nt").
    _IsConnected = ( 10022, 10056 )
    _ConnectBusy = ( 10035, )
    _AcceptBusy  = ( 10035, )
# 'os' (and 'errno' above) were only needed to build the tables
del os
|
||||
|
||||
|
||||
#
|
||||
# Default timeout value for ALL TimeoutSockets
|
||||
#
|
||||
# Timeout handed to every TimeoutSocket created by the timeoutsocket()
# factory; None means "no timeout".
_DefaultTimeout = None


def setDefaultSocketTimeout(timeout):
    """Set the timeout given to TimeoutSockets created from now on."""
    global _DefaultTimeout
    _DefaultTimeout = timeout


def getDefaultSocketTimeout():
    """Return the timeout currently given to newly created TimeoutSockets."""
    return _DefaultTimeout
|
||||
|
||||
#
|
||||
# Exceptions for socket errors and timeouts
|
||||
#
|
||||
# Socket errors are reported with the standard socket.error class.
Error = socket.error


class Timeout(Exception):
    """Raised when a socket operation takes longer than the timeout."""
|
||||
|
||||
|
||||
#
|
||||
# Factory function
|
||||
#
|
||||
from socket import AF_INET, SOCK_STREAM
def timeoutsocket(family=AF_INET, type=SOCK_STREAM, proto=None):
    """Create a socket; AF_INET/SOCK_STREAM sockets get timeout support.

    Arguments are those of socket.socket().  For any other family or
    socket type the plain socket object is returned unchanged; for
    AF_INET/SOCK_STREAM it is wrapped in a TimeoutSocket that starts out
    with the current default timeout.  A protocol number, when given, is
    passed on in both cases.
    """
    if proto:
        sock = _socket(family, type, proto)
    else:
        sock = _socket(family, type)
    if family != AF_INET or type != SOCK_STREAM:
        return sock
    return TimeoutSocket( sock, _DefaultTimeout )
# end timeoutsocket
|
||||
|
||||
#
|
||||
# The TimeoutSocket class definition
|
||||
#
|
||||
class TimeoutSocket:
    """TimeoutSocket object
    Implements a socket-like object that raises Timeout whenever
    an operation takes too long.
    The definition of 'too long' can be changed using the
    set_timeout() method.

    connect(), accept(), send() and recv() wait with select.select()
    for at most the timeout; every attribute not defined here is taken
    from the wrapped socket object.
    """

    # Number of makefile() results that have not called close() yet.
    _copies = 0
    # Blocking mode requested through setblocking(); true by default.
    _blocking = 1

    def __init__(self, sock, timeout):
        """Wrap the socket object sock; timeout is passed to select()."""
        self._sock    = sock
        self._timeout = timeout
    # end __init__

    def __getattr__(self, key):
        """Delegate everything not defined here to the wrapped socket."""
        return getattr(self._sock, key)
    # end __getattr__

    def get_timeout(self):
        """Return the timeout of this socket."""
        return self._timeout
    # end get_timeout

    def set_timeout(self, timeout=None):
        """Set the timeout of this socket."""
        self._timeout = timeout
    # end set_timeout

    def setblocking(self, blocking):
        """Record the requested blocking mode and apply it to the socket."""
        self._blocking = blocking
        return self._sock.setblocking(blocking)
    # end setblocking

    def connect_ex(self, addr):
        """Like connect(), but return the error code instead of raising.

        Returns 0 on success.  A Timeout is not a socket error and is
        still raised.
        """
        import sys
        try:
            self.connect(addr)
        except Error:
            # sys.exc_info() instead of "except Error, why": that form
            # is only valid in Python 2
            return sys.exc_info()[1].args[0]
        return 0
    # end connect_ex

    def connect(self, addr, port=None, dumbhack=None):
        """Connect to addr, raising Timeout if that takes too long.

        dumbhack is for internal use: it marks the second attempt made
        after select() reported the socket as writable.
        """
        import sys

        # In case we were called as connect(host, port)
        if port is not None:  addr = (addr, port)

        # Shortcuts
        sock     = self._sock
        timeout  = self._timeout
        blocking = self._blocking

        # First, make a non-blocking call to connect
        try:
            sock.setblocking(0)
            sock.connect(addr)
            sock.setblocking(blocking)
            return
        except Error:
            why = sys.exc_info()[1]

            # Set the socket's blocking mode back
            sock.setblocking(blocking)

            # If we are not blocking, re-raise
            if not blocking:
                raise

            # If we are already connected, then return success.
            # If we got a genuine error, re-raise it.
            errcode = why.args[0]
            if dumbhack and errcode in _IsConnected:
                return
            elif errcode not in _ConnectBusy:
                raise

        # Now, wait for the connect to happen
        # ONLY if dumbhack indicates this is pass number one.
        #   If select raises an error, we pass it on.
        #   Is this the right behavior?
        if not dumbhack:
            r,w,e = select.select([], [sock], [], timeout)
            if w:
                return self.connect(addr, dumbhack=1)

        # If we get here, then we should raise Timeout
        raise Timeout("Attempted connect to %s timed out." % str(addr) )
    # end connect

    def accept(self, dumbhack=None):
        """Accept a connection, raising Timeout if none arrives in time.

        Returns (socket, address); the new socket is wrapped in the same
        class and gets this socket's timeout and blocking mode.
        dumbhack is for internal use, see connect().
        """
        import sys

        # Shortcuts
        sock     = self._sock
        timeout  = self._timeout
        blocking = self._blocking

        # First, make a non-blocking call to accept
        #  If we get a valid result, then convert the
        #  accept'ed socket into a TimeoutSocket.
        # Be careful about the blocking mode of ourselves.
        try:
            sock.setblocking(0)
            newsock, addr = sock.accept()
            sock.setblocking(blocking)
            timeoutnewsock = self.__class__(newsock, timeout)
            timeoutnewsock.setblocking(blocking)
            return (timeoutnewsock, addr)
        except Error:
            why = sys.exc_info()[1]

            # Set the socket's blocking mode back
            sock.setblocking(blocking)

            # If we are not supposed to block, then re-raise
            if not blocking:
                raise

            # If we got a genuine error, re-raise it.
            errcode = why.args[0]
            if errcode not in _AcceptBusy:
                raise

        # Now, wait for the accept to happen
        # ONLY if dumbhack indicates this is pass number one.
        #   If select raises an error, we pass it on.
        #   Is this the right behavior?
        if not dumbhack:
            r,w,e = select.select([sock], [], [], timeout)
            if r:
                return self.accept(dumbhack=1)

        # If we get here, then we should raise Timeout
        raise Timeout("Attempted accept timed out.")
    # end accept

    def send(self, data, flags=0):
        """Send data; in blocking mode wait at most the timeout first.

        Returns whatever the wrapped socket's send() returns.
        """
        sock = self._sock
        if self._blocking:
            r,w,e = select.select([],[sock],[], self._timeout)
            if not w:
                raise Timeout("Send timed out")
        return sock.send(data, flags)
    # end send

    def recv(self, bufsize, flags=0):
        """Receive data; in blocking mode wait at most the timeout first."""
        sock = self._sock
        if self._blocking:
            r,w,e = select.select([sock], [], [], self._timeout)
            if not r:
                raise Timeout("Recv timed out")
        return sock.recv(bufsize, flags)
    # end recv

    def makefile(self, flags="r", bufsize=-1):
        """Return a TimeoutFile reading from and writing to this socket."""
        self._copies = self._copies +1
        return TimeoutFile(self, flags, bufsize)
    # end makefile

    def close(self):
        """Close the socket once no makefile() result is outstanding.

        While such objects are outstanding, each call only decrements
        their count.
        """
        if self._copies <= 0:
            self._sock.close()
        else:
            self._copies = self._copies -1
    # end close

# end TimeoutSocket
|
||||
|
||||
|
||||
class TimeoutFile:
    """TimeoutFile object
    Implements a file-like object on top of TimeoutSocket.

    Received data that has not been handed out yet is kept in the
    "_inqueue" attribute of the socket object, so it is shared by all
    file objects created for the same socket.
    """

    def __init__(self, sock, mode="r", bufsize=4096):
        """Create a file object for sock.

        mode is accepted for compatibility and not used; bufsize is the
        maximum number of bytes requested per recv() call (values <= 0
        select the default of 4096).
        """
        self._sock          = sock
        self._bufsize       = 4096
        if bufsize > 0: self._bufsize = bufsize
        if not hasattr(sock, "_inqueue"): self._sock._inqueue = ""

    # end __init__

    def __getattr__(self, key):
        """Delegate everything not defined here to the socket object."""
        return getattr(self._sock, key)
    # end __getattr__

    def close(self):
        """Close the underlying socket object; further calls do nothing."""
        if self._sock is not None:
            self._sock.close()
            self._sock = None
    # end close

    def write(self, data):
        """Write all of data to the socket."""
        # send() may accept only part of the data, so repeat until
        # nothing is left.
        while data:
            sent = self.send(data)
            data = data[sent:]
    # end write

    def _fill(self, size):
        """Receive once into the queue; return false at end of data.

        When size is positive, no more than the bytes still missing to
        reach size are requested.
        """
        _sock = self._sock
        bufsize = self._bufsize
        if size > 0:
            bufsize = min(bufsize, size - len(_sock._inqueue))
        buf = self.recv(bufsize)
        if not buf:
            return 0
        _sock._inqueue = _sock._inqueue + buf
        return 1

    def _take(self, count):
        """Remove and return the first count characters of the queue."""
        _sock = self._sock
        data = _sock._inqueue
        _sock._inqueue = data[count:]
        return data[:count]

    def read(self, size=-1):
        """Read at most size bytes; everything until the end of the data
        if size is negative."""
        _sock = self._sock
        while 1:
            if len(_sock._inqueue) >= size >= 0:
                break
            if not self._fill(size):
                break
        count = len(_sock._inqueue)
        if 0 <= size < count:
            # hand out only what was asked for, keep the rest queued
            count = size
        return self._take(count)
    # end read

    def readline(self, size=-1):
        """Read one line including its newline, but at most size bytes
        if size is not negative."""
        _sock = self._sock
        while 1:
            idx = _sock._inqueue.find("\n")
            if idx >= 0:
                break
            if len(_sock._inqueue) >= size >= 0:
                break
            if not self._fill(size):
                break

        if idx >= 0:
            count = idx + 1
        else:
            count = len(_sock._inqueue)
        if 0 <= size < count:
            # the size limit applies even if the newline is already queued
            count = size
        return self._take(count)
    # end readline

    def readlines(self, sizehint=-1):
        """Read until the end of the data and return a list of lines.

        sizehint is accepted for compatibility and not used.
        """
        result = []
        data = self.read()
        start = 0
        while start < len(data):
            idx = data.find("\n", start)
            if idx >= 0:
                stop = idx + 1
            else:
                stop = len(data)
            result.append( data[start:stop] )
            start = stop
        return result
    # end readlines

    def flush(self):
        """Do nothing: write() does not buffer."""
        pass

# end TimeoutFile
|
||||
|
||||
|
||||
#
|
||||
# Silently replace the socket() builtin function with
|
||||
# our timeoutsocket() definition.
|
||||
#
|
||||
# Patch only once: the presence of socket._no_timeoutsocket marks that the
# original constructor has already been saved and socket.socket replaced.
if not hasattr(socket, "_no_timeoutsocket"):
    socket._no_timeoutsocket, socket.socket = socket.socket, timeoutsocket
# From here on the name "socket" in this module refers to timeoutsocket
# rather than to the socket module.
socket = timeoutsocket
|
||||
# Finis
|
||||
47
linkchecker
47
linkchecker
|
|
@ -22,10 +22,9 @@ import sys
|
|||
if not hasattr(sys, 'version_info') or sys.version_info<(2, 3, 0, 'final', 0):
|
||||
raise SystemExit, "This program requires Python 2.3 or later."
|
||||
|
||||
import getopt, re, os, pprint, linkcheck
|
||||
import linkcheck.timeoutsocket
|
||||
import getopt, re, os, pprint, socket, linkcheck
|
||||
# set default 30 seconds timeout
|
||||
linkcheck.timeoutsocket.setDefaultSocketTimeout(30)
|
||||
socket.setdefaulttimeout(30)
|
||||
# import several helper debugging things
|
||||
from linkcheck.debug import *
|
||||
from linkcheck.log import LoggerKeys
|
||||
|
|
@ -111,7 +110,7 @@ For single-letter option arguments the space is not a necessity. So
|
|||
To disable threading specify a non-positive number.
|
||||
--timeout=secs
|
||||
Set the timeout for TCP connection attempts in seconds. The default
|
||||
timeout is 10 seconds.
|
||||
timeout is 30 seconds.
|
||||
-u name, --user=name
|
||||
Try username name for HTTP and FTP authorization.
|
||||
Default is 'anonymous'. See also -p.
|
||||
|
|
@ -265,12 +264,12 @@ if get_debuglevel() > 0:
|
|||
# apply commandline options and arguments
|
||||
_user = "anonymous"
|
||||
_password = "guest@"
|
||||
constructauth = 0
|
||||
do_profile = 0
|
||||
constructauth = False
|
||||
do_profile = False
|
||||
for opt,arg in options:
|
||||
if opt=="-a" or opt=="--anchors":
|
||||
config["anchors"] = "True"
|
||||
config["warnings"] = "True"
|
||||
config["anchors"] = True
|
||||
config["warnings"] = True
|
||||
|
||||
elif opt=="-e" or opt=="--extern":
|
||||
config["externlinks"].append(linkcheck.getLinkPat(arg))
|
||||
|
|
@ -286,7 +285,7 @@ for opt,arg in options:
|
|||
"'-o, --output'")
|
||||
|
||||
elif opt=="-F" or opt=="--file-output":
|
||||
ns = {'fileoutput':1}
|
||||
ns = {'fileoutput': 1}
|
||||
try:
|
||||
type, ns['filename'] = arg.split('/', 1)
|
||||
if not ns['filename']: raise ValueError
|
||||
|
|
@ -298,23 +297,23 @@ for opt,arg in options:
|
|||
"'-F, --file-output'")
|
||||
|
||||
elif opt=="-I" or opt=="--interactive":
|
||||
config['interactive'] = "True"
|
||||
config['interactive'] = True
|
||||
|
||||
elif opt=="-i" or opt=="--intern":
|
||||
config["internlinks"].append(linkcheck.getLinkPat(arg))
|
||||
|
||||
elif opt=="-l" or opt=="--denyallow":
|
||||
config["denyallow"] = "True"
|
||||
config["denyallow"] = True
|
||||
|
||||
elif opt=="-N" or opt=="--nntp-server":
|
||||
config["nntpserver"] = arg
|
||||
|
||||
elif opt=="--no-anchor-caching":
|
||||
config["noanchorcaching"] = "True"
|
||||
config["noanchorcaching"] = True
|
||||
|
||||
elif opt=="-p" or opt=="--password":
|
||||
_password = arg
|
||||
constructauth = "True"
|
||||
constructauth = True
|
||||
|
||||
elif opt=="-P" or opt=="--pause":
|
||||
try:
|
||||
|
|
@ -329,10 +328,10 @@ for opt,arg in options:
|
|||
(`arg`, "'-P, --pause'"))
|
||||
|
||||
elif opt=="--profile":
|
||||
do_profile = "True"
|
||||
do_profile = True
|
||||
|
||||
elif opt=="-q" or opt=="--quiet":
|
||||
config["quiet"] = "True"
|
||||
config["quiet"] = True
|
||||
|
||||
elif opt=="-r" or opt=="--recursion-level":
|
||||
try:
|
||||
|
|
@ -348,7 +347,7 @@ for opt,arg in options:
|
|||
elif opt=="-R" or opt=="--robots-txt": pass
|
||||
|
||||
elif opt=="-s" or opt=="--strict":
|
||||
config["strict"] = "True"
|
||||
config["strict"] = True
|
||||
|
||||
elif opt=="-t" or opt=="--threads":
|
||||
try:
|
||||
|
|
@ -370,18 +369,18 @@ for opt,arg in options:
|
|||
if timeout <= 0:
|
||||
printUsage(i18n._("Illegal argument %s for option %s") % \
|
||||
(`arg`, "'--timeout'"))
|
||||
linkcheck.timeoutsocket.setDefaultSocketTimeout(timeout)
|
||||
socket.setdefaulttimeout(timeout)
|
||||
|
||||
elif opt=="-u" or opt=="--user":
|
||||
_user = arg
|
||||
constructauth = "True"
|
||||
constructauth = True
|
||||
|
||||
elif opt=="-V" or opt=="--version":
|
||||
printVersion()
|
||||
|
||||
elif opt=="-v" or opt=="--verbose":
|
||||
config["verbose"] = "True"
|
||||
config["warnings"] = "True"
|
||||
config["verbose"] = True
|
||||
config["warnings"] = True
|
||||
|
||||
elif opt=="--viewprof":
|
||||
viewprof()
|
||||
|
|
@ -391,14 +390,14 @@ for opt,arg in options:
|
|||
util1.abbuzze()
|
||||
sys.exit(0)
|
||||
elif opt=="-w" or opt=="--warnings":
|
||||
config["warnings"] = "True"
|
||||
config["warnings"] = True
|
||||
|
||||
elif opt=="-W" or opt=="--warning-regex":
|
||||
config["warningregex"] = re.compile(arg)
|
||||
config["warnings"] = "True"
|
||||
config["warnings"] = True
|
||||
|
||||
elif opt=="-C" or opt=="--cookies":
|
||||
config['cookies'] = "True"
|
||||
config['cookies'] = True
|
||||
|
||||
if constructauth:
|
||||
config["authentication"].insert(0, {'pattern': re.compile(".*"),
|
||||
|
|
@ -431,7 +430,7 @@ for url in args:
|
|||
url = "http://%s"%url
|
||||
elif url.startswith("ftp."):
|
||||
url = "ftp://%s"%url
|
||||
config.appendUrl(UrlData.GetUrlDataFrom(url, 0, config, cmdline="True"))
|
||||
config.appendUrl(UrlData.GetUrlDataFrom(url, 0, config, cmdline=True))
|
||||
|
||||
############################# check the urls ################################
|
||||
if do_profile:
|
||||
|
|
|
|||
|
|
@ -116,7 +116,7 @@ To disable threading specify a non-positive number.
|
|||
.TP
|
||||
\fB--timeout=\fIsecs\fP
|
||||
Set the timeout for connection attempts in seconds. The default timeout
|
||||
is 10 seconds.
|
||||
is 30 seconds.
|
||||
.TP
|
||||
\fB-u \fIname\fP, \fB--user=\fIname\fP
|
||||
Try username \fIname\fP for HTTP and FTP authorization.
|
||||
|
|
|
|||
Loading…
Reference in a new issue