use boolean values, timeout changes

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@998 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2003-08-11 11:49:30 +00:00
parent b2d6226b79
commit 8c1deec0c9
8 changed files with 54 additions and 898 deletions

View file

@ -1,418 +0,0 @@
# -*- coding: iso-8859-1 -*-
"""read/write Comma Separated Values (CSV)"""
# CSV 0.17 8 June 1999 Copyright ©Laurence Tratt 1998 - 1999
# e-mail: tratt@dcs.kcl.ac.uk
# home-page: http://eh.org/~laurie/comp/python/csv/index.html
#
#
# CSV.py is copyright ©1998 - 1999 by Laurence Tratt
#
# All rights reserved
#
# Permission to use, copy, modify, and distribute this software and its
# documentation for any purpose and without fee is hereby granted, provided that
# the above copyright notice appear in all copies and that both that copyright
# notice and this permission notice appear in supporting documentation.
#
# THE AUTHOR - LAURENCE TRATT - DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
# NO EVENT SHALL THE AUTHOR FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
# ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTUOUS ACTION, ARISING OUT OF OR
# IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
import string, types, UserList
###################################################################################################
#
# CSV class
#
class CSV(UserList.UserList):
    """ Manage a CSV (comma separated values) file
    The data is held in a list.
    Methods:
        __init__()
        load()      load from file
        save()      save to file
        input()     input from string
        output()    save to string
        append()    appends one entry
        __str__()   printable representation
    """
    # NOTE(review): this is Python 2 code (types.InstanceType, backtick
    # repr, string.join, list-returning map()); it will not run on Python 3.
    def __init__(self, separator = ','):
        """ Initialise CSV class instance.
        Arguments:
            separator : The field delimiter. Defaults to ','
        """
        UserList.UserList.__init__(self)
        self.separator = separator
        # No data loaded yet: no titles, and "has titles" flag unset.
        self.fields__title__have = self.fields__title = None
    def load(self, file__data__name, fields__title__have, convert_numbers = 0, separator = None, comments = None):
        """ Load up a CSV file
        Arguments:
            file__data__name    : The name of the CSV file
            fields__title__have : 0         : file has no title fields
                                  otherwise : file has title fields
            convert_numbers     : 0         : store everything as string's
                                  otherwise : store fields that can be converted
                                              to ints or floats to that Python
                                              type; defaults to 0
            separator           : The field delimiter (optional)
            comments            : A list of strings and regular expressions to remove comments
        """
        file__data = open(file__data__name, 'r')
        # read(-1) reads the whole file; argument defaults applied here so
        # input() receives concrete values.
        self.input(file__data.read(-1), fields__title__have, convert_numbers, separator or self.separator, comments or ["#"])
        file__data.close()
    def save(self, file__data__name, separator = None):
        """ Save data to CSV file.
        Arguments:
            file__data__name : The name of the CSV file to save to
            separator        : The field delimiter (optional)
        """
        file__data = open(file__data__name, 'w')
        file__data.write(self.output(separator or self.separator))
        file__data.close()
    def input(self, data, fields__title__have, convert_numbers = 0, separator = None, comments = None):
        """ Take wodge of CSV data & convert it into internal format.
        Arguments:
            data                : A string containing the CSV data
            fields__title__have : 0         : file has no title fields
                                  otherwise : file has title fields
            convert_numbers     : 0         : store everything as string's
                                  otherwise : store fields that can be
                                              converted to ints or
                                              floats to that Python type;
                                              defaults to 0
            separator           : The field delimiter (Optional)
            comments            : A list of strings and regular expressions to remove comments
                                  (defaults to ["#"])
        """
        def line__process(line, convert_numbers, separator):
            # Split one line into a list of field values, honouring
            # double-quote quoting and stripping unquoted spaces.
            fields = []
            line__pos = 0
            while line__pos < len(line):
                # Skip any space at the beginning of the field (if there should be leading space,
                # there should be a " character in the CSV file)
                while line__pos < len(line) and line[line__pos] == " ":
                    line__pos = line__pos + 1
                field = ""
                quotes__level = 0
                while line__pos < len(line):
                    # Skip space at the end of a field (if there is trailing space, it should be
                    # encompassed by speech marks)
                    if quotes__level == 0 and line[line__pos] == " ":
                        line__pos__temp = line__pos
                        while line__pos__temp < len(line) and line[line__pos__temp] == " ":
                            line__pos__temp = line__pos__temp + 1
                        if line__pos__temp >= len(line):
                            break
                        elif line[line__pos__temp : line__pos__temp + len(separator)] == separator:
                            # Spaces run straight into the separator: jump over them.
                            line__pos = line__pos__temp
                    if quotes__level == 0 and line[line__pos : line__pos + len(separator)] == separator:
                        # Unquoted separator ends the field.
                        break
                    elif line[line__pos] == "\"":
                        # Toggle in/out of quoted mode; the quote itself is dropped.
                        if quotes__level == 0:
                            quotes__level = 1
                        else:
                            quotes__level = 0
                    else:
                        field = field + line[line__pos]
                    line__pos = line__pos + 1
                # Step over the separator that terminated this field.
                line__pos = line__pos + len(separator)
                if convert_numbers:
                    for char in field:
                        if char not in "0123456789.-":
                            # Non-numeric character: keep the raw string.
                            fields.append(field)
                            break
                    else:
                        # for/else: only numeric characters seen; try int, then float.
                        try:
                            if "." not in field:
                                fields.append(int(field))
                            else:
                                fields.append(float(field))
                        except:
                            fields.append(field)
                else:
                    fields.append(field)
            # NOTE(review): this compares a single character against the whole
            # separator string, so a trailing multi-character separator is not
            # detected — only single-character separators work here; confirm.
            if line[-len(separator)] == separator:
                fields.append(field)
            return fields
        separator = separator or self.separator
        comments = comments or ["#"]
        self.fields__title__have = fields__title__have
        # Remove comments from the input file
        comments__strings = []
        for comment in comments:
            if type(comment) == types.InstanceType:
                # Assumed to be a compiled regex-like object: strip its matches.
                data = comment.sub("", data)
            elif type(comment) == types.StringType:
                comments__strings.append(comment)
            else:
                raise Exception("Invalid comment type '" + comment + "'")
        lines = map(string.strip, data.splitlines())
        # Remove all comments that are of type string
        lines__pos = 0
        while lines__pos < len(lines):
            line = lines[lines__pos]
            line__pos = 0
            while line__pos < len(line) and line[line__pos] == " ":
                line__pos = line__pos + 1
            found_comment = 0
            for comment in comments__strings:
                # NOTE(review): uses '<', not '<=', so a comment marker sitting
                # exactly at the end of the line is not recognised — confirm intent.
                if line__pos + len(comment) < len(line) and line[line__pos : line__pos + len(comment)] == comment:
                    found_comment = 1
                    break
            if found_comment:
                del lines[lines__pos]
            else:
                lines__pos += 1
        # Process the input data
        if fields__title__have:
            # First line carries the column titles.
            self.fields__title = line__process(lines[0], convert_numbers, separator)
            pos__start = 1
        else:
            self.fields__title = []
            pos__start = 0
        self.data = []
        for line in lines[pos__start : ]:
            if line != "":
                self.data.append(Entry(line__process(line, convert_numbers, separator), self.fields__title))
    def output(self, separator = None):
        """ Convert internal data into CSV string.
        Arguments:
            separator : The field delimiter (optional)
        Returns:
            String containing CSV data
        """
        separator = separator or self.separator
        def line__make(entry, separator = separator):
            # Render one entry as a separator-joined line, quoting fields
            # that contain the separator or leading/trailing spaces.
            str = ""
            done__any = 0
            for field in entry:
                if done__any:
                    str += separator
                else:
                    done__any = 1
                if type(field) != types.StringType:
                    field = `field`
                if len(field) > 0 and (string.find(field, separator) != -1 or (field[0] == " " or field[-1] == " ")):
                    str += '"%s"'%field
                else:
                    str += field
            return str
        if self.fields__title__have:
            str = line__make(self.fields__title) + "\n\n"
        else:
            str = ""
        str += string.join(map(line__make, self.data), "\n") + "\n"
        return str
    def append(self, entry):
        """ Add an entry. """
        # Keep the entry's title list in sync with ours so title-based
        # indexing works on it.
        if self.fields__title:
            entry.fields__title = self.fields__title
        self.data.append(entry)
    def field__append(self, func, field__title = None):
        """ Append a field with values specified by a function
        Arguments:
            func         : Function to be called func(entry) to get the value of the new field
            field__title : Name of new field (if applicable)
        """
        for data__pos in range(len(self)):
            entry = self.data[data__pos]
            entry.append(func(entry))
            self.data[data__pos] = entry
        if self.fields__title__have:
            self.fields__title.append(field__title)
    def duplicates__eliminate(self):
        """ Eliminate duplicates (this may result in a reordering of the entries) """
        # To eliminate duplicates, we first get Python to sort the list for us; then all we have to
        # do is to check to see whether consecutive elements are the same, and delete them
        # This give us O(<sort>) * O(n) rather than the more obvious O(n * n) speed algorithm
        # XXX Could be done more efficiently for multiplicate duplicates by deleting a slice of
        # similar elements rather than deleting them individually
        # NOTE(review): self.data[0] raises IndexError when there are no
        # entries — confirm callers never invoke this on an empty CSV.
        self.sort()
        data__pos = 1
        entry__last = self.data[0]
        while data__pos < len(self.data):
            if self.data[data__pos] == entry__last:
                del self.data[data__pos]
            else:
                entry__last = self.data[data__pos]
                data__pos = data__pos + 1
    def __str__(self):
        """ Construct a printable representation of the internal data. """
        columns__width = []
        # Work out the maximum width of each column
        for column in range(len(self.data[0])):
            if self.fields__title__have:
                width = len(`self.fields__title[column]`)
            else:
                width = 0
            for entry in self:
                width__possible = len(`entry.data[column]`)
                if width__possible > width:
                    width = width__possible
            columns__width.append(width)
        if self.fields__title__have:
            str = string.join(map(string.ljust, self.fields__title, columns__width), " ") + "\n\n"
        else:
            str = ""
        for entry in self:
            # Strings are printed as-is; everything else via its repr.
            str += string.join(map(string.ljust, map(lambda a : ((type(a)==types.StringType) and [a] or [eval("`a`")])[0], entry.data), columns__width), " ") + "\n"
        return str
###################################################################################################
#
# CSV data entry class
#
#
class Entry (UserList.UserList):
""" CSV data entry, UserList subclass.
Has the same properties as a list, but has a few dictionary
like properties for easy access of fields if they have titles.
Methods(Override):
__init__
__getitem__
__setitem__
__delitem__
"""
def __init__(self, fields, fields__title = None):
""" Initialise with fields data and field title.
Arguments:
fields : a list containing the data for each field
of this entry
fields__title : a list with the titles of each field
(an empty list means there are no titles)
"""
UserList.UserList.__init__(self, fields)
if fields__title != None:
self.fields__title = fields__title
else:
self.fields__title = []
def __getitem__(self, x):
if type(x) == types.IntType:
return self.data[x]
else:
return self.data[self.fields__title.index(x)]
def __setitem__(self, x, item):
if type(x) == types.IntType:
self.data[x] = item
else:
self.data[self.fields__title.index(x)] = item
def __delitem__(self, x):
if type(x) == types.IntType:
del self.data[x]
else:
del self.data[self.fields__title.index(x)]
def __str__(self):
return `self.data`

View file

@ -121,7 +121,7 @@ class HttpUrlData (ProxyUrlData):
response = self._getHttpResponse()
self.headers = response.msg
debug(BRING_IT_ON, response.status, response.reason, self.headers)
has301status = 0
has301status = False
while 1:
# proxy enforcement (overrides standard proxy)
if response.status == 305 and self.headers:
@ -157,13 +157,13 @@ class HttpUrlData (ProxyUrlData):
self.setWarning(i18n._("A HTTP 301 redirection occured and the url has no "
"trailing / at the end. All urls which point to (home) "
"directories should end with a / to avoid redirection."))
has301status = 1
has301status = True
self.aliases.append(redirected)
# check cache again on possibly changed URL
key = self.getCacheKey()
if self.config.urlCache_has_key(key):
self.copyFrom(self.config.urlCache_get(key))
self.cached = 1
self.cached = True
self.logMe()
return
# check if we still have a http url, it could be another
@ -180,7 +180,7 @@ class HttpUrlData (ProxyUrlData):
# append new object to queue
self.config.appendUrl(newobj)
# pretend to be finished and logged
self.cached = 1
self.cached = True
return
# new response data
response = self._getHttpResponse()
@ -335,7 +335,7 @@ class HttpUrlData (ProxyUrlData):
def getContent (self):
if not self.has_content:
self.has_content = 1
self.has_content = True
self.closeConnection()
t = time.time()
response = self._getHttpResponse("GET")
@ -357,16 +357,16 @@ class HttpUrlData (ProxyUrlData):
def isHtml (self):
if not (self.valid and self.headers):
return 0
return False
if self.headers.gettype()[:9]!="text/html":
return 0
return False
encoding = self.headers.get("Content-Encoding")
if encoding and encoding not in _supported_encodings and \
encoding!='identity':
self.setWarning(i18n._('Unsupported content encoding %s.')%\
`encoding`)
return 0
return 1
return False
return True
def getRobotsTxtUrl (self):

View file

@ -26,7 +26,7 @@ from urllib import splituser, splithost, splitport, unquote
from linkcheck import DNS, LinkCheckerError, getLinkPat
DNS.DiscoverNameServers()
import Config, StringUtil, linkname, test_support, timeoutsocket
import Config, StringUtil, linkname, test_support
from linkparse import LinkParser
from debug import *
@ -84,7 +84,7 @@ ExcList = [
ValueError, # from httplib.py
LinkCheckerError,
DNS.Error,
timeoutsocket.Timeout,
socket.timeout,
socket.error,
select.error,
]
@ -182,7 +182,7 @@ class UrlData:
self.validString = i18n._("Valid")
self.warningString = None
self.infoString = None
self.valid = 1
self.valid = True
self.url = None
self.line = line
self.column = column
@ -190,28 +190,28 @@ class UrlData:
self.dltime = -1
self.dlsize = -1
self.checktime = 0
self.cached = 0
self.cached = True
self.urlConnection = None
self.extern = (1, 0)
self.data = None
self.has_content = 0
self.has_content = False
url = get_absolute_url(self.urlName, self.baseRef, self.parentName)
# assume file link if no scheme is found
self.scheme = url.split(":", 1)[0] or "file"
def setError (self, s):
self.valid=0
self.valid = False
self.errorString = i18n._("Error")+": "+s
def setValid (self, s):
self.valid=1
self.valid = True
self.validString = i18n._("Valid")+": "+s
def isHtml (self):
return 0
return False
def setWarning (self, s):
@ -318,7 +318,7 @@ class UrlData:
for key in self.getCacheKeys():
if self.config.urlCache_has_key(key):
self.copyFrom(self.config.urlCache_get(key))
self.cached = 1
self.cached = True
self.logMe()
return
@ -384,7 +384,7 @@ class UrlData:
if not self.cached:
for key in self.getCacheKeys():
self.config.urlCache_set(key, self)
self.cached = 1
self.cached = True
def getCacheKeys (self):
@ -470,13 +470,13 @@ class UrlData:
def hasContent (self):
"""indicate wether url getContent() can be called"""
return 1
return True
def getContent (self):
"""Precondition: urlConnection is an opened URL."""
if not self.has_content:
self.has_content = 1
self.has_content = True
t = time.time()
self.data = self.urlConnection.read()
self.dltime = time.time() - t

View file

@ -20,15 +20,15 @@ class LinkCheckerError (Exception):
pass
import re, i18n
def getLinkPat (arg, strict=0):
def getLinkPat (arg, strict=False):
"""get a link pattern matcher for intern/extern links"""
debug(BRING_IT_ON, "Link pattern", `arg`)
if arg[0:1] == '!':
pattern = arg[1:]
negate = 1
negate = True
else:
pattern = arg
negate = 0
negate = False
return {
"pattern": re.compile(pattern),
"negate": negate,

View file

@ -42,10 +42,10 @@ def startoutput (out=sys.stdout):
def checkaccess (out=sys.stdout, hosts=[], servers=[], env=os.environ):
if os.environ.get('REMOTE_ADDR') in hosts and \
os.environ.get('SERVER_ADDR') in servers:
return 1
return True
logit({}, env)
printError(out, "Access denied")
return 0
return False
def checklink (out=sys.stdout, form={}, env=os.environ):
@ -59,16 +59,16 @@ def checklink (out=sys.stdout, form={}, env=os.environ):
config["recursionlevel"] = int(form["level"].value)
config["log"] = config.newLogger('html', {'fd': out})
config.disableThreading()
if form.has_key('strict'): config['strict'] = 1
if form.has_key("anchors"): config["anchors"] = 1
if not form.has_key("errors"): config["verbose"] = 1
if form.has_key('strict'): config['strict'] = True
if form.has_key("anchors"): config["anchors"] = True
if not form.has_key("errors"): config["verbose"] = True
if form.has_key("intern"):
pat = "^(ftp|https?)://"+re.escape(getHostName(form))
else:
pat = ".+"
config["internlinks"].append(getLinkPat(pat))
# avoid checking of local files
config["externlinks"].append(getLinkPat("^file:", strict=1))
config["externlinks"].append(getLinkPat("^file:", strict=True))
# start checking
config.appendUrl(GetUrlDataFrom(form["url"].value, 0, config))
checkUrls(config)

View file

@ -1,425 +0,0 @@
# -*- coding: iso-8859-1 -*-
####
# Copyright 2000,2001 by Timothy O'Malley <timo@alum.mit.edu>
#
# All Rights Reserved
#
# Permission to use, copy, modify, and distribute this software
# and its documentation for any purpose and without fee is hereby
# granted, provided that the above copyright notice appear in all
# copies and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Timothy O'Malley not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
# SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
# AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR
# ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
# PERFORMANCE OF THIS SOFTWARE.
#
####
"""Timeout Socket
This module enables a timeout mechanism on all TCP connections. It
does this by inserting a shim into the socket module. After this module
has been imported, all socket creation goes through this shim. As a
result, every TCP connection will support a timeout.
The beauty of this method is that it immediately and transparently
enables the entire python library to support timeouts on TCP sockets.
As an example, if you wanted SMTP connections to have a 20 second
timeout:
import timeoutsocket
import smtplib
timeoutsocket.setDefaultSocketTimeout(20)
The timeout applies to the socket functions that normally block on
execution: read, write, connect, and accept. If any of these
operations exceeds the specified timeout, the exception Timeout
will be raised.
The default timeout value is set to None. As a result, importing
this module does not change the default behavior of a socket. The
timeout mechanism only activates when the timeout has been set to
a numeric value. (This behavior mimics the behavior of the
select.select() function.)
This module implements two classes: TimeoutSocket and TimeoutFile.
The TimeoutSocket class defines a socket-like object that attempts to
avoid the condition where a socket may block indefinitely. The
TimeoutSocket class raises a Timeout exception whenever the
current operation delays too long.
The TimeoutFile class defines a file-like object that uses the TimeoutSocket
class. When the makefile() method of TimeoutSocket is called, it returns
an instance of a TimeoutFile.
Each of these objects adds two methods to manage the timeout value:
get_timeout() --> returns the timeout of the socket or file
set_timeout() --> sets the timeout of the socket or file
As an example, one might use the timeout feature to create httplib
connections that will timeout after 30 seconds:
import timeoutsocket
import httplib
H = httplib.HTTP("www.python.org")
H.sock.set_timeout(30)
Note: When used in this manner, the connect() routine may still
block because it happens before the timeout is set. To avoid
this, use the 'timeoutsocket.setDefaultSocketTimeout()' function.
Good Luck!
"""
__version__ = "$Revision$"
__author__ = "Timothy O'Malley <timo@alum.mit.edu>"
#
# Imports
#
import select, string
import socket
if not hasattr(socket, "_no_timeoutsocket"):
_socket = socket.socket
else:
_socket = socket._no_timeoutsocket
#
# Set up constants to test for Connected and Blocking operations.
# We delete 'os' and 'errno' to keep our namespace clean(er).
# Thanks to Alex Martelli and G. Li for the Windows error codes.
#
import os
if os.name == "nt":
_IsConnected = ( 10022, 10056 )
_ConnectBusy = ( 10035, )
_AcceptBusy = ( 10035, )
else:
import errno
_IsConnected = ( errno.EISCONN, )
_ConnectBusy = ( errno.EINPROGRESS, errno.EALREADY, errno.EWOULDBLOCK )
_AcceptBusy = ( errno.EAGAIN, errno.EWOULDBLOCK )
del errno
del os
#
# Default timeout value for ALL TimeoutSockets
#
_DefaultTimeout = None
def setDefaultSocketTimeout(timeout):
    """Set the timeout (seconds, or None for no timeout) applied to every
    TimeoutSocket created after this call."""
    global _DefaultTimeout
    _DefaultTimeout = timeout
def getDefaultSocketTimeout():
    """Return the timeout currently used for newly created TimeoutSockets."""
    return _DefaultTimeout
#
# Exceptions for socket errors and timeouts
#
# Re-export the real socket error type so callers can catch a single name.
Error = socket.error
class Timeout(Exception):
    """Raised when a socket operation exceeds the configured timeout."""
    pass
#
# Factory function
#
from socket import AF_INET, SOCK_STREAM
def timeoutsocket(family=AF_INET, type=SOCK_STREAM, proto=None):
    """Drop-in replacement for socket.socket().
    TCP (AF_INET/SOCK_STREAM) sockets are wrapped in a TimeoutSocket that
    honours the module-wide default timeout; every other family/type
    combination is created with the real socket constructor unchanged.
    """
    wants_tcp = (family == AF_INET and type == SOCK_STREAM)
    if not wants_tcp:
        if proto:
            return _socket(family, type, proto)
        return _socket(family, type)
    return TimeoutSocket(_socket(family, type), _DefaultTimeout)
# end timeoutsocket
#
# The TimeoutSocket class definition
#
class TimeoutSocket:
    """TimeoutSocket object
    Implements a socket-like object that raises Timeout whenever
    an operation takes too long.
    The definition of 'too long' can be changed using the
    set_timeout() method.
    """
    # NOTE(review): Python 2 code ("except Error, why:"); will not parse on
    # Python 3.
    # Count of file-like copies handed out by makefile(); close() only
    # really closes the wrapped socket once every copy is closed.
    _copies = 0
    # Cached blocking mode of the wrapped socket (1 = blocking).
    _blocking = 1
    def __init__(self, sock, timeout):
        """Wrap an already-created socket; timeout is seconds or None."""
        self._sock = sock
        self._timeout = timeout
    # end __init__
    def __getattr__(self, key):
        # Delegate every attribute we do not define to the wrapped socket.
        return getattr(self._sock, key)
    # end __getattr__
    def get_timeout(self):
        """Return the current timeout in seconds (None = no timeout)."""
        return self._timeout
    # end get_timeout
    def set_timeout(self, timeout=None):
        """Set the timeout; None disables the timeout mechanism."""
        self._timeout = timeout
    # end set_timeout
    def setblocking(self, blocking):
        # Remember the requested mode so connect()/accept() can restore it
        # after their temporary non-blocking phase.
        self._blocking = blocking
        return self._sock.setblocking(blocking)
    # end setblocking
    def connect_ex(self, addr):
        """Like connect(), but return the error code instead of raising Error."""
        errcode = 0
        try:
            self.connect(addr)
        except Error, why:
            errcode = why[0]
        return errcode
    # end connect_ex
    def connect(self, addr, port=None, dumbhack=None):
        """Connect with a timeout; raises Timeout if the connection is not
        established in time.  'dumbhack' marks the internal second pass made
        after select() reported the socket writable."""
        # In case we were called as connect(host, port)
        if port != None: addr = (addr, port)
        # Shortcuts
        sock = self._sock
        timeout = self._timeout
        blocking = self._blocking
        # First, make a non-blocking call to connect
        try:
            sock.setblocking(0)
            sock.connect(addr)
            sock.setblocking(blocking)
            return
        except Error, why:
            # Set the socket's blocking mode back
            sock.setblocking(blocking)
            # If we are not blocking, re-raise
            if not blocking:
                raise
            # If we are already connected, then return success.
            # If we got a genuine error, re-raise it.
            errcode = why[0]
            if dumbhack and errcode in _IsConnected:
                return
            elif errcode not in _ConnectBusy:
                raise
        # Now, wait for the connect to happen
        # ONLY if dumbhack indicates this is pass number one.
        # If select raises an error, we pass it on.
        # Is this the right behavior?
        if not dumbhack:
            r,w,e = select.select([], [sock], [], timeout)
            if w:
                return self.connect(addr, dumbhack=1)
        # If we get here, then we should raise Timeout
        raise Timeout("Attempted connect to %s timed out." % str(addr) )
    # end connect
    def accept(self, dumbhack=None):
        """Accept with a timeout; returns (TimeoutSocket, address) or raises
        Timeout.  'dumbhack' marks the internal second pass after select()."""
        # Shortcuts
        sock = self._sock
        timeout = self._timeout
        blocking = self._blocking
        # First, make a non-blocking call to accept
        # If we get a valid result, then convert the
        # accept'ed socket into a TimeoutSocket.
        # Be careful about the blocking mode of ourselves.
        try:
            sock.setblocking(0)
            newsock, addr = sock.accept()
            sock.setblocking(blocking)
            timeoutnewsock = self.__class__(newsock, timeout)
            timeoutnewsock.setblocking(blocking)
            return (timeoutnewsock, addr)
        except Error, why:
            # Set the socket's blocking mode back
            sock.setblocking(blocking)
            # If we are not supposed to block, then re-raise
            if not blocking:
                raise
            # If we got a genuine error, re-raise it.
            errcode = why[0]
            if errcode not in _AcceptBusy:
                raise
        # Now, wait for the accept to happen
        # ONLY if dumbhack indicates this is pass number one.
        # If select raises an error, we pass it on.
        # Is this the right behavior?
        if not dumbhack:
            r,w,e = select.select([sock], [], [], timeout)
            if r:
                return self.accept(dumbhack=1)
        # If we get here, then we should raise Timeout
        raise Timeout("Attempted accept timed out.")
    # end accept
    def send(self, data, flags=0):
        """Send, raising Timeout if the socket stays unwritable too long."""
        sock = self._sock
        if self._blocking:
            r,w,e = select.select([],[sock],[], self._timeout)
            if not w:
                raise Timeout("Send timed out")
        return sock.send(data, flags)
    # end send
    def recv(self, bufsize, flags=0):
        """Receive, raising Timeout if no data arrives in time."""
        sock = self._sock
        if self._blocking:
            r,w,e = select.select([sock], [], [], self._timeout)
            if not r:
                raise Timeout("Recv timed out")
        return sock.recv(bufsize, flags)
    # end recv
    def makefile(self, flags="r", bufsize=-1):
        """Return a TimeoutFile wrapping this socket; bumps the copy count."""
        self._copies = self._copies +1
        return TimeoutFile(self, flags, bufsize)
    # end makefile
    def close(self):
        # Only close the real socket when no makefile() copies remain.
        if self._copies <= 0:
            self._sock.close()
        else:
            self._copies = self._copies -1
    # end close
# end TimeoutSocket
class TimeoutFile:
    """TimeoutFile object
    Implements a file-like object on top of TimeoutSocket.
    """
    def __init__(self, sock, mode="r", bufsize=4096):
        """Wrap a TimeoutSocket.  'mode' is accepted for file-API
        compatibility but is otherwise unused."""
        self._sock = sock
        self._bufsize = 4096
        if bufsize > 0: self._bufsize = bufsize
        # The pending-input queue is stored on the socket itself so that
        # every makefile() copy shares the same buffered data.
        if not hasattr(sock, "_inqueue"): self._sock._inqueue = ""
    # end __init__
    def __getattr__(self, key):
        # Delegate unknown attributes (send, recv, close bookkeeping, ...)
        # to the underlying TimeoutSocket.
        return getattr(self._sock, key)
    # end __getattr__
    def close(self):
        self._sock.close()
        # Drop our reference so further use fails loudly.
        self._sock = None
    # end close
    def write(self, data):
        self.send(data)
    # end write
    def read(self, size=-1):
        """Read up to 'size' bytes (all available data when size < 0).
        May raise Timeout via the underlying recv()."""
        _sock = self._sock
        _bufsize = self._bufsize
        while 1:
            datalen = len(_sock._inqueue)
            # Stop once the queue holds at least 'size' bytes (only when a
            # non-negative size was requested).
            if datalen >= size >= 0:
                break
            bufsize = _bufsize
            if size > 0:
                bufsize = min(bufsize, size - datalen )
            buf = self.recv(bufsize)
            if not buf:
                # Peer closed the connection: return what we have.
                break
            _sock._inqueue = _sock._inqueue + buf
        data = _sock._inqueue
        _sock._inqueue = ""
        # Push any surplus beyond 'size' back onto the shared queue.
        if size > 0 and datalen > size:
            _sock._inqueue = data[size:]
            data = data[:size]
        return data
    # end read
    def readline(self, size=-1):
        """Read one line (up to and including '\\n'), or at most 'size' bytes.
        May raise Timeout via the underlying recv()."""
        _sock = self._sock
        _bufsize = self._bufsize
        while 1:
            idx = string.find(_sock._inqueue, "\n")
            if idx >= 0:
                break
            datalen = len(_sock._inqueue)
            if datalen >= size >= 0:
                break
            bufsize = _bufsize
            if size > 0:
                bufsize = min(bufsize, size - datalen )
            buf = self.recv(bufsize)
            if not buf:
                break
            _sock._inqueue = _sock._inqueue + buf
        data = _sock._inqueue
        _sock._inqueue = ""
        if idx >= 0:
            # Split after the newline; the remainder goes back on the queue.
            idx = idx + 1
            _sock._inqueue = data[idx:]
            data = data[:idx]
        elif size > 0 and datalen > size:
            _sock._inqueue = data[size:]
            data = data[:size]
        return data
    # end readline
    def readlines(self, sizehint=-1):
        """Read everything and split it into newline-terminated chunks.
        'sizehint' is accepted for file-API compatibility but ignored."""
        result = []
        data = self.read()
        while data:
            idx = string.find(data, "\n")
            if idx >= 0:
                idx = idx + 1
                result.append( data[:idx] )
                data = data[idx:]
            else:
                # Trailing data without a final newline.
                result.append( data )
                data = ""
        return result
    # end readlines
    def flush(self):  pass
# end TimeoutFile
#
# Silently replace the socket() builtin function with
# our timeoutsocket() definition.
#
if not hasattr(socket, "_no_timeoutsocket"):
    # Stash the real constructor so a re-import does not wrap twice.
    socket._no_timeoutsocket = socket.socket
    socket.socket = timeoutsocket
# Rebind this module's 'socket' name from the module to the factory, so
# "from timeoutsocket import socket" yields the timeout-aware constructor.
del socket
socket = timeoutsocket
# Finis

View file

@ -22,10 +22,9 @@ import sys
if not hasattr(sys, 'version_info') or sys.version_info<(2, 3, 0, 'final', 0):
raise SystemExit, "This program requires Python 2.3 or later."
import getopt, re, os, pprint, linkcheck
import linkcheck.timeoutsocket
import getopt, re, os, pprint, socket, linkcheck
# set default 30 seconds timeout
linkcheck.timeoutsocket.setDefaultSocketTimeout(30)
socket.setdefaulttimeout(30)
# import several helper debugging things
from linkcheck.debug import *
from linkcheck.log import LoggerKeys
@ -111,7 +110,7 @@ For single-letter option arguments the space is not a necessity. So
To disable threading specify a non-positive number.
--timeout=secs
Set the timeout for TCP connection attempts in seconds. The default
timeout is 10 seconds.
timeout is 30 seconds.
-u name, --user=name
Try username name for HTTP and FTP authorization.
Default is 'anonymous'. See also -p.
@ -265,12 +264,12 @@ if get_debuglevel() > 0:
# apply commandline options and arguments
_user = "anonymous"
_password = "guest@"
constructauth = 0
do_profile = 0
constructauth = False
do_profile = False
for opt,arg in options:
if opt=="-a" or opt=="--anchors":
config["anchors"] = "True"
config["warnings"] = "True"
config["anchors"] = True
config["warnings"] = True
elif opt=="-e" or opt=="--extern":
config["externlinks"].append(linkcheck.getLinkPat(arg))
@ -286,7 +285,7 @@ for opt,arg in options:
"'-o, --output'")
elif opt=="-F" or opt=="--file-output":
ns = {'fileoutput':1}
ns = {'fileoutput': 1}
try:
type, ns['filename'] = arg.split('/', 1)
if not ns['filename']: raise ValueError
@ -298,23 +297,23 @@ for opt,arg in options:
"'-F, --file-output'")
elif opt=="-I" or opt=="--interactive":
config['interactive'] = "True"
config['interactive'] = True
elif opt=="-i" or opt=="--intern":
config["internlinks"].append(linkcheck.getLinkPat(arg))
elif opt=="-l" or opt=="--denyallow":
config["denyallow"] = "True"
config["denyallow"] = True
elif opt=="-N" or opt=="--nntp-server":
config["nntpserver"] = arg
elif opt=="--no-anchor-caching":
config["noanchorcaching"] = "True"
config["noanchorcaching"] = True
elif opt=="-p" or opt=="--password":
_password = arg
constructauth = "True"
constructauth = True
elif opt=="-P" or opt=="--pause":
try:
@ -329,10 +328,10 @@ for opt,arg in options:
(`arg`, "'-P, --pause'"))
elif opt=="--profile":
do_profile = "True"
do_profile = True
elif opt=="-q" or opt=="--quiet":
config["quiet"] = "True"
config["quiet"] = True
elif opt=="-r" or opt=="--recursion-level":
try:
@ -348,7 +347,7 @@ for opt,arg in options:
elif opt=="-R" or opt=="--robots-txt": pass
elif opt=="-s" or opt=="--strict":
config["strict"] = "True"
config["strict"] = True
elif opt=="-t" or opt=="--threads":
try:
@ -370,18 +369,18 @@ for opt,arg in options:
if timeout <= 0:
printUsage(i18n._("Illegal argument %s for option %s") % \
(`arg`, "'--timeout'"))
linkcheck.timeoutsocket.setDefaultSocketTimeout(timeout)
socket.setdefaulttimeout(timeout)
elif opt=="-u" or opt=="--user":
_user = arg
constructauth = "True"
constructauth = True
elif opt=="-V" or opt=="--version":
printVersion()
elif opt=="-v" or opt=="--verbose":
config["verbose"] = "True"
config["warnings"] = "True"
config["verbose"] = True
config["warnings"] = True
elif opt=="--viewprof":
viewprof()
@ -391,14 +390,14 @@ for opt,arg in options:
util1.abbuzze()
sys.exit(0)
elif opt=="-w" or opt=="--warnings":
config["warnings"] = "True"
config["warnings"] = True
elif opt=="-W" or opt=="--warning-regex":
config["warningregex"] = re.compile(arg)
config["warnings"] = "True"
config["warnings"] = True
elif opt=="-C" or opt=="--cookies":
config['cookies'] = "True"
config['cookies'] = True
if constructauth:
config["authentication"].insert(0, {'pattern': re.compile(".*"),
@ -431,7 +430,7 @@ for url in args:
url = "http://%s"%url
elif url.startswith("ftp."):
url = "ftp://%s"%url
config.appendUrl(UrlData.GetUrlDataFrom(url, 0, config, cmdline="True"))
config.appendUrl(UrlData.GetUrlDataFrom(url, 0, config, cmdline=True))
############################# check the urls ################################
if do_profile:

View file

@ -116,7 +116,7 @@ To disable threading specify a non-positive number.
.TP
\fB--timeout=\fIsecs\fP
Set the timeout for connection attempts in seconds. The default timeout
is 10 seconds.
is 30 seconds.
.TP
\fB-u \fIname\fP, \fB--user=\fIname\fP
Try username \fIname\fP for HTTP and FTP authorization.