mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-03 20:34:43 +00:00
Remove addrinfo cache.
This commit is contained in:
parent
20be0f2519
commit
e1e80b7dd5
7 changed files with 20 additions and 155 deletions
80
linkcheck/cache/addrinfo.py
vendored
80
linkcheck/cache/addrinfo.py
vendored
|
|
@@ -1,80 +0,0 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2006-2012 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
"""
|
||||
Cache for DNS lookups.
|
||||
"""
|
||||
import socket
|
||||
import sys
|
||||
from .. import LinkCheckerError
|
||||
from ..lock import get_lock
|
||||
from ..containers import LFUCache
|
||||
from ..decorators import synchronized
|
||||
|
||||
_lock = get_lock("addrinfo")
|
||||
|
||||
class AddrInfo(object):
|
||||
"""Cache for socket.getaddrinfo() results."""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize address info cache and cache statistics."""
|
||||
self.addrinfos = LFUCache(size=100)
|
||||
self.misses = self.hits = 0
|
||||
|
||||
def getaddrinfo(self, host, port):
|
||||
"""Determine address information for given host and port for
|
||||
streaming sockets (SOCK_STREAM).
|
||||
Already cached information is used."""
|
||||
key = u"%s:%s" % (unicode(host), unicode(port))
|
||||
if key in self.addrinfos:
|
||||
self.hits += 1
|
||||
value = self.addrinfos[key]
|
||||
else:
|
||||
self.misses += 1
|
||||
# check if it's an ascii host
|
||||
if isinstance(host, unicode):
|
||||
try:
|
||||
host = host.encode('ascii')
|
||||
except UnicodeEncodeError:
|
||||
pass
|
||||
try:
|
||||
value = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
|
||||
except socket.error:
|
||||
value = sys.exc_info()[1]
|
||||
except UnicodeError, msg:
|
||||
args = dict(host=host, msg=str(msg))
|
||||
value = LinkCheckerError(_("could not parse host %(host)r: %(msg)s") % args)
|
||||
self.addrinfos[key] = value
|
||||
if isinstance(value, Exception):
|
||||
raise value
|
||||
return value
|
||||
|
||||
_addrinfo = AddrInfo()
|
||||
|
||||
@synchronized(_lock)
|
||||
def getaddrinfo(host, port):
|
||||
"""Determine address information for given host and port for
|
||||
streaming sockets (SOCK_STREAM).
|
||||
Already cached information is used."""
|
||||
return _addrinfo.getaddrinfo(host, port)
|
||||
|
||||
@synchronized(_lock)
|
||||
def getstats():
|
||||
"""Get cache statistics.
|
||||
@return: hits and misses
|
||||
@rtype tuple(int, int)
|
||||
"""
|
||||
return _addrinfo.hits, _addrinfo.misses
|
||||
|
|
@@ -526,8 +526,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport, pooledc
|
|||
self.get_http_object(scheme, host, port)
|
||||
self.add_connection_request()
|
||||
self.add_connection_headers()
|
||||
buffering = True
|
||||
response = self.url_connection.getresponse(buffering)
|
||||
response = self.url_connection.getresponse(buffering=True)
|
||||
self.headers = response.msg
|
||||
self.content_type = None
|
||||
self.persistent = not response.will_close
|
||||
|
|
|
|||
|
|
@@ -21,7 +21,7 @@ import time
|
|||
import threading
|
||||
from .. import log, LOG_CHECK
|
||||
from ..decorators import synchronized
|
||||
from ..cache import urlqueue, addrinfo, content
|
||||
from ..cache import urlqueue, content
|
||||
from . import logger, status, checker, cleanup
|
||||
|
||||
|
||||
|
|
@@ -145,7 +145,5 @@ class Aggregate (object):
|
|||
logger.
|
||||
"""
|
||||
robots_txt_stats = self.robots_txt.hits, self.robots_txt.misses
|
||||
addrinfo_stats = addrinfo.getstats()
|
||||
download_stats = self.downloaded_bytes
|
||||
self.logger.add_statistics(robots_txt_stats, addrinfo_stats,
|
||||
download_stats)
|
||||
self.logger.add_statistics(robots_txt_stats, download_stats)
|
||||
|
|
|
|||
|
|
@@ -46,10 +46,10 @@ class Logger (object):
|
|||
for logger in self.loggers:
|
||||
logger.end_output()
|
||||
|
||||
def add_statistics(self, robots_txt_stats, addrinfo_stats, download_stats):
|
||||
def add_statistics(self, robots_txt_stats, download_stats):
|
||||
"""Add statistics to logger."""
|
||||
for logger in self.loggers:
|
||||
logger.add_statistics(robots_txt_stats, addrinfo_stats, download_stats)
|
||||
logger.add_statistics(robots_txt_stats, download_stats)
|
||||
|
||||
def do_print (self, url_data):
|
||||
"""Determine if URL entry should be logged or not."""
|
||||
|
|
|
|||
|
|
@@ -81,7 +81,6 @@ try:
|
|||
from cStringIO import StringIO
|
||||
except ImportError:
|
||||
from StringIO import StringIO
|
||||
import cache.addrinfo
|
||||
|
||||
__all__ = ["HTTP", "HTTPResponse", "HTTPConnection",
|
||||
"HTTPException", "NotConnected", "UnknownProtocol",
|
||||
|
|
@@ -366,7 +365,9 @@ class HTTPResponse:
|
|||
|
||||
def _read_status(self):
|
||||
# Initialize with Simple-Response defaults
|
||||
line = self.fp.readline()
|
||||
line = self.fp.readline(_MAXLINE + 1)
|
||||
if len(line) > _MAXLINE:
|
||||
raise LineTooLong("header line")
|
||||
if self.debuglevel > 0:
|
||||
print "reply:", repr(line)
|
||||
if not line:
|
||||
|
|
@@ -683,8 +684,9 @@ class HTTPConnection:
|
|||
strict = 0
|
||||
|
||||
def __init__(self, host, port=None, strict=None,
|
||||
timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
|
||||
timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
|
||||
self.timeout = timeout
|
||||
self.source_address = source_address
|
||||
self.sock = None
|
||||
self._buffer = []
|
||||
self.__response = None
|
||||
|
|
@@ -763,26 +765,12 @@ class HTTPConnection:
|
|||
if line == '\r\n':
|
||||
break
|
||||
|
||||
|
||||
def connect(self):
|
||||
"""Connect to the host and port specified in __init__."""
|
||||
msg = "getaddrinfo returns an empty list"
|
||||
for res in cache.addrinfo.getaddrinfo(self.host, self.port):
|
||||
af, socktype, proto, canonname, sa = res
|
||||
try:
|
||||
self.sock = socket.socket(af, socktype, proto)
|
||||
if self.debuglevel > 0:
|
||||
print "connect: (%s, %s)" % (self.host, self.port)
|
||||
self.sock.connect(sa)
|
||||
except socket.error, msg:
|
||||
if self.debuglevel > 0:
|
||||
print 'connect fail:', (self.host, self.port), msg
|
||||
if self.sock is not None:
|
||||
self.sock.close()
|
||||
self.sock = None
|
||||
continue
|
||||
break
|
||||
if not self.sock:
|
||||
raise socket.error, msg
|
||||
self.sock = socket.create_connection((self.host,self.port),
|
||||
self.timeout, self.source_address)
|
||||
|
||||
if self._tunnel_host:
|
||||
self._tunnel()
|
||||
|
||||
|
|
@@ -1161,7 +1149,7 @@ class HTTP:
|
|||
|
||||
### should we keep this behavior? do people use it?
|
||||
# keep the socket open (as a file), and return it
|
||||
self.file = self._conn.sock.makefile('rb')
|
||||
self.file = self._conn.sock.makefile('rb', 0)
|
||||
|
||||
# close our socket -- we want to restart after any protocol error
|
||||
self.close()
|
||||
|
|
@@ -1183,7 +1171,6 @@ class HTTP:
|
|||
### do it
|
||||
self.file = None
|
||||
|
||||
|
||||
try:
|
||||
import ssl
|
||||
except ImportError:
|
||||
|
|
@@ -1196,8 +1183,9 @@ else:
|
|||
|
||||
def __init__(self, host, port=None, key_file=None, cert_file=None,
|
||||
strict=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
|
||||
ca_certs=None):
|
||||
HTTPConnection.__init__(self, host, port, strict, timeout)
|
||||
source_address=None, ca_certs=None):
|
||||
HTTPConnection.__init__(self, host, port, strict, timeout,
|
||||
source_address)
|
||||
self.key_file = key_file
|
||||
self.cert_file = cert_file
|
||||
self.ca_certs = ca_certs
|
||||
|
|
@@ -1209,7 +1197,8 @@ else:
|
|||
def connect(self):
|
||||
"Connect to a host on a given (SSL) port."
|
||||
|
||||
sock = socket.create_connection((self.host, self.port), self.timeout)
|
||||
sock = socket.create_connection((self.host, self.port),
|
||||
self.timeout, self.source_address)
|
||||
if self._tunnel_host:
|
||||
self.sock = sock
|
||||
self._tunnel()
|
||||
|
|
|
|||
|
|
@@ -249,8 +249,6 @@ class TextLogger (Logger):
|
|||
self.writeln(_("Downloaded: %s") % strformat.strsize(self.stats.downloaded_bytes))
|
||||
hitsmisses = strformat.str_cache_stats(*self.stats.robots_txt_stats)
|
||||
self.writeln(_("Robots.txt cache: %s") % hitsmisses)
|
||||
hitsmisses = strformat.str_cache_stats(*self.stats.addrinfo_stats)
|
||||
self.writeln(_("DNS cache: %s") % hitsmisses)
|
||||
if len(self.stats.domains) > 1:
|
||||
self.writeln(_("Number of domains: %d") % len(self.stats.domains))
|
||||
if self.stats.number > 0:
|
||||
|
|
|
|||
39
tests/cache/test_addrinfo.py
vendored
39
tests/cache/test_addrinfo.py
vendored
|
|
@@ -1,39 +0,0 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2011 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
"""
|
||||
Test address info caching.
|
||||
"""
|
||||
import unittest
|
||||
import socket
|
||||
from linkcheck import LinkCheckerError
|
||||
from linkcheck.cache.addrinfo import getaddrinfo
|
||||
|
||||
class TestAddrinfoCache (unittest.TestCase):
|
||||
"""Test address info caching."""
|
||||
|
||||
def test_addrinfo_cache1 (self):
|
||||
# pure ascii hostname with >63 chars
|
||||
host = u"a"*64
|
||||
port = 80
|
||||
# must not raise UnicodeEncodeError
|
||||
self.assertRaises(socket.error, getaddrinfo, host, port)
|
||||
|
||||
def test_addrinfo_cache2 (self):
|
||||
# non-ascii hostname with >63 chars
|
||||
host = u"ä"*64
|
||||
port = 80
|
||||
self.assertRaises(LinkCheckerError, getaddrinfo, host, port)
|
||||
Loading…
Reference in a new issue