diff --git a/TODO b/TODO
index 1623cb6c..c472390f 100644
--- a/TODO
+++ b/TODO
@@ -5,7 +5,7 @@ Possible improvements people could work on:
   Must be thread-safe, must handle timeouts and connection expiration
   (HTTP: only pool persistent (ie. HTTP/1.1) connections; Keepalive
   header parsing).
-  Note: FTP connection pooling is already there, but without timeouts.
+  Note: FTP connection pooling is already there.
 
 - [USAGE] rethink intern/extern stuff
 
diff --git a/linkcheck/checker/cache.py b/linkcheck/checker/cache.py
index 0ee75241..82ebe31c 100644
--- a/linkcheck/checker/cache.py
+++ b/linkcheck/checker/cache.py
@@ -30,6 +30,8 @@ import linkcheck.containers
 import linkcheck.configuration
 import linkcheck.threader
 
+FTP_CONNECTION_TIMEOUT = 300  # max seconds since a pooled FTP connection was last handed out
+
 
 def _check_morsel (m, host, path):
     """
@@ -224,12 +226,18 @@ class Cache (object):
         try:
             key = (host, username, password)
             if key in self.ftp_connections:
-                conn_and_status = self.ftp_connections[key]
-                if conn_and_status[1] == 'busy':
+                conn_data = self.ftp_connections[key]
+                t = time.time()
+                if t - conn_data[2] > FTP_CONNECTION_TIMEOUT:
+                    # elapsed time (now - stored timestamp) exceeds the limit: drop the stale entry
+                    del self.ftp_connections[key]
+                    return None
+                if conn_data[1] == 'busy':
                     # connection is in use
                     return "busy"
-                conn_and_status[1] = 'busy'
-                return conn_and_status[0]
+                conn_data[1] = 'busy'
+                conn_data[2] = t
+                return conn_data[0]
             return None
         finally:
             self.lock.release()
@@ -243,7 +251,7 @@ class Cache (object):
             key = (host, username, password)
             cached = key in self.ftp_connections
             if not cached:
-                self.ftp_connections[key] = [conn, 'busy']
+                self.ftp_connections[key] = [conn, 'busy', time.time()]
             return cached
         finally:
             self.lock.release()