From fd2c9ad64801612dffdd752cdb210ee2dac1acaf Mon Sep 17 00:00:00 2001 From: calvin Date: Mon, 8 May 2000 22:18:45 +0000 Subject: [PATCH] see changelog git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@80 e7d03fd6-7b0d-0410-9947-9c21f3af8025 --- INSTALL | 4 +++- TODO | 8 +++----- debian/changelog | 4 ++-- linkcheck/MailtoUrlData.py | 26 +++++++++++++++++--------- linkchecker | 3 ++- 5 files changed, 27 insertions(+), 18 deletions(-) diff --git a/INSTALL b/INSTALL index b5568765..dce29b89 100644 --- a/INSTALL +++ b/INSTALL @@ -8,7 +8,9 @@ You get Python from http://www.python.org Optionally packages: Distutils >= 0.8.1 from http://www.python.org/sigs/distutils-sig/ OpenSSL from http://www.openssl.org - +You will need Perl for Win32 (available from +http://www.activestate.com/ActivePerl) if you want to install OpenSSL +on Windows! Install with Distutils: If you have the Distutils, run "python setup.py install". diff --git a/TODO b/TODO index 3ff3d38b..9413b0b8 100644 --- a/TODO +++ b/TODO @@ -3,16 +3,14 @@ High priority o DNS (and some URLs) seem to hang sometimes. This is not entirely true. The SMTP connect has a very long timeout (>200 seconds). But I can not use the signal module with threads. - For convenient timeout setting I have to wait for Python 1.6 and urllib2. + Wait for Python 1.6 and urllib2. o I want to be able to supply a "break" command even when multiple threads are running. The thread implementation of Python is somewhat sparse and does not allow suspending/stopping of threads. What I am doing is to call sys.exit(1). - This kills the entire Python interpreter. - -o Internationalization o Parse GML output and make a site map graphic (PNG format) - + Use an existing layout algorithm. 
+ Low priority diff --git a/debian/changelog b/debian/changelog index 7ed409a7..fb1d0057 100644 --- a/debian/changelog +++ b/debian/changelog @@ -6,15 +6,15 @@ linkchecker (1.2.3) unstable; urgency=low * configuration file option for output filenames * linkchecker.bat installation support for windows * included test suite in distribution - * improved mailto: link parsing * blacklist output support * CSV output support * SSL autodetection in setup.py * added GPL copyright header to each of my .py files * i18n support and german translation of the logger outputs * use http_proxy environment variable if present + * be more RFC822 and RFC2368 compliant when scanning mail syntax - -- Bastian Kleineidam Mon, 1 May 2000 13:03:32 +0200 + -- Bastian Kleineidam Tue, 9 May 2000 00:15:12 +0200 linkchecker (1.2.2) unstable; urgency=low diff --git a/linkcheck/MailtoUrlData.py b/linkcheck/MailtoUrlData.py index 1f719288..800c325c 100644 --- a/linkcheck/MailtoUrlData.py +++ b/linkcheck/MailtoUrlData.py @@ -20,17 +20,24 @@ from HostCheckingUrlData import HostCheckingUrlData from smtplib import SMTP from UrlData import LinkCheckerException -# regular expression strings -tag_str = r"^mailto:" -adress_str = r"([a-zA-Z]['\-\w.]*)@([\w\-]+(?:\.[\w\-]+)*)" -complete_adress_str = "("+adress_str+"|[\w\-\s]*<"+adress_str+">)" -suffix_str = r"(\?.+)?" -mailto_str = tag_str+complete_adress_str+\ - "(\s*,"+complete_adress_str+")*"+suffix_str +# regular expression strings for partially RFC822 compliant adress scanning +# XXX far from complete mail adress scanning; enhance only when needed! 
+word = r"[\w\-%']+" +words = r"[\w\-%'\s]+" +dotwords = "("+word+r"(?:\."+word+")*)" +adress = dotwords+"@"+dotwords +route_adress = words+"<"+adress+">" +mailbox = "("+adress+"|"+route_adress+")" +mailboxes = mailbox+r"?(,+"+mailbox+")*" + +# regular expression strings for RFC2368 compliant mailto: scanning (the "?header=value&..." part is optional) +header = word+"="+word +headers = r"\?"+header+"(&"+header+")*" +mailto = "^mailto:"+mailboxes+"(?:"+headers+")?" # compiled -adress_re = re.compile(adress_str) -mailto_re = re.compile(mailto_str) +adress_re = re.compile(adress) +mailto_re = re.compile(mailto) class MailtoUrlData(HostCheckingUrlData): "Url link with mailto scheme" @@ -40,6 +47,7 @@ class MailtoUrlData(HostCheckingUrlData): mo = mailto_re.match(self.urlName) if not mo: raise LinkCheckerException, "Illegal mailto link syntax" + # note: this catches also cc= headers and such! self.adresses = map(lambda x: (x[0], string.lower(x[1])), re.findall(adress_re, self.urlName)) diff --git a/linkchecker b/linkchecker index 68dcfc63..a716544c 100755 --- a/linkchecker +++ b/linkchecker @@ -172,7 +172,8 @@ except: # check for environment variables (currently only http_proxy) if os.environ.has_key("http_proxy"): - proxy = re.compile("(.+):(.+)").match(os.environ["http_proxy"]) + proxy = re.compile(r"(?:http://)?(.+):(\d+)").match( + os.environ["http_proxy"]) if proxy: config["proxy"] = proxy.group(1) config["proxyport"] = int(proxy.group(2))