diff --git a/TODO b/TODO
index 8eb1b90d..7d460a51 100644
--- a/TODO
+++ b/TODO
@@ -1,6 +1,11 @@
High priority
-o Use Python 2.0 features
+o Proxy does not work:
+ - separate http/https/ftp proxy
+ - environment variables are used by RobotParser, so I have to
+ do that as well.
+
+o Test the robot parser
o I want to be able to supply a "break" command even when multiple
threads are running.
diff --git a/linkcheck/HttpUrlData.py b/linkcheck/HttpUrlData.py
index ed97ab7c..947a8ba5 100644
--- a/linkcheck/HttpUrlData.py
+++ b/linkcheck/HttpUrlData.py
@@ -196,7 +196,9 @@ class HttpUrlData(UrlData):
roboturl="%s://%s/robots.txt" % self.urlTuple[0:2]
rp = robotparser.RobotFileParser()
rp.set_url(roboturl)
+ print roboturl
rp.read()
+ print "2"
robotsTxt = rp.can_fetch(Config.UserAgent, self.url)
config.robotsTxtCache_set(self.urlTuple[0:2], robotsTxt)
return config.robotsTxtCache_get(self.url)
diff --git a/linkcheck/UrlData.py b/linkcheck/UrlData.py
index 404bd2e8..12ca1675 100644
--- a/linkcheck/UrlData.py
+++ b/linkcheck/UrlData.py
@@ -37,8 +37,8 @@ _linkMatcher = r"""
< # open tag
\s* # whitespace
%s # tag name
- \s+ # whitespace
[^>]*? # skip leading attributes
+ \s+ # whitespace
%s # attrib name
\s* # whitespace
= # equal sign
diff --git a/test/test1.html b/test/test1.html
index 46ad1564..bf2371da 100644
--- a/test/test1.html
+++ b/test/test1.html
@@ -6,6 +6,7 @@ Just some HTTP links
+
diff --git a/test/test2.html b/test/test2.html
index 016cc7ac..e582702a 100644
--- a/test/test2.html
+++ b/test/test2.html
@@ -16,6 +16,11 @@
-< img src="blubb">
-< link href="blubb">
-< script src="bla">
+< img src="blubb_image">
+< img lowsrc="blubb_lowimage">
+< link href="blubb_link">
+< script src="blubb_script">
+< area href="blubb_area">
+< body background="blubb_body">
+< area href="blubb_href">
+< form action="blubb_action">