proxy config

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@185 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2000-11-02 08:26:14 +00:00
parent b57f1d2274
commit b6de623f4e
5 changed files with 18 additions and 5 deletions

7
TODO
View file

@ -1,6 +1,11 @@
High priority
o Use Python 2.0 features
o Proxy does not work:
- separate http/https/ftp proxy
- environment variables are used by RobotParser, so I have to
honor them as well.
o Test the robot parser
o I want to be able to supply a "break" command even when multiple
threads are running.

View file

@ -196,7 +196,9 @@ class HttpUrlData(UrlData):
roboturl="%s://%s/robots.txt" % self.urlTuple[0:2]
rp = robotparser.RobotFileParser()
rp.set_url(roboturl)
print roboturl
rp.read()
print "2"
robotsTxt = rp.can_fetch(Config.UserAgent, self.url)
config.robotsTxtCache_set(self.urlTuple[0:2], robotsTxt)
return config.robotsTxtCache_get(self.url)

View file

@ -37,8 +37,8 @@ _linkMatcher = r"""
< # open tag
\s* # whitespace
%s # tag name
\s+ # whitespace
[^>]*? # skip leading attributes
\s+ # whitespace
%s # attrib name
\s* # whitespace
= # equal sign

View file

@ -6,6 +6,7 @@ Just some HTTP links
<a href="http://">
<a href="http:/">
<a href="http:">
<a href="http://localhost:/">
<a href="http://www.blubb.de/stalter&sohn">
<a name="iswas"> <!-- anchor for test2.html -->
<a href=http://slashdot.org/>

View file

@ -16,6 +16,11 @@
<a href="ftp://treasure.calvinsplayground.de//pub">
<a href="ftp://treasure.calvinsplayground.de////////pub">
<a href="ftp:///treasure.calvinsplayground.de/pub">
< img src="blubb">
< link href="blubb">
< script src="bla">
< img src="blubb_image">
< img lowsrc="blubb_lowimage">
< link href="blubb_link">
< script src="blubb_script">
< area href="blubb_area">
< body background="blubb_body">
< area href="blubb_href">
< form action="blubb_action">