mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-03 06:30:23 +00:00
proxy config
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@185 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
b57f1d2274
commit
b6de623f4e
5 changed files with 18 additions and 5 deletions
7
TODO
7
TODO
|
|
@ -1,6 +1,11 @@
|
|||
High priority
|
||||
|
||||
o Use Python 2.0 features
|
||||
o Proxy does not work:
|
||||
- separate http/https/ftp proxies
|
||||
- environment variables are used by RobotParser, so I have to
|
||||
use them as well.
|
||||
|
||||
o Test the robot parser
|
||||
|
||||
o I want to be able to supply a "break" command even when multiple
|
||||
threads are running.
|
||||
|
|
|
|||
|
|
@ -196,7 +196,9 @@ class HttpUrlData(UrlData):
|
|||
roboturl="%s://%s/robots.txt" % self.urlTuple[0:2]
|
||||
rp = robotparser.RobotFileParser()
|
||||
rp.set_url(roboturl)
|
||||
print roboturl
|
||||
rp.read()
|
||||
print "2"
|
||||
robotsTxt = rp.can_fetch(Config.UserAgent, self.url)
|
||||
config.robotsTxtCache_set(self.urlTuple[0:2], robotsTxt)
|
||||
return config.robotsTxtCache_get(self.url)
|
||||
|
|
|
|||
|
|
@ -37,8 +37,8 @@ _linkMatcher = r"""
|
|||
< # open tag
|
||||
\s* # whitespace
|
||||
%s # tag name
|
||||
\s+ # whitespace
|
||||
[^>]*? # skip leading attributes
|
||||
\s+ # whitespace
|
||||
%s # attrib name
|
||||
\s* # whitespace
|
||||
= # equal sign
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ Just some HTTP links
|
|||
<a href="http://">
|
||||
<a href="http:/">
|
||||
<a href="http:">
|
||||
<a href="http://localhost:/">
|
||||
<a href="http://www.blubb.de/stalter&sohn">
|
||||
<a name="iswas"> <!-- anchor for test2.html -->
|
||||
<a href=http://slashdot.org/>
|
||||
|
|
|
|||
|
|
@ -16,6 +16,11 @@
|
|||
<a href="ftp://treasure.calvinsplayground.de//pub">
|
||||
<a href="ftp://treasure.calvinsplayground.de////////pub">
|
||||
<a href="ftp:///treasure.calvinsplayground.de/pub">
|
||||
< img src="blubb">
|
||||
< link href="blubb">
|
||||
< script src="bla">
|
||||
< img src="blubb_image">
|
||||
< img lowsrc="blubb_lowimage">
|
||||
< link href="blubb_link">
|
||||
< script src="blubb_script">
|
||||
< area href="blubb_area">
|
||||
< body background="blubb_body">
|
||||
< area href="blubb_href">
|
||||
< form action="blubb_action">
|
||||
|
|
|
|||
Loading…
Reference in a new issue