mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-28 15:48:16 +00:00
Prevent disallowed content reads when checking for robots.txt allowance in HTML files.
This commit is contained in:
parent
25cde6775b
commit
61e611e4bf
3 changed files with 10 additions and 2 deletions
|
|
@ -4,6 +4,8 @@ Fixes:
|
|||
- gui: Enable the cancel button again after it has been clicked and
|
||||
disabled.
|
||||
- checking: Fix printing of active URLs on Ctrl-C.
|
||||
- checking: Check that reading the content is allowed before trying to
|
||||
parse robots.txt allowance.
|
||||
|
||||
Changes:
|
||||
- gui: Display cancel message in progress window.
|
||||
|
|
@ -14,7 +16,8 @@ Features:
|
|||
Closes: SF bug #3040378
|
||||
- gui: Read default options from configuration file.
|
||||
Closes: SF bug #2931320
|
||||
|
||||
- config: Added configuration file option for the --cookies command line
|
||||
option.
|
||||
|
||||
5.3 "Inception" (released 29.9.2010)
|
||||
|
||||
|
|
|
|||
|
|
@ -646,6 +646,11 @@ Use URL `%(newurl)s' instead for checking.""") % {
|
|||
if self.method_get_allowed:
|
||||
super(HttpUrl, self).set_title_from_content()
|
||||
|
||||
def content_allows_robots (self):
|
||||
if not self.method_get_allowed:
|
||||
return False
|
||||
return super(HttpUrl, self).content_allows_robots()
|
||||
|
||||
def is_html (self):
|
||||
"""
|
||||
See if this URL points to a HTML file by looking at the
|
||||
|
|
|
|||
|
|
@ -565,7 +565,7 @@ class UrlBase (object):
|
|||
|
||||
def content_allows_robots (self):
|
||||
"""
|
||||
Return True if the content of this URL forbids robots to
|
||||
Return False if the content of this URL forbids robots to
|
||||
search for recursive links.
|
||||
"""
|
||||
if not self.is_html():
|
||||
|
|
|
|||
Loading…
Reference in a new issue