mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-21 14:44:44 +00:00
Catch errors when getting content for title.
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@3814 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
2a3e855c48
commit
d26386d03f
1 changed file with 5 additions and 2 deletions
|
|
@@ -192,7 +192,10 @@ class UrlBase (object):
|
|||
def set_title_from_content (self):
|
||||
"""Set title of page the URL refers to from page content."""
|
||||
if self.valid and self.is_html():
|
||||
handler = titleparse.TitleFinder(self.get_content())
|
||||
try:
|
||||
handler = titleparse.TitleFinder(self.get_content())
|
||||
except tuple(ExcList):
|
||||
return
|
||||
parser = htmlsax.parser(handler)
|
||||
handler.parser = parser
|
||||
# parse
|
||||
|
|
@@ -390,7 +393,6 @@ class UrlBase (object):
|
|||
trace.trace_on()
|
||||
try:
|
||||
self.local_check()
|
||||
self.set_title_from_content()
|
||||
except (socket.error, select.error):
|
||||
# on Unix, ctrl-c can raise
|
||||
# error: (4, 'Interrupted system call')
|
||||
|
|
@@ -439,6 +441,7 @@ class UrlBase (object):
|
|||
value = _('Bad HTTP response %(line)r') % {"line": str(value)}
|
||||
self.set_result(unicode_safe(value), valid=False)
|
||||
if self.can_get_content():
|
||||
self.set_title_from_content()
|
||||
self.check_content()
|
||||
self.checktime = time.time() - check_start
|
||||
# check recursion
|
||||
|
|
|
|||
Loading…
Reference in a new issue