mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-22 23:24:44 +00:00
BF: place a mutex around apparently thread-unsafe parser.feed invocation
This fixes the anchor analysis and probably other issues as well, such as the fluctuating number of found URLs.
This commit is contained in:
parent
b78c2d200e
commit
ee27e178ec
1 changed files with 8 additions and 1 deletions
|
|
@@ -17,11 +17,16 @@
|
|||
"""
|
||||
Main functions for link parsing
|
||||
"""
|
||||
import threading
|
||||
|
||||
from .. import log, LOG_CHECK, strformat, url as urlutil
|
||||
from ..htmlutil import linkparse
|
||||
from ..HtmlParser import htmlsax
|
||||
from ..bookmarks import firefox
|
||||
|
||||
# Needed within find_links to serialize the non-thread-safe parser.feed call
|
||||
parse_mutex = threading.Lock()
|
||||
|
||||
|
||||
def parse_url(url_data):
|
||||
"""Parse a URL."""
|
||||
|
|
@@ -125,7 +130,9 @@ def find_links (url_data, callback, tags):
|
|||
handler.parser = parser
|
||||
# parse
|
||||
try:
|
||||
parser.feed(url_data.get_content())
|
||||
content = url_data.get_content()
|
||||
with parse_mutex:
|
||||
parser.feed(content)
|
||||
parser.flush()
|
||||
except linkparse.StopParse as msg:
|
||||
log.debug(LOG_CHECK, "Stopped parsing: %s", msg)
|
||||
|
|
|
|||
Loading…
Reference in a new issue