BF: place a mutex around apparently thread-unsafe parser.feed invocation

This fixes the anchor analysis and probably other issues,
such as a fluctuating number of found URLs, etc.
This commit is contained in:
Yaroslav Halchenko 2018-10-31 01:55:20 -04:00
parent b78c2d200e
commit ee27e178ec

View file

@ -17,11 +17,16 @@
"""
Main functions for link parsing
"""
import threading
from .. import log, LOG_CHECK, strformat, url as urlutil
from ..htmlutil import linkparse
from ..HtmlParser import htmlsax
from ..bookmarks import firefox
# Held in find_links around the parser.feed() call, which is apparently not thread-safe
parse_mutex = threading.Lock()
def parse_url(url_data):
"""Parse a URL."""
@ -125,7 +130,9 @@ def find_links (url_data, callback, tags):
handler.parser = parser
# parse
try:
parser.feed(url_data.get_content())
content = url_data.get_content()
with parse_mutex:
parser.feed(content)
parser.flush()
except linkparse.StopParse as msg:
log.debug(LOG_CHECK, "Stopped parsing: %s", msg)