Fix MS Word parser, hopefully

MS Word files are binary data, and get_temp_filename() writes them
to disk using open(..., 'wb'), so we need to pass it bytes, not a
Unicode string.

See #323.
This commit is contained in:
Marius Gedminas 2019-10-22 16:39:57 +03:00
parent bbb90eba81
commit fa32a89d6b

View file

@@ -117,7 +117,7 @@ class WordParser(_ParserPlugin):
def check(self, url_data):
"""Parse Word data."""
-content = url_data.get_content()
+content = url_data.get_raw_content()
filename = get_temp_filename(content)
# open word file and parse hyperlinks
try: