From 4f8c2954cf8e24a3ea01fda0536d7363646e89bf Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Sat, 5 Oct 2019 19:38:57 +0100 Subject: [PATCH] Don't set parser.encoding Read-only property with new Beautiful Soup parser. --- linkcheck/checker/httpurl.py | 2 -- linkcheck/htmlutil/formsearch.py | 3 +-- linkcheck/parser/__init__.py | 2 -- 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/linkcheck/checker/httpurl.py b/linkcheck/checker/httpurl.py index 498e279f..96184f3a 100644 --- a/linkcheck/checker/httpurl.py +++ b/linkcheck/checker/httpurl.py @@ -84,8 +84,6 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport): handler = linkparse.MetaRobotsFinder() parser = htmlsax.parser(handler) handler.parser = parser - if self.charset: - parser.encoding = self.charset # parse try: parser.feed(self.get_raw_content()) diff --git a/linkcheck/htmlutil/formsearch.py b/linkcheck/htmlutil/formsearch.py index d66a2096..4ef16a53 100644 --- a/linkcheck/htmlutil/formsearch.py +++ b/linkcheck/htmlutil/formsearch.py @@ -89,14 +89,13 @@ class FormFinder(object): self.form = None -def search_form(content, cgiuser, cgipassword, encoding='utf-8'): +def search_form(content, cgiuser, cgipassword): """Search for a HTML form in the given HTML content that has the given CGI fields. If no form is found return None. """ handler = FormFinder() parser = htmlsax.parser(handler) handler.parser = parser - parser.encoding = encoding # parse parser.feed(content) parser.flush() diff --git a/linkcheck/parser/__init__.py b/linkcheck/parser/__init__.py index 45f8e84d..d234c020 100644 --- a/linkcheck/parser/__init__.py +++ b/linkcheck/parser/__init__.py @@ -125,8 +125,6 @@ def find_links (url_data, callback, tags): # construct parser object handler = linkparse.LinkFinder(callback, tags) parser = htmlsax.parser(handler) - if url_data.charset: - parser.encoding = url_data.charset handler.parser = parser # parse try: