diff --git a/linkcheck/HtmlParser/__init__.py b/linkcheck/HtmlParser/__init__.py
deleted file mode 100644
index de3a2a3e..00000000
--- a/linkcheck/HtmlParser/__init__.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# -*- coding: iso-8859-1 -*-
-# Copyright (C) 2000-2014 Bastian Kleineidam
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-"""
-HTML parser module.
-
-USAGE
-
-
-Two functions are provided, one to make a BeautifulSoup object from markup and
-another to call a handler's callback for each element in a BeautifulSoup
-object it can process.
-
-The used callback of a handler is:
-
-- Start tag:
- def start_element (tag, attrs, text, line, column)
- @param tag: tag name
- @type tag: string
- @param attrs: tag attributes
- @type attrs: dict
- @param text: element text
- @type tag: string
- @param line: tag line number
- @type tag: integer
- @param column: tag column number
- @type tag: integer
-
-EXAMPLE
-
- # Create a new BeautifulSoup object.
- soup = HtmlParser.htmlsax.make_soup("Blubb")
- # Process the soup with the chosen handler as a parameter.
- HtmlParser.htmlsax.proces_soup(handler, soup)
-
-"""
diff --git a/linkcheck/checker/httpurl.py b/linkcheck/checker/httpurl.py
index cbbf759e..e891402e 100644
--- a/linkcheck/checker/httpurl.py
+++ b/linkcheck/checker/httpurl.py
@@ -30,8 +30,7 @@ from io import BytesIO
from .. import (log, LOG_CHECK, strformat, mimeutil,
url as urlutil, LinkCheckerError, httputil)
from . import (internpaturl, proxysupport)
-from ..HtmlParser import htmlsax
-from ..htmlutil import linkparse
+from ..htmlutil import htmlsoup, linkparse
# import warnings
from .const import WARN_HTTP_EMPTY_CONTENT
from requests.sessions import REDIRECT_STATI
@@ -83,7 +82,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
handler = linkparse.MetaRobotsFinder()
# parse
try:
- htmlsax.process_soup(handler, self.get_soup())
+ htmlsoup.process_soup(handler, self.get_soup())
except linkparse.StopParse as msg:
log.debug(LOG_CHECK, "Stopped parsing: %s", msg)
pass
@@ -302,7 +301,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
def get_content(self):
if self.text is None:
self.get_raw_content()
- self.soup = htmlsax.make_soup(self.data, self.encoding)
+ self.soup = htmlsoup.make_soup(self.data, self.encoding)
self.text = self.data.decode(self.soup.original_encoding)
return self.text
diff --git a/linkcheck/checker/urlbase.py b/linkcheck/checker/urlbase.py
index bb7debef..78ce194f 100644
--- a/linkcheck/checker/urlbase.py
+++ b/linkcheck/checker/urlbase.py
@@ -45,7 +45,7 @@ from future.utils import python_2_unicode_compatible
from . import absolute_url, get_url_from
from .. import (log, LOG_CHECK,
strformat, LinkCheckerError, url as urlutil, trace, get_link_pat)
-from ..HtmlParser import htmlsax
+from ..htmlutil import htmlsoup
from ..network import iputil
from .const import (WARN_URL_EFFECTIVE_URL,
WARN_URL_ERROR_GETTING_CONTENT, WARN_URL_OBFUSCATED_IP,
@@ -651,7 +651,7 @@ class UrlBase (object):
def get_content (self):
if self.text is None:
self.get_raw_content()
- self.soup = htmlsax.make_soup(self.data)
+ self.soup = htmlsoup.make_soup(self.data)
self.text = self.data.decode(self.soup.original_encoding)
self.encoding = self.soup.original_encoding
return self.text
diff --git a/linkcheck/htmlutil/formsearch.py b/linkcheck/htmlutil/formsearch.py
index 1f71542f..d10b5089 100644
--- a/linkcheck/htmlutil/formsearch.py
+++ b/linkcheck/htmlutil/formsearch.py
@@ -17,7 +17,7 @@
"""
HTML form utils
"""
-from ..HtmlParser import htmlsax
+from ..htmlutil import htmlsoup
from .. import log, LOG_CHECK
class Form(object):
@@ -41,7 +41,7 @@ def search_form(content, cgiuser, cgipassword):
"""Search for a HTML form in the given HTML content that has the given
CGI fields. If no form is found return None.
"""
- soup = htmlsax.make_soup(content)
+ soup = htmlsoup.make_soup(content)
# The value of the name attribute is case-insensitive
# https://www.w3.org/TR/html401/interact/forms.html#adef-name-INPUT
cginames = {cgiuser.lower(), cgipassword.lower()}
diff --git a/linkcheck/HtmlParser/htmlsax.py b/linkcheck/htmlutil/htmlsoup.py
similarity index 64%
rename from linkcheck/HtmlParser/htmlsax.py
rename to linkcheck/htmlutil/htmlsoup.py
index 61ef5190..921703cf 100644
--- a/linkcheck/HtmlParser/htmlsax.py
+++ b/linkcheck/htmlutil/htmlsoup.py
@@ -15,6 +15,35 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
HTML parser implemented using Beautiful Soup and html.parser.
+
+USAGE
+
+Two functions are provided, one to make a BeautifulSoup object from markup and
+another to call a handler's callbacks for each element in a BeautifulSoup
+object it can process.
+
+The callback used by a handler is:
+
+- Start tag:
+ def start_element (tag, attrs, text, line, column)
+ @param tag: tag name
+ @type tag: string
+ @param attrs: tag attributes
+ @type attrs: dict
+ @param text: element text
+    @type text: string
+ @param line: tag line number
+    @type line: integer
+ @param column: tag column number
+    @type column: integer
+
+EXAMPLE
+
+ # Create a new BeautifulSoup object.
+ soup = htmlutil.htmlsoup.make_soup("Blubb")
+ # Process the soup with the chosen handler as a parameter.
+    htmlutil.htmlsoup.process_soup(handler, soup)
+
"""
from warnings import filterwarnings
diff --git a/linkcheck/htmlutil/linkparse.py b/linkcheck/htmlutil/linkparse.py
index c455cffa..cd5ac582 100644
--- a/linkcheck/htmlutil/linkparse.py
+++ b/linkcheck/htmlutil/linkparse.py
@@ -99,7 +99,7 @@ class StopParse(Exception):
class TagFinder (object):
"""Base class handling HTML start elements.
- TagFinder instances are used as HtmlParser handlers."""
+ TagFinder instances are used as HTML parser handlers."""
def __init__ (self):
"""Initialize local variables."""
diff --git a/linkcheck/parser/__init__.py b/linkcheck/parser/__init__.py
index bd1e3100..0f584f35 100644
--- a/linkcheck/parser/__init__.py
+++ b/linkcheck/parser/__init__.py
@@ -18,8 +18,7 @@
Main functions for link parsing
"""
from .. import log, LOG_CHECK, strformat, url as urlutil
-from ..htmlutil import linkparse
-from ..HtmlParser import htmlsax
+from ..htmlutil import htmlsoup, linkparse
from ..bookmarks import firefox
@@ -121,7 +120,7 @@ def find_links (url_data, callback, tags):
handler = linkparse.LinkFinder(callback, tags)
# parse
try:
- htmlsax.process_soup(handler, url_data.get_soup())
+ htmlsoup.process_soup(handler, url_data.get_soup())
except linkparse.StopParse as msg:
log.debug(LOG_CHECK, "Stopped parsing: %s", msg)
pass
diff --git a/setup.py b/setup.py
index 4f3c07ef..237d94d4 100755
--- a/setup.py
+++ b/setup.py
@@ -382,7 +382,6 @@ setup(
'linkcheck.configuration',
'linkcheck.director',
'linkcheck.htmlutil',
- 'linkcheck.HtmlParser',
'linkcheck.logger',
'linkcheck.network',
'linkcheck.parser',
diff --git a/tests/test_linkparser.py b/tests/test_linkparser.py
index 38c2277d..94171aa8 100644
--- a/tests/test_linkparser.py
+++ b/tests/test_linkparser.py
@@ -19,8 +19,7 @@ Test linkparser routines.
"""
import unittest
-from linkcheck.htmlutil import linkparse
-from linkcheck.HtmlParser import htmlsax
+from linkcheck.htmlutil import htmlsoup, linkparse
class TestLinkparser (unittest.TestCase):
@@ -32,7 +31,7 @@ class TestLinkparser (unittest.TestCase):
self.count_url = 0
h = linkparse.LinkFinder(self._test_one_url(url), linkparse.LinkTags)
try:
- htmlsax.process_soup(h, htmlsax.make_soup(content))
+ htmlsoup.process_soup(h, htmlsoup.make_soup(content))
except linkparse.StopParse:
pass
self.assertEqual(self.count_url, 1)
@@ -49,7 +48,7 @@ class TestLinkparser (unittest.TestCase):
self.assertTrue(False, 'URL %r found' % url)
h = linkparse.LinkFinder(callback, linkparse.LinkTags)
try:
- htmlsax.process_soup(h, htmlsax.make_soup(content))
+ htmlsoup.process_soup(h, htmlsoup.make_soup(content))
except linkparse.StopParse:
pass
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 3d73d8af..22c08fc1 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -18,7 +18,7 @@
Test html parsing.
"""
-from linkcheck.HtmlParser import htmlsax
+from linkcheck.htmlutil import htmlsoup
from io import StringIO
import unittest
@@ -143,7 +143,7 @@ class TestParser (unittest.TestCase):
# Parse all test patterns in one go.
out = StringIO()
handler = HtmlPrettyPrinter(out)
- htmlsax.process_soup(handler, htmlsax.make_soup(_in))
+ htmlsoup.process_soup(handler, htmlsoup.make_soup(_in))
self.check_results(_in, _out, out)
def check_results (self, _in, _out, out):
@@ -182,6 +182,6 @@ class TestParser (unittest.TestCase):
def encoding_test (self, html, expected):
out = StringIO()
handler = HtmlPrettyPrinter(out)
- soup = htmlsax.make_soup(html)
- htmlsax.process_soup(handler, soup)
+ soup = htmlsoup.make_soup(html)
+ htmlsoup.process_soup(handler, soup)
self.assertEqual(soup.original_encoding, expected)