diff --git a/linkcheck/htmlutil/linkname.py b/linkcheck/htmlutil/linkname.py deleted file mode 100644 index 9cb65dfa..00000000 --- a/linkcheck/htmlutil/linkname.py +++ /dev/null @@ -1,71 +0,0 @@ -# -*- coding: iso-8859-1 -*- -# Copyright (C) 2001-2014 Bastian Kleineidam -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -""" -Parse names of title tags and link types. -""" - -import re -from .. import HtmlParser, strformat - - -imgtag_re = re.compile(r"(?i)\s+alt\s*=\s*"+\ - r"""(?P("[^"\n]*"|'[^'\n]*'|[^\s>]+))""") -img_re = re.compile(r"""(?i)<\s*img\s+("[^"\n]*"|'[^'\n]*'|[^>])+>""") - - -def endtag_re (tag): - """Return matcher for given end tag""" - return re.compile(r"(?i)" % tag) - -a_end_search = endtag_re("a").search -title_end_search = endtag_re("title").search - - -def _unquote (txt): - """Resolve entities and remove markup from txt.""" - return HtmlParser.resolve_entities(strformat.remove_markup(txt)) - - -def image_name (txt): - """Return the alt part of the first tag in txt.""" - mo = imgtag_re.search(txt) - if mo: - name = strformat.unquote(mo.group('name').strip()) - return _unquote(name) - return u'' - - -def href_name (txt): - """Return the name part of the first name link in txt.""" - name = u"" - endtag = a_end_search(txt) - if not endtag: - return name - name = txt[:endtag.start()] - if img_re.search(name): - return image_name(name) - return _unquote(name) - - -def title_name (txt): - """Return the part of the first name in txt.""" - name = u"" - endtag = title_end_search(txt) - if not endtag: - return name - name = txt[:endtag.start()] - return _unquote(name) diff --git a/linkcheck/htmlutil/linkparse.py b/linkcheck/htmlutil/linkparse.py index f706e272..33aa4321 100644 --- a/linkcheck/htmlutil/linkparse.py +++ b/linkcheck/htmlutil/linkparse.py @@ -20,7 +20,6 @@ Find link tags in HTML text. import re from .. import strformat, log, LOG_CHECK, url as urlutil -from . import linkname from builtins import str as str_text unquote = strformat.unquote diff --git a/tests/test_linkname.py b/tests/test_linkname.py deleted file mode 100644 index 191d0a10..00000000 --- a/tests/test_linkname.py +++ /dev/null @@ -1,63 +0,0 @@ -# -*- coding: iso-8859-1 -*- -# Copyright (C) 2004-2009 Bastian Kleineidam -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -""" -Test linkname routines. -""" - -import unittest -from linkcheck.htmlutil import linkname - - -class TestLinkname (unittest.TestCase): - """ - Test href and image name parsing. - """ - - def image_name_test (self, txt, expected): - """ - Helper function calling linkname.image_name(). - """ - self.assertEqual(linkname.image_name(txt), expected) - - def href_name_test (self, txt, expected): - """ - Helper function calling linkname.href_name(). - """ - self.assertEqual(linkname.href_name(txt), expected) - - def test_image_name (self): - """ - Test image name parsing. - """ - self.image_name_test("", '') - self.image_name_test("abc", 'abc') - - def test_href_name (self): - """ - Test href name parsing. - """ - self.href_name_test("guru guru", 'guru guru') - self.href_name_test("a\njo", "a\njo") - self.href_name_test("test<", "test<") - self.href_name_test("test", "test", "test"foo', '"') - self.href_name_test("", '') - self.href_name_test("abc", 'abc')