2003-07-04 14:24:44 +00:00
|
|
|
# -*- coding: iso-8859-1 -*-
|
2004-01-03 14:59:33 +00:00
|
|
|
# Copyright (C) 2001-2004 Bastian Kleineidam
|
2001-05-23 21:20:44 +00:00
|
|
|
#
|
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
# (at your option) any later version.
|
|
|
|
|
#
|
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
|
#
|
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
|
# along with this program; if not, write to the Free Software
|
|
|
|
|
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
|
|
|
|
2002-02-24 12:29:35 +00:00
|
|
|
import re, StringUtil
|
2001-04-28 18:37:10 +00:00
|
|
|
|
2002-06-07 19:50:56 +00:00
|
|
|
# Case-insensitive pattern for an ``alt`` attribute: leading whitespace,
# ``alt``, ``=``, then a value captured as the named group ``name``.  The
# value is either double-quoted, single-quoted (neither may contain a
# newline) or a bare run of characters up to whitespace or ``>``.
# The pattern itself does not check that the attribute is inside an
# ``<img>`` tag; callers are expected to have established that.
imgtag_re = re.compile(r"""(?i)\s+alt\s*=\s*(?P<name>("[^"\n]*"|'[^'\n]*'|[^\s>]+))""")
|
|
|
|
|
# Case-insensitive pattern for an ``<img ...>`` tag: ``<``, optional
# whitespace, ``img``, mandatory whitespace, then one or more pieces that are
# each a double-quoted string, a single-quoted string, or a run of non-``>``
# characters, followed by the closing ``>``.
# NOTE(review): the repeated group contains ``[^>]+`` under an outer ``+``;
# nested quantifiers like this can backtrack heavily on input that has
# ``<img `` but no later ``>`` -- worth confirming against real inputs.
img_re = re.compile(r"""(?i)<\s*img\s+("[^"\n]*"|'[^'\n]*'|[^>]+)+>""")
|
2002-06-09 15:32:14 +00:00
|
|
|
# Case-insensitive pattern for a closing anchor tag ``</a>``, allowing
# whitespace between the ``a`` and the ``>``.
endtag_re = re.compile(r"""(?i)</a\s*>""")
|
2001-04-28 18:37:10 +00:00
|
|
|
|
|
|
|
|
def image_name(txt):
    """Return the ``alt`` text found in txt, or '' if there is none.

    txt is searched with imgtag_re.  When an ``alt`` attribute is found,
    its captured value is stripped of surrounding whitespace, passed
    through StringUtil.remove_markup and finally through
    StringUtil.unquote.  Without a match the empty string is returned.
    """
    match = imgtag_re.search(txt)
    if match is None:
        return ''
    alt_value = match.group('name').strip()
    return StringUtil.unquote(StringUtil.remove_markup(alt_value))
|
2001-04-28 18:37:10 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def href_name(txt):
    """Return a name for the link whose content starts at txt.

    Only the part of txt before the first closing ``</a>`` tag (as matched
    by endtag_re) is considered.  If no closing tag exists, the empty
    string is returned.  If that part contains an image tag (img_re), the
    result of image_name() on it is returned.  Otherwise the part is run
    through StringUtil.remove_markup and then StringUtil.unhtmlify.
    """
    closing = endtag_re.search(txt)
    if closing is None:
        return ""
    content = txt[:closing.start()]
    if img_re.search(content) is not None:
        return image_name(content)
    without_markup = StringUtil.remove_markup(content)
    return StringUtil.unhtmlify(without_markup)
|
2002-06-07 19:50:56 +00:00
|
|
|
|
|
|
|
|
_tests = (
|
|
|
|
|
"<img src='' alt=''></a>",
|
|
|
|
|
"<img src alt=abc></a>",
|
|
|
|
|
"<b>guru guru</a>",
|
|
|
|
|
"a\njo</a>",
|
|
|
|
|
"test<</a>",
|
|
|
|
|
"test</</a>",
|
|
|
|
|
"test</a</a>",
|
2002-06-09 15:32:14 +00:00
|
|
|
"test",
|
|
|
|
|
"\n",
|
|
|
|
|
"",
|
|
|
|
|
'"</a>"foo',
|
2002-06-07 19:50:56 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
def _test():
    """Print the repr() of href_name() for every sample in _tests.

    Output goes to standard output, one line per sample; nothing is
    returned or asserted.
    """
    for sample in _tests:
        # A parenthesised single argument is accepted both by the Python 2
        # print statement and by the Python 3 print function, and prints
        # the same text in either case.
        print(repr(href_name(sample)))
|
|
|
|
|
|
|
|
|
|
# Run the sample cases when this module is executed as a script.
if __name__ == "__main__":
    _test()
|