mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-25 16:44:43 +00:00
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@249 e7d03fd6-7b0d-0410-9947-9c21f3af8025
30 lines
888 B
Python
30 lines
888 B
Python
import re,StringUtil
|
|
|
|
# Patterns are written as raw strings so that regex escapes such as ``\s``
# reach the ``re`` module verbatim instead of being treated as (invalid)
# string-literal escapes. The compiled pattern text is unchanged.

# Finds an ``alt`` attribute preceded by whitespace. The value is captured in
# the "name" group *including* its surrounding quotes, or as a bare token
# running up to the next whitespace or ">" when unquoted.
imgtag_re = re.compile(
    r'''(?i)\s+alt\s*=\s*(?P<name>(".*?"|'.*?'|[^\s>]+))''', re.DOTALL)
# Detects an image tag: "<", optional whitespace, "img", at least one
# whitespace character, then (greedily, across newlines) up to the last ">".
img_re = re.compile(r"(?i)<\s*img\s+.*>", re.DOTALL)
# Captures, as "name", everything (across newlines) that precedes the first
# closing anchor tag.
href_re = re.compile(r"(?i)(?P<name>.*?)</a\s*>", re.DOTALL)
|
|
|
|
def image_name(txt):
    """Return the cleaned ``alt`` attribute value found in *txt*.

    *txt* is searched with ``imgtag_re``. When an ``alt`` attribute is
    found, its value is whitespace-stripped and then passed, in order,
    through ``StringUtil.stripQuotes``, ``StringUtil.remove_markup`` and
    ``StringUtil.unhtmlify``. When no ``alt`` attribute is found, the empty
    string is returned.
    """
    match = imgtag_re.search(txt)
    if not match:
        return ""
    alt_text = match.group('name').strip()
    alt_text = StringUtil.stripQuotes(alt_text)
    alt_text = StringUtil.remove_markup(alt_text)
    return StringUtil.unhtmlify(alt_text)
|
|
|
|
|
|
def href_name(txt):
    """Return the cleaned link name found in *txt*.

    *txt* is searched with ``href_re``; the text preceding the first closing
    anchor tag is taken as the raw name and whitespace-stripped. If that raw
    name contains an image tag (per ``img_re``), it is replaced by the
    result of ``image_name()``. The name is then passed through
    ``StringUtil.remove_markup`` and ``StringUtil.unhtmlify``. When *txt*
    contains no closing anchor tag, the empty string is returned.
    """
    match = href_re.search(txt)
    if not match:
        return ""
    label = match.group('name').strip()
    if img_re.search(label):
        label = image_name(label)
    # NOTE(review): a label that came from image_name() appears to have been
    # run through remove_markup/unhtmlify already, so it is processed twice
    # here -- confirm that the double pass is intended.
    label = StringUtil.remove_markup(label)
    return StringUtil.unhtmlify(label)
|