mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-02 11:54:43 +00:00
fix entity resolving
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@899 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
3c78149f48
commit
5680daff09
1 changed file with 6 additions and 3 deletions
|
|
@@ -20,8 +20,11 @@ import re, sys, htmlentitydefs
|
|||
markup_re = re.compile("<.*?>", re.DOTALL)
|
||||
entities = htmlentitydefs.entitydefs.items()
|
||||
HtmlTable = map(lambda x: (x[1], "&"+x[0]+";"), entities)
|
||||
UnHtmlTable = map(lambda x: ("&"+x[0]+";", x[1]), entities)
|
||||
# order matters!
|
||||
HtmlTable.sort()
|
||||
UnHtmlTable.sort()
|
||||
UnHtmlTable.reverse()
|
||||
# standard xml entities
|
||||
entities = {
|
||||
'lt': '<',
|
||||
|
|
@@ -119,8 +122,7 @@ is_charref = re.compile(r'&#x?(?P<num>\d+);').match
|
|||
|
||||
def resolve_entity (mo):
|
||||
ent = mo.group(0).lower()
|
||||
if htmlentitydefs.entitydefs.has_key(ent):
|
||||
return htmlentitydefs.entitydefs[ent]
|
||||
ent = applyTable(UnHtmlTable, ent)
|
||||
mo = is_charref(ent)
|
||||
if mo:
|
||||
# convert to number
|
||||
|
|
@@ -137,7 +139,7 @@ def resolve_entity (mo):
|
|||
|
||||
|
||||
def unhtmlify (s):
|
||||
return re.sub(r'(?i)&(#x?\d+|[a-z]+);', resolve_entity, s)
|
||||
return re.sub(r'(?i)&(?P<ent>#x?\d+|[a-z]+);', resolve_entity, s)
|
||||
|
||||
|
||||
def xmlify (s):
|
||||
|
|
@@ -204,6 +206,7 @@ def strsize (b):
|
|||
|
||||
def _test ():
|
||||
print unhtmlify('a')
|
||||
print unhtmlify('&')
|
||||
|
||||
if __name__=='__main__':
|
||||
_test()
|
||||
|
|
|
|||
Loading…
Reference in a new issue