Handle stray < before end tags.

This commit is contained in:
Bastian Kleineidam 2011-05-28 13:39:04 +02:00
parent a894363096
commit c9707ee735
4 changed files with 891 additions and 825 deletions

View file

@ -1,5 +1,9 @@
7.1 "" (released xx.xx.2011)
Fixes:
- checking: HTML parser detects and handles stray "<" characters before
end tags.
7.0 "Plots with a View" (released 28.5.2011)

File diff suppressed because it is too large Load diff

View file

@ -478,7 +478,26 @@ RX_DATA [-a-zA-Z0-9_:]+
RETURN(T_ELEMENT_START);
}
<S_TAGSTART>< {
<S_TAGSTART><\/ {
    /* Abort parsing this start tag and begin an endtag. Assume
       the last "<" was a stray unquoted character: the text collected
       so far in yyextra->tmp_buf is re-prefixed with the missing "<"
       and handed to the parser as plain text (T_TEXT). */
    char* tmp = NULL;
    /* Bytes needed for "<" + current buffer contents + terminating NUL.
       This size is passed to strlcat() explicitly: sizeof(tmp) would
       only be the size of the pointer and would silently truncate the
       text to a few bytes. */
    size_t needed = strlen(yyextra->tmp_buf) + 2;
    UPDATE_COLUMN;
    BEGIN(S_TAGEND);
    /* Add missing "<" at beginning of buffer. */
    RESIZE_BUF(tmp, needed);
    tmp[0] = '<';
    tmp[1] = '\0';
    strlcat(tmp, yyextra->tmp_buf, needed);
    /* Copy the prefixed text back; strlen(tmp)+1 equals `needed` here.
       NOTE(review): yyextra->tmp_buf is assumed to be a heap pointer
       (it is resized with RESIZE_BUF), so sizeof() on it would likewise
       be the pointer size -- confirm against its declaration. */
    RESIZE_BUF(yyextra->tmp_buf, needed);
    yyextra->tmp_buf[0] = '\0';
    strlcat(yyextra->tmp_buf, tmp, needed);
    free(tmp);
    SETLVAL_UNICODE;
    RETURN(T_TEXT);
}
<S_TAGSTART></[^/] {
/* Abort parsing this start tag and begin a new one. Assume
the last "<" was a stray unquoted character. */
char* tmp = NULL;

View file

@ -163,8 +163,10 @@ parsetests = [
("""</td<td a="b" >""", """</td><td a="b">"""),
# missing beginning quote
("""<td a=b">""", """<td a="b">"""),
# stray < before tag
# stray < before start tag
("""<0.<td a="b" >""", """<0.<td a="b">"""),
# stray < before end tag
("""<0.</td >""", """<0.</td>"""),
# missing end quote (XXX TODO)
#("""<td a="b>\n""", """<td a="b">\n"""),
#("""<td a="b></td>\na""", """<td a="b"></td>\na"""),