recognize broken html comments

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@760 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2003-01-22 00:12:29 +00:00
parent 41cf3d2c18
commit 38c3d905c3
2 changed files with 4323 additions and 3990 deletions

File diff suppressed because it is too large Load diff

View file

@ -179,6 +179,7 @@ static PyObject* quote_string (PyObject* val) {
%x S_PI
%x S_COMMENT
/* S_COMMENT2: exclusive start condition used while inside a broken "<! ... !>" comment */
%x S_COMMENT2
%x S_DOCTYPE
%x S_CDATA
%x S_TAGSTART
@ -240,6 +241,34 @@ RX_DATA [-a-zA-Z0-9_]+
return T_WAIT;
}
/* Note: www.nba.com had some <! Copyright !> comments */
<INITIAL><![ ]+ {
/* "<!" followed by one or more spaces opens a broken comment: switch to
   the S_COMMENT2 start condition, whose rules scan for the closing "!>". */
UPDATE_BUFPOS;
BEGIN(S_COMMENT2);
}
<S_COMMENT2>!> {
/* "!>" closes the broken comment: return to the INITIAL start condition
   and report a T_COMMENT token.
   NOTE(review): SETLVAL presumably stores the text gathered through
   APPEND_TO_TMP as the token value -- confirm against the macro definition. */
UPDATE_BUFPOS;
SETLVAL;
BEGIN(INITIAL);
RETURN(T_COMMENT);
}
<S_COMMENT2>[^!]+ {
/* A run of characters other than '!' (newlines included). It cannot
   contain the "!>" terminator, so the whole match (yyleng characters) is
   handed to APPEND_TO_TMP. */
UPDATE_BUFPOS;
APPEND_TO_TMP(yyleng);
}
<S_COMMENT2>!/[^>] {
/* A '!' that is not the start of the "!>" terminator is part of the
   comment text. Only the '!' itself is consumed; the following character
   is trailing context and is rescanned. The previous pattern "![^>]+"
   greedily ate everything up to the next '>', including the '!' of a
   later "!>", so input such as "<! a ! b !>" or "<! x !!>" never reached
   the terminator rule and no T_COMMENT was produced. */
UPDATE_BUFPOS;
APPEND_TO_TMP(yyleng);
}
<S_COMMENT2>. {
/* Fallback for a single character that no other S_COMMENT2 rule matched
   (for example a '!' that is the last character available). Nothing is
   consumed into the buffer position and the start condition is unchanged.
   NOTE(review): T_WAIT presumably tells the caller to feed more data and
   rescan -- confirm against the parser. */
return T_WAIT;
}
/*********************** DOCTYPE ************************/
<INITIAL><![Dd][Oo][Cc][Tt][Yy][Pp][Ee] {
UPDATE_BUFPOS;