From 7ff1edcd908fa482de8e2df82615dbe32b3b216f Mon Sep 17 00:00:00 2001 From: calvin Date: Tue, 19 Aug 2003 11:48:49 +0000 Subject: [PATCH] fix parsing of trailing end tag garbage git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1023 e7d03fd6-7b0d-0410-9947-9c21f3af8025 --- linkcheck/parser/htmllex.c | 83 +++++++++++++++++++------------------- linkcheck/parser/htmllex.l | 3 +- 2 files changed, 42 insertions(+), 44 deletions(-) diff --git a/linkcheck/parser/htmllex.c b/linkcheck/parser/htmllex.c index 2d935f1f..4ae879cb 100644 --- a/linkcheck/parser/htmllex.c +++ b/linkcheck/parser/htmllex.c @@ -2966,7 +2966,7 @@ static yyconst flex_int32_t yy_nxt[][256] = 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, - -81, 161, 161, 161, 161, 161, 161, 161, 161, 161, + -81, 161, -81, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, @@ -2997,7 +2997,7 @@ static yyconst flex_int32_t yy_nxt[][256] = 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, - -82, 161, 161, 161, 161, 161, 161, 161, 161, 161, + -82, 161, -82, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, @@ -3054,34 +3054,34 @@ static yyconst flex_int32_t yy_nxt[][256] = }, { - 53, 161, 161, 161, 161, 161, 161, 161, 161, 161, - 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, - 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, - 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, - 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, - 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, + 53, -84, -84, -84, -84, -84, -84, -84, -84, -84, + -84, -84, -84, -84, -84, -84, -84, -84, -84, -84, + -84, -84, -84, -84, -84, -84, -84, -84, -84, -84, + -84, -84, -84, -84, -84, -84, -84, -84, -84, -84, + -84, -84, -84, -84, -84, -84, -84, -84, -84, -84, + -84, -84, -84, -84, -84, -84, -84, -84, -84, -84, - -84, 161, 161, 161, 161, 161, 161, 161, 161, 161, - 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, - 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, - 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, - 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, - 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, - 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, - 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, - 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, - 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, + -84, -84, -84, -84, -84, -84, -84, -84, -84, -84, + -84, -84, -84, -84, -84, -84, -84, -84, -84, -84, + -84, -84, -84, -84, -84, -84, -84, -84, -84, -84, + -84, -84, -84, -84, -84, -84, -84, -84, -84, -84, + -84, -84, -84, -84, -84, -84, -84, -84, -84, -84, + -84, -84, -84, -84, -84, -84, -84, -84, -84, -84, + -84, -84, -84, -84, -84, -84, -84, -84, -84, -84, + -84, -84, -84, -84, -84, -84, -84, -84, -84, -84, + -84, -84, -84, -84, -84, -84, -84, -84, -84, -84, + -84, -84, -84, -84, -84, -84, -84, -84, -84, -84, - 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, - 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, - 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, - 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, - 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, - 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, - 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, - 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, - 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, - 161, 161, 161, 161, 161, 161 + -84, -84, -84, -84, -84, -84, -84, -84, -84, -84, + -84, -84, -84, -84, -84, -84, -84, -84, -84, -84, + -84, -84, -84, -84, -84, -84, -84, -84, -84, -84, + -84, -84, -84, -84, -84, -84, -84, -84, -84, -84, + -84, -84, -84, -84, -84, -84, -84, -84, -84, -84, + -84, -84, -84, -84, -84, -84, -84, -84, -84, -84, + -84, -84, -84, -84, -84, -84, -84, -84, -84, -84, + -84, -84, -84, -84, -84, -84, -84, -84, -84, -84, + -84, -84, -84, -84, -84, -84, -84, -84, -84, -84, + -84, -84, -84, -84, -84, -84 }, @@ -5494,7 +5494,7 @@ static yyconst flex_int32_t yy_nxt[][256] = 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, - -161, 161, 161, 161, 161, 161, 161, 161, 161, 161, + -161, 161, -161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, @@ -8595,7 +8595,7 @@ static yyconst yy_state_type yy_NUL_trans[257] = 95, 95, 0, 132, 132, 133, 139, 139, 140, 143, 143, 144, 146, 146, 147, 149, 0, 150, 150, 151, 153, 0, 0, 0, 0, 0, 156, 0, 0, 0, - 161, 161, 0, 161, 165, 165, 0, 0, 166, 169, + 161, 161, 0, 0, 165, 165, 0, 0, 166, 169, 171, 0, 0, 0, 0, 172, 0, 0, 0, 173, 0, 174, 176, 176, 177, 179, 179, 180, 0, 0, @@ -8630,7 +8630,7 @@ static yyconst flex_int32_t yy_rule_linenum[109] = 761, 768, 775, 792, 809, 814, 820, 837, 854, 869, 885, 892, 908, 915, 921, 927, 936, 946, 957, 964, - 973, 979, 989, 1000, 1005, 1013, 1021, 1029 + 973, 978, 988, 999, 1004, 1012, 1020, 1028 } ; /* The intent behind this definition is that it'll catch @@ -10426,7 +10426,6 @@ YY_RULE_SETUP { UPDATE_BUFPOS; UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); } YY_BREAK case 102: @@ -10435,7 +10434,7 @@ case 102: yyg->yy_c_buf_p = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 979 "htmllex.l" +#line 978 "htmllex.l" { UPDATE_BUFPOS; UPDATE_LINE; @@ -10452,7 +10451,7 @@ case 103: yyg->yy_c_buf_p = yy_cp -= 1; YY_DO_BEFORE_ACTION; /* set up yytext again */ YY_RULE_SETUP -#line 989 "htmllex.l" +#line 988 "htmllex.l" { UPDATE_BUFPOS; UPDATE_LINE; @@ -10466,7 +10465,7 @@ YY_RULE_SETUP YY_BREAK case 104: YY_RULE_SETUP -#line 1000 "htmllex.l" +#line 999 "htmllex.l" { return T_WAIT; } @@ -10475,7 +10474,7 @@ YY_RULE_SETUP case 105: /* rule 105 can match eol */ YY_RULE_SETUP -#line 1005 "htmllex.l" +#line 1004 "htmllex.l" { UPDATE_BUFPOS; UPDATE_LINE; @@ -10486,7 +10485,7 @@ YY_RULE_SETUP YY_BREAK case 106: YY_RULE_SETUP -#line 1013 "htmllex.l" +#line 1012 "htmllex.l" { UPDATE_BUFPOS; UPDATE_COLUMN; @@ -10498,7 +10497,7 @@ YY_RULE_SETUP case 107: /* rule 107 can match eol */ YY_RULE_SETUP -#line 1021 "htmllex.l" +#line 1020 "htmllex.l" { UPDATE_BUFPOS; UPDATE_LINE; @@ -10509,17 +10508,17 @@ YY_RULE_SETUP YY_BREAK case 108: YY_RULE_SETUP -#line 1029 "htmllex.l" +#line 1028 "htmllex.l" { return T_WAIT; } YY_BREAK case 109: YY_RULE_SETUP -#line 1033 "htmllex.l" +#line 1032 "htmllex.l" ECHO; YY_BREAK -#line 10523 "htmllex.c" +#line 10522 "htmllex.c" case YY_END_OF_BUFFER: { @@ -11706,7 +11705,7 @@ static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner) #undef YY_DECL_IS_OURS #undef YY_DECL #endif -#line 1033 "htmllex.l" +#line 1032 "htmllex.l" diff --git a/linkcheck/parser/htmllex.l b/linkcheck/parser/htmllex.l index 38ff006e..30ab5571 100644 --- a/linkcheck/parser/htmllex.l +++ b/linkcheck/parser/htmllex.l @@ -970,10 +970,9 @@ RX_DATA [-a-zA-Z0-9_:]+ RETURN(T_ELEMENT_END); } -[^<]+ { +[^<>]+ { UPDATE_BUFPOS; UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); } <{RX_WHITE_SPACE}*\/{RX_WHITE_SPACE}*/[A-Za-z] {