diff --git a/MANIFEST.in b/MANIFEST.in index 7ca73b44..e337532c 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -7,11 +7,6 @@ include cgi-bin/lc.wsgi cgi-bin/README include Makefile include cgi-bin/lconline/*.html cgi-bin/lconline/*.de cgi-bin/lconline/*.en include cgi-bin/lconline/*.js cgi-bin/lconline/*.css cgi-bin/lconline/*.ico -include linkcheck/HtmlParser/Makefile -include linkcheck/HtmlParser/htmllex.l -include linkcheck/HtmlParser/htmlparse.y -include linkcheck/HtmlParser/*.h -include linkcheck/HtmlParser/fixincludes.awk include po/*.po po/*.mo po/*.pot po/Makefile include doc/*.example doc/*.txt include doc/html/*.ico diff --git a/Makefile b/Makefile index 7b98489c..4ddecb91 100644 --- a/Makefile +++ b/Makefile @@ -53,7 +53,6 @@ all: clean: -$(PYTHON) setup.py clean --all rm -f $(LAPPNAME)-out.* *-stamp* - $(MAKE) -C linkcheck/HtmlParser clean find . -name '*.py[co]' -exec rm -f {} \; find . -name '*.bak' -exec rm -f {} \; find . -depth -name '__pycache__' -exec rm -rf {} \; @@ -75,9 +74,7 @@ locale: # to build in the current directory localbuild: MANIFEST locale - $(MAKE) -C linkcheck/HtmlParser $(PYTHON) setup.py build - cp -f build/lib.$(PLATFORM)-$(PYVER)*/linkcheck/HtmlParser/htmlsax*.so linkcheck/HtmlParser release: distclean releasecheck filescheck $(MAKE) dist sign register upload homepage tag changelog deb diff --git a/linkcheck/HtmlParser/Makefile b/linkcheck/HtmlParser/Makefile deleted file mode 100644 index d7bcf8e9..00000000 --- a/linkcheck/HtmlParser/Makefile +++ /dev/null @@ -1,29 +0,0 @@ -# This HTML parser needs flex >= 2.5.xx from http://lex.sf.net/ for -# reentrant bison parser support and uses features of bison >= 3.0.x -LEX = flex -YACC = bison -PYINCLUDE=-I/usr/include/python2.7 - -all: htmllex.c htmlparse.c - -htmlsax.so: htmllex.o htmlparse.o s_util.o - gcc -pthread -shared $^ -o htmlsax.so - -%.o: %.c - gcc -g -std=c99 -O3 -Wall -pedantic -Wstrict-prototypes -fPIC -I. $(PYINCLUDE) -c $< -o $@ - -htmlparse.h htmlparse.c: htmlparse.y htmlsax.h - $(YACC) --output=htmlparse.c htmlparse.y - -htmllex.l: htmlparse.h - -htmllex.c: htmllex.l htmlsax.h - $(LEX) htmllex.l - awk -f fixincludes.awk htmllex.c > htmllex.c.fixed; mv -f htmllex.c.fixed htmllex.c - -clean: - rm -f *.o *.so *.pyc *.pyo *.output - -distclean: clean - rm -f htmlparse.c htmlparse.h htmllex.c - diff --git a/linkcheck/HtmlParser/__init__.py b/linkcheck/HtmlParser/__init__.py index b3bcbc66..b7d07385 100644 --- a/linkcheck/HtmlParser/__init__.py +++ b/linkcheck/HtmlParser/__init__.py @@ -15,64 +15,7 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. """ -Fast HTML parser module written in C with the following features: - -- Reentrant - As soon as any HTML string data is available, we try to feed it - to the HTML parser. This means that the parser has to scan possible - incomplete data, recognizing as much as it can. Incomplete trailing - data is saved for subsequent calls, or it is just flushed into the - output buffer with the flush() function. - A reset() brings the parser back to its initial state, throwing away all - buffered data. - -- Coping with HTML syntax errors - The parser recognizes as much as it can and passes the rest - of the data as TEXT tokens. - The scanner only passes complete recognized HTML syntax elements to - the parser. Invalid syntax elements are passed as TEXT. This way we do - not need the bison error recovery. - Incomplete data is rescanned the next time the parser calls yylex() or - when it is being flush()ed. - - The following syntax errors will be recognized correctly: - - - Unquoted attribute values. - - Missing beginning quote of attribute values. - - Invalid "" end tags in script modus. - - Missing ">" in tags. - - Invalid characters in tag or attribute names. - - The following syntax errors will not be recognized: - - - Missing end quote of attribute values. On the TODO list. - - Unknown HTML tag or attribute names. - - Invalid nesting of tags. - - Additionally the parser has the following features: - - - NULL bytes are changed into spaces - - inside a is matched, but not itself */ -case 47: -/* rule 47 can match eol */ -*yy_cp = yyg->yy_hold_char; /* undo effects of setting up yytext */ -YY_LINENO_REWIND_TO(yy_bp + 1); -yyg->yy_c_buf_p = yy_cp = yy_bp + 1; -YY_DO_BEFORE_ACTION; /* set up yytext again */ -YY_RULE_SETUP -#line 556 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 48: -/* rule 48 can match eol */ -*yy_cp = yyg->yy_hold_char; /* undo effects of setting up yytext */ -YY_LINENO_REWIND_TO(yy_cp - 1); -yyg->yy_c_buf_p = yy_cp -= 1; -YY_DO_BEFORE_ACTION; /* set up yytext again */ -YY_RULE_SETUP -#line 561 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 49: -/* rule 49 can match eol */ -*yy_cp = yyg->yy_hold_char; /* undo effects of setting up yytext */ -YY_LINENO_REWIND_TO(yy_cp - 1); -yyg->yy_c_buf_p = yy_cp -= 1; -YY_DO_BEFORE_ACTION; /* set up yytext again */ -YY_RULE_SETUP -#line 566 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 50: -/* rule 50 can match eol */ -*yy_cp = yyg->yy_hold_char; /* undo effects of setting up yytext */ -YY_LINENO_REWIND_TO(yy_cp - 1); -yyg->yy_c_buf_p = yy_cp -= 1; -YY_DO_BEFORE_ACTION; /* set up yytext again */ -YY_RULE_SETUP -#line 571 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 51: -/* rule 51 can match eol */ -*yy_cp = yyg->yy_hold_char; /* undo effects of setting up yytext */ -YY_LINENO_REWIND_TO(yy_cp - 1); -yyg->yy_c_buf_p = yy_cp -= 1; -YY_DO_BEFORE_ACTION; /* set up yytext again */ -YY_RULE_SETUP -#line 576 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 52: -/* rule 52 can match eol */ -*yy_cp = yyg->yy_hold_char; /* undo effects of setting up yytext */ -YY_LINENO_REWIND_TO(yy_cp - 1); -yyg->yy_c_buf_p = yy_cp -= 1; -YY_DO_BEFORE_ACTION; /* set up yytext again */ -YY_RULE_SETUP -#line 581 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 53: -/* rule 53 can match eol */ -*yy_cp = yyg->yy_hold_char; /* undo effects of setting up yytext */ -YY_LINENO_REWIND_TO(yy_cp - 1); -yyg->yy_c_buf_p = yy_cp -= 1; -YY_DO_BEFORE_ACTION; /* set up yytext again */ -YY_RULE_SETUP -#line 586 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 54: -/* rule 54 can match eol */ -*yy_cp = yyg->yy_hold_char; /* undo effects of setting up yytext */ -YY_LINENO_REWIND_TO(yy_cp - 1); -yyg->yy_c_buf_p = yy_cp -= 1; -YY_DO_BEFORE_ACTION; /* set up yytext again */ -YY_RULE_SETUP -#line 591 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 55: -/* rule 55 can match eol */ -YY_RULE_SETUP -#line 596 "htmllex.l" -{ - return T_WAIT; -} - YY_BREAK -case 56: -YY_RULE_SETUP -#line 600 "htmllex.l" -{ - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); - BEGIN(S_SCRIPT_APOS_ESC); -} - YY_BREAK -case 57: -/* rule 57 can match eol */ -YY_RULE_SETUP -#line 606 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 58: -YY_RULE_SETUP -#line 611 "htmllex.l" -{ - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); - BEGIN(S_SCRIPT); -} - YY_BREAK -case 59: -/* rule 59 can match eol */ -YY_RULE_SETUP -#line 617 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); - BEGIN(S_SCRIPT_APOS); -} - YY_BREAK -case 60: -YY_RULE_SETUP -#line 623 "htmllex.l" -{ - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); - BEGIN(S_SCRIPT_STRING_ESC); -} - YY_BREAK -case 61: -/* rule 61 can match eol */ -YY_RULE_SETUP -#line 629 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 62: -YY_RULE_SETUP -#line 634 "htmllex.l" -{ - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); - BEGIN(S_SCRIPT); -} - YY_BREAK -case 63: -/* rule 63 can match eol */ -YY_RULE_SETUP -#line 640 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); - BEGIN(S_SCRIPT_STRING); -} - YY_BREAK -case 64: -YY_RULE_SETUP -#line 646 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 65: -/* rule 65 can match eol */ -YY_RULE_SETUP -#line 651 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); - BEGIN(S_SCRIPT); -} - YY_BREAK -case 66: -/* rule 66 can match eol */ -YY_RULE_SETUP -#line 657 "htmllex.l" -{ - return T_WAIT; -} - YY_BREAK -case 67: -/* rule 67 can match eol */ -YY_RULE_SETUP -#line 661 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 68: -YY_RULE_SETUP -#line 666 "htmllex.l" -{ - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); - BEGIN(S_SCRIPT); -} - YY_BREAK -/*********************** STYLE ************************/ -case 69: -/* rule 69 can match eol */ -YY_RULE_SETUP -#line 673 "htmllex.l" -{ - UPDATE_LINE; - SETLVAL_UNICODE; - BEGIN(INITIAL); - RETURN(T_STYLE); -} - YY_BREAK -case 70: -/* rule 70 can match eol */ -YY_RULE_SETUP -#line 680 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -/* this is so shitty */ -case 71: -/* rule 71 can match eol */ -*yy_cp = yyg->yy_hold_char; /* undo effects of setting up yytext */ -YY_LINENO_REWIND_TO(yy_bp + 1); -yyg->yy_c_buf_p = yy_cp = yy_bp + 1; -YY_DO_BEFORE_ACTION; /* set up yytext again */ -YY_RULE_SETUP -#line 686 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 72: -/* rule 72 can match eol */ -*yy_cp = yyg->yy_hold_char; /* undo effects of setting up yytext */ -YY_LINENO_REWIND_TO(yy_cp - 1); -yyg->yy_c_buf_p = yy_cp -= 1; -YY_DO_BEFORE_ACTION; /* set up yytext again */ -YY_RULE_SETUP -#line 691 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 73: -/* rule 73 can match eol */ -*yy_cp = yyg->yy_hold_char; /* undo effects of setting up yytext */ -YY_LINENO_REWIND_TO(yy_cp - 1); -yyg->yy_c_buf_p = yy_cp -= 1; -YY_DO_BEFORE_ACTION; /* set up yytext again */ -YY_RULE_SETUP -#line 696 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 74: -/* rule 74 can match eol */ -*yy_cp = yyg->yy_hold_char; /* undo effects of setting up yytext */ -YY_LINENO_REWIND_TO(yy_cp - 1); -yyg->yy_c_buf_p = yy_cp -= 1; -YY_DO_BEFORE_ACTION; /* set up yytext again */ -YY_RULE_SETUP -#line 701 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 75: -/* rule 75 can match eol */ -*yy_cp = yyg->yy_hold_char; /* undo effects of setting up yytext */ -YY_LINENO_REWIND_TO(yy_cp - 1); -yyg->yy_c_buf_p = yy_cp -= 1; -YY_DO_BEFORE_ACTION; /* set up yytext again */ -YY_RULE_SETUP -#line 706 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 76: -/* rule 76 can match eol */ -*yy_cp = yyg->yy_hold_char; /* undo effects of setting up yytext */ -YY_LINENO_REWIND_TO(yy_cp - 1); -yyg->yy_c_buf_p = yy_cp -= 1; -YY_DO_BEFORE_ACTION; /* set up yytext again */ -YY_RULE_SETUP -#line 711 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 77: -/* rule 77 can match eol */ -*yy_cp = yyg->yy_hold_char; /* undo effects of setting up yytext */ -YY_LINENO_REWIND_TO(yy_cp - 1); -yyg->yy_c_buf_p = yy_cp -= 1; -YY_DO_BEFORE_ACTION; /* set up yytext again */ -YY_RULE_SETUP -#line 716 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 78: -/* rule 78 can match eol */ -YY_RULE_SETUP -#line 721 "htmllex.l" -{ - return T_WAIT; -} - YY_BREAK -/*********************** ATTRS ************************/ -case 79: -YY_RULE_SETUP -#line 726 "htmllex.l" -{ - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); - BEGIN(S_ATTR2); -} - YY_BREAK -case 80: -YY_RULE_SETUP -#line 732 "htmllex.l" -{ - UPDATE_COLUMN; - FLUSH_ATTRS; - BEGIN(INITIAL); - SET_ATTR_LVAL; - RETURN(T_ELEMENT_START_END); -} - YY_BREAK -case 81: -/* rule 81 can match eol */ -YY_RULE_SETUP -#line 740 "htmllex.l" -{ - UPDATE_LINE; -} - YY_BREAK -case 82: -YY_RULE_SETUP -#line 744 "htmllex.l" -{ - return T_WAIT; -} - YY_BREAK -case 83: -YY_RULE_SETUP -#line 748 "htmllex.l" -{ - UPDATE_COLUMN; - FLUSH_ATTRS; - SCRIPT_CHECK; - SET_ATTR_LVAL; - RETURN(T_ELEMENT_START); -} - YY_BREAK -case 84: -YY_RULE_SETUP -#line 756 "htmllex.l" -{ - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 85: -/* rule 85 can match eol */ -YY_RULE_SETUP -#line 761 "htmllex.l" -{ - /* Line continuations */ - UPDATE_LINE; -} - YY_BREAK -case 86: -YY_RULE_SETUP -#line 766 "htmllex.l" -{ - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 87: -YY_RULE_SETUP -#line 771 "htmllex.l" -{ - return T_WAIT; -} - YY_BREAK -case 88: -/* rule 88 can match eol */ -YY_RULE_SETUP -#line 775 "htmllex.l" -{ - UPDATE_LINE; - BEGIN(S_ATTR3); -} - YY_BREAK -case 89: -/* rule 89 can match eol */ -YY_RULE_SETUP -#line 780 "htmllex.l" -{ - UPDATE_LINE; - LOWER_TMP; - PYSTRING_TMP_UNICODE(yyextra->tmp_attrname); - RESIZE_BUF(yyextra->tmp_buf, 1); - BEGIN(S_ATTR4); -} - YY_BREAK -case 90: -YY_RULE_SETUP -#line 788 "htmllex.l" -{ - UPDATE_COLUMN; - LOWER_TMP; - PYSTRING_TMP_UNICODE(yyextra->tmp_attrname); - RESIZE_BUF(yyextra->tmp_buf, 1); - if (yyextra->tmp_attrval != NULL) return T_ERROR; - CHECK_MINUSONE(PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname, Py_None)); - Py_CLEAR(yyextra->tmp_attrname); - APPEND_TO_TMP(yyleng); - BEGIN(S_ATTR2); -} - YY_BREAK -case 91: -/* rule 91 can match eol */ -YY_RULE_SETUP -#line 800 "htmllex.l" -{ - /* this also skips whitespace! */ - UPDATE_LINE; -} - YY_BREAK -case 92: -YY_RULE_SETUP -#line 805 "htmllex.l" -{ - /* backslash escapes seen at freecode.com */ - UPDATE_COLUMN; - BEGIN(S_STRING); -} - YY_BREAK -case 93: -YY_RULE_SETUP -#line 811 "htmllex.l" -{ - UPDATE_COLUMN; - BEGIN(S_STRING); -} - YY_BREAK -case 94: -YY_RULE_SETUP -#line 816 "htmllex.l" -{ - UPDATE_COLUMN; - BEGIN(S_APOSSTRING); -} - YY_BREAK -case 95: -YY_RULE_SETUP -#line 821 "htmllex.l" -{ - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); - BEGIN(S_ATTR5); -} - YY_BREAK -case 96: -YY_RULE_SETUP -#line 827 "htmllex.l" -{ - UPDATE_COLUMN; - PYSTRING_TMP_UNICODE(yyextra->tmp_attrval); - RESIZE_BUF(yyextra->tmp_buf, 1); - CHECK_NULL(yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities, - "O", yyextra->tmp_attrval)); - CHECK_MINUSONE(PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname, - yyextra->tmp_attrval)); - Py_CLEAR(yyextra->tmp_attrname); - Py_CLEAR(yyextra->tmp_attrval); - SCRIPT_CHECK; - SET_ATTR_LVAL; - RETURN(T_ELEMENT_START); -} - YY_BREAK -case 97: -/* rule 97 can match eol */ -YY_RULE_SETUP -#line 842 "htmllex.l" -{ - UPDATE_LINE; -} - YY_BREAK -case 98: -YY_RULE_SETUP -#line 846 "htmllex.l" -{ - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 99: -YY_RULE_SETUP -#line 851 "htmllex.l" -{ - UPDATE_COLUMN; - PYSTRING_TMP_UNICODE(yyextra->tmp_attrval); - RESIZE_BUF(yyextra->tmp_buf, 1); - CHECK_NULL(yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities, - "O", yyextra->tmp_attrval)); - CHECK_MINUSONE(PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname, - yyextra->tmp_attrval)); - Py_CLEAR(yyextra->tmp_attrname); - Py_CLEAR(yyextra->tmp_attrval); - SCRIPT_CHECK; - SET_ATTR_LVAL; - RETURN(T_ELEMENT_START); -} - YY_BREAK -case 100: -YY_RULE_SETUP -#line 866 "htmllex.l" -{ - UPDATE_COLUMN; - PYSTRING_TMP_UNICODE(yyextra->tmp_attrval); - RESIZE_BUF(yyextra->tmp_buf, 1); - CHECK_NULL(yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities, - "O", yyextra->tmp_attrval)); - CHECK_MINUSONE(PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname, - yyextra->tmp_attrval)); - Py_CLEAR(yyextra->tmp_attrname); - Py_CLEAR(yyextra->tmp_attrval); - BEGIN(INITIAL); - SET_ATTR_LVAL; - RETURN(T_ELEMENT_START_END); -} - YY_BREAK -case 101: -YY_RULE_SETUP -#line 881 "htmllex.l" -{ - UPDATE_COLUMN; - PYSTRING_TMP_UNICODE(yyextra->tmp_attrval); - RESIZE_BUF(yyextra->tmp_buf, 1); - CHECK_NULL(yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities, - "O", yyextra->tmp_attrval)); - CHECK_MINUSONE(PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname, - yyextra->tmp_attrval)); - Py_CLEAR(yyextra->tmp_attrname); - Py_CLEAR(yyextra->tmp_attrval); - BEGIN(S_ATTR1); -} - YY_BREAK -case 102: -/* rule 102 can match eol */ -YY_RULE_SETUP -#line 894 "htmllex.l" -{ - UPDATE_LINE; - PYSTRING_TMP_UNICODE(yyextra->tmp_attrval); - RESIZE_BUF(yyextra->tmp_buf, 1); - CHECK_NULL(yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities, - "O", yyextra->tmp_attrval)); - CHECK_MINUSONE(PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname, - yyextra->tmp_attrval)); - Py_CLEAR(yyextra->tmp_attrname); - Py_CLEAR(yyextra->tmp_attrval); - BEGIN(S_ATTR1); -} - YY_BREAK -case 103: -*yy_cp = yyg->yy_hold_char; /* undo effects of setting up yytext */ -yyg->yy_c_buf_p = yy_cp = yy_bp + 1; -YY_DO_BEFORE_ACTION; /* set up yytext again */ -YY_RULE_SETUP -#line 907 "htmllex.l" -{ - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); - BEGIN(S_APOSSTRING_ESC); -} - YY_BREAK -case 104: -YY_RULE_SETUP -#line 913 "htmllex.l" -{ - return T_WAIT; -} - YY_BREAK -case 105: -YY_RULE_SETUP -#line 917 "htmllex.l" -{ - UPDATE_COLUMN; - PYSTRING_TMP_UNICODE(yyextra->tmp_attrval); - RESIZE_BUF(yyextra->tmp_buf, 1); - CHECK_NULL(yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities, - "O", yyextra->tmp_attrval)); - CHECK_MINUSONE(PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname, - yyextra->tmp_attrval)); - Py_CLEAR(yyextra->tmp_attrname); - Py_CLEAR(yyextra->tmp_attrval); - BEGIN(S_ATTR1); -} - YY_BREAK -case 106: -/* rule 106 can match eol */ -YY_RULE_SETUP -#line 930 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 107: -/* rule 107 can match eol */ -YY_RULE_SETUP -#line 936 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); - BEGIN(S_APOSSTRING); -} - YY_BREAK -case 108: -YY_RULE_SETUP -#line 942 "htmllex.l" -{ - UPDATE_COLUMN; - PYSTRING_TMP_UNICODE(yyextra->tmp_attrval); - RESIZE_BUF(yyextra->tmp_buf, 1); - CHECK_NULL(yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities, - "O", yyextra->tmp_attrval)); - CHECK_MINUSONE(PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname, - yyextra->tmp_attrval)); - Py_CLEAR(yyextra->tmp_attrname); - Py_CLEAR(yyextra->tmp_attrval); - BEGIN(S_ATTR1); -} - YY_BREAK -case 109: -*yy_cp = yyg->yy_hold_char; /* undo effects of setting up yytext */ -yyg->yy_c_buf_p = yy_cp = yy_bp + 1; -YY_DO_BEFORE_ACTION; /* set up yytext again */ -YY_RULE_SETUP -#line 955 "htmllex.l" -{ - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); - BEGIN(S_STRING_ESC); -} - YY_BREAK -case 110: -/* rule 110 can match eol */ -YY_RULE_SETUP -#line 961 "htmllex.l" -{ - UPDATE_LINE; -} - YY_BREAK -case 111: -YY_RULE_SETUP -#line 965 "htmllex.l" -{ - return T_WAIT; -} - YY_BREAK -case 112: -/* rule 112 can match eol */ -YY_RULE_SETUP -#line 969 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 113: -/* rule 113 can match eol */ -YY_RULE_SETUP -#line 974 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); - BEGIN(S_STRING); -} - YY_BREAK -/*********************** TAGEND ************************/ -case 114: -/* rule 114 can match eol */ -*yy_cp = yyg->yy_hold_char; /* undo effects of setting up yytext */ -YY_LINENO_REWIND_TO(yy_cp - 1); -yyg->yy_c_buf_p = yy_cp -= 1; -YY_DO_BEFORE_ACTION; /* set up yytext again */ -YY_RULE_SETUP -#line 982 "htmllex.l" -{ - UPDATE_LINE; - BEGIN(S_TAGEND); -} - YY_BREAK -case 115: -YY_RULE_SETUP -#line 987 "htmllex.l" -{ - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); -} - YY_BREAK -case 116: -/* rule 116 can match eol */ -YY_RULE_SETUP -#line 992 "htmllex.l" -{ - UPDATE_LINE; - LOWER_TMP; - SETLVAL_ASCII; - BEGIN(INITIAL); - RETURN(T_ELEMENT_END); -} - YY_BREAK -case 117: -/* rule 117 can match eol */ -*yy_cp = yyg->yy_hold_char; /* undo effects of setting up yytext */ -YY_LINENO_REWIND_TO(yy_cp - 1); -yyg->yy_c_buf_p = yy_cp -= 1; -YY_DO_BEFORE_ACTION; /* set up yytext again */ -YY_RULE_SETUP -#line 1000 "htmllex.l" -{ - UPDATE_LINE; - LOWER_TMP; - SETLVAL_ASCII; - BEGIN(S_TAGEND); - RETURN(T_ELEMENT_END); -} - YY_BREAK -case 118: -/* rule 118 can match eol */ -*yy_cp = yyg->yy_hold_char; /* undo effects of setting up yytext */ -YY_LINENO_REWIND_TO(yy_cp - 1); -yyg->yy_c_buf_p = yy_cp -= 1; -YY_DO_BEFORE_ACTION; /* set up yytext again */ -YY_RULE_SETUP -#line 1008 "htmllex.l" -{ - UPDATE_LINE; - LOWER_TMP; - SETLVAL_ASCII; - CHECK_NULL(yyextra->tmp_attrs = PyObject_CallObject(yyextra->list_dict, NULL)); - BEGIN(S_TAGSTART); - RETURN(T_ELEMENT_END); -} - YY_BREAK -case 119: -/* rule 119 can match eol */ -YY_RULE_SETUP -#line 1017 "htmllex.l" -{ - UPDATE_LINE; - /* ignore any trailing garbage of this end tag */ - BEGIN(S_TAGEND2); -} - YY_BREAK -case 120: -/* rule 120 can match eol */ -YY_RULE_SETUP -#line 1023 "htmllex.l" -{ - return T_WAIT; -} - YY_BREAK -case 121: -YY_RULE_SETUP -#line 1027 "htmllex.l" -{ - UPDATE_COLUMN; - LOWER_TMP; - SETLVAL_ASCII; - BEGIN(INITIAL); - RETURN(T_ELEMENT_END); -} - YY_BREAK -case 122: -/* rule 122 can match eol */ -YY_RULE_SETUP -#line 1035 "htmllex.l" -{ - UPDATE_LINE; -} - YY_BREAK -case 123: -/* rule 123 can match eol */ -*yy_cp = yyg->yy_hold_char; /* undo effects of setting up yytext */ -YY_LINENO_REWIND_TO(yy_cp - 1); -yyg->yy_c_buf_p = yy_cp -= 1; -YY_DO_BEFORE_ACTION; /* set up yytext again */ -YY_RULE_SETUP -#line 1039 "htmllex.l" -{ - UPDATE_LINE; - LOWER_TMP; - SETLVAL_ASCII; - BEGIN(S_TAGEND); - RETURN(T_ELEMENT_END); -} - YY_BREAK -case 124: -/* rule 124 can match eol */ -*yy_cp = yyg->yy_hold_char; /* undo effects of setting up yytext */ -YY_LINENO_REWIND_TO(yy_cp - 1); -yyg->yy_c_buf_p = yy_cp -= 1; -YY_DO_BEFORE_ACTION; /* set up yytext again */ -YY_RULE_SETUP -#line 1047 "htmllex.l" -{ - UPDATE_LINE; - LOWER_TMP; - SETLVAL_ASCII; - CHECK_NULL(yyextra->tmp_attrs = PyObject_CallObject(yyextra->list_dict, NULL)); - BEGIN(S_TAGSTART); - RETURN(T_ELEMENT_END); -} - YY_BREAK -case 125: -/* rule 125 can match eol */ -YY_RULE_SETUP -#line 1056 "htmllex.l" -{ - return T_WAIT; -} - YY_BREAK -/*********************** TEXT ************************/ -case 126: -/* rule 126 can match eol */ -YY_RULE_SETUP -#line 1060 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); - SETLVAL_UNICODE; - RETURN(T_TEXT); -} - YY_BREAK -case 127: -YY_RULE_SETUP -#line 1067 "htmllex.l" -{ - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); - SETLVAL_UNICODE; - RETURN(T_TEXT); -} - YY_BREAK -case 128: -/* rule 128 can match eol */ -YY_RULE_SETUP -#line 1074 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); - SETLVAL_UNICODE; - RETURN(T_TEXT); -} - YY_BREAK -case 129: -/* rule 129 can match eol */ -YY_RULE_SETUP -#line 1080 "htmllex.l" -{ - UPDATE_LINE; - APPEND_TO_TMP(yyleng); - SETLVAL_UNICODE; - RETURN(T_TEXT); -} - YY_BREAK -case 130: -/* rule 130 can match eol */ -YY_RULE_SETUP -#line 1087 "htmllex.l" -{ - return T_WAIT; -} - YY_BREAK -case 131: -YY_RULE_SETUP -#line 1091 "htmllex.l" -ECHO; - YY_BREAK -#line 4711 "htmllex.c" - - case YY_END_OF_BUFFER: - { - /* Amount of text matched not including the EOB char. */ - int yy_amount_of_matched_text = (int) (yy_cp - yyg->yytext_ptr) - 1; - - /* Undo the effects of YY_DO_BEFORE_ACTION. */ - *yy_cp = yyg->yy_hold_char; - YY_RESTORE_YY_MORE_OFFSET - - if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW ) - { - /* We're scanning a new file or input source. It's - * possible that this happened because the user - * just pointed yyin at a new source and called - * yylex(). If so, then we have to assure - * consistency between YY_CURRENT_BUFFER and our - * globals. Here is the right place to do so, because - * this is the first action (other than possibly a - * back-up) that will match for the new input source. - */ - yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; -/* %if-c-only */ - YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin; -/* %endif */ -/* %if-c++-only */ -/* %endif */ - YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL; - } - - /* Note that here we test for yy_c_buf_p "<=" to the position - * of the first EOB in the buffer, since yy_c_buf_p will - * already have been incremented past the NUL character - * (since all states make transitions on EOB to the - * end-of-buffer state). Contrast this with the test - * in input(). - */ - if ( yyg->yy_c_buf_p <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] ) - { /* This was really a NUL. */ - yy_state_type yy_next_state; - - yyg->yy_c_buf_p = yyg->yytext_ptr + yy_amount_of_matched_text; - - yy_current_state = yy_get_previous_state( yyscanner ); - - /* Okay, we're now positioned to make the NUL - * transition. We couldn't have - * yy_get_previous_state() go ahead and do it - * for us because it doesn't know how to deal - * with the possibility of jamming (and we don't - * want to build jamming into it because then it - * will run more slowly). - */ - - yy_next_state = yy_try_NUL_trans( yy_current_state , yyscanner); - - yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; - - if ( yy_next_state ) - { - /* Consume the NUL. */ - yy_cp = ++yyg->yy_c_buf_p; - yy_current_state = yy_next_state; - goto yy_match; - } - - else - { -/* %% [14.0] code to do back-up for compressed tables and set up yy_cp goes here */ - yy_cp = yyg->yy_c_buf_p; - goto yy_find_action; - } - } - - else switch ( yy_get_next_buffer( yyscanner ) ) - { - case EOB_ACT_END_OF_FILE: - { - yyg->yy_did_buffer_switch_on_eof = 0; - - if ( yywrap(yyscanner ) ) - { - /* Note: because we've taken care in - * yy_get_next_buffer() to have set up - * yytext, we can now set up - * yy_c_buf_p so that if some total - * hoser (like flex itself) wants to - * call the scanner after we return the - * YY_NULL, it'll still work - another - * YY_NULL will get returned. - */ - yyg->yy_c_buf_p = yyg->yytext_ptr + YY_MORE_ADJ; - - yy_act = YY_STATE_EOF(YY_START); - goto do_action; - } - - else - { - if ( ! yyg->yy_did_buffer_switch_on_eof ) - YY_NEW_FILE; - } - break; - } - - case EOB_ACT_CONTINUE_SCAN: - yyg->yy_c_buf_p = - yyg->yytext_ptr + yy_amount_of_matched_text; - - yy_current_state = yy_get_previous_state( yyscanner ); - - yy_cp = yyg->yy_c_buf_p; - yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; - goto yy_match; - - case EOB_ACT_LAST_MATCH: - yyg->yy_c_buf_p = - &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars]; - - yy_current_state = yy_get_previous_state( yyscanner ); - - yy_cp = yyg->yy_c_buf_p; - yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; - goto yy_find_action; - } - break; - } - - default: - YY_FATAL_ERROR( - "fatal flex scanner internal error--no action found" ); - } /* end of action switch */ - } /* end of scanning one token */ - } /* end of user's declarations */ -} /* end of yylex */ -/* %ok-for-header */ - -/* %if-c++-only */ -/* %not-for-header */ - -/* %ok-for-header */ - -/* %endif */ - -/* yy_get_next_buffer - try to read in a new buffer - * - * Returns a code representing an action: - * EOB_ACT_LAST_MATCH - - * EOB_ACT_CONTINUE_SCAN - continue scanning from current position - * EOB_ACT_END_OF_FILE - end of file - */ -/* %if-c-only */ -static int yy_get_next_buffer (yyscan_t yyscanner) -/* %endif */ -/* %if-c++-only */ -/* %endif */ -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; - char *source = yyg->yytext_ptr; - yy_size_t number_to_move, i; - int ret_val; - - if ( yyg->yy_c_buf_p > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] ) - YY_FATAL_ERROR( - "fatal flex scanner internal error--end of buffer missed" ); - - if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 ) - { /* Don't try to fill the buffer, so this is an EOF. */ - if ( yyg->yy_c_buf_p - yyg->yytext_ptr - YY_MORE_ADJ == 1 ) - { - /* We matched a single character, the EOB, so - * treat this as a final EOF. - */ - return EOB_ACT_END_OF_FILE; - } - - else - { - /* We matched some text prior to the EOB, first - * process it. - */ - return EOB_ACT_LAST_MATCH; - } - } - - /* Try to read more data. */ - - /* First move last chars to start of buffer. */ - number_to_move = (yy_size_t) (yyg->yy_c_buf_p - yyg->yytext_ptr) - 1; - - for ( i = 0; i < number_to_move; ++i ) - *(dest++) = *(source++); - - if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING ) - /* don't do the read, it's not guaranteed to return an EOF, - * just force an EOF - */ - YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars = 0; - - else - { - int num_to_read = - YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1; - - while ( num_to_read <= 0 ) - { /* Not enough room in the buffer - grow it. */ - - /* just a shorter name for the current buffer */ - YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE; - - int yy_c_buf_p_offset = - (int) (yyg->yy_c_buf_p - b->yy_ch_buf); - - if ( b->yy_is_our_buffer ) - { - int new_size = b->yy_buf_size * 2; - - if ( new_size <= 0 ) - b->yy_buf_size += b->yy_buf_size / 8; - else - b->yy_buf_size *= 2; - - b->yy_ch_buf = (char *) - /* Include room in for 2 EOB chars. */ - yyrealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 ,yyscanner ); - } - else - /* Can't grow it, we don't own it. */ - b->yy_ch_buf = NULL; - - if ( ! b->yy_ch_buf ) - YY_FATAL_ERROR( - "fatal error - scanner input buffer overflow" ); - - yyg->yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset]; - - num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size - - number_to_move - 1; - - } - - if ( num_to_read > YY_READ_BUF_SIZE ) - num_to_read = YY_READ_BUF_SIZE; - - /* Read in more data. */ - YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]), - yyg->yy_n_chars, num_to_read ); - - YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; - } - - if ( yyg->yy_n_chars == 0 ) - { - if ( number_to_move == YY_MORE_ADJ ) - { - ret_val = EOB_ACT_END_OF_FILE; - yyrestart(yyin ,yyscanner); - } - - else - { - ret_val = EOB_ACT_LAST_MATCH; - YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = - YY_BUFFER_EOF_PENDING; - } - } - - else - ret_val = EOB_ACT_CONTINUE_SCAN; - - if ((int) (yyg->yy_n_chars + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) { - /* Extend the array by 50%, plus the number we really need. */ - int new_size = yyg->yy_n_chars + number_to_move + (yyg->yy_n_chars >> 1); - YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size ,yyscanner ); - if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf ) - YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" ); - } - - yyg->yy_n_chars += number_to_move; - YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] = YY_END_OF_BUFFER_CHAR; - YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; - - yyg->yytext_ptr = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0]; - - return ret_val; -} - -/* yy_get_previous_state - get the state just before the EOB char was reached */ - -/* %if-c-only */ -/* %not-for-header */ - - static yy_state_type yy_get_previous_state (yyscan_t yyscanner) -/* %endif */ -/* %if-c++-only */ -/* %endif */ -{ - yy_state_type yy_current_state; - char *yy_cp; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - -/* %% [15.0] code to get the start state into yy_current_state goes here */ - yy_current_state = yyg->yy_start; - - for ( yy_cp = yyg->yytext_ptr + YY_MORE_ADJ; yy_cp < yyg->yy_c_buf_p; ++yy_cp ) - { -/* %% [16.0] code to find the next state goes here */ - yy_current_state = yy_nxt[yy_current_state][(*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1)]; - if ( yy_accept[yy_current_state] ) - { - yyg->yy_last_accepting_state = yy_current_state; - yyg->yy_last_accepting_cpos = yy_cp; - } - } - - return yy_current_state; -} - -/* yy_try_NUL_trans - try to make a transition on the NUL character - * - * synopsis - * next_state = yy_try_NUL_trans( current_state ); - */ -/* %if-c-only */ - static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state , yyscan_t yyscanner) -/* %endif */ -/* %if-c++-only */ -/* %endif */ -{ - int yy_is_jam; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; /* This var may be unused depending upon options. */ -/* %% [17.0] code to find the next state, and perhaps do backing up, goes here */ - char *yy_cp = yyg->yy_c_buf_p; - - yy_current_state = yy_nxt[yy_current_state][1]; - yy_is_jam = (yy_current_state <= 0); - - if ( ! yy_is_jam ) - { - if ( yy_accept[yy_current_state] ) - { - yyg->yy_last_accepting_state = yy_current_state; - yyg->yy_last_accepting_cpos = yy_cp; - } - } - - (void)yyg; - return yy_is_jam ? 0 : yy_current_state; -} - -#ifndef YY_NO_UNPUT -/* %if-c-only */ - -/* %endif */ -#endif - -/* %if-c-only */ -#ifndef YY_NO_INPUT -#ifdef __cplusplus - static int yyinput (yyscan_t yyscanner) -#else - static int input (yyscan_t yyscanner) -#endif - -/* %endif */ -/* %if-c++-only */ -/* %endif */ -{ - int c; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - *yyg->yy_c_buf_p = yyg->yy_hold_char; - - if ( *yyg->yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) - { - /* yy_c_buf_p now points to the character we want to return. - * If this occurs *before* the EOB characters, then it's a - * valid NUL; if not, then we've hit the end of the buffer. - */ - if ( yyg->yy_c_buf_p < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] ) - /* This was really a NUL. */ - *yyg->yy_c_buf_p = '\0'; - - else - { /* need more input */ - int offset = yyg->yy_c_buf_p - yyg->yytext_ptr; - ++yyg->yy_c_buf_p; - - switch ( yy_get_next_buffer( yyscanner ) ) - { - case EOB_ACT_LAST_MATCH: - /* This happens because yy_g_n_b() - * sees that we've accumulated a - * token and flags that we need to - * try matching the token before - * proceeding. But for input(), - * there's no matching to consider. - * So convert the EOB_ACT_LAST_MATCH - * to EOB_ACT_END_OF_FILE. - */ - - /* Reset buffer status. */ - yyrestart(yyin ,yyscanner); - - /*FALLTHROUGH*/ - - case EOB_ACT_END_OF_FILE: - { - if ( yywrap(yyscanner ) ) - return 0; - - if ( ! yyg->yy_did_buffer_switch_on_eof ) - YY_NEW_FILE; -#ifdef __cplusplus - return yyinput(yyscanner); -#else - return input(yyscanner); -#endif - } - - case EOB_ACT_CONTINUE_SCAN: - yyg->yy_c_buf_p = yyg->yytext_ptr + offset; - break; - } - } - } - - c = *(unsigned char *) yyg->yy_c_buf_p; /* cast for 8-bit char's */ - *yyg->yy_c_buf_p = '\0'; /* preserve yytext */ - yyg->yy_hold_char = *++yyg->yy_c_buf_p; - -/* %% [19.0] update BOL and yylineno */ - - return c; -} -/* %if-c-only */ -#endif /* ifndef YY_NO_INPUT */ -/* %endif */ - -/** Immediately switch to a different input stream. - * @param input_file A readable stream. - * @param yyscanner The scanner object. - * @note This function does not reset the start condition to @c INITIAL . - */ -/* %if-c-only */ - void yyrestart (FILE * input_file , yyscan_t yyscanner) -/* %endif */ -/* %if-c++-only */ -/* %endif */ -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - if ( ! YY_CURRENT_BUFFER ){ - yyensure_buffer_stack (yyscanner); - YY_CURRENT_BUFFER_LVALUE = - yy_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); - } - - yy_init_buffer(YY_CURRENT_BUFFER,input_file ,yyscanner); - yy_load_buffer_state(yyscanner ); -} - -/* %if-c++-only */ -/* %endif */ - -/** Switch to a different input buffer. - * @param new_buffer The new input buffer. - * @param yyscanner The scanner object. - */ -/* %if-c-only */ - void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner) -/* %endif */ -/* %if-c++-only */ -/* %endif */ -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - /* TODO. We should be able to replace this entire function body - * with - * yypop_buffer_state(); - * yypush_buffer_state(new_buffer); - */ - yyensure_buffer_stack (yyscanner); - if ( YY_CURRENT_BUFFER == new_buffer ) - return; - - if ( YY_CURRENT_BUFFER ) - { - /* Flush out information for old buffer. */ - *yyg->yy_c_buf_p = yyg->yy_hold_char; - YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p; - YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; - } - - YY_CURRENT_BUFFER_LVALUE = new_buffer; - yy_load_buffer_state(yyscanner ); - - /* We don't actually know whether we did this switch during - * EOF (yywrap()) processing, but the only time this flag - * is looked at is after yywrap() is called, so it's safe - * to go ahead and always set it. - */ - yyg->yy_did_buffer_switch_on_eof = 1; -} - -/* %if-c-only */ -static void yy_load_buffer_state (yyscan_t yyscanner) -/* %endif */ -/* %if-c++-only */ -/* %endif */ -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; - yyg->yytext_ptr = yyg->yy_c_buf_p = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos; -/* %if-c-only */ - yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file; -/* %endif */ -/* %if-c++-only */ -/* %endif */ - yyg->yy_hold_char = *yyg->yy_c_buf_p; -} - -/** Allocate and initialize an input buffer state. - * @param file A readable stream. - * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE. - * @param yyscanner The scanner object. - * @return the allocated buffer state. - */ -/* %if-c-only */ - YY_BUFFER_STATE yy_create_buffer (FILE * file, int size , yyscan_t yyscanner) -/* %endif */ -/* %if-c++-only */ -/* %endif */ -{ - YY_BUFFER_STATE b; - - b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state ) ,yyscanner ); - if ( ! b ) - YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); - - b->yy_buf_size = (yy_size_t)size; - - /* yy_ch_buf has to be 2 characters longer than the size given because - * we need to put in 2 end-of-buffer characters. - */ - b->yy_ch_buf = (char *) yyalloc(b->yy_buf_size + 2 ,yyscanner ); - if ( ! b->yy_ch_buf ) - YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); - - b->yy_is_our_buffer = 1; - - yy_init_buffer(b,file ,yyscanner); - - return b; -} - -/* %if-c++-only */ -/* %endif */ - -/** Destroy the buffer. - * @param b a buffer created with yy_create_buffer() - * @param yyscanner The scanner object. - */ -/* %if-c-only */ - void yy_delete_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner) -/* %endif */ -/* %if-c++-only */ -/* %endif */ -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - if ( ! b ) - return; - - if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */ - YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0; - - if ( b->yy_is_our_buffer ) - yyfree((void *) b->yy_ch_buf ,yyscanner ); - - yyfree((void *) b ,yyscanner ); -} - -/* Initializes or reinitializes a buffer. - * This function is sometimes called more than once on the same buffer, - * such as during a yyrestart() or at EOF. - */ -/* %if-c-only */ - static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file , yyscan_t yyscanner) -/* %endif */ -/* %if-c++-only */ -/* %endif */ - -{ - int oerrno = errno; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - yy_flush_buffer(b ,yyscanner); - -/* %if-c-only */ - b->yy_input_file = file; -/* %endif */ -/* %if-c++-only */ -/* %endif */ - b->yy_fill_buffer = 1; - - /* If b is the current buffer, then yy_init_buffer was _probably_ - * called from yyrestart() or through yy_get_next_buffer. - * In that case, we don't want to reset the lineno or column. - */ - if (b != YY_CURRENT_BUFFER){ - b->yy_bs_lineno = 1; - b->yy_bs_column = 0; - } - -/* %if-c-only */ - - b->yy_is_interactive = 0; - -/* %endif */ -/* %if-c++-only */ -/* %endif */ - errno = oerrno; -} - -/** Discard all buffered characters. On the next scan, YY_INPUT will be called. - * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER. - * @param yyscanner The scanner object. - */ -/* %if-c-only */ - void yy_flush_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner) -/* %endif */ -/* %if-c++-only */ -/* %endif */ -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - if ( ! b ) - return; - - b->yy_n_chars = 0; - - /* We always need two end-of-buffer characters. The first causes - * a transition to the end-of-buffer state. The second causes - * a jam in that state. - */ - b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; - b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; - - b->yy_buf_pos = &b->yy_ch_buf[0]; - - b->yy_at_bol = 1; - b->yy_buffer_status = YY_BUFFER_NEW; - - if ( b == YY_CURRENT_BUFFER ) - yy_load_buffer_state(yyscanner ); -} - -/* %if-c-or-c++ */ -/** Pushes the new state onto the stack. The new state becomes - * the current state. This function will allocate the stack - * if necessary. - * @param new_buffer The new state. - * @param yyscanner The scanner object. - */ -/* %if-c-only */ -void yypush_buffer_state (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner) -/* %endif */ -/* %if-c++-only */ -/* %endif */ -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - if (new_buffer == NULL) - return; - - yyensure_buffer_stack(yyscanner); - - /* This block is copied from yy_switch_to_buffer. */ - if ( YY_CURRENT_BUFFER ) - { - /* Flush out information for old buffer. */ - *yyg->yy_c_buf_p = yyg->yy_hold_char; - YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p; - YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; - } - - /* Only push if top exists. Otherwise, replace top. */ - if (YY_CURRENT_BUFFER) - yyg->yy_buffer_stack_top++; - YY_CURRENT_BUFFER_LVALUE = new_buffer; - - /* copied from yy_switch_to_buffer. */ - yy_load_buffer_state(yyscanner ); - yyg->yy_did_buffer_switch_on_eof = 1; -} -/* %endif */ - -/* %if-c-or-c++ */ -/** Removes and deletes the top of the stack, if present. - * The next element becomes the new top. - * @param yyscanner The scanner object. - */ -/* %if-c-only */ -void yypop_buffer_state (yyscan_t yyscanner) -/* %endif */ -/* %if-c++-only */ -/* %endif */ -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - if (!YY_CURRENT_BUFFER) - return; - - yy_delete_buffer(YY_CURRENT_BUFFER ,yyscanner); - YY_CURRENT_BUFFER_LVALUE = NULL; - if (yyg->yy_buffer_stack_top > 0) - --yyg->yy_buffer_stack_top; - - if (YY_CURRENT_BUFFER) { - yy_load_buffer_state(yyscanner ); - yyg->yy_did_buffer_switch_on_eof = 1; - } -} -/* %endif */ - -/* %if-c-or-c++ */ -/* Allocates the stack if it does not exist. - * Guarantees space for at least one push. - */ -/* %if-c-only */ -static void yyensure_buffer_stack (yyscan_t yyscanner) -/* %endif */ -/* %if-c++-only */ -/* %endif */ -{ - int num_to_alloc; - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - if (!yyg->yy_buffer_stack) { - - /* First allocation is just for 2 elements, since we don't know if this - * scanner will even need a stack. We use 2 instead of 1 to avoid an - * immediate realloc on the next call. - */ - num_to_alloc = 1; /* After all that talk, this was set to 1 anyways... */ - yyg->yy_buffer_stack = (struct yy_buffer_state**)yyalloc - (num_to_alloc * sizeof(struct yy_buffer_state*) - , yyscanner); - if ( ! yyg->yy_buffer_stack ) - YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" ); - - memset(yyg->yy_buffer_stack, 0, num_to_alloc * sizeof(struct yy_buffer_state*)); - - yyg->yy_buffer_stack_max = num_to_alloc; - yyg->yy_buffer_stack_top = 0; - return; - } - - if (yyg->yy_buffer_stack_top >= (yyg->yy_buffer_stack_max) - 1){ - - /* Increase the buffer to prepare for a possible push. */ - yy_size_t grow_size = 8 /* arbitrary grow size */; - - num_to_alloc = yyg->yy_buffer_stack_max + grow_size; - yyg->yy_buffer_stack = (struct yy_buffer_state**)yyrealloc - (yyg->yy_buffer_stack, - num_to_alloc * sizeof(struct yy_buffer_state*) - , yyscanner); - if ( ! yyg->yy_buffer_stack ) - YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" ); - - /* zero only the new slots.*/ - memset(yyg->yy_buffer_stack + yyg->yy_buffer_stack_max, 0, grow_size * sizeof(struct yy_buffer_state*)); - yyg->yy_buffer_stack_max = num_to_alloc; - } -} -/* %endif */ - -/* %if-c-only */ -/** Setup the input buffer state to scan directly from a user-specified character buffer. - * @param base the character buffer - * @param size the size in bytes of the character buffer - * @param yyscanner The scanner object. - * @return the newly allocated buffer state object. - */ -YY_BUFFER_STATE yy_scan_buffer (char * base, yy_size_t size , yyscan_t yyscanner) -{ - YY_BUFFER_STATE b; - - if ( size < 2 || - base[size-2] != YY_END_OF_BUFFER_CHAR || - base[size-1] != YY_END_OF_BUFFER_CHAR ) - /* They forgot to leave room for the EOB's. */ - return NULL; - - b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state ) ,yyscanner ); - if ( ! b ) - YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" ); - - b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */ - b->yy_buf_pos = b->yy_ch_buf = base; - b->yy_is_our_buffer = 0; - b->yy_input_file = NULL; - b->yy_n_chars = b->yy_buf_size; - b->yy_is_interactive = 0; - b->yy_at_bol = 1; - b->yy_fill_buffer = 0; - b->yy_buffer_status = YY_BUFFER_NEW; - - yy_switch_to_buffer(b ,yyscanner ); - - return b; -} -/* %endif */ - -/* %if-c-only */ -/** Setup the input buffer state to scan a string. The next call to yylex() will - * scan from a @e copy of @a str. - * @param yystr a NUL-terminated string to scan - * @param yyscanner The scanner object. - * @return the newly allocated buffer state object. - * @note If you want to scan bytes that may contain NUL values, then use - * yy_scan_bytes() instead. - */ -YY_BUFFER_STATE yy_scan_string (yyconst char * yystr , yyscan_t yyscanner) -{ - - return yy_scan_bytes(yystr,(int) strlen(yystr) ,yyscanner); -} -/* %endif */ - -/* %if-c-only */ -/** Setup the input buffer state to scan the given bytes. The next call to yylex() will - * scan from a @e copy of @a bytes. - * @param yybytes the byte buffer to scan - * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes. - * @param yyscanner The scanner object. - * @return the newly allocated buffer state object. - */ -YY_BUFFER_STATE yy_scan_bytes (yyconst char * yybytes, int _yybytes_len , yyscan_t yyscanner) -{ - YY_BUFFER_STATE b; - char *buf; - yy_size_t n; - yy_size_t i; - - /* Get memory for full buffer, including space for trailing EOB's. */ - n = (yy_size_t) _yybytes_len + 2; - buf = (char *) yyalloc(n ,yyscanner ); - if ( ! buf ) - YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" ); - - for ( i = 0; i < _yybytes_len; ++i ) - buf[i] = yybytes[i]; - - buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR; - - b = yy_scan_buffer(buf,n ,yyscanner); - if ( ! b ) - YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" ); - - /* It's okay to grow etc. this buffer, and we should throw it - * away when we're done. - */ - b->yy_is_our_buffer = 1; - - return b; -} -/* %endif */ - -#ifndef YY_EXIT_FAILURE -#define YY_EXIT_FAILURE 2 -#endif - -/* %if-c-only */ -static void yynoreturn yy_fatal_error (yyconst char* msg , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - (void)yyg; - (void) fprintf( stderr, "%s\n", msg ); - exit( YY_EXIT_FAILURE ); -} -/* %endif */ -/* %if-c++-only */ -/* %endif */ - -/* Redefine yyless() so it works in section 3 code. */ - -#undef yyless -#define yyless(n) \ - do \ - { \ - /* Undo effects of setting up yytext. */ \ - int yyless_macro_arg = (n); \ - YY_LESS_LINENO(yyless_macro_arg);\ - yytext[yyleng] = yyg->yy_hold_char; \ - yyg->yy_c_buf_p = yytext + yyless_macro_arg; \ - yyg->yy_hold_char = *yyg->yy_c_buf_p; \ - *yyg->yy_c_buf_p = '\0'; \ - yyleng = yyless_macro_arg; \ - } \ - while ( 0 ) - -/* Accessor methods (get/set functions) to struct members. */ - -/* %if-c-only */ -/* %if-reentrant */ - -/** Get the user-defined data for this scanner. - * @param yyscanner The scanner object. - */ -YY_EXTRA_TYPE yyget_extra (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yyextra; -} - -/* %endif */ - -/** Get the current line number. - * @param yyscanner The scanner object. - */ -int yyget_lineno (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - if (! YY_CURRENT_BUFFER) - return 0; - - return yylineno; -} - -/** Get the current column number. - * @param yyscanner The scanner object. - */ -int yyget_column (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - if (! YY_CURRENT_BUFFER) - return 0; - - return yycolumn; -} - -/** Get the input stream. - * @param yyscanner The scanner object. - */ -FILE *yyget_in (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yyin; -} - -/** Get the output stream. - * @param yyscanner The scanner object. - */ -FILE *yyget_out (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yyout; -} - -/** Get the length of the current token. - * @param yyscanner The scanner object. - */ -int yyget_leng (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yyleng; -} - -/** Get the current token. - * @param yyscanner The scanner object. - */ - -char *yyget_text (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yytext; -} - -/* %if-reentrant */ - -/** Set the user-defined data. This data is never touched by the scanner. - * @param user_defined The data to be associated with this scanner. - * @param yyscanner The scanner object. - */ -void yyset_extra (YY_EXTRA_TYPE user_defined , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yyextra = user_defined ; -} - -/* %endif */ - -/** Set the current line number. - * @param _line_number line number - * @param yyscanner The scanner object. - */ -void yyset_lineno (int _line_number , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - /* lineno is only valid if an input buffer exists. */ - if (! YY_CURRENT_BUFFER ) - YY_FATAL_ERROR( "yyset_lineno called with no buffer" ); - - yylineno = _line_number; -} - -/** Set the current column. - * @param _column_no column number - * @param yyscanner The scanner object. - */ -void yyset_column (int _column_no , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - /* column is only valid if an input buffer exists. */ - if (! YY_CURRENT_BUFFER ) - YY_FATAL_ERROR( "yyset_column called with no buffer" ); - - yycolumn = _column_no; -} - -/** Set the input stream. This does not discard the current - * input buffer. - * @param _in_str A readable stream. - * @param yyscanner The scanner object. - * @see yy_switch_to_buffer - */ -void yyset_in (FILE * _in_str , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yyin = _in_str ; -} - -void yyset_out (FILE * _out_str , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yyout = _out_str ; -} - -int yyget_debug (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yy_flex_debug; -} - -void yyset_debug (int _bdebug , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yy_flex_debug = _bdebug ; -} - -/* %endif */ - -/* %if-reentrant */ -/* Accessor methods for yylval and yylloc */ - -/* %if-bison-bridge */ - -YYSTYPE * yyget_lval (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - return yylval; -} - -void yyset_lval (YYSTYPE * yylval_param , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - yylval = yylval_param; -} - -/* %endif */ - -/* User-visible API */ - -/* yylex_init is special because it creates the scanner itself, so it is - * the ONLY reentrant function that doesn't take the scanner as the last argument. - * That's why we explicitly handle the declaration, instead of using our macros. - */ - -int yylex_init(yyscan_t* ptr_yy_globals) - -{ - if (ptr_yy_globals == NULL){ - errno = EINVAL; - return 1; - } - - *ptr_yy_globals = (yyscan_t) yyalloc ( sizeof( struct yyguts_t ), NULL ); - - if (*ptr_yy_globals == NULL){ - errno = ENOMEM; - return 1; - } - - /* By setting to 0xAA, we expose bugs in yy_init_globals. Leave at 0x00 for releases. */ - memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t)); - - return yy_init_globals ( *ptr_yy_globals ); -} - -/* yylex_init_extra has the same functionality as yylex_init, but follows the - * convention of taking the scanner as the last argument. Note however, that - * this is a *pointer* to a scanner, as it will be allocated by this call (and - * is the reason, too, why this function also must handle its own declaration). - * The user defined value in the first argument will be available to yyalloc in - * the yyextra field. - */ - -int yylex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals ) - -{ - struct yyguts_t dummy_yyguts; - - yyset_extra (yy_user_defined, &dummy_yyguts); - - if (ptr_yy_globals == NULL){ - errno = EINVAL; - return 1; - } - - *ptr_yy_globals = (yyscan_t) yyalloc ( sizeof( struct yyguts_t ), &dummy_yyguts ); - - if (*ptr_yy_globals == NULL){ - errno = ENOMEM; - return 1; - } - - /* By setting to 0xAA, we expose bugs in - yy_init_globals. Leave at 0x00 for releases. */ - memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t)); - - yyset_extra (yy_user_defined, *ptr_yy_globals); - - return yy_init_globals ( *ptr_yy_globals ); -} - -/* %endif if-c-only */ - -/* %if-c-only */ -static int yy_init_globals (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - /* Initialization is the same as for the non-reentrant scanner. - * This function is called from yylex_destroy(), so don't allocate here. - */ - - yyg->yy_buffer_stack = NULL; - yyg->yy_buffer_stack_top = 0; - yyg->yy_buffer_stack_max = 0; - yyg->yy_c_buf_p = NULL; - yyg->yy_init = 0; - yyg->yy_start = 0; - - yyg->yy_start_stack_ptr = 0; - yyg->yy_start_stack_depth = 0; - yyg->yy_start_stack = NULL; - -/* Defined in main.c */ -#ifdef YY_STDINIT - yyin = stdin; - yyout = stdout; -#else - yyin = NULL; - yyout = NULL; -#endif - - /* For future reference: Set errno on error, since we are called by - * yylex_init() - */ - return 0; -} -/* %endif */ - -/* %if-c-only SNIP! this currently causes conflicts with the c++ scanner */ -/* yylex_destroy is for both reentrant and non-reentrant scanners. */ -int yylex_destroy (yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - - /* Pop the buffer stack, destroying each element. */ - while(YY_CURRENT_BUFFER){ - yy_delete_buffer(YY_CURRENT_BUFFER ,yyscanner ); - YY_CURRENT_BUFFER_LVALUE = NULL; - yypop_buffer_state(yyscanner); - } - - /* Destroy the stack itself. */ - yyfree(yyg->yy_buffer_stack ,yyscanner); - yyg->yy_buffer_stack = NULL; - - /* Destroy the start condition stack. */ - yyfree(yyg->yy_start_stack ,yyscanner ); - yyg->yy_start_stack = NULL; - - /* Reset the globals. This is important in a non-reentrant scanner so the next time - * yylex() is called, initialization will occur. */ - yy_init_globals( yyscanner); - -/* %if-reentrant */ - /* Destroy the main struct (reentrant only). */ - yyfree ( yyscanner , yyscanner ); - yyscanner = NULL; -/* %endif */ - return 0; -} -/* %endif */ - -/* - * Internal utility routines. - */ - -#ifndef yytext_ptr -static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner) -{ - struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; - (void)yyg; - - int i; - for ( i = 0; i < n; ++i ) - s1[i] = s2[i]; -} -#endif - -#ifdef YY_NEED_STRLEN -static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner) -{ - int n; - for ( n = 0; s[n]; ++n ) - ; - - return n; -} -#endif - -/* %if-tables-serialization definitions */ -/* %define-yytables The name for this specific scanner's tables. */ -#define YYTABLES_NAME "yytables" -/* %endif */ - -/* %ok-for-header */ - -#line 1091 "htmllex.l" - - - -/* initialize the scanner */ -int htmllexInit (void** scanner, UserData* data) { - int res; - res = yylex_init(scanner); - if (res) { - return res; - } - yyset_extra(data,*scanner); - return 0; -} - -/* set debug level; a level > 0 enables debugging */ -int htmllexDebug (void** scanner, int debug) { - int old = yyget_debug(*scanner); - yyset_debug(debug,*scanner); - return old; -} - -/* prepare scanner for calls to yylex() */ -int htmllexStart (void* scanner, UserData* data, const char* s, int slen) { - /* append s to data buffer and scan those bytes. - As Flex does not distinguish between NUL and EOF characters, - replace NUL with ' '. */ - size_t len = strlen(data->buf); - int i; - RESIZE_BUF(data->buf, len + slen + 1); - for (i=0; i < slen; i++) { - data->buf[len+i] = (s[i]=='\0' ? ' ' : s[i]); - } - data->buf[len+slen] = '\0'; - if (yyget_debug(scanner)) { - fprintf(stderr, "SCANBUF %d `%s'\n", data->bufpos, data->buf); - } - if (len > data->bufpos) { - int rewind = len - data->bufpos; - if (yyget_debug(scanner)) { - fprintf(stderr, "REWIND %d\n", rewind); - } - slen += rewind; - len -= rewind; - } - /* reset userdata */ - data->bufpos = len; - data->exc_type = NULL; - data->exc_val = NULL; - data->exc_tb = NULL; - if (yyget_debug(scanner)) { - fprintf(stderr, "SCANNING `%s'\n", data->buf + len); - } - data->lexbuf = yy_scan_bytes(data->buf + len,slen,scanner); - return 0; -} - -/* delete scanned buffer data */ -int htmllexStop (void* scanner, UserData* data) { - yy_delete_buffer(data->lexbuf,scanner); - if (data->nextpos > 0) { - size_t len = strlen(data->buf); - int i, j; - for (i=data->nextpos, j=0; ibuf[j] = data->buf[i]; - } - data->buf[j] = '\0'; - /* Can return T_ERROR, which is guaranteed to be non-zero. */ - RESIZE_BUF(data->buf, len-data->nextpos + 1); - data->bufpos -= data->nextpos; - data->nextpos = 0; - } - return 0; -} - -/* destroy scanner when not needed any more */ -int htmllexDestroy (void* scanner) { - return yylex_destroy(scanner); -} - diff --git a/linkcheck/HtmlParser/htmllex.l b/linkcheck/HtmlParser/htmllex.l deleted file mode 100644 index 1676a60d..00000000 --- a/linkcheck/HtmlParser/htmllex.l +++ /dev/null @@ -1,1167 +0,0 @@ -/* Copyright (C) 2000-2014 Bastian Kleineidam - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -*/ - -/* Lexical analyzer for finding recognizable tokens in (probably - * bad formatted) HTML streams. - * Unrecognizable character data is passed on as a TEXT token. - * - * Note that you cannot rely on the "longest match" preference of - * flex here since input data might be truncated at any given position. - * This explains some of the more complicated lookahead rules below. - */ - -%{ -#include "htmlsax.h" -#include "s_util.h" -#include -#include - - -/* token type */ -#define YYSTYPE PyObject* -/* type of user-specified data */ -#define YY_EXTRA_TYPE UserData* - -/* Returning T_ERROR is the standard error-out reaction for this lexer. */ -/* Return T_ERROR if argument is NULL. */ -#define CHECK_NULL(a) \ - if ((a) == NULL) return T_ERROR - -/* Return T_ERROR if argument is -1 (minus one). */ -#define CHECK_MINUSONE(a) \ - if ((a) == -1) return T_ERROR - -/* resize buffer b, returning T_ERROR on error */ -#define RESIZE_BUF(b, n) \ - CHECK_NULL(PyMem_Resize((b), char, (n))); \ - (b)[(n)-1] = '\0' - -/* make python unicode string from tmp_buf and assign it to a */ -#define PYSTRING_TMP_UNICODE(a) { \ - PyObject* pencoding; \ - char* encoding; \ - CHECK_NULL(pencoding = PyObject_GetAttrString(yyextra->parser, "encoding")); \ - encoding = PyBytes_AsString(pencoding); \ - if (encoding==NULL) { Py_DECREF(pencoding); return T_ERROR; } \ - (a) = PyUnicode_Decode(yyextra->tmp_buf, \ - (Py_ssize_t)strlen(yyextra->tmp_buf), \ - encoding, "ignore"); \ - Py_DECREF(pencoding); \ - CHECK_NULL(a); \ -} - -#define PYSTRING_TMP_ASCII(a) \ - CHECK_NULL((a) = PyUnicode_Decode(yyextra->tmp_buf, \ - (Py_ssize_t)strlen(yyextra->tmp_buf), "ascii", "ignore")) - -/* set return value from tmp_buf */ -#define SETLVAL_UNICODE { \ - PyObject* s; \ - PYSTRING_TMP_UNICODE(s); \ - RESIZE_BUF(yyextra->tmp_buf, 1); \ - *yylval = s; \ - } - -/* set return value from tmp_buf */ -#define SETLVAL_ASCII { \ - PyObject* s; \ - PYSTRING_TMP_ASCII(s); \ - RESIZE_BUF(yyextra->tmp_buf, 1); \ - *yylval = s; \ - } - -/* append yytext to tmp_buf */ -#define APPEND_TO_TMP(n) { \ - size_t len = strlen(yyextra->tmp_buf) + (n) + 1; \ - RESIZE_BUF(yyextra->tmp_buf, len); \ - strlcat(yyextra->tmp_buf, yytext, len); \ - } - -/* lowercase the tmp_buf */ -#define LOWER_TMP { \ - char* p = yyextra->tmp_buf; \ - while (*p) { *p = tolower(*p); p++; } \ - } - -/* check for JavaScript or CSS tags; must be before SET_ATTR_LVAL */ -#define SCRIPT_CHECK { \ - PyObject* tagname; \ - CHECK_NULL(tagname = PyUnicode_AsEncodedString(yyextra->tmp_tag, "ascii", "ignore")); \ - if (strcmp("script", PyBytes_AsString(tagname))==0) \ - BEGIN(S_SCRIPT); \ - else if (strcmp("style", PyBytes_AsString(tagname))==0) \ - BEGIN(S_STYLE); \ - else \ - BEGIN(INITIAL); \ - Py_DECREF(tagname); \ - } - -/* set return value from tag with attributes */ -#define SET_ATTR_LVAL \ - if (yyextra->tmp_tag==NULL || yyextra->tmp_attrs==NULL) { \ - PyErr_SetString(PyExc_TypeError, "tmp_tag or tmp_attrs is NULL"); \ - return T_ERROR; \ - } \ - CHECK_NULL(*yylval = Py_BuildValue("(OO)", yyextra->tmp_tag, yyextra->tmp_attrs)); \ - yyextra->tmp_tag = yyextra->tmp_attrs = NULL - -/* store collected name as attribute in dictionary - * tmp_attrname and tmp_attrval must be NULL - */ -#define FLUSH_ATTRS \ - if (strlen(yyextra->tmp_buf) > 0) { \ - PYSTRING_TMP_UNICODE(yyextra->tmp_attrname); \ - RESIZE_BUF(yyextra->tmp_buf, 1); \ - CHECK_MINUSONE(PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname, Py_None)); \ - Py_CLEAR(yyextra->tmp_attrname); \ - } - -/* update the buffer and scanner positions */ -#define UPDATE_BUFPOS yyextra->bufpos += yyleng; yyextra->pos += yyleng - -/* update the column position; use this *only* in rules that cannot match - the newline char '\n'! - */ -#define UPDATE_COLUMN UPDATE_BUFPOS; yyextra->column += yyleng - -/* update the line and column position; use this in rules that can match the - newline char '\n'. - */ -#define UPDATE_LINE UPDATE_BUFPOS; { \ - int i; \ - for (i=0; ilineno); \ - yyextra->column = 1; \ - } \ - else ++(yyextra->column); \ - } \ -} - -/* return a token, setting the nextpos value back to the bufpos */ -#define RETURN(tok) yyextra->nextpos = yyextra->bufpos; return tok - -/* use Pythons memory management */ -void* yyalloc (yy_size_t bytes, void* yyscanner) { - return PyMem_Malloc((size_t)bytes); -} -void* yyrealloc (void* ptr, yy_size_t bytes, void* yyscanner) { - return PyMem_Realloc(ptr, (size_t)bytes); -} -void yyfree (void* ptr, void* yyscanner) { - PyMem_Free(ptr); -} - -/* include bison-generated token definitions */ -#include "htmlparse.h" -%} - -/* use our own memory management functions (see above) */ -%option noyyalloc noyyrealloc noyyfree -/* handle 8bit characters */ -%option 8bit -/* define output file */ -%option outfile="htmllex.c" -/* optimize for speed.. */ -%option align full -/* ..but still construct equivalence classes */ -%option ecs -/* add debugging ability */ -%option debug -/* don't use unneeded functions */ -%option nounput nomain noyywrap noyymore noreject -/* make it reentrant and bison compatible */ -%option bison-bridge reentrant never-interactive -/* print warnings on compiling */ -%option warn - -/* scanner states */ -%x S_PI -%x S_COMMENT -%x S_COMMENT1 -%x S_COMMENT2 -%x S_DOCTYPE -%x S_CDATA -%x S_TAGSTART -%x S_TAGEND -%x S_TAGEND2 -%x S_SCRIPT -%x S_SCRIPT_APOS -%x S_SCRIPT_APOS_ESC -%x S_SCRIPT_STRING -%x S_SCRIPT_STRING_ESC -%x S_SCRIPT_COMMENT -%x S_SCRIPT_MCOMMENT -%x S_STYLE -%x S_ATTR1 -%x S_ATTR2 -%x S_ATTR3 -%x S_ATTR4 -%x S_ATTR5 -%x S_APOSSTRING -%x S_APOSSTRING_ESC -%x S_STRING -%x S_STRING_ESC - -/* regular expression definitions used below */ -RX_WHITE_SPACE [\n\r\ \t\b\012] -RX_EQUAL = -RX_NAME [a-zA-Z]([-a-zA-Z0-9_])* -RX_DATA [-a-zA-Z0-9_:]+ - -%% - - /*********************** EOF ************************/ -<> { - /* hit end-of-file, wait for more data */ - return T_WAIT; -} - - /*********************** COMMENT ************************/ - { - UPDATE_COLUMN; - SETLVAL_UNICODE; - RETURN(T_COMMENT); -} - - /* Note: also accept " { - UPDATE_COLUMN; - SETLVAL_UNICODE; - BEGIN(INITIAL); - RETURN(T_COMMENT); -} - ---[ ]+> { - UPDATE_COLUMN; - SETLVAL_UNICODE; - BEGIN(INITIAL); - RETURN(T_COMMENT); -} - --> { - UPDATE_COLUMN; - SETLVAL_UNICODE; - BEGIN(INITIAL); - RETURN(T_COMMENT); -} - --/-- { - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); -} - --/[^-] { - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); -} - ---/[^- >] { - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); -} - ---[ ]+/[^ >] { - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); -} - -[^-]+ { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -.|\n { - return T_WAIT; -} - - /* Note: www.nba.com had some comment */ -> { - UPDATE_COLUMN; - SETLVAL_UNICODE; - BEGIN(INITIAL); - RETURN(T_COMMENT); -} - -[^>] { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - - - /*********************** DOCTYPE ************************/ -> { - UPDATE_COLUMN; - SETLVAL_UNICODE; - BEGIN(INITIAL); - RETURN(T_DOCTYPE); -} - -[^>]+ { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - - /*********************** CDATA ************************/ -\]\]> { - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng-3); - SETLVAL_UNICODE; - BEGIN(INITIAL); - RETURN(T_CDATA); -} - -[^\]]+ { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -\][^\]] { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -\]\][^>] { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -.|\n { - return T_WAIT; -} - - /*********************** PI ************************/ -<\? { - UPDATE_COLUMN; - BEGIN(S_PI); -} - -[^?>]+ { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -\?+> { - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng-2); - SETLVAL_UNICODE; - BEGIN(INITIAL); - RETURN(T_PI); -} - -\?+[^?>]+ { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -> { - UPDATE_COLUMN; - SETLVAL_UNICODE; - BEGIN(INITIAL); - RETURN(T_PI); -} - -.|\n { - return T_WAIT; -} - - - /*********************** TAGSTART ************************/ -<{RX_WHITE_SPACE}*/[A-Za-z0-9] { - UPDATE_LINE; - CHECK_NULL(yyextra->tmp_attrs = PyObject_CallObject(yyextra->list_dict, NULL)); - BEGIN(S_TAGSTART); -} - -[^ \t\r\n\b\012/<>]+ { - /* actually accept a lot of tag chars, which may be illegal, - but we dont care, it's the browsers job */ - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); -} - -{RX_WHITE_SPACE}+ { - UPDATE_LINE; - LOWER_TMP; - PYSTRING_TMP_ASCII(yyextra->tmp_tag); - RESIZE_BUF(yyextra->tmp_buf, 1); - BEGIN(S_ATTR1); -} - -\/> { - UPDATE_COLUMN; - BEGIN(INITIAL); - if (!strlen(yyextra->tmp_buf)) { - /* the tag name was empty, assume a stray "" */ - RESIZE_BUF(yyextra->tmp_buf, 4); - strcpy(yyextra->tmp_buf, ""); - yyextra->tmp_attrs = NULL; - SETLVAL_UNICODE; - RETURN(T_TEXT); - } - LOWER_TMP; - PYSTRING_TMP_ASCII(yyextra->tmp_tag); - RESIZE_BUF(yyextra->tmp_buf, 1); - SET_ATTR_LVAL; - RETURN(T_ELEMENT_START_END); -} - -> { - UPDATE_COLUMN; - BEGIN(INITIAL); - if (!strlen(yyextra->tmp_buf)) { - /* the tag name was empty, assume a stray "<>" */ - RESIZE_BUF(yyextra->tmp_buf, 3); - strcpy(yyextra->tmp_buf, "<>"); - yyextra->tmp_attrs = NULL; - SETLVAL_UNICODE; - RETURN(T_TEXT); - } - LOWER_TMP; - PYSTRING_TMP_ASCII(yyextra->tmp_tag); - RESIZE_BUF(yyextra->tmp_buf, 1); - SCRIPT_CHECK; - SET_ATTR_LVAL; - RETURN(T_ELEMENT_START); -} - -<\/ { - /* Abort parsing this start tag and begin an endtag. Assume - the last "<" was a stray unquoted character. */ - char* tmp = NULL; - UPDATE_COLUMN; - BEGIN(S_TAGEND); - /* Add missing "<" at beginning of buffer. */ - RESIZE_BUF(tmp, strlen(yyextra->tmp_buf)+2); - tmp[0] = '<'; - tmp[1] = '\0'; - strlcat(tmp, yyextra->tmp_buf, sizeof(tmp)); - RESIZE_BUF(yyextra->tmp_buf, strlen(tmp)+1); - yyextra->tmp_buf[0] = '\0'; - strlcat(yyextra->tmp_buf, tmp, sizeof(yyextra->tmp_buf)); - free(tmp); - SETLVAL_UNICODE; - RETURN(T_TEXT); -} - -tmp_buf)+2); - tmp[0] = '<'; - tmp[1] = '\0'; - strlcat(tmp, yyextra->tmp_buf, sizeof(tmp)); - RESIZE_BUF(yyextra->tmp_buf, strlen(tmp)+1); - yyextra->tmp_buf[0] = '\0'; - strlcat(yyextra->tmp_buf, tmp, sizeof(yyextra->tmp_buf)); - free(tmp); - SETLVAL_UNICODE; - RETURN(T_TEXT); -} - -.|\n { - return T_WAIT; -} - - /*********************** SCRIPT ************************/ -<\/{RX_WHITE_SPACE}*[Ss][Cc][Rr][Ii][Pp][Tt]{RX_WHITE_SPACE}*> { - UPDATE_LINE; - SETLVAL_UNICODE; - BEGIN(INITIAL); - RETURN(T_SCRIPT); -} - -[^/'"<]+ { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -\' { - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); - BEGIN(S_SCRIPT_APOS); -} - -\" { - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); - BEGIN(S_SCRIPT_STRING); -} - -\/\/ { - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); - BEGIN(S_SCRIPT_COMMENT); -} - -\/\* { - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); - BEGIN(S_SCRIPT_MCOMMENT); -} - -\/[^*/] { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - - /* ensure any prefix of is matched, but not itself */ -<\/{RX_WHITE_SPACE}*/[^Ss\n\r\ \t\b\012] { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -<\/{RX_WHITE_SPACE}*[Ss]/[^Cc] { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -<\/{RX_WHITE_SPACE}*[Ss][Cc]/[^Rr] { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -<\/{RX_WHITE_SPACE}*[Ss][Cc][Rr]/[^Ii] { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -<\/{RX_WHITE_SPACE}*[Ss][Cc][Rr][Ii]/[^Pp] { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -<\/{RX_WHITE_SPACE}*[Ss][Cc][Rr][Ii][Pp]/[^Tt] { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -<\/{RX_WHITE_SPACE}*[Ss][Cc][Rr][Ii][Pp][Tt]{RX_WHITE_SPACE}*/[^>\n\r\ \t\b\012] { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -.|\n { - return T_WAIT; -} - -\\ { - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); - BEGIN(S_SCRIPT_APOS_ESC); -} - -[^\\']+ { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -\' { - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); - BEGIN(S_SCRIPT); -} - -.|\n { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); - BEGIN(S_SCRIPT_APOS); -} - -\\ { - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); - BEGIN(S_SCRIPT_STRING_ESC); -} - -[^\\"]+ { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -\" { - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); - BEGIN(S_SCRIPT); -} - -.|\n { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); - BEGIN(S_SCRIPT_STRING); -} - -[^\r\n<]+ { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -[\r\n] { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); - BEGIN(S_SCRIPT); -} - -.|\n { - return T_WAIT; -} - -[^*]+|\* { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -\*\/ { - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); - BEGIN(S_SCRIPT); -} - - /*********************** STYLE ************************/ -<\/{RX_WHITE_SPACE}*[Ss][Tt][Yy][Ll][Ee]{RX_WHITE_SPACE}*> { - UPDATE_LINE; - SETLVAL_UNICODE; - BEGIN(INITIAL); - RETURN(T_STYLE); -} - -[^<]+ { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - - /* this is so shitty */ -<\/{RX_WHITE_SPACE}*/[^Ss\n\r\ \t\b\012] { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -<\/{RX_WHITE_SPACE}*[Ss]/[^Tt] { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -<\/{RX_WHITE_SPACE}*[Ss][Tt]/[^Yy] { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -<\/{RX_WHITE_SPACE}*[Ss][Tt][Yy]/[^Ll] { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -<\/{RX_WHITE_SPACE}*[Ss][Tt][Yy][Ll]/[^Ee] { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -<\/{RX_WHITE_SPACE}*[Ss][Tt][Yy][Ll][Ee]{RX_WHITE_SPACE}*/[^>\n\r\ \t\b\012] { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -.|\n { - return T_WAIT; -} - - /*********************** ATTRS ************************/ -{RX_NAME} { - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); - BEGIN(S_ATTR2); -} - -\/> { - UPDATE_COLUMN; - FLUSH_ATTRS; - BEGIN(INITIAL); - SET_ATTR_LVAL; - RETURN(T_ELEMENT_START_END); -} - -\/[^>] { - UPDATE_LINE; -} - -\/ { - return T_WAIT; -} - -> { - UPDATE_COLUMN; - FLUSH_ATTRS; - SCRIPT_CHECK; - SET_ATTR_LVAL; - RETURN(T_ELEMENT_START); -} - -{RX_DATA} { - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); -} - -\\\r?\n { - /* Line continuations */ - UPDATE_LINE; -} - -\\\r?[^\n] { - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); -} - -\\\r? { - return T_WAIT; -} - -{RX_WHITE_SPACE}+ { - UPDATE_LINE; - BEGIN(S_ATTR3); -} - -{RX_WHITE_SPACE}*{RX_EQUAL}{RX_WHITE_SPACE}* { - UPDATE_LINE; - LOWER_TMP; - PYSTRING_TMP_UNICODE(yyextra->tmp_attrname); - RESIZE_BUF(yyextra->tmp_buf, 1); - BEGIN(S_ATTR4); -} - -{RX_NAME} { - UPDATE_COLUMN; - LOWER_TMP; - PYSTRING_TMP_UNICODE(yyextra->tmp_attrname); - RESIZE_BUF(yyextra->tmp_buf, 1); - if (yyextra->tmp_attrval != NULL) return T_ERROR; - CHECK_MINUSONE(PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname, Py_None)); - Py_CLEAR(yyextra->tmp_attrname); - APPEND_TO_TMP(yyleng); - BEGIN(S_ATTR2); -} - -.|\n { - /* this also skips whitespace! */ - UPDATE_LINE; -} - -\\\" { - /* backslash escapes seen at freecode.com */ - UPDATE_COLUMN; - BEGIN(S_STRING); -} - -\" { - UPDATE_COLUMN; - BEGIN(S_STRING); -} - -\' { - UPDATE_COLUMN; - BEGIN(S_APOSSTRING); -} - -[^\012 \t\b\r\n>\'\"]+ { - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); - BEGIN(S_ATTR5); -} - -> { - UPDATE_COLUMN; - PYSTRING_TMP_UNICODE(yyextra->tmp_attrval); - RESIZE_BUF(yyextra->tmp_buf, 1); - CHECK_NULL(yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities, - "O", yyextra->tmp_attrval)); - CHECK_MINUSONE(PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname, - yyextra->tmp_attrval)); - Py_CLEAR(yyextra->tmp_attrname); - Py_CLEAR(yyextra->tmp_attrval); - SCRIPT_CHECK; - SET_ATTR_LVAL; - RETURN(T_ELEMENT_START); -} - -{RX_WHITE_SPACE}+ { - UPDATE_LINE; -} - -[^\012 \t\b\r\n>\"]+ { - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); -} - -> { - UPDATE_COLUMN; - PYSTRING_TMP_UNICODE(yyextra->tmp_attrval); - RESIZE_BUF(yyextra->tmp_buf, 1); - CHECK_NULL(yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities, - "O", yyextra->tmp_attrval)); - CHECK_MINUSONE(PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname, - yyextra->tmp_attrval)); - Py_CLEAR(yyextra->tmp_attrname); - Py_CLEAR(yyextra->tmp_attrval); - SCRIPT_CHECK; - SET_ATTR_LVAL; - RETURN(T_ELEMENT_START); -} - -\/> { - UPDATE_COLUMN; - PYSTRING_TMP_UNICODE(yyextra->tmp_attrval); - RESIZE_BUF(yyextra->tmp_buf, 1); - CHECK_NULL(yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities, - "O", yyextra->tmp_attrval)); - CHECK_MINUSONE(PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname, - yyextra->tmp_attrval)); - Py_CLEAR(yyextra->tmp_attrname); - Py_CLEAR(yyextra->tmp_attrval); - BEGIN(INITIAL); - SET_ATTR_LVAL; - RETURN(T_ELEMENT_START_END); -} - -[\"] { - UPDATE_COLUMN; - PYSTRING_TMP_UNICODE(yyextra->tmp_attrval); - RESIZE_BUF(yyextra->tmp_buf, 1); - CHECK_NULL(yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities, - "O", yyextra->tmp_attrval)); - CHECK_MINUSONE(PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname, - yyextra->tmp_attrval)); - Py_CLEAR(yyextra->tmp_attrname); - Py_CLEAR(yyextra->tmp_attrval); - BEGIN(S_ATTR1); -} - -{RX_WHITE_SPACE}+ { - UPDATE_LINE; - PYSTRING_TMP_UNICODE(yyextra->tmp_attrval); - RESIZE_BUF(yyextra->tmp_buf, 1); - CHECK_NULL(yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities, - "O", yyextra->tmp_attrval)); - CHECK_MINUSONE(PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname, - yyextra->tmp_attrval)); - Py_CLEAR(yyextra->tmp_attrname); - Py_CLEAR(yyextra->tmp_attrval); - BEGIN(S_ATTR1); -} - -\\/\r?[^\n] { - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); - BEGIN(S_APOSSTRING_ESC); -} - -\\ { - return T_WAIT; -} - -\' { - UPDATE_COLUMN; - PYSTRING_TMP_UNICODE(yyextra->tmp_attrval); - RESIZE_BUF(yyextra->tmp_buf, 1); - CHECK_NULL(yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities, - "O", yyextra->tmp_attrval)); - CHECK_MINUSONE(PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname, - yyextra->tmp_attrval)); - Py_CLEAR(yyextra->tmp_attrname); - Py_CLEAR(yyextra->tmp_attrval); - BEGIN(S_ATTR1); -} - -[^\\']+ { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - - -.|\n { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); - BEGIN(S_APOSSTRING); -} - -\\?\" { - UPDATE_COLUMN; - PYSTRING_TMP_UNICODE(yyextra->tmp_attrval); - RESIZE_BUF(yyextra->tmp_buf, 1); - CHECK_NULL(yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities, - "O", yyextra->tmp_attrval)); - CHECK_MINUSONE(PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname, - yyextra->tmp_attrval)); - Py_CLEAR(yyextra->tmp_attrname); - Py_CLEAR(yyextra->tmp_attrval); - BEGIN(S_ATTR1); -} - -\\/\r?[^\n] { - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); - BEGIN(S_STRING_ESC); -} - -\\\r?\n { - UPDATE_LINE; -} - -\\ { - return T_WAIT; -} - -[^\\"]+ { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); -} - -.|\n { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); - BEGIN(S_STRING); -} - - - /*********************** TAGEND ************************/ -<{RX_WHITE_SPACE}*\/{RX_WHITE_SPACE}*/[A-Za-z] { - UPDATE_LINE; - BEGIN(S_TAGEND); -} - -[^<>\r\n \t\b\012]+ { - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); -} - -{RX_WHITE_SPACE}*> { - UPDATE_LINE; - LOWER_TMP; - SETLVAL_ASCII; - BEGIN(INITIAL); - RETURN(T_ELEMENT_END); -} - -<{RX_WHITE_SPACE}*\/{RX_WHITE_SPACE}*/[A-Za-z] { - UPDATE_LINE; - LOWER_TMP; - SETLVAL_ASCII; - BEGIN(S_TAGEND); - RETURN(T_ELEMENT_END); -} - -<{RX_WHITE_SPACE}*/[A-Za-z] { - UPDATE_LINE; - LOWER_TMP; - SETLVAL_ASCII; - CHECK_NULL(yyextra->tmp_attrs = PyObject_CallObject(yyextra->list_dict, NULL)); - BEGIN(S_TAGSTART); - RETURN(T_ELEMENT_END); -} - -{RX_WHITE_SPACE}+ { - UPDATE_LINE; - /* ignore any trailing garbage of this end tag */ - BEGIN(S_TAGEND2); -} - -.|\n { - return T_WAIT; -} - -> { - UPDATE_COLUMN; - LOWER_TMP; - SETLVAL_ASCII; - BEGIN(INITIAL); - RETURN(T_ELEMENT_END); -} - -[^<>]+ { - UPDATE_LINE; -} - -<{RX_WHITE_SPACE}*\/{RX_WHITE_SPACE}*/[A-Za-z] { - UPDATE_LINE; - LOWER_TMP; - SETLVAL_ASCII; - BEGIN(S_TAGEND); - RETURN(T_ELEMENT_END); -} - -<{RX_WHITE_SPACE}*/[A-Za-z] { - UPDATE_LINE; - LOWER_TMP; - SETLVAL_ASCII; - CHECK_NULL(yyextra->tmp_attrs = PyObject_CallObject(yyextra->list_dict, NULL)); - BEGIN(S_TAGSTART); - RETURN(T_ELEMENT_END); -} - -.|\n { - return T_WAIT; -} - /*********************** TEXT ************************/ -[^<]+ { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); - SETLVAL_UNICODE; - RETURN(T_TEXT); -} - -<[^\012 \t\b\r\nA-Za-z!?/] { - UPDATE_COLUMN; - APPEND_TO_TMP(yyleng); - SETLVAL_UNICODE; - RETURN(T_TEXT); -} - -<{RX_WHITE_SPACE}+[^A-Za-z/] { - UPDATE_LINE; - APPEND_TO_TMP(yyleng); - SETLVAL_UNICODE; - RETURN(T_TEXT); -} -.|\n { - return T_WAIT; -} - -%% - -/* initialize the scanner */ -int htmllexInit (void** scanner, UserData* data) { - int res; - res = yylex_init(scanner); - if (res) { - return res; - } - yyset_extra(data, *scanner); - return 0; -} - -/* set debug level; a level > 0 enables debugging */ -int htmllexDebug (void** scanner, int debug) { - int old = yyget_debug(*scanner); - yyset_debug(debug, *scanner); - return old; -} - -/* prepare scanner for calls to yylex() */ -int htmllexStart (void* scanner, UserData* data, const char* s, int slen) { - /* append s to data buffer and scan those bytes. - As Flex does not distinguish between NUL and EOF characters, - replace NUL with ' '. */ - size_t len = strlen(data->buf); - int i; - RESIZE_BUF(data->buf, len + slen + 1); - for (i=0; i < slen; i++) { - data->buf[len+i] = (s[i]=='\0' ? ' ' : s[i]); - } - data->buf[len+slen] = '\0'; - if (yyget_debug(scanner)) { - fprintf(stderr, "SCANBUF %d `%s'\n", data->bufpos, data->buf); - } - if (len > data->bufpos) { - int rewind = len - data->bufpos; - if (yyget_debug(scanner)) { - fprintf(stderr, "REWIND %d\n", rewind); - } - slen += rewind; - len -= rewind; - } - /* reset userdata */ - data->bufpos = len; - data->exc_type = NULL; - data->exc_val = NULL; - data->exc_tb = NULL; - if (yyget_debug(scanner)) { - fprintf(stderr, "SCANNING `%s'\n", data->buf + len); - } - data->lexbuf = yy_scan_bytes(data->buf + len, slen, scanner); - return 0; -} - -/* delete scanned buffer data */ -int htmllexStop (void* scanner, UserData* data) { - yy_delete_buffer(data->lexbuf, scanner); - if (data->nextpos > 0) { - size_t len = strlen(data->buf); - int i, j; - for (i=data->nextpos, j=0; ibuf[j] = data->buf[i]; - } - data->buf[j] = '\0'; - /* Can return T_ERROR, which is guaranteed to be non-zero. */ - RESIZE_BUF(data->buf, len-data->nextpos + 1); - data->bufpos -= data->nextpos; - data->nextpos = 0; - } - return 0; -} - -/* destroy scanner when not needed any more */ -int htmllexDestroy (void* scanner) { - return yylex_destroy(scanner); -} diff --git a/linkcheck/HtmlParser/htmllib.py b/linkcheck/HtmlParser/htmllib.py index 054357f2..75c6a4ec 100644 --- a/linkcheck/HtmlParser/htmllib.py +++ b/linkcheck/HtmlParser/htmllib.py @@ -87,7 +87,6 @@ class HtmlPrettyPrinter (object): @type data: string @return: None """ - data = data.encode(self.encoding, "ignore") self.fd.write("" % data) def start_element (self, tag, attrs): @@ -102,7 +101,7 @@ class HtmlPrettyPrinter (object): """ self._start_element(tag, attrs, ">") - def start_end_element (self, tag, attrs): + def start_end_element (self, tag, attrs, element_text=None): """ Print HTML start-end element. @@ -126,14 +125,11 @@ class HtmlPrettyPrinter (object): @type end: string @return: None """ - tag = tag.encode(self.encoding, "ignore") self.fd.write("<%s" % tag.replace("/", "")) for key, val in attrs.items(): - key = key.encode(self.encoding, "ignore") if val is None: self.fd.write(" %s" % key) else: - val = val.encode(self.encoding, "ignore") self.fd.write(' %s="%s"' % (key, quote_attrval(val))) self.fd.write(end) @@ -145,7 +141,6 @@ class HtmlPrettyPrinter (object): @type tag: string @return: None """ - tag = tag.encode(self.encoding, "ignore") self.fd.write("" % tag) def doctype (self, data): @@ -156,7 +151,6 @@ class HtmlPrettyPrinter (object): @type data: string @return: None """ - data = data.encode(self.encoding, "ignore") self.fd.write("" % data) def pi (self, data): @@ -167,7 +161,6 @@ class HtmlPrettyPrinter (object): @type data: string @return: None """ - data = data.encode(self.encoding, "ignore") self.fd.write("" % data) def cdata (self, data): @@ -178,7 +171,6 @@ class HtmlPrettyPrinter (object): @type data: string @return: None """ - data = data.encode(self.encoding, "ignore") self.fd.write("" % data) def characters (self, data): @@ -189,7 +181,6 @@ class HtmlPrettyPrinter (object): @type data: string @return: None """ - data = data.encode(self.encoding, "ignore") self.fd.write(data) diff --git a/linkcheck/HtmlParser/htmlparse.c b/linkcheck/HtmlParser/htmlparse.c deleted file mode 100644 index c9386988..00000000 --- a/linkcheck/HtmlParser/htmlparse.c +++ /dev/null @@ -1,2495 +0,0 @@ -/* A Bison parser, made by GNU Bison 3.0.4. */ - -/* Bison implementation for Yacc-like parsers in C - - Copyright (C) 1984, 1989-1990, 2000-2015 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -/* As a special exception, you may create a larger work that contains - part or all of the Bison parser skeleton and distribute that work - under terms of your choice, so long as that work isn't itself a - parser generator using the skeleton or a modified version thereof - as a parser skeleton. Alternatively, if you modify or redistribute - the parser skeleton itself, you may (at your option) remove this - special exception, which will cause the skeleton and the resulting - Bison output files to be licensed under the GNU General Public - License without this special exception. - - This special exception was added by the Free Software Foundation in - version 2.2 of Bison. */ - -/* C LALR(1) parser skeleton written by Richard Stallman, by - simplifying the original so-called "semantic" parser. */ - -/* All symbols defined below should begin with yy or YY, to avoid - infringing on user name space. This should be done even for local - variables, as they might otherwise be expanded by user macros. - There are some unavoidable exceptions within include files to - define necessary library symbols; they are noted "INFRINGES ON - USER NAME SPACE" below. */ - -/* Identify Bison output. */ -#define YYBISON 1 - -/* Bison version. */ -#define YYBISON_VERSION "3.0.4" - -/* Skeleton name. */ -#define YYSKELETON_NAME "yacc.c" - -/* Pure parsers. */ -#define YYPURE 1 - -/* Push parsers. */ -#define YYPUSH 0 - -/* Pull parsers. */ -#define YYPULL 1 - - - - -/* Copy the first part of user declarations. */ -#line 1 "htmlparse.y" /* yacc.c:339 */ - -/* Copyright (C) 2000-2014 Bastian Kleineidam - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -*/ -/* A SAX HTML parser. Includes Python module definition to make it - usable for Python programs. -*/ -#include "htmlsax.h" /* SAX interface (includes Python.h) */ -#include "structmember.h" /* Python include for object definition */ -#include -#include - -/* bison type definitions */ -#define YYSTYPE PyObject* -/* extern functions found in htmllex.l */ -extern int yylex(YYSTYPE* yylvalp, void* scanner); -extern int htmllexInit (void** scanner, UserData* data); -extern int htmllexDebug (void** scanner, int debug); -extern int htmllexStart (void* scanner, UserData* data, const char* s, int slen); -extern int htmllexStop (void* scanner, UserData* data); -extern int htmllexDestroy (void* scanner); -extern UserData* yyget_extra(void* scanner); -extern int yyget_lineno(void*); -#define YYERROR_VERBOSE 1 - -/* standard error reporting, indicating an internal error */ -static void yyerror (void *locp, char const *msg) { - fprintf(stderr, "htmlsax: internal parse error: %s\n", msg); -} - -/* Python 2/3 compatibility */ -#if PY_MAJOR_VERSION >= 3 - #define MOD_ERROR_VAL NULL - #define MOD_SUCCESS_VAL(val) val - #define MOD_INIT(name) PyMODINIT_FUNC PyInit_##name(void) - #define MOD_DEF(ob, name, doc, methods) \ - static struct PyModuleDef moduledef = { \ - PyModuleDef_HEAD_INIT, name, doc, -1, methods, }; \ - ob = PyModule_Create(&moduledef) - #define PyInt_FromLong PyLong_FromLong -#else - #define MOD_ERROR_VAL - #define MOD_SUCCESS_VAL(val) - #define MOD_INIT(name) void init##name(void) - #define MOD_DEF(ob, name, doc, methods) \ - ob = Py_InitModule3(name, methods, doc) -#endif - - -/* existing Python methods */ - -/* parser.resolve_entities */ -static PyObject* resolve_entities; -/* ListDict class, sorted dictionary */ -static PyObject* list_dict; -/* set_encoding helper function */ -static PyObject* set_encoding; -/* set_doctype helper function */ -static PyObject* set_doctype; -/* the unicode string u'meta' */ -static PyObject* u_meta; - -/* macros for easier scanner state manipulation */ - -/* clear buffer b, returning NULL on error */ -#define CLEAR_BUF(b) \ - PyMem_Resize(b, char, 1); \ - if (b == NULL) return NULL; \ - (b)[0] = '\0' - -/* clear buffer b, returning NULL and decref self on error */ -#define CLEAR_BUF_DECREF(self, b) \ - PyMem_Resize(b, char, 1); \ - if (b == NULL) { Py_DECREF(self); return NULL; } \ - (b)[0] = '\0' - -/* check an error condition and if true set error flag and goto given label */ -#define CHECK_ERROR(cond, label) \ - if (cond) { \ - error = 1; \ - goto label; \ - } - -/* generic Python callback macro */ -#define CALLBACK(ud, attr, format, arg, label) \ - if (PyObject_HasAttrString(ud->handler, attr) == 1) { \ - callback = PyObject_GetAttrString(ud->handler, attr); \ - CHECK_ERROR((callback == NULL), label); \ - result = PyObject_CallFunction(callback, format, arg); \ - CHECK_ERROR((result == NULL), label); \ - Py_CLEAR(callback); \ - Py_CLEAR(result); \ - } - -/* set old line and column */ -#define SET_OLD_LINECOL \ - ud->last_lineno = ud->lineno; \ - ud->last_column = ud->column - -/* parser type definition */ -typedef struct { - PyObject_HEAD - /* the handler object */ - PyObject* handler; - /* the charset encoding (PyBytesObject) */ - PyObject* encoding; - /* the document type (PyBytesObject) */ - PyObject* doctype; - UserData* userData; - void* scanner; -} parser_object; - -/* use Pythons memory management */ -#define YYMALLOC PyMem_Malloc -#define YYFREE PyMem_Free - -/* Test whether tag does not need an HTML end tag. - @ptag: ASCII encoded Python string in lowercase (!) - @parser: SAX parser object - @return: < 0 on error, > 0 if HTML end tag is needed, else 0 -*/ -static int html_end_tag (PyObject* ptag, PyObject* parser) { - PyObject* pdoctype = NULL; - char* doctype; - int error = 0; - int ret = 1; - pdoctype = PyObject_GetAttrString(parser, "doctype"); - CHECK_ERROR((pdoctype == NULL), finish_html_end_tag); - doctype = PyBytes_AsString(pdoctype); - CHECK_ERROR((doctype == NULL), finish_html_end_tag); - /* check for HTML (else it's presumably XHTML) */ - if (strcmp(doctype, "HTML") == 0) { - char* tag = PyBytes_AsString(ptag); - CHECK_ERROR((tag == NULL), finish_html_end_tag); - ret = strcmp(tag, "area")!=0 && - strcmp(tag, "base")!=0 && - strcmp(tag, "basefont")!=0 && - strcmp(tag, "br")!=0 && - strcmp(tag, "col")!=0 && - strcmp(tag, "frame")!=0 && - strcmp(tag, "hr")!=0 && - strcmp(tag, "img")!=0 && - strcmp(tag, "input")!=0 && - strcmp(tag, "isindex")!=0 && - strcmp(tag, "link")!=0 && - strcmp(tag, "meta")!=0 && - strcmp(tag, "param")!=0; - } -finish_html_end_tag: - Py_XDECREF(pdoctype); - if (error) { - return -1; - } - return ret; -} - - -#line 237 "htmlparse.c" /* yacc.c:339 */ - -# ifndef YY_NULLPTR -# if defined __cplusplus && 201103L <= __cplusplus -# define YY_NULLPTR nullptr -# else -# define YY_NULLPTR 0 -# endif -# endif - -/* Enabling verbose error messages. */ -#ifdef YYERROR_VERBOSE -# undef YYERROR_VERBOSE -# define YYERROR_VERBOSE 1 -#else -# define YYERROR_VERBOSE 0 -#endif - -/* In a future release of Bison, this section will be replaced - by #include "htmlparse.h". */ -#ifndef YY_YY_HTMLPARSE_H_INCLUDED -# define YY_YY_HTMLPARSE_H_INCLUDED -/* Debug traces. */ -#ifndef YYDEBUG -# define YYDEBUG 1 -#endif -#if YYDEBUG -extern int yydebug; -#endif - -/* Token type. */ -#ifndef YYTOKENTYPE -# define YYTOKENTYPE - enum yytokentype - { - T_WAIT = 258, - T_ERROR = 259, - T_TEXT = 260, - T_ELEMENT_START = 261, - T_ELEMENT_START_END = 262, - T_ELEMENT_END = 263, - T_SCRIPT = 264, - T_STYLE = 265, - T_PI = 266, - T_COMMENT = 267, - T_CDATA = 268, - T_DOCTYPE = 269 - }; -#endif - -/* Value type. */ -#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED -typedef int YYSTYPE; -# define YYSTYPE_IS_TRIVIAL 1 -# define YYSTYPE_IS_DECLARED 1 -#endif - - - -int yyparse (PyObject* scanner); - -#endif /* !YY_YY_HTMLPARSE_H_INCLUDED */ - -/* Copy the second part of user declarations. */ - -#line 302 "htmlparse.c" /* yacc.c:358 */ - -#ifdef short -# undef short -#endif - -#ifdef YYTYPE_UINT8 -typedef YYTYPE_UINT8 yytype_uint8; -#else -typedef unsigned char yytype_uint8; -#endif - -#ifdef YYTYPE_INT8 -typedef YYTYPE_INT8 yytype_int8; -#else -typedef signed char yytype_int8; -#endif - -#ifdef YYTYPE_UINT16 -typedef YYTYPE_UINT16 yytype_uint16; -#else -typedef unsigned short int yytype_uint16; -#endif - -#ifdef YYTYPE_INT16 -typedef YYTYPE_INT16 yytype_int16; -#else -typedef short int yytype_int16; -#endif - -#ifndef YYSIZE_T -# ifdef __SIZE_TYPE__ -# define YYSIZE_T __SIZE_TYPE__ -# elif defined size_t -# define YYSIZE_T size_t -# elif ! defined YYSIZE_T -# include /* INFRINGES ON USER NAME SPACE */ -# define YYSIZE_T size_t -# else -# define YYSIZE_T unsigned int -# endif -#endif - -#define YYSIZE_MAXIMUM ((YYSIZE_T) -1) - -#ifndef YY_ -# if defined YYENABLE_NLS && YYENABLE_NLS -# if ENABLE_NLS -# include /* INFRINGES ON USER NAME SPACE */ -# define YY_(Msgid) dgettext ("bison-runtime", Msgid) -# endif -# endif -# ifndef YY_ -# define YY_(Msgid) Msgid -# endif -#endif - -#ifndef YY_ATTRIBUTE -# if (defined __GNUC__ \ - && (2 < __GNUC__ || (__GNUC__ == 2 && 96 <= __GNUC_MINOR__))) \ - || defined __SUNPRO_C && 0x5110 <= __SUNPRO_C -# define YY_ATTRIBUTE(Spec) __attribute__(Spec) -# else -# define YY_ATTRIBUTE(Spec) /* empty */ -# endif -#endif - -#ifndef YY_ATTRIBUTE_PURE -# define YY_ATTRIBUTE_PURE YY_ATTRIBUTE ((__pure__)) -#endif - -#ifndef YY_ATTRIBUTE_UNUSED -# define YY_ATTRIBUTE_UNUSED YY_ATTRIBUTE ((__unused__)) -#endif - -#if !defined _Noreturn \ - && (!defined __STDC_VERSION__ || __STDC_VERSION__ < 201112) -# if defined _MSC_VER && 1200 <= _MSC_VER -# define _Noreturn __declspec (noreturn) -# else -# define _Noreturn YY_ATTRIBUTE ((__noreturn__)) -# endif -#endif - -/* Suppress unused-variable warnings by "using" E. */ -#if ! defined lint || defined __GNUC__ -# define YYUSE(E) ((void) (E)) -#else -# define YYUSE(E) /* empty */ -#endif - -#if defined __GNUC__ && 407 <= __GNUC__ * 100 + __GNUC_MINOR__ -/* Suppress an incorrect diagnostic about yylval being uninitialized. */ -# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \ - _Pragma ("GCC diagnostic push") \ - _Pragma ("GCC diagnostic ignored \"-Wuninitialized\"")\ - _Pragma ("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") -# define YY_IGNORE_MAYBE_UNINITIALIZED_END \ - _Pragma ("GCC diagnostic pop") -#else -# define YY_INITIAL_VALUE(Value) Value -#endif -#ifndef YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN -# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN -# define YY_IGNORE_MAYBE_UNINITIALIZED_END -#endif -#ifndef YY_INITIAL_VALUE -# define YY_INITIAL_VALUE(Value) /* Nothing. */ -#endif - - -#if ! defined yyoverflow || YYERROR_VERBOSE - -/* The parser invokes alloca or malloc; define the necessary symbols. */ - -# ifdef YYSTACK_USE_ALLOCA -# if YYSTACK_USE_ALLOCA -# ifdef __GNUC__ -# define YYSTACK_ALLOC __builtin_alloca -# elif defined __BUILTIN_VA_ARG_INCR -# include /* INFRINGES ON USER NAME SPACE */ -# elif defined _AIX -# define YYSTACK_ALLOC __alloca -# elif defined _MSC_VER -# include /* INFRINGES ON USER NAME SPACE */ -# define alloca _alloca -# else -# define YYSTACK_ALLOC alloca -# if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS -# include /* INFRINGES ON USER NAME SPACE */ - /* Use EXIT_SUCCESS as a witness for stdlib.h. */ -# ifndef EXIT_SUCCESS -# define EXIT_SUCCESS 0 -# endif -# endif -# endif -# endif -# endif - -# ifdef YYSTACK_ALLOC - /* Pacify GCC's 'empty if-body' warning. */ -# define YYSTACK_FREE(Ptr) do { /* empty */; } while (0) -# ifndef YYSTACK_ALLOC_MAXIMUM - /* The OS might guarantee only one guard page at the bottom of the stack, - and a page size can be as small as 4096 bytes. So we cannot safely - invoke alloca (N) if N exceeds 4096. Use a slightly smaller number - to allow for a few compiler-allocated temporary stack slots. */ -# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */ -# endif -# else -# define YYSTACK_ALLOC YYMALLOC -# define YYSTACK_FREE YYFREE -# ifndef YYSTACK_ALLOC_MAXIMUM -# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM -# endif -# if (defined __cplusplus && ! defined EXIT_SUCCESS \ - && ! ((defined YYMALLOC || defined malloc) \ - && (defined YYFREE || defined free))) -# include /* INFRINGES ON USER NAME SPACE */ -# ifndef EXIT_SUCCESS -# define EXIT_SUCCESS 0 -# endif -# endif -# ifndef YYMALLOC -# define YYMALLOC malloc -# if ! defined malloc && ! defined EXIT_SUCCESS -void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */ -# endif -# endif -# ifndef YYFREE -# define YYFREE free -# if ! defined free && ! defined EXIT_SUCCESS -void free (void *); /* INFRINGES ON USER NAME SPACE */ -# endif -# endif -# endif -#endif /* ! defined yyoverflow || YYERROR_VERBOSE */ - - -#if (! defined yyoverflow \ - && (! defined __cplusplus \ - || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL))) - -/* A type that is properly aligned for any stack member. */ -union yyalloc -{ - yytype_int16 yyss_alloc; - YYSTYPE yyvs_alloc; -}; - -/* The size of the maximum gap between one aligned stack and the next. */ -# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1) - -/* The size of an array large to enough to hold all stacks, each with - N elements. */ -# define YYSTACK_BYTES(N) \ - ((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \ - + YYSTACK_GAP_MAXIMUM) - -# define YYCOPY_NEEDED 1 - -/* Relocate STACK from its old location to the new one. The - local variables YYSIZE and YYSTACKSIZE give the old and new number of - elements in the stack, and YYPTR gives the new location of the - stack. Advance YYPTR to a properly aligned location for the next - stack. */ -# define YYSTACK_RELOCATE(Stack_alloc, Stack) \ - do \ - { \ - YYSIZE_T yynewbytes; \ - YYCOPY (&yyptr->Stack_alloc, Stack, yysize); \ - Stack = &yyptr->Stack_alloc; \ - yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \ - yyptr += yynewbytes / sizeof (*yyptr); \ - } \ - while (0) - -#endif - -#if defined YYCOPY_NEEDED && YYCOPY_NEEDED -/* Copy COUNT objects from SRC to DST. The source and destination do - not overlap. */ -# ifndef YYCOPY -# if defined __GNUC__ && 1 < __GNUC__ -# define YYCOPY(Dst, Src, Count) \ - __builtin_memcpy (Dst, Src, (Count) * sizeof (*(Src))) -# else -# define YYCOPY(Dst, Src, Count) \ - do \ - { \ - YYSIZE_T yyi; \ - for (yyi = 0; yyi < (Count); yyi++) \ - (Dst)[yyi] = (Src)[yyi]; \ - } \ - while (0) -# endif -# endif -#endif /* !YYCOPY_NEEDED */ - -/* YYFINAL -- State number of the termination state. */ -#define YYFINAL 15 -/* YYLAST -- Last index in YYTABLE. */ -#define YYLAST 26 - -/* YYNTOKENS -- Number of terminals. */ -#define YYNTOKENS 15 -/* YYNNTS -- Number of nonterminals. */ -#define YYNNTS 3 -/* YYNRULES -- Number of rules. */ -#define YYNRULES 15 -/* YYNSTATES -- Number of states. */ -#define YYNSTATES 17 - -/* YYTRANSLATE[YYX] -- Symbol number corresponding to YYX as returned - by yylex, with out-of-bounds checking. */ -#define YYUNDEFTOK 2 -#define YYMAXUTOK 269 - -#define YYTRANSLATE(YYX) \ - ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK) - -/* YYTRANSLATE[TOKEN-NUM] -- Symbol number corresponding to TOKEN-NUM - as returned by yylex, without out-of-bounds checking. */ -static const yytype_uint8 yytranslate[] = -{ - 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 -}; - -#if YYDEBUG - /* YYRLINE[YYN] -- Source line where rule number YYN was defined. */ -static const yytype_uint16 yyrline[] = -{ - 0, 196, 196, 199, 204, 208, 215, 256, 304, 340, - 359, 377, 396, 419, 443, 467 -}; -#endif - -#if YYDEBUG || YYERROR_VERBOSE || 0 -/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. - First, the terminals, then, starting at YYNTOKENS, nonterminals. */ -static const char *const yytname[] = -{ - "$end", "error", "$undefined", "T_WAIT", "T_ERROR", "T_TEXT", - "T_ELEMENT_START", "T_ELEMENT_START_END", "T_ELEMENT_END", "T_SCRIPT", - "T_STYLE", "T_PI", "T_COMMENT", "T_CDATA", "T_DOCTYPE", "$accept", - "elements", "element", YY_NULLPTR -}; -#endif - -# ifdef YYPRINT -/* YYTOKNUM[NUM] -- (External) token number corresponding to the - (internal) symbol number NUM (which must be that of a token). */ -static const yytype_uint16 yytoknum[] = -{ - 0, 256, 257, 258, 259, 260, 261, 262, 263, 264, - 265, 266, 267, 268, 269 -}; -# endif - -#define YYPACT_NINF -13 - -#define yypact_value_is_default(Yystate) \ - (!!((Yystate) == (-13))) - -#define YYTABLE_NINF -1 - -#define yytable_value_is_error(Yytable_value) \ - 0 - - /* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing - STATE-NUM. */ -static const yytype_int8 yypact[] = -{ - 12, -13, -13, -13, -13, -13, -13, -13, -13, -13, - -13, -13, -13, 0, -13, -13, -13 -}; - - /* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM. - Performed when YYTABLE does not specify something else to do. Zero - means the default is an error. */ -static const yytype_uint8 yydefact[] = -{ - 0, 4, 5, 15, 6, 7, 8, 13, 14, 10, - 9, 11, 12, 0, 2, 1, 3 -}; - - /* YYPGOTO[NTERM-NUM]. */ -static const yytype_int8 yypgoto[] = -{ - -13, -13, -12 -}; - - /* YYDEFGOTO[NTERM-NUM]. */ -static const yytype_int8 yydefgoto[] = -{ - -1, 13, 14 -}; - - /* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If - positive, shift that token. If negative, reduce the rule whose - number is the opposite. If YYTABLE_NINF, syntax error. */ -static const yytype_uint8 yytable[] = -{ - 15, 16, 0, 1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 1, 2, 3, 4, 5, - 6, 7, 8, 9, 10, 11, 12 -}; - -static const yytype_int8 yycheck[] = -{ - 0, 13, -1, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14 -}; - - /* YYSTOS[STATE-NUM] -- The (internal number of the) accessing - symbol of state STATE-NUM. */ -static const yytype_uint8 yystos[] = -{ - 0, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 12, 13, 14, 16, 17, 0, 17 -}; - - /* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ -static const yytype_uint8 yyr1[] = -{ - 0, 15, 16, 16, 17, 17, 17, 17, 17, 17, - 17, 17, 17, 17, 17, 17 -}; - - /* YYR2[YYN] -- Number of symbols on the right hand side of rule YYN. */ -static const yytype_uint8 yyr2[] = -{ - 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1 -}; - - -#define yyerrok (yyerrstatus = 0) -#define yyclearin (yychar = YYEMPTY) -#define YYEMPTY (-2) -#define YYEOF 0 - -#define YYACCEPT goto yyacceptlab -#define YYABORT goto yyabortlab -#define YYERROR goto yyerrorlab - - -#define YYRECOVERING() (!!yyerrstatus) - -#define YYBACKUP(Token, Value) \ -do \ - if (yychar == YYEMPTY) \ - { \ - yychar = (Token); \ - yylval = (Value); \ - YYPOPSTACK (yylen); \ - yystate = *yyssp; \ - goto yybackup; \ - } \ - else \ - { \ - yyerror (scanner, YY_("syntax error: cannot back up")); \ - YYERROR; \ - } \ -while (0) - -/* Error token number */ -#define YYTERROR 1 -#define YYERRCODE 256 - - - -/* Enable debugging if requested. */ -#if YYDEBUG - -# ifndef YYFPRINTF -# include /* INFRINGES ON USER NAME SPACE */ -# define YYFPRINTF fprintf -# endif - -# define YYDPRINTF(Args) \ -do { \ - if (yydebug) \ - YYFPRINTF Args; \ -} while (0) - -/* This macro is provided for backward compatibility. */ -#ifndef YY_LOCATION_PRINT -# define YY_LOCATION_PRINT(File, Loc) ((void) 0) -#endif - - -# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \ -do { \ - if (yydebug) \ - { \ - YYFPRINTF (stderr, "%s ", Title); \ - yy_symbol_print (stderr, \ - Type, Value, scanner); \ - YYFPRINTF (stderr, "\n"); \ - } \ -} while (0) - - -/*----------------------------------------. -| Print this symbol's value on YYOUTPUT. | -`----------------------------------------*/ - -static void -yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep, PyObject* scanner) -{ - FILE *yyo = yyoutput; - YYUSE (yyo); - YYUSE (scanner); - if (!yyvaluep) - return; -# ifdef YYPRINT - if (yytype < YYNTOKENS) - YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep); -# endif - YYUSE (yytype); -} - - -/*--------------------------------. -| Print this symbol on YYOUTPUT. | -`--------------------------------*/ - -static void -yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep, PyObject* scanner) -{ - YYFPRINTF (yyoutput, "%s %s (", - yytype < YYNTOKENS ? "token" : "nterm", yytname[yytype]); - - yy_symbol_value_print (yyoutput, yytype, yyvaluep, scanner); - YYFPRINTF (yyoutput, ")"); -} - -/*------------------------------------------------------------------. -| yy_stack_print -- Print the state stack from its BOTTOM up to its | -| TOP (included). | -`------------------------------------------------------------------*/ - -static void -yy_stack_print (yytype_int16 *yybottom, yytype_int16 *yytop) -{ - YYFPRINTF (stderr, "Stack now"); - for (; yybottom <= yytop; yybottom++) - { - int yybot = *yybottom; - YYFPRINTF (stderr, " %d", yybot); - } - YYFPRINTF (stderr, "\n"); -} - -# define YY_STACK_PRINT(Bottom, Top) \ -do { \ - if (yydebug) \ - yy_stack_print ((Bottom), (Top)); \ -} while (0) - - -/*------------------------------------------------. -| Report that the YYRULE is going to be reduced. | -`------------------------------------------------*/ - -static void -yy_reduce_print (yytype_int16 *yyssp, YYSTYPE *yyvsp, int yyrule, PyObject* scanner) -{ - unsigned long int yylno = yyrline[yyrule]; - int yynrhs = yyr2[yyrule]; - int yyi; - YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n", - yyrule - 1, yylno); - /* The symbols being reduced. */ - for (yyi = 0; yyi < yynrhs; yyi++) - { - YYFPRINTF (stderr, " $%d = ", yyi + 1); - yy_symbol_print (stderr, - yystos[yyssp[yyi + 1 - yynrhs]], - &(yyvsp[(yyi + 1) - (yynrhs)]) - , scanner); - YYFPRINTF (stderr, "\n"); - } -} - -# define YY_REDUCE_PRINT(Rule) \ -do { \ - if (yydebug) \ - yy_reduce_print (yyssp, yyvsp, Rule, scanner); \ -} while (0) - -/* Nonzero means print parse trace. It is left uninitialized so that - multiple parsers can coexist. */ -int yydebug; -#else /* !YYDEBUG */ -# define YYDPRINTF(Args) -# define YY_SYMBOL_PRINT(Title, Type, Value, Location) -# define YY_STACK_PRINT(Bottom, Top) -# define YY_REDUCE_PRINT(Rule) -#endif /* !YYDEBUG */ - - -/* YYINITDEPTH -- initial size of the parser's stacks. */ -#ifndef YYINITDEPTH -# define YYINITDEPTH 200 -#endif - -/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only - if the built-in stack extension method is used). - - Do not make this value too large; the results are undefined if - YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH) - evaluated with infinite-precision integer arithmetic. */ - -#ifndef YYMAXDEPTH -# define YYMAXDEPTH 10000 -#endif - - -#if YYERROR_VERBOSE - -# ifndef yystrlen -# if defined __GLIBC__ && defined _STRING_H -# define yystrlen strlen -# else -/* Return the length of YYSTR. */ -static YYSIZE_T -yystrlen (const char *yystr) -{ - YYSIZE_T yylen; - for (yylen = 0; yystr[yylen]; yylen++) - continue; - return yylen; -} -# endif -# endif - -# ifndef yystpcpy -# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE -# define yystpcpy stpcpy -# else -/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in - YYDEST. */ -static char * -yystpcpy (char *yydest, const char *yysrc) -{ - char *yyd = yydest; - const char *yys = yysrc; - - while ((*yyd++ = *yys++) != '\0') - continue; - - return yyd - 1; -} -# endif -# endif - -# ifndef yytnamerr -/* Copy to YYRES the contents of YYSTR after stripping away unnecessary - quotes and backslashes, so that it's suitable for yyerror. The - heuristic is that double-quoting is unnecessary unless the string - contains an apostrophe, a comma, or backslash (other than - backslash-backslash). YYSTR is taken from yytname. If YYRES is - null, do not copy; instead, return the length of what the result - would have been. */ -static YYSIZE_T -yytnamerr (char *yyres, const char *yystr) -{ - if (*yystr == '"') - { - YYSIZE_T yyn = 0; - char const *yyp = yystr; - - for (;;) - switch (*++yyp) - { - case '\'': - case ',': - goto do_not_strip_quotes; - - case '\\': - if (*++yyp != '\\') - goto do_not_strip_quotes; - /* Fall through. */ - default: - if (yyres) - yyres[yyn] = *yyp; - yyn++; - break; - - case '"': - if (yyres) - yyres[yyn] = '\0'; - return yyn; - } - do_not_strip_quotes: ; - } - - if (! yyres) - return yystrlen (yystr); - - return yystpcpy (yyres, yystr) - yyres; -} -# endif - -/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message - about the unexpected token YYTOKEN for the state stack whose top is - YYSSP. - - Return 0 if *YYMSG was successfully written. Return 1 if *YYMSG is - not large enough to hold the message. In that case, also set - *YYMSG_ALLOC to the required number of bytes. Return 2 if the - required number of bytes is too large to store. */ -static int -yysyntax_error (YYSIZE_T *yymsg_alloc, char **yymsg, - yytype_int16 *yyssp, int yytoken) -{ - YYSIZE_T yysize0 = yytnamerr (YY_NULLPTR, yytname[yytoken]); - YYSIZE_T yysize = yysize0; - enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 }; - /* Internationalized format string. */ - const char *yyformat = YY_NULLPTR; - /* Arguments of yyformat. */ - char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM]; - /* Number of reported tokens (one for the "unexpected", one per - "expected"). */ - int yycount = 0; - - /* There are many possibilities here to consider: - - If this state is a consistent state with a default action, then - the only way this function was invoked is if the default action - is an error action. In that case, don't check for expected - tokens because there are none. - - The only way there can be no lookahead present (in yychar) is if - this state is a consistent state with a default action. Thus, - detecting the absence of a lookahead is sufficient to determine - that there is no unexpected or expected token to report. In that - case, just report a simple "syntax error". - - Don't assume there isn't a lookahead just because this state is a - consistent state with a default action. There might have been a - previous inconsistent state, consistent state with a non-default - action, or user semantic action that manipulated yychar. - - Of course, the expected token list depends on states to have - correct lookahead information, and it depends on the parser not - to perform extra reductions after fetching a lookahead from the - scanner and before detecting a syntax error. Thus, state merging - (from LALR or IELR) and default reductions corrupt the expected - token list. However, the list is correct for canonical LR with - one exception: it will still contain any token that will not be - accepted due to an error action in a later state. - */ - if (yytoken != YYEMPTY) - { - int yyn = yypact[*yyssp]; - yyarg[yycount++] = yytname[yytoken]; - if (!yypact_value_is_default (yyn)) - { - /* Start YYX at -YYN if negative to avoid negative indexes in - YYCHECK. In other words, skip the first -YYN actions for - this state because they are default actions. */ - int yyxbegin = yyn < 0 ? -yyn : 0; - /* Stay within bounds of both yycheck and yytname. */ - int yychecklim = YYLAST - yyn + 1; - int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS; - int yyx; - - for (yyx = yyxbegin; yyx < yyxend; ++yyx) - if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR - && !yytable_value_is_error (yytable[yyx + yyn])) - { - if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM) - { - yycount = 1; - yysize = yysize0; - break; - } - yyarg[yycount++] = yytname[yyx]; - { - YYSIZE_T yysize1 = yysize + yytnamerr (YY_NULLPTR, yytname[yyx]); - if (! (yysize <= yysize1 - && yysize1 <= YYSTACK_ALLOC_MAXIMUM)) - return 2; - yysize = yysize1; - } - } - } - } - - switch (yycount) - { -# define YYCASE_(N, S) \ - case N: \ - yyformat = S; \ - break - YYCASE_(0, YY_("syntax error")); - YYCASE_(1, YY_("syntax error, unexpected %s")); - YYCASE_(2, YY_("syntax error, unexpected %s, expecting %s")); - YYCASE_(3, YY_("syntax error, unexpected %s, expecting %s or %s")); - YYCASE_(4, YY_("syntax error, unexpected %s, expecting %s or %s or %s")); - YYCASE_(5, YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s")); -# undef YYCASE_ - } - - { - YYSIZE_T yysize1 = yysize + yystrlen (yyformat); - if (! (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM)) - return 2; - yysize = yysize1; - } - - if (*yymsg_alloc < yysize) - { - *yymsg_alloc = 2 * yysize; - if (! (yysize <= *yymsg_alloc - && *yymsg_alloc <= YYSTACK_ALLOC_MAXIMUM)) - *yymsg_alloc = YYSTACK_ALLOC_MAXIMUM; - return 1; - } - - /* Avoid sprintf, as that infringes on the user's name space. - Don't have undefined behavior even if the translation - produced a string with the wrong number of "%s"s. */ - { - char *yyp = *yymsg; - int yyi = 0; - while ((*yyp = *yyformat) != '\0') - if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount) - { - yyp += yytnamerr (yyp, yyarg[yyi++]); - yyformat += 2; - } - else - { - yyp++; - yyformat++; - } - } - return 0; -} -#endif /* YYERROR_VERBOSE */ - -/*-----------------------------------------------. -| Release the memory associated to this symbol. | -`-----------------------------------------------*/ - -static void -yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep, PyObject* scanner) -{ - YYUSE (yyvaluep); - YYUSE (scanner); - if (!yymsg) - yymsg = "Deleting"; - YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp); - - YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN - YYUSE (yytype); - YY_IGNORE_MAYBE_UNINITIALIZED_END -} - - - - -/*----------. -| yyparse. | -`----------*/ - -int -yyparse (PyObject* scanner) -{ -/* The lookahead symbol. */ -int yychar; - - -/* The semantic value of the lookahead symbol. */ -/* Default value used for initialization, for pacifying older GCCs - or non-GCC compilers. */ -YY_INITIAL_VALUE (static YYSTYPE yyval_default;) -YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); - - /* Number of syntax errors so far. */ - int yynerrs; - - int yystate; - /* Number of tokens to shift before error messages enabled. */ - int yyerrstatus; - - /* The stacks and their tools: - 'yyss': related to states. - 'yyvs': related to semantic values. - - Refer to the stacks through separate pointers, to allow yyoverflow - to reallocate them elsewhere. */ - - /* The state stack. */ - yytype_int16 yyssa[YYINITDEPTH]; - yytype_int16 *yyss; - yytype_int16 *yyssp; - - /* The semantic value stack. */ - YYSTYPE yyvsa[YYINITDEPTH]; - YYSTYPE *yyvs; - YYSTYPE *yyvsp; - - YYSIZE_T yystacksize; - - int yyn; - int yyresult; - /* Lookahead token as an internal (translated) token number. */ - int yytoken = 0; - /* The variables used to return semantic value and location from the - action routines. */ - YYSTYPE yyval; - -#if YYERROR_VERBOSE - /* Buffer for error messages, and its allocated size. */ - char yymsgbuf[128]; - char *yymsg = yymsgbuf; - YYSIZE_T yymsg_alloc = sizeof yymsgbuf; -#endif - -#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N)) - - /* The number of symbols on the RHS of the reduced rule. - Keep to zero when no symbol should be popped. */ - int yylen = 0; - - yyssp = yyss = yyssa; - yyvsp = yyvs = yyvsa; - yystacksize = YYINITDEPTH; - - YYDPRINTF ((stderr, "Starting parse\n")); - - yystate = 0; - yyerrstatus = 0; - yynerrs = 0; - yychar = YYEMPTY; /* Cause a token to be read. */ - goto yysetstate; - -/*------------------------------------------------------------. -| yynewstate -- Push a new state, which is found in yystate. | -`------------------------------------------------------------*/ - yynewstate: - /* In all cases, when you get here, the value and location stacks - have just been pushed. So pushing a state here evens the stacks. */ - yyssp++; - - yysetstate: - *yyssp = yystate; - - if (yyss + yystacksize - 1 <= yyssp) - { - /* Get the current used size of the three stacks, in elements. */ - YYSIZE_T yysize = yyssp - yyss + 1; - -#ifdef yyoverflow - { - /* Give user a chance to reallocate the stack. Use copies of - these so that the &'s don't force the real ones into - memory. */ - YYSTYPE *yyvs1 = yyvs; - yytype_int16 *yyss1 = yyss; - - /* Each stack pointer address is followed by the size of the - data in use in that stack, in bytes. This used to be a - conditional around just the two extra args, but that might - be undefined if yyoverflow is a macro. */ - yyoverflow (YY_("memory exhausted"), - &yyss1, yysize * sizeof (*yyssp), - &yyvs1, yysize * sizeof (*yyvsp), - &yystacksize); - - yyss = yyss1; - yyvs = yyvs1; - } -#else /* no yyoverflow */ -# ifndef YYSTACK_RELOCATE - goto yyexhaustedlab; -# else - /* Extend the stack our own way. */ - if (YYMAXDEPTH <= yystacksize) - goto yyexhaustedlab; - yystacksize *= 2; - if (YYMAXDEPTH < yystacksize) - yystacksize = YYMAXDEPTH; - - { - yytype_int16 *yyss1 = yyss; - union yyalloc *yyptr = - (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize)); - if (! yyptr) - goto yyexhaustedlab; - YYSTACK_RELOCATE (yyss_alloc, yyss); - YYSTACK_RELOCATE (yyvs_alloc, yyvs); -# undef YYSTACK_RELOCATE - if (yyss1 != yyssa) - YYSTACK_FREE (yyss1); - } -# endif -#endif /* no yyoverflow */ - - yyssp = yyss + yysize - 1; - yyvsp = yyvs + yysize - 1; - - YYDPRINTF ((stderr, "Stack size increased to %lu\n", - (unsigned long int) yystacksize)); - - if (yyss + yystacksize - 1 <= yyssp) - YYABORT; - } - - YYDPRINTF ((stderr, "Entering state %d\n", yystate)); - - if (yystate == YYFINAL) - YYACCEPT; - - goto yybackup; - -/*-----------. -| yybackup. | -`-----------*/ -yybackup: - - /* Do appropriate processing given the current state. Read a - lookahead token if we need one and don't already have one. */ - - /* First try to decide what to do without reference to lookahead token. */ - yyn = yypact[yystate]; - if (yypact_value_is_default (yyn)) - goto yydefault; - - /* Not known => get a lookahead token if don't already have one. */ - - /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. */ - if (yychar == YYEMPTY) - { - YYDPRINTF ((stderr, "Reading a token: ")); - yychar = yylex (&yylval, scanner); - } - - if (yychar <= YYEOF) - { - yychar = yytoken = YYEOF; - YYDPRINTF ((stderr, "Now at end of input.\n")); - } - else - { - yytoken = YYTRANSLATE (yychar); - YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc); - } - - /* If the proper action on seeing token YYTOKEN is to reduce or to - detect an error, take that action. */ - yyn += yytoken; - if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) - goto yydefault; - yyn = yytable[yyn]; - if (yyn <= 0) - { - if (yytable_value_is_error (yyn)) - goto yyerrlab; - yyn = -yyn; - goto yyreduce; - } - - /* Count tokens shifted since error; after three, turn off error - status. */ - if (yyerrstatus) - yyerrstatus--; - - /* Shift the lookahead token. */ - YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc); - - /* Discard the shifted token. */ - yychar = YYEMPTY; - - yystate = yyn; - YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN - *++yyvsp = yylval; - YY_IGNORE_MAYBE_UNINITIALIZED_END - - goto yynewstate; - - -/*-----------------------------------------------------------. -| yydefault -- do the default action for the current state. | -`-----------------------------------------------------------*/ -yydefault: - yyn = yydefact[yystate]; - if (yyn == 0) - goto yyerrlab; - goto yyreduce; - - -/*-----------------------------. -| yyreduce -- Do a reduction. | -`-----------------------------*/ -yyreduce: - /* yyn is the number of a rule to reduce with. */ - yylen = yyr2[yyn]; - - /* If YYLEN is nonzero, implement the default value of the action: - '$$ = $1'. - - Otherwise, the following line sets YYVAL to garbage. - This behavior is undocumented and Bison - users should not rely upon it. Assigning to YYVAL - unconditionally makes the parser a bit smaller, and it avoids a - GCC warning that YYVAL may be used uninitialized. */ - yyval = yyvsp[1-yylen]; - - - YY_REDUCE_PRINT (yyn); - switch (yyn) - { - case 2: -#line 196 "htmlparse.y" /* yacc.c:1646 */ - { - /* parse a single element */ -} -#line 1389 "htmlparse.c" /* yacc.c:1646 */ - break; - - case 3: -#line 199 "htmlparse.y" /* yacc.c:1646 */ - { - /* parse a list of elements */ -} -#line 1397 "htmlparse.c" /* yacc.c:1646 */ - break; - - case 4: -#line 204 "htmlparse.y" /* yacc.c:1646 */ - { - /* wait for more lexer input */ - YYACCEPT; -} -#line 1406 "htmlparse.c" /* yacc.c:1646 */ - break; - - case 5: -#line 209 "htmlparse.y" /* yacc.c:1646 */ - { - /* an error occured in the scanner, the python exception must be set */ - UserData* ud = yyget_extra(scanner); - PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb)); - YYABORT; -} -#line 1417 "htmlparse.c" /* yacc.c:1646 */ - break; - - case 6: -#line 216 "htmlparse.y" /* yacc.c:1646 */ - { - /* parsed HTML start tag (eg. ) - $1 is a PyTuple (, ) - is a PyObject, is a ListDict */ - UserData* ud = yyget_extra(scanner); - PyObject* callback = NULL; - PyObject* result = NULL; - PyObject* tag = PyTuple_GET_ITEM((yyvsp[0]), 0); - PyObject* attrs = PyTuple_GET_ITEM((yyvsp[0]), 1); - int error = 0; - int cmp; - CHECK_ERROR((tag == NULL || attrs == NULL), finish_start); - cmp = PyObject_RichCompareBool(tag, u_meta, Py_EQ); - CHECK_ERROR((cmp == -1), finish_start); - if (cmp == 1) { - /* set encoding */ - result = PyObject_CallFunction(set_encoding, "OO", ud->parser, attrs); - CHECK_ERROR((result == NULL), finish_start); - Py_CLEAR(result); - } - if (PyObject_HasAttrString(ud->handler, "start_element") == 1) { - callback = PyObject_GetAttrString(ud->handler, "start_element"); - CHECK_ERROR((!callback), finish_start); - result = PyObject_CallFunction(callback, "OO", tag, attrs); - CHECK_ERROR((!result), finish_start); - Py_CLEAR(callback); - Py_CLEAR(result); - } -finish_start: - Py_XDECREF(callback); - Py_XDECREF(result); - Py_XDECREF(tag); - Py_XDECREF(attrs); - Py_DECREF((yyvsp[0])); - if (error) { - PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb)); - YYABORT; - } - SET_OLD_LINECOL; -} -#line 1462 "htmlparse.c" /* yacc.c:1646 */ - break; - - case 7: -#line 257 "htmlparse.y" /* yacc.c:1646 */ - { - /* parsed HTML start-end tag (eg.
) - $1 is a PyTuple (, ) - is a PyObject, is a ListDict */ - UserData* ud = yyget_extra(scanner); - PyObject* callback = NULL; - PyObject* result = NULL; - PyObject* tag = PyTuple_GET_ITEM((yyvsp[0]), 0); - PyObject* attrs = PyTuple_GET_ITEM((yyvsp[0]), 1); - int error = 0; - int cmp; - char* fname; - PyObject* tagname; - CHECK_ERROR((tag == NULL || attrs == NULL), finish_start_end); - tagname = PyUnicode_AsEncodedString(tag, "ascii", "ignore"); - CHECK_ERROR((tagname == NULL), finish_start_end); - cmp = PyObject_RichCompareBool(tag, u_meta, Py_EQ); - CHECK_ERROR((cmp == -1), finish_start_end); - if (cmp == 1) { - /* set encoding */ - result = PyObject_CallFunction(set_encoding, "OO", ud->parser, attrs); - CHECK_ERROR((result == NULL), finish_start_end); - Py_CLEAR(result); - } - cmp = html_end_tag(tagname, ud->parser); - CHECK_ERROR((cmp < 0), finish_start_end); - fname = (cmp == 0 ? "start_element" : "start_end_element"); - if (PyObject_HasAttrString(ud->handler, fname) == 1) { - callback = PyObject_GetAttrString(ud->handler, fname); - CHECK_ERROR((!callback), finish_start_end); - result = PyObject_CallFunction(callback, "OO", tag, attrs); - CHECK_ERROR((!result), finish_start_end); - Py_CLEAR(callback); - Py_CLEAR(result); - } -finish_start_end: - Py_XDECREF(callback); - Py_XDECREF(result); - Py_XDECREF(tag); - Py_XDECREF(attrs); - Py_DECREF((yyvsp[0])); - if (error) { - PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb)); - YYABORT; - } - SET_OLD_LINECOL; -} -#line 1514 "htmlparse.c" /* yacc.c:1646 */ - break; - - case 8: -#line 305 "htmlparse.y" /* yacc.c:1646 */ - { - /* parsed HTML end tag (eg. ) - $1 is a PyUnicode with the tag name */ - UserData* ud = yyget_extra(scanner); - PyObject* callback = NULL; - PyObject* result = NULL; - int error = 0; - int cmp; - /* encode tagname in ASCII, ignoring any unknown chars */ - PyObject* tagname = PyUnicode_AsEncodedString((yyvsp[0]), "ascii", "ignore"); - if (tagname == NULL) { - error = 1; - goto finish_end; - } - cmp = html_end_tag(tagname, ud->parser); - CHECK_ERROR((cmp < 0), finish_end); - if (PyObject_HasAttrString(ud->handler, "end_element") == 1 && cmp > 0) { - callback = PyObject_GetAttrString(ud->handler, "end_element"); - CHECK_ERROR((callback == NULL), finish_end); - result = PyObject_CallFunction(callback, "O", (yyvsp[0])); - CHECK_ERROR((result == NULL), finish_end); - Py_CLEAR(callback); - Py_CLEAR(result); - } -finish_end: - Py_XDECREF(tagname); - Py_XDECREF(callback); - Py_XDECREF(result); - Py_DECREF((yyvsp[0])); - if (error) { - PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb)); - YYABORT; - } - SET_OLD_LINECOL; -} -#line 1554 "htmlparse.c" /* yacc.c:1646 */ - break; - - case 9: -#line 341 "htmlparse.y" /* yacc.c:1646 */ - { - /* parsed HTML comment (eg. ) - $1 is a PyUnicode with the comment content */ - UserData* ud = yyget_extra(scanner); - PyObject* callback = NULL; - PyObject* result = NULL; - int error = 0; - CALLBACK(ud, "comment", "O", (yyvsp[0]), finish_comment); -finish_comment: - Py_XDECREF(callback); - Py_XDECREF(result); - Py_DECREF((yyvsp[0])); - if (error) { - PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb)); - YYABORT; - } - SET_OLD_LINECOL; -} -#line 1577 "htmlparse.c" /* yacc.c:1646 */ - break; - - case 10: -#line 360 "htmlparse.y" /* yacc.c:1646 */ - { - /* $1 is a PyUnicode */ - UserData* ud = yyget_extra(scanner); - PyObject* callback = NULL; - PyObject* result = NULL; - int error = 0; - CALLBACK(ud, "pi", "O", (yyvsp[0]), finish_pi); -finish_pi: - Py_XDECREF(callback); - Py_XDECREF(result); - Py_DECREF((yyvsp[0])); - if (error) { - PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb)); - YYABORT; - } - SET_OLD_LINECOL; -} -#line 1599 "htmlparse.c" /* yacc.c:1646 */ - break; - - case 11: -#line 378 "htmlparse.y" /* yacc.c:1646 */ - { - /* parsed HTML CDATA (eg. ) - $1 is a PyUnicode with the CDATA content */ - UserData* ud = yyget_extra(scanner); - PyObject* callback = NULL; - PyObject* result = NULL; - int error = 0; - CALLBACK(ud, "cdata", "O", (yyvsp[0]), finish_cdata); -finish_cdata: - Py_XDECREF(callback); - Py_XDECREF(result); - Py_DECREF((yyvsp[0])); - if (error) { - PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb)); - YYABORT; - } - SET_OLD_LINECOL; -} -#line 1622 "htmlparse.c" /* yacc.c:1646 */ - break; - - case 12: -#line 397 "htmlparse.y" /* yacc.c:1646 */ - { - /* parsed HTML doctype (eg. ) - $1 is a PyUnicode with the doctype content */ - UserData* ud = yyget_extra(scanner); - PyObject* callback = NULL; - PyObject* result = NULL; - int error = 0; - /* set encoding */ - result = PyObject_CallFunction(set_doctype, "OO", ud->parser, (yyvsp[0])); - CHECK_ERROR((result == NULL), finish_doctype); - Py_CLEAR(result); - CALLBACK(ud, "doctype", "O", (yyvsp[0]), finish_doctype); -finish_doctype: - Py_XDECREF(callback); - Py_XDECREF(result); - Py_DECREF((yyvsp[0])); - if (error) { - PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb)); - YYABORT; - } - SET_OLD_LINECOL; -} -#line 1649 "htmlparse.c" /* yacc.c:1646 */ - break; - - case 13: -#line 420 "htmlparse.y" /* yacc.c:1646 */ - { - /* parsed HTML script content (plus end tag which is omitted) - $1 is a PyUnicode with the script content */ - UserData* ud = yyget_extra(scanner); - PyObject* callback = NULL; - PyObject* result = NULL; - int error = 0; - PyObject* script = PyUnicode_DecodeASCII("script", 6, "ignore"); - CHECK_ERROR((script == NULL), finish_script); - CALLBACK(ud, "characters", "O", (yyvsp[0]), finish_script); - /* emit the omitted end tag */ - CALLBACK(ud, "end_element", "O", script, finish_script); -finish_script: - Py_XDECREF(callback); - Py_XDECREF(script); - Py_XDECREF(result); - Py_DECREF((yyvsp[0])); - if (error) { - PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb)); - YYABORT; - } - SET_OLD_LINECOL; -} -#line 1677 "htmlparse.c" /* yacc.c:1646 */ - break; - - case 14: -#line 444 "htmlparse.y" /* yacc.c:1646 */ - { - /* parsed HTML style content (plus end tag which is omitted) - $1 is a PyUnicode with the style content */ - UserData* ud = yyget_extra(scanner); - PyObject* callback = NULL; - PyObject* result = NULL; - int error = 0; - PyObject* style = PyUnicode_DecodeASCII("style", 5, "ignore"); - CHECK_ERROR((style == NULL), finish_style); - CALLBACK(ud, "characters", "O", (yyvsp[0]), finish_style); - /* emit the omitted end tag */ - CALLBACK(ud, "end_element", "O", style, finish_style); -finish_style: - Py_XDECREF(callback); - Py_XDECREF(style); - Py_XDECREF(result); - Py_DECREF((yyvsp[0])); - if (error) { - PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb)); - YYABORT; - } - SET_OLD_LINECOL; -} -#line 1705 "htmlparse.c" /* yacc.c:1646 */ - break; - - case 15: -#line 468 "htmlparse.y" /* yacc.c:1646 */ - { - /* parsed HTML text data - $1 is a PyUnicode with the text */ - /* Remember this is also called as a lexer fallback when no - HTML structure element could be recognized. */ - UserData* ud = yyget_extra(scanner); - PyObject* callback = NULL; - PyObject* result = NULL; - int error = 0; - CALLBACK(ud, "characters", "O", (yyvsp[0]), finish_characters); -finish_characters: - Py_XDECREF(callback); - Py_XDECREF(result); - Py_DECREF((yyvsp[0])); - if (error) { - PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb)); - YYABORT; - } - SET_OLD_LINECOL; -} -#line 1730 "htmlparse.c" /* yacc.c:1646 */ - break; - - -#line 1734 "htmlparse.c" /* yacc.c:1646 */ - default: break; - } - /* User semantic actions sometimes alter yychar, and that requires - that yytoken be updated with the new translation. We take the - approach of translating immediately before every use of yytoken. - One alternative is translating here after every semantic action, - but that translation would be missed if the semantic action invokes - YYABORT, YYACCEPT, or YYERROR immediately after altering yychar or - if it invokes YYBACKUP. In the case of YYABORT or YYACCEPT, an - incorrect destructor might then be invoked immediately. In the - case of YYERROR or YYBACKUP, subsequent parser actions might lead - to an incorrect destructor call or verbose syntax error message - before the lookahead is translated. */ - YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc); - - YYPOPSTACK (yylen); - yylen = 0; - YY_STACK_PRINT (yyss, yyssp); - - *++yyvsp = yyval; - - /* Now 'shift' the result of the reduction. Determine what state - that goes to, based on the state we popped back to and the rule - number reduced by. */ - - yyn = yyr1[yyn]; - - yystate = yypgoto[yyn - YYNTOKENS] + *yyssp; - if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp) - yystate = yytable[yystate]; - else - yystate = yydefgoto[yyn - YYNTOKENS]; - - goto yynewstate; - - -/*--------------------------------------. -| yyerrlab -- here on detecting error. | -`--------------------------------------*/ -yyerrlab: - /* Make sure we have latest lookahead translation. See comments at - user semantic actions for why this is necessary. */ - yytoken = yychar == YYEMPTY ? YYEMPTY : YYTRANSLATE (yychar); - - /* If not already recovering from an error, report this error. */ - if (!yyerrstatus) - { - ++yynerrs; -#if ! YYERROR_VERBOSE - yyerror (scanner, YY_("syntax error")); -#else -# define YYSYNTAX_ERROR yysyntax_error (&yymsg_alloc, &yymsg, \ - yyssp, yytoken) - { - char const *yymsgp = YY_("syntax error"); - int yysyntax_error_status; - yysyntax_error_status = YYSYNTAX_ERROR; - if (yysyntax_error_status == 0) - yymsgp = yymsg; - else if (yysyntax_error_status == 1) - { - if (yymsg != yymsgbuf) - YYSTACK_FREE (yymsg); - yymsg = (char *) YYSTACK_ALLOC (yymsg_alloc); - if (!yymsg) - { - yymsg = yymsgbuf; - yymsg_alloc = sizeof yymsgbuf; - yysyntax_error_status = 2; - } - else - { - yysyntax_error_status = YYSYNTAX_ERROR; - yymsgp = yymsg; - } - } - yyerror (scanner, yymsgp); - if (yysyntax_error_status == 2) - goto yyexhaustedlab; - } -# undef YYSYNTAX_ERROR -#endif - } - - - - if (yyerrstatus == 3) - { - /* If just tried and failed to reuse lookahead token after an - error, discard it. */ - - if (yychar <= YYEOF) - { - /* Return failure if at end of input. */ - if (yychar == YYEOF) - YYABORT; - } - else - { - yydestruct ("Error: discarding", - yytoken, &yylval, scanner); - yychar = YYEMPTY; - } - } - - /* Else will try to reuse lookahead token after shifting the error - token. */ - goto yyerrlab1; - - -/*---------------------------------------------------. -| yyerrorlab -- error raised explicitly by YYERROR. | -`---------------------------------------------------*/ -yyerrorlab: - - /* Pacify compilers like GCC when the user code never invokes - YYERROR and the label yyerrorlab therefore never appears in user - code. */ - if (/*CONSTCOND*/ 0) - goto yyerrorlab; - - /* Do not reclaim the symbols of the rule whose action triggered - this YYERROR. */ - YYPOPSTACK (yylen); - yylen = 0; - YY_STACK_PRINT (yyss, yyssp); - yystate = *yyssp; - goto yyerrlab1; - - -/*-------------------------------------------------------------. -| yyerrlab1 -- common code for both syntax error and YYERROR. | -`-------------------------------------------------------------*/ -yyerrlab1: - yyerrstatus = 3; /* Each real token shifted decrements this. */ - - for (;;) - { - yyn = yypact[yystate]; - if (!yypact_value_is_default (yyn)) - { - yyn += YYTERROR; - if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR) - { - yyn = yytable[yyn]; - if (0 < yyn) - break; - } - } - - /* Pop the current state because it cannot handle the error token. */ - if (yyssp == yyss) - YYABORT; - - - yydestruct ("Error: popping", - yystos[yystate], yyvsp, scanner); - YYPOPSTACK (1); - yystate = *yyssp; - YY_STACK_PRINT (yyss, yyssp); - } - - YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN - *++yyvsp = yylval; - YY_IGNORE_MAYBE_UNINITIALIZED_END - - - /* Shift the error token. */ - YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp); - - yystate = yyn; - goto yynewstate; - - -/*-------------------------------------. -| yyacceptlab -- YYACCEPT comes here. | -`-------------------------------------*/ -yyacceptlab: - yyresult = 0; - goto yyreturn; - -/*-----------------------------------. -| yyabortlab -- YYABORT comes here. | -`-----------------------------------*/ -yyabortlab: - yyresult = 1; - goto yyreturn; - -#if !defined yyoverflow || YYERROR_VERBOSE -/*-------------------------------------------------. -| yyexhaustedlab -- memory exhaustion comes here. | -`-------------------------------------------------*/ -yyexhaustedlab: - yyerror (scanner, YY_("memory exhausted")); - yyresult = 2; - /* Fall through. */ -#endif - -yyreturn: - if (yychar != YYEMPTY) - { - /* Make sure we have latest lookahead translation. See comments at - user semantic actions for why this is necessary. */ - yytoken = YYTRANSLATE (yychar); - yydestruct ("Cleanup: discarding lookahead", - yytoken, &yylval, scanner); - } - /* Do not reclaim the symbols of the rule whose action triggered - this YYABORT or YYACCEPT. */ - YYPOPSTACK (yylen); - YY_STACK_PRINT (yyss, yyssp); - while (yyssp != yyss) - { - yydestruct ("Cleanup: popping", - yystos[*yyssp], yyvsp, scanner); - YYPOPSTACK (1); - } -#ifndef yyoverflow - if (yyss != yyssa) - YYSTACK_FREE (yyss); -#endif -#if YYERROR_VERBOSE - if (yymsg != yymsgbuf) - YYSTACK_FREE (yymsg); -#endif - return yyresult; -} -#line 490 "htmlparse.y" /* yacc.c:1906 */ - - -/* create parser object */ -static PyObject* parser_new (PyTypeObject* type, PyObject* args, PyObject* kwds) { - parser_object* self; - if ((self = (parser_object*) type->tp_alloc(type, 0)) == NULL) { - return NULL; - } - Py_INCREF(Py_None); - self->handler = Py_None; - /* reset userData */ - self->userData = PyMem_New(UserData, sizeof(UserData)); - if (self->userData == NULL) { - Py_DECREF(self->handler); - Py_DECREF(self); - return NULL; - } - self->userData->handler = self->handler; - self->userData->buf = NULL; - CLEAR_BUF_DECREF(self, self->userData->buf); - self->userData->nextpos = 0; - self->userData->bufpos = 0; - self->userData->pos = 0; - self->userData->column = 1; - self->userData->last_column = 1; - self->userData->lineno = 1; - self->userData->last_lineno = 1; - self->userData->tmp_buf = NULL; - CLEAR_BUF_DECREF(self, self->userData->tmp_buf); - self->userData->tmp_tag = self->userData->tmp_attrname = - self->userData->tmp_attrval = self->userData->tmp_attrs = - self->userData->lexbuf = NULL; - self->userData->resolve_entities = resolve_entities; - self->userData->list_dict = list_dict; - self->userData->exc_type = NULL; - self->userData->exc_val = NULL; - self->userData->exc_tb = NULL; - self->scanner = NULL; - if (htmllexInit(&(self->scanner), self->userData)!=0) { - Py_DECREF(self->handler); - Py_DECREF(self); - return NULL; - } - self->encoding = PyBytes_FromString("iso8859-1"); - if (self->encoding == NULL) { - Py_DECREF(self->handler); - Py_DECREF(self); - return NULL; - } - self->doctype = PyBytes_FromString("HTML"); - if (self->doctype == NULL) { - Py_DECREF(self->encoding); - Py_DECREF(self->handler); - Py_DECREF(self); - return NULL; - } - self->userData->parser = (PyObject*)self; - return (PyObject*) self; -} - - -/* initialize parser object */ -static int parser_init (parser_object* self, PyObject* args, PyObject* kwds) { - PyObject* handler = NULL; - static char *kwlist[] = {"handler", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O", kwlist, &handler)) { - return -1; - } - if (handler == NULL) { - return 0; - } - Py_DECREF(self->handler); - Py_INCREF(handler); - self->handler = handler; - self->userData->handler = self->handler; - return 0; -} - - -/* traverse all used subobjects participating in reference cycles */ -static int parser_traverse (parser_object* self, visitproc visit, void* arg) { - Py_VISIT(self->handler); - return 0; -} - - -/* clear all used subobjects participating in reference cycles */ -static int parser_clear (parser_object* self) { - self->userData->handler = NULL; - Py_CLEAR(self->handler); - return 0; -} - - -/* free all allocated resources of parser object */ -static void parser_dealloc (parser_object* self) { - htmllexDestroy(self->scanner); - parser_clear(self); - self->userData->parser = NULL; - Py_CLEAR(self->encoding); - Py_CLEAR(self->doctype); - PyMem_Del(self->userData->buf); - PyMem_Del(self->userData->tmp_buf); - PyMem_Del(self->userData); - Py_TYPE(self)->tp_free((PyObject*)self); -} - - -/* feed a chunk of data to the parser */ -static PyObject* parser_feed (parser_object* self, PyObject* args) { - /* set up the parse string */ - int slen = 0; - char* s = NULL; - if (!PyArg_ParseTuple(args, "t#", &s, &slen)) { - PyErr_SetString(PyExc_TypeError, "string arg required"); - return NULL; - } - /* parse */ - if (htmllexStart(self->scanner, self->userData, s, slen)!=0) { - PyErr_SetString(PyExc_MemoryError, "could not start scanner"); - return NULL; - } - if (yyparse(self->scanner)!=0) { - if (self->userData->exc_type!=NULL) { - /* note: we give away these objects, so don't decref */ - PyErr_Restore(self->userData->exc_type, - self->userData->exc_val, - self->userData->exc_tb); - } - htmllexStop(self->scanner, self->userData); - return NULL; - } - if (htmllexStop(self->scanner, self->userData)!=0) { - PyErr_SetString(PyExc_MemoryError, "could not stop scanner"); - return NULL; - } - Py_RETURN_NONE; -} - - -/* flush all parser buffers */ -static PyObject* parser_flush (parser_object* self, PyObject* args) { - int res = 0; - if (!PyArg_ParseTuple(args, "")) { - PyErr_SetString(PyExc_TypeError, "no args required"); - return NULL; - } - /* reset parser variables */ - CLEAR_BUF(self->userData->tmp_buf); - Py_CLEAR(self->userData->tmp_tag); - Py_CLEAR(self->userData->tmp_attrs); - Py_CLEAR(self->userData->tmp_attrval); - Py_CLEAR(self->userData->tmp_attrname); - self->userData->bufpos = 0; - if (strlen(self->userData->buf)) { - int error = 0; - int i; - PyObject* callback = NULL; - PyObject* result = NULL; - const char* enc; - PyObject* s; - /* set line, col */ - for (i=0; iuserData->buf); ++i) { - if (self->userData->buf[i] == '\n') { - ++(self->userData->lineno); - self->userData->column = 1; - } - else ++(self->userData->column); - } - enc = PyBytes_AsString(self->encoding); - s = PyUnicode_Decode(self->userData->buf, - (Py_ssize_t)strlen(self->userData->buf), enc, "ignore"); - /* reset buffer */ - CLEAR_BUF(self->userData->buf); - if (s == NULL) { error = 1; goto finish_flush; } - if (PyObject_HasAttrString(self->handler, "characters") == 1) { - callback = PyObject_GetAttrString(self->handler, "characters"); - if (callback == NULL) { error = 1; goto finish_flush; } - result = PyObject_CallFunction(callback, "O", s); - if (result == NULL) { error = 1; goto finish_flush; } - } - finish_flush: - Py_XDECREF(callback); - Py_XDECREF(result); - Py_XDECREF(s); - if (error == 1) { - return NULL; - } - } - if (htmllexDestroy(self->scanner)!=0) { - PyErr_SetString(PyExc_MemoryError, "could not destroy scanner data"); - return NULL; - } - self->scanner = NULL; - if (htmllexInit(&(self->scanner), self->userData)!=0) { - PyErr_SetString(PyExc_MemoryError, "could not initialize scanner data"); - return NULL; - } - return Py_BuildValue("i", res); -} - - -/* return the current parser line number */ -static PyObject* parser_lineno (parser_object* self, PyObject* args) { - if (!PyArg_ParseTuple(args, "")) { - PyErr_SetString(PyExc_TypeError, "no args required"); - return NULL; - } - return Py_BuildValue("i", self->userData->lineno); -} - - -/* return the last parser line number */ -static PyObject* parser_last_lineno (parser_object* self, PyObject* args) { - if (!PyArg_ParseTuple(args, "")) { - PyErr_SetString(PyExc_TypeError, "no args required"); - return NULL; - } - return Py_BuildValue("i", self->userData->last_lineno); -} - - -/* return the current parser column number */ -static PyObject* parser_column (parser_object* self, PyObject* args) { - if (!PyArg_ParseTuple(args, "")) { - PyErr_SetString(PyExc_TypeError, "no args required"); - return NULL; - } - return Py_BuildValue("i", self->userData->column); -} - - -/* return the last parser column number */ -static PyObject* parser_last_column (parser_object* self, PyObject* args) { - if (!PyArg_ParseTuple(args, "")) { - PyErr_SetString(PyExc_TypeError, "no args required"); - return NULL; - } - return Py_BuildValue("i", self->userData->last_column); -} - - -/* return the parser position in data stream */ -static PyObject* parser_pos (parser_object* self, PyObject* args) { - if (!PyArg_ParseTuple(args, "")) { - PyErr_SetString(PyExc_TypeError, "no args required"); - return NULL; - } - return Py_BuildValue("i", self->userData->pos); -} - - -/* return buffered parser data up to given length */ -static PyObject* parser_peek (parser_object* self, PyObject* args) { - Py_ssize_t len, buflen; - if (!PyArg_ParseTuple(args, "n", &len)) { - return NULL; - } - if (len < 0) { - PyErr_SetString(PyExc_TypeError, "peek length must not be negative"); - return NULL; - } - buflen = strlen(self->userData->buf); - if (!buflen || self->userData->bufpos >= buflen) { - return PyBytes_FromString(""); - } - if (self->userData->bufpos + len >= buflen) { - len = buflen - self->userData->bufpos - 1; - } - return PyBytes_FromStringAndSize(self->userData->buf + self->userData->bufpos, len); -} - - -/* reset the parser. This will erase all buffered data! */ -static PyObject* parser_reset (parser_object* self, PyObject* args) { - if (!PyArg_ParseTuple(args, "")) { - PyErr_SetString(PyExc_TypeError, "no args required"); - return NULL; - } - if (htmllexDestroy(self->scanner)!=0) { - PyErr_SetString(PyExc_MemoryError, "could not destroy scanner data"); - return NULL; - } - /* reset buffer */ - CLEAR_BUF(self->userData->buf); - CLEAR_BUF(self->userData->tmp_buf); - self->userData->bufpos = - self->userData->pos = - self->userData->nextpos = 0; - self->userData->column = - self->userData->last_column = - self->userData->lineno = - self->userData->last_lineno = 1; - self->userData->tmp_tag = self->userData->tmp_attrs = - self->userData->tmp_attrval = self->userData->tmp_attrname = NULL; - self->scanner = NULL; - if (htmllexInit(&(self->scanner), self->userData)!=0) { - PyErr_SetString(PyExc_MemoryError, "could not initialize scanner data"); - return NULL; - } - Py_RETURN_NONE; -} - - -/* set the debug level, if its >0, debugging is on, =0 means off */ -static PyObject* parser_debug (parser_object* self, PyObject* args) { - int debug; - if (!PyArg_ParseTuple(args, "i", &debug)) { - return NULL; - } - yydebug = debug; - debug = htmllexDebug(&(self->scanner), debug); - return PyInt_FromLong((long)debug); -} - - -/* get SAX handler object */ -static PyObject* parser_gethandler (parser_object* self, void* closure) { - Py_INCREF(self->handler); - return self->handler; -} - - -/* set SAX handler object */ -static int parser_sethandler (parser_object* self, PyObject* value, void* closure) { - if (value == NULL) { - PyErr_SetString(PyExc_TypeError, "Cannot delete parser handler"); - return -1; - } - Py_DECREF(self->handler); - Py_INCREF(value); - self->handler = value; - self->userData->handler = value; - return 0; -} - - -/* get parser encoding */ -static PyObject* parser_getencoding (parser_object* self, void* closure) { - Py_INCREF(self->encoding); - return self->encoding; -} - - -/* set parser encoding */ -static int parser_setencoding (parser_object* self, PyObject* value, void* closure) { - if (value == NULL) { - PyErr_SetString(PyExc_TypeError, "Cannot delete encoding"); - return -1; - } - if (!PyBytes_CheckExact(value)) { - PyErr_SetString(PyExc_TypeError, "encoding must be string"); - return -1; - } - Py_DECREF(self->encoding); - Py_INCREF(value); - self->encoding = value; - if (yydebug > 0) { - /* print debug message */ - PyObject* repr = PyObject_Repr(value); - if (repr == NULL) { - return -1; - } - fprintf(stderr, "htmlsax: set encoding to %s\n", PyBytes_AsString(repr)); - Py_DECREF(repr); - } - return 0; -} - - -/* get parser doctype */ -static PyObject* parser_getdoctype (parser_object* self, void* closure) { - Py_INCREF(self->doctype); - return self->doctype; -} - - -/* set parser doctype */ -static int parser_setdoctype (parser_object* self, PyObject* value, void* closure) { - if (value == NULL) { - PyErr_SetString(PyExc_TypeError, "Cannot delete doctype"); - return -1; - } - if (!PyBytes_CheckExact(value)) { - PyObject* repr = PyObject_Repr(value); - char* cp = PyBytes_AsString(repr); - if (NULL == cp) - return -1; - PyErr_Format(PyExc_TypeError, "doctype %s must be string", cp); - return -1; - } - Py_DECREF(self->doctype); - Py_INCREF(value); - self->doctype = value; - return 0; -} - - -/* type interface */ - -static PyMemberDef parser_members[] = { - {NULL} /* Sentinel */ -}; - -static PyGetSetDef parser_getset[] = { - {"handler", (getter)parser_gethandler, (setter)parser_sethandler, - "handler object", NULL}, - {"encoding", (getter)parser_getencoding, (setter)parser_setencoding, - "encoding", NULL}, - {"doctype", (getter)parser_getdoctype, (setter)parser_setdoctype, - "doctype", NULL}, - {NULL} /* Sentinel */ -}; - -static PyMethodDef parser_methods[] = { - {"feed", (PyCFunction)parser_feed, METH_VARARGS, "feed data to parse incremental"}, - {"reset", (PyCFunction)parser_reset, METH_VARARGS, "reset the parser (no flushing)"}, - {"flush", (PyCFunction)parser_flush, METH_VARARGS, "flush parser buffers"}, - {"debug", (PyCFunction)parser_debug, METH_VARARGS, "set debug level"}, - {"lineno", (PyCFunction)parser_lineno, METH_VARARGS, "get the current line number"}, - {"last_lineno", (PyCFunction)parser_last_lineno, METH_VARARGS, "get the last line number"}, - {"column", (PyCFunction)parser_column, METH_VARARGS, "get the current column"}, - {"last_column", (PyCFunction)parser_last_column, METH_VARARGS, "get the last column"}, - {"pos", (PyCFunction)parser_pos, METH_VARARGS, "get the current scanner position"}, - {"peek", (PyCFunction)parser_peek, METH_VARARGS, "get up to given length of buffered data from current parse position"}, - {NULL} /* Sentinel */ -}; - - -static PyTypeObject parser_type = { - PyVarObject_HEAD_INIT(NULL, 0) - "linkcheck.HtmlParser.htmlsax.parser", /* tp_name */ - sizeof(parser_object), /* tp_size */ - 0, /* tp_itemsize */ - /* methods */ - (destructor)parser_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | - Py_TPFLAGS_HAVE_GC, /* tp_flags */ - "HTML parser object", /* tp_doc */ - (traverseproc)parser_traverse, /* tp_traverse */ - (inquiry)parser_clear, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - parser_methods, /* tp_methods */ - parser_members, /* tp_members */ - parser_getset, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)parser_init, /* tp_init */ - 0, /* tp_alloc */ - parser_new, /* tp_new */ - 0, /* tp_free */ - 0, /* tp_is_gc */ - 0, /* tp_bases */ - 0, /* tp_mro */ - 0, /* tp_cache */ - 0, /* tp_subclasses */ - 0, /* tp_weaklist */ - 0, /* tp_del */ -}; - - -static PyMethodDef htmlsax_methods[] = { - {NULL} /* Sentinel */ -}; - - -/* initialization of the htmlsax module */ -MOD_INIT(htmlsax) { - PyObject* m = NULL; - MOD_DEF(m, "htmlsax", "SAX HTML parser routines", htmlsax_methods); - if (m == NULL) { - return MOD_ERROR_VAL; - } - if (PyType_Ready(&parser_type) < 0) { - return MOD_ERROR_VAL; - } - Py_INCREF(&parser_type); - if (PyModule_AddObject(m, "parser", (PyObject*)&parser_type) == -1) { - /* init error */ - PyErr_Print(); - } - PyObject* h = NULL; - if ((h = PyImport_ImportModule("linkcheck.HtmlParser")) == NULL) { - return MOD_ERROR_VAL; - } - if ((resolve_entities = PyObject_GetAttrString(h, "resolve_entities")) == NULL) { - Py_DECREF(h); - return MOD_ERROR_VAL; - } - if ((set_encoding = PyObject_GetAttrString(h, "set_encoding")) == NULL) { - Py_DECREF(resolve_entities); - Py_DECREF(h); - return MOD_ERROR_VAL; - } - if ((set_doctype = PyObject_GetAttrString(h, "set_doctype")) == NULL) { - Py_DECREF(resolve_entities); - Py_DECREF(set_encoding); - Py_DECREF(h); - return MOD_ERROR_VAL; - } - Py_DECREF(h); - if ((u_meta = PyUnicode_FromStringAndSize("meta", 4)) == NULL) { - return MOD_ERROR_VAL; - } - if ((h = PyImport_ImportModule("linkcheck.containers")) == NULL) { - return MOD_ERROR_VAL; - } - if ((list_dict = PyObject_GetAttrString(h, "ListDict")) == NULL) { - Py_DECREF(h); - return MOD_ERROR_VAL; - } - Py_DECREF(h); - return MOD_SUCCESS_VAL(m); -} diff --git a/linkcheck/HtmlParser/htmlparse.h b/linkcheck/HtmlParser/htmlparse.h deleted file mode 100644 index 36af12cc..00000000 --- a/linkcheck/HtmlParser/htmlparse.h +++ /dev/null @@ -1,74 +0,0 @@ -/* A Bison parser, made by GNU Bison 3.0.4. */ - -/* Bison interface for Yacc-like parsers in C - - Copyright (C) 1984, 1989-1990, 2000-2015 Free Software Foundation, Inc. - - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see . */ - -/* As a special exception, you may create a larger work that contains - part or all of the Bison parser skeleton and distribute that work - under terms of your choice, so long as that work isn't itself a - parser generator using the skeleton or a modified version thereof - as a parser skeleton. Alternatively, if you modify or redistribute - the parser skeleton itself, you may (at your option) remove this - special exception, which will cause the skeleton and the resulting - Bison output files to be licensed under the GNU General Public - License without this special exception. - - This special exception was added by the Free Software Foundation in - version 2.2 of Bison. */ - -#ifndef YY_YY_HTMLPARSE_H_INCLUDED -# define YY_YY_HTMLPARSE_H_INCLUDED -/* Debug traces. */ -#ifndef YYDEBUG -# define YYDEBUG 1 -#endif -#if YYDEBUG -extern int yydebug; -#endif - -/* Token type. */ -#ifndef YYTOKENTYPE -# define YYTOKENTYPE - enum yytokentype - { - T_WAIT = 258, - T_ERROR = 259, - T_TEXT = 260, - T_ELEMENT_START = 261, - T_ELEMENT_START_END = 262, - T_ELEMENT_END = 263, - T_SCRIPT = 264, - T_STYLE = 265, - T_PI = 266, - T_COMMENT = 267, - T_CDATA = 268, - T_DOCTYPE = 269 - }; -#endif - -/* Value type. */ -#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED -typedef int YYSTYPE; -# define YYSTYPE_IS_TRIVIAL 1 -# define YYSTYPE_IS_DECLARED 1 -#endif - - - -int yyparse (PyObject* scanner); - -#endif /* !YY_YY_HTMLPARSE_H_INCLUDED */ diff --git a/linkcheck/HtmlParser/htmlparse.y b/linkcheck/HtmlParser/htmlparse.y deleted file mode 100644 index 4fec7b16..00000000 --- a/linkcheck/HtmlParser/htmlparse.y +++ /dev/null @@ -1,1023 +0,0 @@ -%{ -/* Copyright (C) 2000-2014 Bastian Kleineidam - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -*/ -/* A SAX HTML parser. Includes Python module definition to make it - usable for Python programs. -*/ -#include "htmlsax.h" /* SAX interface (includes Python.h) */ -#include "structmember.h" /* Python include for object definition */ -#include -#include - -/* bison type definitions */ -#define YYSTYPE PyObject* -/* extern functions found in htmllex.l */ -extern int yylex(YYSTYPE* yylvalp, void* scanner); -extern int htmllexInit (void** scanner, UserData* data); -extern int htmllexDebug (void** scanner, int debug); -extern int htmllexStart (void* scanner, UserData* data, const char* s, int slen); -extern int htmllexStop (void* scanner, UserData* data); -extern int htmllexDestroy (void* scanner); -extern UserData* yyget_extra(void* scanner); -extern int yyget_lineno(void*); -#define YYERROR_VERBOSE 1 - -/* standard error reporting, indicating an internal error */ -static void yyerror (void *locp, char const *msg) { - fprintf(stderr, "htmlsax: internal parse error: %s\n", msg); -} - -/* Python 2/3 compatibility */ -#if PY_MAJOR_VERSION >= 3 - #define MOD_ERROR_VAL NULL - #define MOD_SUCCESS_VAL(val) val - #define MOD_INIT(name) PyMODINIT_FUNC PyInit_##name(void) - #define MOD_DEF(ob, name, doc, methods) \ - static struct PyModuleDef moduledef = { \ - PyModuleDef_HEAD_INIT, name, doc, -1, methods, }; \ - ob = PyModule_Create(&moduledef) - #define PyInt_FromLong PyLong_FromLong -#else - #define MOD_ERROR_VAL - #define MOD_SUCCESS_VAL(val) - #define MOD_INIT(name) void init##name(void) - #define MOD_DEF(ob, name, doc, methods) \ - ob = Py_InitModule3(name, methods, doc) -#endif - - -/* existing Python methods */ - -/* parser.resolve_entities */ -static PyObject* resolve_entities; -/* ListDict class, sorted dictionary */ -static PyObject* list_dict; -/* set_encoding helper function */ -static PyObject* set_encoding; -/* set_doctype helper function */ -static PyObject* set_doctype; -/* the unicode string u'meta' */ -static PyObject* u_meta; - -/* macros for easier scanner state manipulation */ - -/* clear buffer b, returning NULL on error */ -#define CLEAR_BUF(b) \ - PyMem_Resize(b, char, 1); \ - if (b == NULL) return NULL; \ - (b)[0] = '\0' - -/* clear buffer b, returning NULL and decref self on error */ -#define CLEAR_BUF_DECREF(self, b) \ - PyMem_Resize(b, char, 1); \ - if (b == NULL) { Py_DECREF(self); return NULL; } \ - (b)[0] = '\0' - -/* check an error condition and if true set error flag and goto given label */ -#define CHECK_ERROR(cond, label) \ - if (cond) { \ - error = 1; \ - goto label; \ - } - -/* generic Python callback macro */ -#define CALLBACK(ud, attr, format, arg, label) \ - if (PyObject_HasAttrString(ud->handler, attr) == 1) { \ - callback = PyObject_GetAttrString(ud->handler, attr); \ - CHECK_ERROR((callback == NULL), label); \ - result = PyObject_CallFunction(callback, format, arg); \ - CHECK_ERROR((result == NULL), label); \ - Py_CLEAR(callback); \ - Py_CLEAR(result); \ - } - -/* set old line and column */ -#define SET_OLD_LINECOL \ - ud->last_lineno = ud->lineno; \ - ud->last_column = ud->column - -/* parser type definition */ -typedef struct { - PyObject_HEAD - /* the handler object */ - PyObject* handler; - /* the charset encoding (PyBytesObject) */ - PyObject* encoding; - /* the document type (PyBytesObject) */ - PyObject* doctype; - UserData* userData; - void* scanner; -} parser_object; - -/* use Pythons memory management */ -#define YYMALLOC PyMem_Malloc -#define YYFREE PyMem_Free - -/* Test whether tag does not need an HTML end tag. - @ptag: ASCII encoded Python string in lowercase (!) - @parser: SAX parser object - @return: < 0 on error, > 0 if HTML end tag is needed, else 0 -*/ -static int html_end_tag (PyObject* ptag, PyObject* parser) { - PyObject* pdoctype = NULL; - char* doctype; - int error = 0; - int ret = 1; - pdoctype = PyObject_GetAttrString(parser, "doctype"); - CHECK_ERROR((pdoctype == NULL), finish_html_end_tag); - doctype = PyBytes_AsString(pdoctype); - CHECK_ERROR((doctype == NULL), finish_html_end_tag); - /* check for HTML (else it's presumably XHTML) */ - if (strcmp(doctype, "HTML") == 0) { - char* tag = PyBytes_AsString(ptag); - CHECK_ERROR((tag == NULL), finish_html_end_tag); - ret = strcmp(tag, "area")!=0 && - strcmp(tag, "base")!=0 && - strcmp(tag, "basefont")!=0 && - strcmp(tag, "br")!=0 && - strcmp(tag, "col")!=0 && - strcmp(tag, "frame")!=0 && - strcmp(tag, "hr")!=0 && - strcmp(tag, "img")!=0 && - strcmp(tag, "input")!=0 && - strcmp(tag, "isindex")!=0 && - strcmp(tag, "link")!=0 && - strcmp(tag, "meta")!=0 && - strcmp(tag, "param")!=0; - } -finish_html_end_tag: - Py_XDECREF(pdoctype); - if (error) { - return -1; - } - return ret; -} - -%} - -/* parser options */ -%verbose -%debug -%defines -%pure-parser -%param {PyObject* scanner} - -/* parser tokens, see below for what they mean */ -%token T_WAIT -%token T_ERROR -%token T_TEXT -%token T_ELEMENT_START -%token T_ELEMENT_START_END -%token T_ELEMENT_END -%token T_SCRIPT -%token T_STYLE -%token T_PI -%token T_COMMENT -%token T_CDATA -%token T_DOCTYPE - -/* note: the finish_ labels are for error recovery */ -%% - -elements: element { - /* parse a single element */ -} -| elements element { - /* parse a list of elements */ -} -; - -element: T_WAIT { - /* wait for more lexer input */ - YYACCEPT; -} -| T_ERROR -{ - /* an error occured in the scanner, the python exception must be set */ - UserData* ud = yyget_extra(scanner); - PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb)); - YYABORT; -} -| T_ELEMENT_START -{ - /* parsed HTML start tag (eg.
) - $1 is a PyTuple (, ) - is a PyObject, is a ListDict */ - UserData* ud = yyget_extra(scanner); - PyObject* callback = NULL; - PyObject* result = NULL; - PyObject* tag = PyTuple_GET_ITEM($1, 0); - PyObject* attrs = PyTuple_GET_ITEM($1, 1); - int error = 0; - int cmp; - CHECK_ERROR((tag == NULL || attrs == NULL), finish_start); - cmp = PyObject_RichCompareBool(tag, u_meta, Py_EQ); - CHECK_ERROR((cmp == -1), finish_start); - if (cmp == 1) { - /* set encoding */ - result = PyObject_CallFunction(set_encoding, "OO", ud->parser, attrs); - CHECK_ERROR((result == NULL), finish_start); - Py_CLEAR(result); - } - if (PyObject_HasAttrString(ud->handler, "start_element") == 1) { - callback = PyObject_GetAttrString(ud->handler, "start_element"); - CHECK_ERROR((!callback), finish_start); - result = PyObject_CallFunction(callback, "OO", tag, attrs); - CHECK_ERROR((!result), finish_start); - Py_CLEAR(callback); - Py_CLEAR(result); - } -finish_start: - Py_XDECREF(callback); - Py_XDECREF(result); - Py_XDECREF(tag); - Py_XDECREF(attrs); - Py_DECREF($1); - if (error) { - PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb)); - YYABORT; - } - SET_OLD_LINECOL; -} -| T_ELEMENT_START_END -{ - /* parsed HTML start-end tag (eg.
) - $1 is a PyTuple (, ) - is a PyObject, is a ListDict */ - UserData* ud = yyget_extra(scanner); - PyObject* callback = NULL; - PyObject* result = NULL; - PyObject* tag = PyTuple_GET_ITEM($1, 0); - PyObject* attrs = PyTuple_GET_ITEM($1, 1); - int error = 0; - int cmp; - char* fname; - PyObject* tagname; - CHECK_ERROR((tag == NULL || attrs == NULL), finish_start_end); - tagname = PyUnicode_AsEncodedString(tag, "ascii", "ignore"); - CHECK_ERROR((tagname == NULL), finish_start_end); - cmp = PyObject_RichCompareBool(tag, u_meta, Py_EQ); - CHECK_ERROR((cmp == -1), finish_start_end); - if (cmp == 1) { - /* set encoding */ - result = PyObject_CallFunction(set_encoding, "OO", ud->parser, attrs); - CHECK_ERROR((result == NULL), finish_start_end); - Py_CLEAR(result); - } - cmp = html_end_tag(tagname, ud->parser); - CHECK_ERROR((cmp < 0), finish_start_end); - fname = (cmp == 0 ? "start_element" : "start_end_element"); - if (PyObject_HasAttrString(ud->handler, fname) == 1) { - callback = PyObject_GetAttrString(ud->handler, fname); - CHECK_ERROR((!callback), finish_start_end); - result = PyObject_CallFunction(callback, "OO", tag, attrs); - CHECK_ERROR((!result), finish_start_end); - Py_CLEAR(callback); - Py_CLEAR(result); - } -finish_start_end: - Py_XDECREF(callback); - Py_XDECREF(result); - Py_XDECREF(tag); - Py_XDECREF(attrs); - Py_DECREF($1); - if (error) { - PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb)); - YYABORT; - } - SET_OLD_LINECOL; -} -| T_ELEMENT_END -{ - /* parsed HTML end tag (eg. ) - $1 is a PyUnicode with the tag name */ - UserData* ud = yyget_extra(scanner); - PyObject* callback = NULL; - PyObject* result = NULL; - int error = 0; - int cmp; - /* encode tagname in ASCII, ignoring any unknown chars */ - PyObject* tagname = PyUnicode_AsEncodedString($1, "ascii", "ignore"); - if (tagname == NULL) { - error = 1; - goto finish_end; - } - cmp = html_end_tag(tagname, ud->parser); - CHECK_ERROR((cmp < 0), finish_end); - if (PyObject_HasAttrString(ud->handler, "end_element") == 1 && cmp > 0) { - callback = PyObject_GetAttrString(ud->handler, "end_element"); - CHECK_ERROR((callback == NULL), finish_end); - result = PyObject_CallFunction(callback, "O", $1); - CHECK_ERROR((result == NULL), finish_end); - Py_CLEAR(callback); - Py_CLEAR(result); - } -finish_end: - Py_XDECREF(tagname); - Py_XDECREF(callback); - Py_XDECREF(result); - Py_DECREF($1); - if (error) { - PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb)); - YYABORT; - } - SET_OLD_LINECOL; -} -| T_COMMENT -{ - /* parsed HTML comment (eg. ) - $1 is a PyUnicode with the comment content */ - UserData* ud = yyget_extra(scanner); - PyObject* callback = NULL; - PyObject* result = NULL; - int error = 0; - CALLBACK(ud, "comment", "O", $1, finish_comment); -finish_comment: - Py_XDECREF(callback); - Py_XDECREF(result); - Py_DECREF($1); - if (error) { - PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb)); - YYABORT; - } - SET_OLD_LINECOL; -} -| T_PI -{ - /* $1 is a PyUnicode */ - UserData* ud = yyget_extra(scanner); - PyObject* callback = NULL; - PyObject* result = NULL; - int error = 0; - CALLBACK(ud, "pi", "O", $1, finish_pi); -finish_pi: - Py_XDECREF(callback); - Py_XDECREF(result); - Py_DECREF($1); - if (error) { - PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb)); - YYABORT; - } - SET_OLD_LINECOL; -} -| T_CDATA -{ - /* parsed HTML CDATA (eg. ) - $1 is a PyUnicode with the CDATA content */ - UserData* ud = yyget_extra(scanner); - PyObject* callback = NULL; - PyObject* result = NULL; - int error = 0; - CALLBACK(ud, "cdata", "O", $1, finish_cdata); -finish_cdata: - Py_XDECREF(callback); - Py_XDECREF(result); - Py_DECREF($1); - if (error) { - PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb)); - YYABORT; - } - SET_OLD_LINECOL; -} -| T_DOCTYPE -{ - /* parsed HTML doctype (eg. ) - $1 is a PyUnicode with the doctype content */ - UserData* ud = yyget_extra(scanner); - PyObject* callback = NULL; - PyObject* result = NULL; - int error = 0; - /* set encoding */ - result = PyObject_CallFunction(set_doctype, "OO", ud->parser, $1); - CHECK_ERROR((result == NULL), finish_doctype); - Py_CLEAR(result); - CALLBACK(ud, "doctype", "O", $1, finish_doctype); -finish_doctype: - Py_XDECREF(callback); - Py_XDECREF(result); - Py_DECREF($1); - if (error) { - PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb)); - YYABORT; - } - SET_OLD_LINECOL; -} -| T_SCRIPT -{ - /* parsed HTML script content (plus end tag which is omitted) - $1 is a PyUnicode with the script content */ - UserData* ud = yyget_extra(scanner); - PyObject* callback = NULL; - PyObject* result = NULL; - int error = 0; - PyObject* script = PyUnicode_DecodeASCII("script", 6, "ignore"); - CHECK_ERROR((script == NULL), finish_script); - CALLBACK(ud, "characters", "O", $1, finish_script); - /* emit the omitted end tag */ - CALLBACK(ud, "end_element", "O", script, finish_script); -finish_script: - Py_XDECREF(callback); - Py_XDECREF(script); - Py_XDECREF(result); - Py_DECREF($1); - if (error) { - PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb)); - YYABORT; - } - SET_OLD_LINECOL; -} -| T_STYLE -{ - /* parsed HTML style content (plus end tag which is omitted) - $1 is a PyUnicode with the style content */ - UserData* ud = yyget_extra(scanner); - PyObject* callback = NULL; - PyObject* result = NULL; - int error = 0; - PyObject* style = PyUnicode_DecodeASCII("style", 5, "ignore"); - CHECK_ERROR((style == NULL), finish_style); - CALLBACK(ud, "characters", "O", $1, finish_style); - /* emit the omitted end tag */ - CALLBACK(ud, "end_element", "O", style, finish_style); -finish_style: - Py_XDECREF(callback); - Py_XDECREF(style); - Py_XDECREF(result); - Py_DECREF($1); - if (error) { - PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb)); - YYABORT; - } - SET_OLD_LINECOL; -} -| T_TEXT -{ - /* parsed HTML text data - $1 is a PyUnicode with the text */ - /* Remember this is also called as a lexer fallback when no - HTML structure element could be recognized. */ - UserData* ud = yyget_extra(scanner); - PyObject* callback = NULL; - PyObject* result = NULL; - int error = 0; - CALLBACK(ud, "characters", "O", $1, finish_characters); -finish_characters: - Py_XDECREF(callback); - Py_XDECREF(result); - Py_DECREF($1); - if (error) { - PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb)); - YYABORT; - } - SET_OLD_LINECOL; -} -; - -%% - -/* create parser object */ -static PyObject* parser_new (PyTypeObject* type, PyObject* args, PyObject* kwds) { - parser_object* self; - if ((self = (parser_object*) type->tp_alloc(type, 0)) == NULL) { - return NULL; - } - Py_INCREF(Py_None); - self->handler = Py_None; - /* reset userData */ - self->userData = PyMem_New(UserData, sizeof(UserData)); - if (self->userData == NULL) { - Py_DECREF(self->handler); - Py_DECREF(self); - return NULL; - } - self->userData->handler = self->handler; - self->userData->buf = NULL; - CLEAR_BUF_DECREF(self, self->userData->buf); - self->userData->nextpos = 0; - self->userData->bufpos = 0; - self->userData->pos = 0; - self->userData->column = 1; - self->userData->last_column = 1; - self->userData->lineno = 1; - self->userData->last_lineno = 1; - self->userData->tmp_buf = NULL; - CLEAR_BUF_DECREF(self, self->userData->tmp_buf); - self->userData->tmp_tag = self->userData->tmp_attrname = - self->userData->tmp_attrval = self->userData->tmp_attrs = - self->userData->lexbuf = NULL; - self->userData->resolve_entities = resolve_entities; - self->userData->list_dict = list_dict; - self->userData->exc_type = NULL; - self->userData->exc_val = NULL; - self->userData->exc_tb = NULL; - self->scanner = NULL; - if (htmllexInit(&(self->scanner), self->userData)!=0) { - Py_DECREF(self->handler); - Py_DECREF(self); - return NULL; - } - self->encoding = PyBytes_FromString("iso8859-1"); - if (self->encoding == NULL) { - Py_DECREF(self->handler); - Py_DECREF(self); - return NULL; - } - self->doctype = PyBytes_FromString("HTML"); - if (self->doctype == NULL) { - Py_DECREF(self->encoding); - Py_DECREF(self->handler); - Py_DECREF(self); - return NULL; - } - self->userData->parser = (PyObject*)self; - return (PyObject*) self; -} - - -/* initialize parser object */ -static int parser_init (parser_object* self, PyObject* args, PyObject* kwds) { - PyObject* handler = NULL; - static char *kwlist[] = {"handler", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O", kwlist, &handler)) { - return -1; - } - if (handler == NULL) { - return 0; - } - Py_DECREF(self->handler); - Py_INCREF(handler); - self->handler = handler; - self->userData->handler = self->handler; - return 0; -} - - -/* traverse all used subobjects participating in reference cycles */ -static int parser_traverse (parser_object* self, visitproc visit, void* arg) { - Py_VISIT(self->handler); - return 0; -} - - -/* clear all used subobjects participating in reference cycles */ -static int parser_clear (parser_object* self) { - self->userData->handler = NULL; - Py_CLEAR(self->handler); - return 0; -} - - -/* free all allocated resources of parser object */ -static void parser_dealloc (parser_object* self) { - htmllexDestroy(self->scanner); - parser_clear(self); - self->userData->parser = NULL; - Py_CLEAR(self->encoding); - Py_CLEAR(self->doctype); - PyMem_Del(self->userData->buf); - PyMem_Del(self->userData->tmp_buf); - PyMem_Del(self->userData); - Py_TYPE(self)->tp_free((PyObject*)self); -} - - -/* feed a chunk of data to the parser */ -static PyObject* parser_feed (parser_object* self, PyObject* args) { - /* set up the parse string */ - int slen = 0; - char* s = NULL; - if (!PyArg_ParseTuple(args, "t#", &s, &slen)) { - PyErr_SetString(PyExc_TypeError, "string arg required"); - return NULL; - } - /* parse */ - if (htmllexStart(self->scanner, self->userData, s, slen)!=0) { - PyErr_SetString(PyExc_MemoryError, "could not start scanner"); - return NULL; - } - if (yyparse(self->scanner)!=0) { - if (self->userData->exc_type!=NULL) { - /* note: we give away these objects, so don't decref */ - PyErr_Restore(self->userData->exc_type, - self->userData->exc_val, - self->userData->exc_tb); - } - htmllexStop(self->scanner, self->userData); - return NULL; - } - if (htmllexStop(self->scanner, self->userData)!=0) { - PyErr_SetString(PyExc_MemoryError, "could not stop scanner"); - return NULL; - } - Py_RETURN_NONE; -} - - -/* flush all parser buffers */ -static PyObject* parser_flush (parser_object* self, PyObject* args) { - int res = 0; - if (!PyArg_ParseTuple(args, "")) { - PyErr_SetString(PyExc_TypeError, "no args required"); - return NULL; - } - /* reset parser variables */ - CLEAR_BUF(self->userData->tmp_buf); - Py_CLEAR(self->userData->tmp_tag); - Py_CLEAR(self->userData->tmp_attrs); - Py_CLEAR(self->userData->tmp_attrval); - Py_CLEAR(self->userData->tmp_attrname); - self->userData->bufpos = 0; - if (strlen(self->userData->buf)) { - int error = 0; - int i; - PyObject* callback = NULL; - PyObject* result = NULL; - const char* enc; - PyObject* s; - /* set line, col */ - for (i=0; iuserData->buf); ++i) { - if (self->userData->buf[i] == '\n') { - ++(self->userData->lineno); - self->userData->column = 1; - } - else ++(self->userData->column); - } - enc = PyBytes_AsString(self->encoding); - s = PyUnicode_Decode(self->userData->buf, - (Py_ssize_t)strlen(self->userData->buf), enc, "ignore"); - /* reset buffer */ - CLEAR_BUF(self->userData->buf); - if (s == NULL) { error = 1; goto finish_flush; } - if (PyObject_HasAttrString(self->handler, "characters") == 1) { - callback = PyObject_GetAttrString(self->handler, "characters"); - if (callback == NULL) { error = 1; goto finish_flush; } - result = PyObject_CallFunction(callback, "O", s); - if (result == NULL) { error = 1; goto finish_flush; } - } - finish_flush: - Py_XDECREF(callback); - Py_XDECREF(result); - Py_XDECREF(s); - if (error == 1) { - return NULL; - } - } - if (htmllexDestroy(self->scanner)!=0) { - PyErr_SetString(PyExc_MemoryError, "could not destroy scanner data"); - return NULL; - } - self->scanner = NULL; - if (htmllexInit(&(self->scanner), self->userData)!=0) { - PyErr_SetString(PyExc_MemoryError, "could not initialize scanner data"); - return NULL; - } - return Py_BuildValue("i", res); -} - - -/* return the current parser line number */ -static PyObject* parser_lineno (parser_object* self, PyObject* args) { - if (!PyArg_ParseTuple(args, "")) { - PyErr_SetString(PyExc_TypeError, "no args required"); - return NULL; - } - return Py_BuildValue("i", self->userData->lineno); -} - - -/* return the last parser line number */ -static PyObject* parser_last_lineno (parser_object* self, PyObject* args) { - if (!PyArg_ParseTuple(args, "")) { - PyErr_SetString(PyExc_TypeError, "no args required"); - return NULL; - } - return Py_BuildValue("i", self->userData->last_lineno); -} - - -/* return the current parser column number */ -static PyObject* parser_column (parser_object* self, PyObject* args) { - if (!PyArg_ParseTuple(args, "")) { - PyErr_SetString(PyExc_TypeError, "no args required"); - return NULL; - } - return Py_BuildValue("i", self->userData->column); -} - - -/* return the last parser column number */ -static PyObject* parser_last_column (parser_object* self, PyObject* args) { - if (!PyArg_ParseTuple(args, "")) { - PyErr_SetString(PyExc_TypeError, "no args required"); - return NULL; - } - return Py_BuildValue("i", self->userData->last_column); -} - - -/* return the parser position in data stream */ -static PyObject* parser_pos (parser_object* self, PyObject* args) { - if (!PyArg_ParseTuple(args, "")) { - PyErr_SetString(PyExc_TypeError, "no args required"); - return NULL; - } - return Py_BuildValue("i", self->userData->pos); -} - - -/* return buffered parser data up to given length */ -static PyObject* parser_peek (parser_object* self, PyObject* args) { - Py_ssize_t len, buflen; - if (!PyArg_ParseTuple(args, "n", &len)) { - return NULL; - } - if (len < 0) { - PyErr_SetString(PyExc_TypeError, "peek length must not be negative"); - return NULL; - } - buflen = strlen(self->userData->buf); - if (!buflen || self->userData->bufpos >= buflen) { - return PyBytes_FromString(""); - } - if (self->userData->bufpos + len >= buflen) { - len = buflen - self->userData->bufpos - 1; - } - return PyBytes_FromStringAndSize(self->userData->buf + self->userData->bufpos, len); -} - - -/* reset the parser. This will erase all buffered data! */ -static PyObject* parser_reset (parser_object* self, PyObject* args) { - if (!PyArg_ParseTuple(args, "")) { - PyErr_SetString(PyExc_TypeError, "no args required"); - return NULL; - } - if (htmllexDestroy(self->scanner)!=0) { - PyErr_SetString(PyExc_MemoryError, "could not destroy scanner data"); - return NULL; - } - /* reset buffer */ - CLEAR_BUF(self->userData->buf); - CLEAR_BUF(self->userData->tmp_buf); - self->userData->bufpos = - self->userData->pos = - self->userData->nextpos = 0; - self->userData->column = - self->userData->last_column = - self->userData->lineno = - self->userData->last_lineno = 1; - self->userData->tmp_tag = self->userData->tmp_attrs = - self->userData->tmp_attrval = self->userData->tmp_attrname = NULL; - self->scanner = NULL; - if (htmllexInit(&(self->scanner), self->userData)!=0) { - PyErr_SetString(PyExc_MemoryError, "could not initialize scanner data"); - return NULL; - } - Py_RETURN_NONE; -} - - -/* set the debug level, if its >0, debugging is on, =0 means off */ -static PyObject* parser_debug (parser_object* self, PyObject* args) { - int debug; - if (!PyArg_ParseTuple(args, "i", &debug)) { - return NULL; - } - yydebug = debug; - debug = htmllexDebug(&(self->scanner), debug); - return PyInt_FromLong((long)debug); -} - - -/* get SAX handler object */ -static PyObject* parser_gethandler (parser_object* self, void* closure) { - Py_INCREF(self->handler); - return self->handler; -} - - -/* set SAX handler object */ -static int parser_sethandler (parser_object* self, PyObject* value, void* closure) { - if (value == NULL) { - PyErr_SetString(PyExc_TypeError, "Cannot delete parser handler"); - return -1; - } - Py_DECREF(self->handler); - Py_INCREF(value); - self->handler = value; - self->userData->handler = value; - return 0; -} - - -/* get parser encoding */ -static PyObject* parser_getencoding (parser_object* self, void* closure) { - Py_INCREF(self->encoding); - return self->encoding; -} - - -/* set parser encoding */ -static int parser_setencoding (parser_object* self, PyObject* value, void* closure) { - if (value == NULL) { - PyErr_SetString(PyExc_TypeError, "Cannot delete encoding"); - return -1; - } - if (!PyBytes_CheckExact(value)) { - PyErr_SetString(PyExc_TypeError, "encoding must be string"); - return -1; - } - Py_DECREF(self->encoding); - Py_INCREF(value); - self->encoding = value; - if (yydebug > 0) { - /* print debug message */ - PyObject* repr = PyObject_Repr(value); - if (repr == NULL) { - return -1; - } - fprintf(stderr, "htmlsax: set encoding to %s\n", PyBytes_AsString(repr)); - Py_DECREF(repr); - } - return 0; -} - - -/* get parser doctype */ -static PyObject* parser_getdoctype (parser_object* self, void* closure) { - Py_INCREF(self->doctype); - return self->doctype; -} - - -/* set parser doctype */ -static int parser_setdoctype (parser_object* self, PyObject* value, void* closure) { - if (value == NULL) { - PyErr_SetString(PyExc_TypeError, "Cannot delete doctype"); - return -1; - } - if (!PyBytes_CheckExact(value)) { - PyObject* repr = PyObject_Repr(value); - char* cp = PyBytes_AsString(repr); - if (NULL == cp) - return -1; - PyErr_Format(PyExc_TypeError, "doctype %s must be string", cp); - return -1; - } - Py_DECREF(self->doctype); - Py_INCREF(value); - self->doctype = value; - return 0; -} - - -/* type interface */ - -static PyMemberDef parser_members[] = { - {NULL} /* Sentinel */ -}; - -static PyGetSetDef parser_getset[] = { - {"handler", (getter)parser_gethandler, (setter)parser_sethandler, - "handler object", NULL}, - {"encoding", (getter)parser_getencoding, (setter)parser_setencoding, - "encoding", NULL}, - {"doctype", (getter)parser_getdoctype, (setter)parser_setdoctype, - "doctype", NULL}, - {NULL} /* Sentinel */ -}; - -static PyMethodDef parser_methods[] = { - {"feed", (PyCFunction)parser_feed, METH_VARARGS, "feed data to parse incremental"}, - {"reset", (PyCFunction)parser_reset, METH_VARARGS, "reset the parser (no flushing)"}, - {"flush", (PyCFunction)parser_flush, METH_VARARGS, "flush parser buffers"}, - {"debug", (PyCFunction)parser_debug, METH_VARARGS, "set debug level"}, - {"lineno", (PyCFunction)parser_lineno, METH_VARARGS, "get the current line number"}, - {"last_lineno", (PyCFunction)parser_last_lineno, METH_VARARGS, "get the last line number"}, - {"column", (PyCFunction)parser_column, METH_VARARGS, "get the current column"}, - {"last_column", (PyCFunction)parser_last_column, METH_VARARGS, "get the last column"}, - {"pos", (PyCFunction)parser_pos, METH_VARARGS, "get the current scanner position"}, - {"peek", (PyCFunction)parser_peek, METH_VARARGS, "get up to given length of buffered data from current parse position"}, - {NULL} /* Sentinel */ -}; - - -static PyTypeObject parser_type = { - PyVarObject_HEAD_INIT(NULL, 0) - "linkcheck.HtmlParser.htmlsax.parser", /* tp_name */ - sizeof(parser_object), /* tp_size */ - 0, /* tp_itemsize */ - /* methods */ - (destructor)parser_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_compare */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - 0, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | - Py_TPFLAGS_HAVE_GC, /* tp_flags */ - "HTML parser object", /* tp_doc */ - (traverseproc)parser_traverse, /* tp_traverse */ - (inquiry)parser_clear, /* tp_clear */ - 0, /* tp_richcompare */ - 0, /* tp_weaklistoffset */ - 0, /* tp_iter */ - 0, /* tp_iternext */ - parser_methods, /* tp_methods */ - parser_members, /* tp_members */ - parser_getset, /* tp_getset */ - 0, /* tp_base */ - 0, /* tp_dict */ - 0, /* tp_descr_get */ - 0, /* tp_descr_set */ - 0, /* tp_dictoffset */ - (initproc)parser_init, /* tp_init */ - 0, /* tp_alloc */ - parser_new, /* tp_new */ - 0, /* tp_free */ - 0, /* tp_is_gc */ - 0, /* tp_bases */ - 0, /* tp_mro */ - 0, /* tp_cache */ - 0, /* tp_subclasses */ - 0, /* tp_weaklist */ - 0, /* tp_del */ -}; - - -static PyMethodDef htmlsax_methods[] = { - {NULL} /* Sentinel */ -}; - - -/* initialization of the htmlsax module */ -MOD_INIT(htmlsax) { - PyObject* m = NULL; - MOD_DEF(m, "htmlsax", "SAX HTML parser routines", htmlsax_methods); - if (m == NULL) { - return MOD_ERROR_VAL; - } - if (PyType_Ready(&parser_type) < 0) { - return MOD_ERROR_VAL; - } - Py_INCREF(&parser_type); - if (PyModule_AddObject(m, "parser", (PyObject*)&parser_type) == -1) { - /* init error */ - PyErr_Print(); - } - PyObject* h = NULL; - if ((h = PyImport_ImportModule("linkcheck.HtmlParser")) == NULL) { - return MOD_ERROR_VAL; - } - if ((resolve_entities = PyObject_GetAttrString(h, "resolve_entities")) == NULL) { - Py_DECREF(h); - return MOD_ERROR_VAL; - } - if ((set_encoding = PyObject_GetAttrString(h, "set_encoding")) == NULL) { - Py_DECREF(resolve_entities); - Py_DECREF(h); - return MOD_ERROR_VAL; - } - if ((set_doctype = PyObject_GetAttrString(h, "set_doctype")) == NULL) { - Py_DECREF(resolve_entities); - Py_DECREF(set_encoding); - Py_DECREF(h); - return MOD_ERROR_VAL; - } - Py_DECREF(h); - if ((u_meta = PyUnicode_FromStringAndSize("meta", 4)) == NULL) { - return MOD_ERROR_VAL; - } - if ((h = PyImport_ImportModule("linkcheck.containers")) == NULL) { - return MOD_ERROR_VAL; - } - if ((list_dict = PyObject_GetAttrString(h, "ListDict")) == NULL) { - Py_DECREF(h); - return MOD_ERROR_VAL; - } - Py_DECREF(h); - return MOD_SUCCESS_VAL(m); -} diff --git a/linkcheck/HtmlParser/htmlsax.h b/linkcheck/HtmlParser/htmlsax.h deleted file mode 100644 index c5812c5f..00000000 --- a/linkcheck/HtmlParser/htmlsax.h +++ /dev/null @@ -1,81 +0,0 @@ -/* Copyright (C) 2000-2014 Bastian Kleineidam - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - */ -/* - Includes header definitions for the HTML Sax parser Python module. - */ -#ifndef HTMLSAX_H -#define HTMLSAX_H - -#include "Python.h" - -/* require Python >= 2.6 */ -#ifndef PY_VERSION_HEX -#error please install Python >= 2.6 -#endif - -#if PY_VERSION_HEX < 0x02060000 -#error please install Python >= 2.6 -#endif - -/* user_data type for SAX calls */ -typedef struct { - /* the Python SAX object to issue callbacks */ - PyObject* handler; - /* Buffer to store still-to-be-scanned characters. After recognizing - * a complete syntax element, all data up to bufpos will be removed. - * Before scanning you should append new data to this buffer. - */ - char* buf; - /* current position in the buffer counting from zero */ - unsigned int bufpos; - /* current position of next syntax element */ - unsigned int nextpos; - /* position in the stream of data already seen, counting from zero */ - unsigned int pos; - /* line counter, counting from one */ - unsigned int lineno; - /* column counter, counting from zero */ - unsigned int column; - /* value of line counter before the current token */ - unsigned int last_lineno; - /* value of column counter before the current token */ - unsigned int last_column; - /* input buffer of lexer, must be deleted when the parsing stops */ - void* lexbuf; - /* temporary character buffer */ - char* tmp_buf; - /* temporary HTML start or end tag name */ - PyObject* tmp_tag; - /* temporary HTML start tag attribute name */ - PyObject* tmp_attrname; - /* temporary HTML start tag attribute value */ - PyObject* tmp_attrval; - /* temporary HTML start tag attribute list (a SortedDict) */ - PyObject* tmp_attrs; - /* HtmlParser.resolve_entities */ - PyObject* resolve_entities; - /* HtmlParser.SortedDict */ - PyObject* list_dict; - /* stored Python exception (if error occurred in scanner) */ - PyObject* exc_type; - PyObject* exc_val; - PyObject* exc_tb; - /* the parser object itself */ - PyObject* parser; -} UserData; - -#endif diff --git a/linkcheck/HtmlParser/htmlsax.py b/linkcheck/HtmlParser/htmlsax.py new file mode 100644 index 00000000..a7ad30b5 --- /dev/null +++ b/linkcheck/HtmlParser/htmlsax.py @@ -0,0 +1,120 @@ +# Copyright (C) 2000-2018 Petr Dlouhy +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +""" +HTML parser implemented using Beautiful Soup and html.parser. +""" + +from io import BytesIO, StringIO + +from bs4 import (BeautifulSoup, CData, Comment, Doctype, ProcessingInstruction, + Tag) + +from ..containers import ListDict + + +class Parser(object): + handler = None + encoding = None + + def __init__(self, handler): + self.handler = handler + self.reset() + + def feed(self, feed_text): + if not self.html_doc: + if isinstance(feed_text, bytes): + self.html_doc = BytesIO() + else: + self.html_doc = StringIO() + self.html_doc.write(feed_text) + + def reset(self): + self.html_doc = None + + def parse_contents(self, contents): + for content in contents: + if isinstance(content, Tag): + attrs = ListDict() + for k, v_list in sorted(content.attrs.items()): + if not isinstance(v_list, list): + v_list = [v_list] + for v in v_list: + # empty parameters returned by BS4 + # are sometimes in bytes: + if v == b'': + v = u'' + attrs[k] = v + if content.is_empty_element: + self.handler.start_end_element( + content.name, attrs, content.text.strip(), + ) + else: + self.handler.start_element( + content.name, attrs, content.text.strip(), + ) + if hasattr(content, 'contents'): # recursion + self.parse_contents(content.contents) + if hasattr(self.handler, 'end_element'): + self.handler.end_element(content.name) + if content.comments: + for comment in content.comments: + if hasattr(self.handler, 'comment'): + self.handler.comment(comment) + elif isinstance(content, Doctype): + if hasattr(self.handler, 'doctype'): + self.handler.doctype(content[7:]) + elif isinstance(content, Comment): + if hasattr(self.handler, 'comment'): + self.handler.comment(content.strip()) + elif isinstance(content, CData): + if hasattr(self.handler, 'cdata'): + self.handler.cdata(content) + elif isinstance(content, ProcessingInstruction): + if hasattr(self.handler, 'pi'): + self.handler.pi(content.strip("? ")) + else: + if hasattr(self.handler, 'characters'): + self.handler.characters(content) + + def flush(self): + soup = BeautifulSoup(self.html_doc.getvalue(), 'html.parser') + if hasattr(soup, 'contents'): + self.parse_contents(soup.contents) + self.encoding = soup.original_encoding + + def debug(self, text): + raise NotImplementedError("debug is not implemented") + + def lineno(self): + # It seems, that getting line number of element is not + # implemented in BeautifulSoup, so this is faked + return 0 + + def last_lineno(self): + return 0 + + def column(self): + return 0 + + def last_column(self): + return 0 + + def pos(self, text): + return 0 + + +def parser(handler=None): + return Parser(handler) diff --git a/linkcheck/HtmlParser/s_util.c b/linkcheck/HtmlParser/s_util.c deleted file mode 100644 index 7611d9a7..00000000 --- a/linkcheck/HtmlParser/s_util.c +++ /dev/null @@ -1,52 +0,0 @@ -/* - * linux/lib/string.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ -#include "Python.h" - -#if !defined(HAVE_STRLCPY) -/** - * strlcpy - Copy a %NUL terminated string into a sized buffer - * @dst: Where to copy the string to - * @src: Where to copy the string from - * @size: size of destination buffer - * - * Compatible with *BSD: the result is always a valid - * NUL-terminated string that fits in the buffer (unless, - * of course, the buffer size is zero). It does not pad - * out the result like strncpy() does. - */ -size_t strlcpy (char *dst, const char *src, size_t size) -{ - size_t ret = strlen(src); - if (size > 0) { - size_t len = (ret >= size) ? size-1 : ret; - Py_MEMCPY(dst, src, len); - dst[len] = '\0'; - } - return ret; -} -#endif /* !HAVE_STRLCPY */ - -#if !defined(HAVE_STRLCAT) -/** - * strlcat - Append a length-limited, %NUL-terminated string to another - * @dst: The string to be appended to - * @src: The string to append to it - * @size: The size of the destination buffer. - */ -size_t strlcat (char *dst, const char *src, size_t size) -{ - size_t dsize = strlen(dst); - size_t len = strlen(src); - size_t res = dsize + len; - dst += dsize; - size -= dsize; - if (len >= size) - len = size-1; - Py_MEMCPY(dst, src, len); - dst[len] = '\0'; - return res; -} -#endif /* !HAVE_STRLCAT */ diff --git a/linkcheck/HtmlParser/s_util.h b/linkcheck/HtmlParser/s_util.h deleted file mode 100644 index a0102806..00000000 --- a/linkcheck/HtmlParser/s_util.h +++ /dev/null @@ -1,12 +0,0 @@ -/* - * linux/lib/string.c - * - * Copyright (C) 1991, 1992 Linus Torvalds - */ -#if !defined(HAVE_STRLCPY) -size_t strlcpy(char *dst, const char *src, size_t size); -#endif /* !HAVE_STRLCPY */ - -#if !defined(HAVE_STRLCAT) -size_t strlcat(char *dst, const char *src, size_t size); -#endif /* !HAVE_STRLCAT */ diff --git a/linkcheck/htmlutil/linkparse.py b/linkcheck/htmlutil/linkparse.py index 5662777c..f2c2909d 100644 --- a/linkcheck/htmlutil/linkparse.py +++ b/linkcheck/htmlutil/linkparse.py @@ -115,10 +115,10 @@ class TagFinder (object): """Does nothing, override in a subclass.""" pass - def start_end_element (self, tag, attrs): + def start_end_element (self, tag, attrs, element_text=None): """Delegate a combined start/end element (eg.
) to the start_element method. Ignore the end element part.""" - self.start_element(tag, attrs) + self.start_element(tag, attrs, element_text) class MetaRobotsFinder (TagFinder): diff --git a/setup.py b/setup.py index 97005df2..3afca9a7 100755 --- a/setup.py +++ b/setup.py @@ -466,20 +466,6 @@ args = dict( 'linkcheck.parser', 'linkcheck.plugins', ], - ext_modules = [ - Extension('linkcheck.HtmlParser.htmlsax', - sources = [ - 'linkcheck/HtmlParser/htmllex.c', - 'linkcheck/HtmlParser/htmlparse.c', - 'linkcheck/HtmlParser/s_util.c', - ], - extra_compile_args = extra_compile_args, - library_dirs = library_dirs, - libraries = libraries, - define_macros = define_macros + [('YY_NO_INPUT', None)], - include_dirs = include_dirs + [normpath("linkcheck/HtmlParser")], - ), - ], scripts = scripts, data_files = data_files, classifiers = [ diff --git a/windows/build.bat b/windows/build.bat index 3e9f5ce1..b7ea3f03 100644 --- a/windows/build.bat +++ b/windows/build.bat @@ -38,7 +38,5 @@ if defined MSSdk ( %PYDIR%\python.exe setup.py sdist --manifest-only %PYDIR%\python.exe setup.py build %COMPILER% -:: copy .pyd files to start linkchecker in local directory -copy build\lib.%PLATFORM%-%PYVER%\linkcheck\HtmlParser\htmlsax.pyd linkcheck\HtmlParser :finish diff --git a/windows/clean.bat b/windows/clean.bat index 532b3acf..ed991771 100644 --- a/windows/clean.bat +++ b/windows/clean.bat @@ -16,6 +16,5 @@ @echo off set PYDIR=C:\Python27 %PYDIR%\python.exe setup.py clean --all -del linkcheck\HtmlParser\htmlsax.pyd del doc\html\lccollection.qhc del doc\html\lcdoc.qch