ensure tags are ASCII, regen with bison 2.0

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@2817 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2005-10-10 21:13:35 +00:00
parent 4315e77ae1
commit b28be779d7
5 changed files with 351 additions and 294 deletions

View file

@ -200,7 +200,7 @@ def strip_quotes (s):
_encoding_ro = re.compile(r"charset=(?P<encoding>[-0-9a-zA-Z]+)")
def set_encoding (self, attrs):
def set_encoding (parsobj, attrs):
"""
Set document encoding for the HTML parser according to the <meta>
tag attribute information.
@ -217,13 +217,13 @@ def set_encoding (self, attrs):
try:
encoding = encoding.encode("ascii")
codecs.lookup(encoding)
self.encoding = encoding
parsobj.encoding = encoding
except LookupError:
# ignore unknown encodings
pass
def set_doctype (self, doctype):
def set_doctype (parsobj, doctype):
"""
Set document type of the HTML parser according to the given
document type string.
@ -233,5 +233,5 @@ def set_doctype (self, doctype):
@return: None
"""
if u"XHTML" in doctype:
self.doctype = "XHTML"
parsobj.doctype = "XHTML"

File diff suppressed because it is too large Load diff

View file

@ -37,7 +37,7 @@
(b)[(n)-1] = '\0'
/* make python unicode string from tmp_buf and assign it to a */
#define PYSTRING_TMP(a) { \
#define PYSTRING_TMP_UNICODE(a) { \
PyObject* pencoding; \
char* encoding; \
pencoding = PyObject_GetAttrString(yyextra->parser, "encoding"); \
@ -50,10 +50,24 @@
if ((a)==NULL) return T_ERROR; \
}
/* Make a Python unicode string from tmp_buf and assign it to a.
 * The bytes of yyextra->tmp_buf (up to its terminating NUL, measured
 * with strlen) are decoded with the fixed "ascii" codec and the
 * "ignore" error handler, so bytes that are not valid ASCII are
 * dropped instead of raising a decode error.
 * If decoding fails (result is NULL) the enclosing function returns
 * T_ERROR, so this macro may only be used where such a return is valid.
 */
#define PYSTRING_TMP_ASCII(a) { \
(a) = PyUnicode_Decode(yyextra->tmp_buf, strlen(yyextra->tmp_buf), \
"ascii", "ignore"); \
if ((a)==NULL) return T_ERROR; \
}
/* set return value from tmp_buf */
#define SETLVAL { \
#define SETLVAL_UNICODE { \
PyObject* s; \
PYSTRING_TMP(s); \
PYSTRING_TMP_UNICODE(s); \
RESIZE_BUF(yyextra->tmp_buf, 1); \
*yylval = s; \
}
/* Set the token value (*yylval) from tmp_buf, decoded as ASCII.
 * The string is built with PYSTRING_TMP_ASCII, which returns T_ERROR
 * from the enclosing function if decoding fails; in that case *yylval
 * is left unassigned. Afterwards tmp_buf is passed to RESIZE_BUF with
 * size 1 (presumably shrinking it back to an empty string -- confirm
 * against the RESIZE_BUF definition).
 */
#define SETLVAL_ASCII { \
PyObject* s; \
PYSTRING_TMP_ASCII(s); \
RESIZE_BUF(yyextra->tmp_buf, 1); \
*yylval = s; \
}
@ -99,7 +113,7 @@
*/
#define FLUSH_ATTRS \
if (strlen(yyextra->tmp_buf) > 0) { \
PYSTRING_TMP(yyextra->tmp_attrname); \
PYSTRING_TMP_UNICODE(yyextra->tmp_attrname); \
RESIZE_BUF(yyextra->tmp_buf, 1); \
if (PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname, Py_None)==-1) return T_ERROR; \
Py_CLEAR(yyextra->tmp_attrname); \
@ -213,7 +227,7 @@ RX_DATA [-a-zA-Z0-9_:]+
<S_COMMENT>--[ ]*> {
UPDATE_COLUMN;
SETLVAL;
SETLVAL_UNICODE;
BEGIN(INITIAL);
RETURN(T_COMMENT);
}
@ -255,7 +269,7 @@ RX_DATA [-a-zA-Z0-9_:]+
<S_COMMENT2>!> {
UPDATE_COLUMN;
SETLVAL;
SETLVAL_UNICODE;
BEGIN(INITIAL);
RETURN(T_COMMENT);
}
@ -283,7 +297,7 @@ RX_DATA [-a-zA-Z0-9_:]+
<S_DOCTYPE>> {
UPDATE_COLUMN;
SETLVAL;
SETLVAL_UNICODE;
BEGIN(INITIAL);
RETURN(T_DOCTYPE);
}
@ -302,7 +316,7 @@ RX_DATA [-a-zA-Z0-9_:]+
<S_CDATA>\]*\]\]> {
UPDATE_COLUMN;
APPEND_TO_TMP(yyleng-3);
SETLVAL;
SETLVAL_UNICODE;
BEGIN(INITIAL);
RETURN(T_CDATA);
}
@ -335,7 +349,7 @@ RX_DATA [-a-zA-Z0-9_:]+
<S_PI>\?+> {
UPDATE_COLUMN;
APPEND_TO_TMP(yyleng-2);
SETLVAL;
SETLVAL_UNICODE;
BEGIN(INITIAL);
RETURN(T_PI);
}
@ -347,7 +361,7 @@ RX_DATA [-a-zA-Z0-9_:]+
<S_PI>> {
UPDATE_COLUMN;
SETLVAL;
SETLVAL_UNICODE;
BEGIN(INITIAL);
RETURN(T_PI);
}
@ -377,7 +391,7 @@ RX_DATA [-a-zA-Z0-9_:]+
<S_TAGSTART>{RX_WHITE_SPACE}+ {
UPDATE_LINE;
LOWER_TMP;
PYSTRING_TMP(yyextra->tmp_tag);
PYSTRING_TMP_ASCII(yyextra->tmp_tag);
RESIZE_BUF(yyextra->tmp_buf, 1);
BEGIN(S_ATTR1);
}
@ -390,11 +404,11 @@ RX_DATA [-a-zA-Z0-9_:]+
RESIZE_BUF(yyextra->tmp_buf, 4);
strcpy(yyextra->tmp_buf, "</>");
yyextra->tmp_attrs = NULL;
SETLVAL;
SETLVAL_UNICODE;
RETURN(T_TEXT);
}
LOWER_TMP;
PYSTRING_TMP(yyextra->tmp_tag);
PYSTRING_TMP_ASCII(yyextra->tmp_tag);
RESIZE_BUF(yyextra->tmp_buf, 1);
SET_ATTR_LVAL;
RETURN(T_ELEMENT_START_END);
@ -408,11 +422,11 @@ RX_DATA [-a-zA-Z0-9_:]+
RESIZE_BUF(yyextra->tmp_buf, 3);
strcpy(yyextra->tmp_buf, "<>");
yyextra->tmp_attrs = NULL;
SETLVAL;
SETLVAL_UNICODE;
RETURN(T_TEXT);
}
LOWER_TMP;
PYSTRING_TMP(yyextra->tmp_tag);
PYSTRING_TMP_ASCII(yyextra->tmp_tag);
RESIZE_BUF(yyextra->tmp_buf, 1);
SCRIPT_CHECK;
SET_ATTR_LVAL;
@ -426,7 +440,7 @@ RX_DATA [-a-zA-Z0-9_:]+
/*********************** SCRIPT ************************/
<S_SCRIPT><\/{RX_WHITE_SPACE}*[Ss][Cc][Rr][Ii][Pp][Tt]{RX_WHITE_SPACE}*> {
UPDATE_LINE;
SETLVAL;
SETLVAL_UNICODE;
BEGIN(INITIAL);
RETURN(T_SCRIPT);
}
@ -597,7 +611,7 @@ RX_DATA [-a-zA-Z0-9_:]+
/*********************** STYLE ************************/
<S_STYLE><\/{RX_WHITE_SPACE}*[Ss][Tt][Yy][Ll][Ee]{RX_WHITE_SPACE}*> {
UPDATE_LINE;
SETLVAL;
SETLVAL_UNICODE;
BEGIN(INITIAL);
RETURN(T_STYLE);
}
@ -691,7 +705,7 @@ RX_DATA [-a-zA-Z0-9_:]+
<S_ATTR2,S_ATTR3>{RX_WHITE_SPACE}*{RX_EQUAL}{RX_WHITE_SPACE}* {
UPDATE_LINE;
LOWER_TMP;
PYSTRING_TMP(yyextra->tmp_attrname);
PYSTRING_TMP_UNICODE(yyextra->tmp_attrname);
RESIZE_BUF(yyextra->tmp_buf, 1);
BEGIN(S_ATTR4);
}
@ -699,7 +713,7 @@ RX_DATA [-a-zA-Z0-9_:]+
<S_ATTR3>{RX_NAME} {
UPDATE_COLUMN;
LOWER_TMP;
PYSTRING_TMP(yyextra->tmp_attrname);
PYSTRING_TMP_UNICODE(yyextra->tmp_attrname);
RESIZE_BUF(yyextra->tmp_buf, 1);
if (yyextra->tmp_attrval!=NULL) return T_ERROR;
if (PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname,
@ -740,7 +754,7 @@ RX_DATA [-a-zA-Z0-9_:]+
<S_ATTR4>> {
UPDATE_COLUMN;
PYSTRING_TMP(yyextra->tmp_attrval);
PYSTRING_TMP_UNICODE(yyextra->tmp_attrval);
RESIZE_BUF(yyextra->tmp_buf, 1);
yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities,
"O", yyextra->tmp_attrval);
@ -765,7 +779,7 @@ RX_DATA [-a-zA-Z0-9_:]+
<S_ATTR5>> {
UPDATE_COLUMN;
PYSTRING_TMP(yyextra->tmp_attrval);
PYSTRING_TMP_UNICODE(yyextra->tmp_attrval);
RESIZE_BUF(yyextra->tmp_buf, 1);
yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities,
"O", yyextra->tmp_attrval);
@ -781,7 +795,7 @@ RX_DATA [-a-zA-Z0-9_:]+
<S_ATTR5>\/> {
UPDATE_COLUMN;
PYSTRING_TMP(yyextra->tmp_attrval);
PYSTRING_TMP_UNICODE(yyextra->tmp_attrval);
RESIZE_BUF(yyextra->tmp_buf, 1);
yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities,
"O", yyextra->tmp_attrval);
@ -797,7 +811,7 @@ RX_DATA [-a-zA-Z0-9_:]+
<S_ATTR5>{RX_WHITE_SPACE}+|[\"] {
UPDATE_LINE;
PYSTRING_TMP(yyextra->tmp_attrval);
PYSTRING_TMP_UNICODE(yyextra->tmp_attrval);
RESIZE_BUF(yyextra->tmp_buf, 1);
yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities,
"O", yyextra->tmp_attrval);
@ -817,7 +831,7 @@ RX_DATA [-a-zA-Z0-9_:]+
<S_APOSSTRING>\' {
UPDATE_COLUMN;
PYSTRING_TMP(yyextra->tmp_attrval);
PYSTRING_TMP_UNICODE(yyextra->tmp_attrval);
RESIZE_BUF(yyextra->tmp_buf, 1);
yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities,
"O", yyextra->tmp_attrval);
@ -849,7 +863,7 @@ RX_DATA [-a-zA-Z0-9_:]+
<S_STRING>\" {
UPDATE_COLUMN;
PYSTRING_TMP(yyextra->tmp_attrval);
PYSTRING_TMP_UNICODE(yyextra->tmp_attrval);
RESIZE_BUF(yyextra->tmp_buf, 1);
yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities,
"O", yyextra->tmp_attrval);
@ -887,7 +901,7 @@ RX_DATA [-a-zA-Z0-9_:]+
<S_TAGEND>{RX_WHITE_SPACE}*> {
UPDATE_LINE;
LOWER_TMP;
SETLVAL;
SETLVAL_ASCII;
BEGIN(INITIAL);
RETURN(T_ELEMENT_END);
}
@ -896,7 +910,7 @@ RX_DATA [-a-zA-Z0-9_:]+
UPDATE_LINE;
LOWER_TMP;
yyextra->error = PyString_FromFormat("missing > in end tag `%s'", yyextra->tmp_buf);
SETLVAL;
SETLVAL_ASCII;
BEGIN(S_TAGEND);
RETURN(T_ELEMENT_END);
}
@ -905,7 +919,7 @@ RX_DATA [-a-zA-Z0-9_:]+
UPDATE_LINE;
LOWER_TMP;
yyextra->error = PyString_FromFormat("missing > in end tag `%s'", yyextra->tmp_buf);
SETLVAL;
SETLVAL_ASCII;
yyextra->tmp_attrs = PyObject_CallObject(yyextra->list_dict, NULL);
if (yyextra->tmp_attrs==NULL) {
return T_ERROR;
@ -923,7 +937,7 @@ RX_DATA [-a-zA-Z0-9_:]+
<S_TAGEND2>> {
UPDATE_COLUMN;
LOWER_TMP;
SETLVAL;
SETLVAL_ASCII;
BEGIN(INITIAL);
RETURN(T_ELEMENT_END);
}
@ -936,7 +950,7 @@ RX_DATA [-a-zA-Z0-9_:]+
UPDATE_LINE;
LOWER_TMP;
yyextra->error = PyString_FromFormat("missing > in end tag `%s'", yyextra->tmp_buf);
SETLVAL;
SETLVAL_ASCII;
BEGIN(S_TAGEND);
RETURN(T_ELEMENT_END);
}
@ -945,7 +959,7 @@ RX_DATA [-a-zA-Z0-9_:]+
UPDATE_LINE;
LOWER_TMP;
yyextra->error = PyString_FromFormat("missing > in end tag `%s'", yyextra->tmp_buf);
SETLVAL;
SETLVAL_ASCII;
yyextra->tmp_attrs = PyObject_CallObject(yyextra->list_dict, NULL);
if (yyextra->tmp_attrs==NULL) {
return T_ERROR;
@ -961,21 +975,21 @@ RX_DATA [-a-zA-Z0-9_:]+
<INITIAL>[^<]+ {
UPDATE_LINE;
APPEND_TO_TMP(yyleng);
SETLVAL;
SETLVAL_UNICODE;
RETURN(T_TEXT);
}
<INITIAL><[^\012 \t\b\r\nA-Za-z!?/] {
UPDATE_COLUMN;
APPEND_TO_TMP(yyleng);
SETLVAL;
SETLVAL_UNICODE;
RETURN(T_TEXT);
}
<INITIAL><{RX_WHITE_SPACE}+[^A-Za-z/] {
UPDATE_LINE;
APPEND_TO_TMP(yyleng);
SETLVAL;
SETLVAL_UNICODE;
RETURN(T_TEXT);
}

View file

@ -1,4 +1,4 @@
/* A Bison parser, made by GNU Bison 1.875d. */
/* A Bison parser, made by GNU Bison 2.0. */
/* Skeleton parser for Yacc-like parsing with Bison,
Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
@ -274,7 +274,7 @@ typedef int YYSTYPE;
/* Copy the second part of user declarations. */
/* Line 214 of yacc.c. */
/* Line 213 of yacc.c. */
#line 279 "htmlparse.c"
#if ! defined (yyoverflow) || YYERROR_VERBOSE
@ -290,14 +290,10 @@ typedef int YYSTYPE;
# ifdef YYSTACK_USE_ALLOCA
# if YYSTACK_USE_ALLOCA
# define YYSTACK_ALLOC alloca
# endif
# else
# if defined (alloca) || defined (_ALLOCA_H)
# define YYSTACK_ALLOC alloca
# else
# ifdef __GNUC__
# define YYSTACK_ALLOC __builtin_alloca
# else
# define YYSTACK_ALLOC alloca
# endif
# endif
# endif
@ -601,20 +597,53 @@ do \
} \
while (0)
#define YYTERROR 1
#define YYERRCODE 256
/* YYLLOC_DEFAULT -- Compute the default location (before the actions
are run). */
/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N].
If N is 0, then set CURRENT to the empty location which ends
the previous symbol: RHS[0] (always defined). */
#define YYRHSLOC(Rhs, K) ((Rhs)[K])
#ifndef YYLLOC_DEFAULT
# define YYLLOC_DEFAULT(Current, Rhs, N) \
((Current).first_line = (Rhs)[1].first_line, \
(Current).first_column = (Rhs)[1].first_column, \
(Current).last_line = (Rhs)[N].last_line, \
(Current).last_column = (Rhs)[N].last_column)
# define YYLLOC_DEFAULT(Current, Rhs, N) \
do \
if (N) \
{ \
(Current).first_line = YYRHSLOC (Rhs, 1).first_line; \
(Current).first_column = YYRHSLOC (Rhs, 1).first_column; \
(Current).last_line = YYRHSLOC (Rhs, N).last_line; \
(Current).last_column = YYRHSLOC (Rhs, N).last_column; \
} \
else \
{ \
(Current).first_line = (Current).last_line = \
YYRHSLOC (Rhs, 0).last_line; \
(Current).first_column = (Current).last_column = \
YYRHSLOC (Rhs, 0).last_column; \
} \
while (0)
#endif
/* YY_LOCATION_PRINT -- Print the location on the stream.
This macro was not mandated originally: define only if we know
we won't break user code: when these are the locations we know. */
#ifndef YY_LOCATION_PRINT
# if YYLTYPE_IS_TRIVIAL
# define YY_LOCATION_PRINT(File, Loc) \
fprintf (File, "%d.%d-%d.%d", \
(Loc).first_line, (Loc).first_column, \
(Loc).last_line, (Loc).last_column)
# else
# define YY_LOCATION_PRINT(File, Loc) ((void) 0)
# endif
#endif
/* YYLEX -- calling `yylex' with the right arguments. */
#ifdef YYLEX_PARAM
@ -637,19 +666,13 @@ do { \
YYFPRINTF Args; \
} while (0)
# define YYDSYMPRINT(Args) \
do { \
if (yydebug) \
yysymprint Args; \
} while (0)
# define YYDSYMPRINTF(Title, Token, Value, Location) \
# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \
do { \
if (yydebug) \
{ \
YYFPRINTF (stderr, "%s ", Title); \
yysymprint (stderr, \
Token, Value); \
Type, Value); \
YYFPRINTF (stderr, "\n"); \
} \
} while (0)
@ -716,8 +739,7 @@ do { \
int yydebug;
#else /* !YYDEBUG */
# define YYDPRINTF(Args)
# define YYDSYMPRINT(Args)
# define YYDSYMPRINTF(Title, Token, Value, Location)
# define YY_SYMBOL_PRINT(Title, Type, Value, Location)
# define YY_STACK_PRINT(Bottom, Top)
# define YY_REDUCE_PRINT(Rule)
#endif /* !YYDEBUG */
@ -735,10 +757,6 @@ int yydebug;
SIZE_MAX < YYSTACK_BYTES (YYMAXDEPTH)
evaluated with infinite-precision integer arithmetic. */
#if defined (YYMAXDEPTH) && YYMAXDEPTH == 0
# undef YYMAXDEPTH
#endif
#ifndef YYMAXDEPTH
# define YYMAXDEPTH 10000
#endif
@ -820,15 +838,15 @@ yysymprint (yyoutput, yytype, yyvaluep)
(void) yyvaluep;
if (yytype < YYNTOKENS)
{
YYFPRINTF (yyoutput, "token %s (", yytname[yytype]);
# ifdef YYPRINT
YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep);
# endif
}
YYFPRINTF (yyoutput, "token %s (", yytname[yytype]);
else
YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]);
# ifdef YYPRINT
if (yytype < YYNTOKENS)
YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep);
# endif
switch (yytype)
{
default:
@ -844,10 +862,11 @@ yysymprint (yyoutput, yytype, yyvaluep)
#if defined (__STDC__) || defined (__cplusplus)
static void
yydestruct (int yytype, YYSTYPE *yyvaluep)
yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep)
#else
static void
yydestruct (yytype, yyvaluep)
yydestruct (yymsg, yytype, yyvaluep)
const char *yymsg;
int yytype;
YYSTYPE *yyvaluep;
#endif
@ -855,6 +874,10 @@ yydestruct (yytype, yyvaluep)
/* Pacify ``unused variable'' warnings. */
(void) yyvaluep;
if (!yymsg)
yymsg = "Deleting";
YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp);
switch (yytype)
{
@ -907,10 +930,10 @@ yyparse ()
#endif
#endif
{
/* The lookahead symbol. */
/* The look-ahead symbol. */
int yychar;
/* The semantic value of the lookahead symbol. */
/* The semantic value of the look-ahead symbol. */
YYSTYPE yylval;
/* Number of syntax errors so far. */
@ -921,7 +944,7 @@ int yynerrs;
int yyresult;
/* Number of tokens to shift before error messages enabled. */
int yyerrstatus;
/* Lookahead token as an internal (translated) token number. */
/* Look-ahead token as an internal (translated) token number. */
int yytoken = 0;
/* Three stacks and their tools:
@ -973,6 +996,8 @@ int yynerrs;
yyvsp = yyvs;
yyvsp[0] = yylval;
goto yysetstate;
/*------------------------------------------------------------.
@ -1062,18 +1087,18 @@ int yynerrs;
yybackup:
/* Do appropriate processing given the current state. */
/* Read a lookahead token if we need one and don't already have one. */
/* Read a look-ahead token if we need one and don't already have one. */
/* yyresume: */
/* First try to decide what to do without reference to lookahead token. */
/* First try to decide what to do without reference to look-ahead token. */
yyn = yypact[yystate];
if (yyn == YYPACT_NINF)
goto yydefault;
/* Not known => get a lookahead token if don't already have one. */
/* Not known => get a look-ahead token if don't already have one. */
/* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. */
/* YYCHAR is either YYEMPTY or YYEOF or a valid look-ahead symbol. */
if (yychar == YYEMPTY)
{
YYDPRINTF ((stderr, "Reading a token: "));
@ -1088,7 +1113,7 @@ yybackup:
else
{
yytoken = YYTRANSLATE (yychar);
YYDSYMPRINTF ("Next token is", yytoken, &yylval, &yylloc);
YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc);
}
/* If the proper action on seeing token YYTOKEN is to reduce or to
@ -1108,8 +1133,8 @@ yybackup:
if (yyn == YYFINAL)
YYACCEPT;
/* Shift the lookahead token. */
YYDPRINTF ((stderr, "Shifting token %s, ", yytname[yytoken]));
/* Shift the look-ahead token. */
YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc);
/* Discard the token being shifted unless it is eof. */
if (yychar != YYEOF)
@ -1199,8 +1224,8 @@ yyreduce:
UserData* ud = yyget_extra(scanner);
PyObject* callback = NULL;
PyObject* result = NULL;
PyObject* tag = PyTuple_GET_ITEM(yyvsp[0], 0);
PyObject* attrs = PyTuple_GET_ITEM(yyvsp[0], 1);
PyObject* tag = PyTuple_GET_ITEM((yyvsp[0]), 0);
PyObject* attrs = PyTuple_GET_ITEM((yyvsp[0]), 1);
int error = 0;
int cmp;
CHECK_ERROR((tag == NULL || attrs == NULL), finish_start);
@ -1228,7 +1253,7 @@ finish_start:
Py_XDECREF(result);
Py_XDECREF(tag);
Py_XDECREF(attrs);
Py_DECREF(yyvsp[0]);
Py_DECREF((yyvsp[0]));
if (error) {
PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb));
YYABORT;
@ -1246,8 +1271,8 @@ finish_start:
UserData* ud = yyget_extra(scanner);
PyObject* callback = NULL;
PyObject* result = NULL;
PyObject* tag = PyTuple_GET_ITEM(yyvsp[0], 0);
PyObject* attrs = PyTuple_GET_ITEM(yyvsp[0], 1);
PyObject* tag = PyTuple_GET_ITEM((yyvsp[0]), 0);
PyObject* attrs = PyTuple_GET_ITEM((yyvsp[0]), 1);
int error = 0;
int cmp;
char* fname;
@ -1282,7 +1307,7 @@ finish_start_end:
Py_XDECREF(result);
Py_XDECREF(tag);
Py_XDECREF(attrs);
Py_DECREF(yyvsp[0]);
Py_DECREF((yyvsp[0]));
if (error) {
PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb));
YYABORT;
@ -1302,7 +1327,7 @@ finish_start_end:
int error = 0;
int cmp;
/* encode tagname in ASCII, ignoring any unknown chars */
PyObject* tagname = PyUnicode_AsEncodedString(yyvsp[0], "ascii", "ignore");
PyObject* tagname = PyUnicode_AsEncodedString((yyvsp[0]), "ascii", "ignore");
if (tagname == NULL) {
error = 1;
goto finish_end;
@ -1312,7 +1337,7 @@ finish_start_end:
if (PyObject_HasAttrString(ud->handler, "end_element") == 1 && cmp > 0) {
callback = PyObject_GetAttrString(ud->handler, "end_element");
CHECK_ERROR((callback == NULL), finish_end);
result = PyObject_CallFunction(callback, "O", yyvsp[0]);
result = PyObject_CallFunction(callback, "O", (yyvsp[0]));
CHECK_ERROR((result == NULL), finish_end);
Py_CLEAR(callback);
Py_CLEAR(result);
@ -1324,7 +1349,7 @@ finish_end:
Py_XDECREF(tagname);
Py_XDECREF(callback);
Py_XDECREF(result);
Py_DECREF(yyvsp[0]);
Py_DECREF((yyvsp[0]));
if (error) {
PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb));
YYABORT;
@ -1342,14 +1367,14 @@ finish_end:
PyObject* callback = NULL;
PyObject* result = NULL;
int error = 0;
CALLBACK(ud, "comment", "O", yyvsp[0], finish_comment);
CALLBACK(ud, "comment", "O", (yyvsp[0]), finish_comment);
CHECK_PARSER_ERROR(ud, finish_comment);
finish_comment:
Py_XDECREF(ud->error);
ud->error = NULL;
Py_XDECREF(callback);
Py_XDECREF(result);
Py_DECREF(yyvsp[0]);
Py_DECREF((yyvsp[0]));
if (error) {
PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb));
YYABORT;
@ -1366,14 +1391,14 @@ finish_comment:
PyObject* callback = NULL;
PyObject* result = NULL;
int error = 0;
CALLBACK(ud, "pi", "O", yyvsp[0], finish_pi);
CALLBACK(ud, "pi", "O", (yyvsp[0]), finish_pi);
CHECK_PARSER_ERROR(ud, finish_pi);
finish_pi:
Py_XDECREF(ud->error);
ud->error = NULL;
Py_XDECREF(callback);
Py_XDECREF(result);
Py_DECREF(yyvsp[0]);
Py_DECREF((yyvsp[0]));
if (error) {
PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb));
YYABORT;
@ -1391,14 +1416,14 @@ finish_pi:
PyObject* callback = NULL;
PyObject* result = NULL;
int error = 0;
CALLBACK(ud, "cdata", "O", yyvsp[0], finish_cdata);
CALLBACK(ud, "cdata", "O", (yyvsp[0]), finish_cdata);
CHECK_PARSER_ERROR(ud, finish_cdata);
finish_cdata:
Py_XDECREF(ud->error);
ud->error = NULL;
Py_XDECREF(callback);
Py_XDECREF(result);
Py_DECREF(yyvsp[0]);
Py_DECREF((yyvsp[0]));
if (error) {
PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb));
YYABORT;
@ -1417,17 +1442,17 @@ finish_cdata:
PyObject* result = NULL;
int error = 0;
/* set document type */
result = PyObject_CallFunction(set_doctype, "OO", ud->parser, yyvsp[0]);
result = PyObject_CallFunction(set_doctype, "OO", ud->parser, (yyvsp[0]));
CHECK_ERROR((result == NULL), finish_doctype);
Py_CLEAR(result);
CALLBACK(ud, "doctype", "O", yyvsp[0], finish_doctype);
CALLBACK(ud, "doctype", "O", (yyvsp[0]), finish_doctype);
CHECK_PARSER_ERROR(ud, finish_doctype);
finish_doctype:
Py_XDECREF(ud->error);
ud->error = NULL;
Py_XDECREF(callback);
Py_XDECREF(result);
Py_DECREF(yyvsp[0]);
Py_DECREF((yyvsp[0]));
if (error) {
PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb));
YYABORT;
@ -1447,7 +1472,7 @@ finish_doctype:
int error = 0;
PyObject* script = PyUnicode_DecodeASCII("script", 6, "ignore");
CHECK_ERROR((script == NULL), finish_script);
CALLBACK(ud, "characters", "O", yyvsp[0], finish_script);
CALLBACK(ud, "characters", "O", (yyvsp[0]), finish_script);
/* emit the omitted end tag */
CALLBACK(ud, "end_element", "O", script, finish_script);
CHECK_PARSER_ERROR(ud, finish_script);
@ -1457,7 +1482,7 @@ finish_script:
Py_XDECREF(callback);
Py_XDECREF(script);
Py_XDECREF(result);
Py_DECREF(yyvsp[0]);
Py_DECREF((yyvsp[0]));
if (error) {
PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb));
YYABORT;
@ -1477,7 +1502,7 @@ finish_script:
int error = 0;
PyObject* style = PyUnicode_DecodeASCII("style", 5, "ignore");
CHECK_ERROR((style == NULL), finish_style);
CALLBACK(ud, "characters", "O", yyvsp[0], finish_style);
CALLBACK(ud, "characters", "O", (yyvsp[0]), finish_style);
/* emit the omitted end tag */
CALLBACK(ud, "end_element", "O", style, finish_style);
CHECK_PARSER_ERROR(ud, finish_style);
@ -1487,7 +1512,7 @@ finish_style:
Py_XDECREF(callback);
Py_XDECREF(style);
Py_XDECREF(result);
Py_DECREF(yyvsp[0]);
Py_DECREF((yyvsp[0]));
if (error) {
PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb));
YYABORT;
@ -1507,14 +1532,14 @@ finish_style:
PyObject* callback = NULL;
PyObject* result = NULL;
int error = 0;
CALLBACK(ud, "characters", "O", yyvsp[0], finish_characters);
CALLBACK(ud, "characters", "O", (yyvsp[0]), finish_characters);
CHECK_PARSER_ERROR(ud, finish_characters);
finish_characters:
Py_XDECREF(ud->error);
ud->error = NULL;
Py_XDECREF(callback);
Py_XDECREF(result);
Py_DECREF(yyvsp[0]);
Py_DECREF((yyvsp[0]));
if (error) {
PyErr_Fetch(&(ud->exc_type), &(ud->exc_val), &(ud->exc_tb));
YYABORT;
@ -1526,8 +1551,8 @@ finish_characters:
}
/* Line 1010 of yacc.c. */
#line 1531 "htmlparse.c"
/* Line 1037 of yacc.c. */
#line 1556 "htmlparse.c"
yyvsp -= yylen;
yyssp -= yylen;
@ -1627,7 +1652,7 @@ yyerrlab:
if (yyerrstatus == 3)
{
/* If just tried and failed to reuse lookahead token after an
/* If just tried and failed to reuse look-ahead token after an
error, discard it. */
if (yychar <= YYEOF)
@ -1637,23 +1662,22 @@ yyerrlab:
if (yychar == YYEOF)
for (;;)
{
YYPOPSTACK;
if (yyssp == yyss)
YYABORT;
YYDSYMPRINTF ("Error: popping", yystos[*yyssp], yyvsp, yylsp);
yydestruct (yystos[*yyssp], yyvsp);
yydestruct ("Error: popping",
yystos[*yyssp], yyvsp);
}
}
else
{
YYDSYMPRINTF ("Error: discarding", yytoken, &yylval, &yylloc);
yydestruct (yytoken, &yylval);
yydestruct ("Error: discarding", yytoken, &yylval);
yychar = YYEMPTY;
}
}
/* Else will try to reuse lookahead token after shifting the error
/* Else will try to reuse look-ahead token after shifting the error
token. */
goto yyerrlab1;
@ -1670,7 +1694,7 @@ yyerrorlab:
goto yyerrorlab;
#endif
yyvsp -= yylen;
yyvsp -= yylen;
yyssp -= yylen;
yystate = *yyssp;
goto yyerrlab1;
@ -1700,8 +1724,8 @@ yyerrlab1:
if (yyssp == yyss)
YYABORT;
YYDSYMPRINTF ("Error: popping", yystos[*yyssp], yyvsp, yylsp);
yydestruct (yystos[yystate], yyvsp);
yydestruct ("Error: popping", yystos[yystate], yyvsp);
YYPOPSTACK;
yystate = *yyssp;
YY_STACK_PRINT (yyss, yyssp);
@ -1710,11 +1734,12 @@ yyerrlab1:
if (yyn == YYFINAL)
YYACCEPT;
YYDPRINTF ((stderr, "Shifting error token, "));
*++yyvsp = yylval;
/* Shift the error token. */
YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp);
yystate = yyn;
goto yynewstate;
@ -1730,6 +1755,9 @@ yyacceptlab:
| yyabortlab -- YYABORT comes here. |
`-----------------------------------*/
yyabortlab:
yydestruct ("Error: discarding lookahead",
yytoken, &yylval);
yychar = YYEMPTY;
yyresult = 1;
goto yyreturn;

View file

@ -1,4 +1,4 @@
/* A Bison parser, made by GNU Bison 1.875d. */
/* A Bison parser, made by GNU Bison 2.0. */
/* Skeleton parser for Yacc-like parsing with Bison,
Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.