emit unicode data, store encoding

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1853 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2004-10-06 19:05:48 +00:00
parent f2e2288f4b
commit 10209ae499
5 changed files with 156 additions and 73 deletions

View file

@ -59,6 +59,10 @@
The parser quotes all attribute values.
Python memory management interface is used.
4. Character encoding aware
The parser itself is not encoding aware, but its output always
consists of Python Unicode strings.
"""
import re
@ -79,30 +83,39 @@ def _resolve_ascii_entity (mo):
radix = 10
num = int(num, radix)
# check 7-bit ASCII char range
if 0<=num<=127:
return chr(num)
if 0 <= num <= 127:
return unicode(chr(num))
# not in range
return ent
_num_re = re.compile(ur'(?i)&#x?(?P<num>\d+);')
def resolve_ascii_entities (s):
"""resolve entities in 7-bit ASCII range to eliminate obfuscation"""
return re.sub(r'(?i)&#x?(?P<num>\d+);', _resolve_ascii_entity, s)
return _num_re.sub(_resolve_ascii_entity, s)
def _resolve_html_entity (mo):
"""resolve html entity, helper function for resolve_html_entities"""
return htmlentitydefs.entitydefs.get(mo.group("entity"), mo.group())
ent = mo.group("entity")
s = mo.group()
entdef = htmlentitydefs.entitydefs.get(ent)
if entdef is None:
return s
# note: entdef is latin-1 encoded
return entdef.decode("iso8859-1")
_entity_re = re.compile(ur'(?i)&(?P<entity>[a-z]+);')
def resolve_html_entities (s):
"""resolve html entites in s and return result"""
return re.sub(r'(?i)&(?P<entity>[a-z]+);', _resolve_html_entity, s)
return _entity_re.sub(_resolve_html_entity, s)
def resolve_entities (s):
"""resolve both html and 7-bit ASCII entites in s and return result"""
return resolve_html_entities(resolve_ascii_entities(s))
s = resolve_ascii_entities(s)
return resolve_html_entities(s)
def strip_quotes (s):
@ -112,3 +125,4 @@ def strip_quotes (s):
(s.startswith('"') and s.endswith('"'))):
return s[1:-1]
return s

View file

@ -36,13 +36,15 @@
if ((b)==NULL) return T_ERROR; \
(b)[(n)-1] = '\0'
/* make python string from tmp_buf and assign it to a */
#define PYSTRING_TMP(a) \
(a) = PyString_FromString(yyextra->tmp_buf); \
if ((a)==NULL) return T_ERROR
/* make python unicode string from tmp_buf and assign it to a */
#define PYSTRING_TMP(a) { \
const char* enc = PyString_AsString(yyextra->encoding); \
(a) = PyUnicode_Decode(yyextra->tmp_buf, strlen(yyextra->tmp_buf), enc, "ignore"); \
if ((a)==NULL) return T_ERROR; \
}
/* set return value from tmp_buf */
#define SETLVAL {\
#define SETLVAL { \
PyObject* s; \
PYSTRING_TMP(s); \
RESIZE_BUF(yyextra->tmp_buf, 1); \
@ -50,26 +52,30 @@
}
/* append yytext to tmp_buf */
#define APPEND_TO_TMP(n) {\
#define APPEND_TO_TMP(n) { \
size_t len = strlen(yyextra->tmp_buf) + (n) + 1; \
RESIZE_BUF(yyextra->tmp_buf, len); \
strlcat(yyextra->tmp_buf, yytext, len); \
}
/* lowercase the tmp_buf */
#define LOWER_TMP {\
#define LOWER_TMP { \
char* p = yyextra->tmp_buf; \
while (*p) { *p = tolower(*p); p++; } \
}
/* check for JavaScript or CSS tags; must be before SET_ATTR_LVAL */
#define SCRIPT_CHECK \
if (strcmp("script", PyString_AS_STRING(yyextra->tmp_tag))==0) \
#define SCRIPT_CHECK { \
PyObject* tagname = PyUnicode_AsEncodedString(yyextra->tmp_tag, "ascii", "ignore"); \
if (tagname==NULL) return T_ERROR; \
if (strcmp("script", PyString_AsString(tagname))==0) \
BEGIN(S_SCRIPT); \
else if (strcmp("style", PyString_AS_STRING(yyextra->tmp_tag))==0) \
else if (strcmp("style", PyString_AsString(tagname))==0) \
BEGIN(S_STYLE); \
else \
BEGIN(INITIAL)
BEGIN(INITIAL); \
Py_DECREF(tagname); \
}
/* set return value from tag with attributes */
#define SET_ATTR_LVAL \
@ -88,7 +94,7 @@
if (strlen(yyextra->tmp_buf) > 0) { \
PYSTRING_TMP(yyextra->tmp_attrname); \
RESIZE_BUF(yyextra->tmp_buf, 1); \
if (PyMapping_SetItemString(yyextra->tmp_attrs, PyString_AsString(yyextra->tmp_attrname), Py_None)==-1) return T_ERROR; \
if (PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname, Py_None)==-1) return T_ERROR; \
Py_DECREF(yyextra->tmp_attrname); \
yyextra->tmp_attrname = NULL; \
}
@ -683,8 +689,7 @@ RX_DATA [-a-zA-Z0-9_:]+
if (yyextra->tmp_attrval!=NULL) return T_ERROR;
Py_INCREF(Py_None);
yyextra->tmp_attrval = Py_None;
if (PyMapping_SetItemString(yyextra->tmp_attrs,
PyString_AsString(yyextra->tmp_attrname),
if (PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname,
yyextra->tmp_attrval)==-1) return T_ERROR;
/*Py_DECREF(yyextra->tmp_attrname);*/
/*Py_DECREF(yyextra->tmp_attrval);*/
@ -726,10 +731,10 @@ RX_DATA [-a-zA-Z0-9_:]+
UPDATE_COLUMN;
PYSTRING_TMP(yyextra->tmp_attrval);
RESIZE_BUF(yyextra->tmp_buf, 1);
yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities, "O", yyextra->tmp_attrval);
yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities,
"O", yyextra->tmp_attrval);
if (yyextra->tmp_attrval==NULL) return T_ERROR;
if (PyMapping_SetItemString(yyextra->tmp_attrs,
PyString_AsString(yyextra->tmp_attrname),
if (PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname,
yyextra->tmp_attrval)==-1) return T_ERROR;
Py_DECREF(yyextra->tmp_attrname);
Py_DECREF(yyextra->tmp_attrval);
@ -752,10 +757,10 @@ RX_DATA [-a-zA-Z0-9_:]+
UPDATE_COLUMN;
PYSTRING_TMP(yyextra->tmp_attrval);
RESIZE_BUF(yyextra->tmp_buf, 1);
yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities, "O", yyextra->tmp_attrval);
yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities,
"O", yyextra->tmp_attrval);
if (yyextra->tmp_attrval==NULL) return T_ERROR;
if (PyMapping_SetItemString(yyextra->tmp_attrs,
PyString_AsString(yyextra->tmp_attrname),
if (PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname,
yyextra->tmp_attrval)==-1) return T_ERROR;
Py_DECREF(yyextra->tmp_attrname);
Py_DECREF(yyextra->tmp_attrval);
@ -769,10 +774,10 @@ RX_DATA [-a-zA-Z0-9_:]+
UPDATE_COLUMN;
PYSTRING_TMP(yyextra->tmp_attrval);
RESIZE_BUF(yyextra->tmp_buf, 1);
yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities, "O", yyextra->tmp_attrval);
yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities,
"O", yyextra->tmp_attrval);
if (yyextra->tmp_attrval==NULL) return T_ERROR;
if (PyMapping_SetItemString(yyextra->tmp_attrs,
PyString_AsString(yyextra->tmp_attrname),
if (PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname,
yyextra->tmp_attrval)==-1) return T_ERROR;
Py_DECREF(yyextra->tmp_attrname);
Py_DECREF(yyextra->tmp_attrval);
@ -786,10 +791,10 @@ RX_DATA [-a-zA-Z0-9_:]+
UPDATE_LINE;
PYSTRING_TMP(yyextra->tmp_attrval);
RESIZE_BUF(yyextra->tmp_buf, 1);
yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities, "O", yyextra->tmp_attrval);
yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities,
"O", yyextra->tmp_attrval);
if (yyextra->tmp_attrval==NULL) return T_ERROR;
if (PyMapping_SetItemString(yyextra->tmp_attrs,
PyString_AsString(yyextra->tmp_attrname),
if (PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname,
yyextra->tmp_attrval)==-1) return T_ERROR;
Py_DECREF(yyextra->tmp_attrname);
Py_DECREF(yyextra->tmp_attrval);
@ -807,10 +812,10 @@ RX_DATA [-a-zA-Z0-9_:]+
UPDATE_COLUMN;
PYSTRING_TMP(yyextra->tmp_attrval);
RESIZE_BUF(yyextra->tmp_buf, 1);
yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities, "O", yyextra->tmp_attrval);
yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities,
"O", yyextra->tmp_attrval);
if (yyextra->tmp_attrval==NULL) return T_ERROR;
if (PyMapping_SetItemString(yyextra->tmp_attrs,
PyString_AsString(yyextra->tmp_attrname),
if (PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname,
yyextra->tmp_attrval)==-1) return T_ERROR;
Py_DECREF(yyextra->tmp_attrname);
Py_DECREF(yyextra->tmp_attrval);
@ -840,10 +845,10 @@ RX_DATA [-a-zA-Z0-9_:]+
UPDATE_COLUMN;
PYSTRING_TMP(yyextra->tmp_attrval);
RESIZE_BUF(yyextra->tmp_buf, 1);
yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities, "O", yyextra->tmp_attrval);
yyextra->tmp_attrval = PyObject_CallFunction(yyextra->resolve_entities,
"O", yyextra->tmp_attrval);
if (yyextra->tmp_attrval==NULL) return T_ERROR;
if (PyMapping_SetItemString(yyextra->tmp_attrs,
PyString_AsString(yyextra->tmp_attrname),
if (PyObject_SetItem(yyextra->tmp_attrs, yyextra->tmp_attrname,
yyextra->tmp_attrval)==-1) return T_ERROR;
Py_DECREF(yyextra->tmp_attrname);
Py_DECREF(yyextra->tmp_attrval);

View file

@ -53,44 +53,54 @@ class HtmlPrinter (object):
class HtmlPrettyPrinter (object):
"""Print out all parsed HTML data"""
"""Print out all parsed HTML data in encoded form."""
def __init__ (self, fd=sys.stdout):
def __init__ (self, fd=sys.stdout, encoding="iso8859-1"):
"""write to given file descriptor"""
self.fd = fd
self.encoding = encoding
def comment (self, data):
"""print comment"""
data = data.encode(self.encoding, "ignore")
self.fd.write("<!--%s-->" % data)
def start_element (self, tag, attrs):
"""print start element"""
self.fd.write("<%s"%tag.replace("/", ""))
tag = tag.encode(self.encoding, "ignore")
self.fd.write("<%s" % tag.replace("/", ""))
for key, val in attrs.iteritems():
key = key.encode(self.encoding, "ignore")
if val is None:
self.fd.write(" %s"%key)
self.fd.write(" %s" % key)
else:
val = val.encode(self.encoding, "ignore")
self.fd.write(" %s=\"%s\"" % (key, quote_attrval(val)))
self.fd.write(">")
def end_element (self, tag):
"""print end element"""
tag = tag.encode(self.encoding, "ignore")
self.fd.write("</%s>" % tag)
def doctype (self, data):
"""print document type"""
data = data.encode(self.encoding, "ignore")
self.fd.write("<!DOCTYPE%s>" % data)
def pi (self, data):
"""print pi"""
data = data.encode(self.encoding, "ignore")
self.fd.write("<?%s?>" % data)
def cdata (self, data):
"""print cdata"""
self.fd.write("<![CDATA[%s]]>"%data)
data = data.encode(self.encoding, "ignore")
self.fd.write("<![CDATA[%s]]>" % data)
def characters (self, data):
"""print characters"""
data = data.encode(self.encoding, "ignore")
self.fd.write(data)

View file

@ -104,6 +104,7 @@ static PyObject* list_dict;
/* instance layout of the Python-level parser object */
typedef struct {
PyObject_HEAD
/* handler object; exposed to Python as the "handler" attribute */
PyObject* handler;
/* encoding object; exposed to Python as the "encoding" attribute
   (the setter only accepts a PyString) */
PyObject* encoding;
/* user data handed to htmllexInit() together with the scanner */
UserData* userData;
/* scanner handle set up by htmllexInit() and released by htmllexDestroy() */
void* scanner;
} parser_object;
@ -156,7 +157,7 @@ element: T_WAIT { YYACCEPT; /* wait for more lexer input */ }
| T_ELEMENT_START
{
/* $1 is a PyTuple (<tag>, <attrs>)
<tag> is a PyString, <attrs> is a PyDict */
<tag> is a PyObject, <attrs> is a PyDict */
UserData* ud = yyget_extra(scanner);
PyObject* callback = NULL;
PyObject* result = NULL;
@ -191,14 +192,14 @@ finish_start:
| T_ELEMENT_START_END
{
/* $1 is a PyTuple (<tag>, <attrs>)
<tag> is a PyString, <attrs> is a PyDict */
<tag> is a PyObject, <attrs> is a PyDict */
UserData* ud = yyget_extra(scanner);
PyObject* callback = NULL;
PyObject* result = NULL;
PyObject* tag = PyTuple_GET_ITEM($1, 0);
PyObject* attrs = PyTuple_GET_ITEM($1, 1);
int error = 0;
char* tagname;
PyObject* tagname = NULL;
if (!tag || !attrs) { error = 1; goto finish_start_end; }
if (PyObject_HasAttrString(ud->handler, "start_element")==1) {
callback = PyObject_GetAttrString(ud->handler, "start_element");
@ -209,9 +210,11 @@ finish_start:
Py_DECREF(result);
callback=result=NULL;
}
tagname = PyString_AS_STRING(tag);
/* encode tagname in ASCII, ignoring any unknown chars */
tagname = PyUnicode_AsEncodedString(tag, "ascii", "ignore");
if (tagname==NULL) { error=1; goto finish_start_end; }
if (PyObject_HasAttrString(ud->handler, "end_element")==1 &&
NO_HTML_END_TAG(tagname)) {
NO_HTML_END_TAG(PyString_AsString(tagname))) {
callback = PyObject_GetAttrString(ud->handler, "end_element");
if (callback==NULL) { error=1; goto finish_start_end; }
result = PyObject_CallFunction(callback, "O", tag);
@ -227,6 +230,7 @@ finish_start_end:
Py_XDECREF(callback);
Py_XDECREF(result);
Py_XDECREF(tag);
Py_XDECREF(tagname);
Py_XDECREF(attrs);
Py_DECREF($1);
if (error) {
@ -237,14 +241,16 @@ finish_start_end:
}
| T_ELEMENT_END
{
/* $1 is a PyString */
/* $1 is a PyUnicode */
UserData* ud = yyget_extra(scanner);
PyObject* callback = NULL;
PyObject* result = NULL;
int error = 0;
char* tagname = PyString_AS_STRING($1);
/* encode tagname in ASCII, ignoring any unknown chars */
PyObject* tagname = PyUnicode_AsEncodedString($1, "ascii", "ignore");
if (tagname==NULL) { error=1; goto finish_end; }
if (PyObject_HasAttrString(ud->handler, "end_element")==1 &&
NO_HTML_END_TAG(tagname)) {
NO_HTML_END_TAG(PyString_AsString(tagname))) {
callback = PyObject_GetAttrString(ud->handler, "end_element");
if (callback==NULL) { error=1; goto finish_end; }
result = PyObject_CallFunction(callback, "O", $1);
@ -257,6 +263,7 @@ finish_start_end:
finish_end:
Py_XDECREF(ud->error);
ud->error = NULL;
Py_XDECREF(tagname);
Py_XDECREF(callback);
Py_XDECREF(result);
Py_DECREF($1);
@ -268,7 +275,7 @@ finish_end:
}
| T_COMMENT
{
/* $1 is a PyString */
/* $1 is a PyUnicode */
UserData* ud = yyget_extra(scanner);
PyObject* callback = NULL;
PyObject* result = NULL;
@ -289,7 +296,7 @@ finish_comment:
}
| T_PI
{
/* $1 is a PyString */
/* $1 is a PyUnicode */
UserData* ud = yyget_extra(scanner);
PyObject* callback = NULL;
PyObject* result = NULL;
@ -310,7 +317,7 @@ finish_pi:
}
| T_CDATA
{
/* $1 is a PyString */
/* $1 is a PyUnicode */
UserData* ud = yyget_extra(scanner);
PyObject* callback = NULL;
PyObject* result = NULL;
@ -331,7 +338,7 @@ finish_cdata:
}
| T_DOCTYPE
{
/* $1 is a PyString */
/* $1 is a PyUnicode */
UserData* ud = yyget_extra(scanner);
PyObject* callback = NULL;
PyObject* result = NULL;
@ -352,18 +359,21 @@ finish_doctype:
}
| T_SCRIPT
{
/* $1 is a PyString */
/* $1 is a PyUnicode */
UserData* ud = yyget_extra(scanner);
PyObject* callback = NULL;
PyObject* result = NULL;
int error = 0;
PyObject* script = PyUnicode_DecodeASCII("script", 6, "ignore");
if (script==NULL) { error=1; goto finish_script; }
CALLBACK(ud, "characters", "O", $1, finish_script);
CALLBACK(ud, "end_element", "s", "script", finish_script);
CALLBACK(ud, "end_element", "O", script, finish_script);
CHECK_ERROR(ud, finish_script);
finish_script:
Py_XDECREF(ud->error);
ud->error = NULL;
Py_XDECREF(callback);
Py_XDECREF(script);
Py_XDECREF(result);
Py_DECREF($1);
if (error) {
@ -374,18 +384,21 @@ finish_script:
}
| T_STYLE
{
/* $1 is a PyString */
/* $1 is a PyUnicode */
UserData* ud = yyget_extra(scanner);
PyObject* callback = NULL;
PyObject* result = NULL;
int error = 0;
PyObject* style = PyUnicode_DecodeASCII("style", 5, "ignore");
if (style==NULL) { error=1; goto finish_style; }
CALLBACK(ud, "characters", "O", $1, finish_style);
CALLBACK(ud, "end_element", "s", "style", finish_style);
CALLBACK(ud, "end_element", "O", style, finish_style);
CHECK_ERROR(ud, finish_style);
finish_style:
Py_XDECREF(ud->error);
ud->error = NULL;
Py_XDECREF(callback);
Py_XDECREF(style);
Py_XDECREF(result);
Py_DECREF($1);
if (error) {
@ -396,7 +409,7 @@ finish_style:
}
| T_TEXT
{
/* $1 is a PyString */
/* $1 is a PyUnicode */
/* Remember this is also called as a lexer error fallback */
UserData* ud = yyget_extra(scanner);
PyObject* callback = NULL;
@ -428,16 +441,15 @@ finish_characters:
/* create parser object */
static PyObject* parser_new (PyTypeObject* type, PyObject* args, PyObject* kwds) {
parser_object* self;
if ((self = (parser_object*) type->tp_alloc(type, 0)) == NULL)
{
if ((self = (parser_object*) type->tp_alloc(type, 0)) == NULL) {
return NULL;
}
Py_INCREF(Py_None);
self->handler = Py_None;
/* reset userData */
self->userData = PyMem_New(UserData, sizeof(UserData));
if (self->userData == NULL)
{
if (self->userData == NULL) {
Py_DECREF(self->handler);
Py_DECREF(self);
return NULL;
}
@ -463,11 +475,18 @@ static PyObject* parser_new (PyTypeObject* type, PyObject* args, PyObject* kwds)
self->userData->exc_tb = NULL;
self->userData->error = NULL;
self->scanner = NULL;
if (htmllexInit(&(self->scanner), self->userData)!=0)
{
if (htmllexInit(&(self->scanner), self->userData)!=0) {
Py_DECREF(self->handler);
Py_DECREF(self);
return NULL;
}
self->encoding = PyString_FromString("iso8859-1");
if (self->encoding == NULL) {
Py_DECREF(self->handler);
Py_DECREF(self);
return NULL;
}
self->userData->encoding = self->encoding;
return (PyObject*) self;
}
@ -501,9 +520,9 @@ static int parser_traverse (parser_object* self, visitproc visit, void* arg) {
/* clear all used subobjects participating in reference cycles */
static int parser_clear (parser_object* self) {
self->userData->handler = NULL;
Py_XDECREF(self->handler);
self->handler = NULL;
self->userData->handler = NULL;
return 0;
}
@ -512,6 +531,9 @@ static int parser_clear (parser_object* self) {
static void parser_dealloc (parser_object* self) {
htmllexDestroy(self->scanner);
parser_clear(self);
self->userData->encoding = NULL;
Py_XDECREF(self->encoding);
self->encoding = NULL;
PyMem_Del(self->userData->buf);
PyMem_Del(self->userData->tmp_buf);
PyMem_Del(self->userData);
@ -570,7 +592,10 @@ static PyObject* parser_flush (parser_object* self, PyObject* args) {
if (strlen(self->userData->buf)) {
/* XXX set line, col */
int error = 0;
PyObject* s = PyString_FromString(self->userData->buf);
const char* enc = PyString_AsString(self->encoding);
PyObject* s = PyUnicode_Decode(self->userData->buf,
strlen(self->userData->buf),
enc, "ignore");
PyObject* callback = NULL;
PyObject* result = NULL;
/* reset buffer */
@ -701,6 +726,7 @@ static PyObject* parser_gethandler (parser_object* self, void* closure) {
return self->handler;
}
static int parser_sethandler (parser_object* self, PyObject* value, void* closure) {
if (value == NULL) {
PyErr_SetString(PyExc_TypeError, "Cannot delete parser handler");
@ -709,10 +735,34 @@ static int parser_sethandler (parser_object* self, PyObject* value, void* closur
Py_DECREF(self->handler);
Py_INCREF(value);
self->handler = value;
self->userData->handler = self->handler;
self->userData->handler = value;
return 0;
}
/* Getter for the "encoding" attribute.
   Returns a new reference to the encoding object stored on the parser;
   the closure argument is unused. */
static PyObject* parser_getencoding (parser_object* self, void* closure) {
    PyObject* current = self->encoding;
    Py_INCREF(current);
    return current;
}
/* Setter for the "encoding" attribute.
   value must be a PyString; deleting the attribute (value == NULL) is
   rejected.  On success the parser and its user data both point at the
   new encoding object and 0 is returned; on failure a TypeError is set
   and -1 is returned.  The closure argument is unused. */
static int parser_setencoding (parser_object* self, PyObject* value, void* closure) {
    PyObject* previous;
    if (value == NULL) {
        PyErr_SetString(PyExc_TypeError, "Cannot delete encoding");
        return -1;
    }
    if (!PyString_Check(value)) {
        PyErr_SetString(PyExc_TypeError, "encoding must be string");
        return -1;
    }
    /* Take the new reference and repoint both owners first, and only
       then release the old object: this way neither self->encoding nor
       userData->encoding ever refers to an object whose reference has
       already been dropped. */
    Py_INCREF(value);
    previous = self->encoding;
    self->encoding = value;
    self->userData->encoding = value;
    Py_XDECREF(previous);
    return 0;
}
/* type interface */
static PyMemberDef parser_members[] = {
@ -722,19 +772,21 @@ static PyMemberDef parser_members[] = {
/* attribute table: binds each attribute name ("handler", "encoding")
   to its getter/setter pair and doc string */
static PyGetSetDef parser_getset[] = {
{"handler", (getter)parser_gethandler, (setter)parser_sethandler,
"handler object", NULL},
{"encoding", (getter)parser_getencoding, (setter)parser_setencoding,
"encoding", NULL},
{NULL} /* Sentinel */
};
static PyMethodDef parser_methods[] = {
{"feed", (PyCFunction)parser_feed, METH_VARARGS, "feed data to parse incremental"},
{"feed", (PyCFunction)parser_feed, METH_VARARGS, "feed data to parse incremental"},
{"reset", (PyCFunction)parser_reset, METH_VARARGS, "reset the parser (no flushing)"},
{"flush", (PyCFunction)parser_flush, METH_VARARGS, "flush parser buffers"},
{"debug", (PyCFunction)parser_debug, METH_VARARGS, "set debug level"},
{"lineno", (PyCFunction)parser_lineno, METH_VARARGS, "get the current line number"},
{"lineno", (PyCFunction)parser_lineno, METH_VARARGS, "get the current line number"},
{"last_lineno", (PyCFunction)parser_last_lineno, METH_VARARGS, "get the last line number"},
{"column", (PyCFunction)parser_column, METH_VARARGS, "get the current column"},
{"column", (PyCFunction)parser_column, METH_VARARGS, "get the current column"},
{"last_column", (PyCFunction)parser_last_column, METH_VARARGS, "get the last column"},
{"pos", (PyCFunction)parser_pos, METH_VARARGS, "get the current scanner position"},
{"pos", (PyCFunction)parser_pos, METH_VARARGS, "get the current scanner position"},
{NULL} /* Sentinel */
};

View file

@ -78,6 +78,8 @@ typedef struct {
PyObject* exc_tb;
/* error string */
PyObject* error;
/* encoding string (default iso8859-1) */
PyObject* encoding;
} UserData;
#endif