diff --git a/linkcheck/HtmlParser/htmllex.c b/linkcheck/HtmlParser/htmllex.c
index 4807c4db..a9caf4ea 100644
--- a/linkcheck/HtmlParser/htmllex.c
+++ b/linkcheck/HtmlParser/htmllex.c
@@ -2658,7 +2658,7 @@ static yyconst flex_int32_t yy_rule_linenum[131] =
PyObject* pencoding; \
char* encoding; \
CHECK_NULL(pencoding = PyObject_GetAttrString(yyextra->parser, "encoding")); \
- encoding = PyString_AsString(pencoding); \
+ encoding = PyBytes_AsString(pencoding); \
if (encoding==NULL) { Py_DECREF(pencoding); return T_ERROR; } \
(a) = PyUnicode_Decode(yyextra->tmp_buf, \
(Py_ssize_t)strlen(yyextra->tmp_buf), \
@@ -2704,9 +2704,9 @@ static yyconst flex_int32_t yy_rule_linenum[131] =
#define SCRIPT_CHECK { \
PyObject* tagname; \
CHECK_NULL(tagname = PyUnicode_AsEncodedString(yyextra->tmp_tag, "ascii", "ignore")); \
- if (strcmp("script", PyString_AsString(tagname))==0) \
+ if (strcmp("script", PyBytes_AsString(tagname))==0) \
BEGIN(S_SCRIPT); \
- else if (strcmp("style", PyString_AsString(tagname))==0) \
+ else if (strcmp("style", PyBytes_AsString(tagname))==0) \
BEGIN(S_STYLE); \
else \
BEGIN(INITIAL); \
diff --git a/linkcheck/HtmlParser/htmllex.l b/linkcheck/HtmlParser/htmllex.l
index 5698b6a9..1676a60d 100644
--- a/linkcheck/HtmlParser/htmllex.l
+++ b/linkcheck/HtmlParser/htmllex.l
@@ -55,7 +55,7 @@
PyObject* pencoding; \
char* encoding; \
CHECK_NULL(pencoding = PyObject_GetAttrString(yyextra->parser, "encoding")); \
- encoding = PyString_AsString(pencoding); \
+ encoding = PyBytes_AsString(pencoding); \
if (encoding==NULL) { Py_DECREF(pencoding); return T_ERROR; } \
(a) = PyUnicode_Decode(yyextra->tmp_buf, \
(Py_ssize_t)strlen(yyextra->tmp_buf), \
@@ -101,9 +101,9 @@
#define SCRIPT_CHECK { \
PyObject* tagname; \
CHECK_NULL(tagname = PyUnicode_AsEncodedString(yyextra->tmp_tag, "ascii", "ignore")); \
- if (strcmp("script", PyString_AsString(tagname))==0) \
+ if (strcmp("script", PyBytes_AsString(tagname))==0) \
BEGIN(S_SCRIPT); \
- else if (strcmp("style", PyString_AsString(tagname))==0) \
+ else if (strcmp("style", PyBytes_AsString(tagname))==0) \
BEGIN(S_STYLE); \
else \
BEGIN(INITIAL); \
diff --git a/linkcheck/HtmlParser/htmlparse.c b/linkcheck/HtmlParser/htmlparse.c
index 02b94ab8..c9386988 100644
--- a/linkcheck/HtmlParser/htmlparse.c
+++ b/linkcheck/HtmlParser/htmlparse.c
@@ -180,9 +180,9 @@ typedef struct {
PyObject_HEAD
/* the handler object */
PyObject* handler;
- /* the charset encoding (PyStringObject) */
+ /* the charset encoding (PyBytesObject) */
PyObject* encoding;
- /* the document type (PyStringObject) */
+ /* the document type (PyBytesObject) */
PyObject* doctype;
UserData* userData;
void* scanner;
@@ -204,11 +204,11 @@ static int html_end_tag (PyObject* ptag, PyObject* parser) {
int ret = 1;
pdoctype = PyObject_GetAttrString(parser, "doctype");
CHECK_ERROR((pdoctype == NULL), finish_html_end_tag);
- doctype = PyString_AsString(pdoctype);
+ doctype = PyBytes_AsString(pdoctype);
CHECK_ERROR((doctype == NULL), finish_html_end_tag);
/* check for HTML (else it's presumably XHTML) */
if (strcmp(doctype, "HTML") == 0) {
- char* tag = PyString_AsString(ptag);
+ char* tag = PyBytes_AsString(ptag);
CHECK_ERROR((tag == NULL), finish_html_end_tag);
ret = strcmp(tag, "area")!=0 &&
strcmp(tag, "base")!=0 &&
@@ -2002,13 +2002,13 @@ static PyObject* parser_new (PyTypeObject* type, PyObject* args, PyObject* kwds)
Py_DECREF(self);
return NULL;
}
- self->encoding = PyString_FromString("iso8859-1");
+ self->encoding = PyBytes_FromString("iso8859-1");
if (self->encoding == NULL) {
Py_DECREF(self->handler);
Py_DECREF(self);
return NULL;
}
- self->doctype = PyString_FromString("HTML");
+ self->doctype = PyBytes_FromString("HTML");
if (self->doctype == NULL) {
Py_DECREF(self->encoding);
Py_DECREF(self->handler);
@@ -2128,7 +2128,7 @@ static PyObject* parser_flush (parser_object* self, PyObject* args) {
}
else ++(self->userData->column);
}
- enc = PyString_AsString(self->encoding);
+ enc = PyBytes_AsString(self->encoding);
s = PyUnicode_Decode(self->userData->buf,
(Py_ssize_t)strlen(self->userData->buf), enc, "ignore");
/* reset buffer */
@@ -2223,12 +2223,12 @@ static PyObject* parser_peek (parser_object* self, PyObject* args) {
}
buflen = strlen(self->userData->buf);
if (!buflen || self->userData->bufpos >= buflen) {
- return PyString_FromString("");
+ return PyBytes_FromString("");
}
if (self->userData->bufpos + len >= buflen) {
len = buflen - self->userData->bufpos - 1;
}
- return PyString_FromStringAndSize(self->userData->buf + self->userData->bufpos, len);
+ return PyBytes_FromStringAndSize(self->userData->buf + self->userData->bufpos, len);
}
@@ -2309,7 +2309,7 @@ static int parser_setencoding (parser_object* self, PyObject* value, void* closu
PyErr_SetString(PyExc_TypeError, "Cannot delete encoding");
return -1;
}
- if (!PyString_CheckExact(value)) {
+ if (!PyBytes_CheckExact(value)) {
PyErr_SetString(PyExc_TypeError, "encoding must be string");
return -1;
}
@@ -2322,7 +2322,7 @@ static int parser_setencoding (parser_object* self, PyObject* value, void* closu
if (repr == NULL) {
return -1;
}
- fprintf(stderr, "htmlsax: set encoding to %s\n", PyString_AsString(repr));
+ fprintf(stderr, "htmlsax: set encoding to %s\n", PyBytes_AsString(repr));
Py_DECREF(repr);
}
return 0;
@@ -2342,9 +2342,9 @@ static int parser_setdoctype (parser_object* self, PyObject* value, void* closur
PyErr_SetString(PyExc_TypeError, "Cannot delete doctype");
return -1;
}
- if (!PyString_CheckExact(value)) {
+ if (!PyBytes_CheckExact(value)) {
PyObject* repr = PyObject_Repr(value);
- char* cp = PyString_AsString(repr);
+ char* cp = PyBytes_AsString(repr);
if (NULL == cp)
return -1;
PyErr_Format(PyExc_TypeError, "doctype %s must be string", cp);
@@ -2460,35 +2460,36 @@ MOD_INIT(htmlsax) {
/* init error */
PyErr_Print();
}
- if ((m = PyImport_ImportModule("linkcheck.HtmlParser")) == NULL) {
+ PyObject* h = NULL;
+ if ((h = PyImport_ImportModule("linkcheck.HtmlParser")) == NULL) {
return MOD_ERROR_VAL;
}
- if ((resolve_entities = PyObject_GetAttrString(m, "resolve_entities")) == NULL) {
- Py_DECREF(m);
+ if ((resolve_entities = PyObject_GetAttrString(h, "resolve_entities")) == NULL) {
+ Py_DECREF(h);
return MOD_ERROR_VAL;
}
- if ((set_encoding = PyObject_GetAttrString(m, "set_encoding")) == NULL) {
+ if ((set_encoding = PyObject_GetAttrString(h, "set_encoding")) == NULL) {
Py_DECREF(resolve_entities);
- Py_DECREF(m);
+ Py_DECREF(h);
return MOD_ERROR_VAL;
}
- if ((set_doctype = PyObject_GetAttrString(m, "set_doctype")) == NULL) {
+ if ((set_doctype = PyObject_GetAttrString(h, "set_doctype")) == NULL) {
Py_DECREF(resolve_entities);
Py_DECREF(set_encoding);
- Py_DECREF(m);
+ Py_DECREF(h);
return MOD_ERROR_VAL;
}
- Py_DECREF(m);
- if ((u_meta = PyString_Decode("meta", 4, "ascii", "ignore")) == NULL) {
+ Py_DECREF(h);
+ if ((u_meta = PyUnicode_FromStringAndSize("meta", 4)) == NULL) {
return MOD_ERROR_VAL;
}
- if ((m = PyImport_ImportModule("linkcheck.containers")) == NULL) {
+ if ((h = PyImport_ImportModule("linkcheck.containers")) == NULL) {
return MOD_ERROR_VAL;
}
- if ((list_dict = PyObject_GetAttrString(m, "ListDict")) == NULL) {
- Py_DECREF(m);
+ if ((list_dict = PyObject_GetAttrString(h, "ListDict")) == NULL) {
+ Py_DECREF(h);
return MOD_ERROR_VAL;
}
- Py_DECREF(m);
+ Py_DECREF(h);
return MOD_SUCCESS_VAL(m);
}
diff --git a/linkcheck/HtmlParser/htmlparse.y b/linkcheck/HtmlParser/htmlparse.y
index 0f22e10e..4fec7b16 100644
--- a/linkcheck/HtmlParser/htmlparse.y
+++ b/linkcheck/HtmlParser/htmlparse.y
@@ -115,9 +115,9 @@ typedef struct {
PyObject_HEAD
/* the handler object */
PyObject* handler;
- /* the charset encoding (PyStringObject) */
+ /* the charset encoding (PyBytesObject) */
PyObject* encoding;
- /* the document type (PyStringObject) */
+ /* the document type (PyBytesObject) */
PyObject* doctype;
UserData* userData;
void* scanner;
@@ -139,11 +139,11 @@ static int html_end_tag (PyObject* ptag, PyObject* parser) {
int ret = 1;
pdoctype = PyObject_GetAttrString(parser, "doctype");
CHECK_ERROR((pdoctype == NULL), finish_html_end_tag);
- doctype = PyString_AsString(pdoctype);
+ doctype = PyBytes_AsString(pdoctype);
CHECK_ERROR((doctype == NULL), finish_html_end_tag);
/* check for HTML (else it's presumably XHTML) */
if (strcmp(doctype, "HTML") == 0) {
- char* tag = PyString_AsString(ptag);
+ char* tag = PyBytes_AsString(ptag);
CHECK_ERROR((tag == NULL), finish_html_end_tag);
ret = strcmp(tag, "area")!=0 &&
strcmp(tag, "base")!=0 &&
@@ -530,13 +530,13 @@ static PyObject* parser_new (PyTypeObject* type, PyObject* args, PyObject* kwds)
Py_DECREF(self);
return NULL;
}
- self->encoding = PyString_FromString("iso8859-1");
+ self->encoding = PyBytes_FromString("iso8859-1");
if (self->encoding == NULL) {
Py_DECREF(self->handler);
Py_DECREF(self);
return NULL;
}
- self->doctype = PyString_FromString("HTML");
+ self->doctype = PyBytes_FromString("HTML");
if (self->doctype == NULL) {
Py_DECREF(self->encoding);
Py_DECREF(self->handler);
@@ -656,7 +656,7 @@ static PyObject* parser_flush (parser_object* self, PyObject* args) {
}
else ++(self->userData->column);
}
- enc = PyString_AsString(self->encoding);
+ enc = PyBytes_AsString(self->encoding);
s = PyUnicode_Decode(self->userData->buf,
(Py_ssize_t)strlen(self->userData->buf), enc, "ignore");
/* reset buffer */
@@ -751,12 +751,12 @@ static PyObject* parser_peek (parser_object* self, PyObject* args) {
}
buflen = strlen(self->userData->buf);
if (!buflen || self->userData->bufpos >= buflen) {
- return PyString_FromString("");
+ return PyBytes_FromString("");
}
if (self->userData->bufpos + len >= buflen) {
len = buflen - self->userData->bufpos - 1;
}
- return PyString_FromStringAndSize(self->userData->buf + self->userData->bufpos, len);
+ return PyBytes_FromStringAndSize(self->userData->buf + self->userData->bufpos, len);
}
@@ -837,7 +837,7 @@ static int parser_setencoding (parser_object* self, PyObject* value, void* closu
PyErr_SetString(PyExc_TypeError, "Cannot delete encoding");
return -1;
}
- if (!PyString_CheckExact(value)) {
+ if (!PyBytes_CheckExact(value)) {
PyErr_SetString(PyExc_TypeError, "encoding must be string");
return -1;
}
@@ -850,7 +850,7 @@ static int parser_setencoding (parser_object* self, PyObject* value, void* closu
if (repr == NULL) {
return -1;
}
- fprintf(stderr, "htmlsax: set encoding to %s\n", PyString_AsString(repr));
+ fprintf(stderr, "htmlsax: set encoding to %s\n", PyBytes_AsString(repr));
Py_DECREF(repr);
}
return 0;
@@ -870,9 +870,9 @@ static int parser_setdoctype (parser_object* self, PyObject* value, void* closur
PyErr_SetString(PyExc_TypeError, "Cannot delete doctype");
return -1;
}
- if (!PyString_CheckExact(value)) {
+ if (!PyBytes_CheckExact(value)) {
PyObject* repr = PyObject_Repr(value);
- char* cp = PyString_AsString(repr);
+ char* cp = PyBytes_AsString(repr);
if (NULL == cp)
return -1;
PyErr_Format(PyExc_TypeError, "doctype %s must be string", cp);
@@ -988,35 +988,36 @@ MOD_INIT(htmlsax) {
/* init error */
PyErr_Print();
}
- if ((m = PyImport_ImportModule("linkcheck.HtmlParser")) == NULL) {
+ PyObject* h = NULL;
+ if ((h = PyImport_ImportModule("linkcheck.HtmlParser")) == NULL) {
return MOD_ERROR_VAL;
}
- if ((resolve_entities = PyObject_GetAttrString(m, "resolve_entities")) == NULL) {
- Py_DECREF(m);
+ if ((resolve_entities = PyObject_GetAttrString(h, "resolve_entities")) == NULL) {
+ Py_DECREF(h);
return MOD_ERROR_VAL;
}
- if ((set_encoding = PyObject_GetAttrString(m, "set_encoding")) == NULL) {
+ if ((set_encoding = PyObject_GetAttrString(h, "set_encoding")) == NULL) {
Py_DECREF(resolve_entities);
- Py_DECREF(m);
+ Py_DECREF(h);
return MOD_ERROR_VAL;
}
- if ((set_doctype = PyObject_GetAttrString(m, "set_doctype")) == NULL) {
+ if ((set_doctype = PyObject_GetAttrString(h, "set_doctype")) == NULL) {
Py_DECREF(resolve_entities);
Py_DECREF(set_encoding);
- Py_DECREF(m);
+ Py_DECREF(h);
return MOD_ERROR_VAL;
}
- Py_DECREF(m);
- if ((u_meta = PyString_Decode("meta", 4, "ascii", "ignore")) == NULL) {
+ Py_DECREF(h);
+ if ((u_meta = PyUnicode_FromStringAndSize("meta", 4)) == NULL) {
return MOD_ERROR_VAL;
}
- if ((m = PyImport_ImportModule("linkcheck.containers")) == NULL) {
+ if ((h = PyImport_ImportModule("linkcheck.containers")) == NULL) {
return MOD_ERROR_VAL;
}
- if ((list_dict = PyObject_GetAttrString(m, "ListDict")) == NULL) {
- Py_DECREF(m);
+ if ((list_dict = PyObject_GetAttrString(h, "ListDict")) == NULL) {
+ Py_DECREF(h);
return MOD_ERROR_VAL;
}
- Py_DECREF(m);
+ Py_DECREF(h);
return MOD_SUCCESS_VAL(m);
}
diff --git a/linkcheck/checker/mailtourl.py b/linkcheck/checker/mailtourl.py
index 36f56ca4..9d36f32d 100644
--- a/linkcheck/checker/mailtourl.py
+++ b/linkcheck/checker/mailtourl.py
@@ -57,8 +57,8 @@ def is_literal (domain):
return domain.startswith(u'[') and domain.endswith(u']')
-_remove_quoted = re.compile(ur'\\.').sub
-_quotes = re.compile(ur'["\\]')
+_remove_quoted = re.compile(r'\\.').sub
+_quotes = re.compile(r'["\\]')
def is_missing_quote (addr):
"""Return True iff mail address is not correctly quoted."""
return _quotes.match(_remove_quoted(u"", addr[1:-1]))