From d766250a0047b4a645dfb5a5c96f566e6770d509 Mon Sep 17 00:00:00 2001 From: calvin Date: Tue, 29 Feb 2000 12:53:00 +0000 Subject: [PATCH] HTTPS support git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@17 e7d03fd6-7b0d-0410-9947-9c21f3af8025 --- .cvsignore | 4 +- ChangeLog | 6 +- INSTALL | 15 +- Makefile | 24 +- README | 9 +- TODO | 6 +- httpslib.py | 170 ++++++++++++++ linkcheck/Config.py | 2 +- linkcheck/HttpUrlData.py | 17 +- linkcheck/HttpsUrlData.py | 33 ++- linkcheck/JavascriptUrlData.py | 2 - linkcheck/Logging.py | 46 ++-- linkcheck/RobotsTxt.py | 11 +- linkcheck/UrlData.py | 11 +- linkchecker.bat | 2 +- ssl.c | 415 +++++++++++++++++++++++++++++++++ 16 files changed, 711 insertions(+), 62 deletions(-) create mode 100644 httpslib.py create mode 100644 ssl.c diff --git a/.cvsignore b/.cvsignore index 558a80bb..9cd8e9cd 100644 --- a/.cvsignore +++ b/.cvsignore @@ -1,3 +1,5 @@ build-stamp sample.html -linkchecker-out.* +*-out.* +*.so +*.o diff --git a/ChangeLog b/ChangeLog index 54f2ba7f..23b2da2e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,10 +1,14 @@ +29.2.2000 + * GML output additions + * HTTPS support + 28.2.2000 * the patched PyLR parser generator works * wrote a GML parser 25.2.2000 * changed the name to LinkChecker. My old Java LinkChecker will - disappear because I do not maintain it anymore. + disappear because I do not maintain it anymore 21.2.2000 * add -q, --quiet option diff --git a/INSTALL b/INSTALL index 3800a3b6..294ed25a 100644 --- a/INSTALL +++ b/INSTALL @@ -11,8 +11,12 @@ Unix Users: 1. Edit the file linkchecker. Adjust the argument to sys.path.append to point to the distribution directory. -2. Copy linkchecker to a location in your PATH (or make a symlink). -3. Check links happily by typing `linkchecker`. +2. HTTPS support (optional, you need SSLeay) + Adjust the paths at the top of the Makefile + Type "make" to produce the SSL module +3. 
Copy linkchecker to a location in your PATH (or make a symlink). +4. Check links happily by typing `linkchecker`. + Windows Users: 1. Edit the file linkchecker. @@ -21,8 +25,11 @@ Windows Users: 2. Edit the file linkchecker.bat. a) Adjust the PYHTON variable to point to python.exe. b) Adjust the LINKCHECKER variable to point to the distribution directory. -3. Add the distribution directory to your PATH. -4. Check links happily by typing `linkchecker.bat`. +3. HTTPS support (optional, you need SSLeay) + Compile ssl.dll from ssl.c +4. Add the distribution directory to your PATH. +5. Check links happily by typing `linkchecker.bat`. + You need Python >= 1.5.2 You get Python from http://www.python.org diff --git a/Makefile b/Makefile index 7eafc515..e8254d2f 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,14 @@ -VERSION=0.9.0 +PY_INCLDIR = -I/usr/include/python1.5 +PY_LIBDIR = -L/usr/lib +SSL_INCLDIR = -I/usr/include/openssl +SSL_LIBDIR = -L/usr/lib + +CC = gcc +CFLAGS = -O2 -Wall +LDFLAGS = -shared $(SSL_LIBDIR) $(PY_LIBDIR) +CPPFLAGS = $(SSL_INCLDIR) $(PY_INCLDIR) + +VERSION=1.1.0 HOST=treasure.calvinsplayground.de #HOST=fsinfo.cs.uni-sb.de PACKAGE = linkchecker @@ -9,12 +19,14 @@ ALLPACKAGES = ../$(BZ2PACKAGE) ../$(DEBPACKAGE) ../$(ZIPPACKAGE) .PHONY: test clean files install all TAR = tar ZIP = zip -prefix = /usr/local -all: +all: ssl.so + +ssl.so: ssl.o + $(CC) $(LDFLAGS) -o $@ $? -lssl -lcrypto -lpython1.5 clean: - rm -f $(ALLPACKAGES) $(PACKAGE)-out.* + rm -f ssl.{so,o} $(ALLPACKAGES) $(PACKAGE)-out.* files: all ./$(PACKAGE) -q -Wtext -Whtml -Wgml -Wsql -R -r2 -v -i "$(HOST)" http://$(HOST)/~calvin/ @@ -22,7 +34,7 @@ files: all install: install-dirs install -m644 linkcheck/*.py? $(DESTDIR)/usr/share/$(PACKAGE)/linkcheck install -m644 DNS/*.py? $(DESTDIR)/usr/share/$(PACKAGE)/DNS - install -m644 *.py? $(DESTDIR)/usr/share/$(PACKAGE) + install -m644 ssl.so *.py? 
$(DESTDIR)/usr/share/$(PACKAGE) install -m755 $(PACKAGE) $(DESTDIR)/usr/bin install -m644 $(PACKAGE)rc $(DESTDIR)/etc @@ -30,8 +42,6 @@ install-dirs: install -d -m755 \ $(DESTDIR)/usr/share/$(PACKAGE)/linkcheck \ $(DESTDIR)/usr/share/$(PACKAGE)/DNS \ - $(DESTDIR)/usr/share/$(PACKAGE)/GML \ - $(DESTDIR)/usr/share/$(PACKAGE)/PyLR \ $(DESTDIR)/usr/bin \ $(DESTDIR)/etc diff --git a/README b/README index f62948a0..7b8ba24b 100644 --- a/README +++ b/README @@ -6,8 +6,9 @@ Features: o recursive checking o multithreaded o output can be colored or normal text, HTML, SQL or a GML sitemap graph -o HTTP, FTP, mailto:, Gopher, Telnet and local file links are supported - Javascript and HTTPS links are currently ignored +o HTTP/1.1, HTTPS, FTP, mailto:, Gopher, Telnet and local file links + are supported + Javascript links are currently ignored o restrict link checking to your local domain o HTTP proxy support o give username/password for HTTP and FTP authorization @@ -23,3 +24,7 @@ robots.txt parse algorithm. I want to thank everybody who gave me feedback, bug reports and suggestions. +Included packages: +httpslib from http://home.att.net/~nvsoft1/ssl_wrapper.html +PyLR parser from http://starship.python.net/crew/scott/PyLR.html +DNS see README.dns diff --git a/TODO b/TODO index e7932bf9..1ed02f35 100644 --- a/TODO +++ b/TODO @@ -1,8 +1,6 @@ -Use leading '_' for private functions. - Is there a way to cleanly stop arbitrary Thread objects (with exit handler)? Mail me solutions! -Write a graph layout algorithm. +configure script and Debian package cleanups -Write a little tool to produce an image of the GML output. +SSL support diff --git a/httpslib.py b/httpslib.py new file mode 100644 index 00000000..3e5d68ae --- /dev/null +++ b/httpslib.py @@ -0,0 +1,170 @@ +# @(#)httpslib.py 1.1 VMS-99/01/30 https support + +import ssl,httplib + +HTTP_PREF = 'HTTP/' +HTTPS_PORT = 443 + +class HTTPS(httplib.HTTP): + + def connect (self, host, port = 0): + """Connect to a host on a given port. 
+ + Note: This method is automatically invoked by __init__, + if a host is specified during instantiation. + + """ + if not port: + i = string.find(host, ':') + if i >= 0: + host, port = host[:i], host[i+1:] + try: port = string.atoi(port) + except string.atoi_error: + raise socket.error, "nonnumeric port" + if not port: port = HTTPS_PORT + self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + if self.debuglevel > 0: print 'connect:', (host, port) + self.sock.connect(host, port) + self.ssl = ssl.ssl(self.sock.fileno()) + + def send (self, str): + if self.debuglevel > 0: print 'send:', `str` + self.ssl.write(str,len(str)) + + def makefile (self, mode='r', bufsize=-1): + return _fileobject(self.sock,self.ssl,mode,bufsize) + + def getreply (self): + self.file = self.makefile('rb') +# self.sock = None + line = self.file.readline() + if self.debuglevel > 0: print 'reply:',`line` + try: + [ver,code,msg] = string.split(line,None,2) + except ValueError: + try: + [ver,code] = string.split(line,None,1) + msg = "" + except ValueError: + ver = "" + if ver[:len(HTTP_PREF)] != HTTP_PREF: + self.headers = None + return -1, line, self.headers + self.headers = mimetools.Message(self.file,0) + return string.atoi(code), string.strip(msg), self.headers + + def close (self): + if self.file: + self.file.close() + self.file = self.sock = self.ssl = None + +class _fileobject: + + def __init__ (self, sock, ssl, mode, bufsize): + import string + self._sock = sock + self._ssl = ssl + self._mode = mode + if bufsize < 0: + bufsize = 512 + self._rbufsize = max(1,bufsize) + self._wbufsize = bufsize + self._wbuf = self._rbuf = "" + + def close (self): + try: + if self._sock: + self.flush() + finally: + self._sock = None + + def __del__ (self): + self.close() + + def flush (self): + if self._wbuf: + self._sock.write(self._wbuf,len(self._wbuf)) + self._wbuf = "" + + def fileno (self): + return self._sock.fileno() + + def write (self, data): + self._wbuf = self._wbuf + data + if 
self._wbufsize == 1: + if '\n' in data: + self.flush() + else: + if len(self._wbuf) >= self._wbufsize: + self.flush() + + def writelines (self, lst): + filter(self._sock.send,lst) + self.flush() + + def read (self, n=-1): + if n >= 0: + while len(self._rbuf) < n: + new = self._ssl.read(self._rbufsize) + if not new: break + self._rbuf = self._rbuf + new + data,self._rbuf = self._rbuf[:n],self._rbuf[n:] + return data + while 1: + new = self._ssl.read(self._rbufsize) + if not new: break + self._rbuf = self._rbuf + new + data,self._rbuf = self._rbuf,"" + return data + + def readline (self): + data = "" + i = string.find(self._rbuf,'\n') + while i < 0: + new = self._ssl.read(self._rbufsize) + if not new: break + i = string.find(new,'\n') + if i >= 0: i = i + len(self._rbuf) + self._rbuf = self._rbuf + new + if i < 0: i = len(self._rbuf) + else: i = i+1 + data,self._rbuf = self._rbuf[:i],self._rbuf[i:] + return data + + def readlines (self): + l = [] + while 1: + line = self.readline() + if not line: break + l.append(line) + return l + +def _test(): + import sys + import getopt + opts, args = getopt.getopt(sys.argv[1:], 'd') + dl = 0 + for o, a in opts: + if o == '-d': dl = dl + 1 + if args[0:]: host = args[0] + if args[1:]: selector = args[1] + h = HTTPS() + host = 'synergy.as.cmu.edu' + selector = '/~geek' +# host = 'tls.cryptsoft.com' +# selector = '/' + h.set_debuglevel(dl) + h.connect(host) + h.putrequest('GET', selector) + h.endheaders() + errcode, errmsg, headers = h.getreply() + print 'errcode =', errcode + print 'errmsg =', errmsg + print "\tHEADERS:" + if headers: + for header in headers.headers: print string.strip(header) + print "\tTEXT:" + print h.getfile().read() + +if __name__ == '__main__': + _test() diff --git a/linkcheck/Config.py b/linkcheck/Config.py index 60bf83ab..77f93583 100644 --- a/linkcheck/Config.py +++ b/linkcheck/Config.py @@ -11,7 +11,7 @@ Copyright = "Copyright HtmlCopyright = "Copyright © 2000 by "+Author AppInfo = App+" "+Copyright 
HtmlAppInfo = App+", "+HtmlCopyright -Url = "http://pylice.sourceforge.net/" +Url = "http://linkchecker.sourceforge.net/" Email = "calvin@users.sourceforge.net" Freeware = AppName+""" comes with ABSOLUTELY NO WARRANTY! This is free software, and you are welcome to redistribute it diff --git a/linkcheck/HttpUrlData.py b/linkcheck/HttpUrlData.py index e9b2735f..87cb3572 100644 --- a/linkcheck/HttpUrlData.py +++ b/linkcheck/HttpUrlData.py @@ -53,11 +53,11 @@ class HttpUrlData(UrlData): self.setWarning("Access denied by robots.txt, checked only syntax") return - status, statusText, self.mime = self.getHttpRequest() + status, statusText, self.mime = self._getHttpRequest() Config.debug(str(self.mime)) if status == 401: self.auth = base64.encodestring(LinkChecker.User+":"+LinkChecker.Password) - status, statusText, self.mime = self.getHttpRequest() + status, statusText, self.mime = self._getHttpRequest() if status >= 400: self.setError(`status`+" "+statusText) return @@ -68,7 +68,7 @@ class HttpUrlData(UrlData): while status in [301,302] and self.mime and tries < 5: redirected = urlparse.urljoin(redirected, self.mime.getheader("Location")) self.urlTuple = urlparse.urlparse(redirected) - status, statusText, self.mime = self.getHttpRequest() + status, statusText, self.mime = self._getHttpRequest() Config.debug("\nRedirected\n"+str(self.mime)) tries = tries + 1 @@ -86,7 +86,7 @@ class HttpUrlData(UrlData): self.setValid(`status`+" "+statusText) - def getHttpRequest(self, method="HEAD"): + def _getHttpRequest(self, method="HEAD"): "Put request and return (status code, status text, mime object)" if self.proxy: host = self.proxy+":"+`self.proxyport` @@ -94,7 +94,7 @@ class HttpUrlData(UrlData): host = self.urlTuple[1] if self.urlConnection: self.closeConnection() - self.urlConnection = httplib.HTTP(host) + self.urlConnection = self._getHTTPObject(host) if self.proxy: path = urlparse.urlunparse(self.urlTuple) else: @@ -110,10 +110,13 @@ class HttpUrlData(UrlData): 
self.urlConnection.endheaders() return self.urlConnection.getreply() + def _getHTTPObject(self, host): + return httplib.HTTP(host) + def getContent(self): self.closeConnection() t = time.time() - self.getHttpRequest("GET") + self._getHttpRequest("GET") self.urlConnection = self.urlConnection.getfile() data = StringUtil.stripHtmlComments(self.urlConnection.read()) self.time = time.time() - t @@ -129,7 +132,7 @@ class HttpUrlData(UrlData): if config.robotsTxtCache_has_key(self.urlTuple[1]): robotsTxt = config.robotsTxtCache_get(self.urlTuple[1]) else: - robotsTxt = RobotsTxt(self.urlTuple[1], Config.UserAgent) + robotsTxt = RobotsTxt(self.urlTuple, Config.UserAgent) Config.debug("DEBUG: "+str(robotsTxt)+"\n") config.robotsTxtCache_set(self.urlTuple[1], robotsTxt) except: diff --git a/linkcheck/HttpsUrlData.py b/linkcheck/HttpsUrlData.py index 6c58f384..5814f006 100644 --- a/linkcheck/HttpsUrlData.py +++ b/linkcheck/HttpsUrlData.py @@ -1,13 +1,30 @@ from UrlData import UrlData +from HttpUrlData import HttpUrlData +_supportHttps=1 +try: import httpslib +except: _supportHttps=0 + +class HttpsUrlData(HttpUrlData): + """Url link with https scheme""" + + def __init__(self, + urlName, + recursionLevel, + parentName = None, + baseRef = None, + line = 0, _time = 0): + HttpUrlData.__init__(self, urlName, recursionLevel, + parentName, baseRef, line, _time) + + def _getHTTPObject(self, host): + return httpslib.HTTPS(host) -class HttpsUrlData(UrlData): - "Url link with https scheme" - def check(self, config): - self.setWarning("Https url ignored") - self.logMe(config) - + if _supportHttps: + HttpUrlData.check(self, config) + else: + self.setWarning("HTTPS url ignored") + self.logMe(config) + def __str__(self): return "HTTPS link\n"+UrlData.__str__(self) - - diff --git a/linkcheck/JavascriptUrlData.py b/linkcheck/JavascriptUrlData.py index fcfaaf62..de442fdc 100644 --- a/linkcheck/JavascriptUrlData.py +++ b/linkcheck/JavascriptUrlData.py @@ -9,5 +9,3 @@ class 
JavascriptUrlData(UrlData): def __str__(self): return "Javascript link\n"+UrlData.__str__(self) - - diff --git a/linkcheck/Logging.py b/linkcheck/Logging.py index bccbbbb2..e9c80fe0 100644 --- a/linkcheck/Logging.py +++ b/linkcheck/Logging.py @@ -51,7 +51,7 @@ class StandardLogger: self.errors=0 self.warnings=0 self.fd = fd - if fd==sys.stdout: + if fd==sys.stdout or fd==sys.stderr: self.willclose=0 else: self.willclose=1 @@ -282,16 +282,18 @@ class ColoredLogger(StandardLogger): class GMLLogger(StandardLogger): - + """GML means Graph Modeling Language. Use a GML tool to see + your sitemap graph. + """ def __init__(self,fd=sys.stdout): StandardLogger.__init__(self,fd) self.nodes = [] def init(self): - self.fd.write("graph [\n Creator \""+Config.AppName+\ - "\"\n comment \"you get pylice at "+Config.Url+\ - "\"\n comment \"write comments and bugs to "+Config.Email+\ - "\"\n directed 1\n") + self.fd.write("# created by "+Config.AppInfo+" at "+_currentTime()+\ + "\n# you get "+Config.AppName+" at "+Config.Url+\ + "\n# comment \"write comments and bugs to "+Config.Email+\ + "\ngraph [\n directed 1\n") self.fd.flush() def newUrl(self, urlData): @@ -303,17 +305,28 @@ class GMLLogger(StandardLogger): nodeid = 1 for node in self.nodes: if node.url and not writtenNodes.has_key(node.url): - self.fd.write(" node [\n id "+`nodeid`+"\n label \""+ - node.url+"\"\n ]\n") + self.fd.write(" node [\n") + self.fd.write(" id "+`nodeid`+"\n") + self.fd.write(' label "'+node.url+'"'+"\n") + if node.time: + self.fd.write(" dltime "+`node.time`+"\n") + self.fd.write(" extern ") + if node.extern: self.fd.write("1") + else: self.fd.write("0") + self.fd.write("\n ]\n") writtenNodes[node.url] = nodeid nodeid = nodeid + 1 # write edges for node in self.nodes: if node.url and node.parentName: - self.fd.write(" edge [\n label \""+node.urlName+\ - "\"\n source "+`writtenNodes[node.parentName]`+\ - "\n target "+`writtenNodes[node.url]`+\ - "\n ]\n") + self.fd.write(" edge [\n") + self.fd.write(' 
label "'+node.urlName+'"\n') + self.fd.write(" source "+`writtenNodes[node.parentName]`+"\n") + self.fd.write(" target "+`writtenNodes[node.url]`+"\n") + self.fd.write(" valid ") + if node.valid: self.fd.write("1") + else: self.fd.write("0") + self.fd.write("\n ]\n") # end of output self.fd.write("]\n") self.fd.flush() @@ -321,16 +334,15 @@ class GMLLogger(StandardLogger): class SQLLogger(StandardLogger): - """ SQL output, only tested with PostgreSQL""" - + """ SQL output for PostgreSQL, not tested""" def init(self): - self.fd.write("-- created by "+Config.AppName+" at "+_currentTime()+\ - "\n-- you get pylice at "+Config.Url+\ + self.fd.write("-- created by "+Config.AppInfo+" at "+_currentTime()+\ + "\n-- you get "+Config.AppName+" at "+Config.Url+\ "\n-- write comments and bugs to "+Config.Email+"\n\n") self.fd.flush() def newUrl(self, urlData): - self.fd.write("insert into pylicedb(urlname,"+\ + self.fd.write("insert into linksdb(urlname,"+\ "recursionlevel,"+\ "parentname,"+\ "baseref,"+\ diff --git a/linkcheck/RobotsTxt.py b/linkcheck/RobotsTxt.py index cdaeffb4..3739b075 100644 --- a/linkcheck/RobotsTxt.py +++ b/linkcheck/RobotsTxt.py @@ -1,14 +1,19 @@ import re,urlparse,string,httplib,urllib,sys,StringUtil,Config class RobotsTxt: - def __init__(self, base, useragent): + def __init__(self, urltuple, useragent): self.entries = [] self.disallowAll = 0 self.allowAll = 0 - self.base = base + self.base = urltuple[0]+"://"+urltuple[1]+"/robots.txt" try: - urlConnection = httplib.HTTP(base) + urlConnection = None + if urltuple[0]=="http": + urlConnection = httplib.HTTP(urltuple[1]) + else: + import httpslib + urlConnection = httpslib.HTTPS(urltuple[1]) urlConnection.putrequest("GET", "/robots.txt") urlConnection.putheader("User-agent", useragent) urlConnection.endheaders() diff --git a/linkcheck/UrlData.py b/linkcheck/UrlData.py index 801b7d28..30833636 100644 --- a/linkcheck/UrlData.py +++ b/linkcheck/UrlData.py @@ -33,6 +33,7 @@ class UrlData: self.time = _time 
self.cached = 0 self.urlConnection = None + self.extern = 1 def setError(self, s): @@ -94,7 +95,9 @@ class UrlData: self.setError("URL is null or empty") self.logMe(config) return - try: self.buildUrl() + try: + self.buildUrl() + self.extern = self._isExtern(config) except: type, value = sys.exc_info()[:2] self.setError(str(value)) @@ -111,7 +114,7 @@ class UrlData: # apply filter Config.debug("DEBUG: checking filter\n") - if config["strict"] and self.isExtern(config): + if config["strict"] and self.extern: self.setWarning("outside of domain filter, checked only syntax") self.logMe(config) return @@ -161,7 +164,7 @@ class UrlData: self.isHtml() and \ not self.cached and \ self.recursionLevel < config["recursionlevel"] and \ - not self.isExtern(config) + not self.extern def isHtml(self): return 0 @@ -174,7 +177,7 @@ class UrlData: return self.setWarning("anchor #"+anchor+" not found") - def isExtern(self, config): + def _isExtern(self, config): if len(config["externlinks"])==0 and len(config["internlinks"])==0: return 0 # deny and allow external checking diff --git a/linkchecker.bat b/linkchecker.bat index 3f09c0fb..061932bc 100644 --- a/linkchecker.bat +++ b/linkchecker.bat @@ -5,4 +5,4 @@ set PYTHON=c:\progra~1\python\python.exe set LINKCHECKER=c:\progra~1\linkchecker-1.1.0 rem === end configure === -%PYTHON% %LINKCHECKER%\pylice %1 %2 %3 %4 %5 %6 %7 %8 %9 +%PYTHON% %LINKCHECKER%\linkchecker %1 %2 %3 %4 %5 %6 %7 %8 %9 diff --git a/ssl.c b/ssl.c new file mode 100644 index 00000000..295ed3ad --- /dev/null +++ b/ssl.c @@ -0,0 +1,415 @@ +/* @(#)ssl.c 1.1 VMS-99/01/30 python wrapper for SSLeay https + */ + +#include "Python.h" +#if defined(WITH_THREAD) && !defined(HAVE_GETHOSTBYNAME_R) &&\ + !defined(MS_WINDOWS) +#include "thread.h" +#endif + +#include +#ifndef MS_WINDOWS +#include +#else +#include +#endif + +#if defined(PYOS_OS2) +#define INCL_DOS +#define INCL_DOSERRORS +#define INCL_NOPMAPI +#include +#endif + +#include "ssl.h" +#include "err.h" + +/* + some hacks 
to choose between K&R or ANSI style function + definitions. For NT to build this as an extension module (ie, DLL) + it must be compiled by the C++ compiler, as it takes the address of + a static data item exported from the main Python DLL. +*/ +#ifdef MS_WINDOWS +#define FORCE_ANSI_FUNC_DEFS +#endif + +#if defined(PYOS_OS2) +#define FORCE_ANSI_FUNC_DEFS +#endif +/* BUILD_FUNC_DEF_n emits a function header for n parameters: ANSI prototype form when FORCE_ANSI_FUNC_DEFS is defined, K&R form otherwise */ +#ifdef FORCE_ANSI_FUNC_DEFS +#define BUILD_FUNC_DEF_1( fnname, arg1type, arg1name ) \ +fnname( arg1type arg1name ) + +#define BUILD_FUNC_DEF_2( fnname, arg1type, arg1name, arg2type, arg2name ) \ +fnname( arg1type arg1name, arg2type arg2name ) + +#else /* !FORCE_ANSI_FUNC_DEFS */ +#define BUILD_FUNC_DEF_1( fnname, arg1type, arg1name ) \ +fnname( arg1name ) \ + arg1type arg1name; + +#define BUILD_FUNC_DEF_2( fnname, arg1type, arg1name, arg2type, arg2name ) \ +fnname( arg1name, arg2name ) \ + arg1type arg1name; \ + arg2type arg2name; +#endif /* !FORCE_ANSI_FUNC_DEFS */ + +/* Global variable holding the exception type for errors detected + by this module (but not argument type or memory errors, etc.). 
*/ + +static PyObject *PySslError; + +typedef struct { + PyObject_HEAD + int sock_fd; + PyObject *x_attr; /* attributes dictionary */ + SSL_CTX *ctx; + SSL *ssl; + X509 *server_cert; + BIO *sbio; + char server[256]; + char issuer[256]; +} PySslObject; + +staticforward PyTypeObject SSL_Type; +#define PySslObject_Check(v) ((v)->ob_type == &SSL_Type) + +/* + * raise an error according to errno, return NULL + */ +static PyObject * +PySsl_errno () +{ +#ifdef MS_WINDOWS + if (WSAGetLastError()) { + PyObject *v = Py_BuildValue("(is)",WSAGetLastError(),"winsock error"); + + if (v) { + PyErr_SetObject(PySslError,v); + Py_DECREF(v); + } + return NULL; + } +#endif + return PyErr_SetFromErrno(PySslError); +} + +/* + * format SSl error string + */ +static int +BUILD_FUNC_DEF_2 (PySsl_err_str, unsigned long, e, char *, buf) +{ + unsigned long l = ERR_GET_LIB(e); + unsigned long f = ERR_GET_FUNC(e); + unsigned long r = ERR_GET_REASON(e); + char* ls = (char*)ERR_lib_error_string(e); + char* fs = (char*)ERR_func_error_string(e); + char* rs = (char*)ERR_reason_error_string(e); + char* bp = buf + 2; /* skip two initial blanks */ + + (void)strcpy(buf," none:"); /* initialize buffer */ + bp += (ls) ? sprintf(bp,"%s:",ls) : + ((l) ? sprintf(bp,"lib %lu:",l) : 0); + bp += (fs) ? sprintf(bp,"%s ",fs) : + ((f) ? sprintf(bp,"func %lu:",f) : 0); + bp += (rs) ? sprintf(bp,"%s:",rs) : + ((r) ? 
sprintf(bp,"reason(%lu):",r) : 0); + *bp-- = 0; /* suppress last divider (:) */ + return (bp - buf); +} + +/* + * report SSL core errors + */ +static PySslObject * +PySsl_errors () +{ +#define PY_SSL_ERR_MAX 256 + + unsigned long e; + char buf[2 * PY_SSL_ERR_MAX]; + char *bf = buf; + + while (((bf - buf) < PY_SSL_ERR_MAX) && (e = ERR_get_error())) + bf += PySsl_err_str(e,bf); + { + PyObject *v = Py_BuildValue("(sss)", "ssl","core",buf+2); + if (v != NULL) { + PyErr_SetObject(PySslError,v); + Py_DECREF(v); + } + } + return (NULL); +} + +/* + * report SSL application layer errors + */ +static PySslObject * +BUILD_FUNC_DEF_2 (PySsl_app_errors, SSL *, s, int, ret) +{ + int err = SSL_get_error(s,ret); + char *str; + + switch (err) { + case SSL_ERROR_SSL: + return (PySsl_errors()); + case SSL_ERROR_SYSCALL: + return ((PySslObject *)PySsl_errno()); + case SSL_ERROR_ZERO_RETURN: + str = "End of data"; + break; + case SSL_ERROR_WANT_READ: + str = "Want read"; + break; + case SSL_ERROR_WANT_WRITE: + str = "Want write"; + break; + case SSL_ERROR_WANT_X509_LOOKUP: + str = "Want x509 lookup"; + break; + case SSL_ERROR_WANT_CONNECT: + str = "Want connect"; + break; + default: + str = "Unknown"; + break; + } + { + PyObject *v = Py_BuildValue("(sis)", "ssl",err, str); + if (v != NULL) { + PyErr_SetObject(PySslError,v); + Py_DECREF(v); + } + } + return (NULL); +} + +/* ssl.read(len) method */ + +static PyObject * +BUILD_FUNC_DEF_2 (PySslObj_read, PySslObject *, self, PyObject *, args) +{ + int len, n; + PyObject *buf; + + if (!PyArg_ParseTuple(args,"i",&len)) + return (NULL); + if (!(buf = PyString_FromStringAndSize((char *)0,len))) + return (NULL); + Py_BEGIN_ALLOW_THREADS + + n = SSL_read(self->ssl,PyString_AsString(buf),len); + + Py_END_ALLOW_THREADS + + switch (SSL_get_error(self->ssl,n)) { + case SSL_ERROR_NONE: /* good return value */ + break; + case SSL_ERROR_ZERO_RETURN: + case SSL_ERROR_SYSCALL: + if (!n) /* fix SSL_ERROR_SYCSALL errno=0 case */ + break; + /* fall thru 
here */ + default: + Py_DECREF(buf); + (void)PySsl_app_errors(self->ssl,n); + return (NULL); + } + if ((n != len) && (_PyString_Resize(&buf,n) < 0)) + return (NULL); + return (buf); +} + +/* ssl.write(data,len) method */ + +static PyObject * +BUILD_FUNC_DEF_2 (PySslObj_write, PySslObject *, self, PyObject *, args) +{ + char *buf; + int len, n; + if (!PyArg_ParseTuple(args, "si", &buf, &len)) + return NULL; + + /* Note: flags are ignored */ + + Py_BEGIN_ALLOW_THREADS + + n = SSL_write(self->ssl,buf,len); + + Py_END_ALLOW_THREADS + if (n < 0) + return (PySsl_errno()); + return (PyInt_FromLong((long)n)); +} + +/* ssl.server() method */ + +static PyObject * +BUILD_FUNC_DEF_2 (PySslObj_server, PySslObject *, self, PyObject *, args) +{ + if (!PyArg_NoArgs(args)) + return (NULL); + return (PyString_FromString(self->server)); +} + +/* ssl.issuer() method */ + +static PyObject * +BUILD_FUNC_DEF_2 (PySslObj_issuer, PySslObject *, self, PyObject *, args) +{ + if (!PyArg_NoArgs(args)) + return (NULL); + return (PyString_FromString(self->issuer)); +} + +/* SSL object methods */ + +static PyMethodDef PySslObj_methods[] = { + {"read", (PyCFunction)PySslObj_read,1}, + {"write", (PyCFunction)PySslObj_write,1}, + {"server", (PyCFunction)PySslObj_server}, + {"issuer", (PyCFunction)PySslObj_issuer}, + { NULL, NULL} +}; + +static void +BUILD_FUNC_DEF_1 (PySsl_dealloc, PySslObject *, self) +{ + if (self->server_cert) /* possible not to have one? 
*/ + X509_free(self->server_cert); + SSL_CTX_free(self->ctx); + SSL_free(self->ssl); + Py_XDECREF(self->x_attr); + PyMem_DEL(self); +} + +static PyObject * +BUILD_FUNC_DEF_2 (PySsl_getattr, PySslObject *, self, char *, name) +{ + return (Py_FindMethod(PySslObj_methods,(PyObject *)self,name)); +} + +staticforward PyTypeObject SSL_Type = { + PyObject_HEAD_INIT(&PyType_Type) + 0, /*ob_size*/ + "SSL", /*tp_name*/ + sizeof(PySslObject), /*tp_basicsize*/ + 0, /*tp_itemsize*/ + /* methods */ + (destructor)PySsl_dealloc, /*tp_dealloc*/ + 0, /*tp_print*/ + (getattrfunc)PySsl_getattr, /*tp_getattr*/ + 0, /*tp_setattr*/ + 0, /*tp_compare*/ + 0, /*tp_repr*/ + 0, /*tp_as_number*/ + 0, /*tp_as_sequence*/ + 0, /*tp_as_mapping*/ + 0, /*tp_hash*/ +}; + +/* + * C function called for new object initialization + * Note: SSL protocol version 2, 3, or 2+3 set at compile time + */ +static PySslObject * +BUILD_FUNC_DEF_1 (newPySslObject, int, sock_fd) +{ + PySslObject *self; + SSL_METHOD *meth; + int ret; + +#if 0 + meth=SSLv3_client_method(); + meth=SSLv23_client_method(); +#endif + + meth=SSLv2_client_method(); + + if (!(self = PyObject_NEW(PySslObject,&SSL_Type))) /* create new object */ + return (NULL); + (void)memset(self->server,0,sizeof(self->server)); + (void)memset(self->issuer,0,sizeof(self->issuer)); + + self->x_attr = PyDict_New(); + if (!(self->ctx = SSL_CTX_new(meth))) { /* set up context */ + PyMem_DEL(self); + return (PySsl_errors()); + } +#if 0 /* Note: set this for v23, Netscape server */ + SSL_CTX_set_options(self->ctx,SSL_OP_ALL); +#endif + self->ssl = SSL_new(self->ctx); /* new ssl struct */ + if (!(ret = SSL_set_fd(self->ssl,sock_fd))) { /* set the socket for SSL */ + PyMem_DEL(self); + return (PySsl_app_errors(self->ssl,ret)); + } + SSL_CTX_set_verify(self->ctx,SSL_VERIFY_NONE,NULL); /* set verify lvl */ + SSL_set_connect_state(self->ssl); + + if ((ret = SSL_connect(self->ssl)) < 0) { /* negotiate SSL connection */ + PyMem_DEL(self); + return 
(PySsl_app_errors(self->ssl,ret)); + } + self->ssl->debug = 1; + + if ((self->server_cert = SSL_get_peer_certificate(self->ssl))) { + X509_NAME_oneline(X509_get_subject_name(self->server_cert), + self->server,sizeof(self->server)); + X509_NAME_oneline(X509_get_issuer_name(self->server_cert), + self->issuer, sizeof(self->issuer)); + } + self->x_attr = NULL; + self->sock_fd = sock_fd; + return (self); +} + +/* + * Python function called for new object initialization + */ +static PyObject * +BUILD_FUNC_DEF_2 (PySsl_ssl_new, PyObject *, self, PyObject *, args) +{ + int sock_fd; + if (!PyArg_ParseTuple(args, "i", &sock_fd)) + return (NULL); + return ((PyObject *)newPySslObject(sock_fd)); +} + +/* List of functions exported by this module. */ + +static PyMethodDef PySsl_methods[] = { + {"ssl", (PyCFunction)PySsl_ssl_new, 1}, + {NULL, NULL} /* sentinel */ + +}; + +/* + * Initialize this module, called when the first 'import ssl' is done + */ +void +initssl () +{ + PyObject *m, *d; + m = Py_InitModule("ssl", PySsl_methods); + d = PyModule_GetDict(m); + + SSL_load_error_strings(); + SSLeay_add_ssl_algorithms(); + + /* *** Python 1.5 *** + if (!(PySssl_Error = PyErr_NewException("ssl.error",NULL,NULL))) + return; + */ + + if (!(PySslError = PyString_FromString("ssl.error")) || + PyDict_SetItemString(d,"error",PySslError)) + Py_FatalError("can't define ssl.error"); + if (PyDict_SetItemString(d,"SSLType",(PyObject *)&SSL_Type)) + return; +} +