# -*- coding: iso-8859-1 -*- # Copyright (C) 2004-2010 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. """ Test http checking. """ from .httpserver import HttpServerTest, NoQueryHttpRequestHandler from linkcheck.network import iputil class TestHttp (HttpServerTest): """Test http:// link checking.""" def test_html (self): try: self.start_server(handler=CookieRedirectHttpRequestHandler) url = u"http://localhost:%d/tests/checker/data/" \ u"http.html" % self.port resultlines = self.get_resultlines("http.html") self.direct(url, resultlines, recursionlevel=1) url = u"http://localhost:%d/tests/checker/data/" \ u"http.xhtml" % self.port resultlines = self.get_resultlines("http.xhtml") self.direct(url, resultlines, recursionlevel=1) self.redirect1_http_test() self.redirect2_http_test() self.redirect3_http_test() self.robots_txt_test() self.robots_txt2_test() self.swf_test() self.obfuscate_test() finally: self.stop_server() def test_redirect (self): try: self.start_server(handler=RedirectHttpsRequestHandler) self.redirect_https_test() finally: self.stop_server() def redirect_https_test (self): url = u"http://localhost:%d/redirect1" % self.port nurl = url rurl = u"https://localhost:%d/newurl1" % self.port resultlines = [ u"url %s" % url, u"cache key %s" % nurl, u"real url %s" % url, u"info Redirected to `%s'." % rurl.replace('http:', 'https:'), u"error", ] self.direct(url, resultlines, recursionlevel=0) def redirect1_http_test (self): url = u"http://localhost:%d/redirect1" % self.port nurl = url rurl = url.replace("redirect", "newurl") resultlines = [ u"url %s" % url, u"cache key %s" % nurl, u"real url %s" % rurl, u"info Redirected to `%s'." % rurl, u"error", ] self.direct(url, resultlines, recursionlevel=0) def redirect2_http_test (self): url = u"http://localhost:%d/tests/checker/data/redirect.html" % \ self.port nurl = url rurl = url.replace("redirect", "newurl") resultlines = [ u"url %s" % url, u"cache key %s" % nurl, u"real url %s" % rurl, u"info Redirected to `%s'." % rurl, u"valid", u"url newurl.html (cached)", u"cache key %s" % rurl, u"real url %s" % rurl, u"name Recursive Redirect", u"info Redirected to `%s'." % rurl, u"valid", ] self.direct(url, resultlines, recursionlevel=99) def redirect3_http_test (self): url = u"http://localhost:%d/tests/checker/data/redir.html" % self.port resultlines = self.get_resultlines("redir.html") self.direct(url, resultlines, recursionlevel=1) def robots_txt_test (self): url = u"http://localhost:%d/robots.txt" % self.port resultlines = [ u"url %s" % url, u"cache key %s" % url, u"real url %s" % url, u"valid", ] self.direct(url, resultlines, recursionlevel=5) def robots_txt2_test (self): url = u"http://localhost:%d/secret" % self.port resultlines = [ u"url %s" % url, u"cache key %s" % url, u"real url %s" % url, u"warning Access denied by robots.txt, skipping content checks.", u"error", ] self.direct(url, resultlines, recursionlevel=5) def swf_test (self): url = u"http://localhost:%d/tests/checker/data/" \ u"test.swf" % self.port resultlines = [ u"url %s" % url, u"cache key %s" % url, u"real url %s" % url, u"valid", u"url http://www.example.org/", u"cache key http://www.example.org/", u"real url http://www.example.org/", u"valid", ] self.direct(url, resultlines, recursionlevel=1) def obfuscate_test (self): import os if os.name != "posix": return host = "www.golem.de" ip = iputil.resolve_host(host).pop() url = u"http://%s/" % iputil.obfuscate_ip(ip) resultlines = [ u"url %s" % url, u"cache key %s" % url, u"real url %s" % url, u"warning URL %s has obfuscated IP address %s" % (url, ip), u"valid", ] self.direct(url, resultlines, recursionlevel=0) def get_cookie (maxage=2000): data = ( ("Comment", "justatest"), ("Max-Age", "%d" % maxage), ("Path", "/"), ("Version", "1"), ("Foo", "Bar"), ) return "; ".join('%s="%s"' % (key, value) for key, value in data) class CookieRedirectHttpRequestHandler (NoQueryHttpRequestHandler): """Handler redirecting certain requests, and setting cookies.""" def end_headers (self): """Send cookie before ending headers.""" self.send_header("Set-Cookie", get_cookie()) self.send_header("Set-Cookie", get_cookie(maxage=0)) super(CookieRedirectHttpRequestHandler, self).end_headers() def redirect (self): """Redirect request.""" path = self.path.replace("redirect", "newurl") self.send_response(302) self.send_header("Location", path) self.end_headers() def redirect_newhost (self): """Redirect request to a new host.""" path = "http://www.example.com/" self.send_response(302) self.send_header("Location", path) self.end_headers() def do_GET (self): """Removes query part of GET request.""" if "redirect_newhost" in self.path: self.redirect_newhost() elif "redirect" in self.path: self.redirect() else: super(CookieRedirectHttpRequestHandler, self).do_GET() def do_HEAD (self): if "redirect_newhost" in self.path: self.redirect_newhost() elif "redirect" in self.path: self.redirect() else: super(CookieRedirectHttpRequestHandler, self).do_HEAD() class RedirectHttpsRequestHandler (CookieRedirectHttpRequestHandler): def redirect (self): """Redirect request.""" path = self.path.replace("redirect", "newurl") port = self.server.server_address[1] url = "https://localhost:%d%s" % (port, path) self.send_response(302) self.send_header("Location", url) self.end_headers()