mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-18 23:10:32 +00:00
218 lines
7.4 KiB
Python
218 lines
7.4 KiB
Python
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2004-2010 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
"""
|
|
Test http checking.
|
|
"""
|
|
from .httpserver import HttpServerTest, NoQueryHttpRequestHandler
|
|
from linkcheck.network import iputil
|
|
|
|
class TestHttp (HttpServerTest):
    """Test http:// link checking."""

    def test_html (self):
        """Check the HTML and XHTML data pages, then run the sub-checks.

        All checks share one server instance that uses
        CookieRedirectHttpRequestHandler; the server is stopped in the
        ``finally`` clause even when a check fails.
        """
        try:
            self.start_server(handler=CookieRedirectHttpRequestHandler)
            url = u"http://localhost:%d/tests/checker/data/" \
                  u"http.html" % self.port
            resultlines = self.get_resultlines("http.html")
            self.direct(url, resultlines, recursionlevel=1)
            url = u"http://localhost:%d/tests/checker/data/" \
                  u"http.xhtml" % self.port
            resultlines = self.get_resultlines("http.xhtml")
            self.direct(url, resultlines, recursionlevel=1)
            self.redirect1_http_test()
            self.redirect2_http_test()
            self.redirect3_http_test()
            self.robots_txt_test()
            self.robots_txt2_test()
            self.swf_test()
            self.obfuscate_test()
        finally:
            self.stop_server()

    def test_redirect (self):
        """Check an http -> https redirect using RedirectHttpsRequestHandler."""
        try:
            self.start_server(handler=RedirectHttpsRequestHandler)
            self.redirect_https_test()
        finally:
            self.stop_server()

    def redirect_https_test (self):
        """Expect an error result for a redirect to an https:// URL."""
        url = u"http://localhost:%d/redirect1" % self.port
        nurl = url
        # The redirect target is already an https:// URL, so it is used
        # verbatim in the expected info line.
        rurl = u"https://localhost:%d/newurl1" % self.port
        resultlines = [
            u"url %s" % url,
            u"cache key %s" % nurl,
            u"real url %s" % url,
            u"info Redirected to `%s'." % rurl,
            u"error",
        ]
        self.direct(url, resultlines, recursionlevel=0)

    def redirect1_http_test (self):
        """Expect an error result after following /redirect1 to /newurl1."""
        url = u"http://localhost:%d/redirect1" % self.port
        nurl = url
        # Mirrors the path rewrite done by the request handler's redirect().
        rurl = url.replace("redirect", "newurl")
        resultlines = [
            u"url %s" % url,
            u"cache key %s" % nurl,
            u"real url %s" % rurl,
            u"info Redirected to `%s'." % rurl,
            u"error",
        ]
        self.direct(url, resultlines, recursionlevel=0)

    def redirect2_http_test (self):
        """Expect valid results for redirect.html and its cached target."""
        url = u"http://localhost:%d/tests/checker/data/redirect.html" % \
              self.port
        nurl = url
        rurl = url.replace("redirect", "newurl")
        resultlines = [
            u"url %s" % url,
            u"cache key %s" % nurl,
            u"real url %s" % rurl,
            u"info Redirected to `%s'." % rurl,
            u"valid",
            u"url newurl.html (cached)",
            u"cache key %s" % rurl,
            u"real url %s" % rurl,
            u"name Recursive Redirect",
            u"info Redirected to `%s'." % rurl,
            u"valid",
        ]
        self.direct(url, resultlines, recursionlevel=99)

    def redirect3_http_test (self):
        """Check redir.html against its stored expected result lines."""
        url = u"http://localhost:%d/tests/checker/data/redir.html" % self.port
        resultlines = self.get_resultlines("redir.html")
        self.direct(url, resultlines, recursionlevel=1)

    def robots_txt_test (self):
        """Expect /robots.txt itself to be reported as valid."""
        url = u"http://localhost:%d/robots.txt" % self.port
        resultlines = [
            u"url %s" % url,
            u"cache key %s" % url,
            u"real url %s" % url,
            u"valid",
        ]
        self.direct(url, resultlines, recursionlevel=5)

    def robots_txt2_test (self):
        """Expect the robots.txt access-denied warning for /secret."""
        url = u"http://localhost:%d/secret" % self.port
        resultlines = [
            u"url %s" % url,
            u"cache key %s" % url,
            u"real url %s" % url,
            u"warning Access denied by robots.txt, skipping content checks.",
            u"error",
        ]
        self.direct(url, resultlines, recursionlevel=5)

    def swf_test (self):
        """Expect the link inside test.swf to be found and reported valid."""
        url = u"http://localhost:%d/tests/checker/data/" \
              u"test.swf" % self.port
        resultlines = [
            u"url %s" % url,
            u"cache key %s" % url,
            u"real url %s" % url,
            u"valid",
            u"url http://www.example.org/",
            u"cache key http://www.example.org/",
            u"real url http://www.example.org/",
            u"valid",
        ]
        self.direct(url, resultlines, recursionlevel=1)

    def obfuscate_test (self):
        """Expect the obfuscated-IP warning for an obfuscated host address.

        Does nothing on non-POSIX systems.
        """
        import os
        if os.name != "posix":
            return
        host = "www.golem.de"
        # NOTE(review): this resolves a real external host, so the check
        # presumably needs working DNS/network access -- confirm.
        ip = iputil.resolve_host(host).pop()
        url = u"http://%s/" % iputil.obfuscate_ip(ip)
        resultlines = [
            u"url %s" % url,
            u"cache key %s" % url,
            u"real url %s" % url,
            u"warning URL %s has obfuscated IP address %s" % (url, ip),
            u"valid",
        ]
        self.direct(url, resultlines, recursionlevel=0)
|
|
|
|
def get_cookie (maxage=2000):
    """Build the value of a Set-Cookie header used by the test handlers.

    @param maxage: value for the Max-Age attribute; formatted with ``%d``
    @return: the attributes as ``key="value"`` pairs joined by ``"; "``
    """
    data = (
        ("Comment", "justatest"),
        ("Max-Age", "%d" % maxage),
        ("Path", "/"),
        ("Version", "1"),
        ("Foo", "Bar"),
    )
    return "; ".join('%s="%s"' % (key, value) for key, value in data)
|
|
|
|
|
|
class CookieRedirectHttpRequestHandler (NoQueryHttpRequestHandler):
    """Handler redirecting certain requests, and setting cookies."""

    def end_headers (self):
        """Send cookie before ending headers."""
        self.send_header("Set-Cookie", get_cookie())
        self.send_header("Set-Cookie", get_cookie(maxage=0))
        super(CookieRedirectHttpRequestHandler, self).end_headers()

    def _send_redirect (self, location):
        """Send a 302 response whose Location header is the given value."""
        self.send_response(302)
        self.send_header("Location", location)
        self.end_headers()

    def redirect (self):
        """Redirect request."""
        # Every "redirect" in the request path becomes "newurl".
        path = self.path.replace("redirect", "newurl")
        self._send_redirect(path)

    def redirect_newhost (self):
        """Redirect request to a new host."""
        self._send_redirect("http://www.example.com/")

    def _handle_redirect (self):
        """Answer the request with a redirect if its path asks for one.

        @return: True if a redirect response was sent, else False
        """
        # "redirect_newhost" must be tested first because it also contains
        # the substring "redirect".
        if "redirect_newhost" in self.path:
            self.redirect_newhost()
        elif "redirect" in self.path:
            self.redirect()
        else:
            return False
        return True

    def do_GET (self):
        """Redirect matching GET requests, else defer to the base handler."""
        if not self._handle_redirect():
            super(CookieRedirectHttpRequestHandler, self).do_GET()

    def do_HEAD (self):
        """Redirect matching HEAD requests, else defer to the base handler."""
        if not self._handle_redirect():
            super(CookieRedirectHttpRequestHandler, self).do_HEAD()
|
|
|
|
|
|
class RedirectHttpsRequestHandler (CookieRedirectHttpRequestHandler):
    """Handler redirecting requests to an https:// URL on the same port."""

    def redirect (self):
        """Redirect request."""
        path = self.path.replace("redirect", "newurl")
        # The target reuses the server's own port, taken from the second
        # item of its server_address.
        port = self.server.server_address[1]
        url = "https://localhost:%d%s" % (port, path)
        self.send_response(302)
        self.send_header("Location", url)
        self.end_headers()
|