Warn about obfuscated IP numbers.

This commit is contained in:
Bastian Kleineidam 2010-09-05 20:11:02 +02:00
parent 5284017d67
commit 8a1ac26c85
7 changed files with 60 additions and 5 deletions

View file

@ -46,6 +46,7 @@ Features:
- checking: Do not fall back to GET when no recursion is requested on
single pages. This allows checking pages with a HEAD request even if
robots.txt disallows fetching the page content.
- checking: detect and warn when obfuscated IP addresses are found.
5.2 "11:14" (released 7.3.2010)

View file

@ -6,5 +6,3 @@
- [PYTHON3]
linkchecker-git/linkcheck/dns/resolver.py:313: DeprecationWarning: in 3.x, __getslice__ has been removed; use __getitem__
linkcheck.dns.name.Name(linkcheck.dns.name.from_text(socket.gethostname())[1:])
- [IP] Detect obfuscated IP addresses
http://morph3us.org/blog/index.php?/archives/35-Dotless-IP-addresses-and-URL-Obfuscation.html

View file

@ -80,6 +80,7 @@ WARN_URL_WARNREGEX_FOUND = "url-warnregex-found"
WARN_URL_CONTENT_SIZE_TOO_LARGE = "url-content-too-large"
WARN_URL_CONTENT_SIZE_ZERO = "url-content-size-zero"
WARN_URL_CONTENT_SIZE_UNEQUAL = "url-content-size-unequal"
WARN_URL_OBFUSCATED_IP = "url-obfuscated-ip"
WARN_FILE_MISSING_SLASH = "file-missing-slash"
WARN_FILE_SYSTEM_PATH = "file-system-path"
WARN_FTP_MISSING_SLASH = "ftp-missing-slash"
@ -135,6 +136,7 @@ Warnings = {
WARN_NNTP_NO_SERVER: _("No NNTP server was found."),
WARN_NNTP_NO_NEWSGROUP: _("The NNTP newsgroup could not be found."),
WARN_NNTP_BUSY: _("The NNTP server was busy."),
WARN_URL_OBFUSCATED_IP: _("The IP is obfuscated."),
}
# file extensions we can parse recursively

View file

@ -35,8 +35,9 @@ from .. import (log, LOG_CHECK, LOG_CACHE, httputil, httplib2 as httplib,
strformat, LinkCheckerError, url as urlutil, trace, clamav, winutil)
from ..HtmlParser import htmlsax
from ..htmlutil import linkparse
from ..network import iputil
from .const import (WARN_URL_EFFECTIVE_URL, WARN_URL_UNICODE_DOMAIN,
WARN_URL_ERROR_GETTING_CONTENT,
WARN_URL_ERROR_GETTING_CONTENT, WARN_URL_OBFUSCATED_IP,
WARN_URL_ANCHOR_NOT_FOUND, WARN_URL_WARNREGEX_FOUND,
WARN_URL_CONTENT_SIZE_TOO_LARGE, WARN_URL_CONTENT_SIZE_ZERO,
WARN_URL_CONTENT_SIZE_UNEQUAL, ExcList, ExcSyntaxList, ExcNoCacheList)
@ -399,6 +400,21 @@ class UrlBase (object):
raise LinkCheckerError(_("URL has invalid port %(port)r") %
{"port": str(self.port)})
self.port = int(self.port)
self.check_obfuscated_ip()
def check_obfuscated_ip (self):
    """Add a warning when the host of this URL is an obfuscated IP address.

    Nothing happens unless the URL scheme is one of those listed below
    (schemes whose host part can be an IP address), the host matches
    iputil.is_obfuscated_ip, and the host resolves to at least one
    address. The warning names one of the resolved addresses and is
    tagged WARN_URL_OBFUSCATED_IP.
    """
    # Only these schemes carry a host that can be an IP address.
    if self.scheme not in ("ftp", "http", "mailto", "news", "nntp", "telnet"):
        return
    if not iputil.is_obfuscated_ip(self.host):
        return
    addresses = iputil.resolve_host(self.host)
    if not addresses:
        # Host looks obfuscated but does not resolve; nothing to report.
        return
    # resolve_host() returns a set, so pop() yields one arbitrary address.
    message = _("URL %(url)s has obfuscated IP address %(ip)s") % {
        "url": self.base_url,
        "ip": addresses.pop(),
    }
    self.add_warning(message, tag=WARN_URL_OBFUSCATED_IP)
def check (self):
"""Main check function for checking this URL."""

View file

@ -263,6 +263,7 @@ def lookup_ips (ips):
def resolve_host (host):
"""
@host: hostname or IP address
Return set of ip numbers for given host.
"""
ips = set()
@ -275,3 +276,17 @@ def resolve_host (host):
except socket.error:
log.info(LOG_DNS, "Ignored invalid host %r", host)
return ips
def obfuscate_ip (ip):
    """Obfuscate given host in IP form.

    Every octet is rendered as exactly two hexadecimal digits, so the
    result denotes the same 32-bit address as the dotted input, e.g.
    '10.0.0.1' -> '0x0a000001'.

    @ip: IPv4 address string
    @return: hexadecimal IP string ('0x1ab...')
    @raise: ValueError on invalid IP addresses
    """
    if not is_valid_ipv4(ip):
        raise ValueError('Invalid IPv4 value %r' % ip)
    # Zero-pad each octet: unpadded digits ('a', '0', '0', '1') would
    # concatenate to '0xa001', which is a different address (0.0.160.1).
    return "0x%s" % "".join("%02x" % int(x) for x in ip.split("."))
# Matcher for hosts written as a single hexadecimal ('0x7f000001') or
# decimal ('2130706433') number; returns a match object or None.
# Matching is case-insensitive because inet_aton()-style parsers also
# accept a '0X' prefix and upper-case hex digits.
is_obfuscated_ip = re.compile(r"^(0x[a-f0-9]+|[0-9]+)$", re.IGNORECASE).match

View file

@ -18,7 +18,7 @@
Test http checking.
"""
from .httpserver import HttpServerTest, NoQueryHttpRequestHandler
from linkcheck.network import iputil
class TestHttp (HttpServerTest):
"""Test http:// link checking."""
@ -40,6 +40,7 @@ class TestHttp (HttpServerTest):
self.robots_txt_test()
self.robots_txt2_test()
self.swf_test()
self.obfuscate_test()
finally:
self.stop_server()
@ -143,6 +144,18 @@ class TestHttp (HttpServerTest):
]
self.direct(url, resultlines, recursionlevel=1)
def obfuscate_test (self):
    """Check a URL whose host is a hex-obfuscated IP address.

    Resolves a known host name, rewrites one of its addresses with
    iputil.obfuscate_ip, and expects the checker output to contain the
    obfuscated-IP warning for that address.
    """
    hostname = "www.golem.de"
    # resolve_host() returns a set; any one of the addresses will do.
    address = iputil.resolve_host(hostname).pop()
    obfuscated_url = u"http://%s/" % iputil.obfuscate_ip(address)
    expected = [
        u"url %s" % obfuscated_url,
        u"cache key %s" % obfuscated_url,
        u"real url %s" % obfuscated_url,
        u"warning URL %s has obfuscated IP address %s" % (obfuscated_url, address),
        u"valid",
    ]
    self.direct(obfuscated_url, expected, recursionlevel=0)
def get_cookie (maxage=2000):
data = (

View file

@ -19,8 +19,9 @@ Test network functions.
"""
import unittest
from tests import need_posix
from tests import need_posix, need_network
import linkcheck.network
from linkcheck.network import iputil
class TestNetwork (unittest.TestCase):
@ -34,3 +35,12 @@ class TestNetwork (unittest.TestCase):
def test_interfaces (self):
    # Smoke test: only checks that building an IfConfig and listing the
    # interfaces does not raise; the returned list is not inspected.
    linkcheck.network.IfConfig().getInterfaceList()
@need_network
def test_iputils (self):
    # Round trip: resolve a host name, obfuscate one of its addresses,
    # then verify the obfuscated form is recognised by is_obfuscated_ip
    # and that lookup_ips() maps it back to the original host name.
    hostname = "www.golem.de"
    address = iputil.resolve_host(hostname).pop()
    hex_ip = iputil.obfuscate_ip(address)
    self.assertTrue(iputil.is_obfuscated_ip(hex_ip))
    resolved_names = iputil.lookup_ips([hex_ip])
    self.assertTrue(hostname in resolved_names)