mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-04 04:44:42 +00:00
Warn about obfuscated IP numbers.
This commit is contained in:
parent
5284017d67
commit
8a1ac26c85
7 changed files with 60 additions and 5 deletions
|
|
@ -46,6 +46,7 @@ Features:
|
|||
- checking: Do not fall back to GET when no recursion is requested on
|
||||
single pages. This allows checking pages with a HEAD request even if
|
||||
robots.txt disallows fetching the page content.
|
||||
- checking: detect and warn when obfuscated IP addresses are found.
|
||||
|
||||
|
||||
5.2 "11:14" (released 7.3.2010)
|
||||
|
|
|
|||
|
|
@ -6,5 +6,3 @@
|
|||
- [PYTHON3]
|
||||
linkchecker-git/linkcheck/dns/resolver.py:313: DeprecationWarning: in 3.x, __getslice__ has been removed; use __getitem__
|
||||
linkcheck.dns.name.Name(linkcheck.dns.name.from_text(socket.gethostname())[1:])
|
||||
- [IP] Detect obfuscated IP addresses
|
||||
http://morph3us.org/blog/index.php?/archives/35-Dotless-IP-addresses-and-URL-Obfuscation.html
|
||||
|
|
|
|||
|
|
@ -80,6 +80,7 @@ WARN_URL_WARNREGEX_FOUND = "url-warnregex-found"
|
|||
WARN_URL_CONTENT_SIZE_TOO_LARGE = "url-content-too-large"
|
||||
WARN_URL_CONTENT_SIZE_ZERO = "url-content-size-zero"
|
||||
WARN_URL_CONTENT_SIZE_UNEQUAL = "url-content-size-unequal"
|
||||
WARN_URL_OBFUSCATED_IP = "url-obfuscated-ip"
|
||||
WARN_FILE_MISSING_SLASH = "file-missing-slash"
|
||||
WARN_FILE_SYSTEM_PATH = "file-system-path"
|
||||
WARN_FTP_MISSING_SLASH = "ftp-missing-slash"
|
||||
|
|
@ -135,6 +136,7 @@ Warnings = {
|
|||
WARN_NNTP_NO_SERVER: _("No NNTP server was found."),
|
||||
WARN_NNTP_NO_NEWSGROUP: _("The NNTP newsgroup could not be found."),
|
||||
WARN_NNTP_BUSY: _("The NNTP server was busy."),
|
||||
WARN_URL_OBFUSCATED_IP: _("The IP is obfuscated."),
|
||||
}
|
||||
|
||||
# file extensions we can parse recursively
|
||||
|
|
|
|||
|
|
@ -35,8 +35,9 @@ from .. import (log, LOG_CHECK, LOG_CACHE, httputil, httplib2 as httplib,
|
|||
strformat, LinkCheckerError, url as urlutil, trace, clamav, winutil)
|
||||
from ..HtmlParser import htmlsax
|
||||
from ..htmlutil import linkparse
|
||||
from ..network import iputil
|
||||
from .const import (WARN_URL_EFFECTIVE_URL, WARN_URL_UNICODE_DOMAIN,
|
||||
WARN_URL_ERROR_GETTING_CONTENT,
|
||||
WARN_URL_ERROR_GETTING_CONTENT, WARN_URL_OBFUSCATED_IP,
|
||||
WARN_URL_ANCHOR_NOT_FOUND, WARN_URL_WARNREGEX_FOUND,
|
||||
WARN_URL_CONTENT_SIZE_TOO_LARGE, WARN_URL_CONTENT_SIZE_ZERO,
|
||||
WARN_URL_CONTENT_SIZE_UNEQUAL, ExcList, ExcSyntaxList, ExcNoCacheList)
|
||||
|
|
@ -399,6 +400,21 @@ class UrlBase (object):
|
|||
raise LinkCheckerError(_("URL has invalid port %(port)r") %
|
||||
{"port": str(self.port)})
|
||||
self.port = int(self.port)
|
||||
self.check_obfuscated_ip()
|
||||
|
||||
def check_obfuscated_ip (self):
    """Add a warning when the host of this URL is an obfuscated IP address.

    Nothing happens unless the URL scheme is one of the schemes listed
    below, the host matches iputil.is_obfuscated_ip() and
    iputil.resolve_host() returns at least one address for it.
    """
    # Only these schemes are inspected.
    # NOTE(review): "https" is not in this list, so https URLs are never
    # checked here -- confirm whether that is intentional.
    checked_schemes = ("ftp", "http", "mailto", "news", "nntp", "telnet")
    if self.scheme in checked_schemes and iputil.is_obfuscated_ip(self.host):
        resolved = iputil.resolve_host(self.host)
        if resolved:
            template = _("URL %(url)s has obfuscated IP address %(ip)s")
            # Report one of the resolved addresses alongside the URL.
            details = {"url": self.base_url, "ip": resolved.pop()}
            self.add_warning(template % details, tag=WARN_URL_OBFUSCATED_IP)
|
||||
|
||||
def check (self):
|
||||
"""Main check function for checking this URL."""
|
||||
|
|
|
|||
|
|
@ -263,6 +263,7 @@ def lookup_ips (ips):
|
|||
|
||||
def resolve_host (host):
|
||||
"""
|
||||
@host: hostname or IP address
|
||||
Return set of ip numbers for given host.
|
||||
"""
|
||||
ips = set()
|
||||
|
|
@ -275,3 +276,17 @@ def resolve_host (host):
|
|||
except socket.error:
|
||||
log.info(LOG_DNS, "Ignored invalid host %r", host)
|
||||
return ips
|
||||
|
||||
|
||||
def obfuscate_ip (ip):
    """Obfuscate given host in IP form.

    The dotted address is rewritten as a single hexadecimal number in
    which every octet occupies exactly two hex digits, e.g. "10.0.0.1"
    becomes "0x0a000001".

    @ip: IPv4 address string
    @return: hexadecimal IP string ('0x1ab...')
    @raise: ValueError on invalid IP addresses
    """
    if not is_valid_ipv4(ip):
        raise ValueError('Invalid IPv4 value %r' % ip)
    # Zero-pad each octet to two hex digits. Without the padding an octet
    # below 16 contributes a single digit ("10.0.0.1" -> "0xa001"), which
    # shifts the remaining digits and makes the result denote a different
    # address.
    # NOTE(review): assumes is_valid_ipv4() only accepts octets in the
    # range 0-255, so two digits are always enough -- confirm.
    return "0x%s" % "".join("%02x" % int(octet) for octet in ip.split("."))
|
||||
|
||||
|
||||
# A host counts as an obfuscated IP when the whole string is either "0x"
# followed by lowercase hex digits or consists of decimal digits only.
_obfuscated_ip_re = re.compile(r"^(0x[a-f0-9]+|[0-9]+)$")
is_obfuscated_ip = _obfuscated_ip_re.match
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@
|
|||
Test http checking.
|
||||
"""
|
||||
from .httpserver import HttpServerTest, NoQueryHttpRequestHandler
|
||||
|
||||
from linkcheck.network import iputil
|
||||
|
||||
class TestHttp (HttpServerTest):
|
||||
"""Test http:// link checking."""
|
||||
|
|
@ -40,6 +40,7 @@ class TestHttp (HttpServerTest):
|
|||
self.robots_txt_test()
|
||||
self.robots_txt2_test()
|
||||
self.swf_test()
|
||||
self.obfuscate_test()
|
||||
finally:
|
||||
self.stop_server()
|
||||
|
||||
|
|
@ -143,6 +144,18 @@ class TestHttp (HttpServerTest):
|
|||
]
|
||||
self.direct(url, resultlines, recursionlevel=1)
|
||||
|
||||
def obfuscate_test (self):
    """Check a URL whose host is the hex form of a resolved IP address.

    The expected output contains the obfuscated-IP warning naming the
    plain address, and the URL is still reported as valid.
    """
    plain_ip = iputil.resolve_host("www.golem.de").pop()
    hex_host = iputil.obfuscate_ip(plain_ip)
    url = u"http://%s/" % hex_host
    warning = u"warning URL %s has obfuscated IP address %s" % (url, plain_ip)
    expected = [
        u"url %s" % url,
        u"cache key %s" % url,
        u"real url %s" % url,
        warning,
        u"valid",
    ]
    self.direct(url, expected, recursionlevel=0)
|
||||
|
||||
def get_cookie (maxage=2000):
|
||||
data = (
|
||||
|
|
|
|||
|
|
@ -19,8 +19,9 @@ Test network functions.
|
|||
"""
|
||||
|
||||
import unittest
|
||||
from tests import need_posix
|
||||
from tests import need_posix, need_network
|
||||
import linkcheck.network
|
||||
from linkcheck.network import iputil
|
||||
|
||||
|
||||
class TestNetwork (unittest.TestCase):
|
||||
|
|
@ -34,3 +35,12 @@ class TestNetwork (unittest.TestCase):
|
|||
def test_interfaces (self):
|
||||
ifc = linkcheck.network.IfConfig()
|
||||
ifc.getInterfaceList()
|
||||
|
||||
@need_network
def test_iputils (self):
    """Obfuscate a resolved address and look it up again.

    The obfuscated form must be recognised by is_obfuscated_ip(), and
    lookup_ips() on it must yield the original host name.
    """
    hostname = "www.golem.de"
    address = iputil.resolve_host(hostname).pop()
    hex_ip = iputil.obfuscate_ip(address)
    self.assertTrue(iputil.is_obfuscated_ip(hex_ip))
    found_hosts = iputil.lookup_ips([hex_ip])
    self.assertTrue(hostname in found_hosts)
|
||||
|
|
|
|||
Loading…
Reference in a new issue