mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-10 07:34:44 +00:00
Improved domain name checking
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@3956 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
56017457e6
commit
366c711b43
3 changed files with 275 additions and 5 deletions
265
linkcheck/network/iputil.py
Normal file
265
linkcheck/network/iputil.py
Normal file
|
|
@ -0,0 +1,265 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2003-2009 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
"""
|
||||
Ip number related utility functions.
|
||||
"""
|
||||
|
||||
import re
|
||||
import socket
|
||||
import struct
|
||||
from .. import log, LOG_DNS
|
||||
|
||||
|
||||
# IP address regular expressions
# one to three digits; the numeric range of a part is not checked here
_ipv4_num = r"\d{1,3}"
# four such numbers separated by dots (dotted quad)
_ipv4_num_4 = r"%s\.%s\.%s\.%s" % ((_ipv4_num,)*4)
_ipv4_re = re.compile(r"^%s$" % _ipv4_num_4)
# see rfc2373
# one to four hex digits; note that only lowercase a-f is matched
_ipv6_num = r"[\da-f]{1,4}"
# full form: eight colon-separated groups
_ipv6_re = re.compile(r"^%s:%s:%s:%s:%s:%s:%s:%s$" % ((_ipv6_num,)*8))
# mixed form: six colon-separated groups followed by a dotted quad
_ipv6_ipv4_re = re.compile(r"^%s:%s:%s:%s:%s:%s:" % ((_ipv6_num,)*6) + \
                           r"%s$" % _ipv4_num_4)
# abbreviated form: optional groups on either side of a "::"
_ipv6_abbr_re = re.compile(r"^((%s:){0,6}%s)?::((%s:){0,6}%s)?$" % \
                            ((_ipv6_num,)*4))
# abbreviated mixed form: optional groups around a "::", then a dotted quad
_ipv6_ipv4_abbr_re = re.compile(r"^((%s:){0,4}%s)?::((%s:){0,5})?" % \
                           ((_ipv6_num,)*3) + \
                           "%s$" % _ipv4_num_4)
# netmask regex
# dotted quad host, a slash, and a dotted quad netmask
_host_netmask_re = re.compile(r"^%s/%s$" % (_ipv4_num_4, _ipv4_num_4))
# dotted quad host, a slash, and a one or two digit CIDR prefix length
_host_cidrmask_re = re.compile(r"^%s/\d{1,2}$" % _ipv4_num_4)
|
||||
|
||||
|
||||
def expand_ipv6 (ip, num):
    """
    Expand an IPv6 address with included :: to num colon-separated groups.

    The "::" abbreviation is replaced by as many "0" groups as are needed
    to reach a total of num groups.

    @param ip: address string containing exactly one "::"
    @param num: total number of groups the result must have
    @return: the expanded address string
    @raise: ValueError on invalid IP addresses, that is when "::" is
        missing, occurs more than once, or when the address already has
        num or more groups
    """
    i = ip.find("::")
    if i < 0:
        # without this guard the slices below would silently build garbage
        raise ValueError("invalid ipv6 number: %s" % ip)
    prefix = ip[:i]
    suffix = ip[i+2:]
    if "::" in suffix:
        # only one run of zero groups may be abbreviated
        raise ValueError("invalid ipv6 number: %s" % ip)
    # number of groups = number of separators, plus one per non-empty side
    count = prefix.count(":") + suffix.count(":")
    if prefix:
        count += 1
        prefix = prefix+":"
    if suffix:
        count += 1
        suffix = ":"+suffix
    if count >= num:
        raise ValueError("invalid ipv6 number: %s" % ip)
    fill = (num-count-1)*"0:" + "0"
    return prefix+fill+suffix
|
||||
|
||||
|
||||
def expand_ip (ip):
    """
    Expand abbreviated IPv6 addresses to their full form.

    Abbreviated ipv6 addresses are expanded to the full 8 groups (or to
    6 groups plus the dotted quad for the mixed IPv6/IPv4 notation), all
    other addresses are left untouched.

    @param ip: host name or ip address string
    @return: a tuple (ip, num) where num==1 if ip is a numeric ip, 0
        otherwise.
    @raise: ValueError if an abbreviated address has too many groups
    """
    if _ipv4_re.match(ip) or \
       _ipv6_re.match(ip) or \
       _ipv6_ipv4_re.match(ip):
        return (ip, 1)
    if _ipv6_abbr_re.match(ip):
        return (expand_ipv6(ip, 8), 1)
    if _ipv6_ipv4_abbr_re.match(ip):
        # split off the trailing dotted quad at the last colon
        i = ip.rfind(":")
        head, quad = ip[:i], ip[i+1:]
        if head.endswith(":"):
            # the last colon was the second half of the "::" itself
            # (e.g. "::1.2.3.4"), so restore it before expanding
            head += ":"
        return (expand_ipv6(head, 6) + ":" + quad, 1)
    return (ip, 0)
|
||||
|
||||
|
||||
def is_valid_ip (ip):
    """
    Tell whether ip is accepted as an IPv4 or as an IPv6 address.

    The IPv4 check is tried first; the IPv6 check only runs when the
    IPv4 check fails.
    """
    if is_valid_ipv4(ip):
        return True
    return is_valid_ipv6(ip)
|
||||
|
||||
|
||||
def is_valid_ipv4 (ip):
    """
    Tell whether ip is a dotted quad whose four parts are all <= 255.
    """
    if _ipv4_re.match(ip) is None:
        return False
    # the regex guarantees four numeric parts, so only the range is left
    for part in ip.split("."):
        if int(part) > 255:
            return False
    return True
|
||||
|
||||
|
||||
def is_valid_ipv6 (ip):
    """
    Tell whether ip matches one of the supported IPv6 notations
    (full, mixed IPv6/IPv4, and the abbreviated variants of both).
    """
    # XXX this is not complete: check ipv6 and ipv4 semantics too here
    patterns = (_ipv6_re, _ipv6_ipv4_re,
                _ipv6_abbr_re, _ipv6_ipv4_abbr_re)
    for pattern in patterns:
        if pattern.match(ip):
            return True
    return False
|
||||
|
||||
|
||||
def is_valid_cidrmask (mask):
    """
    Tell whether the number mask is usable as a CIDR prefix length,
    that is whether it lies in the range 0 to 32 inclusive.
    """
    if mask < 0:
        return False
    return mask <= 32
|
||||
|
||||
|
||||
def dq2num (ip):
    """
    Turn a dotted quad string into its unsigned 32 bit integer value
    (network byte order, first part most significant).
    """
    packed = socket.inet_aton(ip)
    (number,) = struct.unpack('!L', packed)
    return number
|
||||
|
||||
|
||||
def num2dq (n):
    """
    Turn an unsigned 32 bit integer into its dotted quad string
    (network byte order, most significant byte first).
    """
    packed = struct.pack('!L', n)
    return socket.inet_ntoa(packed)
|
||||
|
||||
|
||||
def cidr2mask (n):
    """
    Return a mask where the n left-most of 32 bits are set.

    @param n: CIDR prefix length, expected to be in the range 0..32
    @return: the mask as an integer, e.g. 0xFFFFFF00 for n == 24
    @raise: ValueError if n is outside 0..32 (negative shift count)
    """
    # a plain 1 instead of the Python-2-only literal 1L: integers are
    # promoted automatically, and this form also parses under Python 3
    return ((1 << n) - 1) << (32-n)
|
||||
|
||||
|
||||
def netmask2mask (ip):
    """
    Convert a dotted quad netmask string into its integer bit mask.
    """
    mask = dq2num(ip)
    return mask
|
||||
|
||||
def mask2netmask (mask):
    """
    Convert an integer bit mask into a dotted quad netmask string.
    """
    netmask = num2dq(mask)
    return netmask
|
||||
|
||||
def dq2net (ip, mask):
    """
    Compute the numeric network address of a dotted quad ip.

    The ip is converted to a number and combined with the numeric
    mask by bitwise AND; only that single number is returned, the
    mask is not part of the result.
    """
    number = dq2num(ip)
    return number & mask
|
||||
|
||||
|
||||
def dq_in_net (n, mask):
    """
    Tell whether every bit that is set in mask is also set in the
    numerical ip n.

    NOTE(review): a network membership test would normally compare
    the masked ip against the network address; this function only
    receives the mask - confirm the intended contract with callers.
    """
    masked = n & mask
    return masked == mask
|
||||
|
||||
|
||||
def host_in_set (ip, hosts, nets):
    """
    Return True if given ip is in host or network list.

    @param ip: host name or ip address string to look up
    @param hosts: container of host names and ip address strings
    @param nets: iterable of (network number, mask number) tuples as
        built by hosts2map(); only consulted for valid IPv4 addresses
    """
    if ip in hosts:
        return True
    if is_valid_ipv4(ip):
        n = dq2num(ip)
        for net, mask in nets:
            # the address is in the network iff its network part equals
            # the network address; comparing against the bits of the
            # network address alone would also accept e.g. 11.0.0.1
            # for the network 10.0.0.0/8
            if (n & mask) == net:
                return True
    return False


def strhosts2map (strhosts):
    """
    Convert a string representation of hosts and networks to
    a tuple (hosts, networks).

    @param strhosts: comma separated host and network entries
    @return: the result of hosts2map() for the non-empty entries
    """
    # strip before filtering so that whitespace-only entries are dropped
    # instead of being handed to the resolver as an empty host name
    entries = (s.strip() for s in strhosts.split(","))
    return hosts2map([s for s in entries if s])


def hosts2map (hosts):
    """
    Return a set of named hosts, and a list of subnets (host/netmask
    addresses).
    Only IPv4 host/netmasks are supported.

    Entries of the form "a.b.c.d/NN" and "a.b.c.d/e.f.g.h" are stored as
    (network number, mask number) tuples in the subnet list; invalid
    ones are logged and skipped. Numeric ip addresses are added to the
    host set in expanded form, and every other entry is resolved with
    resolve_host() and its addresses are added to the host set.

    @param hosts: iterable of host and network strings
    @return: tuple (set of hosts, list of (net, mask) tuples)
    """
    hostset = set()
    nets = []
    for host in hosts:
        if _host_cidrmask_re.match(host):
            host, mask = host.split("/")
            mask = int(mask)
            if not is_valid_cidrmask(mask):
                log.error(LOG_DNS,
                          "CIDR mask %d is not a valid network mask", mask)
                continue
            if not is_valid_ipv4(host):
                log.error(LOG_DNS, "host %r is not a valid ip address", host)
                continue
            bits = cidr2mask(mask)
            # keep the mask next to the network address; both are needed
            # for the membership test and for map2hosts()
            nets.append((dq2net(host, bits), bits))
        elif _host_netmask_re.match(host):
            host, mask = host.split("/")
            if not is_valid_ipv4(host):
                log.error(LOG_DNS, "host %r is not a valid ip address", host)
                continue
            if not is_valid_ipv4(mask):
                log.error(LOG_DNS,
                          "mask %r is not a valid ip network mask", mask)
                continue
            bits = netmask2mask(mask)
            nets.append((dq2net(host, bits), bits))
        elif is_valid_ip(host):
            hostset.add(expand_ip(host)[0])
        else:
            hostset |= resolve_host(host)
    return (hostset, nets)


def map2hosts (hostmap):
    """
    Convert a tuple (hosts, networks) into a host/network list
    suitable for storing in a config file.

    @param hostmap: tuple (set of hosts, list of (net, mask) tuples)
    @return: a new set with all hosts plus one "network/netmask" dotted
        quad entry per network
    """
    ret = hostmap[0].copy()
    for net, mask in hostmap[1]:
        # mask2netmask() returns a dotted quad string, so it has to be
        # formatted with %s; %d raises a TypeError here
        ret.add("%s/%s" % (num2dq(net), mask2netmask(mask)))
    return ret
|
||||
|
||||
|
||||
def lookup_ips (ips):
    """
    Collect the host names the given ips map back to.

    Each ip is looked up with socket.gethostbyaddr(); when the lookup
    fails with a socket error the ip itself is used in place of a name.
    """
    names = set()
    for addr in ips:
        try:
            name = socket.gethostbyaddr(addr)[0]
        except socket.error:
            # no reverse entry: fall back to the plain address
            name = addr
        names.add(name)
    return names
|
||||
|
||||
|
||||
def resolve_host (host):
    """
    Collect the ip numbers the given host resolves to.

    A host that cannot be resolved is logged at info level and yields
    an empty set.
    """
    try:
        infos = socket.getaddrinfo(host, None, 0, socket.SOCK_STREAM)
    except socket.error:
        log.info(LOG_DNS, "Ignored invalid host %r", host)
        return set()
    # every entry is a tuple (address family, socket type, protocol,
    # canonical name, socket address); the ip is the first item of
    # the socket address
    return set(info[4][0] for info in infos)
|
||||
|
|
@ -41,18 +41,17 @@ default_ports = {
|
|||
# http://www.dwheeler.com/secure-programs/Secure-Programs-HOWTO/\
|
||||
# filter-html.html#VALIDATING-URIS
|
||||
_basic = {
|
||||
"_az09": r"a-z0-9",
|
||||
"_path": r"\-\_\.\!\~\*\'\(\),",
|
||||
"_hex_safe": r"2-9a-f",
|
||||
"_hex_full": r"0-9a-f",
|
||||
"_part": r"([a-z0-9][-a-z0-9]{0,61}[a-z]|[a-z])",
|
||||
}
|
||||
_safe_char = r"([%(_az09)s%(_path)s\+]|"\
|
||||
_safe_char = r"([a-z0-9%(_path)s\+]|"\
|
||||
r"(%%[%(_hex_safe)s][%(_hex_full)s]))" % _basic
|
||||
_safe_scheme_pattern = r"(https?|ftp)"
|
||||
_safe_domain_pattern = r"([%(_az09)s][%(_az09)s\-]*"\
|
||||
r"(\.[%(_az09)s][%(_az09)s\-]*)*\.?)" % _basic
|
||||
_safe_domain_pattern = r"(%(_part)s(\.%(_part)s)*\.?)" % _basic
|
||||
_safe_host_pattern = _safe_domain_pattern+r"(:(80|8080|8000|443))?" % _basic
|
||||
_safe_path_pattern = r"((/([%(_az09)s%(_path)s]|"\
|
||||
_safe_path_pattern = r"((/([a-z0-9%(_path)s]|"\
|
||||
r"(%%[%(_hex_safe)s][%(_hex_full)s]))+)*/?)" % _basic
|
||||
_safe_fragment_pattern = r"%s*" % _safe_char
|
||||
_safe_cgi = r"%s+(=(%s|/)+)?" % (_safe_char, _safe_char)
|
||||
|
|
|
|||
|
|
@ -589,3 +589,9 @@ class TestUrl (unittest.TestCase):
|
|||
self.assertEqual(url_unsplit(url_split(url)), url)
|
||||
url = "http://example.org:123/whoops"
|
||||
self.assertEqual(url_unsplit(url_split(url)), url)
|
||||
|
||||
def test_safe_domain (self):
    """
    Check is_safe_domain() against one accepted and two rejected names.
    """
    check = linkcheck.url.is_safe_domain
    # two consecutive dots
    self.assertFalse(check(u"a..example.com"))
    # underscore inside a label
    self.assertFalse(check(u"a_b.example.com"))
    # hyphen inside a label
    self.assertTrue(check(u"a-b.example.com"))
|
||||
|
|
|
|||
Loading…
Reference in a new issue