diff --git a/Cargo.lock b/Cargo.lock index c46070c..fac196e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1576,6 +1576,12 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "ip_network" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2f047c0a98b2f299aa5d6d7088443570faae494e9ae1305e48be000c9e0eb1" + [[package]] name = "ipconfig" version = "0.2.2" @@ -1789,6 +1795,7 @@ dependencies = [ "html5ever", "html5gum", "http", + "ip_network", "jwalk", "lazy_static", "linkify", diff --git a/lychee-lib/Cargo.toml b/lychee-lib/Cargo.toml index 01a0c35..e102770 100644 --- a/lychee-lib/Cargo.toml +++ b/lychee-lib/Cargo.toml @@ -51,6 +51,7 @@ lazy_static = "1.4.0" html5ever = "0.25.1" html5gum = "0.4.0" octocrab = "0.15.4" +ip_network = "0.4.1" secrecy = "0.8.0" [dependencies.par-stream] diff --git a/lychee-lib/src/filter/mod.rs b/lychee-lib/src/filter/mod.rs index 5f9ec67..2ca75d7 100644 --- a/lychee-lib/src/filter/mod.rs +++ b/lychee-lib/src/filter/mod.rs @@ -1,7 +1,7 @@ mod excludes; mod includes; -use std::{collections::HashSet, net::IpAddr}; +use std::collections::HashSet; pub use excludes::Excludes; pub use includes::Includes; @@ -61,19 +61,14 @@ impl Filter { #[must_use] /// Whether the IP address is excluded from checking pub fn is_ip_excluded(&self, uri: &Uri) -> bool { - match uri.host_ip() { - Some(ip_addr) if self.exclude_loopback_ips && ip_addr.is_loopback() => true, - // Note: in a pathological case, an IPv6 address can be IPv4-mapped - // (IPv4 address embedded in a IPv6). We purposefully - // don't deal with it here, and assume if an address is IPv6, - // we shouldn't attempt to map it to IPv4. 
- // See: https://tools.ietf.org/html/rfc4291#section-2.5.5.2 - Some(IpAddr::V4(v4_addr)) if self.exclude_private_ips && v4_addr.is_private() => true, - Some(IpAddr::V4(v4_addr)) if self.exclude_link_local_ips && v4_addr.is_link_local() => { - true - } - _ => false, + if (self.exclude_loopback_ips && uri.is_loopback()) + || (self.exclude_private_ips && uri.is_private()) + || (self.exclude_link_local_ips && uri.is_link_local()) + { + return true; } + + false } #[must_use] @@ -179,7 +174,10 @@ mod test { use url::Host; use super::{Excludes, Filter, Includes}; - use crate::test_utils::{mail, website}; + use crate::{ + test_utils::{mail, website}, + Uri, + }; // Note: the standard library, as of Rust stable 1.47.0, does not expose // "link-local" or "private" IPv6 checks. However, one might argue @@ -236,6 +234,18 @@ mod test { Ok(()) } + #[test] + fn test_exclude_loopback_ips() { + let filter = Filter { + exclude_loopback_ips: true, + ..Filter::default() + }; + let uri = Uri::try_from("https://[::1]").unwrap(); + assert!(filter.is_excluded(&uri)); + let uri = Uri::try_from("https://127.0.0.1/8").unwrap(); + assert!(filter.is_excluded(&uri)); + } + #[test] fn test_includes_and_excludes_empty() { // This is the pre-configured, empty set of excludes for a client. diff --git a/lychee-lib/src/types/uri.rs b/lychee-lib/src/types/uri.rs index 8acaab9..2c5658f 100644 --- a/lychee-lib/src/types/uri.rs +++ b/lychee-lib/src/types/uri.rs @@ -1,6 +1,7 @@ use std::{collections::HashSet, convert::TryFrom, fmt::Display, net::IpAddr}; use fast_chemail::parse_email; +use ip_network::Ipv6Network; use lazy_static::lazy_static; use serde::{Deserialize, Serialize}; use url::Url; @@ -190,6 +191,86 @@ impl Uri { pub fn is_file(&self) -> bool { self.scheme() == "file" } + + #[inline] + #[must_use] + /// Returns `true` if this is a loopback address. + /// + /// ## IPv4 + /// + /// This is a loopback address (`127.0.0.0/8`). + /// + /// This property is defined by [IETF RFC 1122]. 
+ /// + /// ## IPv6 + /// + /// This is the loopback address (`::1`), as defined in [IETF RFC 4291 section 2.5.3]. + /// + /// [IETF RFC 1122]: https://tools.ietf.org/html/rfc1122 + /// [IETF RFC 4291 section 2.5.3]: https://tools.ietf.org/html/rfc4291#section-2.5.3 + pub fn is_loopback(&self) -> bool { + match self.url.host() { + Some(url::Host::Ipv4(addr)) => addr.is_loopback(), + Some(url::Host::Ipv6(addr)) => addr.is_loopback(), + _ => false, + } + } + + #[inline] + #[must_use] + /// Returns `true` if this is a private IPv4 address or a unique local IPv6 address (`fc00::/7`). + /// + /// # IPv4 + /// + /// The private address ranges are defined in [IETF RFC 1918] and include: + /// + /// - `10.0.0.0/8` + /// - `172.16.0.0/12` + /// - `192.168.0.0/16` + /// + /// # IPv6 + /// + /// Unique local address is defined in [IETF RFC 4193]. + /// + /// ## Note + /// + /// Unicast site-local network was defined in [IETF RFC 4291], but was fully deprecated in + /// [IETF RFC 3879]. So it is **NOT** considered private for this purpose. + /// + /// [IETF RFC 1918]: https://tools.ietf.org/html/rfc1918 + /// [IETF RFC 4193]: https://tools.ietf.org/html/rfc4193 + /// [IETF RFC 4291]: https://tools.ietf.org/html/rfc4291 + /// [IETF RFC 3879]: https://tools.ietf.org/html/rfc3879 + pub fn is_private(&self) -> bool { + match self.url.host() { + Some(url::Host::Ipv4(addr)) => addr.is_private(), + Some(url::Host::Ipv6(addr)) => Ipv6Network::from(addr).is_unique_local(), + _ => false, + } + } + + #[inline] + #[must_use] + /// Returns `true` if the address is a link-local IPv4 address (`169.254.0.0/16`), + /// or an IPv6 unicast address with link-local scope (`fe80::/10`). + /// + /// # IPv4 + /// + /// Link-local address is defined by [IETF RFC 3927]. + /// + /// # IPv6 + /// + /// Unicast address with link-local scope is defined in [IETF RFC 4291]. 
+ /// + /// [IETF RFC 3927]: https://tools.ietf.org/html/rfc3927 + /// [IETF RFC 4291]: https://tools.ietf.org/html/rfc4291 + pub fn is_link_local(&self) -> bool { + match self.url.host() { + Some(url::Host::Ipv4(addr)) => addr.is_link_local(), + Some(url::Host::Ipv6(addr)) => Ipv6Network::from(addr).is_unicast_link_local(), + _ => false, + } + } } impl AsRef for Uri { @@ -259,6 +340,18 @@ mod test { types::uri::GithubUri, }; + #[test] + fn test_ipv4_uri_is_loopback() { + let uri = Uri::try_from("http://127.0.0.0").unwrap(); + assert!(uri.is_loopback()); + } + + #[test] + fn test_ipv6_uri_is_loopback() { + let uri = Uri::try_from("https://[::1]").unwrap(); + assert!(uri.is_loopback()); + } + #[test] fn test_uri_from_str() { assert!(Uri::try_from("").is_err());