Exclude example TLDs from RFC 2606 (#1335)

Fixes https://github.com/lycheeverse/lychee/issues/1283
This commit is contained in:
Matthias Endler 2024-01-05 18:48:15 +01:00 committed by GitHub
parent 861a71885a
commit 63ba63f7c9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 26 additions and 3 deletions

View file

@ -12,3 +12,9 @@ mailto:hello@example.com?subject=hello
http://example.net/foo/bar
mail@example.com
mail@somedomain.com
https://test.localhost
http://foo.bar.invalid
foo.bar.invalid/some/path
https://example.example
http://integration.test

View file

@ -1,3 +1,9 @@
http://gobyexample.com/
https://examples.com/
https://texample.net/
https://texample.net/
http://foo.isnotinvalid
http://foo.bar.invalid2
http://integration.text
https://test.possiblylocalhost
https://example.examplenotexample

View file

@ -65,7 +65,7 @@ mod cli {
let output = cmd.get_output();
let output = std::str::from_utf8(&output.stdout).unwrap();
assert_eq!(output.lines().count(), 3);
assert_eq!(output.lines().count(), 8);
Ok(())
}

View file

@ -17,10 +17,19 @@ use crate::Uri;
// Second-level example domains that are matched against a URI's domain.
static EXAMPLE_DOMAINS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
    HashSet::from(["example.com", "example.org", "example.net", "example.edu"])
});
/// Example TLDs from section 2 of RFC 2606, excluded in addition to the
/// example domains. Each entry keeps its leading dot because it is compared
/// against the end of the domain.
///
/// This exclusion gets subsumed by the `check_example_domains` feature.
#[cfg(all(not(test), not(feature = "check_example_domains")))]
static EXAMPLE_TLDS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
    HashSet::from([".test", ".example", ".invalid", ".localhost"])
});
// Allow usage of example domains in tests: in test builds, or when the
// `check_example_domains` feature is enabled, both sets are left empty.
#[cfg(any(test, feature = "check_example_domains"))]
static EXAMPLE_DOMAINS: Lazy<HashSet<&'static str>> = Lazy::new(HashSet::default);

#[cfg(any(test, feature = "check_example_domains"))]
static EXAMPLE_TLDS: Lazy<HashSet<&'static str>> = Lazy::new(HashSet::default);
static UNSUPPORTED_DOMAINS: Lazy<HashSet<&'static str>> = Lazy::new(|| {
HashSet::from_iter([
// Twitter requires an account to view tweets
@ -66,7 +75,9 @@ pub fn is_example_domain(uri: &Uri) -> bool {
|| domain
.split_once('.')
.map_or(false, |(_subdomain, tld_part)| tld_part == example)
})
}) || EXAMPLE_TLDS
.iter()
.any(|&example_tld| domain.ends_with(example_tld))
}
None => {
// Check if the URI is an email address.