mirror of
https://github.com/Hopiu/lychee.git
synced 2026-05-27 14:34:02 +00:00
Make checking email addresses optional (#1171)
E-Mail checks cause too many false-postives, so we put them behind a flag. * `--exclude-mail` is deprecated (to be removed in 1.0) * `--include-mail` is the new flag This PR also removes the obsolete tests for `--exclude-file`, which was superseded by `.lycheeignore`. Fixes #1089
This commit is contained in:
parent
0760d9241b
commit
04887ee293
12 changed files with 113 additions and 79 deletions
|
|
@ -372,7 +372,10 @@ Options:
|
|||
Exclude loopback IP address range and localhost from checking
|
||||
|
||||
--exclude-mail
|
||||
Exclude all mail addresses from checking
|
||||
Exclude all mail addresses from checking (deprecated; excluded by default)
|
||||
|
||||
--include-mail
|
||||
Also check email addresses
|
||||
|
||||
--remap <REMAP>
|
||||
Remap URI matching pattern to different URI
|
||||
|
|
|
|||
1
fixtures/TEST_EMAIL.md
vendored
1
fixtures/TEST_EMAIL.md
vendored
|
|
@ -1,6 +1,5 @@
|
|||
https://endler.dev
|
||||
test@example.com
|
||||
foo@bar.dev
|
||||
https://example.com
|
||||
octocat+github@github.com
|
||||
mailto:test2@example.com
|
||||
|
|
|
|||
3
fixtures/TEST_EXCLUDE_1.txt
vendored
3
fixtures/TEST_EXCLUDE_1.txt
vendored
|
|
@ -1,3 +0,0 @@
|
|||
https://en.wikipedia.org/*
|
||||
https://ldra.com
|
||||
https://url-does-not-exist
|
||||
1
fixtures/TEST_EXCLUDE_2.txt
vendored
1
fixtures/TEST_EXCLUDE_2.txt
vendored
|
|
@ -1 +0,0 @@
|
|||
https://i.creativecommons.org/p/zero/1.0/88x31.png
|
||||
|
|
@ -37,6 +37,25 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc<CookieStoreMutex>>) -
|
|||
None => None,
|
||||
};
|
||||
|
||||
// `exclude_mail` will be removed in 1.0. Until then, we need to support it.
|
||||
// Therefore, we need to check if both `include_mail` and `exclude_mail` are set to `true`
|
||||
// and return an error if that's the case.
|
||||
if cfg.include_mail && cfg.exclude_mail {
|
||||
return Err(anyhow::anyhow!(
|
||||
"Cannot set both `include-mail` and `exclude-mail` to true"
|
||||
));
|
||||
}
|
||||
|
||||
// By default, clap sets `exclude_mail` to `false`.
|
||||
// Therefore, we need to check if `exclude_mail` is explicitly set to
|
||||
// `true`. If so, we need to set `include_mail` to `false`.
|
||||
// Otherwise, we use the value of `include_mail`.
|
||||
let include_mail = if cfg.exclude_mail {
|
||||
false
|
||||
} else {
|
||||
cfg.include_mail
|
||||
};
|
||||
|
||||
ClientBuilder::builder()
|
||||
.remaps(remaps)
|
||||
.includes(includes)
|
||||
|
|
@ -45,7 +64,7 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc<CookieStoreMutex>>) -
|
|||
.exclude_private_ips(cfg.exclude_private)
|
||||
.exclude_link_local_ips(cfg.exclude_link_local)
|
||||
.exclude_loopback_ips(cfg.exclude_loopback)
|
||||
.exclude_mail(cfg.exclude_mail)
|
||||
.include_mail(include_mail)
|
||||
.max_redirects(cfg.max_redirects)
|
||||
.user_agent(cfg.user_agent.clone())
|
||||
.allow_insecure(cfg.insecure)
|
||||
|
|
|
|||
|
|
@ -177,9 +177,14 @@ fn load_config() -> Result<LycheeOptions> {
|
|||
opts.config.exclude.append(&mut read_lines(&lycheeignore)?);
|
||||
}
|
||||
|
||||
// TODO: Remove this warning and the parameter in a future release
|
||||
// TODO: Remove this warning and the parameter with 1.0
|
||||
if !&opts.config.exclude_file.is_empty() {
|
||||
warn!("WARNING: `--exclude-file` is deprecated and will soon be removed; use `{}` file to ignore URL patterns instead. To exclude paths of files and directories, use `--exclude-path`.", LYCHEE_IGNORE_FILE);
|
||||
warn!("WARNING: `--exclude-file` is deprecated and will soon be removed; use the `{}` file to ignore URL patterns instead. To exclude paths of files and directories, use `--exclude-path`.", LYCHEE_IGNORE_FILE);
|
||||
}
|
||||
|
||||
// TODO: Remove this warning and the parameter with 1.0
|
||||
if opts.config.exclude_mail {
|
||||
warn!("WARNING: `--exclude-mail` is deprecated and will soon be removed; E-Mail is no longer checked by default. Use `--include-mail` to enable E-Mail checking.");
|
||||
}
|
||||
|
||||
// Load excludes from file
|
||||
|
|
|
|||
|
|
@ -282,10 +282,16 @@ pub(crate) struct Config {
|
|||
pub(crate) exclude_loopback: bool,
|
||||
|
||||
/// Exclude all mail addresses from checking
|
||||
/// (deprecated; excluded by default)
|
||||
#[arg(long)]
|
||||
#[serde(default)]
|
||||
pub(crate) exclude_mail: bool,
|
||||
|
||||
/// Also check email addresses
|
||||
#[arg(long)]
|
||||
#[serde(default)]
|
||||
pub(crate) include_mail: bool,
|
||||
|
||||
/// Remap URI matching pattern to different URI
|
||||
#[serde(default)]
|
||||
#[arg(long)]
|
||||
|
|
|
|||
|
|
@ -121,16 +121,29 @@ mod cli {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn test_exclude_email() -> Result<()> {
|
||||
fn test_email() -> Result<()> {
|
||||
test_json_output!(
|
||||
"TEST_EMAIL.md",
|
||||
MockResponseStats {
|
||||
total: 6,
|
||||
excludes: 4,
|
||||
successful: 2,
|
||||
total: 5,
|
||||
excludes: 0,
|
||||
successful: 5,
|
||||
..MockResponseStats::default()
|
||||
},
|
||||
"--exclude-mail"
|
||||
"--include-mail"
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_exclude_email_by_default() -> Result<()> {
|
||||
test_json_output!(
|
||||
"TEST_EMAIL.md",
|
||||
MockResponseStats {
|
||||
total: 5,
|
||||
excludes: 3,
|
||||
successful: 2,
|
||||
..MockResponseStats::default()
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
|
|
@ -141,6 +154,7 @@ mod cli {
|
|||
|
||||
cmd.arg("--dump")
|
||||
.arg(input)
|
||||
.arg("--include-mail")
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(contains("hello@example.org?subject=%5BHello%5D"));
|
||||
|
|
@ -155,6 +169,7 @@ mod cli {
|
|||
|
||||
cmd.arg("--dump")
|
||||
.arg(input)
|
||||
.arg("--include-mail")
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(contains("hello@example.org?subject=%5BHello%5D"));
|
||||
|
|
@ -475,7 +490,8 @@ mod cli {
|
|||
"TEST.md",
|
||||
MockResponseStats {
|
||||
total: 11,
|
||||
successful: 11,
|
||||
successful: 9,
|
||||
excludes: 2,
|
||||
..MockResponseStats::default()
|
||||
}
|
||||
)
|
||||
|
|
@ -491,6 +507,7 @@ mod cli {
|
|||
cmd.arg("--output")
|
||||
.arg(&outfile)
|
||||
.arg("--dump")
|
||||
.arg("--include-mail")
|
||||
.arg(test_path)
|
||||
.assert()
|
||||
.success();
|
||||
|
|
@ -533,42 +550,7 @@ mod cli {
|
|||
.arg("https://ldra.com/")
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(contains("2 Excluded"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_exclude_file() -> Result<()> {
|
||||
let mut cmd = main_command();
|
||||
let test_path = fixtures_path().join("TEST.md");
|
||||
let excludes_path = fixtures_path().join("TEST_EXCLUDE_1.txt");
|
||||
|
||||
cmd.arg(test_path)
|
||||
.arg("--exclude-file")
|
||||
.arg(excludes_path)
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(contains("2 Excluded"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_multiple_exclude_files() -> Result<()> {
|
||||
let mut cmd = main_command();
|
||||
let test_path = fixtures_path().join("TEST.md");
|
||||
let excludes_path1 = fixtures_path().join("TEST_EXCLUDE_1.txt");
|
||||
let excludes_path2 = fixtures_path().join("TEST_EXCLUDE_2.txt");
|
||||
|
||||
cmd.arg(test_path)
|
||||
.arg("--exclude-file")
|
||||
.arg(excludes_path1)
|
||||
.arg("--exclude-file")
|
||||
.arg(excludes_path2)
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(contains("3 Excluded"));
|
||||
.stdout(contains("4 Excluded"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ mod cli {
|
|||
|
||||
let cmd = cmd
|
||||
.arg(input)
|
||||
.arg("--include-mail")
|
||||
.arg("--dump")
|
||||
.assert()
|
||||
.success()
|
||||
|
|
|
|||
|
|
@ -179,8 +179,8 @@ pub struct ClientBuilder {
|
|||
/// [IETF RFC 4291 section 2.5.3]: https://tools.ietf.org/html/rfc4291#section-2.5.3
|
||||
exclude_loopback_ips: bool,
|
||||
|
||||
/// When `true`, don't check mail addresses.
|
||||
exclude_mail: bool,
|
||||
/// When `true`, check mail addresses.
|
||||
include_mail: bool,
|
||||
|
||||
/// Maximum number of redirects per request before returning an error.
|
||||
///
|
||||
|
|
@ -367,7 +367,7 @@ impl ClientBuilder {
|
|||
exclude_private_ips: self.exclude_all_private || self.exclude_private_ips,
|
||||
exclude_link_local_ips: self.exclude_all_private || self.exclude_link_local_ips,
|
||||
exclude_loopback_ips: self.exclude_all_private || self.exclude_loopback_ips,
|
||||
exclude_mail: self.exclude_mail,
|
||||
include_mail: self.include_mail,
|
||||
};
|
||||
|
||||
let quirks = Quirks::default();
|
||||
|
|
@ -837,19 +837,8 @@ mod tests {
|
|||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_exclude_mail() {
|
||||
async fn test_exclude_mail_by_default() {
|
||||
let client = ClientBuilder::builder()
|
||||
.exclude_mail(false)
|
||||
.exclude_all_private(true)
|
||||
.build()
|
||||
.client()
|
||||
.unwrap();
|
||||
assert!(!client.is_excluded(&Uri {
|
||||
url: "mailto://mail@example.com".try_into().unwrap()
|
||||
}));
|
||||
|
||||
let client = ClientBuilder::builder()
|
||||
.exclude_mail(true)
|
||||
.exclude_all_private(true)
|
||||
.build()
|
||||
.client()
|
||||
|
|
@ -859,6 +848,29 @@ mod tests {
|
|||
}));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_include_mail() {
|
||||
let client = ClientBuilder::builder()
|
||||
.include_mail(false)
|
||||
.exclude_all_private(true)
|
||||
.build()
|
||||
.client()
|
||||
.unwrap();
|
||||
assert!(client.is_excluded(&Uri {
|
||||
url: "mailto://mail@example.com".try_into().unwrap()
|
||||
}));
|
||||
|
||||
let client = ClientBuilder::builder()
|
||||
.include_mail(true)
|
||||
.exclude_all_private(true)
|
||||
.build()
|
||||
.client()
|
||||
.unwrap();
|
||||
assert!(!client.is_excluded(&Uri {
|
||||
url: "mailto://mail@example.com".try_into().unwrap()
|
||||
}));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_require_https() {
|
||||
let client = ClientBuilder::builder().build().client().unwrap();
|
||||
|
|
|
|||
|
|
@ -113,15 +113,15 @@ pub struct Filter {
|
|||
/// For IPv6: ::1/128
|
||||
pub exclude_loopback_ips: bool,
|
||||
/// Example: octocat@github.com
|
||||
pub exclude_mail: bool,
|
||||
pub include_mail: bool,
|
||||
}
|
||||
|
||||
impl Filter {
|
||||
#[inline]
|
||||
#[must_use]
|
||||
/// Whether e-mails aren't checked
|
||||
/// Whether e-mails aren't checked (which is the default)
|
||||
pub fn is_mail_excluded(&self, uri: &Uri) -> bool {
|
||||
self.exclude_mail && uri.is_mail()
|
||||
uri.is_mail() && !self.include_mail
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
|
|
@ -179,7 +179,7 @@ impl Filter {
|
|||
/// # Details
|
||||
///
|
||||
/// 1. If any of the following conditions are met, the URI is excluded:
|
||||
/// - If it's a mail address and it's configured to ignore mail addresses.
|
||||
/// - If it's a mail address and it's not configured to include mail addresses.
|
||||
/// - If the IP address belongs to a type that is configured to exclude.
|
||||
/// - If the host belongs to a type that is configured to exclude.
|
||||
/// - If the scheme of URI is not the allowed scheme.
|
||||
|
|
@ -196,10 +196,10 @@ impl Filter {
|
|||
#[must_use]
|
||||
pub fn is_excluded(&self, uri: &Uri) -> bool {
|
||||
// Skip mail address, specific IP, specific host and scheme
|
||||
if self.is_mail_excluded(uri)
|
||||
|| self.is_ip_excluded(uri)
|
||||
if self.is_scheme_excluded(uri)
|
||||
|| self.is_host_excluded(uri)
|
||||
|| self.is_scheme_excluded(uri)
|
||||
|| self.is_ip_excluded(uri)
|
||||
|| self.is_mail_excluded(uri)
|
||||
|| is_example_domain(uri)
|
||||
|| is_unsupported_domain(uri)
|
||||
{
|
||||
|
|
@ -211,7 +211,7 @@ impl Filter {
|
|||
if self.is_includes_empty() {
|
||||
if self.is_excludes_empty() {
|
||||
// Both excludes and includes rules are empty:
|
||||
// *Presumably included* unless it's false positive
|
||||
// *Presumably included* unless it's a false positive
|
||||
return is_false_positive(input);
|
||||
}
|
||||
} else if self.is_includes_match(input) {
|
||||
|
|
@ -363,9 +363,8 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn test_exclude_mail() {
|
||||
fn test_exclude_mail_by_default() {
|
||||
let filter = Filter {
|
||||
exclude_mail: true,
|
||||
..Filter::default()
|
||||
};
|
||||
|
||||
|
|
@ -374,6 +373,18 @@ mod tests {
|
|||
assert!(!filter.is_excluded(&website("http://bar.dev")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_include_mail() {
|
||||
let filter = Filter {
|
||||
include_mail: true,
|
||||
..Filter::default()
|
||||
};
|
||||
|
||||
assert!(!filter.is_excluded(&mail("mail@example.com")));
|
||||
assert!(!filter.is_excluded(&mail("foo@bar.dev")));
|
||||
assert!(!filter.is_excluded(&website("http://bar.dev")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_exclude_regex() {
|
||||
let excludes = Excludes {
|
||||
|
|
@ -389,7 +400,7 @@ mod tests {
|
|||
assert!(filter.is_excluded(&mail("mail@example.com")));
|
||||
|
||||
assert!(!filter.is_excluded(&website("http://bar.dev")));
|
||||
assert!(!filter.is_excluded(&mail("foo@bar.dev")));
|
||||
assert!(filter.is_excluded(&mail("foo@bar.dev")));
|
||||
}
|
||||
#[test]
|
||||
fn test_exclude_include_regex() {
|
||||
|
|
|
|||
|
|
@ -110,5 +110,5 @@ exclude_link_local = false
|
|||
# Exclude loopback IP address range and localhost from checking.
|
||||
exclude_loopback = false
|
||||
|
||||
# Exclude all mail addresses from checking.
|
||||
exclude_mail = false
|
||||
# Check mail addresses
|
||||
include_mail = true
|
||||
|
|
|
|||
Loading…
Reference in a new issue