diff --git a/README.md b/README.md index 737907a..32bc32c 100644 --- a/README.md +++ b/README.md @@ -372,7 +372,10 @@ Options: Exclude loopback IP address range and localhost from checking --exclude-mail - Exclude all mail addresses from checking + Exclude all mail addresses from checking (deprecated; excluded by default) + + --include-mail + Also check email addresses --remap Remap URI matching pattern to different URI diff --git a/fixtures/TEST_EMAIL.md b/fixtures/TEST_EMAIL.md index b0e3de4..98ea920 100644 --- a/fixtures/TEST_EMAIL.md +++ b/fixtures/TEST_EMAIL.md @@ -1,6 +1,5 @@ https://endler.dev test@example.com -foo@bar.dev https://example.com octocat+github@github.com mailto:test2@example.com diff --git a/fixtures/TEST_EXCLUDE_1.txt b/fixtures/TEST_EXCLUDE_1.txt deleted file mode 100644 index 3e11d35..0000000 --- a/fixtures/TEST_EXCLUDE_1.txt +++ /dev/null @@ -1,3 +0,0 @@ -https://en.wikipedia.org/* -https://ldra.com -https://url-does-not-exist \ No newline at end of file diff --git a/fixtures/TEST_EXCLUDE_2.txt b/fixtures/TEST_EXCLUDE_2.txt deleted file mode 100644 index af90ee7..0000000 --- a/fixtures/TEST_EXCLUDE_2.txt +++ /dev/null @@ -1 +0,0 @@ -https://i.creativecommons.org/p/zero/1.0/88x31.png \ No newline at end of file diff --git a/lychee-bin/src/client.rs b/lychee-bin/src/client.rs index 8d5e581..1bb41a3 100644 --- a/lychee-bin/src/client.rs +++ b/lychee-bin/src/client.rs @@ -37,6 +37,25 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc>) - None => None, }; + // `exclude_mail` will be removed in 1.0. Until then, we need to support it. + // Therefore, we need to check if both `include_mail` and `exclude_mail` are set to `true` + // and return an error if that's the case. + if cfg.include_mail && cfg.exclude_mail { + return Err(anyhow::anyhow!( + "Cannot set both `include-mail` and `exclude-mail` to true" + )); + } + + // By default, clap sets `exclude_mail` to `false`. 
+ // Therefore, we need to check if `exclude_mail` is explicitly set to + // `true`. If so, we need to set `include_mail` to `false`. + // Otherwise, we use the value of `include_mail`. + let include_mail = if cfg.exclude_mail { + false + } else { + cfg.include_mail + }; + ClientBuilder::builder() .remaps(remaps) .includes(includes) @@ -45,7 +64,7 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc>) - .exclude_private_ips(cfg.exclude_private) .exclude_link_local_ips(cfg.exclude_link_local) .exclude_loopback_ips(cfg.exclude_loopback) - .exclude_mail(cfg.exclude_mail) + .include_mail(include_mail) .max_redirects(cfg.max_redirects) .user_agent(cfg.user_agent.clone()) .allow_insecure(cfg.insecure) diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs index 353a959..0a56fc5 100644 --- a/lychee-bin/src/main.rs +++ b/lychee-bin/src/main.rs @@ -177,9 +177,14 @@ fn load_config() -> Result { opts.config.exclude.append(&mut read_lines(&lycheeignore)?); } - // TODO: Remove this warning and the parameter in a future release + // TODO: Remove this warning and the parameter with 1.0 if !&opts.config.exclude_file.is_empty() { - warn!("WARNING: `--exclude-file` is deprecated and will soon be removed; use `{}` file to ignore URL patterns instead. To exclude paths of files and directories, use `--exclude-path`.", LYCHEE_IGNORE_FILE); + warn!("WARNING: `--exclude-file` is deprecated and will soon be removed; use the `{}` file to ignore URL patterns instead. To exclude paths of files and directories, use `--exclude-path`.", LYCHEE_IGNORE_FILE); + } + + // TODO: Remove this warning and the parameter with 1.0 + if opts.config.exclude_mail { + warn!("WARNING: `--exclude-mail` is deprecated and will soon be removed; E-Mail is no longer checked by default. 
Use `--include-mail` to enable E-Mail checking."); } // Load excludes from file diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs index e2abb9e..c33b3a2 100644 --- a/lychee-bin/src/options.rs +++ b/lychee-bin/src/options.rs @@ -282,10 +282,16 @@ pub(crate) struct Config { pub(crate) exclude_loopback: bool, /// Exclude all mail addresses from checking + /// (deprecated; excluded by default) #[arg(long)] #[serde(default)] pub(crate) exclude_mail: bool, + /// Also check email addresses + #[arg(long)] + #[serde(default)] + pub(crate) include_mail: bool, + /// Remap URI matching pattern to different URI #[serde(default)] #[arg(long)] diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index 1702401..0d82872 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -121,16 +121,29 @@ mod cli { } #[test] - fn test_exclude_email() -> Result<()> { + fn test_email() -> Result<()> { test_json_output!( "TEST_EMAIL.md", MockResponseStats { - total: 6, - excludes: 4, - successful: 2, + total: 5, + excludes: 0, + successful: 5, ..MockResponseStats::default() }, - "--exclude-mail" + "--include-mail" + ) + } + + #[test] + fn test_exclude_email_by_default() -> Result<()> { + test_json_output!( + "TEST_EMAIL.md", + MockResponseStats { + total: 5, + excludes: 3, + successful: 2, + ..MockResponseStats::default() + } ) } @@ -141,6 +154,7 @@ mod cli { cmd.arg("--dump") .arg(input) + .arg("--include-mail") .assert() .success() .stdout(contains("hello@example.org?subject=%5BHello%5D")); @@ -155,6 +169,7 @@ mod cli { cmd.arg("--dump") .arg(input) + .arg("--include-mail") .assert() .success() .stdout(contains("hello@example.org?subject=%5BHello%5D")); @@ -475,7 +490,8 @@ mod cli { "TEST.md", MockResponseStats { total: 11, - successful: 11, + successful: 9, + excludes: 2, ..MockResponseStats::default() } ) @@ -491,6 +507,7 @@ mod cli { cmd.arg("--output") .arg(&outfile) .arg("--dump") + .arg("--include-mail") .arg(test_path) .assert() .success(); @@ 
-533,42 +550,7 @@ mod cli { .arg("https://ldra.com/") .assert() .success() - .stdout(contains("2 Excluded")); - - Ok(()) - } - - #[test] - fn test_exclude_file() -> Result<()> { - let mut cmd = main_command(); - let test_path = fixtures_path().join("TEST.md"); - let excludes_path = fixtures_path().join("TEST_EXCLUDE_1.txt"); - - cmd.arg(test_path) - .arg("--exclude-file") - .arg(excludes_path) - .assert() - .success() - .stdout(contains("2 Excluded")); - - Ok(()) - } - - #[test] - fn test_multiple_exclude_files() -> Result<()> { - let mut cmd = main_command(); - let test_path = fixtures_path().join("TEST.md"); - let excludes_path1 = fixtures_path().join("TEST_EXCLUDE_1.txt"); - let excludes_path2 = fixtures_path().join("TEST_EXCLUDE_2.txt"); - - cmd.arg(test_path) - .arg("--exclude-file") - .arg(excludes_path1) - .arg("--exclude-file") - .arg(excludes_path2) - .assert() - .success() - .stdout(contains("3 Excluded")); + .stdout(contains("4 Excluded")); Ok(()) } diff --git a/lychee-bin/tests/example_domains.rs b/lychee-bin/tests/example_domains.rs index ade7c08..73ad747 100644 --- a/lychee-bin/tests/example_domains.rs +++ b/lychee-bin/tests/example_domains.rs @@ -32,6 +32,7 @@ mod cli { let cmd = cmd .arg(input) + .arg("--include-mail") .arg("--dump") .assert() .success() diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 35054b4..80b5e25 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -179,8 +179,8 @@ pub struct ClientBuilder { /// [IETF RFC 4291 section 2.5.3]: https://tools.ietf.org/html/rfc4291#section-2.5.3 exclude_loopback_ips: bool, - /// When `true`, don't check mail addresses. - exclude_mail: bool, + /// When `true`, check mail addresses. + include_mail: bool, /// Maximum number of redirects per request before returning an error. 
/// @@ -367,7 +367,7 @@ impl ClientBuilder { exclude_private_ips: self.exclude_all_private || self.exclude_private_ips, exclude_link_local_ips: self.exclude_all_private || self.exclude_link_local_ips, exclude_loopback_ips: self.exclude_all_private || self.exclude_loopback_ips, - exclude_mail: self.exclude_mail, + include_mail: self.include_mail, }; let quirks = Quirks::default(); @@ -837,19 +837,8 @@ mod tests { } #[tokio::test] - async fn test_exclude_mail() { + async fn test_exclude_mail_by_default() { let client = ClientBuilder::builder() - .exclude_mail(false) - .exclude_all_private(true) - .build() - .client() - .unwrap(); - assert!(!client.is_excluded(&Uri { - url: "mailto://mail@example.com".try_into().unwrap() - })); - - let client = ClientBuilder::builder() - .exclude_mail(true) .exclude_all_private(true) .build() .client() @@ -859,6 +848,29 @@ mod tests { })); } + #[tokio::test] + async fn test_include_mail() { + let client = ClientBuilder::builder() + .include_mail(false) + .exclude_all_private(true) + .build() + .client() + .unwrap(); + assert!(client.is_excluded(&Uri { + url: "mailto://mail@example.com".try_into().unwrap() + })); + + let client = ClientBuilder::builder() + .include_mail(true) + .exclude_all_private(true) + .build() + .client() + .unwrap(); + assert!(!client.is_excluded(&Uri { + url: "mailto://mail@example.com".try_into().unwrap() + })); + } + #[tokio::test] async fn test_require_https() { let client = ClientBuilder::builder().build().client().unwrap(); diff --git a/lychee-lib/src/filter/mod.rs b/lychee-lib/src/filter/mod.rs index 68e893c..96c1adf 100644 --- a/lychee-lib/src/filter/mod.rs +++ b/lychee-lib/src/filter/mod.rs @@ -113,15 +113,15 @@ pub struct Filter { /// For IPv6: ::1/128 pub exclude_loopback_ips: bool, /// Example: octocat@github.com - pub exclude_mail: bool, + pub include_mail: bool, } impl Filter { #[inline] #[must_use] - /// Whether e-mails aren't checked + /// Whether e-mails aren't checked (which is the default) pub 
fn is_mail_excluded(&self, uri: &Uri) -> bool { - self.exclude_mail && uri.is_mail() + uri.is_mail() && !self.include_mail } #[must_use] @@ -179,7 +179,7 @@ impl Filter { /// # Details /// /// 1. If any of the following conditions are met, the URI is excluded: - /// - If it's a mail address and it's configured to ignore mail addresses. + /// - If it's a mail address and it's not configured to include mail addresses. /// - If the IP address belongs to a type that is configured to exclude. /// - If the host belongs to a type that is configured to exclude. /// - If the scheme of URI is not the allowed scheme. @@ -196,10 +196,10 @@ impl Filter { #[must_use] pub fn is_excluded(&self, uri: &Uri) -> bool { // Skip mail address, specific IP, specific host and scheme - if self.is_mail_excluded(uri) - || self.is_ip_excluded(uri) + if self.is_scheme_excluded(uri) || self.is_host_excluded(uri) - || self.is_scheme_excluded(uri) + || self.is_ip_excluded(uri) + || self.is_mail_excluded(uri) || is_example_domain(uri) || is_unsupported_domain(uri) { @@ -211,7 +211,7 @@ impl Filter { if self.is_includes_empty() { if self.is_excludes_empty() { // Both excludes and includes rules are empty: - // *Presumably included* unless it's false positive + // *Presumably included* unless it's a false positive return is_false_positive(input); } } else if self.is_includes_match(input) { @@ -363,9 +363,8 @@ mod tests { } #[test] - fn test_exclude_mail() { + fn test_exclude_mail_by_default() { let filter = Filter { - exclude_mail: true, ..Filter::default() }; @@ -374,6 +373,18 @@ mod tests { assert!(!filter.is_excluded(&website("http://bar.dev"))); } + #[test] + fn test_include_mail() { + let filter = Filter { + include_mail: true, + ..Filter::default() + }; + + assert!(!filter.is_excluded(&mail("mail@example.com"))); + assert!(!filter.is_excluded(&mail("foo@bar.dev"))); + assert!(!filter.is_excluded(&website("http://bar.dev"))); + } + #[test] fn test_exclude_regex() { let excludes = Excludes { @@ 
-389,7 +400,7 @@ mod tests { assert!(filter.is_excluded(&mail("mail@example.com"))); assert!(!filter.is_excluded(&website("http://bar.dev"))); - assert!(!filter.is_excluded(&mail("foo@bar.dev"))); + assert!(filter.is_excluded(&mail("foo@bar.dev"))); } #[test] fn test_exclude_include_regex() { diff --git a/lychee.example.toml b/lychee.example.toml index 2a51473..9639eba 100644 --- a/lychee.example.toml +++ b/lychee.example.toml @@ -110,5 +110,5 @@ exclude_link_local = false # Exclude loopback IP address range and localhost from checking. exclude_loopback = false -# Exclude all mail addresses from checking. -exclude_mail = false +# Also check mail addresses (they are excluded from checking by default). +include_mail = true