Parse URLs before e-mail addresses in `Uri::try_from`.

Previously any non-internal input containing '@' was classified as a mail
address, so links such as https://example.com/@test/test were reported as
e-mails. The input is now parsed as a URL first; only when that fails (and the
link is not internal) is it checked with `fast_chemail::is_valid_email` and
returned as `Uri::Mail`. Adds the `fast_chemail` dependency and regression
tests in src/extract.rs and src/uri.rs.

diff --git a/Cargo.lock b/Cargo.lock
index f2ba242..89ecfe4 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1381,6 +1381,7 @@ dependencies = [
  "deadpool",
  "derive_builder",
  "doc-comment",
+ "fast_chemail",
  "futures",
  "glob",
  "headers",
diff --git a/Cargo.toml b/Cargo.toml
index 7151316..1b3d71c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -26,7 +26,7 @@ hubcaps = { git="https://github.com/softprops/hubcaps.git" }
 linkify = "0.5.0"
 regex = "1.4.4"
 url = "2.2.1"
-# Switch back to version on crates.io after 
+# Switch back to version on crates.io after
 # https://github.com/async-email/async-smtp/pull/36
 # is merged and a new version of check-if-email-exists is released
 check-if-email-exists = { git="https://github.com/reacherhq/check-if-email-exists.git" }
@@ -53,6 +53,7 @@ serde_json = "1.0.64"
 ring = "0.16.19"
 pad = "0.1.6"
 console = "0.14.0"
+fast_chemail = "0.9.6"
 
 [dependencies.reqwest]
 features = ["gzip"]
diff --git a/src/extract.rs b/src/extract.rs
index 020b6c7..19969fd 100644
--- a/src/extract.rs
+++ b/src/extract.rs
@@ -498,4 +498,27 @@ mod test {
 
         assert_eq!(links, expected_links);
     }
+
+    #[test]
+    fn test_extract_urls_with_at_sign_properly() {
+        // note that these used to parse as emails
+        let input = "https://example.com/@test/test http://otherdomain.com/test/@test".to_string();
+        let links: HashSet<Uri> = extract_links(
+            &InputContent::from_string(&input, FileType::Plaintext),
+            None,
+        )
+        .into_iter()
+        .map(|r| r.uri)
+        .collect();
+
+        let expected_links = [
+            website("https://example.com/@test/test"),
+            website("http://otherdomain.com/test/@test"),
+        ]
+        .iter()
+        .cloned()
+        .collect();
+
+        assert_eq!(links, expected_links);
+    }
 }
diff --git a/src/uri.rs b/src/uri.rs
index 57ff67c..7fa34ea 100644
--- a/src/uri.rs
+++ b/src/uri.rs
@@ -1,4 +1,5 @@
 use anyhow::{bail, Result};
+use fast_chemail::is_valid_email;
 use serde::{Deserialize, Serialize};
 use std::net::IpAddr;
 use std::{convert::TryFrom, fmt::Display};
@@ -56,12 +57,12 @@ impl TryFrom<&str> for Uri {
         // Remove the `mailto` scheme if it exists
         // to avoid parsing it as a website URL.
         let s = s.trim_start_matches("mailto:");
-        if s.contains('@') & !is_link_internal {
-            return Ok(Uri::Mail(s.to_string()));
-        }
+
         if let Ok(uri) = Url::parse(s) {
             return Ok(Uri::Website(uri));
-        };
+        } else if !is_link_internal && is_valid_email(s) {
+            return Ok(Uri::Mail(s.to_string()));
+        }
         bail!("Cannot convert to Uri")
     }
 }
@@ -86,6 +87,10 @@ mod test {
             Uri::try_from("http://example.org").unwrap(),
             website("http://example.org")
         );
+        assert_eq!(
+            Uri::try_from("http://example.org/@test/testing").unwrap(),
+            website("http://example.org/@test/testing")
+        );
         assert_eq!(
             Uri::try_from("mail@example.org").unwrap(),
             Uri::Mail("mail@example.org".to_string())