From c3f7fe7ad4d88a403c5ef0280233200967863d67 Mon Sep 17 00:00:00 2001 From: n4n5 <56606507+Its-Just-Nans@users.noreply.github.com> Date: Sun, 19 May 2024 20:31:38 +0200 Subject: [PATCH] Exclude `tel` scheme from being checked (#1429) --- lychee-lib/src/client.rs | 17 ++++++++++++++++ lychee-lib/src/extract/html/html5ever.rs | 26 +++++++++++++++++++++++- lychee-lib/src/extract/html/html5gum.rs | 25 ++++++++++++++++++++++- lychee-lib/src/filter/mod.rs | 1 + lychee-lib/src/types/uri/valid.rs | 15 ++++++++++++++ 5 files changed, 82 insertions(+), 2 deletions(-) diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 42cfa4b..3369abf 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -500,6 +500,7 @@ impl Client { let status = match uri.scheme() { _ if uri.is_file() => self.check_file(uri).await, _ if uri.is_mail() => self.check_mail(uri).await, + _ if uri.is_tel() => self.check_tel(uri).await, _ => self.check_website(uri, default_chain).await?, }; @@ -702,6 +703,14 @@ impl Client { pub async fn check_mail(&self, _uri: &Uri) -> Status { Status::Excluded } + + /// Check a `tel` URI + /// + /// This implementation simply excludes all `tel` URIs. + #[allow(clippy::unused_async)] + pub async fn check_tel(&self, _uri: &Uri) -> Status { + Status::Excluded + } } // Check if the given `Url` would cause `reqwest` to panic.
@@ -907,6 +916,14 @@ mod tests { })); } + #[tokio::test] + async fn test_include_tel() { + let client = ClientBuilder::builder().build().client().unwrap(); + assert!(client.is_excluded(&Uri { + url: "tel:1234567890".try_into().unwrap() + })); + } + #[tokio::test] async fn test_require_https() { let client = ClientBuilder::builder().build().client().unwrap(); diff --git a/lychee-lib/src/extract/html/html5ever.rs b/lychee-lib/src/extract/html/html5ever.rs index b9f6373..1ee03b8 100644 --- a/lychee-lib/src/extract/html/html5ever.rs +++ b/lychee-lib/src/extract/html/html5ever.rs @@ -89,9 +89,10 @@ impl TokenSink for LinkExtractor { // This ignores links like `<img srcset="v2@1.5x.png">` let is_email = is_email_link(url); let is_mailto = url.starts_with("mailto:"); + let is_phone = url.starts_with("tel:"); let is_href = attr.name.local.as_ref() == "href"; - !is_email || (is_mailto && is_href) + !is_email || (is_mailto && is_href) || (is_phone && is_href) }) .map(|url| RawUri { text: url.to_string(), @@ -318,6 +319,29 @@ mod tests { let uris = extract_html(input, false); assert_eq!(uris, expected); } + + #[test] + fn test_valid_tel() { + let input = r#" + <!DOCTYPE html> + <html> + <head> + <title>Test</title> + </head> + <body> + <a href="tel:1234567890"> + </body> + </html>"#; + + let expected = vec![RawUri { + text: "tel:1234567890".to_string(), + element: Some("a".to_string()), + attribute: Some("href".to_string()), + }]; + let uris = extract_html(input, false); + assert_eq!(uris, expected); + } + #[test] fn test_exclude_email_without_mailto() { let input = r#" diff --git a/lychee-lib/src/extract/html/html5gum.rs b/lychee-lib/src/extract/html/html5gum.rs index ee61e64..cf9d88f 100644 --- a/lychee-lib/src/extract/html/html5gum.rs +++ b/lychee-lib/src/extract/html/html5gum.rs @@ -172,9 +172,10 @@ impl LinkExtractor { // This ignores links like `<img srcset="v2@1.5x.png">` let is_email = is_email_link(url); let is_mailto = url.starts_with("mailto:"); + let is_phone = url.starts_with("tel:"); let is_href = attr == "href"; - !is_email || (is_mailto && is_href) + !is_email || (is_mailto && is_href) || (is_phone && is_href)
}) .map(|url| RawUri { text: url.to_string(), @@ -453,6 +454,28 @@ mod tests { assert_eq!(uris, expected); } + #[test] + fn test_valid_tel() { + let input = r#" + <!DOCTYPE html> + <html> + <head> + <title>Test</title> + </head> + <body> + <a href="tel:1234567890"> + </body> + </html>"#; + + let expected = vec![RawUri { + text: "tel:1234567890".to_string(), + element: Some("a".to_string()), + attribute: Some("href".to_string()), + }]; + let uris = extract_html(input, false); + assert_eq!(uris, expected); + } + #[test] fn test_valid_email() { let input = r#" diff --git a/lychee-lib/src/filter/mod.rs b/lychee-lib/src/filter/mod.rs index f8d6130..a6cda08 100644 --- a/lychee-lib/src/filter/mod.rs +++ b/lychee-lib/src/filter/mod.rs @@ -214,6 +214,7 @@ impl Filter { || self.is_host_excluded(uri) || self.is_ip_excluded(uri) || self.is_mail_excluded(uri) + || uri.is_tel() || is_example_domain(uri) || is_unsupported_domain(uri) { diff --git a/lychee-lib/src/types/uri/valid.rs b/lychee-lib/src/types/uri/valid.rs index 87954b4..9e9bb72 100644 --- a/lychee-lib/src/types/uri/valid.rs +++ b/lychee-lib/src/types/uri/valid.rs @@ -96,6 +96,13 @@ impl Uri { self.scheme() == "mailto" } + #[inline] + #[must_use] + /// Check if the URI uses the `tel` scheme + pub fn is_tel(&self) -> bool { + self.scheme() == "tel" + } + #[inline] #[must_use] /// Check if the URI is a file @@ -325,6 +332,14 @@ mod tests { ); } + #[test] + fn test_uri_tel() { + assert_eq!( + Uri::try_from("tel:1234567890"), + Ok(Uri::try_from("tel:1234567890").unwrap()) + ); + } + #[test] fn test_uri_host_ip_v4() { assert_eq!(