From c3f7fe7ad4d88a403c5ef0280233200967863d67 Mon Sep 17 00:00:00 2001
From: n4n5 <56606507+Its-Just-Nans@users.noreply.github.com>
Date: Sun, 19 May 2024 20:31:38 +0200
Subject: [PATCH] Exclude `tel` scheme from being checked (#1429)
---
lychee-lib/src/client.rs | 17 ++++++++++++++++
lychee-lib/src/extract/html/html5ever.rs | 26 +++++++++++++++++++++++-
lychee-lib/src/extract/html/html5gum.rs | 25 ++++++++++++++++++++++-
lychee-lib/src/filter/mod.rs | 1 +
lychee-lib/src/types/uri/valid.rs | 15 ++++++++++++++
5 files changed, 82 insertions(+), 2 deletions(-)
diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs
index 42cfa4b..3369abf 100644
--- a/lychee-lib/src/client.rs
+++ b/lychee-lib/src/client.rs
@@ -500,6 +500,7 @@ impl Client {
let status = match uri.scheme() {
_ if uri.is_file() => self.check_file(uri).await,
_ if uri.is_mail() => self.check_mail(uri).await,
+ _ if uri.is_tel() => self.check_tel(uri).await,
_ => self.check_website(uri, default_chain).await?,
};
@@ -702,6 +703,14 @@ impl Client {
pub async fn check_mail(&self, _uri: &Uri) -> Status {
Status::Excluded
}
+
+ /// Check a `tel:` URI (a telephone number link).
+ ///
+ /// No check is performed: every `tel:` URI is reported as [`Status::Excluded`].
+ #[allow(clippy::unused_async)]
+ pub async fn check_tel(&self, _uri: &Uri) -> Status {
+ Status::Excluded
+ }
}
// Check if the given `Url` would cause `reqwest` to panic.
@@ -907,6 +916,14 @@ mod tests {
}));
}
+ #[tokio::test]
+ async fn test_include_tel() {
+ let client = ClientBuilder::builder().build().client().unwrap();
+ let url = "tel:1234567890".try_into().unwrap();
+ // A client built with the default configuration must exclude `tel:` URIs.
+ assert!(client.is_excluded(&Uri { url }));
+ }
+
#[tokio::test]
async fn test_require_https() {
let client = ClientBuilder::builder().build().client().unwrap();
diff --git a/lychee-lib/src/extract/html/html5ever.rs b/lychee-lib/src/extract/html/html5ever.rs
index b9f6373..1ee03b8 100644
--- a/lychee-lib/src/extract/html/html5ever.rs
+++ b/lychee-lib/src/extract/html/html5ever.rs
@@ -89,9 +89,10 @@ impl TokenSink for LinkExtractor {
// This ignores links like `<img srcset="v2@1.5x.png">`
let is_email = is_email_link(url);
let is_mailto = url.starts_with("mailto:");
+ let is_phone = url.starts_with("tel:");
let is_href = attr.name.local.as_ref() == "href";
- !is_email || (is_mailto && is_href)
+ !is_email || (is_mailto && is_href) || (is_phone && is_href)
})
.map(|url| RawUri {
text: url.to_string(),
@@ -318,6 +319,29 @@ mod tests {
let uris = extract_html(input, false);
assert_eq!(uris, expected);
}
+
+ #[test]
+ fn test_valid_tel() {
+ let input = r#"<!DOCTYPE html>
+ <html lang="en-US">
+ <head>
+ <meta charset="utf-8">
+ <title>Test</title>
+ </head>
+ <body>
+ <a href="tel:1234567890">
+ </body>
+ </html>"#;
+
+ let expected = vec![RawUri {
+ text: "tel:1234567890".to_string(),
+ element: Some("a".to_string()),
+ attribute: Some("href".to_string()),
+ }];
+ // The anchor's `href` must be extracted verbatim, `tel:` scheme included.
+ assert_eq!(extract_html(input, false), expected);
+ }
+
#[test]
fn test_exclude_email_without_mailto() {
let input = r#"
diff --git a/lychee-lib/src/extract/html/html5gum.rs b/lychee-lib/src/extract/html/html5gum.rs
index ee61e64..cf9d88f 100644
--- a/lychee-lib/src/extract/html/html5gum.rs
+++ b/lychee-lib/src/extract/html/html5gum.rs
@@ -172,9 +172,10 @@ impl LinkExtractor {
// This ignores links like `<img srcset="v2@1.5x.png">`
let is_email = is_email_link(url);
let is_mailto = url.starts_with("mailto:");
+ let is_phone = url.starts_with("tel:");
let is_href = attr == "href";
- !is_email || (is_mailto && is_href)
+ !is_email || (is_mailto && is_href) || (is_phone && is_href)
})
.map(|url| RawUri {
text: url.to_string(),
@@ -453,6 +454,28 @@ mod tests {
assert_eq!(uris, expected);
}
+ #[test]
+ fn test_valid_tel() {
+ let input = r#"<!DOCTYPE html>
+ <html lang="en-US">
+ <head>
+ <meta charset="utf-8">
+ <title>Test</title>
+ </head>
+ <body>
+ <a href="tel:1234567890">
+ </body>
+ </html>"#;
+
+ let expected = vec![RawUri {
+ text: "tel:1234567890".to_string(),
+ element: Some("a".to_string()),
+ attribute: Some("href".to_string()),
+ }];
+ // The anchor's `href` must be extracted verbatim, `tel:` scheme included.
+ assert_eq!(extract_html(input, false), expected);
+ }
+
#[test]
fn test_valid_email() {
let input = r#"
diff --git a/lychee-lib/src/filter/mod.rs b/lychee-lib/src/filter/mod.rs
index f8d6130..a6cda08 100644
--- a/lychee-lib/src/filter/mod.rs
+++ b/lychee-lib/src/filter/mod.rs
@@ -214,6 +214,7 @@ impl Filter {
|| self.is_host_excluded(uri)
|| self.is_ip_excluded(uri)
|| self.is_mail_excluded(uri)
+ || uri.is_tel()
|| is_example_domain(uri)
|| is_unsupported_domain(uri)
{
diff --git a/lychee-lib/src/types/uri/valid.rs b/lychee-lib/src/types/uri/valid.rs
index 87954b4..9e9bb72 100644
--- a/lychee-lib/src/types/uri/valid.rs
+++ b/lychee-lib/src/types/uri/valid.rs
@@ -96,6 +96,13 @@ impl Uri {
self.scheme() == "mailto"
}
+ #[inline]
+ #[must_use]
+ /// Returns `true` if the URI uses the `tel` scheme (a telephone number link)
+ pub fn is_tel(&self) -> bool {
+ self.scheme() == "tel"
+ }
+
#[inline]
#[must_use]
/// Check if the URI is a file
@@ -325,6 +332,14 @@ mod tests {
);
}
+ #[test]
+ fn test_uri_tel() {
+ // A `tel:` URI must parse and be classified as a telephone link, not as mail.
+ let uri = Uri::try_from("tel:1234567890").unwrap();
+ assert_eq!(uri.scheme(), "tel");
+ assert!(uri.is_tel() && !uri.is_mail());
+ }
+
#[test]
fn test_uri_host_ip_v4() {
assert_eq!(