Exclude tel scheme from being checked (#1429)

This commit is contained in:
n4n5 2024-05-19 20:31:38 +02:00 committed by GitHub
parent 9e031b6256
commit c3f7fe7ad4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 82 additions and 2 deletions

View file

@ -500,6 +500,7 @@ impl Client {
let status = match uri.scheme() {
_ if uri.is_file() => self.check_file(uri).await,
_ if uri.is_mail() => self.check_mail(uri).await,
_ if uri.is_tel() => self.check_tel(uri).await,
_ => self.check_website(uri, default_chain).await?,
};
@ -702,6 +703,14 @@ impl Client {
pub async fn check_mail(&self, _uri: &Uri) -> Status {
Status::Excluded
}
/// Check a `tel:` URI.
///
/// The URI is never inspected and no request is made: every `tel:` link is
/// reported as [`Status::Excluded`].
// NOTE(review): presumably left `async` so it can be `.await`ed like the
// other `check_*` methods; the body itself never awaits — confirm.
#[allow(clippy::unused_async)]
pub async fn check_tel(&self, _uri: &Uri) -> Status {
Status::Excluded
}
}
// Check if the given `Url` would cause `reqwest` to panic.
@ -907,6 +916,14 @@ mod tests {
}));
}
#[tokio::test]
async fn test_include_tel() {
    // Despite the "include" in the name, this verifies that a client built
    // with default settings treats a `tel:` URI as excluded.
    let tel_uri = Uri {
        url: "tel:1234567890".try_into().unwrap(),
    };
    let default_client = ClientBuilder::builder().build().client().unwrap();

    assert!(default_client.is_excluded(&tel_uri));
}
#[tokio::test]
async fn test_require_https() {
let client = ClientBuilder::builder().build().client().unwrap();

View file

@ -89,9 +89,10 @@ impl TokenSink for LinkExtractor {
// This ignores links like `<img srcset="v2@1.5x.png">`
let is_email = is_email_link(url);
let is_mailto = url.starts_with("mailto:");
let is_phone = url.starts_with("tel:");
let is_href = attr.name.local.as_ref() == "href";
!is_email || (is_mailto && is_href)
!is_email || (is_mailto && is_href) || (is_phone && is_href)
})
.map(|url| RawUri {
text: url.to_string(),
@ -318,6 +319,29 @@ mod tests {
let uris = extract_html(input, false);
assert_eq!(uris, expected);
}
#[test]
fn test_valid_tel() {
    // Minimal document containing one anchor whose `href` is a `tel:` link.
    let html = r#"<!DOCTYPE html>
<html lang="en-US">
<head>
<meta charset="utf-8">
<title>Test</title>
</head>
<body>
<a href="tel:1234567890">
</body>
</html>"#;

    let found = extract_html(html, false);

    // The link must be reported verbatim, attributed to `<a href=…>`.
    let wanted = vec![RawUri {
        text: String::from("tel:1234567890"),
        element: Some(String::from("a")),
        attribute: Some(String::from("href")),
    }];
    assert_eq!(found, wanted);
}
#[test]
fn test_exclude_email_without_mailto() {
let input = r#"<!DOCTYPE html>

View file

@ -172,9 +172,10 @@ impl LinkExtractor {
// This ignores links like `<img srcset="v2@1.5x.png">`
let is_email = is_email_link(url);
let is_mailto = url.starts_with("mailto:");
let is_phone = url.starts_with("tel:");
let is_href = attr == "href";
!is_email || (is_mailto && is_href)
!is_email || (is_mailto && is_href) || (is_phone && is_href)
})
.map(|url| RawUri {
text: url.to_string(),
@ -453,6 +454,28 @@ mod tests {
assert_eq!(uris, expected);
}
#[test]
fn test_valid_tel() {
    // One `<a>` element carrying a `tel:` link in its `href` attribute.
    let document = r#"<!DOCTYPE html>
<html lang="en-US">
<head>
<meta charset="utf-8">
<title>Test</title>
</head>
<body>
<a href="tel:1234567890">
</body>
</html>"#;

    let extracted = extract_html(document, false);

    // Exactly one raw URI is expected, with its source element and attribute.
    let tel_link = RawUri {
        text: String::from("tel:1234567890"),
        element: Some(String::from("a")),
        attribute: Some(String::from("href")),
    };
    assert_eq!(extracted, vec![tel_link]);
}
#[test]
fn test_valid_email() {
let input = r#"<!DOCTYPE html>

View file

@ -214,6 +214,7 @@ impl Filter {
|| self.is_host_excluded(uri)
|| self.is_ip_excluded(uri)
|| self.is_mail_excluded(uri)
|| uri.is_tel()
|| is_example_domain(uri)
|| is_unsupported_domain(uri)
{

View file

@ -96,6 +96,13 @@ impl Uri {
self.scheme() == "mailto"
}
#[inline]
#[must_use]
/// Returns `true` when this URI's scheme is exactly `tel`.
pub fn is_tel(&self) -> bool {
    let scheme = self.scheme();
    scheme == "tel"
}
#[inline]
#[must_use]
/// Check if the URI is a file
@ -325,6 +332,14 @@ mod tests {
);
}
#[test]
fn test_uri_tel() {
    // Parsing must succeed. Comparing the parsed value against a second parse
    // of the same string would only prove that, so the assertions below check
    // how the URI is classified instead.
    let uri = Uri::try_from("tel:1234567890").expect("a `tel:` URI should parse");

    assert!(uri.is_tel());
    // A telephone link must not be mistaken for one of the other special-cased
    // kinds that are dispatched on before websites.
    assert!(!uri.is_mail());
    assert!(!uri.is_file());
}
#[test]
fn test_uri_host_ip_v4() {
assert_eq!(