Fix skipping of email addresses in stylesheets (#1546)

This commit is contained in:
Matthias Endler 2024-10-27 01:32:11 +02:00 committed by GitHub
parent 3094bbca33
commit e43086c2e9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 66 additions and 3 deletions

1
fixtures/TEST_STYLESHEET_LINK.md vendored Normal file
View file

@ -0,0 +1 @@
<link href="/@global/global.css" rel="stylesheet">

View file

@ -231,6 +231,17 @@ mod cli {
Ok(())
}
/// Regression test for a stylesheet link being misinterpreted as an email address.
///
/// The `TEST_STYLESHEET_LINK.md` fixture holds a single
/// `<link href="/@global/global.css" rel="stylesheet">` element. The `@` in the
/// virtual path must not cause the `href` to be picked up as a link, so the
/// expected stats report a `total` of zero with every other field left at its
/// default.
#[test]
fn test_stylesheet_misinterpreted_as_email() -> Result<()> {
test_json_output!(
"TEST_STYLESHEET_LINK.md",
MockResponseStats {
// Nothing in the fixture should be extracted.
total: 0,
..MockResponseStats::default()
}
)
}
/// Test that a GitHub link can be checked without specifying the token.
#[test]
fn test_check_github_no_token() -> Result<()> {

View file

@ -92,7 +92,7 @@ impl TokenSink for LinkExtractor {
return TokenSinkResult::Continue;
}
for attr in attrs {
for attr in &attrs {
let urls = LinkExtractor::extract_urls_from_elem_attr(
&attr.name.local,
&name,
@ -104,8 +104,11 @@ impl TokenSink for LinkExtractor {
Some(urls) => urls
.into_iter()
.filter(|url| {
// Only accept email addresses, which occur in `href` attributes
// and start with `mailto:`. Technically, email addresses could
// Only accept email addresses which
// - occur in `href` attributes
// - start with `mailto:`
//
// Technically, email addresses could
// also occur in plain text, but we don't want to extract those
// because of the high false positive rate.
//
@ -115,6 +118,18 @@ impl TokenSink for LinkExtractor {
let is_phone = url.starts_with("tel:");
let is_href = attr.name.local.as_ref() == "href";
if attrs.iter().any(|attr| {
&attr.name.local == "rel" && attr.value.contains("stylesheet")
}) {
// Skip virtual/framework-specific stylesheet paths that start with /@ or @
// These are typically resolved by dev servers or build tools rather than being real URLs
// Examples: /@global/style.css, @tailwind/base.css as in
// `<link href="/@global/style.css" rel="stylesheet">`
if url.starts_with("/@") || url.starts_with('@') {
return false;
}
}
!is_email || (is_mailto && is_href) || (is_phone && is_href)
})
.map(|url| RawUri {
@ -466,4 +481,14 @@ mod tests {
let uris = extract_html(input, false);
assert!(uris.is_empty());
}
/// A `<link rel="stylesheet">` whose `href` is a virtual path such as
/// `/@global/global.css` must not yield any URIs, even though the `@` makes
/// the path resemble an email address.
#[test]
fn test_skip_emails_in_stylesheets() {
    // The markup is passed inline; the raw string keeps the surrounding newlines.
    let uris = extract_html(
        r#"
<link href="/@global/global.css" rel="stylesheet">
"#,
        false,
    );
    assert!(uris.is_empty());
}
}

View file

@ -183,6 +183,22 @@ impl LinkExtractor {
return;
}
// Skip virtual/framework-specific stylesheet paths that start with /@ or @
// These are typically resolved by dev servers or build tools rather than being real URLs
// Examples: /@global/style.css, @tailwind/base.css
if self
.current_attributes
.get("rel")
.map_or(false, |rel| rel.contains("stylesheet"))
{
if let Some(href) = self.current_attributes.get("href") {
if href.starts_with("/@") || href.starts_with('@') {
self.current_attributes.clear();
return;
}
}
}
let new_urls = self
.extract_urls_from_elem_attr()
.into_iter()
@ -662,4 +678,14 @@ mod tests {
let uris = extract_html(input, false);
assert!(uris.is_empty());
}
/// A `<link rel="stylesheet">` whose `href` is a virtual path such as
/// `/@global/global.css` must not yield any URIs, even though the `@` makes
/// the path resemble an email address.
#[test]
fn test_skip_emails_in_stylesheets() {
    // The markup is passed inline; the raw string keeps the surrounding newlines.
    let uris = extract_html(
        r#"
<link href="/@global/global.css" rel="stylesheet">
"#,
        false,
    );
    assert!(uris.is_empty());
}
}