mirror of
https://github.com/Hopiu/lychee.git
synced 2026-05-19 11:11:06 +00:00
Fix skipping of email addresses in stylesheets (#1546)
This commit is contained in:
parent
3094bbca33
commit
e43086c2e9
4 changed files with 66 additions and 3 deletions
1
fixtures/TEST_STYLESHEET_LINK.md
vendored
Normal file
1
fixtures/TEST_STYLESHEET_LINK.md
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
<link href="/@global/global.css" rel="stylesheet">
|
||||
|
|
@ -231,6 +231,17 @@ mod cli {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Test that a stylesheet link whose `href` contains an `@`
/// (see the `TEST_STYLESHEET_LINK.md` fixture) is not counted as a link:
/// the expected stats report a total of zero.
#[test]
fn test_stylesheet_misinterpreted_as_email() -> Result<()> {
    test_json_output!(
        "TEST_STYLESHEET_LINK.md",
        MockResponseStats {
            total: 0,
            ..MockResponseStats::default()
        }
    )
}
|
||||
|
||||
/// Test that a GitHub link can be checked without specifying the token.
|
||||
#[test]
|
||||
fn test_check_github_no_token() -> Result<()> {
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ impl TokenSink for LinkExtractor {
|
|||
return TokenSinkResult::Continue;
|
||||
}
|
||||
|
||||
for attr in attrs {
|
||||
for attr in &attrs {
|
||||
let urls = LinkExtractor::extract_urls_from_elem_attr(
|
||||
&attr.name.local,
|
||||
&name,
|
||||
|
|
@ -104,8 +104,11 @@ impl TokenSink for LinkExtractor {
|
|||
Some(urls) => urls
|
||||
.into_iter()
|
||||
.filter(|url| {
|
||||
// Only accept email addresses, which occur in `href` attributes
|
||||
// and start with `mailto:`. Technically, email addresses could
|
||||
// Only accept email addresses which
|
||||
// - occur in `href` attributes
|
||||
// - start with `mailto:`
|
||||
//
|
||||
// Technically, email addresses could
|
||||
// also occur in plain text, but we don't want to extract those
|
||||
// because of the high false positive rate.
|
||||
//
|
||||
|
|
@ -115,6 +118,18 @@ impl TokenSink for LinkExtractor {
|
|||
let is_phone = url.starts_with("tel:");
|
||||
let is_href = attr.name.local.as_ref() == "href";
|
||||
|
||||
if attrs.iter().any(|attr| {
|
||||
&attr.name.local == "rel" && attr.value.contains("stylesheet")
|
||||
}) {
|
||||
// Skip virtual/framework-specific stylesheet paths that start with /@ or @
|
||||
// These are typically resolved by dev servers or build tools rather than being real URLs
|
||||
// Examples: /@global/style.css, @tailwind/base.css as in
|
||||
// `<link href="/@global/style.css" rel="stylesheet">`
|
||||
if url.starts_with("/@") || url.starts_with('@') {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
!is_email || (is_mailto && is_href) || (is_phone && is_href)
|
||||
})
|
||||
.map(|url| RawUri {
|
||||
|
|
@ -466,4 +481,14 @@ mod tests {
|
|||
let uris = extract_html(input, false);
|
||||
assert!(uris.is_empty());
|
||||
}
|
||||
|
||||
/// A stylesheet `<link>` whose `href` starts with `/@` must not produce
/// any extracted URI (the `@` must not make it look like an email address).
#[test]
fn test_skip_emails_in_stylesheets() {
    let html = r#"
<link href="/@global/global.css" rel="stylesheet">
"#;

    // Nothing should be extracted from the virtual stylesheet path.
    assert!(extract_html(html, false).is_empty());
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -183,6 +183,22 @@ impl LinkExtractor {
|
|||
return;
|
||||
}
|
||||
|
||||
// Skip virtual/framework-specific stylesheet paths that start with /@ or @
|
||||
// These are typically resolved by dev servers or build tools rather than being real URLs
|
||||
// Examples: /@global/style.css, @tailwind/base.css
|
||||
if self
|
||||
.current_attributes
|
||||
.get("rel")
|
||||
.map_or(false, |rel| rel.contains("stylesheet"))
|
||||
{
|
||||
if let Some(href) = self.current_attributes.get("href") {
|
||||
if href.starts_with("/@") || href.starts_with('@') {
|
||||
self.current_attributes.clear();
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let new_urls = self
|
||||
.extract_urls_from_elem_attr()
|
||||
.into_iter()
|
||||
|
|
@ -662,4 +678,14 @@ mod tests {
|
|||
let uris = extract_html(input, false);
|
||||
assert!(uris.is_empty());
|
||||
}
|
||||
|
||||
/// Extraction must yield nothing for a stylesheet `<link>` whose `href`
/// is a `/@`-prefixed path, even though the value contains an `@`.
#[test]
fn test_skip_emails_in_stylesheets() {
    let stylesheet_link = r#"
<link href="/@global/global.css" rel="stylesheet">
"#;

    let extracted = extract_html(stylesheet_link, false);

    // The `/@global/...` href is expected to be skipped entirely.
    assert!(extracted.is_empty());
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue