mirror of
https://github.com/Hopiu/lychee.git
synced 2026-04-10 16:30:58 +00:00
Exclude <script> tags by default (#848)
This is a naive approach to excluding script tags from being checked. The reason is that the tag leads to a lot of false positives (e.g. `//unpkg.com/docsify-edit-on-github@1` within a script block gets detected as an e-mail address). A more thorough approach would be to use a tree builder in html5gum and html5ever, but this could have a negative performance impact. I also did not want to add a new flag (e.g. `--include-scripts`) for this setting because the current set of flags around exclusion/inclusion is already quite long. Fixes #821.
This commit is contained in:
parent
982d978e47
commit
9eeea250cd
4 changed files with 38 additions and 2 deletions
|
|
@ -319,7 +319,6 @@ mod tests {
|
|||
website("https://example.com/css/style_relative_url.css"),
|
||||
website("https://example.com/head/home"),
|
||||
website("https://example.com/images/icon.png"),
|
||||
website("https://example.com/js/script.js"),
|
||||
]);
|
||||
|
||||
assert_eq!(links, expected_links);
|
||||
|
|
|
|||
|
|
@ -228,4 +228,21 @@ mod tests {
|
|||
let uris = extract_html(input, false);
|
||||
assert_eq!(uris, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_exclude_script_tags() {
|
||||
let input = r#"
|
||||
<script>
|
||||
var foo = "https://example.com";
|
||||
</script>
|
||||
<a href="https://example.org">i'm fine</a>
|
||||
"#;
|
||||
let expected = vec![RawUri {
|
||||
text: "https://example.org".to_string(),
|
||||
element: Some("a".to_string()),
|
||||
attribute: Some("href".to_string()),
|
||||
}];
|
||||
let uris = extract_html(input, false);
|
||||
assert_eq!(uris, expected);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -316,4 +316,21 @@ mod tests {
|
|||
let uris = extract_html(input, false);
|
||||
assert_eq!(uris, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_exclude_script_tags() {
|
||||
let input = r#"
|
||||
<script>
|
||||
var foo = "https://example.com";
|
||||
</script>
|
||||
<a href="https://example.org">i'm fine</a>
|
||||
"#;
|
||||
let expected = vec![RawUri {
|
||||
text: "https://example.org".to_string(),
|
||||
element: Some("a".to_string()),
|
||||
attribute: Some("href".to_string()),
|
||||
}];
|
||||
let uris = extract_html(input, false);
|
||||
assert_eq!(uris, expected);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,10 +11,13 @@ use plaintext::extract_plaintext;
|
|||
/// Check if the given element is in the list of preformatted ("verbatim") tags.
|
||||
///
|
||||
/// These will be excluded from link checking by default.
|
||||
// Including the <script> tag is debatable, but the alternative is to
|
||||
// have a separate list of tags which need a separate config setting and that
|
||||
// seems worse.
|
||||
pub(crate) fn is_verbatim_elem(name: &str) -> bool {
|
||||
matches!(
|
||||
name,
|
||||
"pre" | "code" | "textarea" | "samp" | "xmp" | "plaintext" | "listing"
|
||||
"code" | "listing" | "plaintext" | "samp" | "script" | "textarea" | "xmp" | "pre"
|
||||
)
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue