Don't check prefix attribute (#1536)

This commit is contained in:
xlai89 2024-10-20 22:56:46 +02:00 committed by GitHub
parent 3a2533f8f5
commit 7484a1ff6c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 31 additions and 0 deletions

View file

@ -86,6 +86,12 @@ impl TokenSink for LinkExtractor {
}
}
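// Skip the whole element when it carries a `prefix` attribute. The attribute
// only defines a prefix for the current element; its value is not a link to a
// resource. Note: the early return below also skips every other attribute on
// the same element.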
if let Some(_prefix) = attrs.iter().find(|attr| &attr.name.local == "prefix") {
return TokenSinkResult::Continue;
}
for attr in attrs {
let urls = LinkExtractor::extract_urls_from_elem_attr(
&attr.name.local,
@ -416,6 +422,16 @@ mod tests {
assert!(uris.is_empty());
}
/// Regression test: a tag whose `prefix` attribute contains a URL
/// (`og: https://ogp.me/ns#`) must not produce any extracted URIs.
#[test]
fn test_skip_prefix() {
    // The raw string is kept flush-left so its bytes match the fixture exactly.
    let markup = r#"
<html lang="en-EN" prefix="og: https://ogp.me/ns#">
"#;
    assert!(extract_html(markup, false).is_empty());
}
#[test]
fn test_ignore_text_content_links() {
let input = r#"

View file

@ -178,6 +178,11 @@ impl LinkExtractor {
return;
}
if self.current_attributes.contains_key("prefix") {
self.current_attributes.clear();
return;
}
let new_urls = self
.extract_urls_from_elem_attr()
.into_iter()
@ -613,6 +618,16 @@ mod tests {
assert!(uris.is_empty());
}
/// Regression test: the URL inside a `prefix` attribute
/// (`og: https://ogp.me/ns#`) must not be reported as an extracted URI.
#[test]
fn test_skip_prefix() {
    // The raw string is kept flush-left so its bytes match the fixture exactly.
    let html_snippet = r#"
<html lang="en-EN" prefix="og: https://ogp.me/ns#">
"#;
    let found = extract_html(html_snippet, false);
    let nothing_extracted = found.is_empty();
    assert!(nothing_extracted);
}
#[test]
fn test_ignore_text_content_links() {
let input = r#"