From 69e5d566875c560fb5eb30779f1855dc2f8f1bc2 Mon Sep 17 00:00:00 2001
From: Matthias
Date: Sun, 31 Oct 2021 14:53:40 +0100
Subject: [PATCH] Add more known false positive schema domains (#376)

See https://github.com/lycheeverse/lychee-action/issues/53
---
 lychee-lib/src/filter/mod.rs | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/lychee-lib/src/filter/mod.rs b/lychee-lib/src/filter/mod.rs
index fcb2c66..b9d5fb7 100644
--- a/lychee-lib/src/filter/mod.rs
+++ b/lychee-lib/src/filter/mod.rs
@@ -12,6 +12,9 @@ use crate::Uri;
 const FALSE_POSITIVE_PAT: &[&str] = &[
     r"http://www.w3.org/1999/xhtml",
     r"http://www.w3.org/2000/svg",
+    r"https://schemas.microsoft.com",
+    r"http://schemas.zune.net",
+    r"http://schemas.openxmlformats.org",
 ];
 
 #[inline]
@@ -20,7 +23,7 @@ const FALSE_POSITIVE_PAT: &[&str] = &[
 /// default. This behavior can be explicitly overwritten by defining an
 /// `Include` pattern, which will match on a false positive
 pub fn is_false_positive(input: &str) -> bool {
-    input == FALSE_POSITIVE_PAT[0]
+    FALSE_POSITIVE_PAT.iter().any(|pat| input.starts_with(pat))
 }
 
 /// A generic URI filter
@@ -247,6 +250,9 @@ mod test {
         let filter = Filter::default();
 
         assert!(filter.is_excluded(&website("http://www.w3.org/1999/xhtml")));
+        assert!(filter.is_excluded(&website(
+            "http://schemas.openxmlformats.org/markup-compatibility/2006"
+        )));
         assert!(!filter.is_excluded(&website("https://example.org")));
     }
 