diff --git a/Cargo.lock b/Cargo.lock index d62cbb0..8778e0b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1761,6 +1761,7 @@ dependencies = [ "http", "hubcaps", "jwalk", + "lazy_static", "linkify", "log", "once_cell", diff --git a/lychee-lib/Cargo.toml b/lychee-lib/Cargo.toml index 24f95c9..da180e0 100644 --- a/lychee-lib/Cargo.toml +++ b/lychee-lib/Cargo.toml @@ -47,6 +47,7 @@ cached = "0.26.2" once_cell = "1.9.0" thiserror = "1.0" futures = "0.3.19" +lazy_static = "1.4.0" [dependencies.par-stream] version = "0.7.0" diff --git a/lychee-lib/src/types/uri.rs b/lychee-lib/src/types/uri.rs index 16b3232..fe7e0b6 100644 --- a/lychee-lib/src/types/uri.rs +++ b/lychee-lib/src/types/uri.rs @@ -1,6 +1,7 @@ -use std::{convert::TryFrom, fmt::Display, net::IpAddr}; +use std::{collections::HashSet, convert::TryFrom, fmt::Display, net::IpAddr}; use fast_chemail::parse_email; +use lazy_static::lazy_static; use serde::{Deserialize, Serialize}; use url::Url; @@ -8,6 +9,16 @@ use crate::{ErrorKind, Result}; use super::raw_uri::RawUri; +lazy_static! { + static ref GITHUB_EXCLUDED_ORGS: HashSet<&'static str> = { + let mut m = HashSet::new(); + m.insert("sponsors"); + m.insert("marketplace"); + m.insert("features"); + m + }; +} + /// Lychee's own representation of a URI, which encapsulates all supported /// formats. /// @@ -75,9 +86,12 @@ impl Uri { self.domain()?, "github.com" | "www.github.com" | "raw.githubusercontent.com" ) { - let mut path = self.path_segments()?; - let owner = path.next()?; - let repo = path.next()?; + let mut segments = self.path_segments()?; + let owner = segments.next()?; + if GITHUB_EXCLUDED_ORGS.contains(owner) { + return None; + } + let repo = segments.next()?; return Some((owner, repo)); } @@ -243,6 +257,21 @@ mod test { Some(("lycheeverse", "lychee")) ); + // Check known false positives + assert!(website("https://github.com/sponsors/analysis-tools-dev ") + .extract_github() + .is_none()); + + assert!( + website("https://github.com/marketplace/actions/lychee-broken-link-checker") + .extract_github() + .is_none() + ); + + assert!(website("https://github.com/features/actions") + .extract_github() + .is_none()); + assert!( website("https://pkg.go.dev/github.com/Debian/pkg-go-tools/cmd/pgt-gopath") .extract_github()