Exclude known false positives from GitHub API check (#445)

Fixes https://github.com/lycheeverse/lychee/issues/431
This commit is contained in:
Matthias 2022-01-06 00:33:53 +01:00 committed by GitHub
parent d9af0817e5
commit 388bbbe7b0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 35 additions and 4 deletions

1
Cargo.lock generated
View file

@ -1761,6 +1761,7 @@ dependencies = [
"http",
"hubcaps",
"jwalk",
"lazy_static",
"linkify",
"log",
"once_cell",

View file

@ -47,6 +47,7 @@ cached = "0.26.2"
once_cell = "1.9.0"
thiserror = "1.0"
futures = "0.3.19"
lazy_static = "1.4.0"
[dependencies.par-stream]
version = "0.7.0"

View file

@ -1,6 +1,7 @@
use std::{convert::TryFrom, fmt::Display, net::IpAddr};
use std::{collections::HashSet, convert::TryFrom, fmt::Display, net::IpAddr};
use fast_chemail::parse_email;
use lazy_static::lazy_static;
use serde::{Deserialize, Serialize};
use url::Url;
@ -8,6 +9,16 @@ use crate::{ErrorKind, Result};
use super::raw_uri::RawUri;
lazy_static! {
    /// First path segments of GitHub URLs that must not be interpreted as a
    /// repository owner (e.g. `github.com/sponsors/...`). URLs whose first
    /// segment is in this set are known false positives for the GitHub API
    /// check.
    static ref GITHUB_EXCLUDED_ORGS: HashSet<&'static str> =
        ["sponsors", "marketplace", "features"]
            .iter()
            .copied()
            .collect();
}
/// Lychee's own representation of a URI, which encapsulates all supported
/// formats.
///
@ -75,9 +86,12 @@ impl Uri {
self.domain()?,
"github.com" | "www.github.com" | "raw.githubusercontent.com"
) {
let mut path = self.path_segments()?;
let owner = path.next()?;
let repo = path.next()?;
let mut segments = self.path_segments()?;
let owner = segments.next()?;
if GITHUB_EXCLUDED_ORGS.contains(owner) {
return None;
}
let repo = segments.next()?;
return Some((owner, repo));
}
@ -243,6 +257,21 @@ mod test {
Some(("lycheeverse", "lychee"))
);
// Check known false positives
assert!(website("https://github.com/sponsors/analysis-tools-dev ")
.extract_github()
.is_none());
assert!(
website("https://github.com/marketplace/actions/lychee-broken-link-checker")
.extract_github()
.is_none()
);
assert!(website("https://github.com/features/actions")
.extract_github()
.is_none());
assert!(
website("https://pkg.go.dev/github.com/Debian/pkg-go-tools/cmd/pgt-gopath")
.extract_github()