Be more permissive around private GH repos

The Github API doesn't handle checking individual files inside repos or
paths like `github.com/org/repo/issues`, so we are more
permissive and only check for repo existence. This is the
only way to get a basic check for private repos. Public repos are not affected and should work
with a normal check.
This commit is contained in:
Matthias 2022-01-11 00:40:22 +01:00
parent e91c0c60f0
commit 8d445a3a4b
2 changed files with 106 additions and 41 deletions

View file

@ -260,26 +260,41 @@ impl Client {
wait *= 2;
status = self.check_default(uri).await;
}
// Pull out the heavy weapons in case of a failed normal request.
// Pull out the heavy machinery in case of a failed normal request.
// This could be a Github URL and we run into the rate limiter.
if let Some((owner, repo)) = uri.gh_org_and_repo() {
return self.check_github(owner, repo).await;
if let Some(github_uri) = uri.gh_org_and_repo() {
if let Some(client) = &self.github_client {
match client.repo(github_uri.owner, github_uri.repo).get().await {
Ok(repo) => {
if repo.private {
// Simplify checking private repos by
// assuming the path exists if the main repo exists
// E.g. `github.com/org/private/issues` would exist
// because `github.com/org/private` exists
return Status::Ok(StatusCode::OK);
} else {
if github_uri.endpoint.is_some() {
// The URI returned a non-200 status code in a
// normal request and now we find that the repo
// exists, so the full URI (including the
// additional endpoint) must be invalid.
return Status::Ok(StatusCode::NOT_FOUND);
}
}
}
Err(e) => {
return e.into();
}
}
} else {
return ErrorKind::MissingGitHubToken.into();
}
}
status
}
async fn check_github(&self, owner: &str, repo: &str) -> Status {
match &self.github_client {
Some(github) => github
.repo(owner, repo)
.get()
.await
.map_or_else(std::convert::Into::into, |_| Status::Ok(StatusCode::OK)),
None => ErrorKind::MissingGitHubToken.into(),
}
}
async fn check_default(&self, uri: &Uri) -> Status {
let request = match self
.reqwest_client

View file

@ -10,7 +10,7 @@ use crate::{ErrorKind, Result};
use super::raw_uri::RawUri;
lazy_static! {
static ref GITHUB_EXCLUDED_ENDPOINTS: HashSet<&'static str> = HashSet::from_iter([
static ref GITHUB_API_EXCLUDED_ENDPOINTS: HashSet<&'static str> = HashSet::from_iter([
"sponsors",
"marketplace",
"features",
@ -25,6 +25,36 @@ lazy_static! {
]);
}
/// Uri path segments extracted from a Github URL
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug)]
pub(crate) struct GithubUri {
/// Organization name
pub(crate) owner: String,
/// Repository name
pub(crate) repo: String,
/// e.g. `issues` in `/org/repo/issues`
pub(crate) endpoint: Option<String>,
}
impl GithubUri {
/// Create a new Github URI without an endpoint
fn new<T: Into<String>>(owner: T, repo: T) -> Self {
GithubUri {
owner: owner.into(),
repo: repo.into(),
endpoint: None,
}
}
fn with_endpoint<T: Into<String>>(owner: T, repo: T, endpoint: T) -> Self {
GithubUri {
owner: owner.into(),
repo: repo.into(),
endpoint: Some(endpoint.into()),
}
}
}
/// Lychee's own representation of a URI, which encapsulates all supported
/// formats.
///
@ -84,7 +114,7 @@ impl Uri {
}
// TODO: Support GitLab etc.
pub(crate) fn gh_org_and_repo(&self) -> Option<(&str, &str)> {
pub(crate) fn gh_org_and_repo(&self) -> Option<GithubUri> {
fn remove_suffix<'a>(input: &'a str, suffix: &str) -> &'a str {
if let Some(stripped) = input.strip_suffix(suffix) {
return stripped;
@ -99,17 +129,19 @@ impl Uri {
"github.com" | "www.github.com" | "raw.githubusercontent.com"
) {
let parts: Vec<_> = self.path_segments()?.collect();
if parts.len() != 2 {
// Accept additional _empty_ path segment after last slash
if !(parts.len() == 3 && parts[2].is_empty()) {
// Not a valid org/repo pair.
// Skip this as the API doesn't handle files etc.
return None;
}
if parts.len() < 2 {
// Not a valid org/repo pair.
// Note: We don't check for exactly 2 here, because the Github
// API doesn't handle checking individual files inside repos or
// paths like `github.com/org/repo/issues`, so we are more
// permissive and only check for repo existence. This is the
// only way to get a basic check for private repos. Public repos
// are not affected and should work with a normal check.
return None;
}
let owner = parts[0];
if GITHUB_EXCLUDED_ENDPOINTS.contains(owner) {
if GITHUB_API_EXCLUDED_ENDPOINTS.contains(owner) {
return None;
}
@ -118,7 +150,17 @@ impl Uri {
// the suffix. See https://github.com/lycheeverse/lychee/issues/384
let repo = remove_suffix(repo, ".git");
return Some((owner, repo));
let endpoint = if parts.len() > 2 && !parts[2].is_empty() {
Some(parts[2..].join("/"))
} else {
None
};
return Some(GithubUri {
owner: owner.to_string(),
repo: repo.to_string(),
endpoint,
});
}
None
@ -214,7 +256,10 @@ mod test {
use pretty_assertions::assert_eq;
use super::Uri;
use crate::test_utils::{mail, website};
use crate::{
test_utils::{mail, website},
types::uri::GithubUri,
};
#[test]
fn test_uri_from_str() {
@ -270,42 +315,47 @@ mod test {
fn test_github() {
assert_eq!(
website("http://github.com/lycheeverse/lychee").gh_org_and_repo(),
Some(("lycheeverse", "lychee"))
Some(GithubUri::new("lycheeverse", "lychee"))
);
assert_eq!(
website("http://www.github.com/lycheeverse/lychee").gh_org_and_repo(),
Some(("lycheeverse", "lychee"))
Some(GithubUri::new("lycheeverse", "lychee"))
);
assert_eq!(
website("https://github.com/lycheeverse/lychee").gh_org_and_repo(),
Some(("lycheeverse", "lychee"))
Some(GithubUri::new("lycheeverse", "lychee"))
);
assert_eq!(
website("https://github.com/lycheeverse/lychee/").gh_org_and_repo(),
Some(("lycheeverse", "lychee"))
Some(GithubUri::new("lycheeverse", "lychee"))
);
assert_eq!(
website("https://github.com/Microsoft/python-language-server.git").gh_org_and_repo(),
Some(("Microsoft", "python-language-server"))
Some(GithubUri::new("Microsoft", "python-language-server"))
);
assert_eq!(
website("https://github.com/lycheeverse/lychee/foo/bar").gh_org_and_repo(),
Some(GithubUri::with_endpoint("lycheeverse", "lychee", "foo/bar"))
);
assert_eq!(
website("https://github.com/lycheeverse/lychee/blob/master/NON_EXISTENT_FILE.md")
.gh_org_and_repo(),
Some(GithubUri::with_endpoint(
"lycheeverse",
"lychee",
"blob/master/NON_EXISTENT_FILE.md"
))
);
}
#[test]
fn test_github_false_positives() {
assert!(website("https://github.com/lycheeverse/lychee/foo/bar")
.gh_org_and_repo()
.is_none());
assert!(
website("https://github.com/lycheeverse/lychee/blob/master/NON_EXISTENT_FILE.md")
.gh_org_and_repo()
.is_none()
);
assert!(website("https://github.com/sponsors/analysis-tools-dev ")
.gh_org_and_repo()
.is_none());