mirror of
https://github.com/Hopiu/lychee.git
synced 2026-04-19 20:51:02 +00:00
Be more permissive around private GH repos
The Github API doesn't handle checking individual files inside repos or paths like `github.com/org/repo/issues`, so we are more permissive and only check for repo existence. This is the only way to get a basic check for private repos. Public repos are not affected and should work with a normal check.
This commit is contained in:
parent
e91c0c60f0
commit
8d445a3a4b
2 changed files with 106 additions and 41 deletions
|
|
@ -260,26 +260,41 @@ impl Client {
|
|||
wait *= 2;
|
||||
status = self.check_default(uri).await;
|
||||
}
|
||||
// Pull out the heavy weapons in case of a failed normal request.
|
||||
|
||||
// Pull out the heavy machinery in case of a failed normal request.
|
||||
// This could be a Github URL and we run into the rate limiter.
|
||||
if let Some((owner, repo)) = uri.gh_org_and_repo() {
|
||||
return self.check_github(owner, repo).await;
|
||||
if let Some(github_uri) = uri.gh_org_and_repo() {
|
||||
if let Some(client) = &self.github_client {
|
||||
match client.repo(github_uri.owner, github_uri.repo).get().await {
|
||||
Ok(repo) => {
|
||||
if repo.private {
|
||||
// Simplify checking private repos by
|
||||
// assuming the path exists if the main repo exists
|
||||
// E.g. `github.com/org/private/issues` would exist
|
||||
// because `github.com/org/private` exists
|
||||
return Status::Ok(StatusCode::OK);
|
||||
} else {
|
||||
if github_uri.endpoint.is_some() {
|
||||
// The URI returned a non-200 status code in a
|
||||
// normal request and now we find that the repo
|
||||
// exists, so the full URI (including the
|
||||
// additional endpoint) must be invalid.
|
||||
return Status::Ok(StatusCode::NOT_FOUND);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
return e.into();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return ErrorKind::MissingGitHubToken.into();
|
||||
}
|
||||
}
|
||||
|
||||
status
|
||||
}
|
||||
|
||||
async fn check_github(&self, owner: &str, repo: &str) -> Status {
|
||||
match &self.github_client {
|
||||
Some(github) => github
|
||||
.repo(owner, repo)
|
||||
.get()
|
||||
.await
|
||||
.map_or_else(std::convert::Into::into, |_| Status::Ok(StatusCode::OK)),
|
||||
None => ErrorKind::MissingGitHubToken.into(),
|
||||
}
|
||||
}
|
||||
|
||||
async fn check_default(&self, uri: &Uri) -> Status {
|
||||
let request = match self
|
||||
.reqwest_client
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ use crate::{ErrorKind, Result};
|
|||
use super::raw_uri::RawUri;
|
||||
|
||||
lazy_static! {
|
||||
static ref GITHUB_EXCLUDED_ENDPOINTS: HashSet<&'static str> = HashSet::from_iter([
|
||||
static ref GITHUB_API_EXCLUDED_ENDPOINTS: HashSet<&'static str> = HashSet::from_iter([
|
||||
"sponsors",
|
||||
"marketplace",
|
||||
"features",
|
||||
|
|
@ -25,6 +25,36 @@ lazy_static! {
|
|||
]);
|
||||
}
|
||||
|
||||
/// Uri path segments extracted from a Github URL
|
||||
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug)]
|
||||
pub(crate) struct GithubUri {
|
||||
/// Organization name
|
||||
pub(crate) owner: String,
|
||||
/// Repository name
|
||||
pub(crate) repo: String,
|
||||
/// e.g. `issues` in `/org/repo/issues`
|
||||
pub(crate) endpoint: Option<String>,
|
||||
}
|
||||
|
||||
impl GithubUri {
|
||||
/// Create a new Github URI without an endpoint
|
||||
fn new<T: Into<String>>(owner: T, repo: T) -> Self {
|
||||
GithubUri {
|
||||
owner: owner.into(),
|
||||
repo: repo.into(),
|
||||
endpoint: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn with_endpoint<T: Into<String>>(owner: T, repo: T, endpoint: T) -> Self {
|
||||
GithubUri {
|
||||
owner: owner.into(),
|
||||
repo: repo.into(),
|
||||
endpoint: Some(endpoint.into()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Lychee's own representation of a URI, which encapsulates all supported
|
||||
/// formats.
|
||||
///
|
||||
|
|
@ -84,7 +114,7 @@ impl Uri {
|
|||
}
|
||||
|
||||
// TODO: Support GitLab etc.
|
||||
pub(crate) fn gh_org_and_repo(&self) -> Option<(&str, &str)> {
|
||||
pub(crate) fn gh_org_and_repo(&self) -> Option<GithubUri> {
|
||||
fn remove_suffix<'a>(input: &'a str, suffix: &str) -> &'a str {
|
||||
if let Some(stripped) = input.strip_suffix(suffix) {
|
||||
return stripped;
|
||||
|
|
@ -99,17 +129,19 @@ impl Uri {
|
|||
"github.com" | "www.github.com" | "raw.githubusercontent.com"
|
||||
) {
|
||||
let parts: Vec<_> = self.path_segments()?.collect();
|
||||
if parts.len() != 2 {
|
||||
// Accept additional _empty_ path segment after last slash
|
||||
if !(parts.len() == 3 && parts[2].is_empty()) {
|
||||
// Not a valid org/repo pair.
|
||||
// Skip this as the API doesn't handle files etc.
|
||||
return None;
|
||||
}
|
||||
if parts.len() < 2 {
|
||||
// Not a valid org/repo pair.
|
||||
// Note: We don't check for exactly 2 here, because the Github
|
||||
// API doesn't handle checking individual files inside repos or
|
||||
// paths like `github.com/org/repo/issues`, so we are more
|
||||
// permissive and only check for repo existence. This is the
|
||||
// only way to get a basic check for private repos. Public repos
|
||||
// are not affected and should work with a normal check.
|
||||
return None;
|
||||
}
|
||||
|
||||
let owner = parts[0];
|
||||
if GITHUB_EXCLUDED_ENDPOINTS.contains(owner) {
|
||||
if GITHUB_API_EXCLUDED_ENDPOINTS.contains(owner) {
|
||||
return None;
|
||||
}
|
||||
|
||||
|
|
@ -118,7 +150,17 @@ impl Uri {
|
|||
// the suffix. See https://github.com/lycheeverse/lychee/issues/384
|
||||
let repo = remove_suffix(repo, ".git");
|
||||
|
||||
return Some((owner, repo));
|
||||
let endpoint = if parts.len() > 2 && !parts[2].is_empty() {
|
||||
Some(parts[2..].join("/"))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
return Some(GithubUri {
|
||||
owner: owner.to_string(),
|
||||
repo: repo.to_string(),
|
||||
endpoint,
|
||||
});
|
||||
}
|
||||
|
||||
None
|
||||
|
|
@ -214,7 +256,10 @@ mod test {
|
|||
use pretty_assertions::assert_eq;
|
||||
|
||||
use super::Uri;
|
||||
use crate::test_utils::{mail, website};
|
||||
use crate::{
|
||||
test_utils::{mail, website},
|
||||
types::uri::GithubUri,
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn test_uri_from_str() {
|
||||
|
|
@ -270,42 +315,47 @@ mod test {
|
|||
fn test_github() {
|
||||
assert_eq!(
|
||||
website("http://github.com/lycheeverse/lychee").gh_org_and_repo(),
|
||||
Some(("lycheeverse", "lychee"))
|
||||
Some(GithubUri::new("lycheeverse", "lychee"))
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
website("http://www.github.com/lycheeverse/lychee").gh_org_and_repo(),
|
||||
Some(("lycheeverse", "lychee"))
|
||||
Some(GithubUri::new("lycheeverse", "lychee"))
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
website("https://github.com/lycheeverse/lychee").gh_org_and_repo(),
|
||||
Some(("lycheeverse", "lychee"))
|
||||
Some(GithubUri::new("lycheeverse", "lychee"))
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
website("https://github.com/lycheeverse/lychee/").gh_org_and_repo(),
|
||||
Some(("lycheeverse", "lychee"))
|
||||
Some(GithubUri::new("lycheeverse", "lychee"))
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
website("https://github.com/Microsoft/python-language-server.git").gh_org_and_repo(),
|
||||
Some(("Microsoft", "python-language-server"))
|
||||
Some(GithubUri::new("Microsoft", "python-language-server"))
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
website("https://github.com/lycheeverse/lychee/foo/bar").gh_org_and_repo(),
|
||||
Some(GithubUri::with_endpoint("lycheeverse", "lychee", "foo/bar"))
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
website("https://github.com/lycheeverse/lychee/blob/master/NON_EXISTENT_FILE.md")
|
||||
.gh_org_and_repo(),
|
||||
Some(GithubUri::with_endpoint(
|
||||
"lycheeverse",
|
||||
"lychee",
|
||||
"blob/master/NON_EXISTENT_FILE.md"
|
||||
))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_github_false_positives() {
|
||||
assert!(website("https://github.com/lycheeverse/lychee/foo/bar")
|
||||
.gh_org_and_repo()
|
||||
.is_none());
|
||||
|
||||
assert!(
|
||||
website("https://github.com/lycheeverse/lychee/blob/master/NON_EXISTENT_FILE.md")
|
||||
.gh_org_and_repo()
|
||||
.is_none()
|
||||
);
|
||||
|
||||
assert!(website("https://github.com/sponsors/analysis-tools-dev ")
|
||||
.gh_org_and_repo()
|
||||
.is_none());
|
||||
|
|
|
|||
Loading…
Reference in a new issue