mirror of
https://github.com/Hopiu/lychee.git
synced 2026-03-16 20:50:25 +00:00
Use CheckStatus enum for more fine-grained control over check results
This commit is contained in:
parent
745e46e5de
commit
1566a99647
4 changed files with 120 additions and 56 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
|
@ -834,6 +834,7 @@ dependencies = [
|
|||
"futures 0.3.5",
|
||||
"github-rs",
|
||||
"gumdrop",
|
||||
"http 0.1.21",
|
||||
"linkify",
|
||||
"log",
|
||||
"pretty_env_logger",
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ anyhow = "*"
|
|||
futures = "0.3"
|
||||
github-rs = "0.7.0"
|
||||
gumdrop = "*"
|
||||
http = "*"
|
||||
linkify = "*"
|
||||
log = "0.4"
|
||||
pretty_env_logger = "0.4"
|
||||
|
|
|
|||
170
src/checker.rs
170
src/checker.rs
|
|
@ -1,6 +1,5 @@
|
|||
use anyhow::{Context, Result};
|
||||
use github_rs::client::{Executor, Github};
|
||||
use github_rs::StatusCode;
|
||||
use regex::{Regex, RegexSet};
|
||||
use reqwest::header::{self, HeaderValue};
|
||||
use serde_json::Value;
|
||||
|
|
@ -11,13 +10,63 @@ use url::Url;
|
|||
pub(crate) struct Checker {
|
||||
reqwest_client: reqwest::Client,
|
||||
gh_client: Github,
|
||||
excludes: RegexSet,
|
||||
excludes: Option<RegexSet>,
|
||||
verbose: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum CheckStatus {
|
||||
NotSent,
|
||||
OK,
|
||||
Redirect,
|
||||
Excluded,
|
||||
Failed(reqwest::StatusCode),
|
||||
// github-rs is using an older version of hyper.
|
||||
// That's why reqwest::StatusCode and github_rs::StatusCode
|
||||
// are incompatible. As a workaround, we add another state for now.
|
||||
FailedGithub(github_rs::StatusCode),
|
||||
ErrorResponse(reqwest::Error),
|
||||
}
|
||||
|
||||
impl CheckStatus {
|
||||
pub fn is_success(&self) -> bool {
|
||||
// Probably there's a better way to match here... ;)
|
||||
match self {
|
||||
CheckStatus::OK => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<reqwest::StatusCode> for CheckStatus {
|
||||
fn from(s: reqwest::StatusCode) -> Self {
|
||||
if s.is_success() {
|
||||
CheckStatus::OK
|
||||
} else if s.is_redirection() {
|
||||
CheckStatus::Redirect
|
||||
} else {
|
||||
warn!("Request with non-ok status code: {:?}", s);
|
||||
CheckStatus::Failed(s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<github_rs::StatusCode> for CheckStatus {
|
||||
fn from(s: github_rs::StatusCode) -> Self {
|
||||
if s.is_success() {
|
||||
CheckStatus::OK
|
||||
} else if s.is_redirection() {
|
||||
CheckStatus::Redirect
|
||||
} else {
|
||||
debug!("Request with non-ok status code: {:?}", s);
|
||||
CheckStatus::FailedGithub(s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Checker {
|
||||
/// Creates a new link checker
|
||||
pub fn try_new(token: String, excludes: RegexSet, verbose: bool) -> Result<Self> {
|
||||
pub fn try_new(token: String, excludes: Option<RegexSet>, verbose: bool) -> Result<Self> {
|
||||
let mut headers = header::HeaderMap::new();
|
||||
// Faking the user agent is necessary for some websites, unfortunately.
|
||||
// Otherwise we get a 403 from the firewall (e.g. Sucuri/Cloudproxy on ldra.com).
|
||||
|
|
@ -38,7 +87,7 @@ impl Checker {
|
|||
})
|
||||
}
|
||||
|
||||
fn check_github(&self, owner: String, repo: String) -> bool {
|
||||
fn check_github(&self, owner: String, repo: String) -> CheckStatus {
|
||||
info!("Check Github: {}/{}", owner, repo);
|
||||
let (_headers, status, _json) = self
|
||||
.gh_client
|
||||
|
|
@ -48,25 +97,21 @@ impl Checker {
|
|||
.repo(&repo)
|
||||
.execute::<Value>()
|
||||
.expect("Get failed");
|
||||
status == StatusCode::OK
|
||||
status.into()
|
||||
}
|
||||
|
||||
async fn check_normal(&self, url: &Url) -> bool {
|
||||
async fn check_normal(&self, url: &Url) -> CheckStatus {
|
||||
let res = self.reqwest_client.get(url.as_str()).send().await;
|
||||
if res.is_err() {
|
||||
warn!("Cannot send request: {:?}", res);
|
||||
return false;
|
||||
return CheckStatus::NotSent;
|
||||
}
|
||||
if let Ok(res) = res {
|
||||
if res.status().is_success() {
|
||||
true
|
||||
} else {
|
||||
warn!("Request with non-ok status code: {:?}", res);
|
||||
false
|
||||
match res {
|
||||
Ok(response) => response.status().into(),
|
||||
Err(e) => {
|
||||
warn!("Invalid response: {:?}", e);
|
||||
CheckStatus::ErrorResponse(e)
|
||||
}
|
||||
} else {
|
||||
warn!("Invalid response: {:?}", res);
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -78,34 +123,56 @@ impl Checker {
|
|||
Ok((owner.as_str().into(), repo.as_str().into()))
|
||||
}
|
||||
|
||||
pub async fn check_real(&self, url: &Url) -> bool {
|
||||
if self.check_normal(&url).await {
|
||||
return true;
|
||||
pub async fn check_real(&self, url: &Url) -> CheckStatus {
|
||||
let status = self.check_normal(&url).await;
|
||||
if status.is_success() {
|
||||
return status;
|
||||
}
|
||||
// Pull out the heavy weapons in case of a failed normal request.
|
||||
// This could be a Github URL and we run into the rate limiter.
|
||||
if let Ok((owner, repo)) = self.extract_github(url.as_str()) {
|
||||
return self.check_github(owner, repo);
|
||||
}
|
||||
false
|
||||
status
|
||||
}
|
||||
|
||||
pub async fn check(&self, url: &Url) -> bool {
|
||||
pub async fn check(&self, url: &Url) -> CheckStatus {
|
||||
// TODO: Indicate that the URL was skipped in the return value.
|
||||
// (Perhaps we want to return an enum value here: Status::Skipped)
|
||||
if self.excludes.is_match(url.as_str()) {
|
||||
return true;
|
||||
if let Some(excludes) = &self.excludes {
|
||||
if excludes.is_match(url.as_str()) {
|
||||
return CheckStatus::Excluded;
|
||||
}
|
||||
}
|
||||
|
||||
let ret = self.check_real(&url).await;
|
||||
match ret {
|
||||
true => {
|
||||
match &ret {
|
||||
CheckStatus::OK => {
|
||||
if self.verbose {
|
||||
println!("✅{}", &url);
|
||||
}
|
||||
}
|
||||
false => {
|
||||
println!("❌{}", &url);
|
||||
CheckStatus::Redirect => {
|
||||
if self.verbose {
|
||||
println!("🔀️{}", &url);
|
||||
}
|
||||
}
|
||||
CheckStatus::NotSent => {
|
||||
println!("⚠️{}", &url);
|
||||
}
|
||||
CheckStatus::ErrorResponse(e) => {
|
||||
println!("🚫{} ({})", &url, e);
|
||||
}
|
||||
CheckStatus::Failed(e) => {
|
||||
println!("🚫{} ({})", &url, e);
|
||||
}
|
||||
CheckStatus::FailedGithub(e) => {
|
||||
println!("🚫{} ({})", &url, e);
|
||||
}
|
||||
CheckStatus::Excluded => {
|
||||
if self.verbose {
|
||||
println!("⏩{}", &url);
|
||||
}
|
||||
}
|
||||
};
|
||||
ret
|
||||
|
|
@ -118,9 +185,18 @@ mod test {
|
|||
use std::env;
|
||||
use url::Url;
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_nonexistent() {
|
||||
let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap(), None, false).unwrap();
|
||||
let res = checker
|
||||
.check(&Url::parse("https://endler.dev/abcd").unwrap())
|
||||
.await;
|
||||
assert!(matches!(res, CheckStatus::Failed(_)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_github() {
|
||||
let checker = Checker::try_new("foo".into(), false).unwrap();
|
||||
let checker = Checker::try_new("foo".into(), None, false).unwrap();
|
||||
assert_eq!(
|
||||
checker
|
||||
.extract_github("https://github.com/mre/idiomatic-rust")
|
||||
|
|
@ -128,46 +204,32 @@ mod test {
|
|||
("mre".into(), "idiomatic-rust".into())
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_github() {
|
||||
let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap(), false).unwrap();
|
||||
assert_eq!(
|
||||
let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap(), None, false).unwrap();
|
||||
assert!(matches!(
|
||||
checker
|
||||
.check(&Url::parse("https://github.com/mre/idiomatic-rust").unwrap())
|
||||
.await,
|
||||
true
|
||||
);
|
||||
CheckStatus::OK
|
||||
));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_github_nonexistent() {
|
||||
let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap(), false).unwrap();
|
||||
assert_eq!(
|
||||
checker
|
||||
.check(
|
||||
&Url::parse("https://github.com/mre/idiomatic-rust-doesnt-exist-man").unwrap()
|
||||
)
|
||||
.await,
|
||||
false
|
||||
);
|
||||
let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap(), None, false).unwrap();
|
||||
let res = checker
|
||||
.check(&Url::parse("https://github.com/mre/idiomatic-rust-doesnt-exist-man").unwrap())
|
||||
.await;
|
||||
assert!(matches!(res, CheckStatus::FailedGithub(_)));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_non_github() {
|
||||
let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap(), false).unwrap();
|
||||
let valid = checker
|
||||
let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap(), None, false).unwrap();
|
||||
let res = checker
|
||||
.check(&Url::parse("https://endler.dev").unwrap())
|
||||
.await;
|
||||
assert_eq!(valid, true);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_non_github_nonexistent() {
|
||||
let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap(), false).unwrap();
|
||||
let valid = checker
|
||||
.check(&Url::parse("https://endler.dev/abcd").unwrap())
|
||||
.await;
|
||||
assert_eq!(valid, false);
|
||||
assert!(matches!(res, CheckStatus::OK));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -39,14 +39,14 @@ async fn main() -> Result<()> {
|
|||
|
||||
let excludes = RegexSet::new(opts.exclude).unwrap();
|
||||
|
||||
let checker = Checker::try_new(env::var("GITHUB_TOKEN")?, excludes, opts.verbose)?;
|
||||
let checker = Checker::try_new(env::var("GITHUB_TOKEN")?, Some(excludes), opts.verbose)?;
|
||||
let md = fs::read_to_string(opts.input.unwrap_or_else(|| "README.md".into()))?;
|
||||
let links = extract_links(&md);
|
||||
|
||||
let futures: Vec<_> = links.iter().map(|l| checker.check(&l)).collect();
|
||||
let results = join_all(futures).await;
|
||||
|
||||
let errorcode = if results.iter().all(|r| r == &true) {
|
||||
let errorcode = if results.iter().all(|r| r.is_success()) {
|
||||
0
|
||||
} else {
|
||||
1
|
||||
|
|
|
|||
Loading…
Reference in a new issue