From 156f2b03c2abb6265298e268d3e9d8476a26d024 Mon Sep 17 00:00:00 2001 From: Matthias Endler Date: Wed, 12 Aug 2020 12:59:15 +0200 Subject: [PATCH] Make redirects configurable --- README.md | 2 +- src/checker.rs | 18 ++++++++++++------ src/main.rs | 10 +++++++++- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 37e51e0..5a10c39 100644 --- a/README.md +++ b/README.md @@ -22,9 +22,9 @@ lychee can... - fake user agents (required for some firewalls) - skip non-links like anchors or relative URLs - exclude some websites with regular expressions +- handle a configurable number of redirects - SOON: automatically retry and backoff - SOON: optionally ignore SSL certificate errors -- SOON: optionally handle redirects ## How? diff --git a/src/checker.rs b/src/checker.rs index d0cbf2c..0bfbfef 100644 --- a/src/checker.rs +++ b/src/checker.rs @@ -66,7 +66,12 @@ impl From for CheckStatus { impl Checker { /// Creates a new link checker - pub fn try_new(token: String, excludes: Option, verbose: bool) -> Result { + pub fn try_new( + token: String, + excludes: Option, + max_redirects: usize, + verbose: bool, + ) -> Result { let mut headers = header::HeaderMap::new(); // Faking the user agent is necessary for some websites, unfortunately. // Otherwise we get a 403 from the firewall (e.g. Sucuri/Cloudproxy on ldra.com). @@ -76,6 +81,7 @@ impl Checker { let reqwest_client = reqwest::ClientBuilder::new() .gzip(true) .default_headers(headers) + .redirect(reqwest::redirect::Policy::limited(max_redirects)) .build()?; let gh_client = Github::new(token).unwrap(); @@ -187,7 +193,7 @@ mod test { #[tokio::test] async fn test_nonexistent() { - let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap(), None, false).unwrap(); + let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap(), None, 5, false).unwrap(); let res = checker .check(&Url::parse("https://endler.dev/abcd").unwrap()) .await; @@ -196,7 +202,7 @@ mod test { #[test] fn test_is_github() { - let checker = Checker::try_new("foo".into(), None, false).unwrap(); + let checker = Checker::try_new("foo".into(), None, 5, false).unwrap(); assert_eq!( checker .extract_github("https://github.com/mre/idiomatic-rust") @@ -206,7 +212,7 @@ mod test { } #[tokio::test] async fn test_github() { - let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap(), None, false).unwrap(); + let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap(), None, 5, false).unwrap(); assert!(matches!( checker .check(&Url::parse("https://github.com/mre/idiomatic-rust").unwrap()) @@ -217,7 +223,7 @@ mod test { #[tokio::test] async fn test_github_nonexistent() { - let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap(), None, false).unwrap(); + let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap(), None, 5, false).unwrap(); let res = checker .check(&Url::parse("https://github.com/mre/idiomatic-rust-doesnt-exist-man").unwrap()) .await; @@ -226,7 +232,7 @@ mod test { #[tokio::test] async fn test_non_github() { - let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap(), None, false).unwrap(); + let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap(), None, 5, false).unwrap(); let res = checker .check(&Url::parse("https://endler.dev").unwrap()) .await; diff --git a/src/main.rs b/src/main.rs index f4ab1ec..fb2189b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -26,6 +26,9 @@ struct LycheeOptions { #[options(help = "Verbose program output")] verbose: bool, + #[options(help = "Maximum number of allowed redirects", default = "10")] + max_redirects: usize, + // Accumulate all exclusions in a vector #[options(help = "Exclude URLs from checking (supports regex)")] exclude: Vec, @@ -39,7 +42,12 @@ async fn main() -> Result<()> { let excludes = RegexSet::new(opts.exclude).unwrap(); - let checker = Checker::try_new(env::var("GITHUB_TOKEN")?, Some(excludes), opts.verbose)?; + let checker = Checker::try_new( + env::var("GITHUB_TOKEN")?, + Some(excludes), + opts.max_redirects, + opts.verbose, + )?; let md = fs::read_to_string(opts.input.unwrap_or_else(|| "README.md".into()))?; let links = extract_links(&md);