diff --git a/README.md b/README.md
index c5dba08..7e3df8e 100644
--- a/README.md
+++ b/README.md
@@ -28,12 +28,12 @@ lychee can...
 - run with a low memory/CPU footprint
 - check multiple files at once
 - support checking links from any website URL
+- limit scheme (e.g. only check HTTPS links with "https")
 
 SOON:
 
 - automatically retry and backoff
 - check relative and absolute paths
-- limit schema
 - support input files using wildcards
 - set timeout for HTTP requests in seconds. Disabled by default.
 - accept custom headers (see https://github.com/rust-lang/crates.io/issues/788)
diff --git a/src/checker.rs b/src/checker.rs
index 9b57509..66729ee 100644
--- a/src/checker.rs
+++ b/src/checker.rs
@@ -5,15 +5,6 @@ use reqwest::header::{self, HeaderValue};
 use serde_json::Value;
 use url::Url;
 
-/// A link checker using an API token for Github links
-/// otherwise a normal HTTP client.
-pub(crate) struct Checker {
-    reqwest_client: reqwest::Client,
-    gh_client: Github,
-    excludes: Option,
-    verbose: bool,
-}
-
 #[derive(Debug)]
 pub enum CheckStatus {
     OK,
@@ -63,6 +54,16 @@ impl From for CheckStatus {
     }
 }
 
+/// A link checker using an API token for Github links
+/// otherwise a normal HTTP client.
+pub(crate) struct Checker {
+    reqwest_client: reqwest::Client,
+    gh_client: Github,
+    excludes: Option,
+    scheme: Option<String>,
+    verbose: bool,
+}
+
 impl Checker {
     /// Creates a new link checker
     pub fn try_new(
@@ -71,6 +72,7 @@ impl Checker {
         max_redirects: usize,
         user_agent: String,
         allow_insecure: bool,
+        scheme: Option<String>,
         verbose: bool,
     ) -> Result {
         let mut headers = header::HeaderMap::new();
@@ -86,11 +88,14 @@ impl Checker {
             .redirect(reqwest::redirect::Policy::limited(max_redirects))
             .build()?;
 
+        let scheme = scheme.map(|s| s.to_lowercase());
+
         let gh_client = Github::new(token).unwrap();
         Ok(Checker {
             reqwest_client,
             gh_client,
             excludes,
+            scheme,
             verbose,
         })
     }
@@ -140,14 +145,29 @@ impl Checker {
         status
     }
 
-    pub async fn check(&self, url: &Url) -> CheckStatus {
-        // TODO: Indicate that the URL was skipped in the return value.
-        // (Perhaps we want to return an enum value here: Status::Skipped)
+    /// Returns `true` if `url` must be skipped rather than checked.
+    ///
+    /// A URL is skipped when it matches the configured exclude patterns, or
+    /// when a scheme filter is set and the URL uses a different scheme.
+    fn excluded(&self, url: &Url) -> bool {
         if let Some(excludes) = &self.excludes {
             if excludes.is_match(url.as_str()) {
-                return CheckStatus::Excluded;
+                return true;
             }
         }
+        // Without a scheme filter every scheme is accepted; with one, only
+        // URLs whose scheme equals it (stored lowercased) are checked.
+        match self.scheme.as_deref() {
+            Some(scheme) => url.scheme() != scheme,
+            None => false,
+        }
+    }
+
+    /// Checks `url`; excluded URLs yield `CheckStatus::Excluded` up front.
+    pub async fn check(&self, url: &Url) -> CheckStatus {
+        if self.excluded(url) {
+            return CheckStatus::Excluded;
+        }
 
         let ret = self.check_real(&url).await;
         match &ret {
@@ -192,6 +212,7 @@ mod test {
             5,
             "curl/7.71.1".to_string(),
             allow_insecure,
+            None,
             false,
         )
         .unwrap();
diff --git a/src/main.rs b/src/main.rs
index 2d0a1ca..5acad49 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -44,6 +44,9 @@ struct LycheeOptions {
     )]
     insecure: bool,
 
+    #[options(help = "Only test links with given scheme (e.g. https)")]
+    scheme: Option<String>,
+
     // Accumulate all exclusions in a vector
     #[options(help = "Exclude URLs from checking (supports regex)")]
     exclude: Vec,
@@ -114,6 +117,7 @@ async fn run(opts: LycheeOptions) -> Result<()> {
         opts.max_redirects,
         opts.user_agent,
         opts.insecure,
+        opts.scheme,
         opts.verbose,
     )?;
 