Add support for scheme (e.g. HTTPS)

This commit is contained in:
Matthias Endler 2020-08-14 01:54:05 +02:00
parent 09664a5c25
commit e758056f60
3 changed files with 32 additions and 14 deletions

View file

@ -28,12 +28,12 @@ lychee can...
- run with a low memory/CPU footprint
- check multiple files at once
- support checking links from any website URL
- limit scheme (e.g. only check HTTPS links with "https")
SOON:
- automatically retry and backoff
- check relative and absolute paths
- limit scheme
- support input files using wildcards
- set timeout for HTTP requests in seconds. Disabled by default.
- accept custom headers (see https://github.com/rust-lang/crates.io/issues/788)

View file

@ -5,15 +5,6 @@ use reqwest::header::{self, HeaderValue};
use serde_json::Value;
use url::Url;
/// A link checker using an API token for Github links
/// otherwise a normal HTTP client.
pub(crate) struct Checker {
/// HTTP client used for ordinary (non-Github) links.
reqwest_client: reqwest::Client,
/// Github API client, created from the token passed to `try_new`.
gh_client: Github,
/// Optional set of regex patterns; a URL matching any of them is
/// reported as `CheckStatus::Excluded` instead of being checked.
excludes: Option<RegexSet>,
/// Verbosity flag.
// NOTE(review): where this flag is read is not visible in this excerpt.
verbose: bool,
}
#[derive(Debug)]
pub enum CheckStatus {
OK,
@ -63,6 +54,16 @@ impl From<github_rs::StatusCode> for CheckStatus {
}
}
/// A link checker using an API token for Github links
/// otherwise a normal HTTP client.
pub(crate) struct Checker {
/// HTTP client used for ordinary (non-Github) links.
reqwest_client: reqwest::Client,
/// Github API client, created from the token passed to `try_new`.
gh_client: Github,
/// Optional set of regex patterns; a URL matching any of them is
/// treated as excluded instead of being checked.
excludes: Option<RegexSet>,
/// Optional scheme to restrict checking to (e.g. `https`).
/// `try_new` stores it lowercased.
scheme: Option<String>,
/// Verbosity flag.
// NOTE(review): where this flag is read is not visible in this excerpt.
verbose: bool,
}
impl Checker {
/// Creates a new link checker
pub fn try_new(
@ -71,6 +72,7 @@ impl Checker {
max_redirects: usize,
user_agent: String,
allow_insecure: bool,
scheme: Option<String>,
verbose: bool,
) -> Result<Self> {
let mut headers = header::HeaderMap::new();
@ -86,11 +88,14 @@ impl Checker {
.redirect(reqwest::redirect::Policy::limited(max_redirects))
.build()?;
let scheme = scheme.map(|s| s.to_lowercase());
let gh_client = Github::new(token).unwrap();
Ok(Checker {
reqwest_client,
gh_client,
excludes,
scheme,
verbose,
})
}
@ -140,14 +145,22 @@ impl Checker {
status
}
pub async fn check(&self, url: &Url) -> CheckStatus {
// TODO: Indicate that the URL was skipped in the return value.
// (Perhaps we want to return an enum value here: Status::Skipped)
fn excluded(&self, url: &Url) -> bool {
if let Some(excludes) = &self.excludes {
if excludes.is_match(url.as_str()) {
return CheckStatus::Excluded;
return true;
}
}
if Some(url.scheme().to_string()) != self.scheme {
return true;
}
false
}
pub async fn check(&self, url: &Url) -> CheckStatus {
if self.excluded(&url) {
return CheckStatus::Excluded;
}
let ret = self.check_real(&url).await;
match &ret {
@ -192,6 +205,7 @@ mod test {
5,
"curl/7.71.1".to_string(),
allow_insecure,
None,
false,
)
.unwrap();

View file

@ -44,6 +44,9 @@ struct LycheeOptions {
)]
insecure: bool,
#[options(help = "Only test links with given scheme (e.g. https)")]
scheme: Option<String>,
// Accumulate all exclusions in a vector
#[options(help = "Exclude URLs from checking (supports regex)")]
exclude: Vec<String>,
@ -114,6 +117,7 @@ async fn run(opts: LycheeOptions) -> Result<()> {
opts.max_redirects,
opts.user_agent,
opts.insecure,
opts.scheme,
opts.verbose,
)?;