From 80b8a856accdacffe43b48e29e250b6abab7b187 Mon Sep 17 00:00:00 2001 From: Lucius Hu <1222865+lebensterben@users.noreply.github.com> Date: Fri, 3 Sep 2021 21:21:54 -0400 Subject: [PATCH] Add new flag `--require-https` (#195) --- README.md | 1 + fixtures/TEST_HTTP.html | 1 + lychee-bin/src/main.rs | 1 + lychee-bin/src/options.rs | 6 ++++++ lychee-bin/tests/cli.rs | 12 ++++++++++++ lychee-lib/src/client.rs | 34 +++++++++++++++++++++++++++++++++- lychee-lib/src/types/error.rs | 20 ++++++++++++++------ 7 files changed, 68 insertions(+), 7 deletions(-) create mode 100644 fixtures/TEST_HTTP.html diff --git a/README.md b/README.md index 3931258..4d7be79 100644 --- a/README.md +++ b/README.md @@ -187,6 +187,7 @@ FLAGS: -i, --insecure Proceed for server connections considered insecure (invalid TLS) -n, --no-progress Do not show progress bar. This is recommended for non-interactive shells (e.g. for continuous integration) + --require-https When HTTPS is available, treat HTTP links as errors --skip-missing Skip missing input files (default is to error if they don't exist) -V, --version Prints version information -v, --verbose Verbose program output diff --git a/fixtures/TEST_HTTP.html b/fixtures/TEST_HTTP.html new file mode 100644 index 0000000..88f7f6f --- /dev/null +++ b/fixtures/TEST_HTTP.html @@ -0,0 +1 @@ +Insecure HTTP link \ No newline at end of file diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs index f275e42..3f64f8d 100644 --- a/lychee-bin/src/main.rs +++ b/lychee-bin/src/main.rs @@ -195,6 +195,7 @@ async fn run(cfg: &Config, inputs: Vec) -> Result { .github_token(cfg.github_token.clone()) .schemes(HashSet::from_iter(cfg.scheme.clone())) .accepted(accepted) + .require_https(cfg.require_https) .build() .client() .map_err(|e| anyhow!(e))?; diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs index 2a7c227..be67e0b 100644 --- a/lychee-bin/src/options.rs +++ b/lychee-bin/src/options.rs @@ -251,6 +251,11 @@ pub(crate) struct Config { 
#[structopt(short, long, default_value = "string")] #[serde(default)] pub(crate) format: Format, + + /// When HTTPS is available, treat HTTP links as errors + #[structopt(long)] + #[serde(default)] + pub(crate) require_https: bool, } impl Config { @@ -306,6 +311,7 @@ impl Config { skip_missing: false; glob_ignore_case: false; output: None; + require_https: false; } } } diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index 4a6e036..b42f988 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -436,4 +436,16 @@ mod cli { Ok(()) } + + #[test] + fn test_require_https() -> Result<()> { + let mut cmd = main_command(); + let test_path = fixtures_path().join("TEST_HTTP.html"); + cmd.arg(&test_path).assert().success(); + + let mut cmd = main_command(); + cmd.arg("--require-https").arg(test_path).assert().failure(); + + Ok(()) + } } diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 0234b14..c1175ba 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -39,6 +39,8 @@ pub struct Client { method: reqwest::Method, /// The set of accepted HTTP status codes for valid URIs. accepted: Option>, + /// Require HTTPS URL when it's available. + require_https: bool, /// Override behavior for certain known issues with URIs. 
quirks: Quirks, } @@ -92,6 +94,8 @@ pub struct ClientBuilder { accepted: Option>, /// Response timeout per request timeout: Option, + /// Treat HTTP links as errors when HTTPS is available + require_https: bool, } impl Default for ClientBuilder { @@ -159,6 +163,7 @@ impl ClientBuilder { filter, method: self.method.clone(), accepted: self.accepted.clone(), + require_https: self.require_https, quirks, }) } @@ -176,7 +181,18 @@ impl Client { } else if uri.is_mail() { self.check_mail(&uri).await } else { - self.check_website(&uri).await + match self.check_website(&uri).await { + Status::Ok(code) if self.require_https && uri.scheme() == "http" => { + let mut https_uri = uri.clone(); + https_uri.url.set_scheme("https").unwrap(); + if self.check_website(&https_uri).await.is_success() { + Status::Error(Box::new(ErrorKind::InsecureURL(https_uri))) + } else { + Status::Ok(code) + } + } + s => s, + } }; Ok(Response::new(uri, status, source)) @@ -365,6 +381,22 @@ mod test { assert!(res.status().is_success()); } + #[tokio::test] + async fn test_require_https() { + let client = ClientBuilder::builder().build().client().unwrap(); + let res = client.check("http://example.org").await.unwrap(); + assert!(res.status().is_success()); + + // Same request will fail if HTTPS is required + let client = ClientBuilder::builder() + .require_https(true) + .build() + .client() + .unwrap(); + let res = client.check("http://example.org").await.unwrap(); + assert!(res.status().is_failure()); + } + #[tokio::test] async fn test_timeout() { // Note: this checks response timeout, not connect timeout. 
diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index 575a2fc..0710f5e 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -10,12 +10,12 @@ use crate::Uri; #[derive(Debug)] #[non_exhaustive] pub enum ErrorKind { - /// Any form of I/O error occurred while reading from a given path // TODO: maybe need to be splitted; currently first slot is Some only for reading files + /// Any form of I/O error occurred while reading from a given path. IoError(Option, std::io::Error), - /// Network error when trying to connect to an endpoint via reqwest + /// Network error when trying to connect to an endpoint via reqwest. ReqwestError(reqwest::Error), - /// Network error when trying to connect to an endpoint via hubcaps + /// Network error when trying to connect to an endpoint via hubcaps. HubcapsError(hubcaps::Error), /// The given string can not be parsed into a valid URL or e-mail address UrlParseError(String, (url::ParseError, Option)), @@ -27,8 +27,10 @@ pub enum ErrorKind { InvalidHeader(InvalidHeaderValue), /// The given UNIX glob pattern is invalid InvalidGlobPattern(glob::PatternError), - /// The Github API could not be called because of a missing Github token + /// The Github API could not be called because of a missing Github token. MissingGitHubToken, + /// The website is available in HTTPS protocol, but HTTP scheme is used. 
+ InsecureURL(Uri), } impl PartialEq for ErrorKind { @@ -38,7 +40,8 @@ impl PartialEq for ErrorKind { (Self::ReqwestError(e1), Self::ReqwestError(e2)) => e1.to_string() == e2.to_string(), (Self::HubcapsError(e1), Self::HubcapsError(e2)) => e1.to_string() == e2.to_string(), (Self::UrlParseError(s1, e1), Self::UrlParseError(s2, e2)) => s1 == s2 && e1 == e2, - (Self::UnreachableEmailAddress(u1), Self::UnreachableEmailAddress(u2)) => u1 == u2, + (Self::UnreachableEmailAddress(u1), Self::UnreachableEmailAddress(u2)) + | (Self::InsecureURL(u1), Self::InsecureURL(u2)) => u1 == u2, (Self::InvalidGlobPattern(e1), Self::InvalidGlobPattern(e2)) => { e1.msg == e2.msg && e1.pos == e2.pos } @@ -61,7 +64,7 @@ impl Hash for ErrorKind { Self::ReqwestError(e) => e.to_string().hash(state), Self::HubcapsError(e) => e.to_string().hash(state), Self::UrlParseError(s, e) => (s, e.type_id()).hash(state), - Self::UnreachableEmailAddress(u) => u.hash(state), + Self::UnreachableEmailAddress(u) | Self::InsecureURL(u) => u.hash(state), Self::InvalidHeader(e) => e.to_string().hash(state), Self::InvalidGlobPattern(e) => e.to_string().hash(state), Self::MissingGitHubToken => std::mem::discriminant(self).hash(state), @@ -98,6 +101,11 @@ impl Display for ErrorKind { "GitHub token not specified. To check GitHub links reliably, \ use `--github-token` flag / `GITHUB_TOKEN` env var.", ), + Self::InsecureURL(uri) => write!( + f, + "This URL is available in HTTPS protocol, but HTTP is provided, use '{}' instead", + uri + ), } } }