From 80b8a856accdacffe43b48e29e250b6abab7b187 Mon Sep 17 00:00:00 2001
From: Lucius Hu <1222865+lebensterben@users.noreply.github.com>
Date: Fri, 3 Sep 2021 21:21:54 -0400
Subject: [PATCH] Add new flag `--require-https` (#195)
---
README.md | 1 +
fixtures/TEST_HTTP.html | 1 +
lychee-bin/src/main.rs | 1 +
lychee-bin/src/options.rs | 6 ++++++
lychee-bin/tests/cli.rs | 12 ++++++++++++
lychee-lib/src/client.rs | 34 +++++++++++++++++++++++++++++++++-
lychee-lib/src/types/error.rs | 20 ++++++++++++++------
7 files changed, 68 insertions(+), 7 deletions(-)
create mode 100644 fixtures/TEST_HTTP.html
diff --git a/README.md b/README.md
index 3931258..4d7be79 100644
--- a/README.md
+++ b/README.md
@@ -187,6 +187,7 @@ FLAGS:
-i, --insecure Proceed for server connections considered insecure (invalid TLS)
-n, --no-progress Do not show progress bar.
This is recommended for non-interactive shells (e.g. for continuous integration)
+ --require-https When HTTPS is available, treat HTTP links as errors
--skip-missing Skip missing input files (default is to error if they don't exist)
-V, --version Prints version information
-v, --verbose Verbose program output
diff --git a/fixtures/TEST_HTTP.html b/fixtures/TEST_HTTP.html
new file mode 100644
index 0000000..88f7f6f
--- /dev/null
+++ b/fixtures/TEST_HTTP.html
@@ -0,0 +1 @@
+Insecure HTTP link
\ No newline at end of file
diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs
index f275e42..3f64f8d 100644
--- a/lychee-bin/src/main.rs
+++ b/lychee-bin/src/main.rs
@@ -195,6 +195,7 @@ async fn run(cfg: &Config, inputs: Vec) -> Result {
.github_token(cfg.github_token.clone())
.schemes(HashSet::from_iter(cfg.scheme.clone()))
.accepted(accepted)
+ .require_https(cfg.require_https)
.build()
.client()
.map_err(|e| anyhow!(e))?;
diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs
index 2a7c227..be67e0b 100644
--- a/lychee-bin/src/options.rs
+++ b/lychee-bin/src/options.rs
@@ -251,6 +251,11 @@ pub(crate) struct Config {
#[structopt(short, long, default_value = "string")]
#[serde(default)]
pub(crate) format: Format,
+
+ /// When HTTPS is available, treat HTTP links as errors
+ #[structopt(long)]
+ #[serde(default)]
+ pub(crate) require_https: bool,
}
impl Config {
@@ -306,6 +311,7 @@ impl Config {
skip_missing: false;
glob_ignore_case: false;
output: None;
+ require_https: false;
}
}
}
diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs
index 4a6e036..b42f988 100644
--- a/lychee-bin/tests/cli.rs
+++ b/lychee-bin/tests/cli.rs
@@ -436,4 +436,16 @@ mod cli {
Ok(())
}
+
+ #[test]
+ fn test_require_https() -> Result<()> {
+ let mut cmd = main_command();
+ let test_path = fixtures_path().join("TEST_HTTP.html");
+ cmd.arg(&test_path).assert().success();
+
+ let mut cmd = main_command();
+ cmd.arg("--require-https").arg(test_path).assert().failure();
+
+ Ok(())
+ }
}
diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs
index 0234b14..c1175ba 100644
--- a/lychee-lib/src/client.rs
+++ b/lychee-lib/src/client.rs
@@ -39,6 +39,8 @@ pub struct Client {
method: reqwest::Method,
/// The set of accepted HTTP status codes for valid URIs.
accepted: Option<HashSet<StatusCode>>,
+ /// Require HTTPS URL when it's available.
+ require_https: bool,
/// Override behavior for certain known issues with URIs.
quirks: Quirks,
}
@@ -92,6 +94,8 @@ pub struct ClientBuilder {
accepted: Option<HashSet<StatusCode>>,
/// Response timeout per request
timeout: Option<Duration>,
+ /// Treat HTTP links as errors when HTTPS is available
+ require_https: bool,
}
impl Default for ClientBuilder {
@@ -159,6 +163,7 @@ impl ClientBuilder {
filter,
method: self.method.clone(),
accepted: self.accepted.clone(),
+ require_https: self.require_https,
quirks,
})
}
@@ -176,7 +181,18 @@ impl Client {
} else if uri.is_mail() {
self.check_mail(&uri).await
} else {
- self.check_website(&uri).await
+ match self.check_website(&uri).await {
+ Status::Ok(code) if self.require_https && uri.scheme() == "http" => {
+ let mut https_uri = uri.clone();
+ https_uri.url.set_scheme("https").unwrap();
+ if self.check_website(&https_uri).await.is_success() {
+ Status::Error(Box::new(ErrorKind::InsecureURL(https_uri)))
+ } else {
+ Status::Ok(code)
+ }
+ }
+ s => s,
+ }
};
Ok(Response::new(uri, status, source))
@@ -365,6 +381,22 @@ mod test {
assert!(res.status().is_success());
}
+ #[tokio::test]
+ async fn test_require_https() {
+ let client = ClientBuilder::builder().build().client().unwrap();
+ let res = client.check("http://example.org").await.unwrap();
+ assert!(res.status().is_success());
+
+ // Same request will fail if HTTPS is required
+ let client = ClientBuilder::builder()
+ .require_https(true)
+ .build()
+ .client()
+ .unwrap();
+ let res = client.check("http://example.org").await.unwrap();
+ assert!(res.status().is_failure());
+ }
+
#[tokio::test]
async fn test_timeout() {
// Note: this checks response timeout, not connect timeout.
diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs
index 575a2fc..0710f5e 100644
--- a/lychee-lib/src/types/error.rs
+++ b/lychee-lib/src/types/error.rs
@@ -10,12 +10,12 @@ use crate::Uri;
#[derive(Debug)]
#[non_exhaustive]
pub enum ErrorKind {
- /// Any form of I/O error occurred while reading from a given path
// TODO: maybe need to be splitted; currently first slot is Some only for reading files
+ /// Any form of I/O error occurred while reading from a given path.
IoError(Option, std::io::Error),
- /// Network error when trying to connect to an endpoint via reqwest
+ /// Network error when trying to connect to an endpoint via reqwest.
ReqwestError(reqwest::Error),
- /// Network error when trying to connect to an endpoint via hubcaps
+ /// Network error when trying to connect to an endpoint via hubcaps.
HubcapsError(hubcaps::Error),
/// The given string can not be parsed into a valid URL or e-mail address
UrlParseError(String, (url::ParseError, Option)),
@@ -27,8 +27,10 @@ pub enum ErrorKind {
InvalidHeader(InvalidHeaderValue),
/// The given UNIX glob pattern is invalid
InvalidGlobPattern(glob::PatternError),
- /// The Github API could not be called because of a missing Github token
+ /// The Github API could not be called because of a missing Github token.
MissingGitHubToken,
+ /// The website is available in HTTPS protocol, but HTTP scheme is used.
+ InsecureURL(Uri),
}
impl PartialEq for ErrorKind {
@@ -38,7 +40,8 @@ impl PartialEq for ErrorKind {
(Self::ReqwestError(e1), Self::ReqwestError(e2)) => e1.to_string() == e2.to_string(),
(Self::HubcapsError(e1), Self::HubcapsError(e2)) => e1.to_string() == e2.to_string(),
(Self::UrlParseError(s1, e1), Self::UrlParseError(s2, e2)) => s1 == s2 && e1 == e2,
- (Self::UnreachableEmailAddress(u1), Self::UnreachableEmailAddress(u2)) => u1 == u2,
+ (Self::UnreachableEmailAddress(u1), Self::UnreachableEmailAddress(u2))
+ | (Self::InsecureURL(u1), Self::InsecureURL(u2)) => u1 == u2,
(Self::InvalidGlobPattern(e1), Self::InvalidGlobPattern(e2)) => {
e1.msg == e2.msg && e1.pos == e2.pos
}
@@ -61,7 +64,7 @@ impl Hash for ErrorKind {
Self::ReqwestError(e) => e.to_string().hash(state),
Self::HubcapsError(e) => e.to_string().hash(state),
Self::UrlParseError(s, e) => (s, e.type_id()).hash(state),
- Self::UnreachableEmailAddress(u) => u.hash(state),
+ Self::UnreachableEmailAddress(u) | Self::InsecureURL(u) => u.hash(state),
Self::InvalidHeader(e) => e.to_string().hash(state),
Self::InvalidGlobPattern(e) => e.to_string().hash(state),
Self::MissingGitHubToken => std::mem::discriminant(self).hash(state),
@@ -98,6 +101,11 @@ impl Display for ErrorKind {
"GitHub token not specified. To check GitHub links reliably, \
use `--github-token` flag / `GITHUB_TOKEN` env var.",
),
+ Self::InsecureURL(uri) => write!(
+ f,
+ "This URL is available in HTTPS protocol, but HTTP is provided, use '{}' instead",
+ uri
+ ),
}
}
}