From b479a5810e467b9cdb7c42f8d80ce6d02245da83 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 28 Nov 2022 12:23:07 +0100 Subject: [PATCH] Allow overriding accepted status codes for cached URIs (#843) Fixes #840 --- Makefile | 4 ++ lychee-bin/src/commands/check.rs | 17 ++++++-- lychee-bin/src/main.rs | 2 +- lychee-bin/src/parse.rs | 5 +++ lychee-bin/tests/cli.rs | 75 ++++++++++++++++++++++++++++++++ lychee-lib/src/types/cache.rs | 4 +- lychee-lib/src/types/error.rs | 4 ++ lychee-lib/src/types/status.rs | 39 ++++++++++++++--- 8 files changed, 139 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index 4eb7526..bb60d9d 100644 --- a/Makefile +++ b/Makefile @@ -38,6 +38,10 @@ docs: ## Generate and show documentation lint: ## Run linter cargo clippy --all-targets --all-features -- -D warnings +.PHONY: test +test: ## Run tests + cargo test --all-targets --all-features + .PHONY: screencast screencast: ## Create a screencast for the docs svg-term --command 'assets/screencast.sh' --out 'assets/screencast.svg' --width 100 --padding 10 --window \ No newline at end of file diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index 18a2bca..35aa6dc 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -1,3 +1,4 @@ +use std::collections::HashSet; use std::io::{self, Write}; use std::sync::Arc; use std::time::Duration; @@ -30,6 +31,7 @@ where let client = params.client; let cache = params.cache; + let accept = params.cfg.accept; // Start receiving requests tokio::spawn(async move { futures::StreamExt::for_each_concurrent( @@ -37,7 +39,7 @@ where max_concurrency, |request: Result| async { let request = request.expect("cannot read request"); - let response = handle(&client, cache.clone(), request).await; + let response = handle(&client, cache.clone(), request, accept.clone()).await; send_resp .send(response) @@ -112,7 +114,12 @@ where } /// Handle a single request -async fn handle(client: &Client, cache: Arc, request: Request) -> Response { +async fn handle( + client: &Client, + cache: Arc, + request: Request, + accept: Option>, +) -> Response { let uri = request.uri.clone(); if let Some(v) = cache.get(&uri) { // Found a cached request @@ -121,7 +128,11 @@ async fn handle(client: &Client, cache: Arc, request: Request) -> Respons let status = if client.is_excluded(&uri) { Status::Excluded } else { - Status::from(v.value().status) + // Can't impl `Status::from(v.value().status)` here because the + // `accepted` status codes might have changed from the previous run + // and they may have an impact on the interpretation of the status + // code. + Status::from_cache_status(v.value().status, accept) }; return Response::new(uri.clone(), status, request.source); } diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs index 4bdbf73..6cb49d4 100644 --- a/lychee-bin/src/main.rs +++ b/lychee-bin/src/main.rs @@ -178,7 +178,7 @@ fn load_cache(cfg: &Config) -> Option { let elapsed = modified.elapsed().ok()?; if elapsed > cfg.max_cache_age { eprintln!( - "Cache is too old (age: {}, max age: {}). Discarding", + "Cache is too old (age: {}, max age: {}). Discarding and recreating", humantime::format_duration(elapsed), humantime::format_duration(cfg.max_cache_age) ); diff --git a/lychee-bin/src/parse.rs b/lychee-bin/src/parse.rs index 0d3c34e..a60769a 100644 --- a/lychee-bin/src/parse.rs +++ b/lychee-bin/src/parse.rs @@ -53,6 +53,11 @@ pub(crate) fn parse_base(src: &str) -> Result { } /// Parse HTTP status codes into a set of `StatusCode` +/// +/// Note that this function does not convert the status codes into +/// `StatusCode` but rather into `u16` to avoid the need for +/// `http` as a dependency and to support custom status codes, which are +/// necessary for some websites, which don't adhere to the HTTP spec or IANA. pub(crate) fn parse_statuscodes(accept: &str) -> Result> { let mut statuscodes = HashSet::new(); for code in accept.split(',') { diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index a36c773..29a0f50 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -771,6 +771,81 @@ mod cli { Ok(()) } + #[tokio::test] + async fn test_lycheecache_accept_custom_status_codes() -> Result<()> { + let base_path = fixtures_path().join("cache"); + let cache_file = base_path.join(LYCHEE_CACHE_FILE); + + // Unconditionally remove cache file if it exists + let _ = fs::remove_file(&cache_file); + + let mock_server_ok = mock_server!(StatusCode::OK); + let mock_server_teapot = mock_server!(StatusCode::IM_A_TEAPOT); + let mock_server_server_error = mock_server!(StatusCode::INTERNAL_SERVER_ERROR); + + let dir = tempfile::tempdir()?; + let mut file = File::create(dir.path().join("c.md"))?; + + writeln!(file, "{}", mock_server_ok.uri().as_str())?; + writeln!(file, "{}", mock_server_teapot.uri().as_str())?; + writeln!(file, "{}", mock_server_server_error.uri().as_str())?; + + let mut cmd = main_command(); + let test_cmd = cmd + .current_dir(&base_path) + .arg(dir.path().join("c.md")) + .arg("--verbose") + .arg("--cache"); + + assert!( + !cache_file.exists(), + "cache file should not exist before this test" + ); + + // run first without cache to generate the cache file + // ignore exit code + test_cmd + .assert() + .failure() + .code(2) + .stdout(contains(format!( + "[418] {}/ | Failed: Network error: I'm a teapot\n", + mock_server_teapot.uri() + ))) + .stdout(contains(format!( + "[500] {}/ | Failed: Network error: Internal Server Error\n", + mock_server_server_error.uri() + ))); + + // check content of cache file + let data = fs::read_to_string(&cache_file)?; + assert!(data.contains(&format!("{}/,200", mock_server_ok.uri()))); + assert!(data.contains(&format!("{}/,418", mock_server_teapot.uri()))); + assert!(data.contains(&format!("{}/,500", mock_server_server_error.uri()))); + + // run again to verify cache behavior + // this time accept 418 and 500 as valid status codes + test_cmd + .arg("--no-progress") + .arg("--accept") + .arg("200,418,500") + .assert() + .success() + .stdout(contains(format!( + "[418] {}/ | Cached: OK (cached)\n", + mock_server_teapot.uri() + ))) + .stdout(contains(format!( + "[500] {}/ | Cached: OK (cached)\n", + mock_server_server_error.uri() + ))); + + // clear the cache file + fs::remove_file(&cache_file)?; + + Ok(()) + } + #[test] fn test_include_verbatim() -> Result<()> { let mut cmd = main_command(); diff --git a/lychee-lib/src/types/cache.rs b/lychee-lib/src/types/cache.rs index 2b22c27..39f8cac 100644 --- a/lychee-lib/src/types/cache.rs +++ b/lychee-lib/src/types/cache.rs @@ -39,7 +39,9 @@ impl<'de> Deserialize<'de> for CacheStatus { other => match other.parse::() { Ok(code) => match code { // classify successful status codes as cache status success - // TODO does not account for status code overrides passed through the 'accept' flag + // Does not account for status code overrides passed through + // the 'accept' flag. Instead, this is handled at a higher level + // when the cache status is converted to a status. 200..=299 => Ok(CacheStatus::Ok(code)), // classify redirects, client errors, & server errors as cache status error _ => Ok(CacheStatus::Error(Some(code))), diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index 2cb5dbb..d899724 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -93,6 +93,9 @@ pub enum ErrorKind { /// Cannot parse the given URI #[error("The given URI is invalid: {0}")] InvalidURI(Uri), + /// The given status code is invalid (not in the range 100-1000) + #[error("Invalid status code: {0}")] + InvalidStatusCode(u16), /// Regex error #[error("Error when using regex engine: {0}")] Regex(#[from] regex::Error), @@ -195,6 +198,7 @@ impl Hash for ErrorKind { Self::InvalidUriRemap(remap) => (remap).hash(state), Self::InvalidHeader(e) => e.to_string().hash(state), Self::InvalidGlobPattern(e) => e.to_string().hash(state), + Self::InvalidStatusCode(c) => c.hash(state), Self::Channel(e) => e.to_string().hash(state), Self::MissingGitHubToken | Self::InvalidUrlHost => { std::mem::discriminant(self).hash(state); diff --git a/lychee-lib/src/types/status.rs b/lychee-lib/src/types/status.rs index 3755ff1..bd887f2 100644 --- a/lychee-lib/src/types/status.rs +++ b/lychee-lib/src/types/status.rs @@ -84,6 +84,39 @@ impl Status { } } + /// Create a status object from a cached status (from a previous run of + /// lychee) and the set of accepted status codes. + /// + /// The set of accepted status codes can change between runs, + /// necessitating more complex logic than just using the cached status. + /// + /// Note that the accepted status codes are not of type `StatusCode`, + /// because they are provided by the user and can be invalid according to + /// the HTTP spec and IANA, but the user might still want to accept them. + #[must_use] + pub fn from_cache_status(s: CacheStatus, accepted: Option>) -> Self { + match s { + CacheStatus::Ok(code) => { + // Not sure if we should change the status based on the + // accepted status codes. If we find out that this is + // counter-intuitive, we can change it. + if accepted.map(|a| a.contains(&code)) == Some(true) { + return Self::Cached(CacheStatus::Ok(code)); + }; + Self::Cached(CacheStatus::Error(Some(code))) + } + CacheStatus::Error(code) => { + if let Some(code) = code { + if accepted.map(|a| a.contains(&code)) == Some(true) { + return Self::Cached(CacheStatus::Ok(code)); + }; + } + Self::Cached(CacheStatus::Error(code)) + } + _ => Self::Cached(s), + } + } + /// Return more details about the status (if any) /// /// Which additional information we can extract depends on the underlying @@ -219,9 +252,3 @@ impl From for Status { } } } - -impl From for Status { - fn from(s: CacheStatus) -> Self { - Self::Cached(s) - } -}