diff --git a/lychee-bin/src/commands/check.rs b/lychee-bin/src/commands/check.rs index 4c98789..3e10887 100644 --- a/lychee-bin/src/commands/check.rs +++ b/lychee-bin/src/commands/check.rs @@ -135,10 +135,14 @@ async fn handle(client: &Client, cache: Arc, request: Request) -> Respons // TODO: Handle error as soon as https://github.com/seanmonstar/reqwest/pull/1399 got merged let response = client.check(request).await.expect("cannot check URI"); - // Never cache filesystem access as it is fast already so caching has no - // benefit - if !uri.is_file() { - cache.insert(uri, response.status().into()); + // - Never cache filesystem access as it is fast already so caching has no + // benefit. + // - Skip caching unsupported URLs as they might be supported in a + // future run. + // - Skip caching excluded links; they might not be excluded in the next run + let status = response.status(); + if !uri.is_file() && !status.is_excluded() && !status.is_unsupported() { + cache.insert(uri, status.into()); } response } diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index fea516d..17143a3 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -14,6 +14,11 @@ mod cli { type Result = std::result::Result>; + // The lychee cache file name is used for some tests. + // Since it is currently static and can't be overwritten, declare it as a + // constant. + const LYCHEE_CACHE_FILE: &str = ".lycheecache"; + macro_rules! mock_server { ($status:expr $(, $func:tt ($($arg:expr),*))*) => {{ let mock_server = wiremock::MockServer::start().await; @@ -609,7 +614,10 @@ mod cli { #[tokio::test] async fn test_lycheecache_file() -> Result<()> { let base_path = fixtures_path().join("cache"); - let cache_file = base_path.join(".lycheecache"); + let cache_file = base_path.join(LYCHEE_CACHE_FILE); + + // Unconditionally remove cache file if it exists + let _ = fs::remove_file(&cache_file); let mock_server_ok = mock_server!(StatusCode::OK); let mock_server_err = mock_server!(StatusCode::NOT_FOUND); @@ -649,7 +657,7 @@ mod cli { mock_server_exclude.uri() ))); - // check content of cache file file + // check content of cache file let data = fs::read_to_string(&cache_file)?; assert!(data.contains(&format!("{}/,200", mock_server_ok.uri()))); assert!(data.contains(&format!("{}/,404", mock_server_err.uri()))); @@ -677,6 +685,45 @@ mod cli { Ok(()) } + #[tokio::test] + async fn test_skip_cache_unsupported() -> Result<()> { + let base_path = fixtures_path().join("cache"); + let cache_file = base_path.join(LYCHEE_CACHE_FILE); + + // Unconditionally remove cache file if it exists + let _ = fs::remove_file(&cache_file); + + let unsupported_url = "slack://user".to_string(); + let excluded_url = "https://example.com/"; + + // run first without cache to generate the cache file + main_command() + .current_dir(&base_path) + .write_stdin(format!("{unsupported_url}\n{excluded_url}")) + .arg("--cache") + .arg("--verbose") + .arg("--exclude") + .arg(excluded_url) + .arg("--") + .arg("-") + .assert() + .stderr(contains(format!( + "[IGNORED] {unsupported_url} | Unsupported Error creating request client\n" + ))) + .stderr(contains(format!("[EXCLUDED] {excluded_url} | Excluded\n"))); + + // The cache file should be empty, because the only checked URL is + // unsupported and we don't want to cache that. It might be supported in + // future versions. + let buf = fs::read(&cache_file).unwrap(); + assert!(buf.is_empty()); + + // clear the cache file + fs::remove_file(&cache_file)?; + + Ok(()) + } + #[test] fn test_include_verbatim() -> Result<()> { let mut cmd = main_command(); diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 2fedf97..2ce9949 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -449,8 +449,9 @@ impl Client { return Status::Error(ErrorKind::InvalidURI(uri.clone())); } // This is merely a URI with a scheme that is not supported by - // reqwest yet. It would be safe to pass that to reqwest and it wouldn't - // panic, but it's also unnecessary, because it would simply return an error. + // reqwest yet. It would be safe to pass that to reqwest and it + // wouldn't panic, but it's also unnecessary, because it would + // simply return an error. return Status::Unsupported(ErrorKind::InvalidURI(uri.clone())); } diff --git a/lychee-lib/src/types/cache.rs b/lychee-lib/src/types/cache.rs index 75c089f..2b22c27 100644 --- a/lychee-lib/src/types/cache.rs +++ b/lychee-lib/src/types/cache.rs @@ -16,6 +16,11 @@ pub enum CacheStatus { /// The request was excluded (skipped) Excluded, /// The protocol is not yet supported + // We no longer cache unsupported files as they might be supported in future + // versions. + // Nevertheless, keep for compatibility when deserializing older cache + // files, even though this no longer gets serialized. Can be removed at a + // later point in time. Unsupported, } @@ -27,6 +32,9 @@ impl<'de> Deserialize<'de> for CacheStatus { let status = <&str as Deserialize<'de>>::deserialize(deserializer)?; match status { "Excluded" => Ok(CacheStatus::Excluded), + // Keep for compatibility with older cache files, even though this + // no longer gets serialized. Can be removed at a later point in + // time. "Unsupported" => Ok(CacheStatus::Unsupported), other => match other.parse::() { Ok(code) => match code { diff --git a/lychee-lib/src/types/status.rs b/lychee-lib/src/types/status.rs index 2afcfd0..2b88577 100644 --- a/lychee-lib/src/types/status.rs +++ b/lychee-lib/src/types/status.rs @@ -34,7 +34,7 @@ pub enum Status { /// Resource was excluded from checking Excluded, /// The request type is currently not supported, - /// for example when the URL scheme is `slack://` or `file://` + /// for example when the URL scheme is `slack://`. /// See https://github.com/lycheeverse/lychee/issues/199 Unsupported(ErrorKind), /// Cached request status from previous run