mirror of
https://github.com/Hopiu/lychee.git
synced 2026-03-28 18:30:25 +00:00
Skip caching unsupported and excluded URLs (#692)
As discussed in https://github.com/lycheeverse/lychee/issues/647#issuecomment-1170773449, it does not make much sense to cache unsupported and excluded URLs. Unsupported URLs might be supported in the future and caching them would mean they won't get checked then. Excluded URLs were excluded for a reason and should not appear in the cache. Furthermore they might not be excluded in a consecutive run, leading to a false-positive.
This commit is contained in:
parent
9ad53f97a2
commit
6fae93f2da
5 changed files with 69 additions and 9 deletions
|
|
@ -135,10 +135,14 @@ async fn handle(client: &Client, cache: Arc<Cache>, request: Request) -> Respons
|
|||
// TODO: Handle error as soon as https://github.com/seanmonstar/reqwest/pull/1399 got merged
|
||||
let response = client.check(request).await.expect("cannot check URI");
|
||||
|
||||
// Never cache filesystem access as it is fast already so caching has no
|
||||
// benefit
|
||||
if !uri.is_file() {
|
||||
cache.insert(uri, response.status().into());
|
||||
// - Never cache filesystem access as it is fast already so caching has no
|
||||
// benefit.
|
||||
// - Skip caching unsupported URLs as they might be supported in a
|
||||
// future run.
|
||||
// - Skip caching excluded links; they might not be excluded in the next run
|
||||
let status = response.status();
|
||||
if !uri.is_file() && !status.is_excluded() && !status.is_unsupported() {
|
||||
cache.insert(uri, status.into());
|
||||
}
|
||||
response
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,6 +14,11 @@ mod cli {
|
|||
|
||||
type Result<T> = std::result::Result<T, Box<dyn Error>>;
|
||||
|
||||
// The lychee cache file name is used for some tests.
|
||||
// Since it is currently static and can't be overwritten, declare it as a
|
||||
// constant.
|
||||
const LYCHEE_CACHE_FILE: &str = ".lycheecache";
|
||||
|
||||
macro_rules! mock_server {
|
||||
($status:expr $(, $func:tt ($($arg:expr),*))*) => {{
|
||||
let mock_server = wiremock::MockServer::start().await;
|
||||
|
|
@ -609,7 +614,10 @@ mod cli {
|
|||
#[tokio::test]
|
||||
async fn test_lycheecache_file() -> Result<()> {
|
||||
let base_path = fixtures_path().join("cache");
|
||||
let cache_file = base_path.join(".lycheecache");
|
||||
let cache_file = base_path.join(LYCHEE_CACHE_FILE);
|
||||
|
||||
// Unconditionally remove cache file if it exists
|
||||
let _ = fs::remove_file(&cache_file);
|
||||
|
||||
let mock_server_ok = mock_server!(StatusCode::OK);
|
||||
let mock_server_err = mock_server!(StatusCode::NOT_FOUND);
|
||||
|
|
@ -649,7 +657,7 @@ mod cli {
|
|||
mock_server_exclude.uri()
|
||||
)));
|
||||
|
||||
// check content of cache file file
|
||||
// check content of cache file
|
||||
let data = fs::read_to_string(&cache_file)?;
|
||||
assert!(data.contains(&format!("{}/,200", mock_server_ok.uri())));
|
||||
assert!(data.contains(&format!("{}/,404", mock_server_err.uri())));
|
||||
|
|
@ -677,6 +685,45 @@ mod cli {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_skip_cache_unsupported() -> Result<()> {
|
||||
let base_path = fixtures_path().join("cache");
|
||||
let cache_file = base_path.join(LYCHEE_CACHE_FILE);
|
||||
|
||||
// Unconditionally remove cache file if it exists
|
||||
let _ = fs::remove_file(&cache_file);
|
||||
|
||||
let unsupported_url = "slack://user".to_string();
|
||||
let excluded_url = "https://example.com/";
|
||||
|
||||
// run first without cache to generate the cache file
|
||||
main_command()
|
||||
.current_dir(&base_path)
|
||||
.write_stdin(format!("{unsupported_url}\n{excluded_url}"))
|
||||
.arg("--cache")
|
||||
.arg("--verbose")
|
||||
.arg("--exclude")
|
||||
.arg(excluded_url)
|
||||
.arg("--")
|
||||
.arg("-")
|
||||
.assert()
|
||||
.stderr(contains(format!(
|
||||
"[IGNORED] {unsupported_url} | Unsupported Error creating request client\n"
|
||||
)))
|
||||
.stderr(contains(format!("[EXCLUDED] {excluded_url} | Excluded\n")));
|
||||
|
||||
// The cache file should be empty, because the only checked URL is
|
||||
// unsupported and we don't want to cache that. It might be supported in
|
||||
// future versions.
|
||||
let buf = fs::read(&cache_file).unwrap();
|
||||
assert!(buf.is_empty());
|
||||
|
||||
// clear the cache file
|
||||
fs::remove_file(&cache_file)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_include_verbatim() -> Result<()> {
|
||||
let mut cmd = main_command();
|
||||
|
|
|
|||
|
|
@ -449,8 +449,9 @@ impl Client {
|
|||
return Status::Error(ErrorKind::InvalidURI(uri.clone()));
|
||||
}
|
||||
// This is merely a URI with a scheme that is not supported by
|
||||
// reqwest yet. It would be safe to pass that to reqwest and it wouldn't
|
||||
// panic, but it's also unnecessary, because it would simply return an error.
|
||||
// reqwest yet. It would be safe to pass that to reqwest and it
|
||||
// wouldn't panic, but it's also unnecessary, because it would
|
||||
// simply return an error.
|
||||
return Status::Unsupported(ErrorKind::InvalidURI(uri.clone()));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -16,6 +16,11 @@ pub enum CacheStatus {
|
|||
/// The request was excluded (skipped)
|
||||
Excluded,
|
||||
/// The protocol is not yet supported
|
||||
// We no longer cache unsupported files as they might be supported in future
|
||||
// versions.
|
||||
// Nevertheless, keep for compatibility when deserializing older cache
|
||||
// files, even though this no longer gets serialized. Can be removed at a
|
||||
// later point in time.
|
||||
Unsupported,
|
||||
}
|
||||
|
||||
|
|
@ -27,6 +32,9 @@ impl<'de> Deserialize<'de> for CacheStatus {
|
|||
let status = <&str as Deserialize<'de>>::deserialize(deserializer)?;
|
||||
match status {
|
||||
"Excluded" => Ok(CacheStatus::Excluded),
|
||||
// Keep for compatibility with older cache files, even though this
|
||||
// no longer gets serialized. Can be removed at a later point in
|
||||
// time.
|
||||
"Unsupported" => Ok(CacheStatus::Unsupported),
|
||||
other => match other.parse::<u16>() {
|
||||
Ok(code) => match code {
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ pub enum Status {
|
|||
/// Resource was excluded from checking
|
||||
Excluded,
|
||||
/// The request type is currently not supported,
|
||||
/// for example when the URL scheme is `slack://` or `file://`
|
||||
/// for example when the URL scheme is `slack://`.
|
||||
/// See https://github.com/lycheeverse/lychee/issues/199
|
||||
Unsupported(ErrorKind),
|
||||
/// Cached request status from previous run
|
||||
|
|
|
|||
Loading…
Reference in a new issue