Allow overriding accepted status codes for cached URIs (#843)

Fixes #840
This commit is contained in:
Matthias 2022-11-28 12:23:07 +01:00 committed by GitHub
parent d4e8bb01c1
commit b479a5810e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 139 additions and 11 deletions

View file

@ -38,6 +38,10 @@ docs: ## Generate and show documentation
lint: ## Run linter
cargo clippy --all-targets --all-features -- -D warnings
.PHONY: test
test: ## Run tests
cargo test --all-targets --all-features
.PHONY: screencast
screencast: ## Create a screencast for the docs
svg-term --command 'assets/screencast.sh' --out 'assets/screencast.svg' --width 100 --padding 10 --window

View file

@ -1,3 +1,4 @@
use std::collections::HashSet;
use std::io::{self, Write};
use std::sync::Arc;
use std::time::Duration;
@ -30,6 +31,7 @@ where
let client = params.client;
let cache = params.cache;
let accept = params.cfg.accept;
// Start receiving requests
tokio::spawn(async move {
futures::StreamExt::for_each_concurrent(
@ -37,7 +39,7 @@ where
max_concurrency,
|request: Result<Request>| async {
let request = request.expect("cannot read request");
let response = handle(&client, cache.clone(), request).await;
let response = handle(&client, cache.clone(), request, accept.clone()).await;
send_resp
.send(response)
@ -112,7 +114,12 @@ where
}
/// Handle a single request
async fn handle(client: &Client, cache: Arc<Cache>, request: Request) -> Response {
async fn handle(
client: &Client,
cache: Arc<Cache>,
request: Request,
accept: Option<HashSet<u16>>,
) -> Response {
let uri = request.uri.clone();
if let Some(v) = cache.get(&uri) {
// Found a cached request
@ -121,7 +128,11 @@ async fn handle(client: &Client, cache: Arc<Cache>, request: Request) -> Respons
let status = if client.is_excluded(&uri) {
Status::Excluded
} else {
Status::from(v.value().status)
// Can't impl `Status::from(v.value().status)` here because the
// `accepted` status codes might have changed from the previous run
// and they may have an impact on the interpretation of the status
// code.
Status::from_cache_status(v.value().status, accept)
};
return Response::new(uri.clone(), status, request.source);
}

View file

@ -178,7 +178,7 @@ fn load_cache(cfg: &Config) -> Option<Cache> {
let elapsed = modified.elapsed().ok()?;
if elapsed > cfg.max_cache_age {
eprintln!(
"Cache is too old (age: {}, max age: {}). Discarding",
"Cache is too old (age: {}, max age: {}). Discarding and recreating",
humantime::format_duration(elapsed),
humantime::format_duration(cfg.max_cache_age)
);

View file

@ -53,6 +53,11 @@ pub(crate) fn parse_base(src: &str) -> Result<Base, lychee_lib::ErrorKind> {
}
/// Parse HTTP status codes into a set of `StatusCode`
///
/// Note that this function does not convert the status codes into
/// `StatusCode` but rather into `u16` to avoid the need for
/// `http` as a dependency and to support custom status codes, which are
/// necessary for some websites, which don't adhere to the HTTP spec or IANA.
pub(crate) fn parse_statuscodes(accept: &str) -> Result<HashSet<u16>> {
let mut statuscodes = HashSet::new();
for code in accept.split(',') {

View file

@ -771,6 +771,81 @@ mod cli {
Ok(())
}
#[tokio::test]
async fn test_lycheecache_accept_custom_status_codes() -> Result<()> {
let base_path = fixtures_path().join("cache");
let cache_file = base_path.join(LYCHEE_CACHE_FILE);
// Unconditionally remove cache file if it exists
let _ = fs::remove_file(&cache_file);
let mock_server_ok = mock_server!(StatusCode::OK);
let mock_server_teapot = mock_server!(StatusCode::IM_A_TEAPOT);
let mock_server_server_error = mock_server!(StatusCode::INTERNAL_SERVER_ERROR);
let dir = tempfile::tempdir()?;
let mut file = File::create(dir.path().join("c.md"))?;
writeln!(file, "{}", mock_server_ok.uri().as_str())?;
writeln!(file, "{}", mock_server_teapot.uri().as_str())?;
writeln!(file, "{}", mock_server_server_error.uri().as_str())?;
let mut cmd = main_command();
let test_cmd = cmd
.current_dir(&base_path)
.arg(dir.path().join("c.md"))
.arg("--verbose")
.arg("--cache");
assert!(
!cache_file.exists(),
"cache file should not exist before this test"
);
// run first without cache to generate the cache file
// ignore exit code
test_cmd
.assert()
.failure()
.code(2)
.stdout(contains(format!(
"[418] {}/ | Failed: Network error: I'm a teapot\n",
mock_server_teapot.uri()
)))
.stdout(contains(format!(
"[500] {}/ | Failed: Network error: Internal Server Error\n",
mock_server_server_error.uri()
)));
// check content of cache file
let data = fs::read_to_string(&cache_file)?;
assert!(data.contains(&format!("{}/,200", mock_server_ok.uri())));
assert!(data.contains(&format!("{}/,418", mock_server_teapot.uri())));
assert!(data.contains(&format!("{}/,500", mock_server_server_error.uri())));
// run again to verify cache behavior
// this time accept 418 and 500 as valid status codes
test_cmd
.arg("--no-progress")
.arg("--accept")
.arg("200,418,500")
.assert()
.success()
.stdout(contains(format!(
"[418] {}/ | Cached: OK (cached)\n",
mock_server_teapot.uri()
)))
.stdout(contains(format!(
"[500] {}/ | Cached: OK (cached)\n",
mock_server_server_error.uri()
)));
// clear the cache file
fs::remove_file(&cache_file)?;
Ok(())
}
#[test]
fn test_include_verbatim() -> Result<()> {
let mut cmd = main_command();

View file

@ -39,7 +39,9 @@ impl<'de> Deserialize<'de> for CacheStatus {
other => match other.parse::<u16>() {
Ok(code) => match code {
// classify successful status codes as cache status success
// TODO does not account for status code overrides passed through the 'accept' flag
// Does not account for status code overrides passed through
// the 'accept' flag. Instead, this is handled at a higher level
// when the cache status is converted to a status.
200..=299 => Ok(CacheStatus::Ok(code)),
// classify redirects, client errors, & server errors as cache status error
_ => Ok(CacheStatus::Error(Some(code))),

View file

@ -93,6 +93,9 @@ pub enum ErrorKind {
/// Cannot parse the given URI
#[error("The given URI is invalid: {0}")]
InvalidURI(Uri),
/// The given status code is invalid (not in the range 100-1000)
#[error("Invalid status code: {0}")]
InvalidStatusCode(u16),
/// Regex error
#[error("Error when using regex engine: {0}")]
Regex(#[from] regex::Error),
@ -195,6 +198,7 @@ impl Hash for ErrorKind {
Self::InvalidUriRemap(remap) => (remap).hash(state),
Self::InvalidHeader(e) => e.to_string().hash(state),
Self::InvalidGlobPattern(e) => e.to_string().hash(state),
Self::InvalidStatusCode(c) => c.hash(state),
Self::Channel(e) => e.to_string().hash(state),
Self::MissingGitHubToken | Self::InvalidUrlHost => {
std::mem::discriminant(self).hash(state);

View file

@ -84,6 +84,39 @@ impl Status {
}
}
/// Create a status object from a cached status (from a previous run of
/// lychee) and the set of accepted status codes.
///
/// The set of accepted status codes can change between runs,
/// necessitating more complex logic than just using the cached status.
///
/// Note that the accepted status codes are not of type `StatusCode`,
/// because they are provided by the user and can be invalid according to
/// the HTTP spec and IANA, but the user might still want to accept them.
#[must_use]
pub fn from_cache_status(s: CacheStatus, accepted: Option<HashSet<u16>>) -> Self {
match s {
CacheStatus::Ok(code) => {
// Not sure if we should change the status based on the
// accepted status codes. If we find out that this is
// counter-intuitive, we can change it.
if accepted.map(|a| a.contains(&code)) == Some(true) {
return Self::Cached(CacheStatus::Ok(code));
};
Self::Cached(CacheStatus::Error(Some(code)))
}
CacheStatus::Error(code) => {
if let Some(code) = code {
if accepted.map(|a| a.contains(&code)) == Some(true) {
return Self::Cached(CacheStatus::Ok(code));
};
}
Self::Cached(CacheStatus::Error(code))
}
_ => Self::Cached(s),
}
}
/// Return more details about the status (if any)
///
/// Which additional information we can extract depends on the underlying
@ -219,9 +252,3 @@ impl From<reqwest::Error> for Status {
}
}
}
impl From<CacheStatus> for Status {
fn from(s: CacheStatus) -> Self {
Self::Cached(s)
}
}