mirror of
https://github.com/Hopiu/lychee.git
synced 2026-03-16 20:50:25 +00:00
Allow overriding accepted status codes for cached URIs (#843)
Fixes #840
This commit is contained in:
parent
d4e8bb01c1
commit
b479a5810e
8 changed files with 139 additions and 11 deletions
4
Makefile
4
Makefile
|
|
@ -38,6 +38,10 @@ docs: ## Generate and show documentation
|
|||
lint: ## Run linter
|
||||
cargo clippy --all-targets --all-features -- -D warnings
|
||||
|
||||
.PHONY: test
|
||||
test: ## Run tests
|
||||
cargo test --all-targets --all-features
|
||||
|
||||
.PHONY: screencast
|
||||
screencast: ## Create a screencast for the docs
|
||||
svg-term --command 'assets/screencast.sh' --out 'assets/screencast.svg' --width 100 --padding 10 --window
|
||||
|
|
@ -1,3 +1,4 @@
|
|||
use std::collections::HashSet;
|
||||
use std::io::{self, Write};
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
|
|
@ -30,6 +31,7 @@ where
|
|||
|
||||
let client = params.client;
|
||||
let cache = params.cache;
|
||||
let accept = params.cfg.accept;
|
||||
// Start receiving requests
|
||||
tokio::spawn(async move {
|
||||
futures::StreamExt::for_each_concurrent(
|
||||
|
|
@ -37,7 +39,7 @@ where
|
|||
max_concurrency,
|
||||
|request: Result<Request>| async {
|
||||
let request = request.expect("cannot read request");
|
||||
let response = handle(&client, cache.clone(), request).await;
|
||||
let response = handle(&client, cache.clone(), request, accept.clone()).await;
|
||||
|
||||
send_resp
|
||||
.send(response)
|
||||
|
|
@ -112,7 +114,12 @@ where
|
|||
}
|
||||
|
||||
/// Handle a single request
|
||||
async fn handle(client: &Client, cache: Arc<Cache>, request: Request) -> Response {
|
||||
async fn handle(
|
||||
client: &Client,
|
||||
cache: Arc<Cache>,
|
||||
request: Request,
|
||||
accept: Option<HashSet<u16>>,
|
||||
) -> Response {
|
||||
let uri = request.uri.clone();
|
||||
if let Some(v) = cache.get(&uri) {
|
||||
// Found a cached request
|
||||
|
|
@ -121,7 +128,11 @@ async fn handle(client: &Client, cache: Arc<Cache>, request: Request) -> Respons
|
|||
let status = if client.is_excluded(&uri) {
|
||||
Status::Excluded
|
||||
} else {
|
||||
Status::from(v.value().status)
|
||||
// Can't impl `Status::from(v.value().status)` here because the
|
||||
// `accepted` status codes might have changed from the previous run
|
||||
// and they may have an impact on the interpretation of the status
|
||||
// code.
|
||||
Status::from_cache_status(v.value().status, accept)
|
||||
};
|
||||
return Response::new(uri.clone(), status, request.source);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -178,7 +178,7 @@ fn load_cache(cfg: &Config) -> Option<Cache> {
|
|||
let elapsed = modified.elapsed().ok()?;
|
||||
if elapsed > cfg.max_cache_age {
|
||||
eprintln!(
|
||||
"Cache is too old (age: {}, max age: {}). Discarding",
|
||||
"Cache is too old (age: {}, max age: {}). Discarding and recreating",
|
||||
humantime::format_duration(elapsed),
|
||||
humantime::format_duration(cfg.max_cache_age)
|
||||
);
|
||||
|
|
|
|||
|
|
@ -53,6 +53,11 @@ pub(crate) fn parse_base(src: &str) -> Result<Base, lychee_lib::ErrorKind> {
|
|||
}
|
||||
|
||||
/// Parse HTTP status codes into a set of `StatusCode`
|
||||
///
|
||||
/// Note that this function does not convert the status codes into
|
||||
/// `StatusCode` but rather into `u16` to avoid the need for
|
||||
/// `http` as a dependency and to support custom status codes, which are
|
||||
/// necessary for some websites, which don't adhere to the HTTP spec or IANA.
|
||||
pub(crate) fn parse_statuscodes(accept: &str) -> Result<HashSet<u16>> {
|
||||
let mut statuscodes = HashSet::new();
|
||||
for code in accept.split(',') {
|
||||
|
|
|
|||
|
|
@ -771,6 +771,81 @@ mod cli {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_lycheecache_accept_custom_status_codes() -> Result<()> {
|
||||
let base_path = fixtures_path().join("cache");
|
||||
let cache_file = base_path.join(LYCHEE_CACHE_FILE);
|
||||
|
||||
// Unconditionally remove cache file if it exists
|
||||
let _ = fs::remove_file(&cache_file);
|
||||
|
||||
let mock_server_ok = mock_server!(StatusCode::OK);
|
||||
let mock_server_teapot = mock_server!(StatusCode::IM_A_TEAPOT);
|
||||
let mock_server_server_error = mock_server!(StatusCode::INTERNAL_SERVER_ERROR);
|
||||
|
||||
let dir = tempfile::tempdir()?;
|
||||
let mut file = File::create(dir.path().join("c.md"))?;
|
||||
|
||||
writeln!(file, "{}", mock_server_ok.uri().as_str())?;
|
||||
writeln!(file, "{}", mock_server_teapot.uri().as_str())?;
|
||||
writeln!(file, "{}", mock_server_server_error.uri().as_str())?;
|
||||
|
||||
let mut cmd = main_command();
|
||||
let test_cmd = cmd
|
||||
.current_dir(&base_path)
|
||||
.arg(dir.path().join("c.md"))
|
||||
.arg("--verbose")
|
||||
.arg("--cache");
|
||||
|
||||
assert!(
|
||||
!cache_file.exists(),
|
||||
"cache file should not exist before this test"
|
||||
);
|
||||
|
||||
// run first without cache to generate the cache file
|
||||
// ignore exit code
|
||||
test_cmd
|
||||
.assert()
|
||||
.failure()
|
||||
.code(2)
|
||||
.stdout(contains(format!(
|
||||
"[418] {}/ | Failed: Network error: I'm a teapot\n",
|
||||
mock_server_teapot.uri()
|
||||
)))
|
||||
.stdout(contains(format!(
|
||||
"[500] {}/ | Failed: Network error: Internal Server Error\n",
|
||||
mock_server_server_error.uri()
|
||||
)));
|
||||
|
||||
// check content of cache file
|
||||
let data = fs::read_to_string(&cache_file)?;
|
||||
assert!(data.contains(&format!("{}/,200", mock_server_ok.uri())));
|
||||
assert!(data.contains(&format!("{}/,418", mock_server_teapot.uri())));
|
||||
assert!(data.contains(&format!("{}/,500", mock_server_server_error.uri())));
|
||||
|
||||
// run again to verify cache behavior
|
||||
// this time accept 418 and 500 as valid status codes
|
||||
test_cmd
|
||||
.arg("--no-progress")
|
||||
.arg("--accept")
|
||||
.arg("200,418,500")
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(contains(format!(
|
||||
"[418] {}/ | Cached: OK (cached)\n",
|
||||
mock_server_teapot.uri()
|
||||
)))
|
||||
.stdout(contains(format!(
|
||||
"[500] {}/ | Cached: OK (cached)\n",
|
||||
mock_server_server_error.uri()
|
||||
)));
|
||||
|
||||
// clear the cache file
|
||||
fs::remove_file(&cache_file)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_include_verbatim() -> Result<()> {
|
||||
let mut cmd = main_command();
|
||||
|
|
|
|||
|
|
@ -39,7 +39,9 @@ impl<'de> Deserialize<'de> for CacheStatus {
|
|||
other => match other.parse::<u16>() {
|
||||
Ok(code) => match code {
|
||||
// classify successful status codes as cache status success
|
||||
// TODO does not account for status code overrides passed through the 'accept' flag
|
||||
// Does not account for status code overrides passed through
|
||||
// the 'accept' flag. Instead, this is handled at a higher level
|
||||
// when the cache status is converted to a status.
|
||||
200..=299 => Ok(CacheStatus::Ok(code)),
|
||||
// classify redirects, client errors, & server errors as cache status error
|
||||
_ => Ok(CacheStatus::Error(Some(code))),
|
||||
|
|
|
|||
|
|
@ -93,6 +93,9 @@ pub enum ErrorKind {
|
|||
/// Cannot parse the given URI
|
||||
#[error("The given URI is invalid: {0}")]
|
||||
InvalidURI(Uri),
|
||||
/// The given status code is invalid (not in the range 100-1000)
|
||||
#[error("Invalid status code: {0}")]
|
||||
InvalidStatusCode(u16),
|
||||
/// Regex error
|
||||
#[error("Error when using regex engine: {0}")]
|
||||
Regex(#[from] regex::Error),
|
||||
|
|
@ -195,6 +198,7 @@ impl Hash for ErrorKind {
|
|||
Self::InvalidUriRemap(remap) => (remap).hash(state),
|
||||
Self::InvalidHeader(e) => e.to_string().hash(state),
|
||||
Self::InvalidGlobPattern(e) => e.to_string().hash(state),
|
||||
Self::InvalidStatusCode(c) => c.hash(state),
|
||||
Self::Channel(e) => e.to_string().hash(state),
|
||||
Self::MissingGitHubToken | Self::InvalidUrlHost => {
|
||||
std::mem::discriminant(self).hash(state);
|
||||
|
|
|
|||
|
|
@ -84,6 +84,39 @@ impl Status {
|
|||
}
|
||||
}
|
||||
|
||||
/// Create a status object from a cached status (from a previous run of
|
||||
/// lychee) and the set of accepted status codes.
|
||||
///
|
||||
/// The set of accepted status codes can change between runs,
|
||||
/// necessitating more complex logic than just using the cached status.
|
||||
///
|
||||
/// Note that the accepted status codes are not of type `StatusCode`,
|
||||
/// because they are provided by the user and can be invalid according to
|
||||
/// the HTTP spec and IANA, but the user might still want to accept them.
|
||||
#[must_use]
|
||||
pub fn from_cache_status(s: CacheStatus, accepted: Option<HashSet<u16>>) -> Self {
|
||||
match s {
|
||||
CacheStatus::Ok(code) => {
|
||||
// Not sure if we should change the status based on the
|
||||
// accepted status codes. If we find out that this is
|
||||
// counter-intuitive, we can change it.
|
||||
if accepted.map(|a| a.contains(&code)) == Some(true) {
|
||||
return Self::Cached(CacheStatus::Ok(code));
|
||||
};
|
||||
Self::Cached(CacheStatus::Error(Some(code)))
|
||||
}
|
||||
CacheStatus::Error(code) => {
|
||||
if let Some(code) = code {
|
||||
if accepted.map(|a| a.contains(&code)) == Some(true) {
|
||||
return Self::Cached(CacheStatus::Ok(code));
|
||||
};
|
||||
}
|
||||
Self::Cached(CacheStatus::Error(code))
|
||||
}
|
||||
_ => Self::Cached(s),
|
||||
}
|
||||
}
|
||||
|
||||
/// Return more details about the status (if any)
|
||||
///
|
||||
/// Which additional information we can extract depends on the underlying
|
||||
|
|
@ -219,9 +252,3 @@ impl From<reqwest::Error> for Status {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<CacheStatus> for Status {
|
||||
fn from(s: CacheStatus) -> Self {
|
||||
Self::Cached(s)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue