Add documentation to chain module

Also make `Chainable` and `ChainResult` public to support external plugins/handlers.
This commit is contained in:
Matthias 2024-04-21 15:38:01 +02:00 committed by Thomas Zahner
parent d5b9b84db6
commit 9ed97213a1
3 changed files with 115 additions and 14 deletions

View file

@ -1,39 +1,89 @@
//! [Chain of responsibility pattern][pattern] implementation.
//!
//! lychee is based on a chain of responsibility, where each handler can modify
//! a request and decide if it should be passed to the next element or not.
//!
//! The chain is implemented as a vector of [`Chainable`] handlers. It is
//! traversed by calling [`Chain::traverse`], which will call
//! [`Chainable::chain`] on each handler in the chain.
//!
//! To add external handlers, you can implement the [`Chainable`] trait and add
//! the handler to the chain.
//!
//! [pattern]: https://github.com/lpxxn/rust-design-pattern/blob/master/behavioral/chain_of_responsibility.rs
use crate::Status;
use async_trait::async_trait;
use core::fmt::Debug;
use std::sync::Arc;
use tokio::sync::Mutex;
/// Result of a handler.
///
/// This is used to decide if the chain should continue to the next handler or
/// stop and return the result:
///
/// - If the chain should continue, the handler should return
/// [`ChainResult::Next`]. This will traverse the next handler in the chain.
/// - If the chain should stop, the handler should return [`ChainResult::Done`].
/// This will stop the chain immediately and return the result of the handler.
#[derive(Debug, PartialEq)]
pub(crate) enum ChainResult<T, R> {
pub enum ChainResult<T, R> {
/// Continue to the next handler in the chain.
Next(T),
/// Stop the chain and return the result.
Done(R),
}
/// Request chain type
///
/// This takes a request and returns a status.
pub(crate) type RequestChain = Chain<reqwest::Request, Status>;
/// Inner chain type.
///
/// This holds all handlers, which were chained together.
/// Handlers are traversed in order.
///
/// Each handler needs to implement the `Chainable` trait and be `Send`, because
/// the chain is traversed concurrently and the handlers can be sent between
/// threads.
pub(crate) type InnerChain<T, R> = Vec<Box<dyn Chainable<T, R> + Send>>;
/// The outer chain type.
///
/// This is a wrapper around the inner chain type and allows for
/// concurrent access to the chain.
#[derive(Debug)]
pub struct Chain<T, R>(Arc<Mutex<InnerChain<T, R>>>);
impl<T, R> Default for Chain<T, R> {
fn default() -> Self {
Self(Arc::new(Mutex::new(vec![])))
Self(Arc::new(Mutex::new(InnerChain::default())))
}
}
impl<T, R> Clone for Chain<T, R> {
fn clone(&self) -> Self {
// Cloning the chain is a cheap operation, because the inner chain is
// wrapped in an `Arc` and `Mutex`.
Self(self.0.clone())
}
}
impl<T, R> Chain<T, R> {
/// Create a new chain from a vector of chainable handlers
pub(crate) fn new(values: InnerChain<T, R>) -> Self {
Self(Arc::new(Mutex::new(values)))
}
/// Traverse the chain with the given input.
///
/// This will call `chain` on each handler in the chain and return
/// the result. If a handler returns `ChainResult::Done`, the chain
/// will stop and return.
///
/// If no handler returns `ChainResult::Done`, the chain will return
/// `ChainResult::Next` with the input.
pub(crate) async fn traverse(&self, mut input: T) -> ChainResult<T, R> {
use ChainResult::{Done, Next};
for e in self.0.lock().await.iter_mut() {
@ -49,23 +99,71 @@ impl<T, R> Chain<T, R> {
}
}
/// Chainable trait for implementing request handlers
///
/// This trait needs to be implemented by all chainable handlers.
/// It is the only requirement to handle requests in lychee.
///
/// It takes an input request and returns a [`ChainResult`], which can be either
/// [`ChainResult::Next`] to continue to the next handler or
/// [`ChainResult::Done`] to stop the chain.
///
/// The request can be modified by the handler before it is passed to the next
/// handler. This allows for modifying the request, such as adding headers or
/// changing the URL (e.g. for remapping or filtering).
#[async_trait]
pub(crate) trait Chainable<T, R>: Debug {
pub trait Chainable<T, R>: Debug {
/// Given an input request, return a [`ChainResult`] to continue or stop the
/// chain.
///
/// The input request can be modified by the handler before it is passed to
/// the next handler.
///
/// # Examples
///
/// ```
/// use lychee_lib::{Chainable, ChainResult, Status};
/// use reqwest::Request;
/// use async_trait::async_trait;
///
/// #[derive(Debug)]
/// struct AddHeader;
///
/// #[async_trait]
/// impl Chainable<Request, Status> for AddHeader {
/// async fn chain(&mut self, mut request: Request) -> ChainResult<Request, Status> {
/// // You can modify the request however you like here
/// request.headers_mut().append("X-Header", "value".parse().unwrap());
///
/// // Pass the request to the next handler
/// ChainResult::Next(request)
/// }
/// }
/// ```
async fn chain(&mut self, input: T) -> ChainResult<T, R>;
}
/// Client request chains
///
/// This struct holds all request chains.
///
/// Usually, this is used to hold the default request chain and the external
/// plugin request chain.
#[derive(Debug)]
pub(crate) struct ClientRequestChain<'a> {
pub(crate) struct ClientRequestChains<'a> {
chains: Vec<&'a RequestChain>,
}
impl<'a> ClientRequestChain<'a> {
impl<'a> ClientRequestChains<'a> {
/// Create a new chain of request chains.
pub(crate) fn new(chains: Vec<&'a RequestChain>) -> Self {
Self { chains }
}
/// Traverse all request chains and resolve to a status.
pub(crate) async fn traverse(&self, mut input: reqwest::Request) -> Status {
use ChainResult::{Done, Next};
for e in &self.chains {
match e.traverse(input).await {
Next(r) => input = r,
@ -75,7 +173,8 @@ impl<'a> ClientRequestChain<'a> {
}
}
// consider as excluded if no chain element has converted it to a done
// Consider the request to be excluded if no chain element has converted
// it to a `ChainResult::Done`
Status::Excluded
}
}

View file

@ -30,7 +30,7 @@ use secrecy::{ExposeSecret, SecretString};
use typed_builder::TypedBuilder;
use crate::{
chain::{Chain, ClientRequestChain, RequestChain},
chain::{Chain, ClientRequestChains, RequestChain},
checker::Checker,
filter::{Excludes, Filter, Includes},
quirks::Quirks,
@ -486,7 +486,7 @@ impl Client {
return Ok(Response::new(uri.clone(), Status::Excluded, source));
}
let chain: RequestChain = Chain::new(vec![
let default_chain: RequestChain = Chain::new(vec![
Box::<Quirks>::default(),
Box::new(credentials),
Box::new(Checker::new(
@ -500,7 +500,7 @@ impl Client {
let status = match uri.scheme() {
_ if uri.is_file() => self.check_file(uri).await,
_ if uri.is_mail() => self.check_mail(uri).await,
_ => self.check_website(uri, chain).await?,
_ => self.check_website(uri, default_chain).await?,
};
Ok(Response::new(uri.clone(), status, source))
@ -533,11 +533,11 @@ impl Client {
/// - The request failed.
/// - The response status code is not accepted.
/// - The URI cannot be converted to HTTPS.
pub async fn check_website(&self, uri: &Uri, chain: RequestChain) -> Result<Status> {
match self.check_website_inner(uri, &chain).await {
pub async fn check_website(&self, uri: &Uri, default_chain: RequestChain) -> Result<Status> {
match self.check_website_inner(uri, &default_chain).await {
Status::Ok(code) if self.require_https && uri.scheme() == "http" => {
if self
.check_website_inner(&uri.to_https()?, &chain)
.check_website_inner(&uri.to_https()?, &default_chain)
.await
.is_success()
{
@ -562,7 +562,7 @@ impl Client {
/// - The URI is invalid.
/// - The request failed.
/// - The response status code is not accepted.
pub async fn check_website_inner(&self, uri: &Uri, chain: &RequestChain) -> Status {
pub async fn check_website_inner(&self, uri: &Uri, default_chain: &RequestChain) -> Status {
// Workaround for upstream reqwest panic
if validate_url(&uri.url) {
if matches!(uri.scheme(), "http" | "https") {
@ -587,7 +587,7 @@ impl Client {
Err(e) => return e.into(),
};
let status = ClientRequestChain::new(vec![&self.plugin_request_chain, chain])
let status = ClientRequestChains::new(vec![&self.plugin_request_chain, default_chain])
.traverse(request)
.await;

View file

@ -85,6 +85,8 @@ use openssl_sys as _; // required for vendored-openssl feature
#[doc(inline)]
pub use crate::{
basic_auth::BasicAuthExtractor,
// Expose the `Chainable` trait and `ChainResult` to allow defining external handlers (plugins)
chain::{ChainResult, Chainable},
// Constants get exposed so that the CLI can use the same defaults as the library
client::{
check, Client, ClientBuilder, DEFAULT_MAX_REDIRECTS, DEFAULT_MAX_RETRIES,