diff --git a/lychee-lib/src/chain/mod.rs b/lychee-lib/src/chain/mod.rs index 738b908..c99f47b 100644 --- a/lychee-lib/src/chain/mod.rs +++ b/lychee-lib/src/chain/mod.rs @@ -1,39 +1,89 @@ +//! [Chain of responsibility pattern][pattern] implementation. +//! +//! lychee is based on a chain of responsibility, where each handler can modify +//! a request and decide if it should be passed to the next element or not. +//! +//! The chain is implemented as a vector of [`Chainable`] handlers. It is +//! traversed by calling [`Chain::traverse`], which will call +//! [`Chainable::chain`] on each handler in the chain. +//! +//! To add external handlers, you can implement the [`Chainable`] trait and add +//! the handler to the chain. +//! +//! [pattern]: https://github.com/lpxxn/rust-design-pattern/blob/master/behavioral/chain_of_responsibility.rs use crate::Status; use async_trait::async_trait; use core::fmt::Debug; use std::sync::Arc; use tokio::sync::Mutex; +/// Result of a handler. +/// +/// This is used to decide if the chain should continue to the next handler or +/// stop and return the result: +/// +/// - If the chain should continue, the handler should return +/// [`ChainResult::Next`]. This will traverse the next handler in the chain. +/// - If the chain should stop, the handler should return [`ChainResult::Done`]. +/// This will stop the chain immediately and return the result of the handler. #[derive(Debug, PartialEq)] -pub(crate) enum ChainResult { +pub enum ChainResult { + /// Continue to the next handler in the chain. Next(T), + /// Stop the chain and return the result. Done(R), } +/// Request chain type +/// +/// This takes a request and returns a status. pub(crate) type RequestChain = Chain; +/// Inner chain type. +/// +/// This holds all handlers, which were chained together. +/// Handlers are traversed in order. 
+/// +/// Each handler needs to implement the `Chainable` trait and be `Send`, because +/// the chain is traversed concurrently and the handlers can be sent between +/// threads. pub(crate) type InnerChain = Vec + Send>>; +/// The outer chain type. +/// +/// This is a wrapper around the inner chain type and allows for +/// concurrent access to the chain. #[derive(Debug)] pub struct Chain(Arc>>); impl Default for Chain { fn default() -> Self { - Self(Arc::new(Mutex::new(vec![]))) + Self(Arc::new(Mutex::new(InnerChain::default()))) } } impl Clone for Chain { fn clone(&self) -> Self { + // Cloning the chain is cheap: only the `Arc` reference count is bumped, + // so every clone shares the same `Mutex`-guarded inner chain. Self(self.0.clone()) } } impl Chain { + /// Create a new chain from a vector of chainable handlers. pub(crate) fn new(values: InnerChain) -> Self { Self(Arc::new(Mutex::new(values))) } + /// Traverse the chain with the given input. + /// + /// This will call `chain` on each handler in the chain and return + /// the result. If a handler returns `ChainResult::Done`, the chain + /// will stop and return. + /// + /// If no handler returns `ChainResult::Done`, the chain will return + /// `ChainResult::Next` with the input. pub(crate) async fn traverse(&self, mut input: T) -> ChainResult { use ChainResult::{Done, Next}; for e in self.0.lock().await.iter_mut() { @@ -49,23 +99,71 @@ impl Chain { } } +/// Chainable trait for implementing request handlers +/// +/// This trait needs to be implemented by all chainable handlers. +/// It is the only requirement to handle requests in lychee. +/// +/// It takes an input request and returns a [`ChainResult`], which can be either +/// [`ChainResult::Next`] to continue to the next handler or +/// [`ChainResult::Done`] to stop the chain. +/// +/// The request can be modified by the handler before it is passed to the next +/// handler. This allows for modifying the request, such as adding headers or +/// changing the URL (e.g. 
for remapping or filtering). #[async_trait] -pub(crate) trait Chainable: Debug { +pub trait Chainable: Debug { + /// Given an input request, return a [`ChainResult`] to continue or stop the + /// chain. + /// + /// The input request can be modified by the handler before it is passed to + /// the next handler. + /// + /// # Example + /// + /// ``` + /// use lychee_lib::{Chainable, ChainResult, Status}; + /// use reqwest::Request; + /// use async_trait::async_trait; + /// + /// #[derive(Debug)] + /// struct AddHeader; + /// + /// #[async_trait] + /// impl Chainable for AddHeader { + /// async fn chain(&mut self, mut request: Request) -> ChainResult { + /// // You can modify the request however you like here + /// request.headers_mut().append("X-Header", "value".parse().unwrap()); + /// + /// // Pass the request to the next handler + /// ChainResult::Next(request) + /// } + /// } + /// ``` async fn chain(&mut self, input: T) -> ChainResult; } +/// Client request chains +/// +/// This struct holds all request chains. +/// +/// Usually, this is used to hold the default request chain and the external +/// plugin request chain. #[derive(Debug)] -pub(crate) struct ClientRequestChain<'a> { +pub(crate) struct ClientRequestChains<'a> { chains: Vec<&'a RequestChain>, } -impl<'a> ClientRequestChain<'a> { +impl<'a> ClientRequestChains<'a> { + /// Create a new chain of request chains. pub(crate) fn new(chains: Vec<&'a RequestChain>) -> Self { Self { chains } } + /// Traverse all request chains and resolve to a status. 
pub(crate) async fn traverse(&self, mut input: reqwest::Request) -> Status { use ChainResult::{Done, Next}; + for e in &self.chains { match e.traverse(input).await { Next(r) => input = r, @@ -75,7 +173,8 @@ impl<'a> ClientRequestChain<'a> { } } - // consider as excluded if no chain element has converted it to a done + // Consider the request to be excluded if no chain element has converted + // it to a `ChainResult::Done` Status::Excluded } } diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index 72727ee..0bbc422 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -30,7 +30,7 @@ use secrecy::{ExposeSecret, SecretString}; use typed_builder::TypedBuilder; use crate::{ - chain::{Chain, ClientRequestChain, RequestChain}, + chain::{Chain, ClientRequestChains, RequestChain}, checker::Checker, filter::{Excludes, Filter, Includes}, quirks::Quirks, @@ -486,7 +486,7 @@ impl Client { return Ok(Response::new(uri.clone(), Status::Excluded, source)); } - let chain: RequestChain = Chain::new(vec![ + let default_chain: RequestChain = Chain::new(vec![ Box::::default(), Box::new(credentials), Box::new(Checker::new( @@ -500,7 +500,7 @@ impl Client { let status = match uri.scheme() { _ if uri.is_file() => self.check_file(uri).await, _ if uri.is_mail() => self.check_mail(uri).await, - _ => self.check_website(uri, chain).await?, + _ => self.check_website(uri, default_chain).await?, }; Ok(Response::new(uri.clone(), status, source)) @@ -533,11 +533,11 @@ impl Client { /// - The request failed. /// - The response status code is not accepted. /// - The URI cannot be converted to HTTPS. 
- pub async fn check_website(&self, uri: &Uri, chain: RequestChain) -> Result { - match self.check_website_inner(uri, &chain).await { + pub async fn check_website(&self, uri: &Uri, default_chain: RequestChain) -> Result { + match self.check_website_inner(uri, &default_chain).await { Status::Ok(code) if self.require_https && uri.scheme() == "http" => { if self - .check_website_inner(&uri.to_https()?, &chain) + .check_website_inner(&uri.to_https()?, &default_chain) .await .is_success() { @@ -562,7 +562,7 @@ impl Client { /// - The URI is invalid. /// - The request failed. /// - The response status code is not accepted. - pub async fn check_website_inner(&self, uri: &Uri, chain: &RequestChain) -> Status { + pub async fn check_website_inner(&self, uri: &Uri, default_chain: &RequestChain) -> Status { // Workaround for upstream reqwest panic if validate_url(&uri.url) { if matches!(uri.scheme(), "http" | "https") { @@ -587,7 +587,7 @@ impl Client { Err(e) => return e.into(), }; - let status = ClientRequestChain::new(vec![&self.plugin_request_chain, chain]) + let status = ClientRequestChains::new(vec![&self.plugin_request_chain, default_chain]) .traverse(request) .await; diff --git a/lychee-lib/src/lib.rs b/lychee-lib/src/lib.rs index a425063..272bd78 100644 --- a/lychee-lib/src/lib.rs +++ b/lychee-lib/src/lib.rs @@ -85,6 +85,8 @@ use openssl_sys as _; // required for vendored-openssl feature #[doc(inline)] pub use crate::{ basic_auth::BasicAuthExtractor, + // Expose the `Chainable` trait to allow defining external handlers (plugins) + chain::{ChainResult, Chainable}, // Constants get exposed so that the CLI can use the same defaults as the library client::{ check, Client, ClientBuilder, DEFAULT_MAX_REDIRECTS, DEFAULT_MAX_RETRIES,