diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs index 059610e..f661e52 100644 --- a/lychee-bin/src/main.rs +++ b/lychee-bin/src/main.rs @@ -1,3 +1,48 @@ +//! `lychee` is a fast, asynchronous, resource-friendly link checker. +//! It is able to find broken hyperlinks and mail addresses inside Markdown, +//! HTML, `reStructuredText`, and any other format. +//! +//! The lychee binary is a wrapper around lychee-lib, which provides +//! convenience functions for calling lychee from the command-line. +//! +//! Run it inside a repository with a `README.md`: +//! ```sh +//! lychee +//! ``` +//! +//! You can also specify various types of inputs: +//! +//! Check links on a website: +//! +//! ```sh +//! lychee https://endler.dev/ +//! ``` +//! +//! Check links in a remote file: +//! ```sh +//! lychee https://raw.githubusercontent.com/lycheeverse/lychee/master/README.md +//! ``` +//! +//! Check links in local file(s): +//! ```sh +//! lychee README.md +//! lychee test.html info.txt +//! ``` +//! +//! Check links in local files (by shell glob): +//! ```sh +//! lychee ~/projects/*/README.md +//! ``` +//! +//! Check links in local files (lychee supports advanced globbing and `~` expansion): +//! ```sh +//! lychee "~/projects/big_project/**/README.*" +//! ``` +//! +//! Ignore case when globbing and check result for each link: +//! ```sh +//! lychee --glob-ignore-case --verbose "~/projects/**/[r]eadme.*" +//! 
``` #![warn(clippy::all, clippy::pedantic)] #![warn( absolute_paths_not_starting_with_crate, @@ -11,6 +56,7 @@ clippy::missing_const_for_fn )] #![deny(anonymous_parameters, macro_use_extern_crate, pointer_structural_match)] +#![deny(missing_docs)] // required for apple silicon use ring as _; diff --git a/lychee-lib/src/client_pool.rs b/lychee-lib/src/client_pool.rs index 438e0ab..92b4d56 100644 --- a/lychee-lib/src/client_pool.rs +++ b/lychee-lib/src/client_pool.rs @@ -5,6 +5,8 @@ use tokio::sync::mpsc; use crate::{client, types}; #[allow(missing_debug_implementations)] +/// Manages a channel for incoming requests +/// and a pool of lychee clients to handle them pub struct ClientPool { tx: mpsc::Sender, rx: mpsc::Receiver, @@ -13,6 +15,7 @@ pub struct ClientPool { impl ClientPool { #[must_use] + /// Creates a new client pool pub fn new( tx: mpsc::Sender, rx: mpsc::Receiver, @@ -23,6 +26,8 @@ impl ClientPool { } #[allow(clippy::missing_panics_doc)] + /// Start listening for incoming requests and send each of them + /// asynchronously to a client from the pool pub async fn listen(&mut self) { while let Some(req) = self.rx.recv().await { let client = self.pool.get().await; diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index fb59be3..3b45d8d 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -19,14 +19,19 @@ use crate::{ }; const STDIN: &str = "-"; -/// Links which need to be validated. #[derive(Debug, Clone, PartialEq, Eq, Hash)] #[non_exhaustive] +/// The input sources which lychee accepts pub enum Input { /// URL (of HTTP/HTTPS scheme). RemoteUrl(Box), - /// Unix shell style glob pattern. - FsGlob { pattern: String, ignore_case: bool }, + /// Unix shell-style glob pattern. + FsGlob { + /// The glob pattern matching all input files + pattern: String, + /// Don't be case sensitive when matching files against a glob + ignore_case: bool, + }, /// File path. FsPath(PathBuf), /// Standard Input. 
@@ -57,14 +62,19 @@ impl Display for Input { } #[derive(Debug)] +/// Encapsulates the content for a given input pub struct InputContent { + /// Input source pub input: Input, + /// File type of given input pub file_type: FileType, + /// Raw UTF-8 string content pub content: String, } impl InputContent { #[must_use] + /// Create an instance of `InputContent` from an input string pub fn from_string(s: &str, file_type: FileType) -> Self { // TODO: consider using Cow (to avoid one .clone() for String types) Self { @@ -77,6 +87,9 @@ impl InputContent { impl Input { #[must_use] + /// Construct a new `Input` source. In case the input is a `glob` pattern, + /// `glob_ignore_case` decides whether matching files against the `glob` is + /// case-insensitive or not pub fn new(value: &str, glob_ignore_case: bool) -> Self { if value == STDIN { Self::Stdin @@ -97,7 +110,14 @@ impl Input { } } - #[allow(clippy::missing_panics_doc, clippy::missing_errors_doc)] + #[allow(clippy::missing_panics_doc)] + /// Retrieve the contents from the input + /// + /// # Errors + /// + /// Returns an error if the contents can not be retrieved + /// because of an underlying I/O error (e.g. 
an error while making a + /// network request or retrieving the contents from the file system) pub async fn get_contents( &self, file_type_hint: Option, diff --git a/lychee-lib/src/extract.rs b/lychee-lib/src/extract.rs index ff96a56..ae7022e 100644 --- a/lychee-lib/src/extract.rs +++ b/lychee-lib/src/extract.rs @@ -12,9 +12,13 @@ use url::Url; use crate::{collector::InputContent, Request, Uri}; #[derive(Copy, Clone, Debug, PartialEq, Eq)] +/// `FileType` defines which file types lychee can handle pub enum FileType { + /// File in HTML format Html, + /// File in Markdown format Markdown, + /// Generic text file without syntax-specific parsing Plaintext, } diff --git a/lychee-lib/src/filter/excludes.rs b/lychee-lib/src/filter/excludes.rs index 9e5b19c..418f292 100644 --- a/lychee-lib/src/filter/excludes.rs +++ b/lychee-lib/src/filter/excludes.rs @@ -11,12 +11,15 @@ pub struct Excludes { impl Excludes { #[inline] #[must_use] + /// Returns `true` if the given input string matches the regex set + /// and should hence be excluded from checking pub fn is_match(&self, input: &str) -> bool { self.regex.is_match(input) } #[inline] #[must_use] + /// Whether there were no regular expressions defined to be excluded pub fn is_empty(&self) -> bool { self.regex.is_empty() } diff --git a/lychee-lib/src/filter/includes.rs b/lychee-lib/src/filter/includes.rs index d2c2698..ef274b5 100644 --- a/lychee-lib/src/filter/includes.rs +++ b/lychee-lib/src/filter/includes.rs @@ -11,12 +11,15 @@ pub struct Includes { impl Includes { #[inline] #[must_use] + /// Returns `true` if the given input string matches the regex set + /// and should hence be included and checked pub fn is_match(&self, input: &str) -> bool { self.regex.is_match(input) } #[inline] #[must_use] + /// Whether there were no regular expressions defined for inclusion pub fn is_empty(&self) -> bool { self.regex.is_empty() } diff --git a/lychee-lib/src/filter/mod.rs b/lychee-lib/src/filter/mod.rs index 5d5b942..273bbd9 100644 --- 
a/lychee-lib/src/filter/mod.rs +++ b/lychee-lib/src/filter/mod.rs @@ -13,6 +13,9 @@ static FALSE_POSITIVE_PAT: &[&str] = &[r"http://www.w3.org/1999/xhtml"]; #[inline] #[must_use] +/// The given input is a well-known false-positive, which won't be checked by +/// default. This behavior can be explicitly overwritten by defining an +/// `Include` pattern, which will match on a false positive pub fn is_false_positive(input: &str) -> bool { input == FALSE_POSITIVE_PAT[0] } @@ -22,9 +25,12 @@ pub fn is_false_positive(input: &str) -> bool { #[allow(clippy::struct_excessive_bools)] #[derive(Clone, Debug, Default)] pub struct Filter { + /// URIs explicitly included for checking. This takes precedence over excludes pub includes: Option, + /// URIs excluded from checking pub excludes: Option, - // TODO: accept multiple scheme + /// Only check URIs with the given scheme (e.g. `https`) + // TODO: accept multiple schemes // TODO: includes scheme and excludes scheme // TODO: excludes_mail should be merged to excludes scheme // allowed scheme @@ -43,11 +49,13 @@ pub struct Filter { impl Filter { #[inline] #[must_use] + /// Whether e-mails aren't checked pub fn is_mail_excluded(&self, uri: &Uri) -> bool { uri.is_mail() && self.exclude_mail } #[must_use] + /// Whether IP addresses are excluded from checking pub fn is_ip_excluded(&self, uri: &Uri) -> bool { match uri.host_ip() { Some(ip_addr) if self.exclude_loopback_ips && ip_addr.is_loopback() => true, @@ -66,6 +74,7 @@ impl Filter { #[inline] #[must_use] + /// Whether the scheme of the given URI is excluded pub fn is_scheme_excluded(&self, uri: &Uri) -> bool { matches!(self.scheme, Some(ref scheme) if scheme != uri.scheme()) } diff --git a/lychee-lib/src/lib.rs b/lychee-lib/src/lib.rs index daa6043..0c426e6 100644 --- a/lychee-lib/src/lib.rs +++ b/lychee-lib/src/lib.rs @@ -40,7 +40,7 @@ clippy::missing_const_for_fn )] #![deny(anonymous_parameters, macro_use_extern_crate, pointer_structural_match)] -// #![deny(missing_docs)] 
+#![deny(missing_docs)] #[cfg(doctest)] doc_comment::doctest!("../../README.md"); @@ -51,9 +51,17 @@ mod quirks; mod types; mod uri; +/// Input sources and functionality to retrieve their contents pub mod collector; + +/// Functionality to extract URIs from inputs pub mod extract; + +/// Filters are a way to define behavior when encountering +/// URIs that need to be treated differently, such as +/// local IPs or e-mail addresses pub mod filter; + #[cfg(test)] #[macro_use] pub mod test_utils; diff --git a/lychee-lib/src/test_utils.rs b/lychee-lib/src/test_utils.rs index 2f2f78c..3bf76bc 100644 --- a/lychee-lib/src/test_utils.rs +++ b/lychee-lib/src/test_utils.rs @@ -5,6 +5,8 @@ use reqwest::Url; use crate::{ClientBuilder, ErrorKind, Request, Uri}; #[macro_export] +/// Creates a mock web server, which responds with a predefined status when +/// handling a matching request macro_rules! mock_server { ($status:expr $(, $func:tt ($($arg:expr),*))*) => {{ let mock_server = wiremock::MockServer::start().await; diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index bd97614..575a2fc 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -10,14 +10,24 @@ use crate::Uri; #[derive(Debug)] #[non_exhaustive] pub enum ErrorKind { + /// Any form of I/O error occurred while reading from a given path // TODO: maybe need to be splitted; currently first slot is Some only for reading files IoError(Option, std::io::Error), + /// Network error when trying to connect to an endpoint via reqwest ReqwestError(reqwest::Error), + /// Network error when trying to connect to an endpoint via hubcaps HubcapsError(hubcaps::Error), + /// The given string can not be parsed into a valid URL or e-mail address UrlParseError(String, (url::ParseError, Option)), + /// The given mail address is unreachable UnreachableEmailAddress(Uri), + /// The given header could not be parsed. + /// A possible error when converting a `HeaderValue` from a string or byte + /// slice. 
InvalidHeader(InvalidHeaderValue), + /// The given UNIX glob pattern is invalid InvalidGlobPattern(glob::PatternError), + /// The GitHub API could not be called because of a missing GitHub token MissingGitHubToken, } diff --git a/lychee-lib/src/types/mod.rs b/lychee-lib/src/types/mod.rs index db0a166..a48f7a9 100644 --- a/lychee-lib/src/types/mod.rs +++ b/lychee-lib/src/types/mod.rs @@ -10,4 +10,5 @@ pub use request::Request; pub use response::{Response, ResponseBody}; pub use status::Status; +/// The lychee `Result` type pub type Result = std::result::Result; diff --git a/lychee-lib/src/types/request.rs b/lychee-lib/src/types/request.rs index 7b9342f..60fd19f 100644 --- a/lychee-lib/src/types/request.rs +++ b/lychee-lib/src/types/request.rs @@ -2,13 +2,18 @@ use std::{convert::TryFrom, fmt::Display}; use crate::{ErrorKind, Input, Uri}; +/// A request type that can be handled by lychee #[derive(Debug, PartialEq, Eq, Hash, Clone)] pub struct Request { + /// A valid Uniform Resource Identifier of a given endpoint, which can be + /// checked with lychee pub uri: Uri, + /// The resource which contained the given URI pub source: Input, } impl Request { + /// Instantiate a new `Request` object #[inline] #[must_use] pub const fn new(uri: Uri, source: Input) -> Self { diff --git a/lychee-lib/src/types/response.rs b/lychee-lib/src/types/response.rs index 8f3b64d..ab060c8 100644 --- a/lychee-lib/src/types/response.rs +++ b/lychee-lib/src/types/response.rs @@ -4,18 +4,21 @@ use serde::Serialize; use crate::{Input, Status, Uri}; +/// Response type returned by lychee after checking a URI #[derive(Debug)] pub struct Response(pub Input, pub ResponseBody); impl Response { #[inline] #[must_use] + /// Create new response pub const fn new(uri: Uri, status: Status, source: Input) -> Self { Response(source, ResponseBody { uri, status }) } #[inline] #[must_use] + /// Retrieve the underlying status of the response pub const fn status(&self) -> &Status { &self.1.status } @@ -38,9 +41,12 @@ 
impl Serialize for Response { #[allow(clippy::module_name_repetitions)] #[derive(Debug, Serialize, Hash, PartialEq, Eq)] +/// Encapsulates the state of a URI check pub struct ResponseBody { #[serde(flatten)] + /// The URI which was checked pub uri: Uri, + /// The status of the check pub status: Status, } diff --git a/lychee-lib/src/types/status.rs b/lychee-lib/src/types/status.rs index 40d5699..b3234ac 100644 --- a/lychee-lib/src/types/status.rs +++ b/lychee-lib/src/types/status.rs @@ -53,6 +53,7 @@ impl Serialize for Status { impl Status { #[allow(clippy::missing_panics_doc)] #[must_use] + /// Create a status object from a response and the set of accepted status codes pub fn new(response: &Response, accepted: Option>) -> Self { let code = response.status(); @@ -70,29 +71,34 @@ impl Status { #[inline] #[must_use] + /// Returns `true` if the check was successful pub const fn is_success(&self) -> bool { matches!(self, Status::Ok(_)) } #[inline] #[must_use] + /// Returns `true` if the check was not successful pub const fn is_failure(&self) -> bool { matches!(self, Status::Error(_)) } #[inline] #[must_use] + /// Returns `true` if the check was excluded pub const fn is_excluded(&self) -> bool { matches!(self, Status::Excluded) } #[inline] #[must_use] + /// Returns `true` if a check took too long to complete pub const fn is_timeout(&self) -> bool { matches!(self, Status::Timeout(_)) } #[must_use] + /// Return a unicode icon to visualize the status pub const fn icon(&self) -> &str { match self { Status::Ok(_) => ICON_OK, diff --git a/lychee-lib/src/uri.rs b/lychee-lib/src/uri.rs index fa7d082..a25aad3 100644 --- a/lychee-lib/src/uri.rs +++ b/lychee-lib/src/uri.rs @@ -29,23 +29,32 @@ impl Uri { #[inline] #[must_use] + /// Returns the scheme of the URI (e.g. `http` or `mailto`) pub fn scheme(&self) -> &str { self.url.scheme() } #[inline] #[must_use] + /// Returns the domain of the URI (e.g. 
`example.org`) pub fn domain(&self) -> Option<&str> { self.url.domain() } #[inline] #[must_use] + /// Unless this URL is cannot-be-a-base, + /// return an iterator of '/' slash-separated path segments, + /// each as a percent-encoded ASCII string. + /// + /// Return `None` for cannot-be-a-base URLs. pub fn path_segments(&self) -> Option> { self.url.path_segments() } #[must_use] + /// Returns the IP address (either IPv4 or IPv6) of the URI, + /// or `None` if it is a domain pub fn host_ip(&self) -> Option { match self.url.host()? { url::Host::Domain(_) => None,