diff --git a/fixtures/bench/.gitignore b/fixtures/bench/.gitignore new file mode 100644 index 0000000..49c5681 --- /dev/null +++ b/fixtures/bench/.gitignore @@ -0,0 +1 @@ +million.txt diff --git a/lychee-lib/src/helpers/mod.rs b/lychee-lib/src/helpers/mod.rs index 6602b70..fe6aec3 100644 --- a/lychee-lib/src/helpers/mod.rs +++ b/lychee-lib/src/helpers/mod.rs @@ -1,3 +1,4 @@ pub(crate) mod path; pub(crate) mod request; +pub(crate) mod reqwest; pub(crate) mod url; diff --git a/lychee-lib/src/helpers/reqwest.rs b/lychee-lib/src/helpers/reqwest.rs new file mode 100644 index 0000000..98e01f3 --- /dev/null +++ b/lychee-lib/src/helpers/reqwest.rs @@ -0,0 +1,39 @@ +/// Extract the most relevant parts from a reqwest error +/// +/// The reqwest `Error` fields aren't public as they are an implementation +/// detail. Instead, a human-readable error message can be obtained. However, +/// the error message is quite verbose and the information is redundant. For +/// example it contains the `URL`, which is already part of our `ResponseBody`. +/// Therefore we try to trim away the redundant parts so that the `ResponseBody` +/// output is cleaner. +pub(crate) fn trim_error_output(e: &reqwest::Error) -> String { + // Defer to separate function for easier testability. + // Otherwise a `reqwest::Error` object would have to be created. + trim_inner(e.to_string()) +} + +/// Get meaningful information from a reqwest error string. +/// +/// At the moment we only extract everything after "error trying to connect", +/// which is the most common error string in our tests. +fn trim_inner(text: String) -> String { + if let Some((_before, after)) = text.split_once("error trying to connect:") { + return after.trim().to_string(); + } + text +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_extract_reqwest_error() { + let reqwest_error = "error sending request for url (https://example.com): error trying to connect: The certificate was not trusted.".to_string(); + + assert_eq!( + trim_inner(reqwest_error), + "The certificate was not trusted." + ); + } +} diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index a91dc6b..1f98f2c 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -1,12 +1,12 @@ use serde::{Serialize, Serializer}; -use std::any::Any; +use std::error::Error; use std::hash::Hash; use std::{convert::Infallible, path::PathBuf}; use thiserror::Error; use tokio::task::JoinError; use super::InputContent; -use crate::Uri; +use crate::{helpers, Uri}; /// Kinds of status errors /// Note: The error messages can change over time, so don't match on the output @@ -87,6 +87,39 @@ pub enum ErrorKind { /// Cannot parse the given URI #[error("The given URI is invalid: {0}")] InvalidURI(Uri), + /// Regex error + #[error("Error when using regex engine: {0}")] + Regex(#[from] regex::Error), +} + +impl ErrorKind { + /// Return more details from the given [`ErrorKind`] + /// + /// What additional information we can extract depends on the underlying + /// request type. The output is purely meant for humans (e.g. for status + /// messages) and future changes are expected. + #[must_use] + pub fn details(&self) -> Option { + match self { + ErrorKind::NetworkRequest(e) => { + if let Some(status) = e.status() { + Some( + status + .canonical_reason() + .unwrap_or("Unknown status code") + .to_string(), + ) + } else { + Some(helpers::reqwest::trim_error_output(e)) + } + } + ErrorKind::GithubRequest(e) => match e { + octocrab::Error::GitHub { source, .. } => Some(source.message.to_string()), + _ => None, + }, + _ => self.source().map(ToString::to_string), + } + } } #[allow(clippy::match_same_arms)] @@ -140,11 +173,11 @@ impl Hash for ErrorKind { Self::ReadResponseBody(e) => e.to_string().hash(state), Self::BuildRequestClient(e) => e.to_string().hash(state), Self::BuildGithubClient(e) => e.to_string().hash(state), - Self::GithubRequest(e) => e.type_id().hash(state), + Self::GithubRequest(e) => e.to_string().hash(state), Self::InvalidGithubUrl(s) => s.hash(state), Self::DirTraversal(e) => e.to_string().hash(state), Self::FileNotFound(e) => e.to_string_lossy().hash(state), - Self::ParseUrl(e, s) => (e.type_id(), s).hash(state), + Self::ParseUrl(e, s) => (e.to_string(), s).hash(state), Self::InvalidURI(u) => u.hash(state), Self::InvalidUrlFromPath(p) => p.hash(state), Self::Utf8(e) => e.to_string().hash(state), @@ -158,6 +191,7 @@ impl Hash for ErrorKind { Self::MissingGitHubToken | Self::InvalidUrlHost => { std::mem::discriminant(self).hash(state); } + Self::Regex(e) => e.to_string().hash(state), } } } diff --git a/lychee-lib/src/types/response.rs b/lychee-lib/src/types/response.rs index 4aba751..7b6ef28 100644 --- a/lychee-lib/src/types/response.rs +++ b/lychee-lib/src/types/response.rs @@ -1,9 +1,9 @@ -use std::{error::Error, fmt::Display}; +use std::fmt::Display; use http::StatusCode; use serde::Serialize; -use crate::{ErrorKind, InputSource, Status, Uri}; +use crate::{InputSource, Status, Uri}; /// Response type returned by lychee after checking a URI #[derive(Debug)] @@ -88,27 +88,10 @@ impl Display for ResponseBody { Status::Unsupported(e) => write!(f, "Unsupported {e}"), Status::Cached(status) => write!(f, "{status}"), Status::Error(e) => { - let details = match e { - ErrorKind::NetworkRequest(e) => { - if let Some(status) = e.status() { - status - .canonical_reason() - .unwrap_or("Unknown status code") - .to_string() - } else { - "No status code".to_string() - } - } - ErrorKind::GithubRequest(e) => match e { - octocrab::Error::GitHub { source, .. } => source.message.to_string(), - _ => "".to_string(), - }, - _ => e.source().map_or("".to_string(), ToString::to_string), - }; - if details.is_empty() { - write!(f, "{e}") - } else { + if let Some(details) = e.details() { write!(f, "{e}: {details}") + } else { + write!(f, "{e}") } } } diff --git a/lychee-lib/src/types/status.rs b/lychee-lib/src/types/status.rs index c09219d..2afcfd0 100644 --- a/lychee-lib/src/types/status.rs +++ b/lychee-lib/src/types/status.rs @@ -156,9 +156,9 @@ impl Status { | ErrorKind::ReadResponseBody(e) | ErrorKind::BuildRequestClient(e) => match e.status() { Some(code) => code.as_str().to_string(), - None => "ERROR".to_string(), + None => "ERR".to_string(), }, - _ => "ERROR".to_string(), + _ => "ERR".to_string(), }, Status::Timeout(code) => match code { Some(code) => code.as_str().to_string(), @@ -169,7 +169,7 @@ impl Status { CacheStatus::Ok(code) => code.to_string(), CacheStatus::Error(code) => match code { Some(code) => code.to_string(), - None => "ERROR".to_string(), + None => "ERR".to_string(), }, CacheStatus::Excluded => "EXCLUDED".to_string(), CacheStatus::Unsupported => "IGNORED".to_string(),