From 03d28820bbe19de642a11ceccebfbb3e6a58c91e Mon Sep 17 00:00:00 2001 From: Matthias Date: Sat, 2 Apr 2022 14:37:03 +0200 Subject: [PATCH] Extract more status information from reqwest (#577) Recently we cleaned up the commandline output to trim away redundant information like the URL, which occured twice. Unfortunately we also removed helpful information from reqwest, which could support the user in troubleshooting unexpected errors. This commit reverts that. We now extract the meaningful information from reqwest, without being too verbose. For that we have to depend on the string output for the reqwest error, but it's better than hiding that information from the user. It is fragile as it depends on the reqwest internals, but in the worst case we simply return the full error text in case our parsing won't work. --- fixtures/bench/.gitignore | 1 + lychee-lib/src/helpers/mod.rs | 1 + lychee-lib/src/helpers/reqwest.rs | 39 ++++++++++++++++++++++++++++ lychee-lib/src/types/error.rs | 42 ++++++++++++++++++++++++++++--- lychee-lib/src/types/response.rs | 27 ++++---------------- lychee-lib/src/types/status.rs | 6 ++--- 6 files changed, 87 insertions(+), 29 deletions(-) create mode 100644 fixtures/bench/.gitignore create mode 100644 lychee-lib/src/helpers/reqwest.rs diff --git a/fixtures/bench/.gitignore b/fixtures/bench/.gitignore new file mode 100644 index 0000000..49c5681 --- /dev/null +++ b/fixtures/bench/.gitignore @@ -0,0 +1 @@ +million.txt diff --git a/lychee-lib/src/helpers/mod.rs b/lychee-lib/src/helpers/mod.rs index 6602b70..fe6aec3 100644 --- a/lychee-lib/src/helpers/mod.rs +++ b/lychee-lib/src/helpers/mod.rs @@ -1,3 +1,4 @@ pub(crate) mod path; pub(crate) mod request; +pub(crate) mod reqwest; pub(crate) mod url; diff --git a/lychee-lib/src/helpers/reqwest.rs b/lychee-lib/src/helpers/reqwest.rs new file mode 100644 index 0000000..98e01f3 --- /dev/null +++ b/lychee-lib/src/helpers/reqwest.rs @@ -0,0 +1,39 @@ +/// Extract the most relevant parts from a reqwest error +/// +/// The reqwest `Error` fields aren't public as they are an implementation +/// detail. Instead, a human-readable error message can be obtained. However, +/// the error message is quite verbose and the information is redundant. For +/// example it contains the `URL`, which is already part of our `ResponseBody`. +/// Therefore we try to trim away the redundant parts so that the `ResponseBody` +/// output is cleaner. +pub(crate) fn trim_error_output(e: &reqwest::Error) -> String { + // Defer to separate function for easier testability. + // Otherwise a `reqwest::Error` object would have to be created. + trim_inner(e.to_string()) +} + +/// Get meaningful information from a reqwest error string. +/// +/// At the moment we only extract everything after "error trying to connect", +/// which is the most common error string in our tests. +fn trim_inner(text: String) -> String { + if let Some((_before, after)) = text.split_once("error trying to connect:") { + return after.trim().to_string(); + } + text +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_extract_reqwest_error() { + let reqwest_error = "error sending request for url (https://example.com): error trying to connect: The certificate was not trusted.".to_string(); + + assert_eq!( + trim_inner(reqwest_error), + "The certificate was not trusted." + ); + } +} diff --git a/lychee-lib/src/types/error.rs b/lychee-lib/src/types/error.rs index a91dc6b..1f98f2c 100644 --- a/lychee-lib/src/types/error.rs +++ b/lychee-lib/src/types/error.rs @@ -1,12 +1,12 @@ use serde::{Serialize, Serializer}; -use std::any::Any; +use std::error::Error; use std::hash::Hash; use std::{convert::Infallible, path::PathBuf}; use thiserror::Error; use tokio::task::JoinError; use super::InputContent; -use crate::Uri; +use crate::{helpers, Uri}; /// Kinds of status errors /// Note: The error messages can change over time, so don't match on the output @@ -87,6 +87,39 @@ pub enum ErrorKind { /// Cannot parse the given URI #[error("The given URI is invalid: {0}")] InvalidURI(Uri), + /// Regex error + #[error("Error when using regex engine: {0}")] + Regex(#[from] regex::Error), +} + +impl ErrorKind { + /// Return more details from the given [`ErrorKind`] + /// + /// What additional information we can extract depends on the underlying + /// request type. The output is purely meant for humans (e.g. for status + /// messages) and future changes are expected. + #[must_use] + pub fn details(&self) -> Option { + match self { + ErrorKind::NetworkRequest(e) => { + if let Some(status) = e.status() { + Some( + status + .canonical_reason() + .unwrap_or("Unknown status code") + .to_string(), + ) + } else { + Some(helpers::reqwest::trim_error_output(e)) + } + } + ErrorKind::GithubRequest(e) => match e { + octocrab::Error::GitHub { source, .. } => Some(source.message.to_string()), + _ => None, + }, + _ => self.source().map(ToString::to_string), + } + } } #[allow(clippy::match_same_arms)] @@ -140,11 +173,11 @@ impl Hash for ErrorKind { Self::ReadResponseBody(e) => e.to_string().hash(state), Self::BuildRequestClient(e) => e.to_string().hash(state), Self::BuildGithubClient(e) => e.to_string().hash(state), - Self::GithubRequest(e) => e.type_id().hash(state), + Self::GithubRequest(e) => e.to_string().hash(state), Self::InvalidGithubUrl(s) => s.hash(state), Self::DirTraversal(e) => e.to_string().hash(state), Self::FileNotFound(e) => e.to_string_lossy().hash(state), - Self::ParseUrl(e, s) => (e.type_id(), s).hash(state), + Self::ParseUrl(e, s) => (e.to_string(), s).hash(state), Self::InvalidURI(u) => u.hash(state), Self::InvalidUrlFromPath(p) => p.hash(state), Self::Utf8(e) => e.to_string().hash(state), @@ -158,6 +191,7 @@ impl Hash for ErrorKind { Self::MissingGitHubToken | Self::InvalidUrlHost => { std::mem::discriminant(self).hash(state); } + Self::Regex(e) => e.to_string().hash(state), } } } diff --git a/lychee-lib/src/types/response.rs b/lychee-lib/src/types/response.rs index 4aba751..7b6ef28 100644 --- a/lychee-lib/src/types/response.rs +++ b/lychee-lib/src/types/response.rs @@ -1,9 +1,9 @@ -use std::{error::Error, fmt::Display}; +use std::fmt::Display; use http::StatusCode; use serde::Serialize; -use crate::{ErrorKind, InputSource, Status, Uri}; +use crate::{InputSource, Status, Uri}; /// Response type returned by lychee after checking a URI #[derive(Debug)] @@ -88,27 +88,10 @@ impl Display for ResponseBody { Status::Unsupported(e) => write!(f, "Unsupported {e}"), Status::Cached(status) => write!(f, "{status}"), Status::Error(e) => { - let details = match e { - ErrorKind::NetworkRequest(e) => { - if let Some(status) = e.status() { - status - .canonical_reason() - .unwrap_or("Unknown status code") - .to_string() - } else { - "No status code".to_string() - } - } - ErrorKind::GithubRequest(e) => match e { - octocrab::Error::GitHub { source, .. } => source.message.to_string(), - _ => "".to_string(), - }, - _ => e.source().map_or("".to_string(), ToString::to_string), - }; - if details.is_empty() { - write!(f, "{e}") - } else { + if let Some(details) = e.details() { write!(f, "{e}: {details}") + } else { + write!(f, "{e}") } } } diff --git a/lychee-lib/src/types/status.rs b/lychee-lib/src/types/status.rs index c09219d..2afcfd0 100644 --- a/lychee-lib/src/types/status.rs +++ b/lychee-lib/src/types/status.rs @@ -156,9 +156,9 @@ impl Status { | ErrorKind::ReadResponseBody(e) | ErrorKind::BuildRequestClient(e) => match e.status() { Some(code) => code.as_str().to_string(), - None => "ERROR".to_string(), + None => "ERR".to_string(), }, - _ => "ERROR".to_string(), + _ => "ERR".to_string(), }, Status::Timeout(code) => match code { Some(code) => code.as_str().to_string(), @@ -169,7 +169,7 @@ impl Status { CacheStatus::Ok(code) => code.to_string(), CacheStatus::Error(code) => match code { Some(code) => code.to_string(), - None => "ERROR".to_string(), + None => "ERR".to_string(), }, CacheStatus::Excluded => "EXCLUDED".to_string(), CacheStatus::Unsupported => "IGNORED".to_string(),