Extract more status information from reqwest (#577)

Recently we cleaned up the commandline output to trim away redundant
information like the URL, which occured twice.
Unfortunately we also removed helpful information from reqwest, which
could support the user in troubleshooting unexpected errors.

This commit reverts that.
We now extract the meaningful information from reqwest, without being
too verbose. For that we have to depend on the string output for the
reqwest error, but it's better than hiding that information from the user.
It is fragile as it depends on the reqwest internals, but in the worst case
we simply return the full error text in case our parsing won't work.
This commit is contained in:
Matthias 2022-04-02 14:37:03 +02:00 committed by GitHub
parent bb6c1377c8
commit 03d28820bb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 87 additions and 29 deletions

1
fixtures/bench/.gitignore vendored Normal file
View file

@ -0,0 +1 @@
million.txt

View file

@ -1,3 +1,4 @@
pub(crate) mod path;
pub(crate) mod request;
pub(crate) mod reqwest;
pub(crate) mod url;

View file

@ -0,0 +1,39 @@
/// Extract the most relevant parts from a reqwest error
///
/// The reqwest `Error` fields aren't public as they are an implementation
/// detail. Instead, a human-readable error message can be obtained. However,
/// the error message is quite verbose and the information is redundant. For
/// example it contains the `URL`, which is already part of our `ResponseBody`.
/// Therefore we try to trim away the redundant parts so that the `ResponseBody`
/// output is cleaner.
pub(crate) fn trim_error_output(e: &reqwest::Error) -> String {
// Defer to separate function for easier testability.
// Otherwise a `reqwest::Error` object would have to be created.
trim_inner(e.to_string())
}
/// Get meaningful information from a reqwest error string.
///
/// At the moment we only extract everything after "error trying to connect",
/// which is the most common error string in our tests.
fn trim_inner(text: String) -> String {
if let Some((_before, after)) = text.split_once("error trying to connect:") {
return after.trim().to_string();
}
text
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_extract_reqwest_error() {
let reqwest_error = "error sending request for url (https://example.com): error trying to connect: The certificate was not trusted.".to_string();
assert_eq!(
trim_inner(reqwest_error),
"The certificate was not trusted."
);
}
}

View file

@ -1,12 +1,12 @@
use serde::{Serialize, Serializer};
use std::any::Any;
use std::error::Error;
use std::hash::Hash;
use std::{convert::Infallible, path::PathBuf};
use thiserror::Error;
use tokio::task::JoinError;
use super::InputContent;
use crate::Uri;
use crate::{helpers, Uri};
/// Kinds of status errors
/// Note: The error messages can change over time, so don't match on the output
@ -87,6 +87,39 @@ pub enum ErrorKind {
/// Cannot parse the given URI
#[error("The given URI is invalid: {0}")]
InvalidURI(Uri),
/// Regex error
#[error("Error when using regex engine: {0}")]
Regex(#[from] regex::Error),
}
impl ErrorKind {
/// Return more details from the given [`ErrorKind`]
///
/// What additional information we can extract depends on the underlying
/// request type. The output is purely meant for humans (e.g. for status
/// messages) and future changes are expected.
#[must_use]
pub fn details(&self) -> Option<String> {
match self {
ErrorKind::NetworkRequest(e) => {
if let Some(status) = e.status() {
Some(
status
.canonical_reason()
.unwrap_or("Unknown status code")
.to_string(),
)
} else {
Some(helpers::reqwest::trim_error_output(e))
}
}
ErrorKind::GithubRequest(e) => match e {
octocrab::Error::GitHub { source, .. } => Some(source.message.to_string()),
_ => None,
},
_ => self.source().map(ToString::to_string),
}
}
}
#[allow(clippy::match_same_arms)]
@ -140,11 +173,11 @@ impl Hash for ErrorKind {
Self::ReadResponseBody(e) => e.to_string().hash(state),
Self::BuildRequestClient(e) => e.to_string().hash(state),
Self::BuildGithubClient(e) => e.to_string().hash(state),
Self::GithubRequest(e) => e.type_id().hash(state),
Self::GithubRequest(e) => e.to_string().hash(state),
Self::InvalidGithubUrl(s) => s.hash(state),
Self::DirTraversal(e) => e.to_string().hash(state),
Self::FileNotFound(e) => e.to_string_lossy().hash(state),
Self::ParseUrl(e, s) => (e.type_id(), s).hash(state),
Self::ParseUrl(e, s) => (e.to_string(), s).hash(state),
Self::InvalidURI(u) => u.hash(state),
Self::InvalidUrlFromPath(p) => p.hash(state),
Self::Utf8(e) => e.to_string().hash(state),
@ -158,6 +191,7 @@ impl Hash for ErrorKind {
Self::MissingGitHubToken | Self::InvalidUrlHost => {
std::mem::discriminant(self).hash(state);
}
Self::Regex(e) => e.to_string().hash(state),
}
}
}

View file

@ -1,9 +1,9 @@
use std::{error::Error, fmt::Display};
use std::fmt::Display;
use http::StatusCode;
use serde::Serialize;
use crate::{ErrorKind, InputSource, Status, Uri};
use crate::{InputSource, Status, Uri};
/// Response type returned by lychee after checking a URI
#[derive(Debug)]
@ -88,27 +88,10 @@ impl Display for ResponseBody {
Status::Unsupported(e) => write!(f, "Unsupported {e}"),
Status::Cached(status) => write!(f, "{status}"),
Status::Error(e) => {
let details = match e {
ErrorKind::NetworkRequest(e) => {
if let Some(status) = e.status() {
status
.canonical_reason()
.unwrap_or("Unknown status code")
.to_string()
} else {
"No status code".to_string()
}
}
ErrorKind::GithubRequest(e) => match e {
octocrab::Error::GitHub { source, .. } => source.message.to_string(),
_ => "".to_string(),
},
_ => e.source().map_or("".to_string(), ToString::to_string),
};
if details.is_empty() {
write!(f, "{e}")
} else {
if let Some(details) = e.details() {
write!(f, "{e}: {details}")
} else {
write!(f, "{e}")
}
}
}

View file

@ -156,9 +156,9 @@ impl Status {
| ErrorKind::ReadResponseBody(e)
| ErrorKind::BuildRequestClient(e) => match e.status() {
Some(code) => code.as_str().to_string(),
None => "ERROR".to_string(),
None => "ERR".to_string(),
},
_ => "ERROR".to_string(),
_ => "ERR".to_string(),
},
Status::Timeout(code) => match code {
Some(code) => code.as_str().to_string(),
@ -169,7 +169,7 @@ impl Status {
CacheStatus::Ok(code) => code.to_string(),
CacheStatus::Error(code) => match code {
Some(code) => code.to_string(),
None => "ERROR".to_string(),
None => "ERR".to_string(),
},
CacheStatus::Excluded => "EXCLUDED".to_string(),
CacheStatus::Unsupported => "IGNORED".to_string(),