mirror of
https://github.com/Hopiu/lychee.git
synced 2026-03-27 09:50:24 +00:00
Extract more status information from reqwest (#577)
Recently we cleaned up the commandline output to trim away redundant information like the URL, which occured twice. Unfortunately we also removed helpful information from reqwest, which could support the user in troubleshooting unexpected errors. This commit reverts that. We now extract the meaningful information from reqwest, without being too verbose. For that we have to depend on the string output for the reqwest error, but it's better than hiding that information from the user. It is fragile as it depends on the reqwest internals, but in the worst case we simply return the full error text in case our parsing won't work.
This commit is contained in:
parent
bb6c1377c8
commit
03d28820bb
6 changed files with 87 additions and 29 deletions
1
fixtures/bench/.gitignore
vendored
Normal file
1
fixtures/bench/.gitignore
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
million.txt
|
||||
|
|
@ -1,3 +1,4 @@
|
|||
pub(crate) mod path;
|
||||
pub(crate) mod request;
|
||||
pub(crate) mod reqwest;
|
||||
pub(crate) mod url;
|
||||
|
|
|
|||
39
lychee-lib/src/helpers/reqwest.rs
Normal file
39
lychee-lib/src/helpers/reqwest.rs
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
/// Extract the most relevant parts from a reqwest error
|
||||
///
|
||||
/// The reqwest `Error` fields aren't public as they are an implementation
|
||||
/// detail. Instead, a human-readable error message can be obtained. However,
|
||||
/// the error message is quite verbose and the information is redundant. For
|
||||
/// example it contains the `URL`, which is already part of our `ResponseBody`.
|
||||
/// Therefore we try to trim away the redundant parts so that the `ResponseBody`
|
||||
/// output is cleaner.
|
||||
pub(crate) fn trim_error_output(e: &reqwest::Error) -> String {
|
||||
// Defer to separate function for easier testability.
|
||||
// Otherwise a `reqwest::Error` object would have to be created.
|
||||
trim_inner(e.to_string())
|
||||
}
|
||||
|
||||
/// Get meaningful information from a reqwest error string.
|
||||
///
|
||||
/// At the moment we only extract everything after "error trying to connect",
|
||||
/// which is the most common error string in our tests.
|
||||
fn trim_inner(text: String) -> String {
|
||||
if let Some((_before, after)) = text.split_once("error trying to connect:") {
|
||||
return after.trim().to_string();
|
||||
}
|
||||
text
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_extract_reqwest_error() {
|
||||
let reqwest_error = "error sending request for url (https://example.com): error trying to connect: The certificate was not trusted.".to_string();
|
||||
|
||||
assert_eq!(
|
||||
trim_inner(reqwest_error),
|
||||
"The certificate was not trusted."
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,12 +1,12 @@
|
|||
use serde::{Serialize, Serializer};
|
||||
use std::any::Any;
|
||||
use std::error::Error;
|
||||
use std::hash::Hash;
|
||||
use std::{convert::Infallible, path::PathBuf};
|
||||
use thiserror::Error;
|
||||
use tokio::task::JoinError;
|
||||
|
||||
use super::InputContent;
|
||||
use crate::Uri;
|
||||
use crate::{helpers, Uri};
|
||||
|
||||
/// Kinds of status errors
|
||||
/// Note: The error messages can change over time, so don't match on the output
|
||||
|
|
@ -87,6 +87,39 @@ pub enum ErrorKind {
|
|||
/// Cannot parse the given URI
|
||||
#[error("The given URI is invalid: {0}")]
|
||||
InvalidURI(Uri),
|
||||
/// Regex error
|
||||
#[error("Error when using regex engine: {0}")]
|
||||
Regex(#[from] regex::Error),
|
||||
}
|
||||
|
||||
impl ErrorKind {
|
||||
/// Return more details from the given [`ErrorKind`]
|
||||
///
|
||||
/// What additional information we can extract depends on the underlying
|
||||
/// request type. The output is purely meant for humans (e.g. for status
|
||||
/// messages) and future changes are expected.
|
||||
#[must_use]
|
||||
pub fn details(&self) -> Option<String> {
|
||||
match self {
|
||||
ErrorKind::NetworkRequest(e) => {
|
||||
if let Some(status) = e.status() {
|
||||
Some(
|
||||
status
|
||||
.canonical_reason()
|
||||
.unwrap_or("Unknown status code")
|
||||
.to_string(),
|
||||
)
|
||||
} else {
|
||||
Some(helpers::reqwest::trim_error_output(e))
|
||||
}
|
||||
}
|
||||
ErrorKind::GithubRequest(e) => match e {
|
||||
octocrab::Error::GitHub { source, .. } => Some(source.message.to_string()),
|
||||
_ => None,
|
||||
},
|
||||
_ => self.source().map(ToString::to_string),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::match_same_arms)]
|
||||
|
|
@ -140,11 +173,11 @@ impl Hash for ErrorKind {
|
|||
Self::ReadResponseBody(e) => e.to_string().hash(state),
|
||||
Self::BuildRequestClient(e) => e.to_string().hash(state),
|
||||
Self::BuildGithubClient(e) => e.to_string().hash(state),
|
||||
Self::GithubRequest(e) => e.type_id().hash(state),
|
||||
Self::GithubRequest(e) => e.to_string().hash(state),
|
||||
Self::InvalidGithubUrl(s) => s.hash(state),
|
||||
Self::DirTraversal(e) => e.to_string().hash(state),
|
||||
Self::FileNotFound(e) => e.to_string_lossy().hash(state),
|
||||
Self::ParseUrl(e, s) => (e.type_id(), s).hash(state),
|
||||
Self::ParseUrl(e, s) => (e.to_string(), s).hash(state),
|
||||
Self::InvalidURI(u) => u.hash(state),
|
||||
Self::InvalidUrlFromPath(p) => p.hash(state),
|
||||
Self::Utf8(e) => e.to_string().hash(state),
|
||||
|
|
@ -158,6 +191,7 @@ impl Hash for ErrorKind {
|
|||
Self::MissingGitHubToken | Self::InvalidUrlHost => {
|
||||
std::mem::discriminant(self).hash(state);
|
||||
}
|
||||
Self::Regex(e) => e.to_string().hash(state),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
use std::{error::Error, fmt::Display};
|
||||
use std::fmt::Display;
|
||||
|
||||
use http::StatusCode;
|
||||
use serde::Serialize;
|
||||
|
||||
use crate::{ErrorKind, InputSource, Status, Uri};
|
||||
use crate::{InputSource, Status, Uri};
|
||||
|
||||
/// Response type returned by lychee after checking a URI
|
||||
#[derive(Debug)]
|
||||
|
|
@ -88,27 +88,10 @@ impl Display for ResponseBody {
|
|||
Status::Unsupported(e) => write!(f, "Unsupported {e}"),
|
||||
Status::Cached(status) => write!(f, "{status}"),
|
||||
Status::Error(e) => {
|
||||
let details = match e {
|
||||
ErrorKind::NetworkRequest(e) => {
|
||||
if let Some(status) = e.status() {
|
||||
status
|
||||
.canonical_reason()
|
||||
.unwrap_or("Unknown status code")
|
||||
.to_string()
|
||||
} else {
|
||||
"No status code".to_string()
|
||||
}
|
||||
}
|
||||
ErrorKind::GithubRequest(e) => match e {
|
||||
octocrab::Error::GitHub { source, .. } => source.message.to_string(),
|
||||
_ => "".to_string(),
|
||||
},
|
||||
_ => e.source().map_or("".to_string(), ToString::to_string),
|
||||
};
|
||||
if details.is_empty() {
|
||||
write!(f, "{e}")
|
||||
} else {
|
||||
if let Some(details) = e.details() {
|
||||
write!(f, "{e}: {details}")
|
||||
} else {
|
||||
write!(f, "{e}")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -156,9 +156,9 @@ impl Status {
|
|||
| ErrorKind::ReadResponseBody(e)
|
||||
| ErrorKind::BuildRequestClient(e) => match e.status() {
|
||||
Some(code) => code.as_str().to_string(),
|
||||
None => "ERROR".to_string(),
|
||||
None => "ERR".to_string(),
|
||||
},
|
||||
_ => "ERROR".to_string(),
|
||||
_ => "ERR".to_string(),
|
||||
},
|
||||
Status::Timeout(code) => match code {
|
||||
Some(code) => code.as_str().to_string(),
|
||||
|
|
@ -169,7 +169,7 @@ impl Status {
|
|||
CacheStatus::Ok(code) => code.to_string(),
|
||||
CacheStatus::Error(code) => match code {
|
||||
Some(code) => code.to_string(),
|
||||
None => "ERROR".to_string(),
|
||||
None => "ERR".to_string(),
|
||||
},
|
||||
CacheStatus::Excluded => "EXCLUDED".to_string(),
|
||||
CacheStatus::Unsupported => "IGNORED".to_string(),
|
||||
|
|
|
|||
Loading…
Reference in a new issue