mirror of
https://github.com/Hopiu/lychee.git
synced 2026-03-17 05:00:26 +00:00
Gracefully handle invalid URIs (#1414)
With the upgrade to `reqwest` 0.12, we can finally handle a long-standing issue, when Urls could not be parsed to Uris. Previously, we would panic, but we can now handle that situation gracefully and return an error instead. I've also renamed `Status::is_failure` to `Status::is_error`, because the notion of failures no longer exists in the codebase and we use the term "error" consistently throughout the codebase instead. This is technically a breaking change in the API, but it's fine since we have not released a stable version yet. More information about the URI parsing issue: - https://github.com/lycheeverse/lychee/issues/539 - https://github.com/seanmonstar/reqwest/issues/668
This commit is contained in:
parent
8451f8846b
commit
fc85695d21
4 changed files with 48 additions and 21 deletions
|
|
@ -10,7 +10,7 @@ use reqwest::Url;
|
|||
use tokio::sync::mpsc;
|
||||
use tokio_stream::wrappers::ReceiverStream;
|
||||
|
||||
use lychee_lib::{Client, Request, Response};
|
||||
use lychee_lib::{Client, ErrorKind, Request, Response};
|
||||
use lychee_lib::{InputSource, Result};
|
||||
use lychee_lib::{ResponseBody, Status};
|
||||
|
||||
|
|
@ -225,6 +225,25 @@ async fn request_channel_task(
|
|||
.await;
|
||||
}
|
||||
|
||||
/// Check a URL and return a response.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// This can fail when the URL could not be parsed to a URI.
|
||||
async fn check_url(client: &Client, request: Request) -> Response {
|
||||
// Request was not cached; run a normal check
|
||||
let uri = request.uri.clone();
|
||||
let source = request.source.clone();
|
||||
client.check(request).await.unwrap_or_else(|e| {
|
||||
log::error!("Error checking URL {}: Cannot parse URL to URI: {}", uri, e);
|
||||
Response::new(
|
||||
uri.clone(),
|
||||
Status::Error(ErrorKind::InvalidURI(uri.clone())),
|
||||
source,
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
/// Handle a single request
|
||||
async fn handle(
|
||||
client: &Client,
|
||||
|
|
@ -250,12 +269,7 @@ async fn handle(
|
|||
}
|
||||
|
||||
// Request was not cached; run a normal check
|
||||
//
|
||||
// This can panic when the Url could not be parsed to a Uri.
|
||||
// See https://github.com/servo/rust-url/issues/554
|
||||
// See https://github.com/seanmonstar/reqwest/issues/668
|
||||
// TODO: Handle error as soon as https://github.com/seanmonstar/reqwest/pull/1399 got merged
|
||||
let response = client.check(request).await.expect("cannot check URI");
|
||||
let response = check_url(client, request).await;
|
||||
|
||||
// - Never cache filesystem access as it is fast already so caching has no
|
||||
// benefit.
|
||||
|
|
@ -318,7 +332,7 @@ fn get_failed_urls(stats: &mut ResponseStats) -> Vec<(InputSource, Url)> {
|
|||
mod tests {
|
||||
use log::info;
|
||||
|
||||
use lychee_lib::{CacheStatus, InputSource, ResponseBody, Uri};
|
||||
use lychee_lib::{CacheStatus, ClientBuilder, InputSource, ResponseBody, Uri};
|
||||
|
||||
use crate::formatters;
|
||||
|
||||
|
|
@ -367,4 +381,17 @@ mod tests {
|
|||
let buf = String::from_utf8_lossy(&buf);
|
||||
assert_eq!(buf, "↻ [200] http://127.0.0.1/ | Cached: OK (cached)\n");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_invalid_url() {
|
||||
// Run a normal request with an invalid Url
|
||||
let client = ClientBuilder::builder().build().client().unwrap();
|
||||
let request = Request::try_from("http://\"").unwrap();
|
||||
let response = check_url(&client, request).await;
|
||||
assert!(response.status().is_error());
|
||||
assert!(matches!(
|
||||
response.status(),
|
||||
Status::Error(ErrorKind::InvalidURI(_))
|
||||
));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ impl ResponseStats {
|
|||
self.increment_status_counters(status);
|
||||
|
||||
match status {
|
||||
_ if status.is_failure() => {
|
||||
_ if status.is_error() => {
|
||||
let fail = self.fail_map.entry(source).or_default();
|
||||
fail.insert(response.1);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -761,13 +761,13 @@ mod tests {
|
|||
let mock_server = mock_server!(StatusCode::NOT_FOUND);
|
||||
let res = get_mock_client_response(mock_server.uri()).await;
|
||||
|
||||
assert!(res.status().is_failure());
|
||||
assert!(res.status().is_error());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_nonexistent_with_path() {
|
||||
let res = get_mock_client_response("http://127.0.0.1/invalid").await;
|
||||
assert!(res.status().is_failure());
|
||||
assert!(res.status().is_error());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -779,7 +779,7 @@ mod tests {
|
|||
#[tokio::test]
|
||||
async fn test_github_nonexistent_repo() {
|
||||
let res = get_mock_client_response("https://github.com/lycheeverse/not-lychee").await;
|
||||
assert!(res.status().is_failure());
|
||||
assert!(res.status().is_error());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -788,7 +788,7 @@ mod tests {
|
|||
"https://github.com/lycheeverse/lychee/blob/master/NON_EXISTENT_FILE.md",
|
||||
)
|
||||
.await;
|
||||
assert!(res.status().is_failure());
|
||||
assert!(res.status().is_error());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -798,7 +798,7 @@ mod tests {
|
|||
assert!(res.status().is_success());
|
||||
|
||||
let res = get_mock_client_response("https://www.youtube.com/watch?v=invalidNlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7").await;
|
||||
assert!(res.status().is_failure());
|
||||
assert!(res.status().is_error());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -831,7 +831,7 @@ mod tests {
|
|||
async fn test_invalid_ssl() {
|
||||
let res = get_mock_client_response("https://expired.badssl.com/").await;
|
||||
|
||||
assert!(res.status().is_failure());
|
||||
assert!(res.status().is_error());
|
||||
|
||||
// Same, but ignore certificate error
|
||||
let res = ClientBuilder::builder()
|
||||
|
|
@ -920,7 +920,7 @@ mod tests {
|
|||
.client()
|
||||
.unwrap();
|
||||
let res = client.check("http://example.com").await.unwrap();
|
||||
assert!(res.status().is_failure());
|
||||
assert!(res.status().is_error());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -984,7 +984,7 @@ mod tests {
|
|||
let res = client.check(mock_server.uri()).await.unwrap();
|
||||
let end = start.elapsed();
|
||||
|
||||
assert!(res.status().is_failure());
|
||||
assert!(res.status().is_error());
|
||||
|
||||
// on slow connections, this might take a bit longer than nominal
|
||||
// backed-off timeout (7 secs)
|
||||
|
|
@ -996,7 +996,7 @@ mod tests {
|
|||
let client = ClientBuilder::builder().build().client().unwrap();
|
||||
// This request will fail, but it won't panic
|
||||
let res = client.check("http://\"").await.unwrap();
|
||||
assert!(res.status().is_failure());
|
||||
assert!(res.status().is_error());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -1029,7 +1029,7 @@ mod tests {
|
|||
.unwrap();
|
||||
|
||||
let res = client.check(redirect_uri.clone()).await.unwrap();
|
||||
assert!(res.status().is_failure());
|
||||
assert!(res.status().is_error());
|
||||
|
||||
let client = ClientBuilder::builder()
|
||||
.max_redirects(1_usize)
|
||||
|
|
@ -1060,7 +1060,7 @@ mod tests {
|
|||
.unwrap();
|
||||
|
||||
let res = client.check(mock_server.uri()).await.unwrap();
|
||||
assert!(res.status().is_failure());
|
||||
assert!(res.status().is_error());
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
|
|||
|
|
@ -160,7 +160,7 @@ impl Status {
|
|||
#[inline]
|
||||
#[must_use]
|
||||
/// Returns `true` if the check was not successful
|
||||
pub const fn is_failure(&self) -> bool {
|
||||
pub const fn is_error(&self) -> bool {
|
||||
matches!(
|
||||
self,
|
||||
Status::Error(_) | Status::Cached(CacheStatus::Error(_)) | Status::Timeout(_)
|
||||
|
|
|
|||
Loading…
Reference in a new issue