Respect timeout when retrieving archived link (#1526)

This commit is contained in:
Thomas Zahner 2024-10-12 21:49:50 +02:00 committed by GitHub
parent 3d414c2bc0
commit 17f62aef53
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 22 additions and 9 deletions

View file

@ -1,6 +1,6 @@
use reqwest::{Error, Url};
use serde::{Deserialize, Serialize};
use std::fmt::Display;
use std::{fmt::Display, time::Duration};
use strum::{Display, EnumIter, EnumString, VariantNames};
use crate::color::{color, GREEN, PINK};
@ -32,11 +32,15 @@ pub(crate) enum Archive {
}
impl Archive {
pub(crate) async fn get_link(&self, original: &Url) -> Result<Option<Url>, Error> {
pub(crate) async fn get_link(
&self,
original: &Url,
timeout: Duration,
) -> Result<Option<Url>, Error> {
let function = match self {
Archive::WaybackMachine => wayback::get_wayback_link,
};
function(original).await
function(original, timeout).await
}
}

View file

@ -1,17 +1,23 @@
use std::time::Duration;
use once_cell::sync::Lazy;
use serde::de::Error as SerdeError;
use serde::{Deserialize, Deserializer};
use http::StatusCode;
use reqwest::{Error, Url};
use reqwest::{Client, Error, Url};
static WAYBACK_URL: Lazy<Url> =
Lazy::new(|| Url::parse("https://archive.org/wayback/available").unwrap());
pub(crate) async fn get_wayback_link(url: &Url) -> Result<Option<Url>, Error> {
pub(crate) async fn get_wayback_link(url: &Url, timeout: Duration) -> Result<Option<Url>, Error> {
let mut archive_url: Url = WAYBACK_URL.clone();
archive_url.set_query(Some(&format!("url={url}")));
let response = reqwest::get(archive_url)
let response = Client::builder()
.timeout(timeout)
.build()?
.get(archive_url)
.send()
.await?
.json::<InternetArchiveResponse>()
.await?;
@ -74,7 +80,7 @@ mod tests {
// This test can be flaky because the Wayback Machine does not always
// return a suggestion. Retry a few times if needed.
for _ in 0..3 {
match get_wayback_link(&target_url).await {
match get_wayback_link(&target_url, Duration::from_secs(20)).await {
Ok(Some(suggested_url)) => {
// Ensure the host is correct
let host = suggested_url
@ -124,7 +130,7 @@ mod tests {
.try_into()
.unwrap();
let response = get_wayback_link(url).await?;
let response = get_wayback_link(url, Duration::from_secs(20)).await?;
assert_eq!(response, None);
Ok(())
}

View file

@ -17,6 +17,7 @@ use lychee_lib::{ResponseBody, Status};
use crate::archive::{Archive, Suggestion};
use crate::formatters::get_response_formatter;
use crate::formatters::response::ResponseFormatter;
use crate::parse::parse_duration_secs;
use crate::verbosity::Verbosity;
use crate::{cache::Cache, stats::ResponseStats, ExitCode};
@ -95,6 +96,7 @@ where
&mut stats,
!params.cfg.no_progress,
max_concurrency,
parse_duration_secs(params.cfg.timeout),
)
.await;
}
@ -112,6 +114,7 @@ async fn suggest_archived_links(
stats: &mut ResponseStats,
show_progress: bool,
max_concurrency: usize,
timeout: Duration,
) {
let failed_urls = &get_failed_urls(stats);
let bar = if show_progress {
@ -125,7 +128,7 @@ async fn suggest_archived_links(
let suggestions = Mutex::new(&mut stats.suggestion_map);
futures::stream::iter(failed_urls)
.map(|(input, url)| (input, url, archive.get_link(url)))
.map(|(input, url)| (input, url, archive.get_link(url, timeout)))
.for_each_concurrent(max_concurrency, |(input, url, future)| async {
if let Ok(Some(suggestion)) = future.await {
suggestions