Mirror of https://github.com/Hopiu/lychee.git (synced 2026-03-17 05:00:26 +00:00)
Respect timeout when retrieving archived link (#1526)
This commit is contained in:
parent 3d414c2bc0
commit 17f62aef53
3 changed files with 22 additions and 9 deletions
|
|
@@ -1,6 +1,6 @@
 use reqwest::{Error, Url};
 use serde::{Deserialize, Serialize};
-use std::fmt::Display;
+use std::{fmt::Display, time::Duration};
 use strum::{Display, EnumIter, EnumString, VariantNames};
 
 use crate::color::{color, GREEN, PINK};
|
|
@@ -32,11 +32,15 @@ pub(crate) enum Archive {
 }
 
 impl Archive {
-pub(crate) async fn get_link(&self, original: &Url) -> Result<Option<Url>, Error> {
+pub(crate) async fn get_link(
+&self,
+original: &Url,
+timeout: Duration,
+) -> Result<Option<Url>, Error> {
 let function = match self {
 Archive::WaybackMachine => wayback::get_wayback_link,
 };
 
-function(original).await
+function(original, timeout).await
 }
 }
|
|
|
|||
|
|
@@ -1,17 +1,23 @@
+use std::time::Duration;
+
 use once_cell::sync::Lazy;
 use serde::de::Error as SerdeError;
 use serde::{Deserialize, Deserializer};
 
 use http::StatusCode;
-use reqwest::{Error, Url};
+use reqwest::{Client, Error, Url};
 static WAYBACK_URL: Lazy<Url> =
 Lazy::new(|| Url::parse("https://archive.org/wayback/available").unwrap());
 
-pub(crate) async fn get_wayback_link(url: &Url) -> Result<Option<Url>, Error> {
+pub(crate) async fn get_wayback_link(url: &Url, timeout: Duration) -> Result<Option<Url>, Error> {
 let mut archive_url: Url = WAYBACK_URL.clone();
 archive_url.set_query(Some(&format!("url={url}")));
 
-let response = reqwest::get(archive_url)
+let response = Client::builder()
+.timeout(timeout)
+.build()?
+.get(archive_url)
+.send()
 .await?
 .json::<InternetArchiveResponse>()
 .await?;
|
|
@@ -74,7 +80,7 @@ mod tests {
 // This test can be flaky, because the wayback machine does not always
 // return a suggestion. Retry a few times if needed.
 for _ in 0..3 {
-match get_wayback_link(&target_url).await {
+match get_wayback_link(&target_url, Duration::from_secs(20)).await {
 Ok(Some(suggested_url)) => {
 // Ensure the host is correct
 let host = suggested_url
|
|
@@ -124,7 +130,7 @@ mod tests {
 .try_into()
 .unwrap();
 
-let response = get_wayback_link(url).await?;
+let response = get_wayback_link(url, Duration::from_secs(20)).await?;
 assert_eq!(response, None);
 Ok(())
 }
|
|
|
|||
|
|
@@ -17,6 +17,7 @@ use lychee_lib::{ResponseBody, Status};
 use crate::archive::{Archive, Suggestion};
 use crate::formatters::get_response_formatter;
 use crate::formatters::response::ResponseFormatter;
+use crate::parse::parse_duration_secs;
 use crate::verbosity::Verbosity;
 use crate::{cache::Cache, stats::ResponseStats, ExitCode};
 
|
|
@@ -95,6 +96,7 @@ where
 &mut stats,
 !params.cfg.no_progress,
 max_concurrency,
+parse_duration_secs(params.cfg.timeout),
 )
 .await;
 }
|
|
@@ -112,6 +114,7 @@ async fn suggest_archived_links(
 stats: &mut ResponseStats,
 show_progress: bool,
 max_concurrency: usize,
+timeout: Duration,
 ) {
 let failed_urls = &get_failed_urls(stats);
 let bar = if show_progress {
|
|
@@ -125,7 +128,7 @@ async fn suggest_archived_links(
 let suggestions = Mutex::new(&mut stats.suggestion_map);
 
 futures::stream::iter(failed_urls)
-.map(|(input, url)| (input, url, archive.get_link(url)))
+.map(|(input, url)| (input, url, archive.get_link(url, timeout)))
 .for_each_concurrent(max_concurrency, |(input, url, future)| async {
 if let Ok(Some(suggestion)) = future.await {
 suggestions
|
|
|
|||
Loading…
Reference in a new issue