mirror of
https://github.com/Hopiu/lychee.git
synced 2026-04-01 20:20:32 +00:00
Make retry wait time configurable (#525)
This commit is contained in:
parent
a5a56006dd
commit
05bd3817ee
6 changed files with 69 additions and 16 deletions
|
|
@ -246,10 +246,13 @@ OPTIONS:
|
|||
--max-retries <max-retries> Maximum number of retries per request [default: 3]
|
||||
-X, --method <method> Request method [default: get]
|
||||
-o, --output <output> Output file of status report
|
||||
-r, --retry-wait-time <retry-wait-time> Minimum wait time in seconds between retries of failed requests [default:
|
||||
1]
|
||||
-s, --scheme <scheme>... Only test links with the given schemes (e.g. http and https)
|
||||
-T, --threads <threads> Number of threads to utilize. Defaults to number of cores available to
|
||||
the system
|
||||
-t, --timeout <timeout> Website timeout from connect to response finished [default: 20]
|
||||
-t, --timeout <timeout> Website timeout in seconds from connect to response finished [default:
|
||||
20]
|
||||
-u, --user-agent <user-agent> User agent [default: lychee/0.8.2]
|
||||
|
||||
ARGS:
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
use crate::options::Config;
|
||||
use crate::parse::{parse_basic_auth, parse_headers, parse_statuscodes, parse_timeout};
|
||||
use crate::parse::{parse_basic_auth, parse_duration_secs, parse_headers, parse_statuscodes};
|
||||
use anyhow::{Context, Result};
|
||||
use headers::HeaderMapExt;
|
||||
use lychee_lib::{Client, ClientBuilder};
|
||||
|
|
@ -15,7 +15,8 @@ pub(crate) fn create(cfg: &Config) -> Result<Client> {
|
|||
}
|
||||
|
||||
let accepted = cfg.accept.clone().and_then(|a| parse_statuscodes(&a).ok());
|
||||
let timeout = parse_timeout(cfg.timeout);
|
||||
let timeout = parse_duration_secs(cfg.timeout);
|
||||
let retry_wait_time = parse_duration_secs(cfg.retry_wait_time);
|
||||
let method: reqwest::Method = reqwest::Method::from_str(&cfg.method.to_uppercase())?;
|
||||
let include = RegexSet::new(&cfg.include)?;
|
||||
let exclude = RegexSet::new(&cfg.exclude)?;
|
||||
|
|
@ -41,6 +42,7 @@ pub(crate) fn create(cfg: &Config) -> Result<Client> {
|
|||
.custom_headers(headers)
|
||||
.method(method)
|
||||
.timeout(timeout)
|
||||
.retry_wait_time(retry_wait_time)
|
||||
.github_token(cfg.github_token.clone())
|
||||
.schemes(HashSet::from_iter(schemes))
|
||||
.accepted(accepted)
|
||||
|
|
|
|||
|
|
@ -3,7 +3,8 @@ use std::{convert::TryFrom, fs, io::ErrorKind, path::PathBuf, str::FromStr, time
|
|||
use anyhow::{anyhow, Error, Result};
|
||||
use const_format::{concatcp, formatcp};
|
||||
use lychee_lib::{
|
||||
Base, Input, DEFAULT_MAX_REDIRECTS, DEFAULT_MAX_RETRIES, DEFAULT_TIMEOUT, DEFAULT_USER_AGENT,
|
||||
Base, Input, DEFAULT_MAX_REDIRECTS, DEFAULT_MAX_RETRIES, DEFAULT_RETRY_WAIT_TIME_SECS,
|
||||
DEFAULT_TIMEOUT_SECS, DEFAULT_USER_AGENT,
|
||||
};
|
||||
use secrecy::{ExposeSecret, SecretString};
|
||||
use serde::Deserialize;
|
||||
|
|
@ -33,7 +34,8 @@ expressions supported; one pattern per line. Automatically excludes
|
|||
patterns from `{}` if file exists",
|
||||
LYCHEE_IGNORE_FILE,
|
||||
);
|
||||
const TIMEOUT_STR: &str = concatcp!(DEFAULT_TIMEOUT);
|
||||
const TIMEOUT_STR: &str = concatcp!(DEFAULT_TIMEOUT_SECS);
|
||||
const RETRY_WAIT_TIME_STR: &str = concatcp!(DEFAULT_RETRY_WAIT_TIME_SECS);
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
pub(crate) enum Format {
|
||||
|
|
@ -81,7 +83,8 @@ default_function! {
|
|||
max_concurrency: usize = DEFAULT_MAX_CONCURRENCY;
|
||||
max_cache_age: Duration = humantime::parse_duration(DEFAULT_MAX_CACHE_AGE).unwrap();
|
||||
user_agent: String = DEFAULT_USER_AGENT.to_string();
|
||||
timeout: usize = DEFAULT_TIMEOUT;
|
||||
timeout: usize = DEFAULT_TIMEOUT_SECS;
|
||||
retry_wait_time: usize = DEFAULT_RETRY_WAIT_TIME_SECS;
|
||||
method: String = DEFAULT_METHOD.to_string();
|
||||
}
|
||||
|
||||
|
|
@ -260,11 +263,16 @@ pub(crate) struct Config {
|
|||
#[serde(default)]
|
||||
pub(crate) accept: Option<String>,
|
||||
|
||||
/// Website timeout from connect to response finished
|
||||
/// Website timeout in seconds from connect to response finished
|
||||
#[structopt(short, long, default_value = &TIMEOUT_STR)]
|
||||
#[serde(default = "timeout")]
|
||||
pub(crate) timeout: usize,
|
||||
|
||||
/// Minimum wait time in seconds between retries of failed requests
|
||||
#[structopt(short, long, default_value = &RETRY_WAIT_TIME_STR)]
|
||||
#[serde(default = "retry_wait_time")]
|
||||
pub(crate) retry_wait_time: usize,
|
||||
|
||||
/// Request method
|
||||
// Using `-X` as a short param similar to curl
|
||||
#[structopt(short = "X", long, default_value = DEFAULT_METHOD)]
|
||||
|
|
@ -361,7 +369,8 @@ impl Config {
|
|||
exclude_mail: false;
|
||||
headers: Vec::<String>::new();
|
||||
accept: None;
|
||||
timeout: DEFAULT_TIMEOUT;
|
||||
timeout: DEFAULT_TIMEOUT_SECS;
|
||||
retry_wait_time: DEFAULT_RETRY_WAIT_TIME_SECS;
|
||||
method: DEFAULT_METHOD;
|
||||
base: None;
|
||||
basic_auth: None;
|
||||
|
|
|
|||
|
|
@ -14,8 +14,8 @@ fn read_header(input: &str) -> Result<(String, String)> {
|
|||
Ok((elements[0].into(), elements[1].into()))
|
||||
}
|
||||
|
||||
pub(crate) const fn parse_timeout(timeout: usize) -> Duration {
|
||||
Duration::from_secs(timeout as u64)
|
||||
pub(crate) const fn parse_duration_secs(secs: usize) -> Duration {
|
||||
Duration::from_secs(secs as u64)
|
||||
}
|
||||
|
||||
pub(crate) fn parse_headers<T: AsRef<str>>(headers: &[T]) -> Result<HeaderMap> {
|
||||
|
|
|
|||
|
|
@ -39,9 +39,9 @@ pub const DEFAULT_MAX_REDIRECTS: usize = 5;
|
|||
/// Default number of retries before a request is deemed as failed, 3.
|
||||
pub const DEFAULT_MAX_RETRIES: u64 = 3;
|
||||
/// Default initial wait time in seconds between retries of a failed request, 1.
|
||||
pub const DEFAULT_RETRY_WAIT_TIME: u64 = 1;
|
||||
pub const DEFAULT_RETRY_WAIT_TIME_SECS: usize = 1;
|
||||
/// Default timeout in seconds before a request is deemed as failed, 20.
|
||||
pub const DEFAULT_TIMEOUT: usize = 20;
|
||||
pub const DEFAULT_TIMEOUT_SECS: usize = 20;
|
||||
/// Default user agent, `lychee-<PKG_VERSION>`.
|
||||
pub const DEFAULT_USER_AGENT: &str = concat!("lychee/", env!("CARGO_PKG_VERSION"));
|
||||
|
||||
|
|
@ -67,15 +67,18 @@ pub struct ClientBuilder {
|
|||
/// As of Feb 2022, it's 60 per hour without GitHub token v.s.
|
||||
/// 5000 per hour with token.
|
||||
github_token: Option<SecretString>,
|
||||
|
||||
/// Links matching this set of regular expressions are **always** checked.
|
||||
///
|
||||
/// This has higher precedence over [`ClientBuilder::excludes`], **but**
|
||||
/// has lower precedence over any other `exclude_` fields or
|
||||
/// [`ClientBuilder::schemes`] below.
|
||||
includes: Option<RegexSet>,
|
||||
|
||||
/// Links matching this set of regular expressions are ignored, **except**
|
||||
/// when a link also matches against [`ClientBuilder::includes`].
|
||||
excludes: Option<RegexSet>,
|
||||
|
||||
/// When `true`, exclude all private network addresses.
|
||||
///
|
||||
/// This effectively turns on the following fields:
|
||||
|
|
@ -83,6 +86,7 @@ pub struct ClientBuilder {
|
|||
/// - [`ClientBuilder::exclude_link_local_ips`]
|
||||
/// - [`ClientBuilder::exclude_loopback_ips`]
|
||||
exclude_all_private: bool,
|
||||
|
||||
/// When `true`, exclude private IP addresses.
|
||||
///
|
||||
/// ## IPv4
|
||||
|
|
@ -109,6 +113,7 @@ pub struct ClientBuilder {
|
|||
/// [IETF RFC 4291]: https://tools.ietf.org/html/rfc4291
|
||||
/// [IETF RFC 3879]: https://tools.ietf.org/html/rfc3879
|
||||
exclude_private_ips: bool,
|
||||
|
||||
/// When `true`, exclude link-local IPs.
|
||||
///
|
||||
/// ## IPv4
|
||||
|
|
@ -127,6 +132,7 @@ pub struct ClientBuilder {
|
|||
/// [RFC 4291]: https://tools.ietf.org/html/rfc4291
|
||||
/// [RFC 4291 section 2.4]: https://tools.ietf.org/html/rfc4291#section-2.4
|
||||
exclude_link_local_ips: bool,
|
||||
|
||||
/// When `true`, exclude loopback IP addresses.
|
||||
///
|
||||
/// ## IPv4
|
||||
|
|
@ -142,14 +148,18 @@ pub struct ClientBuilder {
|
|||
/// [IETF RFC 1122]: https://tools.ietf.org/html/rfc1122
|
||||
/// [IETF RFC 4291 section 2.5.3]: https://tools.ietf.org/html/rfc4291#section-2.5.3
|
||||
exclude_loopback_ips: bool,
|
||||
|
||||
/// When `true`, don't check mail addresses.
|
||||
exclude_mail: bool,
|
||||
|
||||
/// Maximum number of redirects per request before returning an error.
|
||||
#[builder(default = DEFAULT_MAX_REDIRECTS)]
|
||||
max_redirects: usize,
|
||||
|
||||
/// Maximum number of retries per request before returning an error.
|
||||
#[builder(default = DEFAULT_MAX_RETRIES)]
|
||||
max_retries: u64,
|
||||
|
||||
/// User-agent used for checking links.
|
||||
///
|
||||
/// *NOTE*: This may be helpful for bypassing certain firewalls.
|
||||
|
|
@ -157,6 +167,7 @@ pub struct ClientBuilder {
|
|||
// Otherwise we get a 403 from the firewall (e.g. Sucuri/Cloudproxy on ldra.com).
|
||||
#[builder(default_code = "String::from(DEFAULT_USER_AGENT)")]
|
||||
user_agent: String,
|
||||
|
||||
/// When `true`, accept invalid SSL certificates.
|
||||
///
|
||||
/// ## Warning
|
||||
|
|
@ -167,9 +178,11 @@ pub struct ClientBuilder {
|
|||
/// introduces significant vulnerabilities, and should only be used
|
||||
/// as a last resort.
|
||||
allow_insecure: bool,
|
||||
|
||||
/// When non-empty, only links with matched URI schemes are checked.
|
||||
/// Otherwise, this has no effect.
|
||||
schemes: HashSet<String>,
|
||||
|
||||
/// Sets the default [headers] for every request. See also [here].
|
||||
///
|
||||
/// This allows working around validation issues on some websites.
|
||||
|
|
@ -177,15 +190,24 @@ pub struct ClientBuilder {
|
|||
/// [headers]: https://docs.rs/http/latest/http/header/struct.HeaderName.html
|
||||
/// [here]: https://docs.rs/reqwest/latest/reqwest/struct.ClientBuilder.html#method.default_headers
|
||||
custom_headers: HeaderMap,
|
||||
|
||||
/// HTTP method used for requests, e.g. `GET` or `HEAD`.
|
||||
#[builder(default = reqwest::Method::GET)]
|
||||
method: reqwest::Method,
|
||||
|
||||
/// Set of accepted return codes / status codes.
|
||||
///
|
||||
/// Unmatched return codes/ status codes are deemed as errors.
|
||||
accepted: Option<HashSet<StatusCode>>,
|
||||
|
||||
/// Response timeout per request.
|
||||
timeout: Option<Duration>,
|
||||
|
||||
/// Initial time between retries of failed requests
|
||||
///
|
||||
/// The wait time will increase using an exponential backoff mechanism
|
||||
retry_wait_time: Option<Duration>,
|
||||
|
||||
/// Requires using HTTPS when it's available.
|
||||
///
|
||||
/// This would treat unencrypted links as errors when HTTPS is available.
|
||||
|
|
@ -265,6 +287,10 @@ impl ClientBuilder {
|
|||
exclude_mail: self.exclude_mail,
|
||||
};
|
||||
|
||||
let retry_wait_time = self
|
||||
.retry_wait_time
|
||||
.unwrap_or_else(|| Duration::from_secs(DEFAULT_RETRY_WAIT_TIME_SECS as u64));
|
||||
|
||||
let quirks = Quirks::default();
|
||||
|
||||
Ok(Client {
|
||||
|
|
@ -272,6 +298,7 @@ impl ClientBuilder {
|
|||
github_client,
|
||||
filter,
|
||||
max_retries: self.max_retries,
|
||||
retry_wait_time,
|
||||
method,
|
||||
accepted,
|
||||
require_https: self.require_https,
|
||||
|
|
@ -287,22 +314,34 @@ impl ClientBuilder {
|
|||
pub struct Client {
|
||||
/// Underlying `reqwest` client instance that handles the HTTP requests.
|
||||
reqwest_client: reqwest::Client,
|
||||
|
||||
/// Github client.
|
||||
github_client: Option<Octocrab>,
|
||||
|
||||
/// Rules to decide whether each link would be checked or ignored.
|
||||
filter: Filter,
|
||||
|
||||
/// Maximum number of retries per request before returning an error.
|
||||
max_retries: u64,
|
||||
|
||||
/// Initial time between retries of failed requests
|
||||
retry_wait_time: Duration,
|
||||
|
||||
/// HTTP method used for requests, e.g. `GET` or `HEAD`.
|
||||
///
|
||||
/// The same method will be used for all links.
|
||||
method: reqwest::Method,
|
||||
|
||||
/// Set of accepted return codes / status codes.
|
||||
///
|
||||
/// Unmatched return codes/ status codes are deemed as errors.
|
||||
accepted: Option<HashSet<StatusCode>>,
|
||||
|
||||
/// Requires using HTTPS when it's available.
|
||||
///
|
||||
/// This would treat unecrypted links as errors when HTTPS is avaliable.
|
||||
/// This would treat unencrypted links as errors when HTTPS is available.
|
||||
require_https: bool,
|
||||
|
||||
/// Override behaviors for certain known issues with special URIs.
|
||||
quirks: Quirks,
|
||||
}
|
||||
|
|
@ -362,14 +401,14 @@ impl Client {
|
|||
/// Here `uri` must have either the `http` or `https` scheme.
|
||||
pub async fn check_website(&self, uri: &Uri) -> Status {
|
||||
let mut retries: u64 = 0;
|
||||
let mut wait = DEFAULT_RETRY_WAIT_TIME;
|
||||
let mut wait = self.retry_wait_time;
|
||||
|
||||
let mut status = self.check_default(uri).await;
|
||||
while retries < self.max_retries {
|
||||
if status.is_success() {
|
||||
return status;
|
||||
}
|
||||
sleep(Duration::from_secs(wait)).await;
|
||||
sleep(wait).await;
|
||||
retries += 1;
|
||||
wait *= 2;
|
||||
status = self.check_default(uri).await;
|
||||
|
|
|
|||
|
|
@ -79,7 +79,7 @@ pub use crate::{
|
|||
// Constants get exposed so that the CLI can use the same defaults as the library
|
||||
client::{
|
||||
check, Client, ClientBuilder, DEFAULT_MAX_REDIRECTS, DEFAULT_MAX_RETRIES,
|
||||
DEFAULT_RETRY_WAIT_TIME, DEFAULT_TIMEOUT, DEFAULT_USER_AGENT,
|
||||
DEFAULT_RETRY_WAIT_TIME_SECS, DEFAULT_TIMEOUT_SECS, DEFAULT_USER_AGENT,
|
||||
},
|
||||
collector::Collector,
|
||||
filter::{Excludes, Filter, Includes},
|
||||
|
|
|
|||
Loading…
Reference in a new issue