mirror of
https://github.com/Hopiu/lychee.git
synced 2026-04-17 20:00:58 +00:00
Make retries configurable; align constants (#446)
The library and the binary now use the same default values; the values were also tweaked a bit for slightly faster performance.
This commit is contained in:
parent
8df50cf501
commit
21f3160b71
5 changed files with 50 additions and 24 deletions
|
|
@ -233,11 +233,12 @@ OPTIONS:
|
|||
-f, --format <format> Output format of final status report (compact, detailed, json, markdown)
|
||||
[default: compact]
|
||||
--github-token <github-token> GitHub API token to use when checking github.com links, to avoid rate
|
||||
limiting [env: GITHUB_TOKEN=]
|
||||
limiting [env: GITHUB_TOKEN]
|
||||
-h, --headers <headers>... Custom request headers
|
||||
--include <include>... URLs to check (supports regex). Has preference over all excludes
|
||||
--max-concurrency <max-concurrency> Maximum number of concurrent network requests [default: 128]
|
||||
-m, --max-redirects <max-redirects> Maximum number of allowed redirects [default: 10]
|
||||
-m, --max-redirects <max-redirects> Maximum number of allowed redirects [default: 5]
|
||||
--max-retries <max-retries> Maximum number of retries per request [default: 3]
|
||||
-X, --method <method> Request method [default: get]
|
||||
-o, --output <output> Output file of status report
|
||||
-s, --scheme <scheme>... Only test links with the given schemes (e.g. http and https)
|
||||
|
|
|
|||
|
|
@ -2,22 +2,22 @@ use std::{convert::TryFrom, fs, io::ErrorKind, path::PathBuf, str::FromStr};
|
|||
|
||||
use anyhow::{anyhow, Error, Result};
|
||||
use lazy_static::lazy_static;
|
||||
use lychee_lib::{Base, Input};
|
||||
use lychee_lib::{
|
||||
Base, Input, DEFAULT_MAX_REDIRECTS, DEFAULT_MAX_RETRIES, DEFAULT_TIMEOUT, DEFAULT_USER_AGENT,
|
||||
};
|
||||
use serde::Deserialize;
|
||||
use structopt::{clap::crate_version, StructOpt};
|
||||
use structopt::StructOpt;
|
||||
|
||||
const METHOD: &str = "get";
|
||||
const TIMEOUT: usize = 20;
|
||||
const MAX_CONCURRENCY: usize = 128;
|
||||
const MAX_REDIRECTS: usize = 10;
|
||||
const USER_AGENT: &str = concat!("lychee/", crate_version!());
|
||||
|
||||
// this exists because structopt requires `&str` type values for defaults
|
||||
// (we can't use e.g. `TIMEOUT` or `timeout()` which gets created for serde)
|
||||
lazy_static! {
|
||||
static ref TIMEOUT_STR: String = TIMEOUT.to_string();
|
||||
static ref TIMEOUT_STR: String = DEFAULT_TIMEOUT.to_string();
|
||||
static ref MAX_CONCURRENCY_STR: String = MAX_CONCURRENCY.to_string();
|
||||
static ref MAX_REDIRECTS_STR: String = MAX_REDIRECTS.to_string();
|
||||
static ref MAX_REDIRECTS_STR: String = DEFAULT_MAX_REDIRECTS.to_string();
|
||||
static ref MAX_RETRIES_STR: String = DEFAULT_MAX_RETRIES.to_string();
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
|
|
@ -61,10 +61,11 @@ macro_rules! default_function {
|
|||
|
||||
// Generate the functions for serde defaults
|
||||
default_function! {
|
||||
max_redirects: usize = MAX_REDIRECTS;
|
||||
max_redirects: usize = DEFAULT_MAX_REDIRECTS;
|
||||
max_retries: u64 = DEFAULT_MAX_RETRIES;
|
||||
max_concurrency: usize = MAX_CONCURRENCY;
|
||||
user_agent: String = USER_AGENT.to_string();
|
||||
timeout: usize = TIMEOUT;
|
||||
user_agent: String = DEFAULT_USER_AGENT.to_string();
|
||||
timeout: usize = DEFAULT_TIMEOUT;
|
||||
method: String = METHOD.to_string();
|
||||
}
|
||||
|
||||
|
|
@ -142,6 +143,11 @@ pub(crate) struct Config {
|
|||
#[serde(default = "max_redirects")]
|
||||
pub(crate) max_redirects: usize,
|
||||
|
||||
/// Maximum number of retries per request
|
||||
#[structopt(long, default_value = &MAX_RETRIES_STR)]
|
||||
#[serde(default = "max_retries")]
|
||||
pub(crate) max_retries: u64,
|
||||
|
||||
/// Maximum number of concurrent network requests
|
||||
#[structopt(long, default_value = &MAX_CONCURRENCY_STR)]
|
||||
#[serde(default = "max_concurrency")]
|
||||
|
|
@ -154,7 +160,7 @@ pub(crate) struct Config {
|
|||
pub(crate) threads: Option<usize>,
|
||||
|
||||
/// User agent
|
||||
#[structopt(short, long, default_value = USER_AGENT)]
|
||||
#[structopt(short, long, default_value = DEFAULT_USER_AGENT)]
|
||||
#[serde(default = "user_agent")]
|
||||
pub(crate) user_agent: String,
|
||||
|
||||
|
|
@ -308,10 +314,11 @@ impl Config {
|
|||
// Keys with defaults to assign
|
||||
verbose: false;
|
||||
no_progress: false;
|
||||
max_redirects: MAX_REDIRECTS;
|
||||
max_redirects: DEFAULT_MAX_REDIRECTS;
|
||||
max_retries: DEFAULT_MAX_RETRIES;
|
||||
max_concurrency: MAX_CONCURRENCY;
|
||||
threads: None;
|
||||
user_agent: USER_AGENT;
|
||||
user_agent: DEFAULT_USER_AGENT;
|
||||
insecure: false;
|
||||
scheme: Vec::<String>::new();
|
||||
include: Vec::<String>::new();
|
||||
|
|
@ -324,7 +331,7 @@ impl Config {
|
|||
exclude_mail: false;
|
||||
headers: Vec::<String>::new();
|
||||
accept: None;
|
||||
timeout: TIMEOUT;
|
||||
timeout: DEFAULT_TIMEOUT;
|
||||
method: METHOD;
|
||||
base: None;
|
||||
basic_auth: None;
|
||||
|
|
|
|||
|
|
@ -180,7 +180,7 @@ mod cli {
|
|||
// Currently getting a 429 with Googlebot.
|
||||
// See https://github.com/lycheeverse/lychee/issues/448
|
||||
// See https://twitter.com/matthiasendler/status/1479224185125748737
|
||||
// TODO: Remove this exlusion in the future
|
||||
// TODO: Remove this exclusion in the future
|
||||
"--exclude",
|
||||
"twitter"
|
||||
)
|
||||
|
|
|
|||
|
|
@ -23,8 +23,16 @@ use crate::{
|
|||
ErrorKind, Request, Response, Result, Status, Uri,
|
||||
};
|
||||
|
||||
const DEFAULT_MAX_REDIRECTS: usize = 5;
|
||||
const DEFAULT_USER_AGENT: &str = concat!("lychee/", env!("CARGO_PKG_VERSION"));
|
||||
/// Default lychee user agent
|
||||
pub const DEFAULT_USER_AGENT: &str = concat!("lychee/", env!("CARGO_PKG_VERSION"));
|
||||
/// Number of redirects until a request gets declared as failed
|
||||
pub const DEFAULT_MAX_REDIRECTS: usize = 5;
|
||||
/// Number of retries until a request gets declared as failed
|
||||
pub const DEFAULT_MAX_RETRIES: u64 = 3;
|
||||
/// Wait time in seconds between retries (will be doubled after every failure)
|
||||
pub const DEFAULT_RETRY_WAIT_TIME: u64 = 1;
|
||||
/// Total timeout per request until a request gets declared as failed
|
||||
pub const DEFAULT_TIMEOUT: usize = 20;
|
||||
|
||||
/// Handles incoming requests and returns responses. Usually you would not
|
||||
/// initialize a `Client` yourself, but use the `ClientBuilder` because it
|
||||
|
|
@ -37,6 +45,8 @@ pub struct Client {
|
|||
github_client: Option<Github>,
|
||||
/// Filtered domain handling.
|
||||
filter: Filter,
|
||||
/// Maximum number of retries
|
||||
max_retries: u64,
|
||||
/// Default request HTTP method to use.
|
||||
method: reqwest::Method,
|
||||
/// The set of accepted HTTP status codes for valid URIs.
|
||||
|
|
@ -74,6 +84,9 @@ pub struct ClientBuilder {
|
|||
/// Maximum number of redirects before returning error
|
||||
#[builder(default = DEFAULT_MAX_REDIRECTS)]
|
||||
max_redirects: usize,
|
||||
/// Maximum number of retries before returning error
|
||||
#[builder(default = DEFAULT_MAX_RETRIES)]
|
||||
max_retries: u64,
|
||||
/// User agent used for checking links
|
||||
// Faking the user agent is necessary for some websites, unfortunately.
|
||||
// Otherwise we get a 403 from the firewall (e.g. Sucuri/Cloudproxy on ldra.com).
|
||||
|
|
@ -169,6 +182,7 @@ impl ClientBuilder {
|
|||
reqwest_client,
|
||||
github_client: github_token,
|
||||
filter,
|
||||
max_retries: self.max_retries,
|
||||
method: self.method.clone(),
|
||||
accepted: self.accepted.clone(),
|
||||
require_https: self.require_https,
|
||||
|
|
@ -233,16 +247,16 @@ impl Client {
|
|||
|
||||
/// Check a website URI
|
||||
pub async fn check_website(&self, uri: &Uri) -> Status {
|
||||
let mut retries: i64 = 3;
|
||||
let mut wait: u64 = 1;
|
||||
let mut retries: u64 = 0;
|
||||
let mut wait = DEFAULT_RETRY_WAIT_TIME;
|
||||
|
||||
let mut status = self.check_default(uri).await;
|
||||
while retries > 0 {
|
||||
while retries < self.max_retries {
|
||||
if status.is_success() {
|
||||
return status;
|
||||
}
|
||||
retries -= 1;
|
||||
sleep(Duration::from_secs(wait)).await;
|
||||
retries += 1;
|
||||
wait *= 2;
|
||||
status = self.check_default(uri).await;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -72,7 +72,11 @@ use ring as _; // required for apple silicon
|
|||
|
||||
#[doc(inline)]
|
||||
pub use crate::{
|
||||
client::{check, Client, ClientBuilder},
|
||||
// Constants get exposed so that the CLI can use the same defaults as the library
|
||||
client::{
|
||||
check, Client, ClientBuilder, DEFAULT_MAX_REDIRECTS, DEFAULT_MAX_RETRIES,
|
||||
DEFAULT_RETRY_WAIT_TIME, DEFAULT_TIMEOUT, DEFAULT_USER_AGENT,
|
||||
},
|
||||
collector::Collector,
|
||||
filter::{Excludes, Filter, Includes},
|
||||
types::{
|
||||
|
|
|
|||
Loading…
Reference in a new issue