lychee/lychee-bin/src/options.rs

637 lines
21 KiB
Rust
Raw Normal View History

use crate::archive::Archive;
use crate::parse::parse_base;
2023-02-24 22:53:09 +00:00
use crate::verbosity::Verbosity;
use anyhow::{anyhow, Context, Error, Result};
use clap::builder::PossibleValuesParser;
use clap::{arg, builder::TypedValueParser, Parser};
use const_format::{concatcp, formatcp};
use lychee_lib::{
Base, BasicAuthSelector, Input, StatusCodeExcluder, StatusCodeSelector, DEFAULT_MAX_REDIRECTS,
DEFAULT_MAX_RETRIES, DEFAULT_RETRY_WAIT_TIME_SECS, DEFAULT_TIMEOUT_SECS, DEFAULT_USER_AGENT,
};
use secrecy::{ExposeSecret, SecretString};
2020-10-21 00:10:25 +00:00
use serde::Deserialize;
use std::path::Path;
use std::{fs, path::PathBuf, str::FromStr, time::Duration};
use strum::{Display, EnumIter, EnumString, VariantNames};
2020-10-21 00:10:25 +00:00
pub(crate) const LYCHEE_IGNORE_FILE: &str = ".lycheeignore";
pub(crate) const LYCHEE_CACHE_FILE: &str = ".lycheecache";
pub(crate) const LYCHEE_CONFIG_FILE: &str = "lychee.toml";
const DEFAULT_METHOD: &str = "get";
const DEFAULT_MAX_CACHE_AGE: &str = "1d";
const DEFAULT_MAX_CONCURRENCY: usize = 128;
Major refactor of codebase (#208) - The binary component and library component are separated as two packages in the same workspace. - `lychee` is the binary component, in `lychee-bin/*`. - `lychee-lib` is the library component, in `lychee-lib/*`. - Users can now install only the `lychee-lib`, instead of both components, that would require fewer dependencies and faster compilation. - Dependencies for each component are adjusted and updated. E.g., no CLI dependencies for `lychee-lib`. - CLI tests are only moved to `lychee`, as it has nothing to do with the library component. - `Status::Error` is refactored to contain dedicated error enum, `ErrorKind`. - The motivation is to delay the formatting of errors to strings. Note that `e.to_string()` is not necessarily cheap (though trivial in many cases). The formatting is no delayed until the error is needed to be displayed to users. So in some cases, if the error is never used, it means that it won't be formatted at all. - Replaced `regex` based matching with one of the following: - Simple string equality test in the case of 'false positivie'. - URL parsing based test, in the case of extracting repository and user name for GitHub links. - Either cases would be much more efficient than `regex` based matching. First, there's no need to construct a state machine for regex. Second, URL is already verified and parsed on its creation, and extracting its components is fairly cheap. Also, this removes the dependency on `lazy-static` in `lychee-lib`. - `types` module now has a sub-directory, and its components are now separated into their own modules (in that sub-directory). - `lychee-lib::test_utils` module is only compiled for tests. - `wiremock` is moved to `dev-dependency` as it's only needed for `test` modules. - Dependencies are listed in alphabetical order. - Imports are organized in the following fashion: - Imports from `std` - Imports from 3rd-party crates, and `lychee-lib`. - Imports from `crate::*` or `super::*`. - No glob import. - I followed suggestion from `cargo clippy`, with `clippy::all` and `clippy:pedantic`. Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
// this exists because clap requires `&str` type values for defaults
// whereas serde expects owned `String` types
Major refactor of codebase (#208) - The binary component and library component are separated as two packages in the same workspace. - `lychee` is the binary component, in `lychee-bin/*`. - `lychee-lib` is the library component, in `lychee-lib/*`. - Users can now install only the `lychee-lib`, instead of both components, that would require fewer dependencies and faster compilation. - Dependencies for each component are adjusted and updated. E.g., no CLI dependencies for `lychee-lib`. - CLI tests are only moved to `lychee`, as it has nothing to do with the library component. - `Status::Error` is refactored to contain dedicated error enum, `ErrorKind`. - The motivation is to delay the formatting of errors to strings. Note that `e.to_string()` is not necessarily cheap (though trivial in many cases). The formatting is no delayed until the error is needed to be displayed to users. So in some cases, if the error is never used, it means that it won't be formatted at all. - Replaced `regex` based matching with one of the following: - Simple string equality test in the case of 'false positivie'. - URL parsing based test, in the case of extracting repository and user name for GitHub links. - Either cases would be much more efficient than `regex` based matching. First, there's no need to construct a state machine for regex. Second, URL is already verified and parsed on its creation, and extracting its components is fairly cheap. Also, this removes the dependency on `lazy-static` in `lychee-lib`. - `types` module now has a sub-directory, and its components are now separated into their own modules (in that sub-directory). - `lychee-lib::test_utils` module is only compiled for tests. - `wiremock` is moved to `dev-dependency` as it's only needed for `test` modules. - Dependencies are listed in alphabetical order. - Imports are organized in the following fashion: - Imports from `std` - Imports from 3rd-party crates, and `lychee-lib`. - Imports from `crate::*` or `super::*`. - No glob import. - I followed suggestion from `cargo clippy`, with `clippy::all` and `clippy:pedantic`. Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
// (we can't use e.g. `TIMEOUT` or `timeout()` which gets created for serde)
const MAX_CONCURRENCY_STR: &str = concatcp!(DEFAULT_MAX_CONCURRENCY);
const MAX_CACHE_AGE_STR: &str = concatcp!(DEFAULT_MAX_CACHE_AGE);
const MAX_REDIRECTS_STR: &str = concatcp!(DEFAULT_MAX_REDIRECTS);
const MAX_RETRIES_STR: &str = concatcp!(DEFAULT_MAX_RETRIES);
const HELP_MSG_CACHE: &str = formatcp!(
"Use request cache stored on disk at `{}`",
LYCHEE_CACHE_FILE,
);
// We use a custom help message here because we want to show the default
// value of the config file, but also be able to check if the user has
// provided a custom value. If they didn't, we won't throw an error if
// the file doesn't exist.
const HELP_MSG_CONFIG_FILE: &str = formatcp!(
"Configuration file to use\n\n[default: {}]",
LYCHEE_CONFIG_FILE,
);
const TIMEOUT_STR: &str = concatcp!(DEFAULT_TIMEOUT_SECS);
const RETRY_WAIT_TIME_STR: &str = concatcp!(DEFAULT_RETRY_WAIT_TIME_SECS);
/// The format to use for the final status report
#[derive(Debug, Deserialize, Default, Clone, Display, EnumIter, VariantNames, PartialEq)]
#[non_exhaustive]
#[strum(serialize_all = "snake_case")]
#[serde(rename_all = "snake_case")]
pub(crate) enum StatsFormat {
#[default]
Compact,
Detailed,
Json,
Markdown,
Raw,
}
impl FromStr for StatsFormat {
type Err = Error;
fn from_str(format: &str) -> Result<Self, Self::Err> {
match format.to_lowercase().as_str() {
"compact" | "string" => Ok(StatsFormat::Compact),
"detailed" => Ok(StatsFormat::Detailed),
"json" => Ok(StatsFormat::Json),
"markdown" | "md" => Ok(StatsFormat::Markdown),
"raw" => Ok(StatsFormat::Raw),
_ => Err(anyhow!("Unknown format {}", format)),
}
}
}
/// The different formatter modes
///
/// This decides over whether to use color,
/// emojis, or plain text for the output.
#[derive(
Debug, Deserialize, Default, Clone, Display, EnumIter, EnumString, VariantNames, PartialEq,
)]
#[non_exhaustive]
pub(crate) enum OutputMode {
/// Plain text output.
///
/// This is the most basic output mode for terminals that do not support
/// color or emojis. It can also be helpful for scripting or when you want
/// to pipe the output to another program.
#[serde(rename = "plain")]
#[strum(serialize = "plain", ascii_case_insensitive)]
Plain,
/// Colorful output.
///
/// This mode uses colors to highlight the status of the requests.
/// It is useful for terminals that support colors and you want to
/// provide a more visually appealing output.
///
/// This is the default output mode.
#[serde(rename = "color")]
#[strum(serialize = "color", ascii_case_insensitive)]
#[default]
Color,
/// Emoji output.
///
/// This mode uses emojis to represent the status of the requests.
/// Some people may find this mode more intuitive and fun to use.
#[serde(rename = "emoji")]
#[strum(serialize = "emoji", ascii_case_insensitive)]
Emoji,
/// Task output.
///
/// This mode uses Markdown-styled checkboxes to represent the status of the requests.
/// Some people may find this mode more intuitive and useful for task tracking.
#[serde(rename = "task")]
#[strum(serialize = "task", ascii_case_insensitive)]
Task,
}
impl OutputMode {
/// Returns `true` if the response format is `Plain`
pub(crate) const fn is_plain(&self) -> bool {
matches!(self, OutputMode::Plain)
}
/// Returns `true` if the response format is `Emoji`
pub(crate) const fn is_emoji(&self) -> bool {
matches!(self, OutputMode::Emoji)
}
}
2020-10-21 00:10:25 +00:00
// Macro for generating default functions to be used by serde
macro_rules! default_function {
( $( $name:ident : $T:ty = $e:expr; )* ) => {
$(
Major refactor of codebase (#208) - The binary component and library component are separated as two packages in the same workspace. - `lychee` is the binary component, in `lychee-bin/*`. - `lychee-lib` is the library component, in `lychee-lib/*`. - Users can now install only the `lychee-lib`, instead of both components, that would require fewer dependencies and faster compilation. - Dependencies for each component are adjusted and updated. E.g., no CLI dependencies for `lychee-lib`. - CLI tests are only moved to `lychee`, as it has nothing to do with the library component. - `Status::Error` is refactored to contain dedicated error enum, `ErrorKind`. - The motivation is to delay the formatting of errors to strings. Note that `e.to_string()` is not necessarily cheap (though trivial in many cases). The formatting is no delayed until the error is needed to be displayed to users. So in some cases, if the error is never used, it means that it won't be formatted at all. - Replaced `regex` based matching with one of the following: - Simple string equality test in the case of 'false positivie'. - URL parsing based test, in the case of extracting repository and user name for GitHub links. - Either cases would be much more efficient than `regex` based matching. First, there's no need to construct a state machine for regex. Second, URL is already verified and parsed on its creation, and extracting its components is fairly cheap. Also, this removes the dependency on `lazy-static` in `lychee-lib`. - `types` module now has a sub-directory, and its components are now separated into their own modules (in that sub-directory). - `lychee-lib::test_utils` module is only compiled for tests. - `wiremock` is moved to `dev-dependency` as it's only needed for `test` modules. - Dependencies are listed in alphabetical order. - Imports are organized in the following fashion: - Imports from `std` - Imports from 3rd-party crates, and `lychee-lib`. - Imports from `crate::*` or `super::*`. - No glob import. - I followed suggestion from `cargo clippy`, with `clippy::all` and `clippy:pedantic`. Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
#[allow(clippy::missing_const_for_fn)]
2020-10-21 00:10:25 +00:00
fn $name() -> $T {
$e
}
)*
};
}
// Generate the functions for serde defaults
default_function! {
max_redirects: usize = DEFAULT_MAX_REDIRECTS;
max_retries: u64 = DEFAULT_MAX_RETRIES;
max_concurrency: usize = DEFAULT_MAX_CONCURRENCY;
max_cache_age: Duration = humantime::parse_duration(DEFAULT_MAX_CACHE_AGE).unwrap();
user_agent: String = DEFAULT_USER_AGENT.to_string();
timeout: usize = DEFAULT_TIMEOUT_SECS;
retry_wait_time: usize = DEFAULT_RETRY_WAIT_TIME_SECS;
method: String = DEFAULT_METHOD.to_string();
2023-02-24 22:53:09 +00:00
verbosity: Verbosity = Verbosity::default();
cache_exclude_selector: StatusCodeExcluder = StatusCodeExcluder::new();
accept_selector: StatusCodeSelector = StatusCodeSelector::default();
}
2020-10-21 00:10:25 +00:00
// Macro for merging configuration values
macro_rules! fold_in {
( $cli:ident , $toml:ident ; $( $key:ident : $default:expr; )* ) => {
$(
if $cli.$key == $default && $toml.$key != $default {
$cli.$key = $toml.$key;
}
)*
};
}
2020-08-14 09:43:45 +00:00
/// A fast, async link checker
///
/// Finds broken URLs and mail addresses inside Markdown, HTML,
/// `reStructuredText`, websites and more!
#[derive(Parser, Debug)]
#[command(version, about)]
2020-08-14 09:43:45 +00:00
pub(crate) struct LycheeOptions {
/// The inputs (where to get links to check from).
/// These can be: files (e.g. `README.md`), file globs (e.g. `"~/git/*/README.md"`),
/// remote URLs (e.g. `https://example.com/README.md`) or standard input (`-`).
/// NOTE: Use `--` to separate inputs from options that allow multiple arguments.
#[arg(name = "inputs", required = true)]
raw_inputs: Vec<String>,
2020-08-14 09:43:45 +00:00
2020-10-21 00:10:25 +00:00
/// Configuration file to use
#[arg(short, long = "config")]
#[arg(help = HELP_MSG_CONFIG_FILE)]
pub(crate) config_file: Option<PathBuf>,
2020-10-21 00:10:25 +00:00
#[clap(flatten)]
Major refactor of codebase (#208) - The binary component and library component are separated as two packages in the same workspace. - `lychee` is the binary component, in `lychee-bin/*`. - `lychee-lib` is the library component, in `lychee-lib/*`. - Users can now install only the `lychee-lib`, instead of both components, that would require fewer dependencies and faster compilation. - Dependencies for each component are adjusted and updated. E.g., no CLI dependencies for `lychee-lib`. - CLI tests are only moved to `lychee`, as it has nothing to do with the library component. - `Status::Error` is refactored to contain dedicated error enum, `ErrorKind`. - The motivation is to delay the formatting of errors to strings. Note that `e.to_string()` is not necessarily cheap (though trivial in many cases). The formatting is no delayed until the error is needed to be displayed to users. So in some cases, if the error is never used, it means that it won't be formatted at all. - Replaced `regex` based matching with one of the following: - Simple string equality test in the case of 'false positivie'. - URL parsing based test, in the case of extracting repository and user name for GitHub links. - Either cases would be much more efficient than `regex` based matching. First, there's no need to construct a state machine for regex. Second, URL is already verified and parsed on its creation, and extracting its components is fairly cheap. Also, this removes the dependency on `lazy-static` in `lychee-lib`. - `types` module now has a sub-directory, and its components are now separated into their own modules (in that sub-directory). - `lychee-lib::test_utils` module is only compiled for tests. - `wiremock` is moved to `dev-dependency` as it's only needed for `test` modules. - Dependencies are listed in alphabetical order. - Imports are organized in the following fashion: - Imports from `std` - Imports from 3rd-party crates, and `lychee-lib`. - Imports from `crate::*` or `super::*`. - No glob import. - I followed suggestion from `cargo clippy`, with `clippy::all` and `clippy:pedantic`. Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
pub(crate) config: Config,
2020-10-21 00:10:25 +00:00
}
2020-08-14 09:43:45 +00:00
impl LycheeOptions {
/// Get parsed inputs from options.
2022-11-05 16:25:44 +00:00
// This depends on the config, which is why a method is required (we could
// accept a `Vec<Input>` in `LycheeOptions` and do the conversion there, but
// we wouldn't get access to `glob_ignore_case`.
pub(crate) fn inputs(&self) -> Result<Vec<Input>> {
let excluded = if self.config.exclude_path.is_empty() {
None
} else {
Some(self.config.exclude_path.clone())
};
self.raw_inputs
.iter()
.map(|s| Input::new(s, None, self.config.glob_ignore_case, excluded.clone()))
.collect::<Result<_, _>>()
.context("Cannot parse inputs from arguments")
}
}
/// The main configuration for lychee
Major refactor of codebase (#208) - The binary component and library component are separated as two packages in the same workspace. - `lychee` is the binary component, in `lychee-bin/*`. - `lychee-lib` is the library component, in `lychee-lib/*`. - Users can now install only the `lychee-lib`, instead of both components, that would require fewer dependencies and faster compilation. - Dependencies for each component are adjusted and updated. E.g., no CLI dependencies for `lychee-lib`. - CLI tests are only moved to `lychee`, as it has nothing to do with the library component. - `Status::Error` is refactored to contain dedicated error enum, `ErrorKind`. - The motivation is to delay the formatting of errors to strings. Note that `e.to_string()` is not necessarily cheap (though trivial in many cases). The formatting is no delayed until the error is needed to be displayed to users. So in some cases, if the error is never used, it means that it won't be formatted at all. - Replaced `regex` based matching with one of the following: - Simple string equality test in the case of 'false positivie'. - URL parsing based test, in the case of extracting repository and user name for GitHub links. - Either cases would be much more efficient than `regex` based matching. First, there's no need to construct a state machine for regex. Second, URL is already verified and parsed on its creation, and extracting its components is fairly cheap. Also, this removes the dependency on `lazy-static` in `lychee-lib`. - `types` module now has a sub-directory, and its components are now separated into their own modules (in that sub-directory). - `lychee-lib::test_utils` module is only compiled for tests. - `wiremock` is moved to `dev-dependency` as it's only needed for `test` modules. - Dependencies are listed in alphabetical order. - Imports are organized in the following fashion: - Imports from `std` - Imports from 3rd-party crates, and `lychee-lib`. - Imports from `crate::*` or `super::*`. - No glob import. - I followed suggestion from `cargo clippy`, with `clippy::all` and `clippy:pedantic`. Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
#[allow(clippy::struct_excessive_bools)]
#[derive(Parser, Debug, Deserialize, Clone, Default)]
Major refactor of codebase (#208) - The binary component and library component are separated as two packages in the same workspace. - `lychee` is the binary component, in `lychee-bin/*`. - `lychee-lib` is the library component, in `lychee-lib/*`. - Users can now install only the `lychee-lib`, instead of both components, that would require fewer dependencies and faster compilation. - Dependencies for each component are adjusted and updated. E.g., no CLI dependencies for `lychee-lib`. - CLI tests are only moved to `lychee`, as it has nothing to do with the library component. - `Status::Error` is refactored to contain dedicated error enum, `ErrorKind`. - The motivation is to delay the formatting of errors to strings. Note that `e.to_string()` is not necessarily cheap (though trivial in many cases). The formatting is no delayed until the error is needed to be displayed to users. So in some cases, if the error is never used, it means that it won't be formatted at all. - Replaced `regex` based matching with one of the following: - Simple string equality test in the case of 'false positivie'. - URL parsing based test, in the case of extracting repository and user name for GitHub links. - Either cases would be much more efficient than `regex` based matching. First, there's no need to construct a state machine for regex. Second, URL is already verified and parsed on its creation, and extracting its components is fairly cheap. Also, this removes the dependency on `lazy-static` in `lychee-lib`. - `types` module now has a sub-directory, and its components are now separated into their own modules (in that sub-directory). - `lychee-lib::test_utils` module is only compiled for tests. - `wiremock` is moved to `dev-dependency` as it's only needed for `test` modules. - Dependencies are listed in alphabetical order. - Imports are organized in the following fashion: - Imports from `std` - Imports from 3rd-party crates, and `lychee-lib`. - Imports from `crate::*` or `super::*`. - No glob import. - I followed suggestion from `cargo clippy`, with `clippy::all` and `clippy:pedantic`. Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
pub(crate) struct Config {
2020-10-21 00:10:25 +00:00
/// Verbose program output
#[clap(flatten)]
#[serde(default = "verbosity")]
2023-02-24 22:53:09 +00:00
pub(crate) verbose: Verbosity,
2020-08-14 09:43:45 +00:00
/// Do not show progress bar.
/// This is recommended for non-interactive shells (e.g. for continuous integration)
#[arg(short, long, verbatim_doc_comment)]
2020-10-21 00:10:25 +00:00
#[serde(default)]
Major refactor of codebase (#208) - The binary component and library component are separated as two packages in the same workspace. - `lychee` is the binary component, in `lychee-bin/*`. - `lychee-lib` is the library component, in `lychee-lib/*`. - Users can now install only the `lychee-lib`, instead of both components, that would require fewer dependencies and faster compilation. - Dependencies for each component are adjusted and updated. E.g., no CLI dependencies for `lychee-lib`. - CLI tests are only moved to `lychee`, as it has nothing to do with the library component. - `Status::Error` is refactored to contain dedicated error enum, `ErrorKind`. - The motivation is to delay the formatting of errors to strings. Note that `e.to_string()` is not necessarily cheap (though trivial in many cases). The formatting is no delayed until the error is needed to be displayed to users. So in some cases, if the error is never used, it means that it won't be formatted at all. - Replaced `regex` based matching with one of the following: - Simple string equality test in the case of 'false positivie'. - URL parsing based test, in the case of extracting repository and user name for GitHub links. - Either cases would be much more efficient than `regex` based matching. First, there's no need to construct a state machine for regex. Second, URL is already verified and parsed on its creation, and extracting its components is fairly cheap. Also, this removes the dependency on `lazy-static` in `lychee-lib`. - `types` module now has a sub-directory, and its components are now separated into their own modules (in that sub-directory). - `lychee-lib::test_utils` module is only compiled for tests. - `wiremock` is moved to `dev-dependency` as it's only needed for `test` modules. - Dependencies are listed in alphabetical order. - Imports are organized in the following fashion: - Imports from `std` - Imports from 3rd-party crates, and `lychee-lib`. - Imports from `crate::*` or `super::*`. - No glob import. - I followed suggestion from `cargo clippy`, with `clippy::all` and `clippy:pedantic`. Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
pub(crate) no_progress: bool,
2020-10-10 04:31:28 +00:00
#[arg(help = HELP_MSG_CACHE)]
#[arg(long)]
#[serde(default)]
pub(crate) cache: bool,
/// Discard all cached requests older than this duration
#[arg(
long,
value_parser = humantime::parse_duration,
default_value = &MAX_CACHE_AGE_STR
)]
#[serde(default = "max_cache_age")]
#[serde(with = "humantime_serde")]
pub(crate) max_cache_age: Duration,
/// A list of status codes that will be excluded from the cache
#[arg(
long,
default_value_t,
long_help = "A list of status codes that will be ignored from the cache
The following accept range syntax is supported: [start]..[=]end|code. Some valid
examples are:
- 429
- 500..=599
- 500..
Use \"lychee --cache-exclude-status '429, 500..502' <inputs>...\" to provide a comma- separated
list of excluded status codes. This example will not cache results with a status code of 429, 500,
501 and 502."
)]
#[serde(default = "cache_exclude_selector")]
pub(crate) cache_exclude_status: StatusCodeExcluder,
/// Don't perform any link checking.
/// Instead, dump all the links extracted from inputs that would be checked
#[arg(long)]
#[serde(default)]
pub(crate) dump: bool,
/// Don't perform any link extraction and checking.
/// Instead, dump all input sources from which links would be collected
#[arg(long)]
#[serde(default)]
pub(crate) dump_inputs: bool,
/// Specify the use of a specific web archive.
/// Can be used in combination with `--suggest`
#[arg(long, value_parser = PossibleValuesParser::new(Archive::VARIANTS).map(|s| s.parse::<Archive>().unwrap()))]
#[serde(default)]
pub(crate) archive: Option<Archive>,
/// Suggest link replacements for broken links, using a web archive.
/// The web archive can be specified with `--archive`
#[arg(long)]
#[serde(default)]
pub(crate) suggest: bool,
2020-10-21 00:10:25 +00:00
/// Maximum number of allowed redirects
#[arg(short, long, default_value = &MAX_REDIRECTS_STR)]
#[serde(default = "max_redirects")]
Major refactor of codebase (#208) - The binary component and library component are separated as two packages in the same workspace. - `lychee` is the binary component, in `lychee-bin/*`. - `lychee-lib` is the library component, in `lychee-lib/*`. - Users can now install only the `lychee-lib`, instead of both components, that would require fewer dependencies and faster compilation. - Dependencies for each component are adjusted and updated. E.g., no CLI dependencies for `lychee-lib`. - CLI tests are only moved to `lychee`, as it has nothing to do with the library component. - `Status::Error` is refactored to contain dedicated error enum, `ErrorKind`. - The motivation is to delay the formatting of errors to strings. Note that `e.to_string()` is not necessarily cheap (though trivial in many cases). The formatting is no delayed until the error is needed to be displayed to users. So in some cases, if the error is never used, it means that it won't be formatted at all. - Replaced `regex` based matching with one of the following: - Simple string equality test in the case of 'false positivie'. - URL parsing based test, in the case of extracting repository and user name for GitHub links. - Either cases would be much more efficient than `regex` based matching. First, there's no need to construct a state machine for regex. Second, URL is already verified and parsed on its creation, and extracting its components is fairly cheap. Also, this removes the dependency on `lazy-static` in `lychee-lib`. - `types` module now has a sub-directory, and its components are now separated into their own modules (in that sub-directory). - `lychee-lib::test_utils` module is only compiled for tests. - `wiremock` is moved to `dev-dependency` as it's only needed for `test` modules. - Dependencies are listed in alphabetical order. - Imports are organized in the following fashion: - Imports from `std` - Imports from 3rd-party crates, and `lychee-lib`. - Imports from `crate::*` or `super::*`. - No glob import. - I followed suggestion from `cargo clippy`, with `clippy::all` and `clippy:pedantic`. Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
pub(crate) max_redirects: usize,
2020-08-14 09:43:45 +00:00
/// Maximum number of retries per request
#[arg(long, default_value = &MAX_RETRIES_STR)]
#[serde(default = "max_retries")]
pub(crate) max_retries: u64,
/// Maximum number of concurrent network requests
#[arg(long, default_value = &MAX_CONCURRENCY_STR)]
#[serde(default = "max_concurrency")]
Major refactor of codebase (#208) - The binary component and library component are separated as two packages in the same workspace. - `lychee` is the binary component, in `lychee-bin/*`. - `lychee-lib` is the library component, in `lychee-lib/*`. - Users can now install only the `lychee-lib`, instead of both components, that would require fewer dependencies and faster compilation. - Dependencies for each component are adjusted and updated. E.g., no CLI dependencies for `lychee-lib`. - CLI tests are only moved to `lychee`, as it has nothing to do with the library component. - `Status::Error` is refactored to contain dedicated error enum, `ErrorKind`. - The motivation is to delay the formatting of errors to strings. Note that `e.to_string()` is not necessarily cheap (though trivial in many cases). The formatting is no delayed until the error is needed to be displayed to users. So in some cases, if the error is never used, it means that it won't be formatted at all. - Replaced `regex` based matching with one of the following: - Simple string equality test in the case of 'false positivie'. - URL parsing based test, in the case of extracting repository and user name for GitHub links. - Either cases would be much more efficient than `regex` based matching. First, there's no need to construct a state machine for regex. Second, URL is already verified and parsed on its creation, and extracting its components is fairly cheap. Also, this removes the dependency on `lazy-static` in `lychee-lib`. - `types` module now has a sub-directory, and its components are now separated into their own modules (in that sub-directory). - `lychee-lib::test_utils` module is only compiled for tests. - `wiremock` is moved to `dev-dependency` as it's only needed for `test` modules. - Dependencies are listed in alphabetical order. - Imports are organized in the following fashion: - Imports from `std` - Imports from 3rd-party crates, and `lychee-lib`. - Imports from `crate::*` or `super::*`. - No glob import. - I followed suggestion from `cargo clippy`, with `clippy::all` and `clippy:pedantic`. Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
pub(crate) max_concurrency: usize,
2020-10-21 00:10:25 +00:00
/// Number of threads to utilize.
/// Defaults to number of cores available to the system
#[arg(short = 'T', long)]
2020-10-21 00:10:25 +00:00
#[serde(default)]
Major refactor of codebase (#208) - The binary component and library component are separated as two packages in the same workspace. - `lychee` is the binary component, in `lychee-bin/*`. - `lychee-lib` is the library component, in `lychee-lib/*`. - Users can now install only the `lychee-lib`, instead of both components, that would require fewer dependencies and faster compilation. - Dependencies for each component are adjusted and updated. E.g., no CLI dependencies for `lychee-lib`. - CLI tests are only moved to `lychee`, as it has nothing to do with the library component. - `Status::Error` is refactored to contain dedicated error enum, `ErrorKind`. - The motivation is to delay the formatting of errors to strings. Note that `e.to_string()` is not necessarily cheap (though trivial in many cases). The formatting is no delayed until the error is needed to be displayed to users. So in some cases, if the error is never used, it means that it won't be formatted at all. - Replaced `regex` based matching with one of the following: - Simple string equality test in the case of 'false positivie'. - URL parsing based test, in the case of extracting repository and user name for GitHub links. - Either cases would be much more efficient than `regex` based matching. First, there's no need to construct a state machine for regex. Second, URL is already verified and parsed on its creation, and extracting its components is fairly cheap. Also, this removes the dependency on `lazy-static` in `lychee-lib`. - `types` module now has a sub-directory, and its components are now separated into their own modules (in that sub-directory). - `lychee-lib::test_utils` module is only compiled for tests. - `wiremock` is moved to `dev-dependency` as it's only needed for `test` modules. - Dependencies are listed in alphabetical order. - Imports are organized in the following fashion: - Imports from `std` - Imports from 3rd-party crates, and `lychee-lib`. - Imports from `crate::*` or `super::*`. - No glob import. - I followed suggestion from `cargo clippy`, with `clippy::all` and `clippy:pedantic`. Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
pub(crate) threads: Option<usize>,
2020-08-14 09:43:45 +00:00
2020-10-21 00:10:25 +00:00
/// User agent
#[arg(short, long, default_value = DEFAULT_USER_AGENT)]
2020-10-21 00:10:25 +00:00
#[serde(default = "user_agent")]
Major refactor of codebase (#208) - The binary component and library component are separated as two packages in the same workspace. - `lychee` is the binary component, in `lychee-bin/*`. - `lychee-lib` is the library component, in `lychee-lib/*`. - Users can now install only the `lychee-lib`, instead of both components, that would require fewer dependencies and faster compilation. - Dependencies for each component are adjusted and updated. E.g., no CLI dependencies for `lychee-lib`. - CLI tests are only moved to `lychee`, as it has nothing to do with the library component. - `Status::Error` is refactored to contain dedicated error enum, `ErrorKind`. - The motivation is to delay the formatting of errors to strings. Note that `e.to_string()` is not necessarily cheap (though trivial in many cases). The formatting is no delayed until the error is needed to be displayed to users. So in some cases, if the error is never used, it means that it won't be formatted at all. - Replaced `regex` based matching with one of the following: - Simple string equality test in the case of 'false positivie'. - URL parsing based test, in the case of extracting repository and user name for GitHub links. - Either cases would be much more efficient than `regex` based matching. First, there's no need to construct a state machine for regex. Second, URL is already verified and parsed on its creation, and extracting its components is fairly cheap. Also, this removes the dependency on `lazy-static` in `lychee-lib`. - `types` module now has a sub-directory, and its components are now separated into their own modules (in that sub-directory). - `lychee-lib::test_utils` module is only compiled for tests. - `wiremock` is moved to `dev-dependency` as it's only needed for `test` modules. - Dependencies are listed in alphabetical order. - Imports are organized in the following fashion: - Imports from `std` - Imports from 3rd-party crates, and `lychee-lib`. - Imports from `crate::*` or `super::*`. - No glob import. - I followed suggestion from `cargo clippy`, with `clippy::all` and `clippy:pedantic`. Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
pub(crate) user_agent: String,
2020-08-14 09:43:45 +00:00
2020-10-21 00:10:25 +00:00
/// Proceed for server connections considered insecure (invalid TLS)
#[arg(short, long)]
2020-10-21 00:10:25 +00:00
#[serde(default)]
Major refactor of codebase (#208) - The binary component and library component are separated as two packages in the same workspace. - `lychee` is the binary component, in `lychee-bin/*`. - `lychee-lib` is the library component, in `lychee-lib/*`. - Users can now install only the `lychee-lib`, instead of both components, that would require fewer dependencies and faster compilation. - Dependencies for each component are adjusted and updated. E.g., no CLI dependencies for `lychee-lib`. - CLI tests are only moved to `lychee`, as it has nothing to do with the library component. - `Status::Error` is refactored to contain dedicated error enum, `ErrorKind`. - The motivation is to delay the formatting of errors to strings. Note that `e.to_string()` is not necessarily cheap (though trivial in many cases). The formatting is no delayed until the error is needed to be displayed to users. So in some cases, if the error is never used, it means that it won't be formatted at all. - Replaced `regex` based matching with one of the following: - Simple string equality test in the case of 'false positivie'. - URL parsing based test, in the case of extracting repository and user name for GitHub links. - Either cases would be much more efficient than `regex` based matching. First, there's no need to construct a state machine for regex. Second, URL is already verified and parsed on its creation, and extracting its components is fairly cheap. Also, this removes the dependency on `lazy-static` in `lychee-lib`. - `types` module now has a sub-directory, and its components are now separated into their own modules (in that sub-directory). - `lychee-lib::test_utils` module is only compiled for tests. - `wiremock` is moved to `dev-dependency` as it's only needed for `test` modules. - Dependencies are listed in alphabetical order. - Imports are organized in the following fashion: - Imports from `std` - Imports from 3rd-party crates, and `lychee-lib`. - Imports from `crate::*` or `super::*`. - No glob import. - I followed suggestion from `cargo clippy`, with `clippy::all` and `clippy:pedantic`. Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
pub(crate) insecure: bool,
2020-08-14 09:43:45 +00:00
/// Only test links with the given schemes (e.g. https).
/// Omit to check links with any other scheme.
/// At the moment, we support http, https, file, and mailto.
#[arg(short, long)]
2020-10-21 00:10:25 +00:00
#[serde(default)]
pub(crate) scheme: Vec<String>,
2020-08-14 09:43:45 +00:00
2021-09-03 00:09:30 +00:00
/// Only check local files and block network requests.
#[arg(long)]
2021-09-02 21:10:46 +00:00
#[serde(default)]
pub(crate) offline: bool,
/// URLs to check (supports regex). Has preference over all excludes.
#[arg(long)]
#[serde(default)]
Major refactor of codebase (#208) - The binary component and library component are separated as two packages in the same workspace. - `lychee` is the binary component, in `lychee-bin/*`. - `lychee-lib` is the library component, in `lychee-lib/*`. - Users can now install only the `lychee-lib`, instead of both components, that would require fewer dependencies and faster compilation. - Dependencies for each component are adjusted and updated. E.g., no CLI dependencies for `lychee-lib`. - CLI tests are only moved to `lychee`, as it has nothing to do with the library component. - `Status::Error` is refactored to contain dedicated error enum, `ErrorKind`. - The motivation is to delay the formatting of errors to strings. Note that `e.to_string()` is not necessarily cheap (though trivial in many cases). The formatting is no delayed until the error is needed to be displayed to users. So in some cases, if the error is never used, it means that it won't be formatted at all. - Replaced `regex` based matching with one of the following: - Simple string equality test in the case of 'false positivie'. - URL parsing based test, in the case of extracting repository and user name for GitHub links. - Either cases would be much more efficient than `regex` based matching. First, there's no need to construct a state machine for regex. Second, URL is already verified and parsed on its creation, and extracting its components is fairly cheap. Also, this removes the dependency on `lazy-static` in `lychee-lib`. - `types` module now has a sub-directory, and its components are now separated into their own modules (in that sub-directory). - `lychee-lib::test_utils` module is only compiled for tests. - `wiremock` is moved to `dev-dependency` as it's only needed for `test` modules. - Dependencies are listed in alphabetical order. - Imports are organized in the following fashion: - Imports from `std` - Imports from 3rd-party crates, and `lychee-lib`. - Imports from `crate::*` or `super::*`. - No glob import. - I followed suggestion from `cargo clippy`, with `clippy::all` and `clippy:pedantic`. Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
pub(crate) include: Vec<String>,
/// Exclude URLs and mail addresses from checking (supports regex)
#[arg(long)]
2020-10-21 00:10:25 +00:00
#[serde(default)]
Major refactor of codebase (#208) - The binary component and library component are separated as two packages in the same workspace. - `lychee` is the binary component, in `lychee-bin/*`. - `lychee-lib` is the library component, in `lychee-lib/*`. - Users can now install only the `lychee-lib`, instead of both components, that would require fewer dependencies and faster compilation. - Dependencies for each component are adjusted and updated. E.g., no CLI dependencies for `lychee-lib`. - CLI tests are only moved to `lychee`, as it has nothing to do with the library component. - `Status::Error` is refactored to contain dedicated error enum, `ErrorKind`. - The motivation is to delay the formatting of errors to strings. Note that `e.to_string()` is not necessarily cheap (though trivial in many cases). The formatting is no delayed until the error is needed to be displayed to users. So in some cases, if the error is never used, it means that it won't be formatted at all. - Replaced `regex` based matching with one of the following: - Simple string equality test in the case of 'false positivie'. - URL parsing based test, in the case of extracting repository and user name for GitHub links. - Either cases would be much more efficient than `regex` based matching. First, there's no need to construct a state machine for regex. Second, URL is already verified and parsed on its creation, and extracting its components is fairly cheap. Also, this removes the dependency on `lazy-static` in `lychee-lib`. - `types` module now has a sub-directory, and its components are now separated into their own modules (in that sub-directory). - `lychee-lib::test_utils` module is only compiled for tests. - `wiremock` is moved to `dev-dependency` as it's only needed for `test` modules. - Dependencies are listed in alphabetical order. - Imports are organized in the following fashion: - Imports from `std` - Imports from 3rd-party crates, and `lychee-lib`. - Imports from `crate::*` or `super::*`. - No glob import. - I followed suggestion from `cargo clippy`, with `clippy::all` and `clippy:pedantic`. Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
pub(crate) exclude: Vec<String>,
2020-08-14 13:24:41 +00:00
/// Deprecated; use `--exclude-path` instead
#[arg(long)]
2021-09-01 15:37:31 +00:00
#[serde(default)]
pub(crate) exclude_file: Vec<String>,
2021-09-03 00:12:03 +00:00
/// Exclude file path from getting checked.
#[arg(long)]
#[serde(default)]
pub(crate) exclude_path: Vec<PathBuf>,
2020-10-21 00:10:25 +00:00
/// Exclude all private IPs from checking.
/// Equivalent to `--exclude-private --exclude-link-local --exclude-loopback`
#[arg(short = 'E', long, verbatim_doc_comment)]
2020-10-21 00:10:25 +00:00
#[serde(default)]
Major refactor of codebase (#208) - The binary component and library component are separated as two packages in the same workspace. - `lychee` is the binary component, in `lychee-bin/*`. - `lychee-lib` is the library component, in `lychee-lib/*`. - Users can now install only the `lychee-lib`, instead of both components, that would require fewer dependencies and faster compilation. - Dependencies for each component are adjusted and updated. E.g., no CLI dependencies for `lychee-lib`. - CLI tests are only moved to `lychee`, as it has nothing to do with the library component. - `Status::Error` is refactored to contain dedicated error enum, `ErrorKind`. - The motivation is to delay the formatting of errors to strings. Note that `e.to_string()` is not necessarily cheap (though trivial in many cases). The formatting is no delayed until the error is needed to be displayed to users. So in some cases, if the error is never used, it means that it won't be formatted at all. - Replaced `regex` based matching with one of the following: - Simple string equality test in the case of 'false positivie'. - URL parsing based test, in the case of extracting repository and user name for GitHub links. - Either cases would be much more efficient than `regex` based matching. First, there's no need to construct a state machine for regex. Second, URL is already verified and parsed on its creation, and extracting its components is fairly cheap. Also, this removes the dependency on `lazy-static` in `lychee-lib`. - `types` module now has a sub-directory, and its components are now separated into their own modules (in that sub-directory). - `lychee-lib::test_utils` module is only compiled for tests. - `wiremock` is moved to `dev-dependency` as it's only needed for `test` modules. - Dependencies are listed in alphabetical order. - Imports are organized in the following fashion: - Imports from `std` - Imports from 3rd-party crates, and `lychee-lib`. - Imports from `crate::*` or `super::*`. - No glob import. - I followed suggestion from `cargo clippy`, with `clippy::all` and `clippy:pedantic`. Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
pub(crate) exclude_all_private: bool,
2020-10-21 00:10:25 +00:00
/// Exclude private IP address ranges from checking
#[arg(long)]
2020-10-21 00:10:25 +00:00
#[serde(default)]
Major refactor of codebase (#208) - The binary component and library component are separated as two packages in the same workspace. - `lychee` is the binary component, in `lychee-bin/*`. - `lychee-lib` is the library component, in `lychee-lib/*`. - Users can now install only the `lychee-lib`, instead of both components, that would require fewer dependencies and faster compilation. - Dependencies for each component are adjusted and updated. E.g., no CLI dependencies for `lychee-lib`. - CLI tests are only moved to `lychee`, as it has nothing to do with the library component. - `Status::Error` is refactored to contain dedicated error enum, `ErrorKind`. - The motivation is to delay the formatting of errors to strings. Note that `e.to_string()` is not necessarily cheap (though trivial in many cases). The formatting is no delayed until the error is needed to be displayed to users. So in some cases, if the error is never used, it means that it won't be formatted at all. - Replaced `regex` based matching with one of the following: - Simple string equality test in the case of 'false positivie'. - URL parsing based test, in the case of extracting repository and user name for GitHub links. - Either cases would be much more efficient than `regex` based matching. First, there's no need to construct a state machine for regex. Second, URL is already verified and parsed on its creation, and extracting its components is fairly cheap. Also, this removes the dependency on `lazy-static` in `lychee-lib`. - `types` module now has a sub-directory, and its components are now separated into their own modules (in that sub-directory). - `lychee-lib::test_utils` module is only compiled for tests. - `wiremock` is moved to `dev-dependency` as it's only needed for `test` modules. - Dependencies are listed in alphabetical order. - Imports are organized in the following fashion: - Imports from `std` - Imports from 3rd-party crates, and `lychee-lib`. - Imports from `crate::*` or `super::*`. - No glob import. - I followed suggestion from `cargo clippy`, with `clippy::all` and `clippy:pedantic`. Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
pub(crate) exclude_private: bool,
2020-10-21 00:10:25 +00:00
/// Exclude link-local IP address range from checking
#[arg(long)]
2020-10-21 00:10:25 +00:00
#[serde(default)]
Major refactor of codebase (#208) - The binary component and library component are separated as two packages in the same workspace. - `lychee` is the binary component, in `lychee-bin/*`. - `lychee-lib` is the library component, in `lychee-lib/*`. - Users can now install only the `lychee-lib`, instead of both components, that would require fewer dependencies and faster compilation. - Dependencies for each component are adjusted and updated. E.g., no CLI dependencies for `lychee-lib`. - CLI tests are only moved to `lychee`, as it has nothing to do with the library component. - `Status::Error` is refactored to contain dedicated error enum, `ErrorKind`. - The motivation is to delay the formatting of errors to strings. Note that `e.to_string()` is not necessarily cheap (though trivial in many cases). The formatting is no delayed until the error is needed to be displayed to users. So in some cases, if the error is never used, it means that it won't be formatted at all. - Replaced `regex` based matching with one of the following: - Simple string equality test in the case of 'false positivie'. - URL parsing based test, in the case of extracting repository and user name for GitHub links. - Either cases would be much more efficient than `regex` based matching. First, there's no need to construct a state machine for regex. Second, URL is already verified and parsed on its creation, and extracting its components is fairly cheap. Also, this removes the dependency on `lazy-static` in `lychee-lib`. - `types` module now has a sub-directory, and its components are now separated into their own modules (in that sub-directory). - `lychee-lib::test_utils` module is only compiled for tests. - `wiremock` is moved to `dev-dependency` as it's only needed for `test` modules. - Dependencies are listed in alphabetical order. - Imports are organized in the following fashion: - Imports from `std` - Imports from 3rd-party crates, and `lychee-lib`. - Imports from `crate::*` or `super::*`. - No glob import. - I followed suggestion from `cargo clippy`, with `clippy::all` and `clippy:pedantic`. Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
pub(crate) exclude_link_local: bool,
/// Exclude loopback IP address range and localhost from checking
#[arg(long)]
2020-10-21 00:10:25 +00:00
#[serde(default)]
Major refactor of codebase (#208) - The binary component and library component are separated as two packages in the same workspace. - `lychee` is the binary component, in `lychee-bin/*`. - `lychee-lib` is the library component, in `lychee-lib/*`. - Users can now install only the `lychee-lib`, instead of both components, that would require fewer dependencies and faster compilation. - Dependencies for each component are adjusted and updated. E.g., no CLI dependencies for `lychee-lib`. - CLI tests are only moved to `lychee`, as it has nothing to do with the library component. - `Status::Error` is refactored to contain dedicated error enum, `ErrorKind`. - The motivation is to delay the formatting of errors to strings. Note that `e.to_string()` is not necessarily cheap (though trivial in many cases). The formatting is no delayed until the error is needed to be displayed to users. So in some cases, if the error is never used, it means that it won't be formatted at all. - Replaced `regex` based matching with one of the following: - Simple string equality test in the case of 'false positivie'. - URL parsing based test, in the case of extracting repository and user name for GitHub links. - Either cases would be much more efficient than `regex` based matching. First, there's no need to construct a state machine for regex. Second, URL is already verified and parsed on its creation, and extracting its components is fairly cheap. Also, this removes the dependency on `lazy-static` in `lychee-lib`. - `types` module now has a sub-directory, and its components are now separated into their own modules (in that sub-directory). - `lychee-lib::test_utils` module is only compiled for tests. - `wiremock` is moved to `dev-dependency` as it's only needed for `test` modules. - Dependencies are listed in alphabetical order. - Imports are organized in the following fashion: - Imports from `std` - Imports from 3rd-party crates, and `lychee-lib`. - Imports from `crate::*` or `super::*`. - No glob import. - I followed suggestion from `cargo clippy`, with `clippy::all` and `clippy:pedantic`. Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
pub(crate) exclude_loopback: bool,
/// Exclude all mail addresses from checking
/// (deprecated; excluded by default)
#[arg(long)]
#[serde(default)]
Major refactor of codebase (#208) - The binary component and library component are separated as two packages in the same workspace. - `lychee` is the binary component, in `lychee-bin/*`. - `lychee-lib` is the library component, in `lychee-lib/*`. - Users can now install only the `lychee-lib`, instead of both components, that would require fewer dependencies and faster compilation. - Dependencies for each component are adjusted and updated. E.g., no CLI dependencies for `lychee-lib`. - CLI tests are only moved to `lychee`, as it has nothing to do with the library component. - `Status::Error` is refactored to contain dedicated error enum, `ErrorKind`. - The motivation is to delay the formatting of errors to strings. Note that `e.to_string()` is not necessarily cheap (though trivial in many cases). The formatting is no delayed until the error is needed to be displayed to users. So in some cases, if the error is never used, it means that it won't be formatted at all. - Replaced `regex` based matching with one of the following: - Simple string equality test in the case of 'false positivie'. - URL parsing based test, in the case of extracting repository and user name for GitHub links. - Either cases would be much more efficient than `regex` based matching. First, there's no need to construct a state machine for regex. Second, URL is already verified and parsed on its creation, and extracting its components is fairly cheap. Also, this removes the dependency on `lazy-static` in `lychee-lib`. - `types` module now has a sub-directory, and its components are now separated into their own modules (in that sub-directory). - `lychee-lib::test_utils` module is only compiled for tests. - `wiremock` is moved to `dev-dependency` as it's only needed for `test` modules. - Dependencies are listed in alphabetical order. - Imports are organized in the following fashion: - Imports from `std` - Imports from 3rd-party crates, and `lychee-lib`. - Imports from `crate::*` or `super::*`. - No glob import. - I followed suggestion from `cargo clippy`, with `clippy::all` and `clippy:pedantic`. Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
pub(crate) exclude_mail: bool,
/// Also check email addresses
#[arg(long)]
#[serde(default)]
pub(crate) include_mail: bool,
/// Remap URI matching pattern to different URI
#[serde(default)]
#[arg(long)]
pub(crate) remap: Vec<String>,
/// Automatically append file extensions to `file://` URIs as needed
#[serde(default)]
#[arg(
long,
value_delimiter = ',',
long_help = "Test the specified file extensions for URIs when checking files locally.
Multiple extensions can be separated by commas. Extensions will be checked in
order of appearance.
Example: --fallback-extensions html,htm,php,asp,aspx,jsp,cgi"
)]
pub(crate) fallback_extensions: Vec<String>,
/// Custom request header
#[arg(long)]
2020-10-21 00:10:25 +00:00
#[serde(default)]
pub(crate) header: Vec<String>,
2020-08-14 15:36:43 +00:00
/// A List of accepted status codes for valid links
#[arg(
short,
long,
default_value_t,
long_help = "A List of accepted status codes for valid links
The following accept range syntax is supported: [start]..[=]end|code. Some valid
examples are:
- 200..=204
- 200..204
- ..=204
- ..204
- 200
Use \"lychee --accept '200..=204, 429, 500' <inputs>...\" to provide a comma-
separated list of accepted status codes. This example will accept 200, 201,
202, 203, 204, 429, and 500 as valid status codes."
)]
#[serde(default = "accept_selector")]
pub(crate) accept: StatusCodeSelector,
/// Enable the checking of fragments in links.
#[arg(long)]
#[serde(default)]
pub(crate) include_fragments: bool,
/// Website timeout in seconds from connect to response finished
#[arg(short, long, default_value = &TIMEOUT_STR)]
2020-10-21 00:10:25 +00:00
#[serde(default = "timeout")]
Major refactor of codebase (#208) - The binary component and library component are separated as two packages in the same workspace. - `lychee` is the binary component, in `lychee-bin/*`. - `lychee-lib` is the library component, in `lychee-lib/*`. - Users can now install only the `lychee-lib`, instead of both components, that would require fewer dependencies and faster compilation. - Dependencies for each component are adjusted and updated. E.g., no CLI dependencies for `lychee-lib`. - CLI tests are only moved to `lychee`, as it has nothing to do with the library component. - `Status::Error` is refactored to contain dedicated error enum, `ErrorKind`. - The motivation is to delay the formatting of errors to strings. Note that `e.to_string()` is not necessarily cheap (though trivial in many cases). The formatting is no delayed until the error is needed to be displayed to users. So in some cases, if the error is never used, it means that it won't be formatted at all. - Replaced `regex` based matching with one of the following: - Simple string equality test in the case of 'false positivie'. - URL parsing based test, in the case of extracting repository and user name for GitHub links. - Either cases would be much more efficient than `regex` based matching. First, there's no need to construct a state machine for regex. Second, URL is already verified and parsed on its creation, and extracting its components is fairly cheap. Also, this removes the dependency on `lazy-static` in `lychee-lib`. - `types` module now has a sub-directory, and its components are now separated into their own modules (in that sub-directory). - `lychee-lib::test_utils` module is only compiled for tests. - `wiremock` is moved to `dev-dependency` as it's only needed for `test` modules. - Dependencies are listed in alphabetical order. - Imports are organized in the following fashion: - Imports from `std` - Imports from 3rd-party crates, and `lychee-lib`. - Imports from `crate::*` or `super::*`. - No glob import. - I followed suggestion from `cargo clippy`, with `clippy::all` and `clippy:pedantic`. Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
pub(crate) timeout: usize,
2020-08-21 22:36:03 +00:00
/// Minimum wait time in seconds between retries of failed requests
#[arg(short, long, default_value = &RETRY_WAIT_TIME_STR)]
#[serde(default = "retry_wait_time")]
pub(crate) retry_wait_time: usize,
2020-10-21 00:10:25 +00:00
/// Request method
// Using `-X` as a short param similar to curl
#[arg(short = 'X', long, default_value = DEFAULT_METHOD)]
2020-10-21 00:10:25 +00:00
#[serde(default = "method")]
Major refactor of codebase (#208) - The binary component and library component are separated as two packages in the same workspace. - `lychee` is the binary component, in `lychee-bin/*`. - `lychee-lib` is the library component, in `lychee-lib/*`. - Users can now install only the `lychee-lib`, instead of both components, that would require fewer dependencies and faster compilation. - Dependencies for each component are adjusted and updated. E.g., no CLI dependencies for `lychee-lib`. - CLI tests are only moved to `lychee`, as it has nothing to do with the library component. - `Status::Error` is refactored to contain dedicated error enum, `ErrorKind`. - The motivation is to delay the formatting of errors to strings. Note that `e.to_string()` is not necessarily cheap (though trivial in many cases). The formatting is no delayed until the error is needed to be displayed to users. So in some cases, if the error is never used, it means that it won't be formatted at all. - Replaced `regex` based matching with one of the following: - Simple string equality test in the case of 'false positivie'. - URL parsing based test, in the case of extracting repository and user name for GitHub links. - Either cases would be much more efficient than `regex` based matching. First, there's no need to construct a state machine for regex. Second, URL is already verified and parsed on its creation, and extracting its components is fairly cheap. Also, this removes the dependency on `lazy-static` in `lychee-lib`. - `types` module now has a sub-directory, and its components are now separated into their own modules (in that sub-directory). - `lychee-lib::test_utils` module is only compiled for tests. - `wiremock` is moved to `dev-dependency` as it's only needed for `test` modules. - Dependencies are listed in alphabetical order. - Imports are organized in the following fashion: - Imports from `std` - Imports from 3rd-party crates, and `lychee-lib`. - Imports from `crate::*` or `super::*`. - No glob import. - I followed suggestion from `cargo clippy`, with `clippy::all` and `clippy:pedantic`. Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
pub(crate) method: String,
2020-10-20 23:31:06 +00:00
/// Deprecated; use `--base-url` instead
#[arg(long, value_parser = parse_base)]
#[serde(skip)]
pub(crate) base: Option<Base>,
/// Base URL used to resolve relative URLs during link checking
/// Example: <https://example.com>
#[arg(short, long, value_parser= parse_base)]
2020-10-21 00:10:25 +00:00
#[serde(default)]
pub(crate) base_url: Option<Base>,
/// Root path to use when checking absolute local links,
/// must be an absolute path
#[arg(long)]
#[serde(default)]
pub(crate) root_dir: Option<PathBuf>,
/// Basic authentication support. E.g. `http://example.com username:password`
#[arg(long)]
#[serde(default)]
pub(crate) basic_auth: Option<Vec<BasicAuthSelector>>,
/// GitHub API token to use when checking github.com links, to avoid rate limiting
#[arg(long, env = "GITHUB_TOKEN", hide_env_values = true)]
#[serde(default)]
pub(crate) github_token: Option<SecretString>,
/// Skip missing input files (default is to error if they don't exist)
#[arg(long)]
#[serde(default)]
Major refactor of codebase (#208) - The binary component and library component are separated as two packages in the same workspace. - `lychee` is the binary component, in `lychee-bin/*`. - `lychee-lib` is the library component, in `lychee-lib/*`. - Users can now install only the `lychee-lib`, instead of both components, that would require fewer dependencies and faster compilation. - Dependencies for each component are adjusted and updated. E.g., no CLI dependencies for `lychee-lib`. - CLI tests are only moved to `lychee`, as it has nothing to do with the library component. - `Status::Error` is refactored to contain dedicated error enum, `ErrorKind`. - The motivation is to delay the formatting of errors to strings. Note that `e.to_string()` is not necessarily cheap (though trivial in many cases). The formatting is no delayed until the error is needed to be displayed to users. So in some cases, if the error is never used, it means that it won't be formatted at all. - Replaced `regex` based matching with one of the following: - Simple string equality test in the case of 'false positivie'. - URL parsing based test, in the case of extracting repository and user name for GitHub links. - Either cases would be much more efficient than `regex` based matching. First, there's no need to construct a state machine for regex. Second, URL is already verified and parsed on its creation, and extracting its components is fairly cheap. Also, this removes the dependency on `lazy-static` in `lychee-lib`. - `types` module now has a sub-directory, and its components are now separated into their own modules (in that sub-directory). - `lychee-lib::test_utils` module is only compiled for tests. - `wiremock` is moved to `dev-dependency` as it's only needed for `test` modules. - Dependencies are listed in alphabetical order. - Imports are organized in the following fashion: - Imports from `std` - Imports from 3rd-party crates, and `lychee-lib`. - Imports from `crate::*` or `super::*`. - No glob import. - I followed suggestion from `cargo clippy`, with `clippy::all` and `clippy:pedantic`. Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
pub(crate) skip_missing: bool,
/// Do not skip files that would otherwise be ignored by
/// '.gitignore', '.ignore', or the global ignore file.
#[arg(long)]
#[serde(default)]
pub(crate) no_ignore: bool,
/// Do not skip hidden directories and files.
#[arg(long)]
#[serde(default)]
pub(crate) hidden: bool,
/// Find links in verbatim sections like `pre`- and `code` blocks
#[arg(long)]
#[serde(default)]
pub(crate) include_verbatim: bool,
/// Ignore case when expanding filesystem path glob inputs
#[arg(long)]
#[serde(default)]
Major refactor of codebase (#208) - The binary component and library component are separated as two packages in the same workspace. - `lychee` is the binary component, in `lychee-bin/*`. - `lychee-lib` is the library component, in `lychee-lib/*`. - Users can now install only the `lychee-lib`, instead of both components, that would require fewer dependencies and faster compilation. - Dependencies for each component are adjusted and updated. E.g., no CLI dependencies for `lychee-lib`. - CLI tests are only moved to `lychee`, as it has nothing to do with the library component. - `Status::Error` is refactored to contain dedicated error enum, `ErrorKind`. - The motivation is to delay the formatting of errors to strings. Note that `e.to_string()` is not necessarily cheap (though trivial in many cases). The formatting is no delayed until the error is needed to be displayed to users. So in some cases, if the error is never used, it means that it won't be formatted at all. - Replaced `regex` based matching with one of the following: - Simple string equality test in the case of 'false positivie'. - URL parsing based test, in the case of extracting repository and user name for GitHub links. - Either cases would be much more efficient than `regex` based matching. First, there's no need to construct a state machine for regex. Second, URL is already verified and parsed on its creation, and extracting its components is fairly cheap. Also, this removes the dependency on `lazy-static` in `lychee-lib`. - `types` module now has a sub-directory, and its components are now separated into their own modules (in that sub-directory). - `lychee-lib::test_utils` module is only compiled for tests. - `wiremock` is moved to `dev-dependency` as it's only needed for `test` modules. - Dependencies are listed in alphabetical order. - Imports are organized in the following fashion: - Imports from `std` - Imports from 3rd-party crates, and `lychee-lib`. - Imports from `crate::*` or `super::*`. - No glob import. - I followed suggestion from `cargo clippy`, with `clippy::all` and `clippy:pedantic`. Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
pub(crate) glob_ignore_case: bool,
/// Output file of status report
#[arg(short, long, value_parser)]
#[serde(default)]
Major refactor of codebase (#208) - The binary component and library component are separated as two packages in the same workspace. - `lychee` is the binary component, in `lychee-bin/*`. - `lychee-lib` is the library component, in `lychee-lib/*`. - Users can now install only the `lychee-lib`, instead of both components, that would require fewer dependencies and faster compilation. - Dependencies for each component are adjusted and updated. E.g., no CLI dependencies for `lychee-lib`. - CLI tests are only moved to `lychee`, as it has nothing to do with the library component. - `Status::Error` is refactored to contain dedicated error enum, `ErrorKind`. - The motivation is to delay the formatting of errors to strings. Note that `e.to_string()` is not necessarily cheap (though trivial in many cases). The formatting is no delayed until the error is needed to be displayed to users. So in some cases, if the error is never used, it means that it won't be formatted at all. - Replaced `regex` based matching with one of the following: - Simple string equality test in the case of 'false positivie'. - URL parsing based test, in the case of extracting repository and user name for GitHub links. - Either cases would be much more efficient than `regex` based matching. First, there's no need to construct a state machine for regex. Second, URL is already verified and parsed on its creation, and extracting its components is fairly cheap. Also, this removes the dependency on `lazy-static` in `lychee-lib`. - `types` module now has a sub-directory, and its components are now separated into their own modules (in that sub-directory). - `lychee-lib::test_utils` module is only compiled for tests. - `wiremock` is moved to `dev-dependency` as it's only needed for `test` modules. - Dependencies are listed in alphabetical order. - Imports are organized in the following fashion: - Imports from `std` - Imports from 3rd-party crates, and `lychee-lib`. - Imports from `crate::*` or `super::*`. - No glob import. - I followed suggestion from `cargo clippy`, with `clippy::all` and `clippy:pedantic`. Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
pub(crate) output: Option<PathBuf>,
/// Set the output display mode. Determines how results are presented in the terminal
#[arg(long, default_value = "color", value_parser = PossibleValuesParser::new(OutputMode::VARIANTS).map(|s| s.parse::<OutputMode>().unwrap()))]
#[serde(default)]
pub(crate) mode: OutputMode,
/// Output format of final status report
#[arg(short, long, default_value = "compact", value_parser = PossibleValuesParser::new(StatsFormat::VARIANTS).map(|s| s.parse::<StatsFormat>().unwrap()))]
#[serde(default)]
pub(crate) format: StatsFormat,
2021-09-04 01:21:54 +00:00
/// When HTTPS is available, treat HTTP links as errors
#[arg(long)]
2021-09-04 01:21:54 +00:00
#[serde(default)]
pub(crate) require_https: bool,
/// Tell lychee to read cookies from the given file.
/// Cookies will be stored in the cookie jar and sent with requests.
/// New cookies will be stored in the cookie jar and existing cookies will be updated.
#[arg(long)]
#[serde(default)]
pub(crate) cookie_jar: Option<PathBuf>,
2020-08-14 09:43:45 +00:00
}
2020-10-21 00:10:25 +00:00
impl Config {
/// Load configuration from a file
pub(crate) fn load_from_file(path: &Path) -> Result<Config> {
2020-10-21 00:10:25 +00:00
// Read configuration file
let contents = fs::read_to_string(path)?;
toml::from_str(&contents).with_context(|| "Failed to parse configuration file")
2020-10-21 00:10:25 +00:00
}
/// Merge the configuration from TOML into the CLI configuration
pub(crate) fn merge(&mut self, toml: Config) {
2020-10-21 00:10:25 +00:00
fold_in! {
// Destination and source configs
self, toml;
// Keys with defaults to assign
2023-02-24 22:53:09 +00:00
verbose: Verbosity::default();
cache: false;
no_progress: false;
max_redirects: DEFAULT_MAX_REDIRECTS;
max_retries: DEFAULT_MAX_RETRIES;
max_concurrency: DEFAULT_MAX_CONCURRENCY;
max_cache_age: humantime::parse_duration(DEFAULT_MAX_CACHE_AGE).unwrap();
cache_exclude_status: StatusCodeExcluder::default();
2020-10-21 00:10:25 +00:00
threads: None;
user_agent: DEFAULT_USER_AGENT;
2020-10-21 00:10:25 +00:00
insecure: false;
scheme: Vec::<String>::new();
include: Vec::<String>::new();
2020-10-21 00:10:25 +00:00
exclude: Vec::<String>::new();
exclude_file: Vec::<String>::new(); // deprecated
exclude_path: Vec::<PathBuf>::new();
2020-10-21 00:10:25 +00:00
exclude_all_private: false;
exclude_private: false;
exclude_link_local: false;
exclude_loopback: false;
exclude_mail: false;
format: StatsFormat::default();
remap: Vec::<String>::new();
fallback_extensions: Vec::<String>::new();
header: Vec::<String>::new();
timeout: DEFAULT_TIMEOUT_SECS;
retry_wait_time: DEFAULT_RETRY_WAIT_TIME_SECS;
method: DEFAULT_METHOD;
base_url: None;
basic_auth: None;
skip_missing: false;
include_verbatim: false;
include_mail: false;
glob_ignore_case: false;
output: None;
2021-09-04 01:21:54 +00:00
require_https: false;
cookie_jar: None;
include_fragments: false;
accept: StatusCodeSelector::default();
2020-10-21 00:10:25 +00:00
}
if self
.github_token
.as_ref()
.map(ExposeSecret::expose_secret)
.is_none()
&& toml
.github_token
.as_ref()
.map(ExposeSecret::expose_secret)
.is_some()
{
self.github_token = toml.github_token;
}
2020-10-21 00:10:25 +00:00
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_accept_status_codes() {
let toml = Config {
accept: StatusCodeSelector::from_str("200..=204, 429, 500").unwrap(),
..Default::default()
};
let mut cli = Config::default();
cli.merge(toml);
assert!(cli.accept.contains(429));
assert!(cli.accept.contains(200));
assert!(cli.accept.contains(203));
assert!(cli.accept.contains(204));
assert!(!cli.accept.contains(205));
}
#[test]
fn test_default() {
let cli = Config::default();
assert_eq!(
cli.accept,
StatusCodeSelector::from_str("100..=103,200..=299").expect("no error")
);
assert_eq!(cli.cache_exclude_status, StatusCodeExcluder::new());
}
}