2023-03-27 22:45:06 +00:00
|
|
|
use crate::archive::Archive;
|
feat: Add support for ranges in the `--accept` option / config field (#1167)
Adds support for accept ranges discussed in #1157. This allows the user to specify custom HTTP status codes accepted during checking and thus will report as valid (not broken). The accept option only supports specifying status codes as a comma-separated list. With this PR, the option will accept a list of status code ranges formatted like this:
```toml
accept = ["100..=103", "200..=299", "403"]
```
These combinations will be supported: `..<end>`, ` ..=<end>`, `<start>..<end>` and `<start>..=<end>`.
The behavior is copied from the Rust Range like concepts:
```
..<end>, includes 0 to <end> (exclusive)
..=<end>, includes 0 to <end> (inclusive)
<start>..<end>, includes <start> to <end> (exclusive)
<start>..=<end>, includes <start> to <end> (inclusive)
```
- Foundation and enhancements for accept ranges, including support for comma-separated strings and integration into the CLI.
- Implementations and updates for AcceptSelector, including Default, Display, and serde defaults.
- Address and fix various errors: clippy, cargo fmt, and tests.
- Add more tests, address edge cases, and enhance error messaging, especially for TOML config parsing.
- Update dependencies.
2023-09-17 19:39:01 +00:00
|
|
|
use crate::parse::parse_base;
|
2023-02-24 22:53:09 +00:00
|
|
|
use crate::verbosity::Verbosity;
|
2022-03-27 00:27:27 +00:00
|
|
|
use anyhow::{anyhow, Context, Error, Result};
|
2024-06-14 17:47:52 +00:00
|
|
|
use clap::builder::PossibleValuesParser;
|
2023-03-27 22:45:06 +00:00
|
|
|
use clap::{arg, builder::TypedValueParser, Parser};
|
2022-02-07 21:45:17 +00:00
|
|
|
use const_format::{concatcp, formatcp};
|
2022-01-07 00:03:10 +00:00
|
|
|
use lychee_lib::{
|
2024-10-14 00:41:56 +00:00
|
|
|
Base, BasicAuthSelector, Input, StatusCodeExcluder, StatusCodeSelector, DEFAULT_MAX_REDIRECTS,
|
|
|
|
|
DEFAULT_MAX_RETRIES, DEFAULT_RETRY_WAIT_TIME_SECS, DEFAULT_TIMEOUT_SECS, DEFAULT_USER_AGENT,
|
2022-01-07 00:03:10 +00:00
|
|
|
};
|
2022-02-13 12:53:46 +00:00
|
|
|
use secrecy::{ExposeSecret, SecretString};
|
2020-10-21 00:10:25 +00:00
|
|
|
use serde::Deserialize;
|
2023-03-01 23:24:08 +00:00
|
|
|
use std::path::Path;
|
feat: Add support for ranges in the `--accept` option / config field (#1167)
Adds support for accept ranges discussed in #1157. This allows the user to specify custom HTTP status codes accepted during checking and thus will report as valid (not broken). The accept option only supports specifying status codes as a comma-separated list. With this PR, the option will accept a list of status code ranges formatted like this:
```toml
accept = ["100..=103", "200..=299", "403"]
```
These combinations will be supported: `..<end>`, ` ..=<end>`, `<start>..<end>` and `<start>..=<end>`.
The behavior is copied from the Rust Range like concepts:
```
..<end>, includes 0 to <end> (exclusive)
..=<end>, includes 0 to <end> (inclusive)
<start>..<end>, includes <start> to <end> (exclusive)
<start>..=<end>, includes <start> to <end> (inclusive)
```
- Foundation and enhancements for accept ranges, including support for comma-separated strings and integration into the CLI.
- Implementations and updates for AcceptSelector, including Default, Display, and serde defaults.
- Address and fix various errors: clippy, cargo fmt, and tests.
- Add more tests, address edge cases, and enhance error messaging, especially for TOML config parsing.
- Update dependencies.
2023-09-17 19:39:01 +00:00
|
|
|
use std::{fs, path::PathBuf, str::FromStr, time::Duration};
|
2024-06-14 17:47:52 +00:00
|
|
|
use strum::{Display, EnumIter, EnumString, VariantNames};
|
2020-10-21 00:10:25 +00:00
|
|
|
|
2022-01-14 14:25:51 +00:00
|
|
|
/// File name of the lychee ignore file.
// NOTE(review): presumably holds patterns of links to skip; the consumer is not in this file.
pub(crate) const LYCHEE_IGNORE_FILE: &str = ".lycheeignore";

/// File name of the on-disk request cache (shown in the `--cache` help text).
pub(crate) const LYCHEE_CACHE_FILE: &str = ".lycheecache";

/// File name of the default configuration file (shown in the `--config` help text).
pub(crate) const LYCHEE_CONFIG_FILE: &str = "lychee.toml";

/// Default request method, used as the serde fallback for the `method` field.
const DEFAULT_METHOD: &str = "get";

/// Default maximum cache age, in `humantime` duration syntax.
const DEFAULT_MAX_CACHE_AGE: &str = "1d";

/// Default value for the maximum concurrency setting.
const DEFAULT_MAX_CONCURRENCY: usize = 128;
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
|
2022-08-12 20:53:13 +00:00
|
|
|
// this exists because clap requires `&str` type values for defaults
|
2022-02-07 22:17:50 +00:00
|
|
|
// whereas serde expects owned `String` types
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
// (we can't use e.g. `TIMEOUT` or `timeout()` which gets created for serde)
|
2022-02-07 22:17:50 +00:00
|
|
|
const MAX_CONCURRENCY_STR: &str = concatcp!(DEFAULT_MAX_CONCURRENCY);
|
|
|
|
|
const MAX_CACHE_AGE_STR: &str = concatcp!(DEFAULT_MAX_CACHE_AGE);
|
2022-02-07 21:45:17 +00:00
|
|
|
const MAX_REDIRECTS_STR: &str = concatcp!(DEFAULT_MAX_REDIRECTS);
|
|
|
|
|
const MAX_RETRIES_STR: &str = concatcp!(DEFAULT_MAX_RETRIES);
|
2022-08-12 20:53:13 +00:00
|
|
|
const HELP_MSG_CACHE: &str = formatcp!(
|
2022-02-07 21:45:17 +00:00
|
|
|
"Use request cache stored on disk at `{}`",
|
|
|
|
|
LYCHEE_CACHE_FILE,
|
|
|
|
|
);
|
2023-03-01 23:24:08 +00:00
|
|
|
// We use a custom help message here because we want to show the default
|
|
|
|
|
// value of the config file, but also be able to check if the user has
|
|
|
|
|
// provided a custom value. If they didn't, we won't throw an error if
|
|
|
|
|
// the file doesn't exist.
|
|
|
|
|
const HELP_MSG_CONFIG_FILE: &str = formatcp!(
|
|
|
|
|
"Configuration file to use\n\n[default: {}]",
|
|
|
|
|
LYCHEE_CONFIG_FILE,
|
|
|
|
|
);
|
2022-02-24 11:24:57 +00:00
|
|
|
const TIMEOUT_STR: &str = concatcp!(DEFAULT_TIMEOUT_SECS);
|
|
|
|
|
const RETRY_WAIT_TIME_STR: &str = concatcp!(DEFAULT_RETRY_WAIT_TIME_SECS);
|
2020-12-02 22:28:37 +00:00
|
|
|
|
2024-06-14 17:47:52 +00:00
|
|
|
/// The format to use for the final status report
|
2024-10-27 00:17:00 +00:00
|
|
|
#[derive(Debug, Deserialize, Default, Clone, Display, EnumIter, VariantNames, PartialEq)]
|
2024-06-14 17:47:52 +00:00
|
|
|
#[non_exhaustive]
|
|
|
|
|
#[strum(serialize_all = "snake_case")]
|
2024-10-27 00:17:00 +00:00
|
|
|
#[serde(rename_all = "snake_case")]
|
2024-06-14 17:47:52 +00:00
|
|
|
pub(crate) enum StatsFormat {
|
2023-03-10 14:15:37 +00:00
|
|
|
#[default]
|
2021-11-17 23:44:48 +00:00
|
|
|
Compact,
|
|
|
|
|
Detailed,
|
2021-02-18 00:32:48 +00:00
|
|
|
Json,
|
2021-11-17 23:44:48 +00:00
|
|
|
Markdown,
|
2022-04-25 17:19:36 +00:00
|
|
|
Raw,
|
2020-12-14 00:15:14 +00:00
|
|
|
}
|
|
|
|
|
|
2024-06-14 17:47:52 +00:00
|
|
|
impl FromStr for StatsFormat {
|
2020-12-14 00:15:14 +00:00
|
|
|
type Err = Error;
|
2024-06-14 17:47:52 +00:00
|
|
|
|
2020-12-14 00:15:14 +00:00
|
|
|
fn from_str(format: &str) -> Result<Self, Self::Err> {
|
2022-04-25 17:19:36 +00:00
|
|
|
match format.to_lowercase().as_str() {
|
2024-06-14 17:47:52 +00:00
|
|
|
"compact" | "string" => Ok(StatsFormat::Compact),
|
|
|
|
|
"detailed" => Ok(StatsFormat::Detailed),
|
|
|
|
|
"json" => Ok(StatsFormat::Json),
|
|
|
|
|
"markdown" | "md" => Ok(StatsFormat::Markdown),
|
|
|
|
|
"raw" => Ok(StatsFormat::Raw),
|
2022-04-25 17:19:36 +00:00
|
|
|
_ => Err(anyhow!("Unknown format {}", format)),
|
2020-12-14 00:15:14 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-06-14 17:47:52 +00:00
|
|
|
/// The different formatter modes
|
|
|
|
|
///
|
|
|
|
|
/// This decides over whether to use color,
|
|
|
|
|
/// emojis, or plain text for the output.
|
2025-02-06 23:05:21 +00:00
|
|
|
#[derive(
|
|
|
|
|
Debug, Deserialize, Default, Clone, Display, EnumIter, EnumString, VariantNames, PartialEq,
|
|
|
|
|
)]
|
2024-06-14 17:47:52 +00:00
|
|
|
#[non_exhaustive]
|
|
|
|
|
pub(crate) enum OutputMode {
|
|
|
|
|
/// Plain text output.
|
|
|
|
|
///
|
|
|
|
|
/// This is the most basic output mode for terminals that do not support
|
|
|
|
|
/// color or emojis. It can also be helpful for scripting or when you want
|
|
|
|
|
/// to pipe the output to another program.
|
|
|
|
|
#[serde(rename = "plain")]
|
|
|
|
|
#[strum(serialize = "plain", ascii_case_insensitive)]
|
|
|
|
|
Plain,
|
|
|
|
|
|
|
|
|
|
/// Colorful output.
|
|
|
|
|
///
|
|
|
|
|
/// This mode uses colors to highlight the status of the requests.
|
|
|
|
|
/// It is useful for terminals that support colors and you want to
|
|
|
|
|
/// provide a more visually appealing output.
|
|
|
|
|
///
|
|
|
|
|
/// This is the default output mode.
|
|
|
|
|
#[serde(rename = "color")]
|
|
|
|
|
#[strum(serialize = "color", ascii_case_insensitive)]
|
|
|
|
|
#[default]
|
|
|
|
|
Color,
|
|
|
|
|
|
|
|
|
|
/// Emoji output.
|
|
|
|
|
///
|
|
|
|
|
/// This mode uses emojis to represent the status of the requests.
|
|
|
|
|
/// Some people may find this mode more intuitive and fun to use.
|
|
|
|
|
#[serde(rename = "emoji")]
|
|
|
|
|
#[strum(serialize = "emoji", ascii_case_insensitive)]
|
|
|
|
|
Emoji,
|
2025-02-06 14:03:53 +00:00
|
|
|
|
|
|
|
|
/// Task output.
|
|
|
|
|
///
|
|
|
|
|
/// This mode uses Markdown-styled checkboxes to represent the status of the requests.
|
|
|
|
|
/// Some people may find this mode more intuitive and useful for task tracking.
|
|
|
|
|
#[serde(rename = "task")]
|
|
|
|
|
#[strum(serialize = "task", ascii_case_insensitive)]
|
|
|
|
|
Task,
|
2024-06-14 17:47:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
impl OutputMode {
|
|
|
|
|
/// Returns `true` if the response format is `Plain`
|
|
|
|
|
pub(crate) const fn is_plain(&self) -> bool {
|
|
|
|
|
matches!(self, OutputMode::Plain)
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Returns `true` if the response format is `Emoji`
|
|
|
|
|
pub(crate) const fn is_emoji(&self) -> bool {
|
|
|
|
|
matches!(self, OutputMode::Emoji)
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2020-10-21 00:10:25 +00:00
|
|
|
// Generates zero-argument functions that return a default value.
//
// serde's `#[serde(default = "...")]` attribute takes the *name* of a function,
// so every field with a non-trivial default needs one. Each entry has the form
// `name: Type = expression;` and expands to `fn name() -> Type { expression }`.
macro_rules! default_function {
    ( $( $func:ident : $ret:ty = $value:expr; )* ) => {
        $(
            // Some generated bodies could be `const fn` while others cannot;
            // the lint is silenced so one template serves both.
            #[allow(clippy::missing_const_for_fn)]
            fn $func() -> $ret {
                $value
            }
        )*
    };
}
|
|
|
|
|
|
2020-12-02 22:28:37 +00:00
|
|
|
// Generate the functions for serde defaults
|
|
|
|
|
default_function! {
|
2022-01-07 00:03:10 +00:00
|
|
|
max_redirects: usize = DEFAULT_MAX_REDIRECTS;
|
|
|
|
|
max_retries: u64 = DEFAULT_MAX_RETRIES;
|
2022-02-07 22:17:50 +00:00
|
|
|
max_concurrency: usize = DEFAULT_MAX_CONCURRENCY;
|
|
|
|
|
max_cache_age: Duration = humantime::parse_duration(DEFAULT_MAX_CACHE_AGE).unwrap();
|
2022-01-07 00:03:10 +00:00
|
|
|
user_agent: String = DEFAULT_USER_AGENT.to_string();
|
2022-02-24 11:24:57 +00:00
|
|
|
timeout: usize = DEFAULT_TIMEOUT_SECS;
|
|
|
|
|
retry_wait_time: usize = DEFAULT_RETRY_WAIT_TIME_SECS;
|
2022-02-07 22:17:50 +00:00
|
|
|
method: String = DEFAULT_METHOD.to_string();
|
2023-02-24 22:53:09 +00:00
|
|
|
verbosity: Verbosity = Verbosity::default();
|
2024-10-14 00:41:56 +00:00
|
|
|
cache_exclude_selector: StatusCodeExcluder = StatusCodeExcluder::new();
|
|
|
|
|
accept_selector: StatusCodeSelector = StatusCodeSelector::default();
|
2020-12-02 22:28:37 +00:00
|
|
|
}
|
|
|
|
|
|
2020-10-21 00:10:25 +00:00
|
|
|
// Merges configuration values from one struct into another, field by field.
//
// Usage: `fold_in! { args, file; field: default_expr; ... }`
//
// For every listed field, the value in `args` is overwritten with the value
// from `file` only when `args` still holds the default and `file` does not.
// A consequence of comparing against the default: a value that was explicitly
// set to the default in `args` cannot be told apart from an unset one.
macro_rules! fold_in {
    ( $args:ident , $file:ident ; $( $field:ident : $fallback:expr; )* ) => {
        $(
            if $args.$field == $fallback && $file.$field != $fallback {
                $args.$field = $file.$field;
            }
        )*
    };
}
|
2020-08-14 09:43:45 +00:00
|
|
|
|
2022-11-13 20:10:32 +00:00
|
|
|
/// A fast, async link checker
|
|
|
|
|
///
|
|
|
|
|
/// Finds broken URLs and mail addresses inside Markdown, HTML,
|
|
|
|
|
/// `reStructuredText`, websites and more!
|
2024-06-14 17:47:52 +00:00
|
|
|
#[derive(Parser, Debug)]
|
|
|
|
|
#[command(version, about)]
|
2020-08-14 09:43:45 +00:00
|
|
|
pub(crate) struct LycheeOptions {
|
2020-12-02 22:28:37 +00:00
|
|
|
/// The inputs (where to get links to check from).
|
|
|
|
|
/// These can be: files (e.g. `README.md`), file globs (e.g. `"~/git/*/README.md"`),
|
2022-02-18 09:29:49 +00:00
|
|
|
/// remote URLs (e.g. `https://example.com/README.md`) or standard input (`-`).
|
2021-09-16 14:40:38 +00:00
|
|
|
/// NOTE: Use `--` to separate inputs from options that allow multiple arguments.
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(name = "inputs", required = true)]
|
2020-12-02 22:28:37 +00:00
|
|
|
raw_inputs: Vec<String>,
|
2020-08-14 09:43:45 +00:00
|
|
|
|
2020-10-21 00:10:25 +00:00
|
|
|
/// Configuration file to use
|
2023-03-01 23:24:08 +00:00
|
|
|
#[arg(short, long = "config")]
|
|
|
|
|
#[arg(help = HELP_MSG_CONFIG_FILE)]
|
|
|
|
|
pub(crate) config_file: Option<PathBuf>,
|
2020-10-21 00:10:25 +00:00
|
|
|
|
2022-11-28 22:25:33 +00:00
|
|
|
#[clap(flatten)]
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
pub(crate) config: Config,
|
2020-10-21 00:10:25 +00:00
|
|
|
}
|
2020-08-14 09:43:45 +00:00
|
|
|
|
2020-12-02 22:28:37 +00:00
|
|
|
impl LycheeOptions {
|
|
|
|
|
/// Get parsed inputs from options.
|
2022-11-05 16:25:44 +00:00
|
|
|
// This depends on the config, which is why a method is required (we could
|
|
|
|
|
// accept a `Vec<Input>` in `LycheeOptions` and do the conversion there, but
|
|
|
|
|
// we wouldn't get access to `glob_ignore_case`.
|
2022-03-27 00:27:27 +00:00
|
|
|
pub(crate) fn inputs(&self) -> Result<Vec<Input>> {
|
2022-05-29 15:27:09 +00:00
|
|
|
let excluded = if self.config.exclude_path.is_empty() {
|
|
|
|
|
None
|
|
|
|
|
} else {
|
|
|
|
|
Some(self.config.exclude_path.clone())
|
|
|
|
|
};
|
2020-12-02 22:28:37 +00:00
|
|
|
self.raw_inputs
|
|
|
|
|
.iter()
|
2022-05-29 15:27:09 +00:00
|
|
|
.map(|s| Input::new(s, None, self.config.glob_ignore_case, excluded.clone()))
|
2022-03-27 00:27:27 +00:00
|
|
|
.collect::<Result<_, _>>()
|
|
|
|
|
.context("Cannot parse inputs from arguments")
|
2020-12-02 22:28:37 +00:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-06-14 17:47:52 +00:00
|
|
|
/// The main configuration for lychee
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
#[allow(clippy::struct_excessive_bools)]
|
2024-01-09 19:55:39 +00:00
|
|
|
#[derive(Parser, Debug, Deserialize, Clone, Default)]
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
pub(crate) struct Config {
|
2020-10-21 00:10:25 +00:00
|
|
|
/// Verbose program output
|
2022-11-28 22:25:33 +00:00
|
|
|
#[clap(flatten)]
|
2022-11-29 11:59:32 +00:00
|
|
|
#[serde(default = "verbosity")]
|
2023-02-24 22:53:09 +00:00
|
|
|
pub(crate) verbose: Verbosity,
|
2020-08-14 09:43:45 +00:00
|
|
|
|
2021-02-21 16:19:32 +00:00
|
|
|
/// Do not show progress bar.
|
2021-04-16 18:25:22 +00:00
|
|
|
/// This is recommended for non-interactive shells (e.g. for continuous integration)
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(short, long, verbatim_doc_comment)]
|
2020-10-21 00:10:25 +00:00
|
|
|
#[serde(default)]
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
pub(crate) no_progress: bool,
|
2020-10-10 04:31:28 +00:00
|
|
|
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(help = HELP_MSG_CACHE)]
|
|
|
|
|
#[arg(long)]
|
2022-01-14 14:25:51 +00:00
|
|
|
#[serde(default)]
|
|
|
|
|
pub(crate) cache: bool,
|
|
|
|
|
|
2022-01-14 15:55:56 +00:00
|
|
|
/// Discard all cached requests older than this duration
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(
|
2022-01-14 14:25:51 +00:00
|
|
|
long,
|
2022-11-13 20:10:32 +00:00
|
|
|
value_parser = humantime::parse_duration,
|
2022-02-07 22:17:50 +00:00
|
|
|
default_value = &MAX_CACHE_AGE_STR
|
2022-01-14 14:25:51 +00:00
|
|
|
)]
|
2022-02-07 22:17:50 +00:00
|
|
|
#[serde(default = "max_cache_age")]
|
2022-05-31 17:43:46 +00:00
|
|
|
#[serde(with = "humantime_serde")]
|
2022-01-14 14:25:51 +00:00
|
|
|
pub(crate) max_cache_age: Duration,
|
|
|
|
|
|
2024-10-14 00:41:56 +00:00
|
|
|
/// A list of status codes that will be excluded from the cache
|
|
|
|
|
#[arg(
|
|
|
|
|
long,
|
|
|
|
|
default_value_t,
|
|
|
|
|
long_help = "A list of status codes that will be ignored from the cache
|
|
|
|
|
|
|
|
|
|
The following accept range syntax is supported: [start]..[=]end|code. Some valid
|
|
|
|
|
examples are:
|
|
|
|
|
|
|
|
|
|
- 429
|
|
|
|
|
- 500..=599
|
|
|
|
|
- 500..
|
|
|
|
|
|
|
|
|
|
Use \"lychee --cache-exclude-status '429, 500..502' <inputs>...\" to provide a comma- separated
|
|
|
|
|
list of excluded status codes. This example will not cache results with a status code of 429, 500,
|
|
|
|
|
501 and 502."
|
|
|
|
|
)]
|
|
|
|
|
#[serde(default = "cache_exclude_selector")]
|
|
|
|
|
pub(crate) cache_exclude_status: StatusCodeExcluder,
|
|
|
|
|
|
2021-09-06 14:10:48 +00:00
|
|
|
/// Don't perform any link checking.
|
|
|
|
|
/// Instead, dump all the links extracted from inputs that would be checked
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(long)]
|
2021-09-06 14:10:48 +00:00
|
|
|
#[serde(default)]
|
|
|
|
|
pub(crate) dump: bool,
|
|
|
|
|
|
2023-07-16 16:08:14 +00:00
|
|
|
/// Don't perform any link extraction and checking.
|
|
|
|
|
/// Instead, dump all input sources from which links would be collected
|
|
|
|
|
#[arg(long)]
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub(crate) dump_inputs: bool,
|
|
|
|
|
|
2023-03-27 22:45:06 +00:00
|
|
|
/// Specify the use of a specific web archive.
|
|
|
|
|
/// Can be used in combination with `--suggest`
|
2024-06-14 17:47:52 +00:00
|
|
|
// The value parser first restricts input to the strings in `Archive::VARIANTS`, then
// maps the accepted string to an `Archive` via `str::parse`.
// NOTE(review): the `unwrap` presumably cannot fail because only listed variant names
// reach the closure; this assumes every entry of `Archive::VARIANTS` round-trips
// through `Archive`'s `FromStr` impl (confirm against the `Archive` definition).
#[arg(long, value_parser = PossibleValuesParser::new(Archive::VARIANTS).map(|s| s.parse::<Archive>().unwrap()))]
|
2023-03-27 22:45:06 +00:00
|
|
|
#[serde(default)]
|
|
|
|
|
pub(crate) archive: Option<Archive>,
|
|
|
|
|
|
|
|
|
|
/// Suggest link replacements for broken links, using a web archive.
|
|
|
|
|
/// The web archive can be specified with `--archive`
|
|
|
|
|
#[arg(long)]
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub(crate) suggest: bool,
|
|
|
|
|
|
2020-10-21 00:10:25 +00:00
|
|
|
/// Maximum number of allowed redirects
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(short, long, default_value = &MAX_REDIRECTS_STR)]
|
2020-12-02 22:28:37 +00:00
|
|
|
#[serde(default = "max_redirects")]
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
pub(crate) max_redirects: usize,
|
2020-08-14 09:43:45 +00:00
|
|
|
|
2022-01-07 00:03:10 +00:00
|
|
|
/// Maximum number of retries per request
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(long, default_value = &MAX_RETRIES_STR)]
|
2022-01-07 00:03:10 +00:00
|
|
|
#[serde(default = "max_retries")]
|
|
|
|
|
pub(crate) max_retries: u64,
|
|
|
|
|
|
2020-11-24 20:30:06 +00:00
|
|
|
/// Maximum number of concurrent network requests
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(long, default_value = &MAX_CONCURRENCY_STR)]
|
2020-12-02 22:28:37 +00:00
|
|
|
#[serde(default = "max_concurrency")]
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
pub(crate) max_concurrency: usize,
|
2020-11-24 20:30:06 +00:00
|
|
|
|
2020-10-21 00:10:25 +00:00
|
|
|
/// Number of threads to utilize.
|
|
|
|
|
/// Defaults to number of cores available to the system
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(short = 'T', long)]
|
2020-10-21 00:10:25 +00:00
|
|
|
#[serde(default)]
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
pub(crate) threads: Option<usize>,
|
2020-08-14 09:43:45 +00:00
|
|
|
|
2020-10-21 00:10:25 +00:00
|
|
|
/// User agent
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(short, long, default_value = DEFAULT_USER_AGENT)]
|
2020-10-21 00:10:25 +00:00
|
|
|
#[serde(default = "user_agent")]
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
pub(crate) user_agent: String,
|
2020-08-14 09:43:45 +00:00
|
|
|
|
2020-10-21 00:10:25 +00:00
|
|
|
/// Proceed for server connections considered insecure (invalid TLS)
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(short, long)]
|
2020-10-21 00:10:25 +00:00
|
|
|
#[serde(default)]
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
pub(crate) insecure: bool,
|
2020-08-14 09:43:45 +00:00
|
|
|
|
2023-09-14 23:27:44 +00:00
|
|
|
/// Only test links with the given schemes (e.g. https).
|
|
|
|
|
/// Omit to check links with any other scheme.
|
|
|
|
|
/// At the moment, we support http, https, file, and mailto.
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(short, long)]
|
2020-10-21 00:10:25 +00:00
|
|
|
#[serde(default)]
|
2021-04-26 16:24:54 +00:00
|
|
|
pub(crate) scheme: Vec<String>,
|
2020-08-14 09:43:45 +00:00
|
|
|
|
2021-09-03 00:09:30 +00:00
|
|
|
/// Only check local files and block network requests.
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(long)]
|
2021-09-02 21:10:46 +00:00
|
|
|
#[serde(default)]
|
|
|
|
|
pub(crate) offline: bool,
|
|
|
|
|
|
2020-10-25 12:41:06 +00:00
|
|
|
/// URLs to check (supports regex). Has preference over all excludes.
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(long)]
|
2020-10-25 12:41:06 +00:00
|
|
|
#[serde(default)]
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
pub(crate) include: Vec<String>,
|
2020-10-25 12:41:06 +00:00
|
|
|
|
2022-10-23 10:17:20 +00:00
|
|
|
/// Exclude URLs and mail addresses from checking (supports regex)
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(long)]
|
2020-10-21 00:10:25 +00:00
|
|
|
#[serde(default)]
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
pub(crate) exclude: Vec<String>,
|
2020-08-14 13:24:41 +00:00
|
|
|
|
2022-05-29 15:27:09 +00:00
|
|
|
/// Deprecated; use `--exclude-path` instead
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(long)]
|
2021-09-01 15:37:31 +00:00
|
|
|
#[serde(default)]
|
|
|
|
|
pub(crate) exclude_file: Vec<String>,
|
2021-09-03 00:12:03 +00:00
|
|
|
|
2022-05-29 15:27:09 +00:00
|
|
|
/// Exclude file path from getting checked.
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(long)]
|
2022-05-29 15:27:09 +00:00
|
|
|
#[serde(default)]
|
|
|
|
|
pub(crate) exclude_path: Vec<PathBuf>,
|
|
|
|
|
|
2020-10-21 00:10:25 +00:00
|
|
|
/// Exclude all private IPs from checking.
|
|
|
|
|
/// Equivalent to `--exclude-private --exclude-link-local --exclude-loopback`
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(short = 'E', long, verbatim_doc_comment)]
|
2020-10-21 00:10:25 +00:00
|
|
|
#[serde(default)]
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
pub(crate) exclude_all_private: bool,
|
2020-10-17 08:01:06 +00:00
|
|
|
|
2020-10-21 00:10:25 +00:00
|
|
|
/// Exclude private IP address ranges from checking
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(long)]
|
2020-10-21 00:10:25 +00:00
|
|
|
#[serde(default)]
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
pub(crate) exclude_private: bool,
|
2020-10-09 14:29:20 +00:00
|
|
|
|
2020-10-21 00:10:25 +00:00
|
|
|
/// Exclude link-local IP address range from checking
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(long)]
|
2020-10-21 00:10:25 +00:00
|
|
|
#[serde(default)]
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
pub(crate) exclude_link_local: bool,
|
2020-10-09 14:29:20 +00:00
|
|
|
|
2021-10-06 09:33:23 +00:00
|
|
|
/// Exclude loopback IP address range and localhost from checking
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(long)]
|
2020-10-21 00:10:25 +00:00
|
|
|
#[serde(default)]
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
pub(crate) exclude_loopback: bool,
|
2020-10-09 14:29:20 +00:00
|
|
|
|
2021-02-10 10:58:04 +00:00
|
|
|
/// Exclude all mail addresses from checking
|
2023-07-19 17:58:38 +00:00
|
|
|
/// (deprecated; excluded by default)
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(long)]
|
2021-02-10 10:58:04 +00:00
|
|
|
#[serde(default)]
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
pub(crate) exclude_mail: bool,
|
2021-02-10 10:58:04 +00:00
|
|
|
|
2023-07-19 17:58:38 +00:00
|
|
|
/// Also check email addresses
|
|
|
|
|
#[arg(long)]
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub(crate) include_mail: bool,
|
|
|
|
|
|
2022-05-29 19:41:22 +00:00
|
|
|
/// Remap URI matching pattern to different URI
|
|
|
|
|
#[serde(default)]
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(long)]
|
2022-05-29 19:41:22 +00:00
|
|
|
pub(crate) remap: Vec<String>,
|
|
|
|
|
|
2024-06-11 14:11:24 +00:00
|
|
|
/// Automatically append file extensions to `file://` URIs as needed
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
#[arg(
|
|
|
|
|
long,
|
|
|
|
|
value_delimiter = ',',
|
|
|
|
|
long_help = "Test the specified file extensions for URIs when checking files locally.
|
|
|
|
|
Multiple extensions can be separated by commas. Extensions will be checked in
|
|
|
|
|
order of appearance.
|
|
|
|
|
|
|
|
|
|
Example: --fallback-extensions html,htm,php,asp,aspx,jsp,cgi"
|
|
|
|
|
)]
|
|
|
|
|
pub(crate) fallback_extensions: Vec<String>,
|
|
|
|
|
|
2022-11-13 20:10:32 +00:00
|
|
|
/// Custom request header
|
|
|
|
|
#[arg(long)]
|
2020-10-21 00:10:25 +00:00
|
|
|
#[serde(default)]
|
2022-11-13 20:10:32 +00:00
|
|
|
pub(crate) header: Vec<String>,
|
2020-08-14 15:36:43 +00:00
|
|
|
|
feat: Add support for ranges in the `--accept` option / config field (#1167)
Adds support for accept ranges discussed in #1157. This allows the user to specify custom HTTP status codes accepted during checking and thus will report as valid (not broken). The accept option only supports specifying status codes as a comma-separated list. With this PR, the option will accept a list of status code ranges formatted like this:
```toml
accept = ["100..=103", "200..=299", "403"]
```
These combinations will be supported: `..<end>`, ` ..=<end>`, `<start>..<end>` and `<start>..=<end>`.
The behavior is copied from the Rust Range like concepts:
```
..<end>, includes 0 to <end> (exclusive)
..=<end>, includes 0 to <end> (inclusive)
<start>..<end>, includes <start> to <end> (exclusive)
<start>..=<end>, includes <start> to <end> (inclusive)
```
- Foundation and enhancements for accept ranges, including support for comma-separated strings and integration into the CLI.
- Implementations and updates for AcceptSelector, including Default, Display, and serde defaults.
- Address and fix various errors: clippy, cargo fmt, and tests.
- Add more tests, address edge cases, and enhance error messaging, especially for TOML config parsing.
- Update dependencies.
2023-09-17 19:39:01 +00:00
|
|
|
/// A List of accepted status codes for valid links
|
|
|
|
|
#[arg(
|
|
|
|
|
short,
|
|
|
|
|
long,
|
|
|
|
|
default_value_t,
|
|
|
|
|
long_help = "A List of accepted status codes for valid links
|
|
|
|
|
|
|
|
|
|
The following accept range syntax is supported: [start]..[=]end|code. Some valid
|
|
|
|
|
examples are:
|
|
|
|
|
|
|
|
|
|
- 200..=204
|
|
|
|
|
- 200..204
|
|
|
|
|
- ..=204
|
|
|
|
|
- ..204
|
|
|
|
|
- 200
|
|
|
|
|
|
|
|
|
|
Use \"lychee --accept '200..=204, 429, 500' <inputs>...\" to provide a comma-
|
|
|
|
|
separated list of accepted status codes. This example will accept 200, 201,
|
|
|
|
|
202, 203, 204, 429, and 500 as valid status codes."
|
|
|
|
|
)]
|
|
|
|
|
#[serde(default = "accept_selector")]
|
2024-10-14 00:41:56 +00:00
|
|
|
pub(crate) accept: StatusCodeSelector,
|
2020-08-17 23:17:26 +00:00
|
|
|
|
2023-07-31 14:04:00 +00:00
|
|
|
/// Enable the checking of fragments in links.
|
|
|
|
|
#[arg(long)]
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub(crate) include_fragments: bool,
|
|
|
|
|
|
2022-02-24 11:24:57 +00:00
|
|
|
/// Website timeout in seconds from connect to response finished
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(short, long, default_value = &TIMEOUT_STR)]
|
2020-10-21 00:10:25 +00:00
|
|
|
#[serde(default = "timeout")]
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is now delayed until the
error needs to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positive'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed the suggestions from `cargo clippy`, with `clippy::all` and
`clippy::pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
pub(crate) timeout: usize,
|
2020-08-21 22:36:03 +00:00
|
|
|
|
2022-02-24 11:24:57 +00:00
|
|
|
/// Minimum wait time in seconds between retries of failed requests
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(short, long, default_value = &RETRY_WAIT_TIME_STR)]
|
2022-02-24 11:24:57 +00:00
|
|
|
#[serde(default = "retry_wait_time")]
|
|
|
|
|
pub(crate) retry_wait_time: usize,
|
|
|
|
|
|
2020-10-21 00:10:25 +00:00
|
|
|
/// Request method
|
2020-11-24 20:30:06 +00:00
|
|
|
// Using `-X` as a short param similar to curl
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(short = 'X', long, default_value = DEFAULT_METHOD)]
|
2020-10-21 00:10:25 +00:00
|
|
|
#[serde(default = "method")]
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is now delayed until the
error needs to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positive'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed the suggestions from `cargo clippy`, with `clippy::all` and
`clippy::pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
pub(crate) method: String,
|
2020-10-20 23:31:06 +00:00
|
|
|
|
2025-02-16 00:41:32 +00:00
|
|
|
/// Deprecated; use `--base-url` instead
|
|
|
|
|
#[arg(long, value_parser = parse_base)]
|
|
|
|
|
#[serde(skip)]
|
|
|
|
|
pub(crate) base: Option<Base>,
|
|
|
|
|
|
|
|
|
|
/// Base URL used to resolve relative URLs during link checking
|
|
|
|
|
/// Example: <https://example.com>
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(short, long, value_parser= parse_base)]
|
2020-10-21 00:10:25 +00:00
|
|
|
#[serde(default)]
|
2025-02-16 00:41:32 +00:00
|
|
|
pub(crate) base_url: Option<Base>,
|
2020-10-26 08:23:45 +00:00
|
|
|
|
2024-12-13 13:36:33 +00:00
|
|
|
/// Root path to use when checking absolute local links,
|
|
|
|
|
/// must be an absolute path
|
|
|
|
|
#[arg(long)]
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub(crate) root_dir: Option<PathBuf>,
|
|
|
|
|
|
2023-06-26 10:06:24 +00:00
|
|
|
/// Basic authentication support. E.g. `http://example.com username:password`
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(long)]
|
2020-10-26 08:23:45 +00:00
|
|
|
#[serde(default)]
|
2023-06-26 10:06:24 +00:00
|
|
|
pub(crate) basic_auth: Option<Vec<BasicAuthSelector>>,
|
2020-10-26 22:31:31 +00:00
|
|
|
|
2020-12-02 22:28:37 +00:00
|
|
|
/// GitHub API token to use when checking github.com links, to avoid rate limiting
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(long, env = "GITHUB_TOKEN", hide_env_values = true)]
|
2020-10-26 22:31:31 +00:00
|
|
|
#[serde(default)]
|
2022-02-13 12:53:46 +00:00
|
|
|
pub(crate) github_token: Option<SecretString>,
|
2020-12-02 22:28:37 +00:00
|
|
|
|
|
|
|
|
/// Skip missing input files (default is to error if they don't exist)
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(long)]
|
2020-12-02 22:28:37 +00:00
|
|
|
#[serde(default)]
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is now delayed until the
error needs to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positive'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed the suggestions from `cargo clippy`, with `clippy::all` and
`clippy::pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
pub(crate) skip_missing: bool,
|
2020-12-02 22:28:37 +00:00
|
|
|
|
2024-09-20 07:53:43 +00:00
|
|
|
/// Do not skip files that would otherwise be ignored by
|
|
|
|
|
/// '.gitignore', '.ignore', or the global ignore file.
|
2024-09-15 08:38:09 +00:00
|
|
|
#[arg(long)]
|
|
|
|
|
#[serde(default)]
|
2024-09-20 07:53:43 +00:00
|
|
|
pub(crate) no_ignore: bool,
|
|
|
|
|
|
|
|
|
|
/// Do not skip hidden directories and files.
|
|
|
|
|
#[arg(long)]
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub(crate) hidden: bool,
|
2024-09-15 08:38:09 +00:00
|
|
|
|
2022-03-26 09:42:56 +00:00
|
|
|
/// Find links in verbatim sections like `pre`- and `code` blocks
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(long)]
|
2022-03-26 09:42:56 +00:00
|
|
|
#[serde(default)]
|
|
|
|
|
pub(crate) include_verbatim: bool,
|
|
|
|
|
|
2020-12-02 22:28:37 +00:00
|
|
|
/// Ignore case when expanding filesystem path glob inputs
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(long)]
|
2020-12-02 22:28:37 +00:00
|
|
|
#[serde(default)]
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is now delayed until the
error needs to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positive'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed the suggestions from `cargo clippy`, with `clippy::all` and
`clippy::pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
pub(crate) glob_ignore_case: bool,
|
2020-12-14 00:15:14 +00:00
|
|
|
|
|
|
|
|
/// Output file of status report
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(short, long, value_parser)]
|
2020-12-14 00:15:14 +00:00
|
|
|
#[serde(default)]
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is now delayed until the
error needs to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positive'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed the suggestions from `cargo clippy`, with `clippy::all` and
`clippy::pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
pub(crate) output: Option<PathBuf>,
|
2020-12-14 00:15:14 +00:00
|
|
|
|
2024-06-14 17:47:52 +00:00
|
|
|
/// Set the output display mode. Determines how results are presented in the terminal
|
|
|
|
|
#[arg(long, default_value = "color", value_parser = PossibleValuesParser::new(OutputMode::VARIANTS).map(|s| s.parse::<OutputMode>().unwrap()))]
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub(crate) mode: OutputMode,
|
|
|
|
|
|
|
|
|
|
/// Output format of final status report
|
|
|
|
|
#[arg(short, long, default_value = "compact", value_parser = PossibleValuesParser::new(StatsFormat::VARIANTS).map(|s| s.parse::<StatsFormat>().unwrap()))]
|
2020-12-14 00:15:14 +00:00
|
|
|
#[serde(default)]
|
2024-06-14 17:47:52 +00:00
|
|
|
pub(crate) format: StatsFormat,
|
2021-09-04 01:21:54 +00:00
|
|
|
|
|
|
|
|
/// When HTTPS is available, treat HTTP links as errors
|
2022-11-13 20:10:32 +00:00
|
|
|
#[arg(long)]
|
2021-09-04 01:21:54 +00:00
|
|
|
#[serde(default)]
|
|
|
|
|
pub(crate) require_https: bool,
|
2023-07-13 15:32:41 +00:00
|
|
|
|
|
|
|
|
/// Tell lychee to read cookies from the given file.
|
|
|
|
|
/// Cookies will be stored in the cookie jar and sent with requests.
|
|
|
|
|
/// New cookies will be stored in the cookie jar and existing cookies will be updated.
|
|
|
|
|
#[arg(long)]
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
pub(crate) cookie_jar: Option<PathBuf>,
|
2020-08-14 09:43:45 +00:00
|
|
|
}
|
2020-10-21 00:10:25 +00:00
|
|
|
|
|
|
|
|
impl Config {
|
|
|
|
|
/// Load configuration from a file
|
2023-03-01 23:24:08 +00:00
|
|
|
pub(crate) fn load_from_file(path: &Path) -> Result<Config> {
|
2020-10-21 00:10:25 +00:00
|
|
|
// Read configuration file
|
2023-03-01 23:24:08 +00:00
|
|
|
let contents = fs::read_to_string(path)?;
|
2024-01-04 21:17:14 +00:00
|
|
|
toml::from_str(&contents).with_context(|| "Failed to parse configuration file")
|
2020-10-21 00:10:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Merge the configuration from TOML into the CLI configuration
|
2020-12-02 22:28:37 +00:00
|
|
|
pub(crate) fn merge(&mut self, toml: Config) {
|
2020-10-21 00:10:25 +00:00
|
|
|
fold_in! {
|
|
|
|
|
// Destination and source configs
|
|
|
|
|
self, toml;
|
|
|
|
|
|
|
|
|
|
// Keys with defaults to assign
|
2023-02-24 22:53:09 +00:00
|
|
|
verbose: Verbosity::default();
|
2022-01-14 14:25:51 +00:00
|
|
|
cache: false;
|
2021-02-21 16:19:32 +00:00
|
|
|
no_progress: false;
|
2022-01-07 00:03:10 +00:00
|
|
|
max_redirects: DEFAULT_MAX_REDIRECTS;
|
|
|
|
|
max_retries: DEFAULT_MAX_RETRIES;
|
2022-02-07 22:17:50 +00:00
|
|
|
max_concurrency: DEFAULT_MAX_CONCURRENCY;
|
|
|
|
|
max_cache_age: humantime::parse_duration(DEFAULT_MAX_CACHE_AGE).unwrap();
|
2024-10-14 00:41:56 +00:00
|
|
|
cache_exclude_status: StatusCodeExcluder::default();
|
2020-10-21 00:10:25 +00:00
|
|
|
threads: None;
|
2022-01-07 00:03:10 +00:00
|
|
|
user_agent: DEFAULT_USER_AGENT;
|
2020-10-21 00:10:25 +00:00
|
|
|
insecure: false;
|
2021-04-26 16:24:54 +00:00
|
|
|
scheme: Vec::<String>::new();
|
2020-10-25 12:41:06 +00:00
|
|
|
include: Vec::<String>::new();
|
2020-10-21 00:10:25 +00:00
|
|
|
exclude: Vec::<String>::new();
|
2022-05-29 15:27:09 +00:00
|
|
|
exclude_file: Vec::<String>::new(); // deprecated
|
|
|
|
|
exclude_path: Vec::<PathBuf>::new();
|
2020-10-21 00:10:25 +00:00
|
|
|
exclude_all_private: false;
|
|
|
|
|
exclude_private: false;
|
|
|
|
|
exclude_link_local: false;
|
|
|
|
|
exclude_loopback: false;
|
2021-02-10 10:58:04 +00:00
|
|
|
exclude_mail: false;
|
2024-10-27 00:17:00 +00:00
|
|
|
format: StatsFormat::default();
|
2022-05-29 19:41:22 +00:00
|
|
|
remap: Vec::<String>::new();
|
2024-06-11 14:11:24 +00:00
|
|
|
fallback_extensions: Vec::<String>::new();
|
2022-11-13 20:10:32 +00:00
|
|
|
header: Vec::<String>::new();
|
2022-02-24 11:24:57 +00:00
|
|
|
timeout: DEFAULT_TIMEOUT_SECS;
|
|
|
|
|
retry_wait_time: DEFAULT_RETRY_WAIT_TIME_SECS;
|
2022-02-07 22:17:50 +00:00
|
|
|
method: DEFAULT_METHOD;
|
2025-02-16 00:41:32 +00:00
|
|
|
base_url: None;
|
2020-10-26 08:23:45 +00:00
|
|
|
basic_auth: None;
|
2020-12-02 22:28:37 +00:00
|
|
|
skip_missing: false;
|
2022-03-26 09:42:56 +00:00
|
|
|
include_verbatim: false;
|
2024-01-24 12:39:43 +00:00
|
|
|
include_mail: false;
|
2020-12-02 22:28:37 +00:00
|
|
|
glob_ignore_case: false;
|
2020-12-14 00:15:14 +00:00
|
|
|
output: None;
|
2021-09-04 01:21:54 +00:00
|
|
|
require_https: false;
|
2023-07-13 15:32:41 +00:00
|
|
|
cookie_jar: None;
|
2023-07-31 14:04:00 +00:00
|
|
|
include_fragments: false;
|
2024-10-14 00:41:56 +00:00
|
|
|
accept: StatusCodeSelector::default();
|
2020-10-21 00:10:25 +00:00
|
|
|
}
|
2022-02-13 12:53:46 +00:00
|
|
|
|
|
|
|
|
if self
|
|
|
|
|
.github_token
|
|
|
|
|
.as_ref()
|
|
|
|
|
.map(ExposeSecret::expose_secret)
|
|
|
|
|
.is_none()
|
|
|
|
|
&& toml
|
|
|
|
|
.github_token
|
|
|
|
|
.as_ref()
|
|
|
|
|
.map(ExposeSecret::expose_secret)
|
|
|
|
|
.is_some()
|
|
|
|
|
{
|
|
|
|
|
self.github_token = toml.github_token;
|
|
|
|
|
}
|
2020-10-21 00:10:25 +00:00
|
|
|
}
|
|
|
|
|
}
|
2024-01-09 19:55:39 +00:00
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// An `accept` selector coming from the config file must end up in the
    /// merged configuration when the CLI side is left at its defaults.
    #[test]
    fn test_accept_status_codes() {
        let from_file = Config {
            accept: StatusCodeSelector::from_str("200..=204, 429, 500").unwrap(),
            ..Default::default()
        };

        let mut merged = Config::default();
        merged.merge(from_file);

        // Codes that were explicitly listed or fall inside the inclusive range.
        for code in [429, 200, 203, 204] {
            assert!(merged.accept.contains(code));
        }
        // First code past the end of the `200..=204` range.
        assert!(!merged.accept.contains(205));
    }

    /// A default `Config` accepts `100..=103` and `200..=299` and has an
    /// empty cache exclusion list.
    #[test]
    fn test_default() {
        let defaults = Config::default();
        let expected_accept =
            StatusCodeSelector::from_str("100..=103,200..=299").expect("no error");

        assert_eq!(defaults.accept, expected_accept);
        assert_eq!(defaults.cache_exclude_status, StatusCodeExcluder::new());
    }
}
|