lychee/lychee-bin/src/options.rs

929 lines
31 KiB
Rust

use crate::archive::Archive;
use crate::parse::parse_base;
use crate::verbosity::Verbosity;
use anyhow::{anyhow, Context, Error, Result};
use clap::builder::PossibleValuesParser;
use clap::{arg, builder::TypedValueParser, Parser};
use const_format::{concatcp, formatcp};
use http::{
header::{HeaderName, HeaderValue},
HeaderMap,
};
use lychee_lib::{
Base, BasicAuthSelector, FileExtensions, FileType, Input, StatusCodeExcluder,
StatusCodeSelector, DEFAULT_MAX_REDIRECTS, DEFAULT_MAX_RETRIES, DEFAULT_RETRY_WAIT_TIME_SECS,
DEFAULT_TIMEOUT_SECS, DEFAULT_USER_AGENT,
};
use reqwest::tls;
use secrecy::{ExposeSecret, SecretString};
use serde::{Deserialize, Deserializer};
use std::collections::HashMap;
use std::path::Path;
use std::{fs, path::PathBuf, str::FromStr, time::Duration};
use strum::{Display, EnumIter, EnumString, VariantNames};
// File names used by lychee. `LYCHEE_CACHE_FILE` and `LYCHEE_CONFIG_FILE` are
// embedded into the help messages below; `LYCHEE_IGNORE_FILE` is not
// referenced in this file (presumably consumed elsewhere in the crate).
pub(crate) const LYCHEE_IGNORE_FILE: &str = ".lycheeignore";
pub(crate) const LYCHEE_CACHE_FILE: &str = ".lycheecache";
pub(crate) const LYCHEE_CONFIG_FILE: &str = "lychee.toml";
// Defaults that are local to the binary (the remaining defaults are imported
// from `lychee_lib`).
const DEFAULT_METHOD: &str = "get";
// Human-readable duration; parsed with `humantime::parse_duration` where used.
const DEFAULT_MAX_CACHE_AGE: &str = "1d";
const DEFAULT_MAX_CONCURRENCY: usize = 128;
// The `*_STR` constants exist because clap requires `&str` values for
// `default_value`, whereas serde expects typed values produced by the default
// functions generated further down (e.g. `timeout()`). `concatcp!` turns the
// typed constants into string constants at compile time.
const MAX_CONCURRENCY_STR: &str = concatcp!(DEFAULT_MAX_CONCURRENCY);
const MAX_CACHE_AGE_STR: &str = concatcp!(DEFAULT_MAX_CACHE_AGE);
const MAX_REDIRECTS_STR: &str = concatcp!(DEFAULT_MAX_REDIRECTS);
const MAX_RETRIES_STR: &str = concatcp!(DEFAULT_MAX_RETRIES);
// Help text for `--cache`, mentioning the on-disk cache file name.
const HELP_MSG_CACHE: &str = formatcp!(
"Use request cache stored on disk at `{}`",
LYCHEE_CACHE_FILE,
);
// We use a custom help message here because we want to show the default
// value of the config file, but also be able to check if the user has
// provided a custom value. If they didn't, we won't throw an error if
// the file doesn't exist.
const HELP_MSG_CONFIG_FILE: &str = formatcp!(
"Configuration file to use\n\n[default: {}]",
LYCHEE_CONFIG_FILE,
);
// String forms of the timeout-related defaults, again for clap.
const TIMEOUT_STR: &str = concatcp!(DEFAULT_TIMEOUT_SECS);
const RETRY_WAIT_TIME_STR: &str = concatcp!(DEFAULT_RETRY_WAIT_TIME_SECS);
/// TLS protocol versions that can be selected as the minimum accepted version.
///
/// Both serde and strum use the spelling `TLSv1_x` for each variant, so the
/// same strings are valid in the config file and on the command line.
/// The default is [`TlsVersion::V1_2`].
// NOTE(review): unlike `StatsFormat` and `OutputMode`, this enum does not
// derive `PartialEq`, so it cannot be compared with `==` (e.g. inside
// `fold_in!`).
#[derive(Debug, Deserialize, Default, Clone, Display, EnumIter, EnumString, VariantNames)]
#[non_exhaustive]
pub(crate) enum TlsVersion {
/// TLS 1.0
#[serde(rename = "TLSv1_0")]
#[strum(serialize = "TLSv1_0")]
V1_0,
/// TLS 1.1
#[serde(rename = "TLSv1_1")]
#[strum(serialize = "TLSv1_1")]
V1_1,
/// TLS 1.2 (default)
#[serde(rename = "TLSv1_2")]
#[strum(serialize = "TLSv1_2")]
#[default]
V1_2,
/// TLS 1.3
#[serde(rename = "TLSv1_3")]
#[strum(serialize = "TLSv1_3")]
V1_3,
}
impl From<TlsVersion> for tls::Version {
fn from(ver: TlsVersion) -> Self {
match ver {
TlsVersion::V1_0 => tls::Version::TLS_1_0,
TlsVersion::V1_1 => tls::Version::TLS_1_1,
TlsVersion::V1_2 => tls::Version::TLS_1_2,
TlsVersion::V1_3 => tls::Version::TLS_1_3,
}
}
}
/// The format to use for the final status report
///
/// Variant names are rendered in `snake_case` by both strum and serde.
/// `VariantNames` supplies the list of values offered on the command line,
/// while string parsing is done by the hand-written [`FromStr`] impl
/// (`EnumString` is intentionally not derived here). The default is
/// [`StatsFormat::Compact`].
#[derive(Debug, Deserialize, Default, Clone, Display, EnumIter, VariantNames, PartialEq)]
#[non_exhaustive]
#[strum(serialize_all = "snake_case")]
#[serde(rename_all = "snake_case")]
pub(crate) enum StatsFormat {
#[default]
Compact,
Detailed,
Json,
Markdown,
Raw,
}
impl FromStr for StatsFormat {
type Err = Error;
fn from_str(format: &str) -> Result<Self, Self::Err> {
match format.to_lowercase().as_str() {
"compact" | "string" => Ok(StatsFormat::Compact),
"detailed" => Ok(StatsFormat::Detailed),
"json" => Ok(StatsFormat::Json),
"markdown" | "md" => Ok(StatsFormat::Markdown),
"raw" => Ok(StatsFormat::Raw),
_ => Err(anyhow!("Unknown format {}", format)),
}
}
}
/// The different formatter modes
///
/// This decides over whether to use color,
/// emojis, or plain text for the output.
///
/// Each variant is spelled in lowercase for serde (`rename`) and strum
/// (`serialize`). Parsing through strum's `EnumString` additionally ignores
/// ASCII case (`ascii_case_insensitive`); serde only accepts the exact
/// lowercase spelling.
#[derive(
Debug, Deserialize, Default, Clone, Display, EnumIter, EnumString, VariantNames, PartialEq,
)]
#[non_exhaustive]
pub(crate) enum OutputMode {
/// Plain text output.
///
/// This is the most basic output mode for terminals that do not support
/// color or emojis. It can also be helpful for scripting or when you want
/// to pipe the output to another program.
#[serde(rename = "plain")]
#[strum(serialize = "plain", ascii_case_insensitive)]
Plain,
/// Colorful output.
///
/// This mode uses colors to highlight the status of the requests.
/// It is useful for terminals that support colors and you want to
/// provide a more visually appealing output.
///
/// This is the default output mode.
#[serde(rename = "color")]
#[strum(serialize = "color", ascii_case_insensitive)]
#[default]
Color,
/// Emoji output.
///
/// This mode uses emojis to represent the status of the requests.
/// Some people may find this mode more intuitive and fun to use.
#[serde(rename = "emoji")]
#[strum(serialize = "emoji", ascii_case_insensitive)]
Emoji,
/// Task output.
///
/// This mode uses Markdown-styled checkboxes to represent the status of the requests.
/// Some people may find this mode more intuitive and useful for task tracking.
#[serde(rename = "task")]
#[strum(serialize = "task", ascii_case_insensitive)]
Task,
}
impl OutputMode {
/// Returns `true` if the response format is `Plain`
pub(crate) const fn is_plain(&self) -> bool {
matches!(self, OutputMode::Plain)
}
/// Returns `true` if the response format is `Emoji`
pub(crate) const fn is_emoji(&self) -> bool {
matches!(self, OutputMode::Emoji)
}
}
// Macro for generating default functions to be used by serde
//
// Each `name: Type = expr;` entry expands to a free function
// `fn name() -> Type { expr }`, which can then be referenced from
// `#[serde(default = "name")]` attributes.
macro_rules! default_function {
( $( $name:ident : $T:ty = $e:expr; )* ) => {
$(
// Some of the generated bodies are constant expressions, others are not;
// the lint is silenced for all of them.
#[allow(clippy::missing_const_for_fn)]
fn $name() -> $T {
$e
}
)*
};
}
// Generate the functions for serde defaults
//
// These mirror the clap `default_value`s on `Config`, so that a field missing
// from the config file ends up with the same value as an omitted CLI flag.
default_function! {
max_redirects: usize = DEFAULT_MAX_REDIRECTS;
max_retries: u64 = DEFAULT_MAX_RETRIES;
max_concurrency: usize = DEFAULT_MAX_CONCURRENCY;
// Panics only if `DEFAULT_MAX_CACHE_AGE` is not a valid humantime duration.
max_cache_age: Duration = humantime::parse_duration(DEFAULT_MAX_CACHE_AGE).unwrap();
user_agent: String = DEFAULT_USER_AGENT.to_string();
timeout: usize = DEFAULT_TIMEOUT_SECS;
retry_wait_time: usize = DEFAULT_RETRY_WAIT_TIME_SECS;
method: String = DEFAULT_METHOD.to_string();
verbosity: Verbosity = Verbosity::default();
cache_exclude_selector: StatusCodeExcluder = StatusCodeExcluder::new();
accept_selector: StatusCodeSelector = StatusCodeSelector::default();
}
// Macro for merging configuration values
//
// For every `key: default;` entry, the value from `$toml` replaces the value
// in `$cli` if (and only if) the CLI value still equals the default while the
// TOML value does not. In other words, an explicitly set CLI value wins over
// the config file.
//
// Limitations that follow from the comparison-based approach:
// - A CLI value that was explicitly set to its default is indistinguishable
//   from an omitted one and will be overridden by the config file.
// - The field type must support `==`/`!=` against the given default
//   expression.
// - Values are moved out of `$toml`, so `$toml` must be owned by the caller.
macro_rules! fold_in {
( $cli:ident , $toml:ident ; $( $key:ident : $default:expr; )* ) => {
$(
if $cli.$key == $default && $toml.$key != $default {
$cli.$key = $toml.$key;
}
)*
};
}
/// Parse one `Header-Name: Header-Value` string into a typed header pair.
///
/// The input is split at the first colon only, so any further colons belong
/// to the value. Surrounding whitespace is stripped from both the name and
/// the value before they are validated.
///
/// # Errors
///
/// Returns an error if the input contains no colon, or if the trimmed name or
/// value is rejected by [`HeaderName`] / [`HeaderValue`].
fn parse_single_header(header: &str) -> Result<(HeaderName, HeaderValue)> {
    let Some((raw_name, raw_value)) = header.split_once(':') else {
        return Err(anyhow!(
            "Invalid header format. Expected colon-separated string in the format 'HeaderName: HeaderValue', got '{}'",
            header
        ));
    };

    let trimmed_name = raw_name.trim();
    let trimmed_value = raw_value.trim();

    let header_name = HeaderName::from_bytes(trimmed_name.as_bytes())
        .map_err(|e| anyhow!("Invalid header name '{}': {}", trimmed_name, e))?;
    let header_value = HeaderValue::from_str(trimmed_value)
        .map_err(|e| anyhow!("Invalid header value '{}': {}", trimmed_value, e))?;

    Ok((header_name, header_value))
}
/// clap value parser that turns a single `Name: Value` argument into a
/// `(String, String)` pair
///
/// One pair is produced per occurrence of the flag.
/// This does NOT merge multiple headers into one.
#[derive(Clone, Debug)]
struct HeaderParser;
impl TypedValueParser for HeaderParser {
type Value = (String, String);
fn parse_ref(
&self,
_cmd: &clap::Command,
_arg: Option<&clap::Arg>,
value: &std::ffi::OsStr,
) -> Result<Self::Value, clap::Error> {
let header_str = value.to_str().ok_or_else(|| {
clap::Error::raw(
clap::error::ErrorKind::InvalidValue,
"Header value contains invalid UTF-8",
)
})?;
match parse_single_header(header_str) {
Ok((name, value)) => {
let Ok(value) = value.to_str() else {
return Err(clap::Error::raw(
clap::error::ErrorKind::InvalidValue,
"Header value contains invalid UTF-8",
));
};
Ok((name.to_string(), value.to_string()))
}
Err(e) => Err(clap::Error::raw(
clap::error::ErrorKind::InvalidValue,
e.to_string(),
)),
}
}
}
impl clap::builder::ValueParserFactory for HeaderParser {
    type Parser = HeaderParser;

    /// Hand out the (stateless) parser instance.
    fn value_parser() -> Self::Parser {
        Self
    }
}
/// Extension trait for converting a Vec of header pairs to a `HeaderMap`
pub(crate) trait HeaderMapExt {
/// Convert a collection of header key-value pairs to a `HeaderMap`
///
/// # Errors
///
/// Implementations return an error if a name or value cannot be converted
/// into a valid header name or header value.
fn from_header_pairs(headers: &[(String, String)]) -> Result<HeaderMap, Error>;
}
impl HeaderMapExt for HeaderMap {
fn from_header_pairs(headers: &[(String, String)]) -> Result<HeaderMap, Error> {
let mut header_map = HeaderMap::new();
for (name, value) in headers {
let header_name = HeaderName::from_bytes(name.as_bytes())
.map_err(|e| anyhow!("Invalid header name '{}': {}", name, e))?;
let header_value = HeaderValue::from_str(value)
.map_err(|e| anyhow!("Invalid header value '{}': {}", value, e))?;
header_map.insert(header_name, header_value);
}
Ok(header_map)
}
}
// NOTE: the `///` comments on this struct and its fields are picked up by
// clap's derive and shown in `--help`; treat them as user-facing text.
/// A fast, async link checker
///
/// Finds broken URLs and mail addresses inside Markdown, HTML,
/// `reStructuredText`, websites and more!
#[derive(Parser, Debug)]
#[command(version, about)]
pub(crate) struct LycheeOptions {
// Kept as raw strings; `LycheeOptions::inputs` turns them into `Input`s
// because that conversion needs values from `config`.
/// The inputs (where to get links to check from).
/// These can be: files (e.g. `README.md`), file globs (e.g. `"~/git/*/README.md"`),
/// remote URLs (e.g. `https://example.com/README.md`) or standard input (`-`).
/// NOTE: Use `--` to separate inputs from options that allow multiple arguments.
#[arg(name = "inputs", required = true)]
raw_inputs: Vec<String>,
// `Option` without a clap default: `None` means the user did not pass
// `--config`. The default file name is only mentioned in the help text
// (`HELP_MSG_CONFIG_FILE`).
/// Configuration file to use
#[arg(short, long = "config")]
#[arg(help = HELP_MSG_CONFIG_FILE)]
pub(crate) config_file: Option<PathBuf>,
// All remaining options; shared with the config-file representation.
#[clap(flatten)]
pub(crate) config: Config,
}
impl LycheeOptions {
    /// Convert the raw input arguments into parsed [`Input`]s.
    ///
    /// Every input receives the same glob case-sensitivity setting, the same
    /// excluded paths (`None` if no paths are excluded) and the same custom
    /// headers, all taken from the config.
    ///
    /// # Errors
    ///
    /// Fails if the configured headers are invalid or if any raw input cannot
    /// be turned into an [`Input`].
    // This has to be a method (rather than parsing into `Vec<Input>` directly
    // in `LycheeOptions`) because the conversion depends on config values such
    // as `glob_ignore_case`.
    pub(crate) fn inputs(&self) -> Result<Vec<Input>> {
        let config = &self.config;

        let excluded = (!config.exclude_path.is_empty()).then(|| config.exclude_path.clone());
        let headers = HeaderMap::from_header_pairs(&config.header)?;

        let mut parsed = Vec::with_capacity(self.raw_inputs.len());
        for raw in &self.raw_inputs {
            let input = Input::new(
                raw,
                None,
                config.glob_ignore_case,
                excluded.clone(),
                headers.clone(),
            )
            .context("Cannot parse inputs from arguments")?;
            parsed.push(input);
        }
        Ok(parsed)
    }
}
// Custom deserializer function for the header field
//
// The config file stores headers as a table (name -> value), while `Config`
// keeps them as a list of pairs. The table is read into a `HashMap` first, so
// the order of the resulting pairs is unspecified.
fn deserialize_headers<'de, D>(deserializer: D) -> Result<Vec<(String, String)>, D::Error>
where
    D: Deserializer<'de>,
{
    HashMap::<String, String>::deserialize(deserializer)
        .map(|by_name| by_name.into_iter().collect())
}
// NOTE: This struct is filled from two sources: the command line (clap,
// via `Parser`) and the config file (serde, via `Deserialize`). Consequences:
// - The `///` comments and `long_help` strings below are user-facing help
//   text; editing them changes `--help` output.
// - Each field carries both a clap default and a matching serde default.
// - A field is only taken over from the config file if it is handled in
//   `Config::merge`; keep that list in sync when adding fields here.
/// The main configuration for lychee
#[allow(clippy::struct_excessive_bools)]
#[derive(Parser, Debug, Deserialize, Clone, Default)]
pub(crate) struct Config {
/// Verbose program output
#[clap(flatten)]
#[serde(default = "verbosity")]
pub(crate) verbose: Verbosity,
/// Do not show progress bar.
/// This is recommended for non-interactive shells (e.g. for continuous integration)
#[arg(short, long, verbatim_doc_comment)]
#[serde(default)]
pub(crate) no_progress: bool,
// NOTE(review): the `long_help` below is almost identical to the one of
// `fallback_extensions` and does not match the doc comment — confirm which
// wording is intended.
/// A list of file extensions. Files not matching the specified extensions are skipped.
///
/// E.g. a user can specify `--extensions html,htm,php,asp,aspx,jsp,cgi`
/// to check for links in files with these extensions.
///
/// This is useful when the default extensions are not enough and you don't
/// want to provide a long list of inputs (e.g. file1.html, file2.md, etc.)
#[arg(
long,
default_value_t = FileExtensions::default(),
long_help = "Test the specified file extensions for URIs when checking files locally.
Multiple extensions can be separated by commas. Note that if you want to check filetypes,
which have multiple extensions, e.g. HTML files with both .html and .htm extensions, you need to
specify both extensions explicitly."
)]
#[serde(default = "FileExtensions::default")]
pub(crate) extensions: FileExtensions,
// No doc comment here: the help text comes from `HELP_MSG_CACHE`.
#[arg(help = HELP_MSG_CACHE)]
#[arg(long)]
#[serde(default)]
pub(crate) cache: bool,
// Parsed with humantime on both paths: `humantime::parse_duration` for the
// CLI and `humantime_serde` for the config file.
/// Discard all cached requests older than this duration
#[arg(
long,
value_parser = humantime::parse_duration,
default_value = &MAX_CACHE_AGE_STR
)]
#[serde(default = "max_cache_age")]
#[serde(with = "humantime_serde")]
pub(crate) max_cache_age: Duration,
/// A list of status codes that will be excluded from the cache
#[arg(
long,
default_value_t,
long_help = "A list of status codes that will be ignored from the cache
The following exclude range syntax is supported: [start]..[[=]end]|code. Some valid
examples are:
- 429 (excludes the 429 status code only)
- 500.. (excludes any status code >= 500)
- ..100 (excludes any status code < 100)
- 500..=599 (excludes any status code from 500 to 599 inclusive)
- 500..600 (excludes any status code from 500 to 600 excluding 600, same as 500..=599)
Use \"lychee --cache-exclude-status '429, 500..502' <inputs>...\" to provide a comma- separated
list of excluded status codes. This example will not cache results with a status code of 429, 500
and 501."
)]
#[serde(default = "cache_exclude_selector")]
pub(crate) cache_exclude_status: StatusCodeExcluder,
/// Don't perform any link checking.
/// Instead, dump all the links extracted from inputs that would be checked
#[arg(long)]
#[serde(default)]
pub(crate) dump: bool,
/// Don't perform any link extraction and checking.
/// Instead, dump all input sources from which links would be collected
#[arg(long)]
#[serde(default)]
pub(crate) dump_inputs: bool,
// The `unwrap` in the value parser relies on `PossibleValuesParser` having
// already restricted the input to `Archive::VARIANTS`.
/// Specify the use of a specific web archive.
/// Can be used in combination with `--suggest`
#[arg(long, value_parser = PossibleValuesParser::new(Archive::VARIANTS).map(|s| s.parse::<Archive>().unwrap()))]
#[serde(default)]
pub(crate) archive: Option<Archive>,
/// Suggest link replacements for broken links, using a web archive.
/// The web archive can be specified with `--archive`
#[arg(long)]
#[serde(default)]
pub(crate) suggest: bool,
/// Maximum number of allowed redirects
#[arg(short, long, default_value = &MAX_REDIRECTS_STR)]
#[serde(default = "max_redirects")]
pub(crate) max_redirects: usize,
/// Maximum number of retries per request
#[arg(long, default_value = &MAX_RETRIES_STR)]
#[serde(default = "max_retries")]
pub(crate) max_retries: u64,
// Same pattern as `archive`: input is pre-validated against
// `TlsVersion::VARIANTS` before `parse` is called.
/// Minimum accepted TLS Version
#[arg(long, value_parser = PossibleValuesParser::new(TlsVersion::VARIANTS).map(|s| s.parse::<TlsVersion>().unwrap()))]
#[serde(default)]
pub(crate) min_tls: Option<TlsVersion>,
/// Maximum number of concurrent network requests
#[arg(long, default_value = &MAX_CONCURRENCY_STR)]
#[serde(default = "max_concurrency")]
pub(crate) max_concurrency: usize,
/// Number of threads to utilize.
/// Defaults to number of cores available to the system
#[arg(short = 'T', long)]
#[serde(default)]
pub(crate) threads: Option<usize>,
/// User agent
#[arg(short, long, default_value = DEFAULT_USER_AGENT)]
#[serde(default = "user_agent")]
pub(crate) user_agent: String,
/// Proceed for server connections considered insecure (invalid TLS)
#[arg(short, long)]
#[serde(default)]
pub(crate) insecure: bool,
/// Only test links with the given schemes (e.g. https).
/// Omit to check links with any other scheme.
/// At the moment, we support http, https, file, and mailto.
#[arg(short, long)]
#[serde(default)]
pub(crate) scheme: Vec<String>,
/// Only check local files and block network requests.
#[arg(long)]
#[serde(default)]
pub(crate) offline: bool,
/// URLs to check (supports regex). Has preference over all excludes.
#[arg(long)]
#[serde(default)]
pub(crate) include: Vec<String>,
/// Exclude URLs and mail addresses from checking (supports regex)
#[arg(long)]
#[serde(default)]
pub(crate) exclude: Vec<String>,
/// Deprecated; use `--exclude-path` instead
#[arg(long)]
#[serde(default)]
pub(crate) exclude_file: Vec<String>,
/// Exclude file path from getting checked.
#[arg(long)]
#[serde(default)]
pub(crate) exclude_path: Vec<PathBuf>,
/// Exclude all private IPs from checking.
/// Equivalent to `--exclude-private --exclude-link-local --exclude-loopback`
#[arg(short = 'E', long, verbatim_doc_comment)]
#[serde(default)]
pub(crate) exclude_all_private: bool,
/// Exclude private IP address ranges from checking
#[arg(long)]
#[serde(default)]
pub(crate) exclude_private: bool,
/// Exclude link-local IP address range from checking
#[arg(long)]
#[serde(default)]
pub(crate) exclude_link_local: bool,
/// Exclude loopback IP address range and localhost from checking
#[arg(long)]
#[serde(default)]
pub(crate) exclude_loopback: bool,
/// Also check email addresses
#[arg(long)]
#[serde(default)]
pub(crate) include_mail: bool,
/// Remap URI matching pattern to different URI
#[serde(default)]
#[arg(long)]
pub(crate) remap: Vec<String>,
/// Automatically append file extensions to `file://` URIs as needed
#[serde(default)]
#[arg(
long,
value_delimiter = ',',
long_help = "Test the specified file extensions for URIs when checking files locally.
Multiple extensions can be separated by commas. Extensions will be checked in
order of appearance.
Example: --fallback-extensions html,htm,php,asp,aspx,jsp,cgi"
)]
pub(crate) fallback_extensions: Vec<String>,
// From the config file, headers are read as a table and converted to pairs
// by `deserialize_headers`.
// NOTE(review): this is the only `pub` (not `pub(crate)`) field of the
// struct — presumably unintentional; confirm.
/// Set custom header for requests
#[arg(
short = 'H',
long = "header",
// Note: We use a `Vec<(String, String)>` for headers, which is
// unfortunate. The reason is that `clap::ArgAction::Append` collects
// multiple values, and `clap` cannot automatically convert these tuples
// into a `HashMap<String, String>`.
action = clap::ArgAction::Append,
value_parser = HeaderParser,
value_name = "HEADER:VALUE",
long_help = "Set custom header for requests
Some websites require custom headers to be passed in order to return valid responses.
You can specify custom headers in the format 'Name: Value'. For example, 'Accept: text/html'.
This is the same format that other tools like curl or wget use.
Multiple headers can be specified by using the flag multiple times."
)]
#[serde(default)]
#[serde(deserialize_with = "deserialize_headers")]
pub header: Vec<(String, String)>,
/// A List of accepted status codes for valid links
#[arg(
short,
long,
default_value_t,
long_help = "A List of accepted status codes for valid links
The following accept range syntax is supported: [start]..[[=]end]|code. Some valid
examples are:
- 200 (accepts the 200 status code only)
- ..204 (accepts any status code < 204)
- ..=204 (accepts any status code <= 204)
- 200..=204 (accepts any status code from 200 to 204 inclusive)
- 200..205 (accepts any status code from 200 to 205 excluding 205, same as 200..=204)
Use \"lychee --accept '200..=204, 429, 500' <inputs>...\" to provide a comma-
separated list of accepted status codes. This example will accept 200, 201,
202, 203, 204, 429, and 500 as valid status codes."
)]
#[serde(default = "accept_selector")]
pub(crate) accept: StatusCodeSelector,
/// Enable the checking of fragments in links.
#[arg(long)]
#[serde(default)]
pub(crate) include_fragments: bool,
/// Website timeout in seconds from connect to response finished
#[arg(short, long, default_value = &TIMEOUT_STR)]
#[serde(default = "timeout")]
pub(crate) timeout: usize,
/// Minimum wait time in seconds between retries of failed requests
#[arg(short, long, default_value = &RETRY_WAIT_TIME_STR)]
#[serde(default = "retry_wait_time")]
pub(crate) retry_wait_time: usize,
/// Request method
// Using `-X` as a short param similar to curl
#[arg(short = 'X', long, default_value = DEFAULT_METHOD)]
#[serde(default = "method")]
pub(crate) method: String,
// `#[serde(skip)]`: the deprecated option is never read from the config
// file; it can only be set on the command line.
/// Deprecated; use `--base-url` instead
#[arg(long, value_parser = parse_base)]
#[serde(skip)]
pub(crate) base: Option<Base>,
/// Base URL used to resolve relative URLs during link checking
/// Example: <https://example.com>
#[arg(short, long, value_parser= parse_base)]
#[serde(default)]
pub(crate) base_url: Option<Base>,
/// Root path to use when checking absolute local links,
/// must be an absolute path
#[arg(long)]
#[serde(default)]
pub(crate) root_dir: Option<PathBuf>,
/// Basic authentication support. E.g. `http://example.com username:password`
#[arg(long)]
#[serde(default)]
pub(crate) basic_auth: Option<Vec<BasicAuthSelector>>,
// May also be supplied through the `GITHUB_TOKEN` environment variable;
// `hide_env_values` keeps the value out of the help output. Stored as a
// `SecretString`.
/// GitHub API token to use when checking github.com links, to avoid rate limiting
#[arg(long, env = "GITHUB_TOKEN", hide_env_values = true)]
#[serde(default)]
pub(crate) github_token: Option<SecretString>,
/// Skip missing input files (default is to error if they don't exist)
#[arg(long)]
#[serde(default)]
pub(crate) skip_missing: bool,
/// Do not skip files that would otherwise be ignored by
/// '.gitignore', '.ignore', or the global ignore file.
#[arg(long)]
#[serde(default)]
pub(crate) no_ignore: bool,
/// Do not skip hidden directories and files.
#[arg(long)]
#[serde(default)]
pub(crate) hidden: bool,
/// Find links in verbatim sections like `pre`- and `code` blocks
#[arg(long)]
#[serde(default)]
pub(crate) include_verbatim: bool,
/// Ignore case when expanding filesystem path glob inputs
#[arg(long)]
#[serde(default)]
pub(crate) glob_ignore_case: bool,
/// Output file of status report
#[arg(short, long, value_parser)]
#[serde(default)]
pub(crate) output: Option<PathBuf>,
// The CLI only accepts the names in `OutputMode::VARIANTS`, so the `unwrap`
// after `parse` relies on that pre-validation.
/// Set the output display mode. Determines how results are presented in the terminal
#[arg(long, default_value = "color", value_parser = PossibleValuesParser::new(OutputMode::VARIANTS).map(|s| s.parse::<OutputMode>().unwrap()))]
#[serde(default)]
pub(crate) mode: OutputMode,
// The CLI only accepts the names in `StatsFormat::VARIANTS`; the aliases
// understood by `StatsFormat::from_str` (`string`, `md`) are therefore not
// reachable from the command line.
/// Output format of final status report
#[arg(short, long, default_value = "compact", value_parser = PossibleValuesParser::new(StatsFormat::VARIANTS).map(|s| s.parse::<StatsFormat>().unwrap()))]
#[serde(default)]
pub(crate) format: StatsFormat,
/// When HTTPS is available, treat HTTP links as errors
#[arg(long)]
#[serde(default)]
pub(crate) require_https: bool,
/// Tell lychee to read cookies from the given file.
/// Cookies will be stored in the cookie jar and sent with requests.
/// New cookies will be stored in the cookie jar and existing cookies will be updated.
#[arg(long)]
#[serde(default)]
pub(crate) cookie_jar: Option<PathBuf>,
}
impl Config {
    /// Special handling for merging headers
    ///
    /// Overwrites existing headers in `self` with the values from `other`.
    ///
    /// Header names are matched ASCII-case-insensitively, because names given
    /// on the command line have already been normalized to lowercase by
    /// `HeaderName`, while names from the config file keep their original
    /// spelling. Without this, `x-test` and `X-Test` would both survive the
    /// merge and the value that ends up being sent would depend on hash map
    /// iteration order.
    ///
    /// The result keeps a stable order: headers of `self` first (in their
    /// original order), followed by headers that only exist in `other`. When
    /// a header is overwritten, both the name spelling and the value are
    /// taken from the later entry.
    fn merge_headers(&mut self, other: &[(String, String)]) {
        let own = std::mem::take(&mut self.header);
        let capacity = own.len() + other.len();

        let mut merged: Vec<(String, String)> = Vec::with_capacity(capacity);
        // Maps the lowercased header name to its position in `merged`.
        let mut position: HashMap<String, usize> = HashMap::with_capacity(capacity);

        for (name, value) in own.into_iter().chain(other.iter().cloned()) {
            match position.entry(name.to_ascii_lowercase()) {
                std::collections::hash_map::Entry::Occupied(slot) => {
                    merged[*slot.get()] = (name, value);
                }
                std::collections::hash_map::Entry::Vacant(slot) => {
                    slot.insert(merged.len());
                    merged.push((name, value));
                }
            }
        }

        self.header = merged;
    }

    /// Load configuration from a file
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be read or if its contents are not
    /// a valid TOML representation of [`Config`].
    pub(crate) fn load_from_file(path: &Path) -> Result<Config> {
        // Read configuration file
        let contents = fs::read_to_string(path)?;
        toml::from_str(&contents).with_context(|| "Failed to parse configuration file")
    }

    /// Merge the configuration from TOML into the CLI configuration
    ///
    /// For all regular options, a value from the config file is only used if
    /// the CLI value is still at its default (see `fold_in!`). Headers are the
    /// exception: they are combined via `merge_headers`, where the config file
    /// value wins for headers present in both sources.
    pub(crate) fn merge(&mut self, toml: Config) {
        // Special handling for headers before fold_in!
        self.merge_headers(&toml.header);

        fold_in! {
            // Destination and source configs
            self, toml;

            // Keys with defaults to assign
            accept: StatusCodeSelector::default();
            base_url: None;
            basic_auth: None;
            cache_exclude_status: StatusCodeExcluder::default();
            cache: false;
            cookie_jar: None;
            dump_inputs: false;
            dump: false;
            exclude_all_private: false;
            exclude_file: Vec::<String>::new(); // deprecated
            exclude_link_local: false;
            exclude_loopback: false;
            exclude_path: Vec::<PathBuf>::new();
            exclude_private: false;
            exclude: Vec::<String>::new();
            extensions: FileType::default_extensions();
            fallback_extensions: Vec::<String>::new();
            format: StatsFormat::default();
            glob_ignore_case: false;
            header: Vec::<(String, String)>::new();
            hidden: false;
            include_fragments: false;
            include_mail: false;
            include_verbatim: false;
            include: Vec::<String>::new();
            insecure: false;
            max_cache_age: humantime::parse_duration(DEFAULT_MAX_CACHE_AGE).unwrap();
            max_concurrency: DEFAULT_MAX_CONCURRENCY;
            max_redirects: DEFAULT_MAX_REDIRECTS;
            max_retries: DEFAULT_MAX_RETRIES;
            method: DEFAULT_METHOD;
            mode: OutputMode::default();
            no_ignore: false;
            no_progress: false;
            offline: false;
            output: None;
            remap: Vec::<String>::new();
            require_https: false;
            retry_wait_time: DEFAULT_RETRY_WAIT_TIME_SECS;
            root_dir: None;
            scheme: Vec::<String>::new();
            skip_missing: false;
            suggest: false;
            threads: None;
            timeout: DEFAULT_TIMEOUT_SECS;
            user_agent: DEFAULT_USER_AGENT;
            verbose: Verbosity::default();
        }

        // Optional values whose inner types are not compared with `==` here
        // (`fold_in!` needs equality): take the config file value whenever
        // the CLI did not provide one.
        if self.archive.is_none() {
            self.archive = toml.archive;
        }
        if self.min_tls.is_none() {
            self.min_tls = toml.min_tls;
        }

        // If the config file has a value for the GitHub token, but the CLI
        // doesn't, use the token from the config file.
        if self
            .github_token
            .as_ref()
            .map(ExposeSecret::expose_secret)
            .is_none()
            && toml
                .github_token
                .as_ref()
                .map(ExposeSecret::expose_secret)
                .is_some()
        {
            self.github_token = toml.github_token;
        }
    }
}
#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use super::*;

    /// Build an owned `(name, value)` header pair.
    fn pair(name: &str, value: &str) -> (String, String) {
        (name.to_string(), value.to_string())
    }

    /// Assert that `raw` parses into exactly the given header name and value.
    fn assert_header(raw: &str, name: &'static str, value: &'static str) {
        let expected = (
            HeaderName::from_static(name),
            HeaderValue::from_static(value),
        );
        assert_eq!(parse_single_header(raw).unwrap(), expected);
    }

    /// Accepted status codes from the config file are taken over on merge.
    #[test]
    fn test_accept_status_codes() {
        let from_file = Config {
            accept: StatusCodeSelector::from_str("200..=204, 429, 500").unwrap(),
            ..Default::default()
        };

        let mut merged = Config::default();
        merged.merge(from_file);

        for accepted in [429, 200, 203, 204] {
            assert!(merged.accept.contains(accepted));
        }
        assert!(!merged.accept.contains(205));
    }

    /// The default config accepts 100..=103 and 200..=299 and excludes nothing
    /// from the cache.
    #[test]
    fn test_default() {
        let defaults = Config::default();

        let expected_accept =
            StatusCodeSelector::from_str("100..=103,200..=299").expect("no error");
        assert_eq!(defaults.accept, expected_accept);
        assert_eq!(defaults.cache_exclude_status, StatusCodeExcluder::new());
    }

    #[test]
    fn test_parse_custom_headers() {
        assert_header("accept:text/html", "accept", "text/html");
    }

    /// Only the first colon separates name and value.
    #[test]
    fn test_parse_custom_header_multiple_colons() {
        assert_header("key:x-test:check=this", "key", "x-test:check=this");
    }

    #[test]
    fn test_parse_custom_headers_with_equals() {
        assert_header("key:x-test=check=this", "key", "x-test=check=this");
    }

    /// Repeated `--header` flags are all collected.
    #[test]
    fn test_header_parsing_and_merging() {
        // Simulate commandline arguments with multiple headers
        let argv = [
            "lychee",
            "--header",
            "Accept: text/html",
            "--header",
            "X-Test: check=this",
            "input.md",
        ];
        let opts = crate::LycheeOptions::parse_from(argv);

        let collected = &opts.config.header;
        assert_eq!(collected.len(), 2);

        // Look the headers up by name; names arrive lowercased
        let by_name: HashMap<String, String> = collected.iter().cloned().collect();
        assert_eq!(by_name["accept"], "text/html");
        assert_eq!(by_name["x-test"], "check=this");
    }

    /// Headers from the config file overwrite same-named CLI headers.
    #[test]
    fn test_merge_headers_with_config() {
        let from_file = Config {
            header: vec![pair("Accept", "text/html"), pair("X-Test", "check=this")],
            ..Default::default()
        };
        // Set X-Test and see if it gets overwritten
        let mut merged = Config {
            header: vec![pair("X-Test", "check=that")],
            ..Default::default()
        };

        merged.merge(from_file);
        assert_eq!(merged.header.len(), 2);

        // The order of merged headers is not part of the contract
        merged.header.sort();
        let expected = vec![pair("Accept", "text/html"), pair("X-Test", "check=this")];
        assert_eq!(merged.header, expected);
    }
}