From 3a12b3e2208a6e5eda56550459f0b28e974154df Mon Sep 17 00:00:00 2001 From: Alexander Krantz Date: Tue, 20 Oct 2020 17:10:25 -0700 Subject: [PATCH] Configuration file (lychee.toml) (#16) --- Cargo.lock | 128 ++++++++++++++++++++++++----- Cargo.toml | 4 +- lychee.example.toml | 66 +++++++++++++++ src/checker.rs | 18 +++-- src/main.rs | 45 ++++++----- src/options.rs | 192 ++++++++++++++++++++++++++++++++++++-------- 6 files changed, 368 insertions(+), 85 deletions(-) create mode 100644 lychee.example.toml diff --git a/Cargo.lock b/Cargo.lock index 72cf249..63ad262 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -89,6 +89,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "ansi_term" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" +dependencies = [ + "winapi 0.3.9", +] + [[package]] name = "ansi_term" version = "0.12.1" @@ -570,6 +579,21 @@ dependencies = [ "time 0.1.43", ] +[[package]] +name = "clap" +version = "2.33.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002" +dependencies = [ + "ansi_term 0.11.0", + "atty", + "bitflags", + "strsim", + "textwrap", + "unicode-width", + "vec_map", +] + [[package]] name = "cloudabi" version = "0.1.0" @@ -1096,26 +1120,6 @@ dependencies = [ "web-sys", ] -[[package]] -name = "gumdrop" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46571f5d540478cf70d2a42dd0d6d8e9f4b9cc7531544b93311e657b86568a0b" -dependencies = [ - "gumdrop_derive", -] - -[[package]] -name = "gumdrop_derive" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "915ef07c710d84733522461de2a734d4d62a3fd39a4d4f404c2f385ef8618d05" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "h2" version = "0.2.6" @@ -1562,7 +1566,6 @@ 
dependencies = [ "check-if-email-exists", "futures", "glob", - "gumdrop", "http", "hubcaps", "indicatif", @@ -1574,7 +1577,10 @@ dependencies = [ "quick-xml", "regex", "reqwest", + "serde", + "structopt", "tokio", + "toml", "url", "wiremock", ] @@ -2056,6 +2062,30 @@ dependencies = [ "log", ] +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + [[package]] name = "proc-macro-hack" version = "0.5.18" @@ -2563,6 +2593,36 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "213701ba3370744dcd1a12960caa4843b3d68b4d1c0a5d575e0d65b2ee9d16c0" +[[package]] +name = "strsim" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" + +[[package]] +name = "structopt" +version = "0.3.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "126d630294ec449fae0b16f964e35bf3c74f940da9dca17ee9b905f7b3112eb8" +dependencies = [ + "clap", + "lazy_static", + "structopt-derive", +] + +[[package]] +name = "structopt-derive" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65e51c492f9e23a220534971ff5afc14037289de430e3c83f9daf6a1b6ae91e8" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "subtle" version = "2.3.0" @@ -2622,6 +2682,15 @@ dependencies = [ "libc", ] +[[package]] 
+name = "textwrap" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +dependencies = [ + "unicode-width", +] + [[package]] name = "thiserror" version = "1.0.20" @@ -2777,6 +2846,15 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75cf45bb0bef80604d001caaec0d09da99611b3c0fd39d3080468875cdb65645" +dependencies = [ + "serde", +] + [[package]] name = "tower-service" version = "0.3.0" @@ -2842,7 +2920,7 @@ version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "abd165311cc4d7a555ad11cc77a37756df836182db0d81aac908c8184c584f40" dependencies = [ - "ansi_term", + "ansi_term 0.12.1", "chrono", "lazy_static", "matchers", @@ -3008,6 +3086,12 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8cb18268690309760d59ee1a9b21132c126ba384f374c59a94db4bc03adeb561" +[[package]] +name = "vec_map" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" + [[package]] name = "version_check" version = "0.9.2" diff --git a/Cargo.toml b/Cargo.toml index bfb5a00..4167e76 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,7 +13,6 @@ version = "0.3.0" anyhow = "1.0.32" futures = "0.3" glob = "0.3" -gumdrop = "0.8.0" http = "0.2" hubcaps = "0.6" linkify = "0.4.0" @@ -23,6 +22,9 @@ regex = "1.3.9" url = "2.1.1" check-if-email-exists = "0.8.13" indicatif = "0.15.0" +structopt = "0.3" +toml = "0.5.7" +serde = { version = "1.0", features = ["derive"] } pulldown-cmark = "0.8.0" quick-xml = "0.20.0" diff --git a/lychee.example.toml b/lychee.example.toml new file mode 100644 index 0000000..7d13300 --- /dev/null +++ b/lychee.example.toml @@ -0,0 +1,66 @@ +### +### Display +### +# Verbose program 
output +verbose = false + +# Show progress +progress = false + + +### +### Runtime +### +# Number of threads to utilize. +# Defaults to number of cores available to the system if omitted. +#threads = 2 + +# Maximum number of allowed redirects +max_redirects = 10 + + +### +### Requests +### +# User agent to send with each request +user_agent = "curl/7.71.1" + +# Website timeout from connect to response finished +timeout = "20" + +# Comma-separated list of accepted status codes for valid links. +# Omit to accept all response types. +#accept = "200,204" + +# Proceed for server connections considered insecure (invalid TLS) +insecure = false + +# Only test links with the given scheme (e.g. https) +# Omit to check links with any scheme +#scheme = "https" + +# Request method +method = "get" + +# Custom request headers +headers = [] + + +### +### Exclusions +### +# Exclude URLs from checking (supports regex) +exclude = [] + +# Exclude all private IPs from checking +# Equivalent to setting `exclude_private`, `exclude_link_local`, and `exclude_loopback` to true +exclude_all_private = false + +# Exclude private IP address ranges from checking +exclude_private = false + +# Exclude link-local IP address range from checking +exclude_link_local = false + +# Exclude loopback IP address range from checking +exclude_loopback = false diff --git a/src/checker.rs b/src/checker.rs index 8a1742d..8b0701c 100644 --- a/src/checker.rs +++ b/src/checker.rs @@ -1,5 +1,7 @@ -use crate::extract::{self, Uri}; -use crate::options::LycheeOptions; +use crate::{ + extract::{self, Uri}, + options::Config, +}; use anyhow::anyhow; use anyhow::{Context, Result}; use check_if_email_exists::{check_email, CheckEmailInput}; @@ -78,16 +80,16 @@ pub(crate) struct Excludes { } impl Excludes { - pub fn from_options(options: &LycheeOptions) -> Self { + pub fn from_options(config: &Config) -> Self { // exclude_all_private option turns on all "private" excludes, // including private IPs, link-local IPs and 
loopback IPs - let enable_exclude = |opt| opt || options.exclude_all_private; + let enable_exclude = |opt| opt || config.exclude_all_private; Self { - regex: RegexSet::new(&options.exclude).ok(), - private_ips: enable_exclude(options.exclude_private), - link_local_ips: enable_exclude(options.exclude_link_local), - loopback_ips: enable_exclude(options.exclude_loopback), + regex: RegexSet::new(&config.exclude).ok(), + private_ips: enable_exclude(config.exclude_private), + link_local_ips: enable_exclude(config.exclude_link_local), + loopback_ips: enable_exclude(config.exclude_loopback), } } } diff --git a/src/main.rs b/src/main.rs index d857c10..9efb64e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,10 +4,10 @@ extern crate log; use anyhow::anyhow; use anyhow::Result; use futures::future::join_all; -use gumdrop::Options; use indicatif::{ProgressBar, ProgressStyle}; use reqwest::header::{HeaderMap, HeaderName}; use std::{collections::HashSet, convert::TryInto, env, time::Duration}; +use structopt::StructOpt; mod checker; mod collector; @@ -16,7 +16,7 @@ mod options; use checker::{Checker, Excludes, Status}; use extract::Uri; -use options::LycheeOptions; +use options::{Config, LycheeOptions}; fn print_summary(found: &HashSet, results: &[Status]) { let found = found.len(); @@ -41,9 +41,16 @@ fn print_summary(found: &HashSet, results: &[Status]) { fn main() -> Result<()> { pretty_env_logger::init(); - let opts = LycheeOptions::parse_args_default_or_exit(); + let opts = LycheeOptions::from_args(); - let mut runtime = match opts.threads { + // Load a potentially existing config file and merge it into the config from the CLI + let cfg = if let Some(c) = Config::load_from_file(&opts.config_file)? 
{ + opts.config.merge(c) + } else { + opts.config + }; + + let mut runtime = match cfg.threads { Some(threads) => { // We define our own runtime instead of the `tokio::main` attribute since we want to make the number of threads configurable tokio::runtime::Builder::new() @@ -54,20 +61,20 @@ fn main() -> Result<()> { } None => tokio::runtime::Runtime::new()?, }; - let errorcode = runtime.block_on(run(opts))?; + let errorcode = runtime.block_on(run(cfg, opts.inputs))?; std::process::exit(errorcode); } -async fn run(opts: LycheeOptions) -> Result { - let excludes = Excludes::from_options(&opts); - let headers = parse_headers(opts.headers)?; - let accepted = match opts.accept { +async fn run(cfg: Config, inputs: Vec) -> Result { + let excludes = Excludes::from_options(&cfg); + let headers = parse_headers(cfg.headers)?; + let accepted = match cfg.accept { Some(accept) => parse_statuscodes(accept)?, None => None, }; - let timeout = parse_timeout(opts.timeout)?; - let links = collector::collect_links(opts.inputs, opts.base_url).await?; - let progress_bar = if opts.progress { + let timeout = parse_timeout(cfg.timeout)?; + let links = collector::collect_links(inputs, cfg.base_url).await?; + let progress_bar = if cfg.progress { Some( ProgressBar::new(links.len() as u64) .with_style( @@ -82,15 +89,15 @@ async fn run(opts: LycheeOptions) -> Result { let checker = Checker::try_new( env::var("GITHUB_TOKEN")?, excludes, - opts.max_redirects, - opts.user_agent, - opts.insecure, - opts.scheme, + cfg.max_redirects, + cfg.user_agent, + cfg.insecure, + cfg.scheme, headers, - opts.method.try_into()?, + cfg.method.try_into()?, accepted, Some(timeout), - opts.verbose, + cfg.verbose, progress_bar.as_ref(), )?; @@ -102,7 +109,7 @@ async fn run(opts: LycheeOptions) -> Result { progress_bar.finish_and_clear(); } - if opts.verbose { + if cfg.verbose { print_summary(&links, &results); } diff --git a/src/options.rs b/src/options.rs index 7dc9e33..3458d36 100644 --- a/src/options.rs +++ 
b/src/options.rs @@ -1,74 +1,196 @@ -use gumdrop::Options; +use anyhow::{Error, Result}; +use serde::Deserialize; +use std::{fs, io::ErrorKind}; +use structopt::StructOpt; -#[derive(Debug, Options)] +const USER_AGENT: &str = "curl/7.71.1"; +const METHOD: &str = "get"; +const TIMEOUT: &str = "20"; + +// Macro for generating default functions to be used by serde +macro_rules! default_function { + ( $( $name:ident : $T:ty = $e:expr; )* ) => { + $( + fn $name() -> $T { + $e + } + )* + }; +} + +// Macro for merging configuration values +macro_rules! fold_in { + ( $cli:ident , $toml:ident ; $( $key:ident : $default:expr; )* ) => { + $( + if $cli.$key == $default && $toml.$key != $default { + $cli.$key = $toml.$key; + } + )* + }; +} + +#[derive(Debug, StructOpt)] +#[structopt( + name = "lychee", + about = "A boring link checker for my projects (and maybe yours)" +)] pub(crate) struct LycheeOptions { - #[options(free, help = "Input files")] + /// Input files pub inputs: Vec, - #[options(help = "show help")] - pub help: bool, + /// Configuration file to use + #[structopt(short, long = "config", default_value = "./lychee.toml")] + pub config_file: String, - #[options(help = "Verbose program output")] + #[structopt(flatten)] + pub config: Config, +} + +#[derive(Debug, Deserialize, StructOpt)] +pub(crate) struct Config { + /// Verbose program output + #[structopt(short, long)] + #[serde(default)] pub verbose: bool, - #[options(help = "Show progress")] + /// Show progress + #[structopt(short, long)] + #[serde(default)] pub progress: bool, - #[options(help = "Maximum number of allowed redirects", default = "10")] + /// Maximum number of allowed redirects + #[structopt(short, long, default_value = "10")] + #[serde(default)] pub max_redirects: usize, - #[options( - help = "Number of threads to utilize (defaults to number of cores available to the system" - )] + /// Number of threads to utilize. 
+ /// Defaults to number of cores available to the system + #[structopt(short = "T", long)] + #[serde(default)] pub threads: Option, - #[options(help = "User agent", default = "curl/7.71.1")] + /// User agent + #[structopt(short, long, default_value = USER_AGENT)] + #[serde(default = "user_agent")] pub user_agent: String, - #[options( - help = "Proceed for server connections considered insecure (invalid TLS)", - default = "false" - )] + /// Proceed for server connections considered insecure (invalid TLS) + #[structopt(short, long)] + #[serde(default)] pub insecure: bool, - #[options(help = "Only test links with given scheme (e.g. https)")] + /// Only test links with the given scheme (e.g. https) + #[structopt(short, long)] + #[serde(default)] pub scheme: Option, - // Accumulate all exclusions in a vector - #[options(help = "Exclude URLs from checking (supports regex)")] + /// Exclude URLs from checking (supports regex) + #[structopt(short, long)] + #[serde(default)] pub exclude: Vec, - #[options( - help = "Exclude all private IPs from checking, equivalent to `--exclude-private --exclude-link-local --exclude--loopback`", - short = "E" - )] + /// Exclude all private IPs from checking. 
+ /// Equivalent to `--exclude-private --exclude-link-local --exclude-loopback` + #[structopt(short = "E", long)] + #[serde(default)] pub exclude_all_private: bool, - #[options(help = "Exclude private IP address ranges from checking", no_short)] + /// Exclude private IP address ranges from checking + #[structopt(long)] + #[serde(default)] pub exclude_private: bool, - #[options(help = "Exclude link-local IP address range from checking", no_short)] + /// Exclude link-local IP address range from checking + #[structopt(long)] + #[serde(default)] pub exclude_link_local: bool, - #[options(help = "Exclude loopback IP address range from checking", no_short)] + /// Exclude loopback IP address range from checking + #[structopt(long)] + #[serde(default)] pub exclude_loopback: bool, - // Accumulate all headers in a vector - #[options(help = "Custom request headers")] + /// Custom request headers + #[structopt(short, long)] + #[serde(default)] pub headers: Vec, - #[options(help = "Comma-separated list of accepted status codes for valid links")] + /// Comma-separated list of accepted status codes for valid links + #[structopt(short, long)] + #[serde(default)] pub accept: Option, - #[options( - help = "Website timeout from connect to response finished", - default = "20" - )] + /// Website timeout from connect to response finished + #[structopt(short, long, default_value = TIMEOUT)] + #[serde(default = "timeout")] pub timeout: String, - #[options(help = "Request method", default = "get")] + /// Request method + #[structopt(short = "M", long, default_value = METHOD)] + #[serde(default = "method")] pub method: String, - #[options(help = "Base URL to check relative URls")] + #[structopt(short, long, help = "Base URL to check relative URls")] + #[serde(default)] pub base_url: Option, } + +impl Config { + /// Load configuration from a file + pub(crate) fn load_from_file(path: &str) -> Result> { + // Read configuration file + let result = fs::read(path); + + // Ignore a file not found 
error + let contents = match result { + Ok(c) => c, + Err(e) => { + return match e.kind() { + ErrorKind::NotFound => { + println!("[WARN] could not find configuration file, using arguments"); + Ok(None) + } + _ => Err(Error::from(e)), + } + } + }; + + Ok(Some(toml::from_slice(&contents)?)) + } + + /// Merge the configuration from TOML into the CLI configuration + pub(crate) fn merge(mut self, toml: Config) -> Config { + fold_in! { + // Destination and source configs + self, toml; + + // Keys with defaults to assign + verbose: false; + progress: false; + max_redirects: 10; + threads: None; + user_agent: USER_AGENT; + insecure: false; + scheme: None; + exclude: Vec::::new(); + exclude_all_private: false; + exclude_private: false; + exclude_link_local: false; + exclude_loopback: false; + headers: Vec::::new(); + accept: None; + timeout: TIMEOUT; + method: METHOD; + base_url: None; + } + + self + } +} + +// Generate the functions for serde defaults +default_function! { + user_agent: String = USER_AGENT.to_string(); + timeout: String = TIMEOUT.to_string(); + method: String = METHOD.to_string(); +}