Configuration file (lychee.toml) (#16)

This commit is contained in:
Alexander Krantz 2020-10-20 17:10:25 -07:00 committed by GitHub
parent 6bd7bbf51f
commit 3a12b3e220
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 368 additions and 85 deletions

128
Cargo.lock generated
View file

@ -89,6 +89,15 @@ dependencies = [
"memchr",
]
[[package]]
name = "ansi_term"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "ansi_term"
version = "0.12.1"
@ -570,6 +579,21 @@ dependencies = [
"time 0.1.43",
]
[[package]]
name = "clap"
version = "2.33.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002"
dependencies = [
"ansi_term 0.11.0",
"atty",
"bitflags",
"strsim",
"textwrap",
"unicode-width",
"vec_map",
]
[[package]]
name = "cloudabi"
version = "0.1.0"
@ -1096,26 +1120,6 @@ dependencies = [
"web-sys",
]
[[package]]
name = "gumdrop"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46571f5d540478cf70d2a42dd0d6d8e9f4b9cc7531544b93311e657b86568a0b"
dependencies = [
"gumdrop_derive",
]
[[package]]
name = "gumdrop_derive"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "915ef07c710d84733522461de2a734d4d62a3fd39a4d4f404c2f385ef8618d05"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "h2"
version = "0.2.6"
@ -1562,7 +1566,6 @@ dependencies = [
"check-if-email-exists",
"futures",
"glob",
"gumdrop",
"http",
"hubcaps",
"indicatif",
@ -1574,7 +1577,10 @@ dependencies = [
"quick-xml",
"regex",
"reqwest",
"serde",
"structopt",
"tokio",
"toml",
"url",
"wiremock",
]
@ -2056,6 +2062,30 @@ dependencies = [
"log",
]
[[package]]
name = "proc-macro-error"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
dependencies = [
"proc-macro-error-attr",
"proc-macro2",
"quote",
"syn",
"version_check",
]
[[package]]
name = "proc-macro-error-attr"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
dependencies = [
"proc-macro2",
"quote",
"version_check",
]
[[package]]
name = "proc-macro-hack"
version = "0.5.18"
@ -2563,6 +2593,36 @@ version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "213701ba3370744dcd1a12960caa4843b3d68b4d1c0a5d575e0d65b2ee9d16c0"
[[package]]
name = "strsim"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
[[package]]
name = "structopt"
version = "0.3.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "126d630294ec449fae0b16f964e35bf3c74f940da9dca17ee9b905f7b3112eb8"
dependencies = [
"clap",
"lazy_static",
"structopt-derive",
]
[[package]]
name = "structopt-derive"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "65e51c492f9e23a220534971ff5afc14037289de430e3c83f9daf6a1b6ae91e8"
dependencies = [
"heck",
"proc-macro-error",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "subtle"
version = "2.3.0"
@ -2622,6 +2682,15 @@ dependencies = [
"libc",
]
[[package]]
name = "textwrap"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
dependencies = [
"unicode-width",
]
[[package]]
name = "thiserror"
version = "1.0.20"
@ -2777,6 +2846,15 @@ dependencies = [
"tokio",
]
[[package]]
name = "toml"
version = "0.5.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "75cf45bb0bef80604d001caaec0d09da99611b3c0fd39d3080468875cdb65645"
dependencies = [
"serde",
]
[[package]]
name = "tower-service"
version = "0.3.0"
@ -2842,7 +2920,7 @@ version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "abd165311cc4d7a555ad11cc77a37756df836182db0d81aac908c8184c584f40"
dependencies = [
"ansi_term",
"ansi_term 0.12.1",
"chrono",
"lazy_static",
"matchers",
@ -3008,6 +3086,12 @@ version = "0.5.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8cb18268690309760d59ee1a9b21132c126ba384f374c59a94db4bc03adeb561"
[[package]]
name = "vec_map"
version = "0.8.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
[[package]]
name = "version_check"
version = "0.9.2"

View file

@ -13,7 +13,6 @@ version = "0.3.0"
anyhow = "1.0.32"
futures = "0.3"
glob = "0.3"
gumdrop = "0.8.0"
http = "0.2"
hubcaps = "0.6"
linkify = "0.4.0"
@ -23,6 +22,9 @@ regex = "1.3.9"
url = "2.1.1"
check-if-email-exists = "0.8.13"
indicatif = "0.15.0"
structopt = "0.3"
toml = "0.5.7"
serde = { version = "1.0", features = ["derive"] }
pulldown-cmark = "0.8.0"
quick-xml = "0.20.0"

66
lychee.example.toml Normal file
View file

@ -0,0 +1,66 @@
###
### Display
###
# Verbose program output
verbose = false
# Show progress
progress = false
###
### Runtime
###
# Number of threads to utilize.
# Defaults to number of cores available to the system if omitted.
#threads = 2
# Maximum number of allowed redirects
max_redirects = 10
###
### Requests
###
# User agent to send with each request
user_agent = "curl/7.71.1"
# Website timeout, measured from connecting until the response has finished
timeout = "20"
# Comma-separated list of accepted status codes for valid links.
# Omit to accept all response types.
#accept = "200,204"
# Proceed for server connections considered insecure (invalid TLS)
insecure = false
# Only test links with the given scheme (e.g. https)
# Omit to check links with any scheme
#scheme = "https"
# Request method
method = "get"
# Custom request headers
headers = []
###
### Exclusions
###
# Exclude URLs from checking (supports regex)
exclude = []
# Exclude all private IPs from checking
# Equivalent to setting `exclude_private`, `exclude_link_local`, and `exclude_loopback` to true
exclude_all_private = false
# Exclude private IP address ranges from checking
exclude_private = false
# Exclude link-local IP address range from checking
exclude_link_local = false
# Exclude loopback IP address range from checking
exclude_loopback = false

View file

@ -1,5 +1,7 @@
use crate::extract::{self, Uri};
use crate::options::LycheeOptions;
use crate::{
extract::{self, Uri},
options::Config,
};
use anyhow::anyhow;
use anyhow::{Context, Result};
use check_if_email_exists::{check_email, CheckEmailInput};
@ -78,16 +80,16 @@ pub(crate) struct Excludes {
}
impl Excludes {
pub fn from_options(options: &LycheeOptions) -> Self {
pub fn from_options(config: &Config) -> Self {
// exclude_all_private option turns on all "private" excludes,
// including private IPs, link-local IPs and loopback IPs
let enable_exclude = |opt| opt || options.exclude_all_private;
let enable_exclude = |opt| opt || config.exclude_all_private;
Self {
regex: RegexSet::new(&options.exclude).ok(),
private_ips: enable_exclude(options.exclude_private),
link_local_ips: enable_exclude(options.exclude_link_local),
loopback_ips: enable_exclude(options.exclude_loopback),
regex: RegexSet::new(&config.exclude).ok(),
private_ips: enable_exclude(config.exclude_private),
link_local_ips: enable_exclude(config.exclude_link_local),
loopback_ips: enable_exclude(config.exclude_loopback),
}
}
}

View file

@ -4,10 +4,10 @@ extern crate log;
use anyhow::anyhow;
use anyhow::Result;
use futures::future::join_all;
use gumdrop::Options;
use indicatif::{ProgressBar, ProgressStyle};
use reqwest::header::{HeaderMap, HeaderName};
use std::{collections::HashSet, convert::TryInto, env, time::Duration};
use structopt::StructOpt;
mod checker;
mod collector;
@ -16,7 +16,7 @@ mod options;
use checker::{Checker, Excludes, Status};
use extract::Uri;
use options::LycheeOptions;
use options::{Config, LycheeOptions};
fn print_summary(found: &HashSet<Uri>, results: &[Status]) {
let found = found.len();
@ -41,9 +41,16 @@ fn print_summary(found: &HashSet<Uri>, results: &[Status]) {
fn main() -> Result<()> {
pretty_env_logger::init();
let opts = LycheeOptions::parse_args_default_or_exit();
let opts = LycheeOptions::from_args();
let mut runtime = match opts.threads {
// Load a potentially existing config file and merge it into the config from the CLI
let cfg = if let Some(c) = Config::load_from_file(&opts.config_file)? {
opts.config.merge(c)
} else {
opts.config
};
let mut runtime = match cfg.threads {
Some(threads) => {
// We define our own runtime instead of the `tokio::main` attribute since we want to make the number of threads configurable
tokio::runtime::Builder::new()
@ -54,20 +61,20 @@ fn main() -> Result<()> {
}
None => tokio::runtime::Runtime::new()?,
};
let errorcode = runtime.block_on(run(opts))?;
let errorcode = runtime.block_on(run(cfg, opts.inputs))?;
std::process::exit(errorcode);
}
async fn run(opts: LycheeOptions) -> Result<i32> {
let excludes = Excludes::from_options(&opts);
let headers = parse_headers(opts.headers)?;
let accepted = match opts.accept {
async fn run(cfg: Config, inputs: Vec<String>) -> Result<i32> {
let excludes = Excludes::from_options(&cfg);
let headers = parse_headers(cfg.headers)?;
let accepted = match cfg.accept {
Some(accept) => parse_statuscodes(accept)?,
None => None,
};
let timeout = parse_timeout(opts.timeout)?;
let links = collector::collect_links(opts.inputs, opts.base_url).await?;
let progress_bar = if opts.progress {
let timeout = parse_timeout(cfg.timeout)?;
let links = collector::collect_links(inputs, cfg.base_url).await?;
let progress_bar = if cfg.progress {
Some(
ProgressBar::new(links.len() as u64)
.with_style(
@ -82,15 +89,15 @@ async fn run(opts: LycheeOptions) -> Result<i32> {
let checker = Checker::try_new(
env::var("GITHUB_TOKEN")?,
excludes,
opts.max_redirects,
opts.user_agent,
opts.insecure,
opts.scheme,
cfg.max_redirects,
cfg.user_agent,
cfg.insecure,
cfg.scheme,
headers,
opts.method.try_into()?,
cfg.method.try_into()?,
accepted,
Some(timeout),
opts.verbose,
cfg.verbose,
progress_bar.as_ref(),
)?;
@ -102,7 +109,7 @@ async fn run(opts: LycheeOptions) -> Result<i32> {
progress_bar.finish_and_clear();
}
if opts.verbose {
if cfg.verbose {
print_summary(&links, &results);
}

View file

@ -1,74 +1,196 @@
use gumdrop::Options;
use anyhow::{Error, Result};
use serde::Deserialize;
use std::{fs, io::ErrorKind};
use structopt::StructOpt;
#[derive(Debug, Options)]
// Default values shared between the CLI definition (`default_value = ...`),
// the serde default functions, and `Config::merge`, which compares against
// them to detect options that were left unset.

// Default user agent sent with requests.
const USER_AGENT: &str = "curl/7.71.1";
// Default request method.
const METHOD: &str = "get";
// Default timeout; kept as a string because `Config::timeout` is a `String`.
const TIMEOUT: &str = "20";
// Generates zero-argument functions that each return a fixed expression.
//
// Every entry has the form `name: Type = expression;` and expands to
// `fn name() -> Type { expression }`. The generated functions are meant to be
// referenced by name from `#[serde(default = "name")]` attributes.
macro_rules! default_function {
    ( $( $func:ident : $ret:ty = $value:expr; )* ) => {
        $(
            fn $func() -> $ret {
                $value
            }
        )*
    };
}
// Merges configuration values from one struct into another, field by field.
//
// Usage: `fold_in! { dest, src; field: default; ... }`
//
// A field of `dest` is overwritten with the corresponding field of `src` only
// when `dest` still holds `default` and `src` holds a different value.
// Fields of `src` that are copied over are moved out of it.
macro_rules! fold_in {
    ( $dest:ident , $src:ident ; $( $field:ident : $unset:expr; )* ) => {
        $(
            if $src.$field != $unset && $dest.$field == $unset {
                $dest.$field = $src.$field;
            }
        )*
    };
}
#[derive(Debug, StructOpt)]
#[structopt(
name = "lychee",
about = "A boring link checker for my projects (and maybe yours)"
)]
pub(crate) struct LycheeOptions {
#[options(free, help = "Input files")]
/// Input files
pub inputs: Vec<String>,
#[options(help = "show help")]
pub help: bool,
/// Configuration file to use
#[structopt(short, long = "config", default_value = "./lychee.toml")]
pub config_file: String,
#[options(help = "Verbose program output")]
#[structopt(flatten)]
pub config: Config,
}
#[derive(Debug, Deserialize, StructOpt)]
pub(crate) struct Config {
/// Verbose program output
#[structopt(short, long)]
#[serde(default)]
pub verbose: bool,
#[options(help = "Show progress")]
/// Show progress
#[structopt(short, long)]
#[serde(default)]
pub progress: bool,
#[options(help = "Maximum number of allowed redirects", default = "10")]
/// Maximum number of allowed redirects
#[structopt(short, long, default_value = "10")]
#[serde(default)]
pub max_redirects: usize,
#[options(
help = "Number of threads to utilize (defaults to number of cores available to the system"
)]
/// Number of threads to utilize.
/// Defaults to number of cores available to the system
#[structopt(short = "T", long)]
#[serde(default)]
pub threads: Option<usize>,
#[options(help = "User agent", default = "curl/7.71.1")]
/// User agent
#[structopt(short, long, default_value = USER_AGENT)]
#[serde(default = "user_agent")]
pub user_agent: String,
#[options(
help = "Proceed for server connections considered insecure (invalid TLS)",
default = "false"
)]
/// Proceed for server connections considered insecure (invalid TLS)
#[structopt(short, long)]
#[serde(default)]
pub insecure: bool,
#[options(help = "Only test links with given scheme (e.g. https)")]
/// Only test links with the given scheme (e.g. https)
#[structopt(short, long)]
#[serde(default)]
pub scheme: Option<String>,
// Accumulate all exclusions in a vector
#[options(help = "Exclude URLs from checking (supports regex)")]
/// Exclude URLs from checking (supports regex)
#[structopt(short, long)]
#[serde(default)]
pub exclude: Vec<String>,
#[options(
help = "Exclude all private IPs from checking, equivalent to `--exclude-private --exclude-link-local --exclude--loopback`",
short = "E"
)]
/// Exclude all private IPs from checking.
/// Equivalent to `--exclude-private --exclude-link-local --exclude-loopback`
#[structopt(short = "E", long)]
#[serde(default)]
pub exclude_all_private: bool,
#[options(help = "Exclude private IP address ranges from checking", no_short)]
/// Exclude private IP address ranges from checking
#[structopt(long)]
#[serde(default)]
pub exclude_private: bool,
#[options(help = "Exclude link-local IP address range from checking", no_short)]
/// Exclude link-local IP address range from checking
#[structopt(long)]
#[serde(default)]
pub exclude_link_local: bool,
#[options(help = "Exclude loopback IP address range from checking", no_short)]
/// Exclude loopback IP address range from checking
#[structopt(long)]
#[serde(default)]
pub exclude_loopback: bool,
// Accumulate all headers in a vector
#[options(help = "Custom request headers")]
/// Custom request headers
#[structopt(short, long)]
#[serde(default)]
pub headers: Vec<String>,
#[options(help = "Comma-separated list of accepted status codes for valid links")]
/// Comma-separated list of accepted status codes for valid links
#[structopt(short, long)]
#[serde(default)]
pub accept: Option<String>,
#[options(
help = "Website timeout from connect to response finished",
default = "20"
)]
/// Website timeout from connect to response finished
#[structopt(short, long, default_value = TIMEOUT)]
#[serde(default = "timeout")]
pub timeout: String,
#[options(help = "Request method", default = "get")]
/// Request method
#[structopt(short = "M", long, default_value = METHOD)]
#[serde(default = "method")]
pub method: String,
#[options(help = "Base URL to check relative URls")]
#[structopt(short, long, help = "Base URL to check relative URls")]
#[serde(default)]
pub base_url: Option<String>,
}
impl Config {
/// Load configuration from a file
pub(crate) fn load_from_file(path: &str) -> Result<Option<Config>> {
// Read configuration file
let result = fs::read(path);
// Ignore a file not found error
let contents = match result {
Ok(c) => c,
Err(e) => {
return match e.kind() {
ErrorKind::NotFound => {
println!("[WARN] could not find configuration file, using arguments");
Ok(None)
}
_ => Err(Error::from(e)),
}
}
};
Ok(Some(toml::from_slice(&contents)?))
}
/// Merge the configuration from TOML into the CLI configuration
pub(crate) fn merge(mut self, toml: Config) -> Config {
fold_in! {
// Destination and source configs
self, toml;
// Keys with defaults to assign
verbose: false;
progress: false;
max_redirects: 10;
threads: None;
user_agent: USER_AGENT;
insecure: false;
scheme: None;
exclude: Vec::<String>::new();
exclude_all_private: false;
exclude_private: false;
exclude_link_local: false;
exclude_loopback: false;
headers: Vec::<String>::new();
accept: None;
timeout: TIMEOUT;
method: METHOD;
base_url: None;
}
self
}
}
// Default-value providers named by the `#[serde(default = "...")]` attributes;
// each returns an owned copy of the matching default constant.
default_function! {
    user_agent: String = String::from(USER_AGENT);
    timeout: String = String::from(TIMEOUT);
    method: String = String::from(METHOD);
}