From dcee4a1058df8fd2e7b310e51408fca57c5e3362 Mon Sep 17 00:00:00 2001 From: dblock Date: Wed, 1 Sep 2021 11:37:31 -0400 Subject: [PATCH] Added support for --exclude-file. --- README.md | 1 + fixtures/TEST_EXCLUDE_1.txt | 3 ++ fixtures/TEST_EXCLUDE_2.txt | 1 + lychee-bin/src/main.rs | 14 ++++++++ lychee-bin/src/options.rs | 6 ++++ lychee-bin/tests/cli.rs | 67 +++++++++++++++++++++++++++++++++++++ lychee.example.toml | 3 ++ 7 files changed, 95 insertions(+) create mode 100644 fixtures/TEST_EXCLUDE_1.txt create mode 100644 fixtures/TEST_EXCLUDE_2.txt diff --git a/README.md b/README.md index 63f1dc0..f6a951d 100644 --- a/README.md +++ b/README.md @@ -197,6 +197,7 @@ OPTIONS: --basic-auth Basic authentication support. E.g. `username:password` -c, --config Configuration file to use [default: ./lychee.toml] --exclude ... Exclude URLs from checking (supports regex) + --exclude-file ... A file or files that contains URLs to exclude from checking -f, --format Output file format of status report (json, string) [default: string] --github-token GitHub API token to use when checking github.com links, to avoid rate limiting [env: GITHUB_TOKEN=] diff --git a/fixtures/TEST_EXCLUDE_1.txt b/fixtures/TEST_EXCLUDE_1.txt new file mode 100644 index 0000000..3e11d35 --- /dev/null +++ b/fixtures/TEST_EXCLUDE_1.txt @@ -0,0 +1,3 @@ +https://en.wikipedia.org/* +https://ldra.com +https://url-does-not-exist \ No newline at end of file diff --git a/fixtures/TEST_EXCLUDE_2.txt b/fixtures/TEST_EXCLUDE_2.txt new file mode 100644 index 0000000..af90ee7 --- /dev/null +++ b/fixtures/TEST_EXCLUDE_2.txt @@ -0,0 +1 @@ +https://i.creativecommons.org/p/zero/1.0/88x31.png \ No newline at end of file diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs index a685f25..5b526f2 100644 --- a/lychee-bin/src/main.rs +++ b/lychee-bin/src/main.rs @@ -63,6 +63,8 @@ use ring as _; use std::iter::FromIterator; use std::{collections::HashSet, fs, str::FromStr, time::Duration}; +use std::io::{self, BufRead}; 
+use std::fs::File; use anyhow::{anyhow, Context, Result}; use headers::{authorization::Basic, Authorization, HeaderMap, HeaderMapExt, HeaderName}; @@ -113,6 +115,18 @@ fn run_main() -> Result { if let Some(c) = Config::load_from_file(&opts.config_file)? { opts.config.merge(c) } + + // Load excludes from file; I/O errors are returned to the caller instead of panicking + for path in &opts.config.exclude_file { + let file = File::open(path) + .with_context(|| format!("Cannot open exclude file {:?}", path))?; + let lines: io::Result<Vec<_>> = io::BufReader::new(file).lines().collect(); + let lines = lines.with_context(|| format!("Cannot read exclude file {:?}", path))?; + // Skip blank lines: an empty pattern would match (and so exclude) every URL + let patterns = lines.into_iter().filter(|line| !line.trim().is_empty()); + opts.config.exclude.extend(patterns); + } + + let cfg = &opts.config; let runtime = match cfg.threads { diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs index 4689b7e..9c88bb9 100644 --- a/lychee-bin/src/options.rs +++ b/lychee-bin/src/options.rs @@ -165,6 +165,11 @@ pub(crate) struct Config { #[serde(default)] pub(crate) exclude: Vec, + /// A file or files that contains URLs to exclude from checking + #[structopt(long)] + #[serde(default)] + pub(crate) exclude_file: Vec, + /// Exclude all private IPs from checking. 
/// Equivalent to `--exclude-private --exclude-link-local --exclude-loopback` #[structopt(short = "E", long, verbatim_doc_comment)] @@ -285,6 +290,7 @@ impl Config { scheme: Vec::::new(); include: Vec::::new(); exclude: Vec::::new(); + exclude_file: Vec::::new(); exclude_all_private: false; exclude_private: false; exclude_link_local: false; diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index d9e5082..a4f353a 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -370,4 +370,71 @@ mod cli { fs::remove_file(outfile)?; Ok(()) } + + /// Test excludes: a catch-all `--exclude` pattern (`.*`) must report 10 links as excluded + #[test] + fn test_exclude_wildcard() -> Result<()> { + let mut cmd = main_command(); + let test_path = fixtures_path().join("TEST.md"); + + cmd.arg(test_path) + .arg("--exclude") + .arg(".*") + .assert() + .success() + .stdout(contains("Excluded........10")); + + Ok(()) + } + + #[test] + fn test_exclude_multiple_urls() -> Result<()> { + let mut cmd = main_command(); + let test_path = fixtures_path().join("TEST.md"); + + cmd.arg(test_path) + .arg("--exclude") + .arg("https://en.wikipedia.org/*") + .arg("https://ldra.com/") + .assert() + .success() + .stdout(contains("Excluded.........2")); + + Ok(()) + } + + #[test] + fn test_exclude_file() -> Result<()> { + let mut cmd = main_command(); + let test_path = fixtures_path().join("TEST.md"); + let excludes_path = fixtures_path().join("TEST_EXCLUDE_1.txt"); + + cmd.arg(test_path) + .arg("--exclude-file") + .arg(excludes_path) + .assert() + .success() + .stdout(contains("Excluded.........2")); + + Ok(()) + } + + #[test] + fn test_multiple_exclude_files() -> Result<()> { + let mut cmd = main_command(); + let test_path = fixtures_path().join("TEST.md"); + let excludes_path1 = fixtures_path().join("TEST_EXCLUDE_1.txt"); + let excludes_path2 = fixtures_path().join("TEST_EXCLUDE_2.txt"); + + cmd.arg(test_path) + .arg("--exclude-file") + .arg(excludes_path1) + .arg(excludes_path2) + .assert() + .success() + .stdout(contains("Excluded.........3")); + + 
Ok(()) + } } + diff --git a/lychee.example.toml b/lychee.example.toml index 358a1b0..12a5c5c 100644 --- a/lychee.example.toml +++ b/lychee.example.toml @@ -52,6 +52,9 @@ headers = [] # Exclude URLs from checking (supports regex) exclude = [] +# Exclude URLs contained in a file from checking +exclude_file = [] + include = [] # Exclude all private IPs from checking