Add support for .lycheeignore file #308 (#402)

This is similar to files like .gitignore and .dockerignore
and gets merged into exclude_files
This commit is contained in:
Matthias 2021-11-23 01:39:53 +01:00 committed by GitHub
parent cda11359ee
commit 591cbdbebb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 82 additions and 6 deletions

View file

@ -227,7 +227,9 @@ OPTIONS:
--basic-auth <basic-auth> Basic authentication support. E.g. `username:password`
-c, --config <config-file> Configuration file to use [default: ./lychee.toml]
--exclude <exclude>... Exclude URLs from checking (supports regex)
--exclude-file <exclude-file>... A file or files that contains URLs to exclude from checking
--exclude-file <exclude-file>... File or files that contain URLs to be excluded from checking. Regular
expressions supported; one pattern per line. Automatically excludes
patterns from `.lycheeignore` if file exists
-f, --format <format> Output format of final status report (compact, detailed, json, markdown)
[default: compact]
--github-token <github-token> GitHub API token to use when checking github.com links, to avoid rate
@ -256,6 +258,14 @@ ARGS:
- `1` for missing inputs and any unexpected runtime failures or config errors
- `2` for link check failures (if any non-excluded link failed the check)
### Ignoring links
You can exclude links from getting checked by either specifying regex patterns
with `--exclude` (e.g. `--exclude 'example\.(com|org)'`) or by using an "exclude
file" (`--exclude-file`), which allows you to list multiple regular expressions
for exclusion (one pattern per line).
If a file named `.lycheeignore` exists in the current working directory, the patterns it contains are used as exclude patterns as well.
## Library usage
You can use lychee as a library for your own projects.

View file

@ -0,0 +1,8 @@
.*\.example.com
example.org/.+
http://.*
github.com/.*/.*$
^file.*
@

10
fixtures/ignore/TEST.md Normal file
View file

@ -0,0 +1,10 @@
Test HTTP and HTTPS for the same site.
https://example.org
https://example.com
https://github.com/rust-lang/rust/
https://foo.example.com
https://example.org/bar
http://wikipedia.org
https://github.com/lycheeverse/lychee
file:///path/to/file
mail@example.org

View file

@ -0,0 +1 @@
example.com

View file

@ -63,7 +63,7 @@ use ring as _;
use stats::color_response;
use std::fs::File;
use std::io::{self, BufRead, Write};
use std::io::{self, BufRead, BufReader, Write};
use std::iter::FromIterator;
use std::{collections::HashSet, fs, str::FromStr};
@ -90,6 +90,8 @@ use crate::{
writer::StatsWriter,
};
/// Name of the optional ignore file. It is opened as a relative path (i.e.
/// resolved against the current working directory) and, if it can be opened,
/// its lines are appended to the configured exclude patterns.
const LYCHEE_IGNORE_FILE: &str = ".lycheeignore";
/// A C-like enum that can be cast to `i32` and used as process exit code.
enum ExitCode {
Success = 0,
@ -112,6 +114,12 @@ fn main() -> Result<()> {
std::process::exit(exit_code);
}
/// Read all lines from `file`, skipping blank ones.
///
/// A line counts as blank when it is empty or consists only of whitespace
/// (spaces, tabs, ...). Such lines carry no pattern and must not end up as
/// exclude expressions. Every other line is returned verbatim (it is *not*
/// trimmed), because the caller interprets it as a regular expression in
/// which whitespace may be significant.
///
/// # Errors
///
/// Returns an error as soon as a line cannot be read from `file`
/// (e.g. on an I/O failure or invalid UTF-8).
fn read_lines(file: &File) -> Result<Vec<String>> {
    let mut lines = Vec::new();
    // Filter while reading so no intermediate vector of all lines is built.
    for line in BufReader::new(file).lines() {
        let line = line?;
        if !line.trim().is_empty() {
            lines.push(line);
        }
    }
    Ok(lines)
}
fn run_main() -> Result<i32> {
let mut opts = LycheeOptions::from_args();
@ -120,12 +128,14 @@ fn run_main() -> Result<i32> {
opts.config.merge(c);
}
if let Ok(lycheeignore) = File::open(LYCHEE_IGNORE_FILE) {
opts.config.exclude.append(&mut read_lines(&lycheeignore)?);
}
// Load excludes from file
for path in &opts.config.exclude_file {
let file = File::open(path)?;
opts.config
.exclude
.append(&mut io::BufReader::new(file).lines().collect::<Result<_, _>>()?);
opts.config.exclude.append(&mut read_lines(&file)?);
}
let cfg = &opts.config;

View file

@ -183,7 +183,9 @@ pub(crate) struct Config {
#[serde(default)]
pub(crate) exclude: Vec<String>,
/// A file or files that contains URLs to exclude from checking
/// File or files that contain URLs to be excluded from checking. Regular
/// expressions supported; one pattern per line. Automatically excludes
/// patterns from `.lycheeignore` if file exists.
#[structopt(long)]
#[serde(default)]
pub(crate) exclude_file: Vec<String>,

View file

@ -487,6 +487,39 @@ mod cli {
Ok(())
}
/// Runs the binary on `TEST.md` from inside the `ignore` fixture directory
/// (which contains a `.lycheeignore` file) and expects a successful run
/// reporting 9 links in total, 7 of them excluded.
#[test]
fn test_lycheeignore_file() -> Result<()> {
    let mut lychee = main_command();
    let ignore_dir = fixtures_path().join("ignore");

    // The working directory matters: `.lycheeignore` is looked up relative to it.
    lychee.current_dir(ignore_dir).arg("TEST.md");

    let outcome = lychee.assert().success();
    outcome
        .stdout(contains("9 Total"))
        .stdout(contains("7 Excluded"));

    Ok(())
}
/// Same scenario as the plain `.lycheeignore` test, but additionally passes
/// `normal-exclude-file` via `--exclude-file`. Both sources of patterns are
/// expected to apply, raising the excluded count to 8 out of 9 links.
#[test]
fn test_lycheeignore_and_exclude_file() -> Result<()> {
    let mut lychee = main_command();
    let ignore_dir = fixtures_path().join("ignore");
    let extra_excludes = ignore_dir.join("normal-exclude-file");

    // Run from the fixture directory so its `.lycheeignore` is in the working directory.
    lychee
        .current_dir(ignore_dir)
        .arg("TEST.md")
        .arg("--exclude-file")
        .arg(extra_excludes);

    let outcome = lychee.assert().success();
    outcome
        .stdout(contains("9 Total"))
        .stdout(contains("8 Excluded"));

    Ok(())
}
#[test]
fn test_require_https() -> Result<()> {
let mut cmd = main_command();

View file

@ -53,6 +53,8 @@ headers = []
exclude = []
# Exclude URLs contained in a file from checking
# If a file named `.lycheeignore` exists in the current working directory,
# its contents will be excluded as well.
exclude_file = []
include = []