mirror of
https://github.com/Hopiu/lychee.git
synced 2026-03-16 20:50:25 +00:00
Replace Vec<PathBuf> with dedicated PathExcludes type
This commit is contained in:
parent
1de218a78a
commit
002fa49f29
9 changed files with 119 additions and 102 deletions
|
|
@ -1,6 +1,7 @@
|
|||
use log::error;
|
||||
use lychee_lib::Request;
|
||||
use lychee_lib::Result;
|
||||
use lychee_lib::filter::PathExcludes;
|
||||
use std::fs;
|
||||
use std::io::{self, Write};
|
||||
use std::path::PathBuf;
|
||||
|
|
@ -75,7 +76,7 @@ where
|
|||
pub(crate) async fn dump_inputs<S>(
|
||||
sources: S,
|
||||
output: Option<&PathBuf>,
|
||||
excluded_paths: &[PathBuf],
|
||||
excluded_paths: &PathExcludes,
|
||||
) -> Result<ExitCode>
|
||||
where
|
||||
S: futures::Stream<Item = Result<String>>,
|
||||
|
|
@ -90,9 +91,8 @@ where
|
|||
while let Some(source) = sources.next().await {
|
||||
let source = source?;
|
||||
|
||||
let excluded = excluded_paths
|
||||
.iter()
|
||||
.any(|path| source.starts_with(path.to_string_lossy().as_ref()));
|
||||
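// TODO: confirm that regex-based exclusion behaves as intended here (patterns now match anywhere in the source, whereas the previous check was a prefix match) and add a test if coverage is missing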
|
||||
let excluded = excluded_paths.is_match(&source);
|
||||
if excluded {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -141,6 +141,7 @@ fn write_out(writer: &mut Box<dyn Write>, out_str: &str) -> io::Result<()> {
|
|||
mod tests {
|
||||
use super::*;
|
||||
use futures::stream;
|
||||
use regex::RegexSet;
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -158,7 +159,7 @@ mod tests {
|
|||
let stream = stream::iter(inputs);
|
||||
|
||||
// Run dump_inputs
|
||||
let result = dump_inputs(stream, Some(&output_path), &[]).await?;
|
||||
let result = dump_inputs(stream, Some(&output_path), &PathExcludes::empty()).await?;
|
||||
assert_eq!(result, ExitCode::Success);
|
||||
|
||||
// Verify output
|
||||
|
|
@ -179,8 +180,10 @@ mod tests {
|
|||
];
|
||||
let stream = stream::iter(inputs);
|
||||
|
||||
let excluded = vec![PathBuf::from("excluded")];
|
||||
let result = dump_inputs(stream, Some(&output_path), &excluded).await?;
|
||||
let excluded = &PathExcludes {
|
||||
regex: RegexSet::new(["excluded"]).unwrap(),
|
||||
};
|
||||
let result = dump_inputs(stream, Some(&output_path), excluded).await?;
|
||||
assert_eq!(result, ExitCode::Success);
|
||||
|
||||
let contents = fs::read_to_string(&output_path)?;
|
||||
|
|
@ -194,7 +197,7 @@ mod tests {
|
|||
let output_path = temp_file.path().to_path_buf();
|
||||
|
||||
let stream = stream::iter::<Vec<Result<String>>>(vec![]);
|
||||
let result = dump_inputs(stream, Some(&output_path), &[]).await?;
|
||||
let result = dump_inputs(stream, Some(&output_path), &PathExcludes::empty()).await?;
|
||||
assert_eq!(result, ExitCode::Success);
|
||||
|
||||
let contents = fs::read_to_string(&output_path)?;
|
||||
|
|
@ -214,7 +217,7 @@ mod tests {
|
|||
];
|
||||
let stream = stream::iter(inputs);
|
||||
|
||||
let result = dump_inputs(stream, Some(&output_path), &[]).await;
|
||||
let result = dump_inputs(stream, Some(&output_path), &PathExcludes::empty()).await;
|
||||
assert!(result.is_err());
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -225,7 +228,7 @@ mod tests {
|
|||
let inputs = vec![Ok(String::from("test/path1"))];
|
||||
let stream = stream::iter(inputs);
|
||||
|
||||
let result = dump_inputs(stream, None, &[]).await?;
|
||||
let result = dump_inputs(stream, None, &PathExcludes::empty()).await?;
|
||||
assert_eq!(result, ExitCode::Success);
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -70,10 +70,12 @@ use formatters::{get_stats_formatter, log::init_logging};
|
|||
use http::HeaderMap;
|
||||
use log::{error, info, warn};
|
||||
|
||||
use lychee_lib::filter::PathExcludes;
|
||||
#[cfg(feature = "native-tls")]
|
||||
use openssl_sys as _; // required for vendored-openssl feature
|
||||
|
||||
use options::{HeaderMapExt, LYCHEE_CONFIG_FILE};
|
||||
use regex::RegexSet;
|
||||
use ring as _; // required for apple silicon
|
||||
|
||||
use lychee_lib::BasicAuthExtractor;
|
||||
|
|
@ -329,7 +331,9 @@ async fn run(opts: &LycheeOptions) -> Result<i32> {
|
|||
let exit_code = commands::dump_inputs(
|
||||
sources,
|
||||
opts.config.output.as_ref(),
|
||||
&opts.config.exclude_path,
|
||||
&PathExcludes {
|
||||
regex: RegexSet::new(&opts.config.exclude_path)?,
|
||||
},
|
||||
)
|
||||
.await?;
|
||||
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ use lychee_lib::{
|
|||
DEFAULT_RETRY_WAIT_TIME_SECS, DEFAULT_TIMEOUT_SECS, DEFAULT_USER_AGENT, FileExtensions,
|
||||
FileType, Input, StatusCodeExcluder, StatusCodeSelector, archive::Archive,
|
||||
};
|
||||
use regex::RegexSet;
|
||||
use reqwest::tls;
|
||||
use secrecy::{ExposeSecret, SecretString};
|
||||
use serde::{Deserialize, Deserializer};
|
||||
|
|
@ -338,7 +339,7 @@ impl LycheeOptions {
|
|||
s,
|
||||
None,
|
||||
self.config.glob_ignore_case,
|
||||
self.config.exclude_path.clone(),
|
||||
RegexSet::new(&self.config.exclude_path)?.into(),
|
||||
)
|
||||
})
|
||||
.collect::<Result<_, _>>()
|
||||
|
|
@ -503,7 +504,8 @@ and 501."
|
|||
#[serde(default)]
|
||||
pub(crate) include: Vec<String>,
|
||||
|
||||
/// Exclude URLs and mail addresses from checking (supports regex)
|
||||
/// Exclude URLs and mail addresses from checking.
|
||||
/// The value is treated as regular expression.
|
||||
#[arg(long)]
|
||||
#[serde(default)]
|
||||
pub(crate) exclude: Vec<String>,
|
||||
|
|
@ -513,10 +515,11 @@ and 501."
|
|||
#[serde(default)]
|
||||
pub(crate) exclude_file: Vec<String>,
|
||||
|
||||
/// Exclude file path from getting checked.
|
||||
/// Exclude paths from getting checked.
|
||||
/// The value is treated as regular expression.
|
||||
#[arg(long)]
|
||||
#[serde(default)]
|
||||
pub(crate) exclude_path: Vec<PathBuf>,
|
||||
pub(crate) exclude_path: Vec<String>,
|
||||
|
||||
/// Exclude all private IPs from checking.
|
||||
/// Equivalent to `--exclude-private --exclude-link-local --exclude-loopback`
|
||||
|
|
@ -751,7 +754,7 @@ impl Config {
|
|||
exclude_file: Vec::<String>::new(); // deprecated
|
||||
exclude_link_local: false;
|
||||
exclude_loopback: false;
|
||||
exclude_path: Vec::<PathBuf>::new();
|
||||
exclude_path: Vec::<String>::new();
|
||||
exclude_private: false;
|
||||
exclude: Vec::<String>::new();
|
||||
extensions: FileType::default_extensions();
|
||||
|
|
|
|||
|
|
@ -231,7 +231,9 @@ mod tests {
|
|||
|
||||
use super::*;
|
||||
use crate::{
|
||||
Result, Uri, mock_server,
|
||||
Result, Uri,
|
||||
filter::PathExcludes,
|
||||
mock_server,
|
||||
test_utils::{load_fixture, mail, path, website},
|
||||
types::{FileType, Input, InputSource},
|
||||
};
|
||||
|
|
@ -278,7 +280,7 @@ mod tests {
|
|||
&file_path.as_path().display().to_string(),
|
||||
None,
|
||||
true,
|
||||
vec![],
|
||||
PathExcludes::empty(),
|
||||
)?;
|
||||
let contents: Vec<_> = input
|
||||
.get_contents(
|
||||
|
|
@ -298,7 +300,7 @@ mod tests {
|
|||
|
||||
#[tokio::test]
|
||||
async fn test_url_without_extension_is_html() -> Result<()> {
|
||||
let input = Input::new("https://example.com/", None, true, vec![])?;
|
||||
let input = Input::new("https://example.com/", None, true, PathExcludes::empty())?;
|
||||
let contents: Vec<_> = input
|
||||
.get_contents(
|
||||
true,
|
||||
|
|
@ -372,7 +374,7 @@ mod tests {
|
|||
let input = Input {
|
||||
source: InputSource::String("This is [a test](https://endler.dev). This is a relative link test [Relative Link Test](relative_link)".to_string()),
|
||||
file_type_hint: Some(FileType::Markdown),
|
||||
excluded_paths: vec![],
|
||||
excluded_paths: PathExcludes::empty(),
|
||||
};
|
||||
let links = collect(vec![input], None, Some(base)).await.ok().unwrap();
|
||||
|
||||
|
|
@ -398,7 +400,7 @@ mod tests {
|
|||
.to_string(),
|
||||
),
|
||||
file_type_hint: Some(FileType::Html),
|
||||
excluded_paths: vec![],
|
||||
excluded_paths: PathExcludes::empty(),
|
||||
};
|
||||
let links = collect(vec![input], None, Some(base)).await.ok().unwrap();
|
||||
|
||||
|
|
@ -427,7 +429,7 @@ mod tests {
|
|||
.to_string(),
|
||||
),
|
||||
file_type_hint: Some(FileType::Html),
|
||||
excluded_paths: vec![],
|
||||
excluded_paths: PathExcludes::empty(),
|
||||
};
|
||||
let links = collect(vec![input], None, Some(base)).await.ok().unwrap();
|
||||
|
||||
|
|
@ -453,7 +455,7 @@ mod tests {
|
|||
.to_string(),
|
||||
),
|
||||
file_type_hint: Some(FileType::Markdown),
|
||||
excluded_paths: vec![],
|
||||
excluded_paths: PathExcludes::empty(),
|
||||
};
|
||||
|
||||
let links = collect(vec![input], None, Some(base)).await.ok().unwrap();
|
||||
|
|
@ -476,7 +478,7 @@ mod tests {
|
|||
let input = Input {
|
||||
source: InputSource::String(input),
|
||||
file_type_hint: Some(FileType::Html),
|
||||
excluded_paths: vec![],
|
||||
excluded_paths: PathExcludes::empty(),
|
||||
};
|
||||
let links = collect(vec![input], None, Some(base)).await.ok().unwrap();
|
||||
|
||||
|
|
@ -551,7 +553,7 @@ mod tests {
|
|||
.unwrap(),
|
||||
)),
|
||||
file_type_hint: Some(FileType::Html),
|
||||
excluded_paths: vec![],
|
||||
excluded_paths: PathExcludes::empty(),
|
||||
},
|
||||
Input {
|
||||
source: InputSource::RemoteUrl(Box::new(
|
||||
|
|
@ -562,7 +564,7 @@ mod tests {
|
|||
.unwrap(),
|
||||
)),
|
||||
file_type_hint: Some(FileType::Html),
|
||||
excluded_paths: vec![],
|
||||
excluded_paths: PathExcludes::empty(),
|
||||
},
|
||||
];
|
||||
|
||||
|
|
@ -597,7 +599,7 @@ mod tests {
|
|||
.into(),
|
||||
),
|
||||
file_type_hint: Some(FileType::Html),
|
||||
excluded_paths: vec![],
|
||||
excluded_paths: PathExcludes::empty(),
|
||||
};
|
||||
|
||||
let links = collect(vec![input], None, Some(base)).await.ok().unwrap();
|
||||
|
|
|
|||
|
|
@ -1,26 +0,0 @@
|
|||
use regex::RegexSet;
|
||||
|
||||
/// Exclude configuration for the link checker.
|
||||
/// You can ignore links based on regex patterns.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Excludes {
|
||||
/// User-defined set of excluded regex patterns
|
||||
pub(crate) regex: RegexSet,
|
||||
}
|
||||
|
||||
impl Excludes {
|
||||
#[inline]
|
||||
#[must_use]
|
||||
/// Returns `true` if the given input string matches the regex set
|
||||
/// and should hence be excluded from checking
|
||||
pub fn is_match(&self, input: &str) -> bool {
|
||||
self.regex.is_match(input)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[must_use]
|
||||
/// Whether there were no regular expressions defined to be excluded
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.regex.is_empty()
|
||||
}
|
||||
}
|
||||
|
|
@ -1,26 +0,0 @@
|
|||
use regex::RegexSet;
|
||||
|
||||
/// Include configuration for the link checker.
|
||||
/// You can include links based on regex patterns
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Includes {
|
||||
/// User-defined set of included regex patterns
|
||||
pub regex: RegexSet,
|
||||
}
|
||||
|
||||
impl Includes {
|
||||
#[inline]
|
||||
#[must_use]
|
||||
/// Returns `true` if the given input string matches the regex set
|
||||
/// and should hence be included and checked
|
||||
pub fn is_match(&self, input: &str) -> bool {
|
||||
self.regex.is_match(input)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[must_use]
|
||||
/// Whether there were no regular expressions defined for inclusion
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.regex.is_empty()
|
||||
}
|
||||
}
|
||||
|
|
@ -1,12 +1,19 @@
|
|||
mod excludes;
|
||||
mod includes;
|
||||
mod regex_filter;
|
||||
|
||||
use regex::RegexSet;
|
||||
use std::collections::HashSet;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
pub use excludes::Excludes;
|
||||
pub use includes::Includes;
|
||||
/// Include configuration for the link checker.
|
||||
/// You can include links based on regex patterns.
|
||||
pub type Includes = regex_filter::RegexFilter;
|
||||
|
||||
/// Exclude configuration for the link checker.
|
||||
/// You can ignore links based on regex patterns.
|
||||
pub type Excludes = regex_filter::RegexFilter;
|
||||
|
||||
/// Path exclude configuration for the link checker.
/// You can exclude paths and files based on regex patterns.
|
||||
pub type PathExcludes = regex_filter::RegexFilter;
|
||||
|
||||
use crate::Uri;
|
||||
|
||||
|
|
|
|||
46
lychee-lib/src/filter/regex_filter.rs
Normal file
46
lychee-lib/src/filter/regex_filter.rs
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
use regex::RegexSet;
|
||||
|
||||
/// Filter configuration for the link checker.
|
||||
/// You can include and exclude links and paths based on regex patterns
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RegexFilter {
|
||||
/// User-defined set of regex patterns
|
||||
pub regex: RegexSet,
|
||||
}
|
||||
|
||||
impl RegexFilter {
|
||||
#[inline]
|
||||
#[must_use]
|
||||
/// Returns `true` if the given input string matches the regex set
|
||||
/// and should hence be included or excluded
|
||||
pub fn is_match(&self, input: &str) -> bool {
|
||||
self.regex.is_match(input)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[must_use]
|
||||
/// Whether there were no regular expressions defined
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.regex.is_empty()
|
||||
}
|
||||
|
||||
/// Create a new empty regex set.
|
||||
pub fn empty() -> Self {
|
||||
Self {
|
||||
regex: RegexSet::empty(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for RegexFilter {
    /// Two filters compare equal when they were built from the same pattern
    /// strings in the same order.
    fn eq(&self, other: &Self) -> bool {
        // The regex set itself is not compared; the source patterns are
        // compared instead.
        // Workaround, see https://github.com/rust-lang/regex/issues/364
        self.regex.patterns() == other.regex.patterns()
    }
}

// Equality is defined purely on the pattern strings, which is reflexive,
// symmetric and transitive, so the filter can soundly be `Eq` as well.
// This allows types containing a filter to keep deriving `Eq`.
impl Eq for RegexFilter {}
|
||||
|
||||
impl From<RegexSet> for RegexFilter {
    /// Wraps an already constructed [`RegexSet`] in a filter.
    fn from(patterns: RegexSet) -> Self {
        Self { regex: patterns }
    }
}
|
||||
|
|
@ -1,11 +1,13 @@
|
|||
use super::file::FileExtensions;
|
||||
use super::resolver::UrlContentResolver;
|
||||
use crate::filter::PathExcludes;
|
||||
use crate::types::FileType;
|
||||
use crate::{ErrorKind, Result, utils};
|
||||
use async_stream::try_stream;
|
||||
use futures::stream::Stream;
|
||||
use glob::glob_with;
|
||||
use ignore::WalkBuilder;
|
||||
use regex::RegexSet;
|
||||
use reqwest::Url;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use shellexpand::tilde;
|
||||
|
|
@ -101,14 +103,14 @@ impl Display for InputSource {
|
|||
}
|
||||
|
||||
/// Lychee Input with optional file hint for parsing
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct Input {
|
||||
/// Origin of input
|
||||
pub source: InputSource,
|
||||
/// Hint to indicate which extractor to use
|
||||
pub file_type_hint: Option<FileType>,
|
||||
/// Excluded paths that will be skipped when reading content
|
||||
pub excluded_paths: Vec<PathBuf>,
|
||||
pub excluded_paths: PathExcludes,
|
||||
}
|
||||
|
||||
impl Input {
|
||||
|
|
@ -124,7 +126,7 @@ impl Input {
|
|||
value: &str,
|
||||
file_type_hint: Option<FileType>,
|
||||
glob_ignore_case: bool,
|
||||
excluded_paths: Vec<PathBuf>,
|
||||
excluded_paths: PathExcludes,
|
||||
) -> Result<Self> {
|
||||
let source = if value == STDIN {
|
||||
InputSource::Stdin
|
||||
|
|
@ -200,7 +202,7 @@ impl Input {
|
|||
/// Returns an error if the input does not exist (i.e. invalid path)
|
||||
/// and the input cannot be parsed as a URL.
|
||||
pub fn from_value(value: &str) -> Result<Self> {
|
||||
Self::new(value, None, false, vec![])
|
||||
Self::new(value, None, false, RegexSet::empty().into())
|
||||
}
|
||||
|
||||
/// Convenience constructor
|
||||
|
|
@ -208,7 +210,7 @@ impl Input {
|
|||
Self {
|
||||
source,
|
||||
file_type_hint: None,
|
||||
excluded_paths: vec![],
|
||||
excluded_paths: RegexSet::empty().into(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -422,13 +424,8 @@ impl TryFrom<&str> for Input {
|
|||
/// Function for path exclusion tests
|
||||
///
|
||||
/// This is a standalone function to allow for easier testing
|
||||
fn is_excluded_path(excluded_paths: &[PathBuf], path: &PathBuf) -> bool {
|
||||
for excluded in excluded_paths {
|
||||
if let Ok(true) = utils::path::contains(excluded, path) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
fn is_excluded_path(excluded_paths: &PathExcludes, path: &PathBuf) -> bool {
|
||||
excluded_paths.is_match(&path.to_string_lossy())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -443,7 +440,7 @@ mod tests {
|
|||
assert!(path.exists());
|
||||
assert!(path.is_relative());
|
||||
|
||||
let input = Input::new(test_file, None, false, vec![]);
|
||||
let input = Input::new(test_file, None, false, PathExcludes::empty());
|
||||
assert!(input.is_ok());
|
||||
assert!(matches!(
|
||||
input,
|
||||
|
|
@ -471,14 +468,20 @@ mod tests {
|
|||
#[test]
|
||||
fn test_no_exclusions() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
assert!(!is_excluded_path(&[], &dir.path().to_path_buf()));
|
||||
assert!(!is_excluded_path(
|
||||
&PathExcludes::empty(),
|
||||
&dir.path().to_path_buf()
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_excluded() {
|
||||
let dir = tempfile::tempdir().unwrap();
|
||||
let path = dir.path().to_path_buf();
|
||||
assert!(is_excluded_path(&[path.clone()], &path));
|
||||
let excludes = PathExcludes {
|
||||
regex: RegexSet::new([path.to_string_lossy()]).unwrap(),
|
||||
};
|
||||
assert!(is_excluded_path(&excludes, &path));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -487,10 +490,11 @@ mod tests {
|
|||
let parent = parent_dir.path();
|
||||
let child_dir = tempfile::tempdir_in(parent).unwrap();
|
||||
let child = child_dir.path();
|
||||
assert!(is_excluded_path(
|
||||
&[parent.to_path_buf()],
|
||||
&child.to_path_buf()
|
||||
));
|
||||
|
||||
let excludes = PathExcludes {
|
||||
regex: RegexSet::new([parent.to_path_buf().to_string_lossy()]).unwrap(),
|
||||
};
|
||||
assert!(is_excluded_path(&excludes, &child.to_path_buf()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
Loading…
Reference in a new issue