mirror of
https://github.com/Hopiu/lychee.git
synced 2026-03-16 20:50:25 +00:00
Add support for include patterns (#23)
If one or more `include` arguments are specified, only check the URLs that match the patterns. In case `exclude` arguments are also specified, make an exception for excluded URLs that also match the `include` patterns.
This commit is contained in:
parent
6463372799
commit
f0e4c3adc1
5 changed files with 96 additions and 4 deletions
|
|
@ -37,7 +37,7 @@ This comparison is made on a best-effort basis. Please create a PR to fix outdat
|
|||
| Custom user agent | ✔️ | ✖️ | ✖️ | ✔️ | ✖️ | ✔️ | ✖️ | ✖️ |
|
||||
| Relative URLs | ✔️ | ✔️ | ✖️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ |
|
||||
| Skip relative URLs | ✔️ | ✖️ | ✖️ | **?** | ✖️ | ✖️ | ✖️ | ✖️ |
|
||||
| Include patterns | ✖️ | ✔️ | ✖️ | ✔️ | ✖️ | ✖️ | ✖️ | ✖️ |
|
||||
| Include patterns | ✔️ | ✔️ | ✖️ | ✔️ | ✖️ | ✖️ | ✖️ | ✖️ |
|
||||
| Exclude patterns | ✔️ | ✖️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ |
|
||||
| Handle redirects | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ |
|
||||
| Ignore insecure SSL | ✔️ | ✔️ | ✔️ | ✖️ | ✖️ | ✔️ | ✖️ | ✔️ |
|
||||
|
|
|
|||
|
|
@ -52,6 +52,8 @@ headers = []
|
|||
# Exclude URLs from checking (supports regex)
|
||||
exclude = []
|
||||
|
||||
include = []
|
||||
|
||||
# Exclude all private IPs from checking
|
||||
# Equivalent to setting `exclude_private`, `exclude_link_local`, and `exclude_loopback` to true
|
||||
exclude_all_private = false
|
||||
|
|
|
|||
|
|
@ -110,6 +110,7 @@ impl Default for Excludes {
|
|||
pub(crate) struct Checker<'a> {
|
||||
reqwest_client: reqwest::Client,
|
||||
github: Github,
|
||||
includes: Option<RegexSet>,
|
||||
excludes: Excludes,
|
||||
scheme: Option<String>,
|
||||
method: RequestMethod,
|
||||
|
|
@ -125,6 +126,7 @@ impl<'a> Checker<'a> {
|
|||
#[allow(clippy::too_many_arguments)]
|
||||
pub fn try_new(
|
||||
token: String,
|
||||
includes: Option<RegexSet>,
|
||||
excludes: Excludes,
|
||||
max_redirects: usize,
|
||||
user_agent: String,
|
||||
|
|
@ -165,6 +167,7 @@ impl<'a> Checker<'a> {
|
|||
Ok(Checker {
|
||||
reqwest_client,
|
||||
github,
|
||||
includes,
|
||||
excludes,
|
||||
scheme,
|
||||
method,
|
||||
|
|
@ -282,6 +285,18 @@ impl<'a> Checker<'a> {
|
|||
}
|
||||
|
||||
pub fn excluded(&self, uri: &Uri) -> bool {
|
||||
if let Some(includes) = &self.includes {
|
||||
if includes.is_match(uri.as_str()) {
|
||||
// Includes take precedence over excludes
|
||||
return false;
|
||||
} else {
|
||||
// In case we have includes and no excludes,
|
||||
// skip everything that was not included
|
||||
if self.excludes.regex.is_none() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if self.in_regex_excludes(uri.as_str()) {
|
||||
return true;
|
||||
}
|
||||
|
|
@ -347,7 +362,7 @@ impl<'a> Checker<'a> {
|
|||
|
||||
if let Some(pb) = self.progress_bar {
|
||||
pb.inc(1);
|
||||
// regular println! inteferes with progress bar
|
||||
// regular println! interferes with progress bar
|
||||
if let Some(message) = self.status_message(&ret, uri) {
|
||||
pb.println(message);
|
||||
}
|
||||
|
|
@ -390,6 +405,7 @@ mod test {
|
|||
fn get_checker(allow_insecure: bool, custom_headers: HeaderMap) -> Checker<'static> {
|
||||
let checker = Checker::try_new(
|
||||
"DUMMY_GITHUB_TOKEN".to_string(),
|
||||
None,
|
||||
Excludes::default(),
|
||||
5,
|
||||
"curl/7.71.1".to_string(),
|
||||
|
|
@ -419,7 +435,7 @@ mod test {
|
|||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_exponetial_backoff() {
|
||||
async fn test_exponential_backoff() {
|
||||
let start = Instant::now();
|
||||
let res = get_checker(false, HeaderMap::new())
|
||||
.check(&Uri::Website(
|
||||
|
|
@ -519,6 +535,7 @@ mod test {
|
|||
|
||||
let checker = Checker::try_new(
|
||||
"DUMMY_GITHUB_TOKEN".to_string(),
|
||||
None,
|
||||
Excludes::default(),
|
||||
5,
|
||||
"curl/7.71.1".to_string(),
|
||||
|
|
@ -539,6 +556,69 @@ mod test {
|
|||
assert!(matches!(resp, Status::Timeout));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_include_regex() {
|
||||
let includes = Some(RegexSet::new(&[r"foo.github.com"]).unwrap());
|
||||
|
||||
let checker = Checker::try_new(
|
||||
"DUMMY_GITHUB_TOKEN".to_string(),
|
||||
includes,
|
||||
Excludes::default(),
|
||||
5,
|
||||
"curl/7.71.1".to_string(),
|
||||
true,
|
||||
None,
|
||||
HeaderMap::new(),
|
||||
RequestMethod::GET,
|
||||
None,
|
||||
None,
|
||||
false,
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
checker.excluded(&website_url("https://foo.github.com")),
|
||||
false
|
||||
);
|
||||
assert_eq!(
|
||||
checker.excluded(&website_url("https://bar.github.com")),
|
||||
true
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_exclude_include_regex() {
|
||||
let mut excludes = Excludes::default();
|
||||
excludes.regex = Some(RegexSet::new(&[r"github.com"]).unwrap());
|
||||
let includes = Some(RegexSet::new(&[r"foo.github.com"]).unwrap());
|
||||
|
||||
let checker = Checker::try_new(
|
||||
"DUMMY_GITHUB_TOKEN".to_string(),
|
||||
includes,
|
||||
excludes,
|
||||
5,
|
||||
"curl/7.71.1".to_string(),
|
||||
true,
|
||||
None,
|
||||
HeaderMap::new(),
|
||||
RequestMethod::GET,
|
||||
None,
|
||||
None,
|
||||
false,
|
||||
None,
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
checker.excluded(&website_url("https://foo.github.com")),
|
||||
false
|
||||
);
|
||||
assert_eq!(checker.excluded(&website_url("https://github.com")), true);
|
||||
assert_eq!(
|
||||
checker.excluded(&website_url("https://bar.github.com")),
|
||||
true
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_exclude_regex() {
|
||||
let mut excludes = Excludes::default();
|
||||
|
|
@ -547,6 +627,7 @@ mod test {
|
|||
|
||||
let checker = Checker::try_new(
|
||||
"DUMMY_GITHUB_TOKEN".to_string(),
|
||||
None,
|
||||
excludes,
|
||||
5,
|
||||
"curl/7.71.1".to_string(),
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ use anyhow::anyhow;
|
|||
use anyhow::Result;
|
||||
use futures::future::join_all;
|
||||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use regex::RegexSet;
|
||||
use reqwest::header::{HeaderMap, HeaderName};
|
||||
use std::{collections::HashSet, convert::TryInto, env, time::Duration};
|
||||
use structopt::StructOpt;
|
||||
|
|
@ -66,6 +67,7 @@ fn main() -> Result<()> {
|
|||
}
|
||||
|
||||
async fn run(cfg: Config, inputs: Vec<String>) -> Result<i32> {
|
||||
let includes = RegexSet::new(&cfg.include).ok();
|
||||
let excludes = Excludes::from_options(&cfg);
|
||||
let headers = parse_headers(cfg.headers)?;
|
||||
let accepted = match cfg.accept {
|
||||
|
|
@ -88,6 +90,7 @@ async fn run(cfg: Config, inputs: Vec<String>) -> Result<i32> {
|
|||
};
|
||||
let checker = Checker::try_new(
|
||||
env::var("GITHUB_TOKEN")?,
|
||||
includes,
|
||||
excludes,
|
||||
cfg.max_redirects,
|
||||
cfg.user_agent,
|
||||
|
|
|
|||
|
|
@ -84,8 +84,13 @@ pub(crate) struct Config {
|
|||
#[serde(default)]
|
||||
pub scheme: Option<String>,
|
||||
|
||||
/// URLs to check (supports regex). Has preference over all excludes.
|
||||
#[structopt(long)]
|
||||
#[serde(default)]
|
||||
pub include: Vec<String>,
|
||||
|
||||
/// Exclude URLs from checking (supports regex)
|
||||
#[structopt(short, long)]
|
||||
#[structopt(long)]
|
||||
#[serde(default)]
|
||||
pub exclude: Vec<String>,
|
||||
|
||||
|
|
@ -172,6 +177,7 @@ impl Config {
|
|||
user_agent: USER_AGENT;
|
||||
insecure: false;
|
||||
scheme: None;
|
||||
include: Vec::<String>::new();
|
||||
exclude: Vec::<String>::new();
|
||||
exclude_all_private: false;
|
||||
exclude_private: false;
|
||||
|
|
|
|||
Loading…
Reference in a new issue