From f0e4c3adc19c86afa61d2df0929d1a107b7c1674 Mon Sep 17 00:00:00 2001 From: Matthias Date: Sun, 25 Oct 2020 13:41:06 +0100 Subject: [PATCH] Add support for `include` patterns (#23) If one or more `include` arguments are specified, only check the URLs that match the patterns. In case `exclude` arguments are also specified, make an exception from the excluded URLs if they also match the `include` patterns. --- README.md | 2 +- lychee.example.toml | 2 ++ src/checker.rs | 85 +++++++++++++++++++++++++++++++++++++++++++-- src/main.rs | 3 ++ src/options.rs | 8 ++++- 5 files changed, 96 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 21d86c9..4d6aec4 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ This comparison is made on a best-effort basis. Please create a PR to fix outdat | Custom user agent | ✔️ | ✖️ | ✖️ | ✔️ | ✖️ | ✔️ | ✖️ | ✖️ | | Relative URLs | ✔️ | ✔️ | ✖️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | | Skip relative URLs | ✔️ | ✖️ | ✖️ | **?** | ✖️ | ✖️ | ✖️ | ✖️ | -| Include patterns | ✖️ | ✔️ | ✖️ | ✔️ | ✖️ | ✖️ | ✖️ | ✖️ | +| Include patterns | ✔️️ | ✔️ | ✖️ | ✔️ | ✖️ | ✖️ | ✖️ | ✖️ | | Exclude patterns | ✔️ | ✖️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | | Handle redirects | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | | Ignore insecure SSL | ✔️ | ✔️ | ✔️ | ✖️ | ✖️ | ✔️ | ✖️ | ✔️ | diff --git a/lychee.example.toml b/lychee.example.toml index 7d13300..15f4c1e 100644 --- a/lychee.example.toml +++ b/lychee.example.toml @@ -52,6 +52,8 @@ headers = [] # Exclude URLs from checking (supports regex) exclude = [] +include = [] + # Exclude all private IPs from checking # Equivalent to setting `exclude_private`, `exclude_link_local`, and `exclude_loopback` to true exclude_all_private = false diff --git a/src/checker.rs b/src/checker.rs index 8b0701c..487819c 100644 --- a/src/checker.rs +++ b/src/checker.rs @@ -110,6 +110,7 @@ impl Default for Excludes { pub(crate) struct Checker<'a> { reqwest_client: reqwest::Client, github: Github, + includes: Option, excludes: 
Excludes, scheme: Option, method: RequestMethod, @@ -125,6 +126,7 @@ impl<'a> Checker<'a> { #[allow(clippy::too_many_arguments)] pub fn try_new( token: String, + includes: Option, excludes: Excludes, max_redirects: usize, user_agent: String, @@ -165,6 +167,7 @@ impl<'a> Checker<'a> { Ok(Checker { reqwest_client, github, + includes, excludes, scheme, method, @@ -282,6 +285,18 @@ impl<'a> Checker<'a> { } pub fn excluded(&self, uri: &Uri) -> bool { + if let Some(includes) = &self.includes { + if includes.is_match(uri.as_str()) { + // Includes take precedence over excludes + return false; + } else { + // In case we have includes and no excludes, + // skip everything that was not included + if self.excludes.regex.is_none() { + return true; + } + } + } if self.in_regex_excludes(uri.as_str()) { return true; } @@ -347,7 +362,7 @@ impl<'a> Checker<'a> { if let Some(pb) = self.progress_bar { pb.inc(1); - // regular println! inteferes with progress bar + // regular println! interferes with progress bar if let Some(message) = self.status_message(&ret, uri) { pb.println(message); } @@ -390,6 +405,7 @@ mod test { fn get_checker(allow_insecure: bool, custom_headers: HeaderMap) -> Checker<'static> { let checker = Checker::try_new( "DUMMY_GITHUB_TOKEN".to_string(), + None, Excludes::default(), 5, "curl/7.71.1".to_string(), @@ -419,7 +435,7 @@ mod test { } #[tokio::test] - async fn test_exponetial_backoff() { + async fn test_exponential_backoff() { let start = Instant::now(); let res = get_checker(false, HeaderMap::new()) .check(&Uri::Website( @@ -519,6 +535,7 @@ mod test { let checker = Checker::try_new( "DUMMY_GITHUB_TOKEN".to_string(), + None, Excludes::default(), 5, "curl/7.71.1".to_string(), @@ -539,6 +556,69 @@ mod test { assert!(matches!(resp, Status::Timeout)); } + #[tokio::test] + async fn test_include_regex() { + let includes = Some(RegexSet::new(&[r"foo.github.com"]).unwrap()); + + let checker = Checker::try_new( + "DUMMY_GITHUB_TOKEN".to_string(), + includes, + 
Excludes::default(), + 5, + "curl/7.71.1".to_string(), + true, + None, + HeaderMap::new(), + RequestMethod::GET, + None, + None, + false, + None, + ) + .unwrap(); + assert_eq!( + checker.excluded(&website_url("https://foo.github.com")), + false + ); + assert_eq!( + checker.excluded(&website_url("https://bar.github.com")), + true + ); + } + + #[tokio::test] + async fn test_exclude_include_regex() { + let mut excludes = Excludes::default(); + excludes.regex = Some(RegexSet::new(&[r"github.com"]).unwrap()); + let includes = Some(RegexSet::new(&[r"foo.github.com"]).unwrap()); + + let checker = Checker::try_new( + "DUMMY_GITHUB_TOKEN".to_string(), + includes, + excludes, + 5, + "curl/7.71.1".to_string(), + true, + None, + HeaderMap::new(), + RequestMethod::GET, + None, + None, + false, + None, + ) + .unwrap(); + assert_eq!( + checker.excluded(&website_url("https://foo.github.com")), + false + ); + assert_eq!(checker.excluded(&website_url("https://github.com")), true); + assert_eq!( + checker.excluded(&website_url("https://bar.github.com")), + true + ); + } + #[tokio::test] async fn test_exclude_regex() { let mut excludes = Excludes::default(); @@ -547,6 +627,7 @@ mod test { let checker = Checker::try_new( "DUMMY_GITHUB_TOKEN".to_string(), + None, excludes, 5, "curl/7.71.1".to_string(), diff --git a/src/main.rs b/src/main.rs index 9efb64e..3b06ec5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,6 +5,7 @@ use anyhow::anyhow; use anyhow::Result; use futures::future::join_all; use indicatif::{ProgressBar, ProgressStyle}; +use regex::RegexSet; use reqwest::header::{HeaderMap, HeaderName}; use std::{collections::HashSet, convert::TryInto, env, time::Duration}; use structopt::StructOpt; @@ -66,6 +67,7 @@ fn main() -> Result<()> { } async fn run(cfg: Config, inputs: Vec) -> Result { + let includes = RegexSet::new(&cfg.include).ok(); let excludes = Excludes::from_options(&cfg); let headers = parse_headers(cfg.headers)?; let accepted = match cfg.accept { @@ -88,6 +90,7 @@ async 
fn run(cfg: Config, inputs: Vec) -> Result { }; let checker = Checker::try_new( env::var("GITHUB_TOKEN")?, + includes, excludes, cfg.max_redirects, cfg.user_agent, diff --git a/src/options.rs b/src/options.rs index 3458d36..6a21271 100644 --- a/src/options.rs +++ b/src/options.rs @@ -84,8 +84,13 @@ pub(crate) struct Config { #[serde(default)] pub scheme: Option, + /// URLs to check (supports regex). Has preference over all excludes. + #[structopt(long)] + #[serde(default)] + pub include: Vec, + /// Exclude URLs from checking (supports regex) - #[structopt(short, long)] + #[structopt(long)] #[serde(default)] pub exclude: Vec, @@ -172,6 +177,7 @@ impl Config { user_agent: USER_AGENT; insecure: false; scheme: None; + include: Vec::::new(); exclude: Vec::::new(); exclude_all_private: false; exclude_private: false;