Add support for include patterns (#23)

If one or more `include` arguments are specified, only check the URLs that match the patterns.
In case `exclude` arguments are also
specified, make an exception from the
excluded URLs if they also match the
`include` patterns.
This commit is contained in:
Matthias 2020-10-25 13:41:06 +01:00 committed by GitHub
parent 6463372799
commit f0e4c3adc1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 96 additions and 4 deletions

View file

@ -37,7 +37,7 @@ This comparison is made on a best-effort basis. Please create a PR to fix outdat
| Custom user agent | ✔️ | ✖️ | ✖️ | ✔️ | ✖️ | ✔️ | ✖️ | ✖️ |
| Relative URLs | ✔️ | ✔️ | ✖️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ |
| Skip relative URLs | ✔️ | ✖️ | ✖️ | **?** | ✖️ | ✖️ | ✖️ | ✖️ |
| Include patterns | | ✔️ | ✖️ | ✔️ | ✖️ | ✖️ | ✖️ | ✖️ |
| Include patterns | ✔️ | ✔️ | ✖️ | ✔️ | ✖️ | ✖️ | ✖️ | ✖️ |
| Exclude patterns | ✔️ | ✖️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ |
| Handle redirects | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ |
| Ignore insecure SSL | ✔️ | ✔️ | ✔️ | ✖️ | ✖️ | ✔️ | ✖️ | ✔️ |

View file

@ -52,6 +52,8 @@ headers = []
# Exclude URLs from checking (supports regex)
exclude = []
include = []
# Exclude all private IPs from checking
# Equivalent to setting `exclude_private`, `exclude_link_local`, and `exclude_loopback` to true
exclude_all_private = false

View file

@ -110,6 +110,7 @@ impl Default for Excludes {
pub(crate) struct Checker<'a> {
reqwest_client: reqwest::Client,
github: Github,
includes: Option<RegexSet>,
excludes: Excludes,
scheme: Option<String>,
method: RequestMethod,
@ -125,6 +126,7 @@ impl<'a> Checker<'a> {
#[allow(clippy::too_many_arguments)]
pub fn try_new(
token: String,
includes: Option<RegexSet>,
excludes: Excludes,
max_redirects: usize,
user_agent: String,
@ -165,6 +167,7 @@ impl<'a> Checker<'a> {
Ok(Checker {
reqwest_client,
github,
includes,
excludes,
scheme,
method,
@ -282,6 +285,18 @@ impl<'a> Checker<'a> {
}
pub fn excluded(&self, uri: &Uri) -> bool {
if let Some(includes) = &self.includes {
if includes.is_match(uri.as_str()) {
// Includes take precedence over excludes
return false;
} else {
// In case we have includes and no excludes,
// skip everything that was not included
if self.excludes.regex.is_none() {
return true;
}
}
}
if self.in_regex_excludes(uri.as_str()) {
return true;
}
@ -347,7 +362,7 @@ impl<'a> Checker<'a> {
if let Some(pb) = self.progress_bar {
pb.inc(1);
// regular println! inteferes with progress bar
// regular println! interferes with progress bar
if let Some(message) = self.status_message(&ret, uri) {
pb.println(message);
}
@ -390,6 +405,7 @@ mod test {
fn get_checker(allow_insecure: bool, custom_headers: HeaderMap) -> Checker<'static> {
let checker = Checker::try_new(
"DUMMY_GITHUB_TOKEN".to_string(),
None,
Excludes::default(),
5,
"curl/7.71.1".to_string(),
@ -419,7 +435,7 @@ mod test {
}
#[tokio::test]
async fn test_exponetial_backoff() {
async fn test_exponential_backoff() {
let start = Instant::now();
let res = get_checker(false, HeaderMap::new())
.check(&Uri::Website(
@ -519,6 +535,7 @@ mod test {
let checker = Checker::try_new(
"DUMMY_GITHUB_TOKEN".to_string(),
None,
Excludes::default(),
5,
"curl/7.71.1".to_string(),
@ -539,6 +556,69 @@ mod test {
assert!(matches!(resp, Status::Timeout));
}
/// With an include pattern and default excludes, `excluded` must return
/// `false` for a URL that matches the pattern and `true` for one that does not.
#[tokio::test]
async fn test_include_regex() {
    // Dots are escaped so the pattern matches the literal host name only;
    // an unescaped `.` would match any character.
    let includes = Some(RegexSet::new(&[r"foo\.github\.com"]).unwrap());
    let checker = Checker::try_new(
        "DUMMY_GITHUB_TOKEN".to_string(),
        includes,
        Excludes::default(),
        5,
        "curl/7.71.1".to_string(),
        true,
        None,
        HeaderMap::new(),
        RequestMethod::GET,
        None,
        None,
        false,
        None,
    )
    .unwrap();

    // Matches the include pattern, so it is not excluded.
    assert!(!checker.excluded(&website_url("https://foo.github.com")));
    // Does not match the include pattern, so it is excluded.
    assert!(checker.excluded(&website_url("https://bar.github.com")));
}
/// With both an exclude and an include pattern configured, `excluded` must
/// return `false` for a URL matching the include pattern (even though it also
/// matches the exclude pattern) and `true` for URLs matching only the exclude
/// pattern.
#[tokio::test]
async fn test_exclude_include_regex() {
    // Dots are escaped in both patterns so they match literal host names only.
    let mut excludes = Excludes::default();
    excludes.regex = Some(RegexSet::new(&[r"github\.com"]).unwrap());
    let includes = Some(RegexSet::new(&[r"foo\.github\.com"]).unwrap());
    let checker = Checker::try_new(
        "DUMMY_GITHUB_TOKEN".to_string(),
        includes,
        excludes,
        5,
        "curl/7.71.1".to_string(),
        true,
        None,
        HeaderMap::new(),
        RequestMethod::GET,
        None,
        None,
        false,
        None,
    )
    .unwrap();

    // Matches both patterns: the include wins.
    assert!(!checker.excluded(&website_url("https://foo.github.com")));
    // Match only the exclude pattern.
    assert!(checker.excluded(&website_url("https://github.com")));
    assert!(checker.excluded(&website_url("https://bar.github.com")));
}
#[tokio::test]
async fn test_exclude_regex() {
let mut excludes = Excludes::default();
@ -547,6 +627,7 @@ mod test {
let checker = Checker::try_new(
"DUMMY_GITHUB_TOKEN".to_string(),
None,
excludes,
5,
"curl/7.71.1".to_string(),

View file

@ -5,6 +5,7 @@ use anyhow::anyhow;
use anyhow::Result;
use futures::future::join_all;
use indicatif::{ProgressBar, ProgressStyle};
use regex::RegexSet;
use reqwest::header::{HeaderMap, HeaderName};
use std::{collections::HashSet, convert::TryInto, env, time::Duration};
use structopt::StructOpt;
@ -66,6 +67,7 @@ fn main() -> Result<()> {
}
async fn run(cfg: Config, inputs: Vec<String>) -> Result<i32> {
// An empty `include` list means "no include filter". `RegexSet::new` accepts an
// empty list and returns an empty set, so wrapping that result in `Some` would
// give the checker an include filter that can never match.
// Invalid patterns are returned to the caller as an error rather than being
// silently discarded.
let includes = if cfg.include.is_empty() {
    None
} else {
    Some(RegexSet::new(&cfg.include)?)
};
let excludes = Excludes::from_options(&cfg);
let headers = parse_headers(cfg.headers)?;
let accepted = match cfg.accept {
@ -88,6 +90,7 @@ async fn run(cfg: Config, inputs: Vec<String>) -> Result<i32> {
};
let checker = Checker::try_new(
env::var("GITHUB_TOKEN")?,
includes,
excludes,
cfg.max_redirects,
cfg.user_agent,

View file

@ -84,8 +84,13 @@ pub(crate) struct Config {
#[serde(default)]
pub scheme: Option<String>,
/// URLs to check (supports regex). Takes precedence over all excludes.
#[structopt(long)]
#[serde(default)]
pub include: Vec<String>,
/// Exclude URLs from checking (supports regex)
#[structopt(short, long)]
#[structopt(long)]
#[serde(default)]
pub exclude: Vec<String>,
@ -172,6 +177,7 @@ impl Config {
user_agent: USER_AGENT;
insecure: false;
scheme: None;
include: Vec::<String>::new();
exclude: Vec::<String>::new();
exclude_all_private: false;
exclude_private: false;