Change exclude behavior to make includes more intuitive

Previously an include alone would not mean that only the included
patterns were checked. Only if an exclude was given as well,
the includes would make a difference. Now, the includes on their
own will work as expected.
Moved the exclude methods into the Excludes module.
Also changed the order of exclude tests to do the fast lookup
ones before the regex ones.
Added tests to guarantee behavior in the future.
This commit is contained in:
Matthias Endler 2021-02-18 21:42:00 +01:00
parent fe5cea1de3
commit 5226cd10aa
2 changed files with 105 additions and 46 deletions

View file

@ -5,8 +5,8 @@ use headers::{HeaderMap, HeaderValue};
use hubcaps::{Credentials, Github};
use regex::{Regex, RegexSet};
use reqwest::header;
use std::convert::TryInto;
use std::{collections::HashSet, time::Duration};
use std::{convert::TryInto, net::IpAddr};
use tokio::time::sleep;
use url::Url;
@ -253,63 +253,29 @@ impl Client {
}
}
/// Returns `true` if `input` matches the client's regex exclude set.
///
/// When no regex set is configured (`None`), nothing is excluded and
/// the result is `false`.
fn in_regex_excludes(&self, input: &str) -> bool {
    self.excludes
        .regex
        .as_ref()
        .map_or(false, |patterns| patterns.is_match(input))
}
/// Returns `true` if the host of `uri` is an IP address that falls into
/// one of the enabled IP exclusion categories (loopback, private,
/// link-local).
///
/// URIs for which `host_ip()` yields no address are never excluded here.
fn in_ip_excludes(&self, uri: &Uri) -> bool {
    let addr = match uri.host_ip() {
        Some(addr) => addr,
        None => return false,
    };

    // The loopback check applies to both address families.
    if self.excludes.loopback_ips && addr.is_loopback() {
        return true;
    }

    // Note: in a pathological case, an IPv6 address can be IPv4-mapped
    // (IPv4 address embedded in a IPv6). We purposefully
    // don't deal with it here, and assume if an address is IPv6,
    // we shouldn't attempt to map it to IPv4.
    // See: https://tools.ietf.org/html/rfc4291#section-2.5.5.2
    match addr {
        IpAddr::V4(v4) => {
            (self.excludes.private_ips && v4.is_private())
                || (self.excludes.link_local_ips && v4.is_link_local())
        }
        IpAddr::V6(_) => false,
    }
}
/// Returns the `mail` flag of this client's exclude configuration,
/// i.e. whether mail addresses should be excluded.
pub fn is_mail_excluded(&self) -> bool {
self.excludes.mail
}
pub fn excluded(&self, request: &Request) -> bool {
if matches!(request.uri, Uri::Mail(_)) && self.excludes.is_mail_excluded() {
return true;
}
if self.excludes.ip(&request.uri) {
return true;
}
if let Some(includes) = &self.includes {
if includes.is_empty() {
return false;
}
if includes.is_match(request.uri.as_str()) {
// Includes take precedence over excludes
return false;
} else {
// In case we have includes and no excludes,
// skip everything that was not included
if self.excludes.regex.is_none() {
if self.excludes.is_empty() {
return true;
}
}
}
if self.in_regex_excludes(request.uri.as_str()) {
return true;
}
if matches!(request.uri, Uri::Mail(_)) {
return self.is_mail_excluded();
}
if self.in_ip_excludes(&request.uri) {
if self.excludes.regex(request.uri.as_str()) {
return true;
}
if self.scheme.is_none() {
@ -550,6 +516,49 @@ mod test {
);
}
#[tokio::test]
async fn test_includes_and_excludes_empty() {
    // Both the include set and the exclude set are empty here.
    // With nothing configured on either side, a regular website
    // request must not be excluded.
    let client = ClientBuilder::default()
        .includes(RegexSet::empty())
        .excludes(Some(RegexSet::empty()))
        .build()
        .unwrap();

    assert!(!client.excluded(&website_url("https://foo.github.com")));
}
#[tokio::test]
async fn test_include_with_empty_exclude() {
    // An empty exclude set combined with a non-empty include set:
    // only requests matching an include pattern are checked,
    // everything else is excluded.
    let client = ClientBuilder::default()
        .includes(RegexSet::new(&[r"foo.github.com"]).unwrap())
        .excludes(Some(RegexSet::empty()))
        .build()
        .unwrap();

    // Matches the include pattern, so it is checked.
    assert!(!client.excluded(&website_url("https://foo.github.com")));

    // Neither of these matches the include pattern, so both are skipped.
    assert!(client.excluded(&website_url("https://github.com")));
    assert!(client.excluded(&website_url("https://bar.github.com")));
}
#[tokio::test]
async fn test_exclude_include_regex() {
let exclude = Some(RegexSet::new(&[r"github.com"]).unwrap());

View file

@ -1,5 +1,9 @@
use std::net::IpAddr;
use regex::RegexSet;
use crate::Uri;
/// Exclude configuration for the link checker.
/// You can ignore links based on regex patterns or pre-defined IP ranges.
#[derive(Clone, Debug)]
@ -27,3 +31,49 @@ impl Default for Excludes {
}
}
}
impl Excludes {
    /// Returns `true` if `input` matches the configured regex exclude set.
    ///
    /// When no regex set is configured (`None`), the result is `false`.
    pub fn regex(&self, input: &str) -> bool {
        self.regex
            .as_ref()
            .map_or(false, |patterns| patterns.is_match(input))
    }

    /// Returns `true` if the host of `uri` is an IP address that falls into
    /// one of the enabled exclusion categories (loopback, private,
    /// link-local).
    ///
    /// URIs for which `host_ip()` yields no address are never excluded here.
    pub fn ip(&self, uri: &Uri) -> bool {
        let addr = match uri.host_ip() {
            Some(addr) => addr,
            None => return false,
        };

        // The loopback check applies to both address families.
        if self.loopback_ips && addr.is_loopback() {
            return true;
        }

        // Note: in a pathological case, an IPv6 address can be IPv4-mapped
        // (IPv4 address embedded in a IPv6). We purposefully
        // don't deal with it here, and assume if an address is IPv6,
        // we shouldn't attempt to map it to IPv4.
        // See: https://tools.ietf.org/html/rfc4291#section-2.5.5.2
        match addr {
            IpAddr::V4(v4) => {
                (self.private_ips && v4.is_private())
                    || (self.link_local_ips && v4.is_link_local())
            }
            IpAddr::V6(_) => false,
        }
    }

    /// Returns the `mail` flag, i.e. whether mail addresses are excluded.
    pub fn is_mail_excluded(&self) -> bool {
        self.mail
    }

    /// Returns `true` if no regex exclude patterns are in effect: either no
    /// regex set is configured, or the configured set is empty.
    ///
    /// Only the regex set is inspected; the IP and mail flags are not
    /// taken into account.
    pub fn is_empty(&self) -> bool {
        self.regex.as_ref().map_or(true, |set| set.is_empty())
    }
}