mirror of
https://github.com/Hopiu/lychee.git
synced 2026-04-17 11:50:59 +00:00
Merge pull request #7 from pawroman/exclude-private-urls
Add exclude private URLs feature
This commit is contained in:
commit
dd73d6e145
8 changed files with 383 additions and 45 deletions
86
Cargo.lock
generated
86
Cargo.lock
generated
|
|
@ -68,6 +68,19 @@ version = "0.9.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "71938f30533e4d95a6d17aa530939da3842c2ab6f4f84b9dae68447e4129f74a"
|
||||
|
||||
[[package]]
|
||||
name = "assert_cmd"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c88b9ca26f9c16ec830350d309397e74ee9abdfd8eb1f71cb6ecc71a3fc818da"
|
||||
dependencies = [
|
||||
"doc-comment",
|
||||
"predicates",
|
||||
"predicates-core",
|
||||
"predicates-tree",
|
||||
"wait-timeout",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "async-channel"
|
||||
version = "1.4.0"
|
||||
|
|
@ -577,6 +590,18 @@ version = "2.3.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d4d0e2d24e5ee3b23a01de38eefdcd978907890701f08ffffd4cb457ca4ee8d6"
|
||||
|
||||
[[package]]
|
||||
name = "difference"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198"
|
||||
|
||||
[[package]]
|
||||
name = "doc-comment"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10"
|
||||
|
||||
[[package]]
|
||||
name = "dtoa"
|
||||
version = "0.4.6"
|
||||
|
|
@ -675,6 +700,15 @@ dependencies = [
|
|||
"miniz_oxide",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "float-cmp"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e1267f4ac4f343772758f7b1bdcbe767c218bbab93bb432acbf5162bbf85a6c4"
|
||||
dependencies = [
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
version = "1.0.7"
|
||||
|
|
@ -1317,6 +1351,7 @@ name = "lychee"
|
|||
version = "0.3.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"assert_cmd",
|
||||
"check-if-email-exists",
|
||||
"futures",
|
||||
"glob",
|
||||
|
|
@ -1326,6 +1361,7 @@ dependencies = [
|
|||
"indicatif",
|
||||
"linkify",
|
||||
"log",
|
||||
"predicates",
|
||||
"pretty_env_logger",
|
||||
"regex",
|
||||
"reqwest",
|
||||
|
|
@ -1525,6 +1561,12 @@ dependencies = [
|
|||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "normalize-line-endings"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be"
|
||||
|
||||
[[package]]
|
||||
name = "nuclei"
|
||||
version = "0.1.1"
|
||||
|
|
@ -1753,6 +1795,35 @@ version = "0.2.9"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c36fa947111f5c62a733b652544dd0016a43ce89619538a8ef92724a6f501a20"
|
||||
|
||||
[[package]]
|
||||
name = "predicates"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "96bfead12e90dccead362d62bb2c90a5f6fc4584963645bc7f71a735e0b0735a"
|
||||
dependencies = [
|
||||
"difference",
|
||||
"float-cmp",
|
||||
"normalize-line-endings",
|
||||
"predicates-core",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "predicates-core"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06075c3a3e92559ff8929e7a280684489ea27fe44805174c3ebd9328dcb37178"
|
||||
|
||||
[[package]]
|
||||
name = "predicates-tree"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e63c4859013b38a76eca2414c64911fba30def9e3202ac461a2d22831220124"
|
||||
dependencies = [
|
||||
"predicates-core",
|
||||
"treeline",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pretty_env_logger"
|
||||
version = "0.4.0"
|
||||
|
|
@ -2374,6 +2445,12 @@ dependencies = [
|
|||
"tracing-serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "treeline"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41"
|
||||
|
||||
[[package]]
|
||||
name = "trust-dns-proto"
|
||||
version = "0.19.5"
|
||||
|
|
@ -2508,6 +2585,15 @@ version = "0.9.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed"
|
||||
|
||||
[[package]]
|
||||
name = "wait-timeout"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "waker-fn"
|
||||
version = "1.0.0"
|
||||
|
|
|
|||
|
|
@ -34,3 +34,5 @@ version = "0.2"
|
|||
|
||||
[dev-dependencies]
|
||||
wiremock = "0.2.4"
|
||||
assert_cmd = "1.0"
|
||||
predicates = "1.0"
|
||||
|
|
|
|||
12
fixtures/TEST_ALL_PRIVATE.md
Normal file
12
fixtures/TEST_ALL_PRIVATE.md
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
Test file: "private" URLs (should all be excluded when using `-E` flag).
|
||||
|
||||
- Loopback: http://127.0.0.1
|
||||
- Link-local 1: http://169.254.0.1
|
||||
- Link-local 2: https://169.254.10.1:8080
|
||||
- Private class A: http://10.0.1.1
|
||||
- Private class B: http://172.16.42.42
|
||||
- Private class C: http://192.168.10.1
|
||||
|
||||
IPv6:
|
||||
|
||||
- Loopback: http://[::1]
|
||||
234
src/checker.rs
234
src/checker.rs
|
|
@ -1,4 +1,5 @@
|
|||
use crate::extract::{self, Uri};
|
||||
use crate::options::LycheeOptions;
|
||||
use anyhow::anyhow;
|
||||
use anyhow::{Context, Result};
|
||||
use check_if_email_exists::{check_email, CheckEmailInput};
|
||||
|
|
@ -6,6 +7,7 @@ use hubcaps::{Credentials, Github};
|
|||
use indicatif::ProgressBar;
|
||||
use regex::{Regex, RegexSet};
|
||||
use reqwest::header::{self, HeaderMap, HeaderValue};
|
||||
use std::net::IpAddr;
|
||||
use std::{collections::HashSet, convert::TryFrom, time::Duration};
|
||||
use url::Url;
|
||||
|
||||
|
|
@ -68,12 +70,46 @@ impl From<reqwest::Error> for Status {
|
|||
}
|
||||
}
|
||||
|
||||
/// Exclude configuration for the link checker.
|
||||
pub(crate) struct Excludes {
|
||||
regex: Option<RegexSet>,
|
||||
private_ips: bool,
|
||||
link_local_ips: bool,
|
||||
loopback_ips: bool,
|
||||
}
|
||||
|
||||
impl Excludes {
|
||||
pub fn from_options(options: &LycheeOptions) -> Self {
|
||||
// exclude_all_private option turns on all "private" excludes,
|
||||
// including private IPs, link-local IPs and loopback IPs
|
||||
let enable_exclude = |opt| opt || options.exclude_all_private;
|
||||
|
||||
Self {
|
||||
regex: RegexSet::new(&options.exclude).ok(),
|
||||
private_ips: enable_exclude(options.exclude_private),
|
||||
link_local_ips: enable_exclude(options.exclude_link_local),
|
||||
loopback_ips: enable_exclude(options.exclude_loopback),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Excludes {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
regex: None,
|
||||
private_ips: false,
|
||||
link_local_ips: false,
|
||||
loopback_ips: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A link checker using an API token for Github links
|
||||
/// otherwise a normal HTTP client.
|
||||
pub(crate) struct Checker<'a> {
|
||||
reqwest_client: reqwest::Client,
|
||||
github: Github,
|
||||
excludes: Option<RegexSet>,
|
||||
excludes: Excludes,
|
||||
scheme: Option<String>,
|
||||
method: RequestMethod,
|
||||
accepted: Option<HashSet<reqwest::StatusCode>>,
|
||||
|
|
@ -85,7 +121,7 @@ impl<'a> Checker<'a> {
|
|||
/// Creates a new link checker
|
||||
pub fn try_new(
|
||||
token: String,
|
||||
excludes: Option<RegexSet>,
|
||||
excludes: Excludes,
|
||||
max_redirects: usize,
|
||||
user_agent: String,
|
||||
allow_insecure: bool,
|
||||
|
|
@ -195,8 +231,8 @@ impl<'a> Checker<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
fn in_excludes(&self, input: &str) -> bool {
|
||||
if let Some(excludes) = &self.excludes {
|
||||
fn in_regex_excludes(&self, input: &str) -> bool {
|
||||
if let Some(excludes) = &self.excludes.regex {
|
||||
if excludes.is_match(input) {
|
||||
return true;
|
||||
}
|
||||
|
|
@ -204,8 +240,35 @@ impl<'a> Checker<'a> {
|
|||
false
|
||||
}
|
||||
|
||||
fn in_ip_excludes(&self, uri: &Uri) -> bool {
|
||||
if let Some(ipaddr) = uri.host_ip() {
|
||||
if self.excludes.loopback_ips && ipaddr.is_loopback() {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Note: in a pathological case, an IPv6 address can be IPv4-mapped
|
||||
// (IPv4 address embedded in a IPv6). We purposefully
|
||||
// don't deal with it here, and assume if an address is IPv6,
|
||||
// we shouldn't attempt to map it to IPv4.
|
||||
// See: https://tools.ietf.org/html/rfc4291#section-2.5.5.2
|
||||
if let IpAddr::V4(v4addr) = ipaddr {
|
||||
if self.excludes.private_ips && v4addr.is_private() {
|
||||
return true;
|
||||
}
|
||||
if self.excludes.link_local_ips && v4addr.is_link_local() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
pub fn excluded(&self, uri: &Uri) -> bool {
|
||||
if self.in_excludes(uri.as_str()) {
|
||||
if self.in_regex_excludes(uri.as_str()) {
|
||||
return true;
|
||||
}
|
||||
if self.in_ip_excludes(&uri) {
|
||||
return true;
|
||||
}
|
||||
if self.scheme.is_none() {
|
||||
|
|
@ -290,10 +353,29 @@ mod test {
|
|||
use wiremock::matchers::method;
|
||||
use wiremock::{Mock, MockServer, ResponseTemplate};
|
||||
|
||||
// Note: the standard library as of Rust stable 1.47.0 does not expose
|
||||
// "link-local" or "private" IPv6 checks. However, one might argue
|
||||
// that these concepts do exist in IPv6, albeit the naming is different.
|
||||
// See: https://en.wikipedia.org/wiki/Link-local_address#IPv6
|
||||
// See: https://en.wikipedia.org/wiki/Private_network#IPv6
|
||||
// See: https://doc.rust-lang.org/stable/std/net/struct.Ipv6Addr.html#method.is_unicast_link_local
|
||||
const V4_PRIVATE_CLASS_A: &str = "http://10.0.0.1";
|
||||
const V4_PRIVATE_CLASS_B: &str = "http://172.16.0.1";
|
||||
const V4_PRIVATE_CLASS_C: &str = "http://192.168.0.1";
|
||||
|
||||
const V4_LOOPBACK: &str = "http://127.0.0.1";
|
||||
const V6_LOOPBACK: &str = "http://[::1]";
|
||||
|
||||
const V4_LINK_LOCAL: &str = "http://169.254.0.1";
|
||||
|
||||
// IPv4-Mapped IPv6 addresses (IPv4 embedded in IPv6)
|
||||
const V6_MAPPED_V4_PRIVATE_CLASS_A: &str = "http://[::ffff:10.0.0.1]";
|
||||
const V6_MAPPED_V4_LINK_LOCAL: &str = "http://[::ffff:169.254.0.1]";
|
||||
|
||||
fn get_checker(allow_insecure: bool, custom_headers: HeaderMap) -> Checker<'static> {
|
||||
let checker = Checker::try_new(
|
||||
"DUMMY_GITHUB_TOKEN".to_string(),
|
||||
None,
|
||||
Excludes::default(),
|
||||
5,
|
||||
"curl/7.71.1".to_string(),
|
||||
allow_insecure,
|
||||
|
|
@ -309,12 +391,14 @@ mod test {
|
|||
checker
|
||||
}
|
||||
|
||||
fn website_url(s: &str) -> Uri {
|
||||
Uri::Website(Url::parse(s).expect("Expected valid Website Uri"))
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_nonexistent() {
|
||||
let res = get_checker(false, HeaderMap::new())
|
||||
.check(&Uri::Website(
|
||||
Url::parse("https://endler.dev/abcd").unwrap(),
|
||||
))
|
||||
.check(&website_url("https://endler.dev/abcd"))
|
||||
.await;
|
||||
assert!(matches!(res, Status::Failed(_)));
|
||||
}
|
||||
|
|
@ -332,9 +416,7 @@ mod test {
|
|||
async fn test_github() {
|
||||
assert!(matches!(
|
||||
get_checker(false, HeaderMap::new())
|
||||
.check(&Uri::Website(
|
||||
Url::parse("https://github.com/mre/idiomatic-rust").unwrap()
|
||||
))
|
||||
.check(&website_url("https://github.com/mre/idiomatic-rust"))
|
||||
.await,
|
||||
Status::Ok(_)
|
||||
));
|
||||
|
|
@ -343,8 +425,8 @@ mod test {
|
|||
#[tokio::test]
|
||||
async fn test_github_nonexistent() {
|
||||
let res = get_checker(false, HeaderMap::new())
|
||||
.check(&Uri::Website(
|
||||
Url::parse("https://github.com/mre/idiomatic-rust-doesnt-exist-man").unwrap(),
|
||||
.check(&website_url(
|
||||
"https://github.com/mre/idiomatic-rust-doesnt-exist-man",
|
||||
))
|
||||
.await;
|
||||
assert!(matches!(res, Status::Error(_)));
|
||||
|
|
@ -353,7 +435,7 @@ mod test {
|
|||
#[tokio::test]
|
||||
async fn test_non_github() {
|
||||
let res = get_checker(false, HeaderMap::new())
|
||||
.check(&Uri::Website(Url::parse("https://endler.dev").unwrap()))
|
||||
.check(&website_url("https://endler.dev"))
|
||||
.await;
|
||||
assert!(matches!(res, Status::Ok(_)));
|
||||
}
|
||||
|
|
@ -361,17 +443,13 @@ mod test {
|
|||
#[tokio::test]
|
||||
async fn test_invalid_ssl() {
|
||||
let res = get_checker(false, HeaderMap::new())
|
||||
.check(&Uri::Website(
|
||||
Url::parse("https://expired.badssl.com/").unwrap(),
|
||||
))
|
||||
.check(&website_url("https://expired.badssl.com/"))
|
||||
.await;
|
||||
assert!(matches!(res, Status::Error(_)));
|
||||
|
||||
// Same, but ignore certificate error
|
||||
let res = get_checker(true, HeaderMap::new())
|
||||
.check(&Uri::Website(
|
||||
Url::parse("https://expired.badssl.com/").unwrap(),
|
||||
))
|
||||
.check(&website_url("https://expired.badssl.com/"))
|
||||
.await;
|
||||
assert!(matches!(res, Status::Ok(_)));
|
||||
}
|
||||
|
|
@ -379,9 +457,7 @@ mod test {
|
|||
#[tokio::test]
|
||||
async fn test_custom_headers() {
|
||||
let res = get_checker(false, HeaderMap::new())
|
||||
.check(&Uri::Website(
|
||||
Url::parse("https://crates.io/keywords/cassandra").unwrap(),
|
||||
))
|
||||
.check(&website_url("https://crates.io/keywords/cassandra"))
|
||||
.await;
|
||||
assert!(matches!(res, Status::Failed(StatusCode::NOT_FOUND)));
|
||||
|
||||
|
|
@ -391,9 +467,7 @@ mod test {
|
|||
let mut custom = HeaderMap::new();
|
||||
custom.insert(header::ACCEPT, "text/html".parse().unwrap());
|
||||
let res = get_checker(true, custom)
|
||||
.check(&Uri::Website(
|
||||
Url::parse("https://crates.io/keywords/cassandra").unwrap(),
|
||||
))
|
||||
.check(&website_url("https://crates.io/keywords/cassandra"))
|
||||
.await;
|
||||
assert!(matches!(res, Status::Ok(_)));
|
||||
}
|
||||
|
|
@ -411,20 +485,21 @@ mod test {
|
|||
.await;
|
||||
|
||||
let res = get_checker(false, HeaderMap::new())
|
||||
.check(&Uri::Website(Url::parse(&mock_server.uri()).unwrap()))
|
||||
.check(&website_url(&mock_server.uri()))
|
||||
.await;
|
||||
println!("{:?}", res);
|
||||
assert!(matches!(res, Status::Timeout));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_exclude() {
|
||||
let excludes =
|
||||
RegexSet::new(&[r"github.com", r"[a-z]+\.(org|net)", r"@example.com"]).unwrap();
|
||||
async fn test_exclude_regex() {
|
||||
let mut excludes = Excludes::default();
|
||||
excludes.regex =
|
||||
Some(RegexSet::new(&[r"github.com", r"[a-z]+\.(org|net)", r"@example.com"]).unwrap());
|
||||
|
||||
let checker = Checker::try_new(
|
||||
"DUMMY_GITHUB_TOKEN".to_string(),
|
||||
Some(excludes),
|
||||
excludes,
|
||||
5,
|
||||
"curl/7.71.1".to_string(),
|
||||
true,
|
||||
|
|
@ -437,14 +512,8 @@ mod test {
|
|||
None,
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
checker.excluded(&Uri::Website(Url::parse("http://github.com").unwrap())),
|
||||
true
|
||||
);
|
||||
assert_eq!(
|
||||
checker.excluded(&Uri::Website(Url::parse("http://exclude.org").unwrap())),
|
||||
true
|
||||
);
|
||||
assert_eq!(checker.excluded(&website_url("http://github.com")), true);
|
||||
assert_eq!(checker.excluded(&website_url("http://exclude.org")), true);
|
||||
assert_eq!(
|
||||
checker.excluded(&Uri::Mail("mail@example.com".to_string())),
|
||||
true
|
||||
|
|
@ -454,4 +523,89 @@ mod test {
|
|||
false
|
||||
);
|
||||
}
|
||||
|
||||
/// Sanity check: the URL constants used by the exclude tests really parse to
/// addresses of the category their names claim.
#[test]
fn test_const_sanity() {
    // Extracts an owned host from a URL string.
    fn host_of(input: &str) -> url::Host<String> {
        let parsed = Url::parse(input).expect("Expected valid URL");
        parsed.host().expect("Expected host address").to_owned()
    }

    // Unwraps the host as IPv4, panicking for any other host kind.
    fn v4_of(input: &str) -> std::net::Ipv4Addr {
        match host_of(input) {
            url::Host::Ipv4(ipv4) => ipv4,
            _ => panic!("Not IPv4"),
        }
    }

    // Unwraps the host as IPv6, panicking for any other host kind.
    fn v6_of(input: &str) -> std::net::Ipv6Addr {
        match host_of(input) {
            url::Host::Ipv6(ipv6) => ipv6,
            _ => panic!("Not IPv6"),
        }
    }

    assert!(v4_of(V4_PRIVATE_CLASS_A).is_private());
    assert!(v4_of(V4_PRIVATE_CLASS_B).is_private());
    assert!(v4_of(V4_PRIVATE_CLASS_C).is_private());

    assert!(v4_of(V4_LOOPBACK).is_loopback());
    assert!(v6_of(V6_LOOPBACK).is_loopback());

    assert!(v4_of(V4_LINK_LOCAL).is_link_local());
}
|
||||
|
||||
/// With the default exclude configuration, no private, link-local or
/// loopback address is excluded.
#[test]
fn test_excludes_no_private_ips_by_default() {
    let checker = get_checker(false, HeaderMap::new());

    let urls = [
        V4_PRIVATE_CLASS_A,
        V4_PRIVATE_CLASS_B,
        V4_PRIVATE_CLASS_C,
        V4_LINK_LOCAL,
        V4_LOOPBACK,
        V6_LOOPBACK,
    ];
    for url in urls.iter() {
        assert!(
            !checker.excluded(&website_url(url)),
            "{} must not be excluded by default",
            url
        );
    }
}
|
||||
|
||||
/// Enabling the private-IP exclude filters out all three private IPv4 ranges.
#[test]
fn test_exclude_private() {
    let mut checker = get_checker(false, HeaderMap::new());
    checker.excludes.private_ips = true;

    let urls = [V4_PRIVATE_CLASS_A, V4_PRIVATE_CLASS_B, V4_PRIVATE_CLASS_C];
    for url in urls.iter() {
        assert!(
            checker.excluded(&website_url(url)),
            "{} must be excluded",
            url
        );
    }
}
|
||||
|
||||
/// Enabling the link-local exclude filters out the IPv4 link-local address.
#[test]
fn test_exclude_link_local() {
    let mut checker = get_checker(false, HeaderMap::new());
    checker.excludes.link_local_ips = true;

    let uri = website_url(V4_LINK_LOCAL);
    assert!(checker.excluded(&uri));
}
|
||||
|
||||
/// Enabling the loopback exclude filters out both IPv4 and IPv6 loopback.
#[test]
fn test_exclude_loopback() {
    let mut checker = get_checker(false, HeaderMap::new());
    checker.excludes.loopback_ips = true;

    for url in [V4_LOOPBACK, V6_LOOPBACK].iter() {
        assert!(
            checker.excluded(&website_url(url)),
            "{} must be excluded",
            url
        );
    }
}
|
||||
|
||||
/// IPv4-mapped IPv6 addresses are deliberately not unwrapped to IPv4, so the
/// IPv4-only excludes do not apply to them.
#[test]
fn test_exclude_ip_v4_mapped_ip_v6_not_supported() {
    let mut checker = get_checker(false, HeaderMap::new());
    checker.excludes.private_ips = true;
    checker.excludes.link_local_ips = true;

    // if these were pure IPv4, we would exclude
    let mapped = [V6_MAPPED_V4_PRIVATE_CLASS_A, V6_MAPPED_V4_LINK_LOCAL];
    for url in mapped.iter() {
        assert!(
            !checker.excluded(&website_url(url)),
            "{} must not be excluded",
            url
        );
    }
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
use linkify::LinkFinder;
|
||||
|
||||
use std::net::IpAddr;
|
||||
use std::{collections::HashSet, fmt::Display};
|
||||
use url::Url;
|
||||
|
||||
|
|
@ -23,6 +24,17 @@ impl Uri {
|
|||
Uri::Mail(_address) => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn host_ip(&self) -> Option<IpAddr> {
|
||||
match self {
|
||||
Self::Website(url) => match url.host()? {
|
||||
url::Host::Ipv4(v4_addr) => Some(v4_addr.into()),
|
||||
url::Host::Ipv6(v6_addr) => Some(v6_addr.into()),
|
||||
_ => None,
|
||||
},
|
||||
Self::Mail(_) => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Uri {
|
||||
|
|
@ -56,6 +68,7 @@ pub(crate) fn extract_links(input: &str) -> HashSet<Uri> {
|
|||
mod test {
|
||||
use super::*;
|
||||
use std::iter::FromIterator;
|
||||
use std::net::{Ipv4Addr, Ipv6Addr};
|
||||
|
||||
#[test]
|
||||
fn test_extract_markdown_links() {
|
||||
|
|
@ -113,4 +126,30 @@ mod test {
|
|||
assert!(links.len() == 1);
|
||||
assert_eq!(links[0].as_str(), expected);
|
||||
}
|
||||
|
||||
/// A website URI with a literal IPv4 host reports that address.
#[test]
fn test_uri_host_ip_v4() {
    let url = Url::parse("http://127.0.0.1").expect("Expected URI with valid IPv4");
    let ip = Uri::Website(url)
        .host_ip()
        .expect("Expected a valid IPv4");

    let expected = IpAddr::from(Ipv4Addr::new(127, 0, 0, 1));
    assert_eq!(ip, expected);
}
|
||||
|
||||
/// A website URI with a literal IPv6 host reports that address.
#[test]
fn test_uri_host_ip_v6() {
    let url = Url::parse("https://[2020::0010]").expect("Expected URI with valid IPv6");
    let ip = Uri::Website(url)
        .host_ip()
        .expect("Expected a valid IPv6");

    let expected = IpAddr::from(Ipv6Addr::new(0x2020, 0, 0, 0, 0, 0, 0, 0x10));
    assert_eq!(ip, expected);
}
|
||||
|
||||
/// A website URI whose host is a domain name has no host IP.
#[test]
fn test_uri_host_ip_no_ip() {
    let url = Url::parse("https://some.cryptic/url").expect("Expected valid URI");
    assert_eq!(Uri::Website(url).host_ip(), None);
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,7 +6,6 @@ use anyhow::Result;
|
|||
use futures::future::join_all;
|
||||
use gumdrop::Options;
|
||||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
use regex::RegexSet;
|
||||
use reqwest::header::{HeaderMap, HeaderName};
|
||||
use std::{collections::HashSet, convert::TryInto, env, time::Duration};
|
||||
|
||||
|
|
@ -15,7 +14,7 @@ mod collector;
|
|||
mod extract;
|
||||
mod options;
|
||||
|
||||
use checker::{Checker, Status};
|
||||
use checker::{Checker, Excludes, Status};
|
||||
use extract::Uri;
|
||||
use options::LycheeOptions;
|
||||
|
||||
|
|
@ -31,7 +30,7 @@ fn print_summary(found: &HashSet<Uri>, results: &Vec<Status>) {
|
|||
.count();
|
||||
let errors: usize = found - excluded - success;
|
||||
|
||||
println!("");
|
||||
println!();
|
||||
println!("📝Summary");
|
||||
println!("-------------------");
|
||||
println!("🔍Found: {}", found);
|
||||
|
|
@ -60,7 +59,7 @@ fn main() -> Result<()> {
|
|||
}
|
||||
|
||||
async fn run(opts: LycheeOptions) -> Result<i32> {
|
||||
let excludes = RegexSet::new(opts.exclude).unwrap();
|
||||
let excludes = Excludes::from_options(&opts);
|
||||
let headers = parse_headers(opts.headers)?;
|
||||
let accepted = match opts.accept {
|
||||
Some(accept) => parse_statuscodes(accept)?,
|
||||
|
|
@ -82,7 +81,7 @@ async fn run(opts: LycheeOptions) -> Result<i32> {
|
|||
};
|
||||
let checker = Checker::try_new(
|
||||
env::var("GITHUB_TOKEN")?,
|
||||
Some(excludes),
|
||||
excludes,
|
||||
opts.max_redirects,
|
||||
opts.user_agent,
|
||||
opts.insecure,
|
||||
|
|
|
|||
|
|
@ -38,6 +38,21 @@ pub(crate) struct LycheeOptions {
|
|||
#[options(help = "Exclude URLs from checking (supports regex)")]
|
||||
pub exclude: Vec<String>,
|
||||
|
||||
#[options(
|
||||
help = "Exclude all private IPs from checking, equivalent to `--exclude-private --exclude-link-local --exclude--loopback`",
|
||||
short = "E"
|
||||
)]
|
||||
pub exclude_all_private: bool,
|
||||
|
||||
#[options(help = "Exclude private IP address ranges from checking", no_short)]
|
||||
pub exclude_private: bool,
|
||||
|
||||
#[options(help = "Exclude link-local IP address range from checking", no_short)]
|
||||
pub exclude_link_local: bool,
|
||||
|
||||
#[options(help = "Exclude loopback IP address range from checking", no_short)]
|
||||
pub exclude_loopback: bool,
|
||||
|
||||
// Accumulate all headers in a vector
|
||||
#[options(help = "Custom request headers")]
|
||||
pub headers: Vec<String>,
|
||||
|
|
|
|||
31
tests/cli.rs
Normal file
31
tests/cli.rs
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
/// End-to-end tests that run the compiled binary against fixture files.
#[cfg(test)]
mod cli {
    use assert_cmd::Command;
    use predicates::str::contains;
    use std::path::Path;

    /// `--exclude-all-private` must exclude every link in the private-URL fixture.
    #[test]
    fn test_exclude_all_private() {
        // The main binary shares its name with the package (e.g. `lychee`).
        let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))
            .expect("Couldn't find the crate's main binary");

        // Anchor the fixture on the crate root instead of relying on the
        // working directory the test happens to be started from.
        let test_all_private_path = Path::new(env!("CARGO_MANIFEST_DIR"))
            .join("fixtures")
            .join("TEST_ALL_PRIVATE.md");

        // assert that the command runs OK, and that it excluded all the links
        cmd.env("GITHUB_TOKEN", "invalid-token")
            .arg("--exclude-all-private")
            .arg("--verbose")
            .arg(test_all_private_path)
            .assert()
            .success()
            .stdout(contains("Found: 7"))
            .stdout(contains("Excluded: 7"))
            .stdout(contains("Successful: 0"))
            .stdout(contains("Errors: 0"));
    }
}
|
||||
Loading…
Reference in a new issue