From 16cd67331a8be878cca74a26c5aa416ac9cf6f8d Mon Sep 17 00:00:00 2001 From: Matthias Endler Date: Thu, 18 Feb 2021 01:32:48 +0100 Subject: [PATCH 1/8] Add simple, standalone client Adds a new function `lychee::check()`, which removes a lot of boilerplate for simple cases. Adjusted the code, tests, and documentation. The downside is that `check` now returns a Result, so we have to use `?` to get to the response. That's because we have to account for the case where the given string is not a valid URI. --- README.md | 27 +++++++++++----- src/bin/lychee/main.rs | 2 +- src/bin/lychee/options.rs | 4 +-- src/client.rs | 67 +++++++++++++++++++++++++-------------- src/client_pool.rs | 6 ++-- src/extract.rs | 20 ++++++------ src/lib.rs | 25 ++++++++++----- src/types.rs | 26 ++++++++++++--- 8 files changed, 118 insertions(+), 59 deletions(-) diff --git a/README.md b/README.md index eecb1ec..3814f04 100644 --- a/README.md +++ b/README.md @@ -205,25 +205,35 @@ ARGS: ## Library usage You can use lychee as a library for your own projects. 
-Simply add it as a dependency and build your client: +Here is a "hello world" example: ```rust -use lychee::{Request, Input, ClientBuilder, Status}; -use lychee::Uri::Website; -use url::Url; +use std::error::Error; + +#[tokio::main] +async fn main() -> Result<(), Box> { + let response = lychee::check("https://github.com/lycheeverse/lychee").await?; + println!("{}", response); + Ok(()) +} +``` + +This is equivalent to the following snippet, in which we build our own client: + +```rust +use lychee::{ClientBuilder, Status}; use std::error::Error; #[tokio::main] async fn main() -> Result<(), Box> { let client = ClientBuilder::default().build()?; - let url = Url::parse("https://github.com/lycheeverse/lychee")?; - let response = client.check(Request::new(Website(url), Input::Stdin)).await; + let response = client.check("https://github.com/lycheeverse/lychee").await?; assert!(matches!(response.status, Status::Ok(_))); Ok(()) } ``` -The client is very customizable, e.g. +The client builder is very customizable: ```rust let client = lychee::ClientBuilder::default() @@ -242,11 +252,12 @@ let client = lychee::ClientBuilder::default() .build()?; ``` +All options that you set will be used for all link checks. See the [builder documentation](https://docs.rs/lychee/latest/lychee/struct.ClientBuilder.html) for all options. ## GitHub Action usage -GitHub Action is available as a separate repository: [lycheeverse/lychee-action](https://github.com/lycheeverse/lychee-action) +A GitHub Action that uses lychee is available as a separate repository: [lycheeverse/lychee-action](https://github.com/lycheeverse/lychee-action) which includes usage instructions. 
## Troubleshooting and workarounds diff --git a/src/bin/lychee/main.rs b/src/bin/lychee/main.rs index 5f8b97b..e6be371 100644 --- a/src/bin/lychee/main.rs +++ b/src/bin/lychee/main.rs @@ -77,7 +77,7 @@ fn show_progress(progress_bar: &Option, response: &Response, verbos fn fmt(stats: &ResponseStats, format: &Format) -> Result { Ok(match format { Format::String => stats.to_string(), - Format::JSON => serde_json::to_string_pretty(&stats)?, + Format::Json => serde_json::to_string_pretty(&stats)?, }) } diff --git a/src/bin/lychee/options.rs b/src/bin/lychee/options.rs index 17cb80f..5227d0e 100644 --- a/src/bin/lychee/options.rs +++ b/src/bin/lychee/options.rs @@ -16,7 +16,7 @@ const MAX_REDIRECTS: usize = 10; #[derive(Debug, Deserialize)] pub enum Format { String, - JSON, + Json, } impl FromStr for Format { @@ -24,7 +24,7 @@ impl FromStr for Format { fn from_str(format: &str) -> Result { match format { "string" => Ok(Format::String), - "json" => Ok(Format::JSON), + "json" => Ok(Format::Json), _ => Err(anyhow!("Could not parse format {}", format)), } } diff --git a/src/client.rs b/src/client.rs index 23a9b36..3530dee 100644 --- a/src/client.rs +++ b/src/client.rs @@ -1,12 +1,12 @@ -use anyhow::{anyhow, Context, Result}; +use anyhow::{anyhow, bail, Context, Result}; use check_if_email_exists::{check_email, CheckEmailInput}; use derive_builder::Builder; use headers::{HeaderMap, HeaderValue}; use hubcaps::{Credentials, Github}; use regex::{Regex, RegexSet}; use reqwest::header; -use std::net::IpAddr; use std::{collections::HashSet, time::Duration}; +use std::{convert::TryInto, net::IpAddr}; use tokio::time::sleep; use url::Url; @@ -153,30 +153,32 @@ impl ClientBuilder { } impl Client { - pub async fn check(&self, request: Request) -> Response { + pub async fn check>(&self, request: T) -> Result { + let request: Request = match request.try_into() { + Ok(request) => request, + Err(_e) => bail!("Invalid URI:"), + }; if self.excluded(&request) { - return 
Response::new(request.uri, Status::Excluded, request.source); + return Ok(Response::new(request.uri, Status::Excluded, request.source)); } let status = match request.uri { Uri::Website(ref url) => self.check_website(&url).await, Uri::Mail(ref address) => { - let valid = self.valid_mail(&address).await; - if valid { - // TODO: We should not be using a HTTP status code for mail - Status::Ok(http::StatusCode::OK) - } else { - Status::Error(format!("Invalid mail address: {}", address)) + // TODO: We should not be using a HTTP status code for mail + match self.valid_mail(&address).await { + true => Status::Ok(http::StatusCode::OK), + false => Status::Error(format!("Invalid mail address: {}", address)), } } }; - Response::new(request.uri, status, request.source) + Ok(Response::new(request.uri, status, request.source)) } pub async fn check_website(&self, url: &Url) -> Status { let mut retries: i64 = 3; let mut wait: u64 = 1; let status = loop { - let res = self.check_normal(&url).await; + let res = self.check_default(&url).await; match res.is_success() { true => return res, false => { @@ -216,7 +218,7 @@ impl Client { } } - async fn check_normal(&self, url: &Url) -> Status { + async fn check_default(&self, url: &Url) -> Status { let request = self .reqwest_client .request(self.method.clone(), url.as_str()); @@ -317,6 +319,14 @@ impl Client { } } +/// A convenience function to check a single URI +/// This is the most simple link check and avoids having to create a client manually. +/// For more complex scenarios, look into using the `ClientBuilder` instead. +pub async fn check>(request: T) -> Result { + let client = ClientBuilder::default().build()?; + Ok(client.check(request).await?) 
+} + #[cfg(test)] mod test { use crate::collector::Input; @@ -366,8 +376,9 @@ mod test { let res = ClientBuilder::default() .build() .unwrap() - .check(website_url(&mock_server.uri())) - .await; + .check(mock_server.uri()) + .await + .unwrap(); assert!(matches!(res.status, Status::Failed(_))); } @@ -385,7 +396,8 @@ mod test { .build() .unwrap() .check(website_url(&mock_server.uri())) - .await; + .await + .unwrap(); let end = start.elapsed(); assert!(matches!(res.status, Status::Failed(_))); @@ -414,6 +426,7 @@ mod test { .unwrap() .check(website_url("https://github.com/lycheeverse/lychee")) .await + .unwrap() .status, Status::Ok(_) )); @@ -424,8 +437,9 @@ mod test { let res = ClientBuilder::default() .build() .unwrap() - .check(website_url("https://github.com/lycheeverse/not-lychee")) + .check("https://github.com/lycheeverse/not-lychee") .await + .unwrap() .status; assert!(matches!(res, Status::Error(_))); } @@ -444,6 +458,7 @@ mod test { .unwrap() .check(website_url(&mock_server.uri())) .await + .unwrap() .status; assert!(matches!(res, Status::Ok(_))); } @@ -453,8 +468,9 @@ mod test { let res = ClientBuilder::default() .build() .unwrap() - .check(website_url("https://expired.badssl.com/")) - .await; + .check("https://expired.badssl.com/") + .await + .unwrap(); assert!(matches!(res.status, Status::Error(_))); // Same, but ignore certificate error @@ -462,8 +478,9 @@ mod test { .allow_insecure(true) .build() .unwrap() - .check(website_url("https://expired.badssl.com/")) - .await; + .check("https://expired.badssl.com/") + .await + .unwrap(); assert!(matches!(res.status, Status::Ok(_))); } @@ -473,7 +490,8 @@ mod test { .build() .unwrap() .check(website_url("https://crates.io/crates/lychee")) - .await; + .await + .unwrap(); assert!(matches!(res.status, Status::Failed(StatusCode::NOT_FOUND))); // Try again, but with a custom header. 
@@ -486,7 +504,8 @@ mod test { .build() .unwrap() .check(website_url("https://crates.io/crates/lychee")) - .await; + .await + .unwrap(); assert!(matches!(res.status, Status::Ok(_))); } @@ -511,7 +530,7 @@ mod test { .build() .unwrap(); - let resp = client.check(website_url(&mock_server.uri())).await; + let resp = client.check(website_url(&mock_server.uri())).await.unwrap(); assert!(matches!(resp.status, Status::Timeout(_))); } diff --git a/src/client_pool.rs b/src/client_pool.rs index 223baa4..56e58b3 100644 --- a/src/client_pool.rs +++ b/src/client_pool.rs @@ -25,8 +25,10 @@ impl ClientPool { let client = self.pool.get().await; let tx = self.tx.clone(); tokio::spawn(async move { - let resp = client.check(req).await; - tx.send(resp).await.unwrap(); + let resp = client.check(req).await.expect("Invalid URI"); + tx.send(resp) + .await + .expect("Cannot send response to channel"); }); } } diff --git a/src/extract.rs b/src/extract.rs index caf30da..07fe944 100644 --- a/src/extract.rs +++ b/src/extract.rs @@ -11,7 +11,7 @@ use url::Url; #[derive(Clone, Debug)] pub enum FileType { - HTML, + Html, Markdown, Plaintext, } @@ -29,7 +29,7 @@ impl> From

for FileType { match path.extension() { Some(ext) => match ext { _ if ext == "md" => FileType::Markdown, - _ if (ext == "htm" || ext == "html") => FileType::HTML, + _ if (ext == "htm" || ext == "html") => FileType::Html, _ => FileType::Plaintext, }, None => FileType::Plaintext, @@ -147,7 +147,7 @@ pub(crate) fn extract_links( ) -> HashSet { let links = match input_content.file_type { FileType::Markdown => extract_links_from_markdown(&input_content.content), - FileType::HTML => extract_links_from_html(&input_content.content), + FileType::Html => extract_links_from_html(&input_content.content), FileType::Plaintext => extract_links_from_plaintext(&input_content.content), }; @@ -234,7 +234,7 @@ mod test { "#; let links: HashSet = extract_links( - &InputContent::from_string(input, FileType::HTML), + &InputContent::from_string(input, FileType::Html), Some(Url::parse("https://github.com/lycheeverse/").unwrap()), ) .into_iter() @@ -305,7 +305,7 @@ mod test { fn test_extract_html5_not_valid_xml() { let input = load_fixture("TEST_HTML5.html"); let links: HashSet = - extract_links(&InputContent::from_string(&input, FileType::HTML), None) + extract_links(&InputContent::from_string(&input, FileType::Html), None) .into_iter() .map(|r| r.uri) .collect(); @@ -328,7 +328,7 @@ mod test { fn test_extract_html5_not_valid_xml_relative_links() { let input = load_fixture("TEST_HTML5.html"); let links: HashSet = extract_links( - &InputContent::from_string(&input, FileType::HTML), + &InputContent::from_string(&input, FileType::Html), Some(Url::parse("https://example.com").unwrap()), ) .into_iter() @@ -357,7 +357,7 @@ mod test { // this has been problematic with previous XML based parser let input = load_fixture("TEST_HTML5_LOWERCASE_DOCTYPE.html"); let links: HashSet = - extract_links(&InputContent::from_string(&input, FileType::HTML), None) + extract_links(&InputContent::from_string(&input, FileType::Html), None) .into_iter() .map(|r| r.uri) .collect(); @@ -375,7 +375,7 @@ mod test { // 
minified HTML with some quirky elements such as href attribute values specified without quotes let input = load_fixture("TEST_HTML5_MINIFIED.html"); let links: HashSet = - extract_links(&InputContent::from_string(&input, FileType::HTML), None) + extract_links(&InputContent::from_string(&input, FileType::Html), None) .into_iter() .map(|r| r.uri) .collect(); @@ -399,7 +399,7 @@ mod test { // malformed links shouldn't stop the parser from further parsing let input = load_fixture("TEST_HTML5_MALFORMED_LINKS.html"); let links: HashSet = - extract_links(&InputContent::from_string(&input, FileType::HTML), None) + extract_links(&InputContent::from_string(&input, FileType::Html), None) .into_iter() .map(|r| r.uri) .collect(); @@ -419,7 +419,7 @@ mod test { // the element name shouldn't matter for attributes like href, src, cite etc let input = load_fixture("TEST_HTML5_CUSTOM_ELEMENTS.html"); let links: HashSet = - extract_links(&InputContent::from_string(&input, FileType::HTML), None) + extract_links(&InputContent::from_string(&input, FileType::Html), None) .into_iter() .map(|r| r.uri) .collect(); diff --git a/src/lib.rs b/src/lib.rs index 32079c1..8345d59 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,22 +2,30 @@ /** * `lychee` is a library for checking links. -* The main struct of this crate is `ClientBuilder` which can be used to -* configure and run your own link checker. 
-* * "Hello world" example: * ``` +* use std::error::Error; * -* use lychee::{Request, Input, ClientBuilder, Status}; -* use lychee::Uri::Website; -* use url::Url; +* #[tokio::main] +* async fn main() -> Result<(), Box> { +* let response = lychee::check("https://github.com/lycheeverse/lychee").await?; +* println!("{}", response); +* Ok(()) +* } +* ``` +* +* For more specific use-cases you can build a lychee client yourself, +* using the `ClientBuilder` which can be used to +* configure and run your own link checker and grants full flexibility: +* +* ``` +* use lychee::{ClientBuilder, Status}; * use std::error::Error; * * #[tokio::main] * async fn main() -> Result<(), Box> { * let client = ClientBuilder::default().build()?; -* let url = Url::parse("https://github.com/lycheeverse/lychee")?; -* let response = client.check(Request::new(Website(url), Input::Stdin)).await; +* let response = client.check("https://github.com/lycheeverse/lychee").await?; * assert!(matches!(response.status, Status::Ok(_))); * Ok(()) * } @@ -33,6 +41,7 @@ pub mod collector; pub mod extract; pub mod test_utils; +pub use client::check; pub use client::ClientBuilder; pub use client_pool::ClientPool; pub use collector::Input; diff --git a/src/types.rs b/src/types.rs index d2040e6..9d75157 100644 --- a/src/types.rs +++ b/src/types.rs @@ -21,18 +21,36 @@ impl Display for Request { } } +impl TryFrom for Request { + type Error = anyhow::Error; + + fn try_from(s: String) -> Result { + let uri = Uri::try_from(s.as_str())?; + Ok(Request::new(uri, Input::String(s))) + } +} + +impl TryFrom<&str> for Request { + type Error = anyhow::Error; + + fn try_from(s: &str) -> Result { + let uri = Uri::try_from(s)?; + Ok(Request::new(uri, Input::String(s.to_owned()))) + } +} + /// Specifies how requests to websites will be made pub(crate) enum RequestMethod { - GET, - HEAD, + Get, + Head, } impl TryFrom for RequestMethod { type Error = anyhow::Error; fn try_from(value: String) -> Result { match 
value.to_lowercase().as_ref() { - "get" => Ok(RequestMethod::GET), - "head" => Ok(RequestMethod::HEAD), + "get" => Ok(RequestMethod::Get), + "head" => Ok(RequestMethod::Head), _ => Err(anyhow!("Only `get` and `head` allowed, got {}", value)), } } From fe5cea1de355dd8bef59486860e4fd68ee2c7672 Mon Sep 17 00:00:00 2001 From: Matthias Endler Date: Thu, 18 Feb 2021 11:14:00 +0100 Subject: [PATCH 2/8] Add doc-comment tests --- Cargo.lock | 1 + Cargo.toml | 1 + README.md | 12 ++++++------ src/lib.rs | 8 ++++++++ tests/{readme.rs => usage.rs} | 4 ++-- 5 files changed, 18 insertions(+), 8 deletions(-) rename tests/{readme.rs => usage.rs} (95%) diff --git a/Cargo.lock b/Cargo.lock index 39b84ea..2ab6e02 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1399,6 +1399,7 @@ dependencies = [ "check-if-email-exists", "deadpool", "derive_builder", + "doc-comment", "futures", "glob", "headers", diff --git a/Cargo.toml b/Cargo.toml index aeab718..3bb241c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,6 +66,7 @@ assert_cmd = "1.0.3" predicates = "1.0.7" uuid = { version = "0.8.2", features = ["v4"] } tempfile = "3.2.0" +doc-comment = "0.3.3" [features] vendored-openssl = ["openssl-sys/vendored"] diff --git a/README.md b/README.md index 3814f04..f6c53d6 100644 --- a/README.md +++ b/README.md @@ -89,13 +89,13 @@ You can run lychee directly from the commandline. #### Using cargo -``` +```sh cargo install lychee ``` #### Using the official Docker image -``` +```sh docker pull lycheeverse/lychee ``` @@ -108,13 +108,13 @@ You can download them from the [releases page](https://github.com/lycheeverse/ly Run it inside a repository with a `README.md`: -``` +```sh lychee ``` You can also specify various types of inputs: -``` +```sh # check links on a website: lychee https://endler.dev/ @@ -149,7 +149,7 @@ token with no extra permissions is enough to be able to check public repos links There is an extensive list of commandline parameters to customize the behavior, see below for a full list. 
-``` +```sh USAGE: lychee [FLAGS] [OPTIONS] [--] [inputs]... @@ -235,7 +235,7 @@ async fn main() -> Result<(), Box> { The client builder is very customizable: -```rust +```rust,ignore let client = lychee::ClientBuilder::default() .includes(includes) .excludes(excludes) diff --git a/src/lib.rs b/src/lib.rs index 8345d59..6cd698e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,6 +31,14 @@ * } * ``` */ + +#[cfg(doctest)] +#[macro_use] +extern crate doc_comment; + +#[cfg(doctest)] +doctest!("../README.md"); + mod client; mod client_pool; mod excludes; diff --git a/tests/readme.rs b/tests/usage.rs similarity index 95% rename from tests/readme.rs rename to tests/usage.rs index 7861dc4..9455dd9 100644 --- a/tests/readme.rs +++ b/tests/usage.rs @@ -41,13 +41,13 @@ mod readme { .expect("Invalid utf8 output for `--help`"); let readme = load_readme_text(); - const BACKTICKS_OFFSET: usize = 3; + const BACKTICKS_OFFSET: usize = 5; // marker: ```sh const NEWLINE_OFFSET: usize = 1; let usage_start = BACKTICKS_OFFSET + NEWLINE_OFFSET + readme - .find("```\nUSAGE:\n") + .find("```sh\nUSAGE:\n") .expect("Couldn't find USAGE section in README.md"); let usage_end = readme[usage_start..] From 5226cd10aac78e9fd9bf7fb0b14941e2328c065a Mon Sep 17 00:00:00 2001 From: Matthias Endler Date: Thu, 18 Feb 2021 21:42:00 +0100 Subject: [PATCH 3/8] Change exclude behavior to make includes more intuitive Previously an include alone would not mean that only the included patterns were checked. Only if an exclude was given as well, the includes would make a difference. Now, the includes on their own will work as expected. Moved the exclude methods into the `excludes` module. Also changed the order of exclude tests to do the fast lookup ones before the regex ones. Added tests to guarantee behavior in the future. 
--- src/client.rs | 101 ++++++++++++++++++++++++++---------------------- src/excludes.rs | 50 ++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 46 deletions(-) diff --git a/src/client.rs b/src/client.rs index 3530dee..8ef305c 100644 --- a/src/client.rs +++ b/src/client.rs @@ -5,8 +5,8 @@ use headers::{HeaderMap, HeaderValue}; use hubcaps::{Credentials, Github}; use regex::{Regex, RegexSet}; use reqwest::header; +use std::convert::TryInto; use std::{collections::HashSet, time::Duration}; -use std::{convert::TryInto, net::IpAddr}; use tokio::time::sleep; use url::Url; @@ -253,63 +253,29 @@ impl Client { } } - fn in_regex_excludes(&self, input: &str) -> bool { - if let Some(excludes) = &self.excludes.regex { - if excludes.is_match(input) { - return true; - } - } - false - } - - fn in_ip_excludes(&self, uri: &Uri) -> bool { - if let Some(ipaddr) = uri.host_ip() { - if self.excludes.loopback_ips && ipaddr.is_loopback() { - return true; - } - - // Note: in a pathological case, an IPv6 address can be IPv4-mapped - // (IPv4 address embedded in a IPv6). We purposefully - // don't deal with it here, and assume if an address is IPv6, - // we shouldn't attempt to map it to IPv4. 
- // See: https://tools.ietf.org/html/rfc4291#section-2.5.5.2 - if let IpAddr::V4(v4addr) = ipaddr { - if self.excludes.private_ips && v4addr.is_private() { - return true; - } - if self.excludes.link_local_ips && v4addr.is_link_local() { - return true; - } - } - } - - false - } - - pub fn is_mail_excluded(&self) -> bool { - self.excludes.mail - } - pub fn excluded(&self, request: &Request) -> bool { + if matches!(request.uri, Uri::Mail(_)) && self.excludes.is_mail_excluded() { + return true; + } + if self.excludes.ip(&request.uri) { + return true; + } if let Some(includes) = &self.includes { + if includes.is_empty() { + return false; + } if includes.is_match(request.uri.as_str()) { // Includes take precedence over excludes return false; } else { // In case we have includes and no excludes, // skip everything that was not included - if self.excludes.regex.is_none() { + if self.excludes.is_empty() { return true; } } } - if self.in_regex_excludes(request.uri.as_str()) { - return true; - } - if matches!(request.uri, Uri::Mail(_)) { - return self.is_mail_excluded(); - } - if self.in_ip_excludes(&request.uri) { + if self.excludes.regex(request.uri.as_str()) { return true; } if self.scheme.is_none() { @@ -550,6 +516,49 @@ mod test { ); } + #[tokio::test] + async fn test_includes_and_excludes_empty() { + // This is the pre-configured, empty set of excludes for a client + // In this case, only the requests matching the include set will be checked + let exclude = Some(RegexSet::empty()); + let includes = RegexSet::empty(); + + let client = ClientBuilder::default() + .includes(includes) + .excludes(exclude) + .build() + .unwrap(); + + assert_eq!( + client.excluded(&website_url("https://foo.github.com")), + false + ); + } + + #[tokio::test] + async fn test_include_with_empty_exclude() { + // This is the pre-configured, empty set of excludes for a client + // In this case, only the requests matching the include set will be checked + let exclude = Some(RegexSet::empty()); + let 
includes = RegexSet::new(&[r"foo.github.com"]).unwrap(); + + let client = ClientBuilder::default() + .includes(includes) + .excludes(exclude) + .build() + .unwrap(); + + assert_eq!( + client.excluded(&website_url("https://foo.github.com")), + false + ); + assert_eq!(client.excluded(&website_url("https://github.com")), true); + assert_eq!( + client.excluded(&website_url("https://bar.github.com")), + true + ); + } + #[tokio::test] async fn test_exclude_include_regex() { let exclude = Some(RegexSet::new(&[r"github.com"]).unwrap()); diff --git a/src/excludes.rs b/src/excludes.rs index a9692c1..2d981cb 100644 --- a/src/excludes.rs +++ b/src/excludes.rs @@ -1,5 +1,9 @@ +use std::net::IpAddr; + use regex::RegexSet; +use crate::Uri; + /// Exclude configuration for the link checker. /// You can ignore links based on regex patterns or pre-defined IP ranges. #[derive(Clone, Debug)] @@ -27,3 +31,49 @@ impl Default for Excludes { } } } + +impl Excludes { + pub fn regex(&self, input: &str) -> bool { + if let Some(excludes) = &self.regex { + if excludes.is_match(input) { + return true; + } + } + false + } + + pub fn ip(&self, uri: &Uri) -> bool { + if let Some(ipaddr) = uri.host_ip() { + if self.loopback_ips && ipaddr.is_loopback() { + return true; + } + + // Note: in a pathological case, an IPv6 address can be IPv4-mapped + // (IPv4 address embedded in a IPv6). We purposefully + // don't deal with it here, and assume if an address is IPv6, + // we shouldn't attempt to map it to IPv4. 
+ // See: https://tools.ietf.org/html/rfc4291#section-2.5.5.2 + if let IpAddr::V4(v4addr) = ipaddr { + if self.private_ips && v4addr.is_private() { + return true; + } + if self.link_local_ips && v4addr.is_link_local() { + return true; + } + } + } + + false + } + + pub fn is_mail_excluded(&self) -> bool { + self.mail + } + + pub fn is_empty(&self) -> bool { + match &self.regex { + None => true, + Some(regex_set) => regex_set.is_empty(), + } + } +} From 59c6093c8a5b62af2946803ada46b8cde54f8045 Mon Sep 17 00:00:00 2001 From: Matthias Endler Date: Thu, 18 Feb 2021 23:33:14 +0100 Subject: [PATCH 4/8] Formatting --- src/bin/lychee/main.rs | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/bin/lychee/main.rs b/src/bin/lychee/main.rs index e6be371..65b87be 100644 --- a/src/bin/lychee/main.rs +++ b/src/bin/lychee/main.rs @@ -49,7 +49,8 @@ fn run_main() -> Result { let runtime = match cfg.threads { Some(threads) => { - // We define our own runtime instead of the `tokio::main` attribute since we want to make the number of threads configurable + // We define our own runtime instead of the `tokio::main` attribute + // since we want to make the number of threads configurable tokio::runtime::Builder::new_multi_thread() .worker_threads(threads) .enable_all() @@ -62,13 +63,14 @@ fn run_main() -> Result { } fn show_progress(progress_bar: &Option, response: &Response, verbose: bool) { - if (response.status.is_success() || response.status.is_excluded()) && !verbose { - return; - } - // Regular println! 
interferes with progress bar if let Some(pb) = progress_bar { pb.inc(1); - pb.println(response.to_string()); + pb.set_message(&response.to_string()); + // pb.println(response.to_string()); + return; + } + if (response.status.is_success() || response.status.is_excluded()) && !verbose { + return; } else { println!("{}", response); } @@ -122,14 +124,16 @@ async fn run(cfg: &Config, inputs: Vec) -> Result { .await?; let pb = if cfg.progress { - Some( - ProgressBar::new(links.len() as u64) + let bar = ProgressBar::new(links.len() as u64) .with_style( ProgressStyle::default_bar() - .template("{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} ({eta}) {wide_msg}") - .progress_chars("#>-") - ) - ) + .template("{spinner:.blue.bright} Checking:.bold {bar:40} {wide_msg:.dim}") + // .template("{prefix:.bold.dim} {spinner} {msg}") + // .template("{spinner:.green} [{elapsed_precise}] [{bar:40.yellow/green}] {pos}/{len} ({eta}) {wide_message}") + // .progress_chars("#>-"), + ); + bar.enable_steady_tick(100); + Some(bar) } else { None }; From 69e3b2cffc0fbbd05cd3bc1d30735e0f994ca62f Mon Sep 17 00:00:00 2001 From: Matthias Endler Date: Thu, 18 Feb 2021 23:33:26 +0100 Subject: [PATCH 5/8] Formatting --- src/bin/lychee/stats.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bin/lychee/stats.rs b/src/bin/lychee/stats.rs index 6de2565..6994e69 100644 --- a/src/bin/lychee/stats.rs +++ b/src/bin/lychee/stats.rs @@ -76,7 +76,7 @@ fn write_stat(f: &mut fmt::Formatter, title: &str, stat: usize) -> fmt::Result { impl Display for ResponseStats { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let separator = "-".repeat(MAX_PADDING); + let separator = "-".repeat(MAX_PADDING+1); writeln!(f, "📝 Summary")?; writeln!(f, "{}", separator)?; From 1a95aa4a446092aa063eaf7367a2a439e482562f Mon Sep 17 00:00:00 2001 From: Matthias Endler Date: Fri, 19 Feb 2021 00:05:14 +0100 Subject: [PATCH 6/8] Format --- src/bin/lychee/stats.rs | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bin/lychee/stats.rs b/src/bin/lychee/stats.rs index 6994e69..de53eaa 100644 --- a/src/bin/lychee/stats.rs +++ b/src/bin/lychee/stats.rs @@ -76,7 +76,7 @@ fn write_stat(f: &mut fmt::Formatter, title: &str, stat: usize) -> fmt::Result { impl Display for ResponseStats { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let separator = "-".repeat(MAX_PADDING+1); + let separator = "-".repeat(MAX_PADDING + 1); writeln!(f, "📝 Summary")?; writeln!(f, "{}", separator)?; From a4b1609424ebd39f9b53f7d9be94400146e5a583 Mon Sep 17 00:00:00 2001 From: Matthias Endler Date: Fri, 19 Feb 2021 00:23:35 +0100 Subject: [PATCH 7/8] Improve progress visualization --- src/bin/lychee/main.rs | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/src/bin/lychee/main.rs b/src/bin/lychee/main.rs index 65b87be..32fa30d 100644 --- a/src/bin/lychee/main.rs +++ b/src/bin/lychee/main.rs @@ -66,12 +66,13 @@ fn show_progress(progress_bar: &Option, response: &Response, verbos if let Some(pb) = progress_bar { pb.inc(1); pb.set_message(&response.to_string()); - // pb.println(response.to_string()); - return; - } - if (response.status.is_success() || response.status.is_excluded()) && !verbose { - return; + if verbose { + pb.println(response.to_string()); + } } else { + if (response.status.is_success() || response.status.is_excluded()) && !verbose { + return; + } println!("{}", response); } } @@ -124,14 +125,10 @@ async fn run(cfg: &Config, inputs: Vec) -> Result { .await?; let pb = if cfg.progress { - let bar = ProgressBar::new(links.len() as u64) - .with_style( - ProgressStyle::default_bar() - .template("{spinner:.blue.bright} Checking:.bold {bar:40} {wide_msg:.dim}") - // .template("{prefix:.bold.dim} {spinner} {msg}") - // .template("{spinner:.green} [{elapsed_precise}] [{bar:40.yellow/green}] {pos}/{len} ({eta}) {wide_message}") - // .progress_chars("#>-"), - ); + let bar = + 
ProgressBar::new(links.len() as u64).with_style(ProgressStyle::default_bar().template( + "{spinner:.red.bright} {pos}/{len:.dim} [{elapsed_precise}] {bar:25} {wide_msg}", + )); bar.enable_steady_tick(100); Some(bar) } else { @@ -168,7 +165,7 @@ async fn run(cfg: &Config, inputs: Vec) -> Result { // Note that print statements may interfere with the progress bar, so this // must go before printing the stats if let Some(pb) = &pb { - pb.finish_and_clear(); + pb.finish_with_message("Done"); } let stats_formatted = fmt(&stats, &cfg.format)?; From 678dc4f18eb5dc301862bafff54c94eede380fe6 Mon Sep 17 00:00:00 2001 From: Matthias Endler Date: Fri, 19 Feb 2021 00:41:49 +0100 Subject: [PATCH 8/8] Add support for colored output --- Cargo.lock | 1 + Cargo.toml | 1 + src/bin/lychee/main.rs | 22 +++++++++++++++++++--- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2ab6e02..0bd0e12 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1397,6 +1397,7 @@ dependencies = [ "anyhow", "assert_cmd", "check-if-email-exists", + "console", "deadpool", "derive_builder", "doc-comment", diff --git a/Cargo.toml b/Cargo.toml index 3bb241c..3f7edb6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,6 +52,7 @@ serde_json = "1.0.62" # https://github.com/Homebrew/homebrew-core/pull/70216 ring = "0.16.19" pad = "0.1.6" +console = "0.14.0" [dependencies.reqwest] features = ["gzip"] diff --git a/src/bin/lychee/main.rs b/src/bin/lychee/main.rs index 32fa30d..870890d 100644 --- a/src/bin/lychee/main.rs +++ b/src/bin/lychee/main.rs @@ -1,4 +1,5 @@ use anyhow::{anyhow, Context, Result}; +use console::style; use headers::authorization::Basic; use headers::{Authorization, HeaderMap, HeaderMapExt, HeaderName}; use indicatif::{ProgressBar, ProgressStyle}; @@ -15,7 +16,10 @@ mod stats; use crate::options::{Config, LycheeOptions}; use crate::stats::ResponseStats; -use lychee::collector::{self, Input}; +use lychee::{ + collector::{self, Input}, + Status, +}; use 
lychee::{ClientBuilder, ClientPool, Response}; /// A C-like enum that can be cast to `i32` and used as process exit code. @@ -62,18 +66,30 @@ fn run_main() -> Result { runtime.block_on(run(cfg, opts.inputs())) } +fn color_response(response: &Response) -> String { + let out = match response.status { + Status::Ok(_) => style(response).green().bright(), + Status::Redirected(_) => style(response), + Status::Excluded => style(response).dim(), + Status::Error(_) => style(response).yellow().bright(), + Status::Timeout(_) => style(response).yellow().bright(), + Status::Failed(_) => style(response).red().bright(), + }; + out.to_string() +} + fn show_progress(progress_bar: &Option, response: &Response, verbose: bool) { if let Some(pb) = progress_bar { pb.inc(1); pb.set_message(&response.to_string()); if verbose { - pb.println(response.to_string()); + pb.println(color_response(response)); } } else { if (response.status.is_success() || response.status.is_excluded()) && !verbose { return; } - println!("{}", response); + println!("{}", color_response(response)); } }