diff --git a/Cargo.lock b/Cargo.lock index 272cbc5..39b84ea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1392,7 +1392,7 @@ dependencies = [ [[package]] name = "lychee" -version = "0.5.0" +version = "0.6.0" dependencies = [ "anyhow", "assert_cmd", @@ -1411,6 +1411,7 @@ dependencies = [ "markup5ever", "markup5ever_rcdom", "openssl-sys", + "pad", "predicates", "pulldown-cmark", "regex", @@ -1690,6 +1691,15 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "pad" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2ad9b889f1b12e0b9ee24db044b5129150d5eada288edc800f789928dc8c0e3" +dependencies = [ + "unicode-width", +] + [[package]] name = "parking" version = "2.0.0" diff --git a/Cargo.toml b/Cargo.toml index eb6134d..aeab718 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ keywords = [ license = "Apache-2.0/MIT" name = "lychee" repository = "https://github.com/lycheeverse/lychee" -version = "0.5.0" +version = "0.6.0" [dependencies] anyhow = "1.0.38" @@ -51,6 +51,7 @@ serde_json = "1.0.62" # This is necessary for the homebrew build # https://github.com/Homebrew/homebrew-core/pull/70216 ring = "0.16.19" +pad = "0.1.6" [dependencies.reqwest] features = ["gzip"] diff --git a/README.md b/README.md index 9396033..eecb1ec 100644 --- a/README.md +++ b/README.md @@ -174,7 +174,7 @@ OPTIONS: --basic-auth Basic authentication support. E.g. `username:password` -c, --config Configuration file to use [default: ./lychee.toml] --exclude ... Exclude URLs from checking (supports regex) - -f, --format Output file format of status report [default: string] + -f, --format Output file format of status report (json, string) [default: string] --github-token GitHub API token to use when checking github.com links, to avoid rate limiting [env: GITHUB_TOKEN=] -h, --headers ... Custom request headers @@ -187,7 +187,7 @@ OPTIONS: -T, --threads Number of threads to utilize. Defaults to number of cores available to the system -t, --timeout Website timeout from connect to response finished [default: 20] - -u, --user-agent User agent [default: lychee/0.5.0] + -u, --user-agent User agent [default: lychee/0.6.0] ARGS: ... The inputs (where to get links to check from). These can be: files (e.g. `README.md`), file globs @@ -208,12 +208,19 @@ You can use lychee as a library for your own projects. Simply add it as a dependency and build your client: ```rust -use http::StatusCode +use lychee::{Request, Input, ClientBuilder, Status}; +use lychee::Uri::Website; +use url::Url; +use std::error::Error; -let client = lychee::ClientBuilder::default().build()?; -let url = Url::parse("https://github.com/lycheeverse/lychee")?; -let response = client.check(Website(url)).await?; -assert!(matches!(response.status, Status::Ok(_))); +#[tokio::main] +async fn main() -> Result<(), Box> { + let client = ClientBuilder::default().build()?; + let url = Url::parse("https://github.com/lycheeverse/lychee")?; + let response = client.check(Request::new(Website(url), Input::Stdin)).await; + assert!(matches!(response.status, Status::Ok(_))); + Ok(()) +} ``` The client is very customizable, e.g. diff --git a/src/bin/lychee/main.rs b/src/bin/lychee/main.rs index 6ed843f..5f8b97b 100644 --- a/src/bin/lychee/main.rs +++ b/src/bin/lychee/main.rs @@ -16,7 +16,7 @@ use crate::options::{Config, LycheeOptions}; use crate::stats::ResponseStats; use lychee::collector::{self, Input}; -use lychee::{ClientBuilder, ClientPool, Response, Status}; +use lychee::{ClientBuilder, ClientPool, Response}; /// A C-like enum that can be cast to `i32` and used as process exit code. enum ExitCode { @@ -62,22 +62,22 @@ fn run_main() -> Result { } fn show_progress(progress_bar: &Option, response: &Response, verbose: bool) { - let message = status_message(&response, verbose); + if (response.status.is_success() || response.status.is_excluded()) && !verbose { + return; + } + // Regular println! interferes with progress bar if let Some(pb) = progress_bar { pb.inc(1); - // regular println! interferes with progress bar - if let Some(message) = message { - pb.println(message); - } - } else if let Some(message) = message { - println!("{}", message); - }; + pb.println(response.to_string()); + } else { + println!("{}", response); + } } fn fmt(stats: &ResponseStats, format: &Format) -> Result { Ok(match format { Format::String => stats.to_string(), - Format::JSON => serde_json::to_string(&stats)?, + Format::JSON => serde_json::to_string_pretty(&stats)?, }) } @@ -120,6 +120,7 @@ async fn run(cfg: &Config, inputs: Vec) -> Result { max_concurrency, ) .await?; + let pb = if cfg.progress { Some( ProgressBar::new(links.len() as u64) @@ -166,13 +167,11 @@ async fn run(cfg: &Config, inputs: Vec) -> Result { pb.finish_and_clear(); } - if cfg.verbose { - println!("\n{}", stats); - } - + let stats_formatted = fmt(&stats, &cfg.format)?; if let Some(output) = &cfg.output { - fs::write(output, fmt(&stats, &cfg.format)?) - .context("Cannot write status output to file")?; + fs::write(output, stats_formatted).context("Cannot write status output to file")?; + } else { + println!("\n{}", stats_formatted); } match stats.is_success() { @@ -228,18 +227,6 @@ fn parse_basic_auth(auth: &str) -> Result> { Ok(Authorization::basic(params[0], params[1])) } -fn status_message(response: &Response, verbose: bool) -> Option { - match &response.status { - Status::Ok(code) if verbose => Some(format!("✅ {} [{}]", response.uri, code)), - Status::Redirected if verbose => Some(format!("🔀️ {}", response.uri)), - Status::Excluded if verbose => Some(format!("👻 {}", response.uri)), - Status::Failed(code) => Some(format!("🚫 {} [{}]", response.uri, code)), - Status::Error(e) => Some(format!("⚡ {} ({})", response.uri, e)), - Status::Timeout => Some(format!("⌛ {}", response.uri)), - _ => None, - } -} - #[cfg(test)] mod test { use super::*; diff --git a/src/bin/lychee/options.rs b/src/bin/lychee/options.rs index 84a6e96..17cb80f 100644 --- a/src/bin/lychee/options.rs +++ b/src/bin/lychee/options.rs @@ -239,7 +239,7 @@ pub struct Config { #[serde(default)] pub output: Option, - /// Output file format of status report + /// Output file format of status report (json, string) #[structopt(short, long, default_value = "string")] #[serde(default)] pub format: Format, diff --git a/src/bin/lychee/stats.rs b/src/bin/lychee/stats.rs index 1525c0c..6de2565 100644 --- a/src/bin/lychee/stats.rs +++ b/src/bin/lychee/stats.rs @@ -1,65 +1,153 @@ -use serde::{Deserialize, Serialize}; +use pad::{Alignment, PadStr}; +use serde::Serialize; use std::{ - collections::HashSet, + collections::{HashMap, HashSet}, fmt::{self, Display}, }; -use lychee::{Response, Status::*, Uri}; +use lychee::{collector::Input, Response, Status::*}; -#[derive(Serialize, Deserialize)] +// Maximum padding for each entry in the final statistics output +const MAX_PADDING: usize = 20; + +#[derive(Serialize)] pub struct ResponseStats { total: usize, successful: usize, - failures: HashSet, - timeouts: HashSet, - redirects: HashSet, - excludes: HashSet, - errors: HashSet, + failures: usize, + timeouts: usize, + redirects: usize, + excludes: usize, + errors: usize, + fail_map: HashMap>, } impl ResponseStats { pub fn new() -> Self { + let fail_map = HashMap::new(); ResponseStats { total: 0, successful: 0, - failures: HashSet::new(), - timeouts: HashSet::new(), - redirects: HashSet::new(), - excludes: HashSet::new(), - errors: HashSet::new(), + failures: 0, + timeouts: 0, + redirects: 0, + excludes: 0, + errors: 0, + fail_map, } } pub fn add(&mut self, response: Response) { self.total += 1; - let uri = response.uri; - if !match response.status { - Failed(_) => self.failures.insert(uri), - Timeout => self.timeouts.insert(uri), - Redirected => self.redirects.insert(uri), - Excluded => self.excludes.insert(uri), - Error(_) => self.errors.insert(uri), - _ => false, - } { - self.successful += 1; + match response.status { + Failed(_) => self.failures += 1, + Timeout(_) => self.timeouts += 1, + Redirected(_) => self.redirects += 1, + Excluded => self.excludes += 1, + Error(_) => self.errors += 1, + _ => self.successful += 1, } + + if matches!( + response.status, + Failed(_) | Timeout(_) | Redirected(_) | Error(_) + ) { + let fail = self.fail_map.entry(response.source.clone()).or_default(); + fail.insert(response); + }; } pub fn is_success(&self) -> bool { - self.total == self.successful + self.excludes.len() + self.total == self.successful + self.excludes } } +fn write_stat(f: &mut fmt::Formatter, title: &str, stat: usize) -> fmt::Result { + let fill = title.chars().count(); + f.write_str(title)?; + f.write_str( + &stat + .to_string() + .pad(MAX_PADDING - fill, '.', Alignment::Right, false), + )?; + f.write_str("\n") +} + impl Display for ResponseStats { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let separator = "-".repeat(MAX_PADDING); + writeln!(f, "📝 Summary")?; - writeln!(f, "-------------------")?; - writeln!(f, "🔍 Total: {}", self.total)?; - writeln!(f, "✅ Successful: {}", self.successful)?; - writeln!(f, "⏳ Timeouts: {}", self.timeouts.len())?; - writeln!(f, "🔀 Redirected: {}", self.redirects.len())?; - writeln!(f, "👻 Excluded: {}", self.excludes.len())?; - writeln!(f, "🚫 Errors: {}", self.errors.len() + self.failures.len()) + writeln!(f, "{}", separator)?; + write_stat(f, "🔍 Total", self.total)?; + write_stat(f, "✅ Successful", self.successful)?; + write_stat(f, "⏳ Timeouts", self.timeouts)?; + write_stat(f, "🔀 Redirected", self.redirects)?; + write_stat(f, "👻 Excluded", self.excludes)?; + write_stat(f, "🚫 Errors", self.errors + self.failures)?; + + if !&self.fail_map.is_empty() { + writeln!(f)?; + } + for (input, responses) in &self.fail_map { + writeln!(f, "Input: {}", input)?; + for response in responses { + writeln!( + f, + " {} {}\n {}", + response.status.icon(), + response.uri, + response.status + )? + } + } + writeln!(f) + } +} + +#[cfg(test)] +mod test_super { + use lychee::{test_utils::website, Status}; + + use super::*; + + #[test] + fn test_stats() { + let mut stats = ResponseStats::new(); + stats.add(Response { + uri: website("http://example.com/ok"), + status: Status::Ok(http::StatusCode::OK), + source: Input::Stdin, + }); + stats.add(Response { + uri: website("http://example.com/failed"), + status: Status::Failed(http::StatusCode::BAD_GATEWAY), + source: Input::Stdin, + }); + stats.add(Response { + uri: website("http://example.com/redirect"), + status: Status::Redirected(http::StatusCode::PERMANENT_REDIRECT), + source: Input::Stdin, + }); + let mut expected_map = HashMap::new(); + expected_map.insert( + Input::Stdin, + vec![ + Response { + uri: website("http://example.com/failed"), + status: Status::Failed(http::StatusCode::BAD_GATEWAY), + source: Input::Stdin, + }, + Response { + uri: website("http://example.com/redirect"), + status: Status::Redirected(http::StatusCode::PERMANENT_REDIRECT), + source: Input::Stdin, + }, + ] + .into_iter() + .collect::>(), + ); + assert_eq!(stats.fail_map, expected_map); } } diff --git a/src/client.rs b/src/client.rs index 71db2ea..23a9b36 100644 --- a/src/client.rs +++ b/src/client.rs @@ -10,9 +10,9 @@ use std::{collections::HashSet, time::Duration}; use tokio::time::sleep; use url::Url; -use crate::excludes::Excludes; use crate::types::{Response, Status}; use crate::uri::Uri; +use crate::{excludes::Excludes, Request}; const VERSION: &str = env!("CARGO_PKG_VERSION"); const DEFAULT_MAX_REDIRECTS: usize = 5; @@ -153,6 +153,52 @@ impl ClientBuilder { } impl Client { + pub async fn check(&self, request: Request) -> Response { + if self.excluded(&request) { + return Response::new(request.uri, Status::Excluded, request.source); + } + let status = match request.uri { + Uri::Website(ref url) => self.check_website(&url).await, + Uri::Mail(ref address) => { + let valid = self.valid_mail(&address).await; + if valid { + // TODO: We should not be using a HTTP status code for mail + Status::Ok(http::StatusCode::OK) + } else { + Status::Error(format!("Invalid mail address: {}", address)) + } + } + }; + Response::new(request.uri, status, request.source) + } + + pub async fn check_website(&self, url: &Url) -> Status { + let mut retries: i64 = 3; + let mut wait: u64 = 1; + let status = loop { + let res = self.check_normal(&url).await; + match res.is_success() { + true => return res, + false => { + if retries > 0 { + retries -= 1; + sleep(Duration::from_secs(wait)).await; + wait *= 2; + } else { + break res; + } + } + } + }; + // Pull out the heavy weapons in case of a failed normal request. + // This could be a Github URL and we run into the rate limiter. + if let Ok((owner, repo)) = self.extract_github(url.as_str()) { + return self.check_github(owner, repo).await; + } + + status + } + async fn check_github(&self, owner: String, repo: String) -> Status { match &self.github { Some(github) => { @@ -189,33 +235,6 @@ impl Client { Ok((owner.as_str().into(), repo.as_str().into())) } - pub async fn check_real(&self, url: &Url) -> Status { - let mut retries: i64 = 3; - let mut wait: u64 = 1; - let status = loop { - let res = self.check_normal(&url).await; - match res.is_success() { - true => return res, - false => { - if retries > 0 { - retries -= 1; - sleep(Duration::from_secs(wait)).await; - wait *= 2; - } else { - break res; - } - } - } - }; - // Pull out the heavy weapons in case of a failed normal request. - // This could be a Github URL and we run into the rate limiter. - if let Ok((owner, repo)) = self.extract_github(url.as_str()) { - return self.check_github(owner, repo).await; - } - - status - } - pub async fn valid_mail(&self, address: &str) -> bool { let input = CheckEmailInput::new(vec![address.to_string()]); let results = check_email(&input).await; @@ -269,9 +288,9 @@ impl Client { self.excludes.mail } - pub fn excluded(&self, uri: &Uri) -> bool { + pub fn excluded(&self, request: &Request) -> bool { if let Some(includes) = &self.includes { - if includes.is_match(uri.as_str()) { + if includes.is_match(request.uri.as_str()) { // Includes take precedence over excludes return false; } else { @@ -282,43 +301,26 @@ impl Client { } } } - if self.in_regex_excludes(uri.as_str()) { + if self.in_regex_excludes(request.uri.as_str()) { return true; } - if matches!(uri, Uri::Mail(_)) { + if matches!(request.uri, Uri::Mail(_)) { return self.is_mail_excluded(); } - if self.in_ip_excludes(&uri) { + if self.in_ip_excludes(&request.uri) { return true; } if self.scheme.is_none() { return false; } - uri.scheme() != self.scheme - } - - pub async fn check(&self, uri: Uri) -> Response { - if self.excluded(&uri) { - return Response::new(uri, Status::Excluded); - } - let status = match uri { - Uri::Website(ref url) => self.check_real(&url).await, - Uri::Mail(ref address) => { - let valid = self.valid_mail(&address).await; - if valid { - // TODO: We should not be using a HTTP status code for mail - Status::Ok(http::StatusCode::OK) - } else { - Status::Error(format!("Invalid mail address: {}", address)) - } - } - }; - Response::new(uri, status) + request.uri.scheme() != self.scheme } } #[cfg(test)] mod test { + use crate::collector::Input; + use super::*; use http::StatusCode; use std::time::{Duration, Instant}; @@ -345,8 +347,11 @@ mod test { const V6_MAPPED_V4_PRIVATE_CLASS_A: &str = "http://[::ffff:10.0.0.1]"; const V6_MAPPED_V4_LINK_LOCAL: &str = "http://[::ffff:169.254.0.1]"; - fn website_url(s: &str) -> Uri { - Uri::Website(Url::parse(s).expect("Expected valid Website URI")) + fn website_url(s: &str) -> Request { + Request::new( + Uri::Website(Url::parse(s).expect("Expected valid Website URI")), + Input::Stdin, + ) } #[tokio::test] @@ -507,7 +512,7 @@ mod test { .unwrap(); let resp = client.check(website_url(&mock_server.uri())).await; - assert!(matches!(resp.status, Status::Timeout)); + assert!(matches!(resp.status, Status::Timeout(_))); } #[tokio::test] @@ -558,11 +563,17 @@ mod test { assert_eq!(client.excluded(&website_url("http://github.com")), true); assert_eq!(client.excluded(&website_url("http://exclude.org")), true); assert_eq!( - client.excluded(&Uri::Mail("mail@example.com".to_string())), + client.excluded(&Request::new( + Uri::Mail("mail@example.com".to_string()), + Input::Stdin, + )), true ); assert_eq!( - client.excluded(&Uri::Mail("foo@bar.dev".to_string())), + client.excluded(&Request::new( + Uri::Mail("foo@bar.dev".to_string()), + Input::Stdin, + )), false ); } diff --git a/src/client_pool.rs b/src/client_pool.rs index 3770cb6..223baa4 100644 --- a/src/client_pool.rs +++ b/src/client_pool.rs @@ -2,19 +2,18 @@ use client::Client; use deadpool::unmanaged::Pool; use tokio::sync::mpsc; -use crate::uri; use crate::{client, types}; pub struct ClientPool { tx: mpsc::Sender, - rx: mpsc::Receiver, + rx: mpsc::Receiver, pool: deadpool::unmanaged::Pool, } impl ClientPool { pub fn new( tx: mpsc::Sender, - rx: mpsc::Receiver, + rx: mpsc::Receiver, clients: Vec, ) -> Self { let pool = Pool::from(clients); diff --git a/src/collector.rs b/src/collector.rs index 2a7a285..0889179 100644 --- a/src/collector.rs +++ b/src/collector.rs @@ -1,18 +1,21 @@ -use crate::extract::{extract_links, FileType}; -use crate::uri::Uri; +use crate::{ + extract::{extract_links, FileType}, + Request, +}; use anyhow::{anyhow, Context, Result}; use glob::glob_with; use reqwest::Url; +use serde::Serialize; use shellexpand::tilde; -use std::collections::HashSet; use std::path::Path; use std::path::PathBuf; +use std::{collections::HashSet, fmt::Display}; use tokio::fs::read_to_string; use tokio::io::{stdin, AsyncReadExt}; const STDIN: &str = "-"; -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] #[non_exhaustive] pub enum Input { RemoteUrl(Url), @@ -22,6 +25,40 @@ pub enum Input { String(String), } +impl Serialize for Input { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + serializer.collect_str(self) + } +} + +impl Display for Input { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Input::RemoteUrl(url) => { + write!(f, "{}", url) + } + Input::FsGlob { + pattern, + ignore_case: _, + } => { + write!(f, "{}", pattern) + } + Input::FsPath(path) => { + write!(f, "{}", path.to_str().unwrap_or_default()) + } + Input::Stdin => { + write!(f, "stdin") + } + Input::String(_) => { + write!(f, "raw input string") + } + } + } +} + #[derive(Debug)] pub struct InputContent { pub input: Input, @@ -157,18 +194,6 @@ impl Input { } } -impl ToString for Input { - fn to_string(&self) -> String { - match self { - Self::RemoteUrl(url) => url.to_string(), - Self::FsGlob { pattern, .. } => pattern.clone(), - Self::FsPath(p) => p.to_str().unwrap_or_default().to_owned(), - Self::Stdin => STDIN.to_owned(), - Self::String(s) => s.clone(), - } - } -} - /// Fetch all unique links from a slice of inputs /// All relative URLs get prefixed with `base_url` if given. pub async fn collect_links( @@ -176,7 +201,7 @@ pub async fn collect_links( base_url: Option, skip_missing_inputs: bool, max_concurrency: usize, -) -> Result> { +) -> Result> { let base_url = match base_url { Some(url) => Some(Url::parse(&url)?), _ => None, @@ -213,7 +238,7 @@ pub async fn collect_links( // instead of building a HashSet with all links. // This optimization would speed up cases where there's // a lot of inputs and/or the inputs are large (e.g. big files). - let mut collected_links = HashSet::new(); + let mut collected_links: HashSet = HashSet::new(); for handle in extract_links_handles { let links = handle.await?; @@ -226,7 +251,10 @@ pub async fn collect_links( #[cfg(test)] mod test { use super::*; - use crate::test_utils::get_mock_server_with_content; + use crate::{ + test_utils::{get_mock_server_with_content, website}, + Uri, + }; use std::fs::File; use std::io::Write; use std::str::FromStr; @@ -264,13 +292,17 @@ mod test { }, ]; - let links = collect_links(&inputs, None, false, 8).await?; + let responses = collect_links(&inputs, None, false, 8).await?; + let links = responses + .into_iter() + .map(|r| r.uri) + .collect::>(); - let mut expected_links = HashSet::new(); - expected_links.insert(Uri::Website(Url::from_str(TEST_STRING)?)); - expected_links.insert(Uri::Website(Url::from_str(TEST_URL)?)); - expected_links.insert(Uri::Website(Url::from_str(TEST_FILE)?)); - expected_links.insert(Uri::Website(Url::from_str(TEST_GLOB_1)?)); + let mut expected_links: HashSet = HashSet::new(); + expected_links.insert(website(TEST_STRING)); + expected_links.insert(website(TEST_URL)); + expected_links.insert(website(TEST_FILE)); + expected_links.insert(website(TEST_GLOB_1)); expected_links.insert(Uri::Mail(TEST_GLOB_2_MAIL.to_string())); assert_eq!(links, expected_links); diff --git a/src/extract.rs b/src/extract.rs index 025e6c0..caf30da 100644 --- a/src/extract.rs +++ b/src/extract.rs @@ -1,5 +1,5 @@ -use crate::collector::InputContent; use crate::uri::Uri; +use crate::{collector::InputContent, Request}; use html5ever::parse_document; use html5ever::tendril::{StrTendril, TendrilSink}; use linkify::LinkFinder; @@ -141,7 +141,10 @@ fn extract_links_from_plaintext(input: &str) -> Vec { .collect() } -pub(crate) fn extract_links(input_content: &InputContent, base_url: Option) -> HashSet { +pub(crate) fn extract_links( + input_content: &InputContent, + base_url: Option, +) -> HashSet { let links = match input_content.file_type { FileType::Markdown => extract_links_from_markdown(&input_content.content), FileType::HTML => extract_links_from_html(&input_content.content), @@ -150,28 +153,33 @@ pub(crate) fn extract_links(input_content: &InputContent, base_url: Option) // Only keep legit URLs. This sorts out things like anchors. // Silently ignore the parse failures for now. - let mut uris = HashSet::new(); + let mut requests: HashSet = HashSet::new(); for link in links { match Uri::try_from(link.as_str()) { Ok(uri) => { - uris.insert(uri); + requests.insert(Request::new(uri, input_content.input.clone())); } Err(_) => { if !Path::new(&link).exists() { if let Some(base_url) = &base_url { if let Ok(new_url) = base_url.join(&link) { - uris.insert(Uri::Website(new_url)); + requests.insert(Request::new( + Uri::Website(new_url), + input_content.input.clone(), + )); } } } } }; } - uris + requests } #[cfg(test)] mod test { + use crate::test_utils::website; + use super::*; use std::fs::File; use std::io::{BufReader, Read}; @@ -197,17 +205,18 @@ mod test { #[test] fn test_extract_markdown_links() { let input = "This is [a test](https://endler.dev). This is a relative link test [Relative Link Test](relative_link)"; - let links = extract_links( + let links: HashSet = extract_links( &InputContent::from_string(input, FileType::Markdown), Some(Url::parse("https://github.com/hello-rust/lychee/").unwrap()), - ); + ) + .into_iter() + .map(|r| r.uri) + .collect(); assert_eq!( links, [ - Uri::Website(Url::parse("https://endler.dev").unwrap()), - Uri::Website( - Url::parse("https://github.com/hello-rust/lychee/relative_link").unwrap() - ) + website("https://endler.dev"), + website("https://github.com/hello-rust/lychee/relative_link"), ] .iter() .cloned() @@ -219,23 +228,28 @@ mod test { fn test_extract_html_links() { let input = r#" "#; - let links = extract_links( + let links: HashSet = extract_links( &InputContent::from_string(input, FileType::HTML), - Some(Url::parse("https://github.com/hello-rust/").unwrap()), - ); + Some(Url::parse("https://github.com/lycheeverse/").unwrap()), + ) + .into_iter() + .map(|r| r.uri) + .collect(); assert_eq!( - links - .get(&Uri::Website( - Url::parse("https://github.com/hello-rust/blob/master/README.md").unwrap() - )) - .is_some(), - true + links, + [ + website("https://github.com/lycheeverse/lychee/"), + website("https://github.com/lycheeverse/blob/master/README.md"), + ] + .iter() + .cloned() + .collect::>(), ); } @@ -257,15 +271,21 @@ mod test { fn test_non_markdown_links() { let input = "https://endler.dev and https://hello-rust.show/foo/bar?lol=1 at test@example.com"; - let links = extract_links(&InputContent::from_string(input, FileType::Plaintext), None); + let links: HashSet = + extract_links(&InputContent::from_string(input, FileType::Plaintext), None) + .into_iter() + .map(|r| r.uri) + .collect(); + let expected = [ - Uri::Website(Url::parse("https://endler.dev").unwrap()), - Uri::Website(Url::parse("https://hello-rust.show/foo/bar?lol=1").unwrap()), + website("https://endler.dev"), + website("https://hello-rust.show/foo/bar?lol=1"), Uri::Mail("test@example.com".to_string()), ] .iter() .cloned() .collect(); + assert_eq!(links, expected) } @@ -284,14 +304,18 @@ mod test { #[test] fn test_extract_html5_not_valid_xml() { let input = load_fixture("TEST_HTML5.html"); - let links = extract_links(&InputContent::from_string(&input, FileType::HTML), None); + let links: HashSet = + extract_links(&InputContent::from_string(&input, FileType::HTML), None) + .into_iter() + .map(|r| r.uri) + .collect(); let expected_links = [ - Uri::Website(Url::parse("https://example.com/head/home").unwrap()), - Uri::Website(Url::parse("https://example.com/css/style_full_url.css").unwrap()), + website("https://example.com/head/home"), + website("https://example.com/css/style_full_url.css"), // the body links wouldn't be present if the file was parsed strictly as XML - Uri::Website(Url::parse("https://example.com/body/a").unwrap()), - Uri::Website(Url::parse("https://example.com/body/div_empty_a").unwrap()), + website("https://example.com/body/a"), + website("https://example.com/body/div_empty_a"), ] .iter() .cloned() @@ -303,20 +327,23 @@ mod test { #[test] fn test_extract_html5_not_valid_xml_relative_links() { let input = load_fixture("TEST_HTML5.html"); - let links = extract_links( + let links: HashSet = extract_links( &InputContent::from_string(&input, FileType::HTML), Some(Url::parse("https://example.com").unwrap()), - ); + ) + .into_iter() + .map(|r| r.uri) + .collect(); let expected_links = [ - Uri::Website(Url::parse("https://example.com/head/home").unwrap()), - Uri::Website(Url::parse("https://example.com/images/icon.png").unwrap()), - Uri::Website(Url::parse("https://example.com/css/style_relative_url.css").unwrap()), - Uri::Website(Url::parse("https://example.com/css/style_full_url.css").unwrap()), - Uri::Website(Url::parse("https://example.com/js/script.js").unwrap()), + website("https://example.com/head/home"), + website("https://example.com/images/icon.png"), + website("https://example.com/css/style_relative_url.css"), + website("https://example.com/css/style_full_url.css"), + website("https://example.com/js/script.js"), // the body links wouldn't be present if the file was parsed strictly as XML - Uri::Website(Url::parse("https://example.com/body/a").unwrap()), - Uri::Website(Url::parse("https://example.com/body/div_empty_a").unwrap()), + website("https://example.com/body/a"), + website("https://example.com/body/div_empty_a"), ] .iter() .cloned() @@ -329,14 +356,16 @@ mod test { fn test_extract_html5_lowercase_doctype() { // this has been problematic with previous XML based parser let input = load_fixture("TEST_HTML5_LOWERCASE_DOCTYPE.html"); - let links = extract_links(&InputContent::from_string(&input, FileType::HTML), None); + let links: HashSet = + extract_links(&InputContent::from_string(&input, FileType::HTML), None) + .into_iter() + .map(|r| r.uri) + .collect(); - let expected_links = [Uri::Website( - Url::parse("https://example.com/body/a").unwrap(), - )] - .iter() - .cloned() - .collect(); + let expected_links = [website("https://example.com/body/a")] + .iter() + .cloned() + .collect(); assert_eq!(links, expected_links); } @@ -345,14 +374,18 @@ mod test { fn test_extract_html5_minified() { // minified HTML with some quirky elements such as href attribute values specified without quotes let input = load_fixture("TEST_HTML5_MINIFIED.html"); - let links = extract_links(&InputContent::from_string(&input, FileType::HTML), None); + let links: HashSet = + extract_links(&InputContent::from_string(&input, FileType::HTML), None) + .into_iter() + .map(|r| r.uri) + .collect(); let expected_links = [ - Uri::Website(Url::parse("https://example.com/").unwrap()), - Uri::Website(Url::parse("https://example.com/favicon.ico").unwrap()), - Uri::Website(Url::parse("https://fonts.externalsite.com").unwrap()), - Uri::Website(Url::parse("https://example.com/docs/").unwrap()), - Uri::Website(Url::parse("https://example.com/forum").unwrap()), + website("https://example.com/"), + website("https://example.com/favicon.ico"), + website("https://fonts.externalsite.com"), + website("https://example.com/docs/"), + website("https://example.com/forum"), ] .iter() .cloned() @@ -365,7 +398,11 @@ mod test { fn test_extract_html5_malformed() { // malformed links shouldn't stop the parser from further parsing let input = load_fixture("TEST_HTML5_MALFORMED_LINKS.html"); - let links = extract_links(&InputContent::from_string(&input, FileType::HTML), None); + let links: HashSet = + extract_links(&InputContent::from_string(&input, FileType::HTML), None) + .into_iter() + .map(|r| r.uri) + .collect(); let expected_links = [Uri::Website( Url::parse("https://example.com/valid").unwrap(), @@ -381,13 +418,17 @@ mod test { fn test_extract_html5_custom_elements() { // the element name shouldn't matter for attributes like href, src, cite etc let input = load_fixture("TEST_HTML5_CUSTOM_ELEMENTS.html"); - let links = extract_links(&InputContent::from_string(&input, FileType::HTML), None); + let links: HashSet = + extract_links(&InputContent::from_string(&input, FileType::HTML), None) + .into_iter() + .map(|r| r.uri) + .collect(); let expected_links = [ - Uri::Website(Url::parse("https://example.com/some-weird-element").unwrap()), - Uri::Website(Url::parse("https://example.com/even-weirder-src").unwrap()), - Uri::Website(Url::parse("https://example.com/even-weirder-href").unwrap()), - Uri::Website(Url::parse("https://example.com/citations").unwrap()), + website("https://example.com/some-weird-element"), + website("https://example.com/even-weirder-src"), + website("https://example.com/even-weirder-href"), + website("https://example.com/citations"), ] .iter() .cloned() diff --git a/src/lib.rs b/src/lib.rs index fe8939b..32079c1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -8,7 +8,7 @@ * "Hello world" example: * ``` * -* use lychee::{ClientBuilder, Status}; +* use lychee::{Request, Input, ClientBuilder, Status}; * use lychee::Uri::Website; * use url::Url; * use std::error::Error; @@ -17,7 +17,7 @@ * async fn main() -> Result<(), Box> { * let client = ClientBuilder::default().build()?; * let url = Url::parse("https://github.com/lycheeverse/lychee")?; -* let response = client.check(Website(url)).await; +* let response = client.check(Request::new(Website(url), Input::Stdin)).await; * assert!(matches!(response.status, Status::Ok(_))); * Ok(()) * } @@ -35,6 +35,7 @@ pub mod test_utils; pub use client::ClientBuilder; pub use client_pool::ClientPool; +pub use collector::Input; pub use excludes::Excludes; pub use types::*; pub use uri::Uri; diff --git a/src/test_utils.rs b/src/test_utils.rs index ca977bb..811c4c3 100644 --- a/src/test_utils.rs +++ b/src/test_utils.rs @@ -1,7 +1,10 @@ use http::StatusCode; +use reqwest::Url; use wiremock::matchers::path; use wiremock::{Mock, MockServer, ResponseTemplate}; +use crate::Uri; + #[allow(unused)] pub async fn get_mock_server(response_code: S) -> MockServer where @@ -30,3 +33,7 @@ where mock_server } + +pub fn website(url: &str) -> Uri { + Uri::Website(Url::parse(url).unwrap()) +} diff --git a/src/types.rs b/src/types.rs index 7a0ce56..d2040e6 100644 --- a/src/types.rs +++ b/src/types.rs @@ -1,6 +1,25 @@ -use crate::uri::Uri; +use crate::{collector::Input, uri::Uri}; use anyhow::anyhow; -use std::{collections::HashSet, convert::TryFrom}; +use serde::{Serialize, Serializer}; +use std::{collections::HashSet, convert::TryFrom, fmt::Display}; + +#[derive(Debug, PartialEq, Eq, Hash, Clone)] +pub struct Request { + pub uri: Uri, + pub source: Input, +} + +impl Request { + pub fn new(uri: Uri, source: Input) -> Self { + Request { uri, source } + } +} + +impl Display for Request { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{} ({})", self.uri, self.source) + } +} /// Specifies how requests to websites will be made pub(crate) enum RequestMethod { @@ -19,35 +38,80 @@ impl TryFrom for RequestMethod { } } -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq, Hash, Serialize)] pub struct Response { + #[serde(flatten)] pub uri: Uri, pub status: Status, + #[serde(skip)] + pub source: Input, } impl Response { - pub fn new(uri: Uri, status: Status) -> Self { - Response { uri, status } + pub fn new(uri: Uri, status: Status, source: Input) -> Self { + Response { + uri, + status, + source, + } + } +} + +impl Display for Response { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let metadata = match &self.status { + Status::Ok(code) | Status::Redirected(code) | Status::Failed(code) => { + format!(" [{}]", code) + } + Status::Timeout(code) if code.is_some() => format!(" [{}]", code.unwrap()), + Status::Error(e) => format!(" ({})", e), + _ => "".to_string(), + }; + write!(f, "{} {}{}", self.status.icon(), self.uri, metadata) } } /// Response status of the request -#[derive(Debug)] +#[derive(Debug, Hash, PartialEq, Eq)] pub enum Status { /// Request was successful Ok(http::StatusCode), /// Request failed with HTTP error code Failed(http::StatusCode), /// Request timed out - Timeout, + Timeout(Option), /// Got redirected to different resource - Redirected, + Redirected(http::StatusCode), /// Resource was excluded from checking Excluded, /// Low-level error while loading resource Error(String), } +impl Display for Status { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let out = match self { + Status::Ok(c) => format!("OK ({})", c), + Status::Redirected(c) => format!("Redirect ({})", c), + Status::Excluded => "Excluded".to_string(), + Status::Failed(c) => format!("Failed ({})", c), + Status::Error(e) => format!("Runtime error ({})", e), + Status::Timeout(Some(c)) => format!("Timeout ({})", c), + Status::Timeout(None) => "Timeout".to_string(), + }; + write!(f, "{}", out) + } +} + +impl Serialize for Status { + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + serializer.collect_str(self) + } +} + impl Status { pub fn new(statuscode: http::StatusCode, accepted: Option>) -> Self { if let Some(true) = accepted.map(|a| a.contains(&statuscode)) { @@ -55,7 +119,7 @@ impl Status { } else if statuscode.is_success() { Status::Ok(statuscode) } else if statuscode.is_redirection() { - Status::Redirected + Status::Redirected(statuscode) } else { Status::Failed(statuscode) } @@ -64,12 +128,27 @@ impl Status { pub fn is_success(&self) -> bool { matches!(self, Status::Ok(_)) } + + pub fn is_excluded(&self) -> bool { + matches!(self, Status::Excluded) + } + + pub fn icon(&self) -> &str { + match self { + Status::Ok(_) => "✅", + Status::Redirected(_) => "🔀️", + Status::Excluded => "👻", + Status::Failed(_) => "🚫", + Status::Error(_) => "⚡", + Status::Timeout(_) => "⌛", + } + } } impl From for Status { fn from(e: reqwest::Error) -> Self { if e.is_timeout() { - Status::Timeout + Status::Timeout(e.status()) } else { Status::Error(e.to_string()) } @@ -78,22 +157,19 @@ impl From for Status { #[cfg(test)] mod test { - use super::*; + use crate::test_utils::website; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; - use url::Url; #[test] fn test_uri_host_ip_v4() { - let uri = - Uri::Website(Url::parse("http://127.0.0.1").expect("Expected URI with valid IPv4")); + let uri = website("http://127.0.0.1"); let ip = uri.host_ip().expect("Expected a valid IPv4"); assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))); } #[test] fn test_uri_host_ip_v6() { - let uri = - Uri::Website(Url::parse("https://[2020::0010]").expect("Expected URI with valid IPv6")); + let uri = website("https://[2020::0010]"); let ip = uri.host_ip().expect("Expected a valid IPv6"); assert_eq!( ip, @@ -103,7 +179,7 @@ mod test { #[test] fn test_uri_host_ip_no_ip() { - let uri = Uri::Website(Url::parse("https://some.cryptic/url").expect("Expected valid URI")); + let uri = website("https://some.cryptic/url"); let ip = uri.host_ip(); assert!(ip.is_none()); } diff --git a/src/uri.rs b/src/uri.rs index 7df7534..5834a5e 100644 --- a/src/uri.rs +++ b/src/uri.rs @@ -65,7 +65,7 @@ impl Display for Uri { #[cfg(test)] mod test { - use reqwest::Url; + use crate::test_utils::website; use super::*; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; @@ -75,7 +75,7 @@ mod test { assert!(matches!(Uri::try_from(""), Err(_))); assert_eq!( Uri::try_from("http://example.com").unwrap(), - Uri::Website(url::Url::parse("http://example.com").unwrap()) + website("http://example.com") ); assert_eq!( Uri::try_from("mail@example.com").unwrap(), @@ -89,16 +89,14 @@ mod test { #[test] fn test_uri_host_ip_v4() { - let uri = - Uri::Website(Url::parse("http://127.0.0.1").expect("Expected URI with valid IPv4")); + let uri = website("http://127.0.0.1"); let ip = uri.host_ip().expect("Expected a valid IPv4"); assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))); } #[test] fn test_uri_host_ip_v6() { - let uri = - Uri::Website(Url::parse("https://[2020::0010]").expect("Expected URI with valid IPv6")); + let uri = website("https://[2020::0010]"); let ip = uri.host_ip().expect("Expected a valid IPv6"); assert_eq!( ip, @@ -108,15 +106,14 @@ mod test { #[test] fn test_uri_host_ip_no_ip() { - let uri = Uri::Website(Url::parse("https://some.cryptic/url").expect("Expected valid URI")); + let uri = website("https://some.cryptic/url"); let ip = uri.host_ip(); assert!(ip.is_none()); } #[test] fn test_mail() { - let uri = - Uri::Website(Url::parse("http://127.0.0.1").expect("Expected URI with valid IPv4")); + let uri = website("http://127.0.0.1"); let ip = uri.host_ip().expect("Expected a valid IPv4"); assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1))); } diff --git a/tests/cli.rs b/tests/cli.rs index 8258b3c..6b98c5d 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -30,10 +30,10 @@ mod cli { .arg(test_all_private_path) .assert() .success() - .stdout(contains("Total: 7")) - .stdout(contains("Excluded: 7")) - .stdout(contains("Successful: 0")) - .stdout(contains("Errors: 0")); + .stdout(contains("Total............7")) + .stdout(contains("Excluded.........7")) + .stdout(contains("Successful.......0")) + .stdout(contains("Errors...........0")); } /// Test that a GitHub link can be checked without specifying the token. @@ -46,10 +46,10 @@ mod cli { .arg(test_github_path) .assert() .success() - .stdout(contains("Total: 1")) - .stdout(contains("Excluded: 0")) - .stdout(contains("Successful: 1")) - .stdout(contains("Errors: 0")); + .stdout(contains("Total............1")) + .stdout(contains("Excluded.........0")) + .stdout(contains("Successful.......1")) + .stdout(contains("Errors...........0")); } #[tokio::test] @@ -164,7 +164,7 @@ mod cli { .arg("--verbose") .assert() .success() - .stdout(contains("Total: 2")); + .stdout(contains("Total............2")); Ok(()) } @@ -188,7 +188,7 @@ mod cli { .arg("--glob-ignore-case") .assert() .success() - .stdout(contains("Total: 2")); + .stdout(contains("Total............2")); Ok(()) } @@ -211,7 +211,7 @@ mod cli { .arg("--verbose") .assert() .success() - .stdout(contains("Total: 1")); + .stdout(contains("Total............1")); Ok(()) } @@ -231,9 +231,9 @@ mod cli { .assert() .success(); - let expected = r##"{"total":11,"successful":11,"failures":[],"timeouts":[],"redirects":[],"excludes":[],"errors":[]}"##; + let expected = r##"{"total":11,"successful":11,"failures":0,"timeouts":0,"redirects":0,"excludes":0,"errors":0,"fail_map":{}}"##; let output = fs::read_to_string(&outfile)?; - assert_eq!(output, expected); + assert_eq!(output.split_whitespace().collect::(), expected); fs::remove_file(outfile)?; Ok(()) }