From a78e8318cd1828b9d298daaa14638c2395c37043 Mon Sep 17 00:00:00 2001 From: Matthias Date: Mon, 14 Dec 2020 01:15:14 +0100 Subject: [PATCH] Add (machine-readable) output file support (fixes #53) For now we only support JSON. I honestly don't know if it makes sense to include other formats. For example, MD and HTML are not really machine-readable. YAML is not a great standard format for this use-case. Open for discussions, though. --- Cargo.lock | 5 +++-- Cargo.toml | 1 + fixtures/TEST.md | 6 +++--- src/bin/lychee/main.rs | 17 +++++++++++++++-- src/bin/lychee/options.rs | 39 +++++++++++++++++++++++++++++++++++++-- src/bin/lychee/stats.rs | 3 +++ src/uri.rs | 3 ++- tests/cli.rs | 25 ++++++++++++++++++++++++- 8 files changed, 88 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5e336e2..84fe518 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1836,6 +1836,7 @@ dependencies = [ "regex", "reqwest", "serde", + "serde_json", "shellexpand", "structopt", "tempfile", @@ -2688,9 +2689,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.57" +version = "1.0.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "164eacbdb13512ec2745fb09d51fd5b22b0d65ed294a1dcf7285a360c80a675c" +checksum = "1500e84d27fe482ed1dc791a56eddc2f230046a040fa908c08bda1d9fb615779" dependencies = [ "itoa", "ryu", diff --git a/Cargo.toml b/Cargo.toml index 80800fc..7416eea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,6 +39,7 @@ shellexpand = "2.0" lazy_static = "1.1" wiremock = "0.3.0" openssl-sys = "0.9.58" +serde_json = "1.0.60" [dependencies.reqwest] features = ["gzip"] diff --git a/fixtures/TEST.md b/fixtures/TEST.md index bdba86f..c9b7c14 100644 --- a/fixtures/TEST.md +++ b/fixtures/TEST.md @@ -16,9 +16,9 @@ Some more complex formatting to test that Markdown parsing works. [![CC0](https://i.creativecommons.org/p/zero/1.0/88x31.png)](https://creativecommons.org/publicdomain/zero/1.0/) Test HTTP and HTTPS for the same site. -http://spinroot.com/cobra/ -https://spinroot.com/cobra/ +http://example.com +https://example.com -https://www.peerlyst.com/posts/a-list-of-static-analysis-tools-for-c-c-peerlyst +https://www.peerlyst.com/posts/a-list-of-static-analysis-tools-for-c-c-peerlyst test@example.com diff --git a/src/bin/lychee/main.rs b/src/bin/lychee/main.rs index d7d0dcf..c225d13 100644 --- a/src/bin/lychee/main.rs +++ b/src/bin/lychee/main.rs @@ -1,10 +1,11 @@ -use anyhow::{anyhow, Result}; +use anyhow::{anyhow, Context, Result}; use headers::authorization::Basic; use headers::{Authorization, HeaderMap, HeaderMapExt, HeaderName}; use indicatif::{ProgressBar, ProgressStyle}; +use options::Format; use regex::RegexSet; -use std::str::FromStr; use std::{collections::HashSet, time::Duration}; +use std::{fs, str::FromStr}; use structopt::StructOpt; use tokio::sync::mpsc; @@ -65,6 +66,13 @@ fn show_progress(progress_bar: &Option, response: &Response, verbos }; } +fn fmt(stats: &ResponseStats, format: &Format) -> Result { + Ok(match format { + Format::String => stats.to_string(), + Format::JSON => serde_json::to_string(&stats)?, + }) +} + async fn run(cfg: &Config, inputs: Vec) -> Result { let mut headers = parse_headers(&cfg.headers)?; if let Some(auth) = &cfg.basic_auth { @@ -154,6 +162,11 @@ async fn run(cfg: &Config, inputs: Vec) -> Result { println!("\n{}", stats); } + if let Some(output) = &cfg.output { + fs::write(output, fmt(&stats, &cfg.format)?) + .context("Cannot write status output to file")?; + } + match stats.is_success() { true => Ok(ExitCode::Success as i32), false => Ok(ExitCode::LinkCheckFailure as i32), diff --git a/src/bin/lychee/options.rs b/src/bin/lychee/options.rs index 184f38a..f282d47 100644 --- a/src/bin/lychee/options.rs +++ b/src/bin/lychee/options.rs @@ -1,9 +1,10 @@ use lychee::collector::Input; -use anyhow::{Error, Result}; +use anyhow::{anyhow, Error, Result}; use lazy_static::lazy_static; use serde::Deserialize; -use std::{fs, io::ErrorKind}; +use std::str::FromStr; +use std::{fs, io::ErrorKind, path::PathBuf}; use structopt::{clap::crate_version, StructOpt}; pub(crate) const USER_AGENT: &str = concat!("lychee/", crate_version!()); @@ -12,6 +13,29 @@ const TIMEOUT: usize = 20; const MAX_CONCURRENCY: usize = 128; const MAX_REDIRECTS: usize = 10; +#[derive(Debug, Deserialize)] +pub enum Format { + String, + JSON, +} + +impl FromStr for Format { + type Err = Error; + fn from_str(format: &str) -> Result { + match format { + "string" => Ok(Format::String), + "json" => Ok(Format::JSON), + _ => Err(anyhow!("Could not parse format {}", format)), + } + } +} + +impl Default for Format { + fn default() -> Self { + Format::String + } +} + // this exists because structopt requires `&str` type values for defaults // (we can't use e.g. `TIMEOUT` or `timeout()` which gets created for serde) lazy_static! { @@ -203,6 +227,16 @@ pub struct Config { #[structopt(long)] #[serde(default)] pub glob_ignore_case: bool, + + /// Output file of status report + #[structopt(short, long, parse(from_os_str))] + #[serde(default)] + pub output: Option, + + /// Output file format of status report + #[structopt(short, long, default_value = "string")] + #[serde(default)] + pub format: Format, } impl Config { @@ -255,6 +289,7 @@ impl Config { github_token: None; skip_missing: false; glob_ignore_case: false; + output: None; } } } diff --git a/src/bin/lychee/stats.rs b/src/bin/lychee/stats.rs index 80ecb9c..1525c0c 100644 --- a/src/bin/lychee/stats.rs +++ b/src/bin/lychee/stats.rs @@ -1,3 +1,5 @@ +use serde::{Deserialize, Serialize}; + use std::{ collections::HashSet, fmt::{self, Display}, @@ -5,6 +7,7 @@ use std::{ use lychee::{Response, Status::*, Uri}; +#[derive(Serialize, Deserialize)] pub struct ResponseStats { total: usize, successful: usize, diff --git a/src/uri.rs b/src/uri.rs index 0a8b9c1..1bb2193 100644 --- a/src/uri.rs +++ b/src/uri.rs @@ -1,10 +1,11 @@ use anyhow::Result; +use serde::{Deserialize, Serialize}; use std::net::IpAddr; use std::{convert::TryFrom, fmt::Display}; use url::Url; /// Lychee's own representation of a URI, which encapsulates all support formats -#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] pub enum Uri { /// Website URL Website(Url), diff --git a/tests/cli.rs b/tests/cli.rs index 7a76e18..b2220e4 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -4,9 +4,10 @@ mod cli { use assert_cmd::Command; use lychee::test_utils; use predicates::str::contains; - use std::fs::File; + use std::fs::{self, File}; use std::io::Write; use std::path::{Path, PathBuf}; + use uuid::Uuid; fn main_command() -> Command { // this gets the "main" binary name (e.g. `lychee`) @@ -214,4 +215,26 @@ mod cli { Ok(()) } + + /// Test formatted file output + #[test] + fn test_formatted_file_output() -> Result<()> { + let mut cmd = main_command(); + let test_path = fixtures_path().join("TEST.md"); + let outfile = format!("{}.json", Uuid::new_v4()); + + cmd.arg("--output") + .arg(&outfile) + .arg("--format") + .arg("json") + .arg(test_path) + .assert() + .success(); + + let expected = r##"{"total":10,"successful":10,"failures":[],"timeouts":[],"redirects":[],"excludes":[],"errors":[]}"##; + let output = fs::read_to_string(&outfile)?; + assert_eq!(output, expected); + fs::remove_file(outfile)?; + Ok(()) + } }