mirror of
https://github.com/Hopiu/lychee.git
synced 2026-05-02 10:54:46 +00:00
Add support for reading from stdin and make input handling more robust (closes #26)
* Adds a `skip_missing` flag * Adds an `Input` enum to handle different types of inputs
This commit is contained in:
parent
e197012e7a
commit
1f787613d4
11 changed files with 736 additions and 149 deletions
80
Cargo.lock
generated
80
Cargo.lock
generated
|
|
@ -119,6 +119,12 @@ version = "0.4.7"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4d25d88fd6b8041580a654f9d0c581a047baee2b3efee13275f2fc392fc75034"
|
||||
|
||||
[[package]]
|
||||
name = "arrayref"
|
||||
version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4c527152e37cf757a3f78aae5a06fbeefdb07ccc535c980a3208ee3060dd544"
|
||||
|
||||
[[package]]
|
||||
name = "arrayvec"
|
||||
version = "0.5.1"
|
||||
|
|
@ -476,6 +482,17 @@ version = "1.2.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
|
||||
|
||||
[[package]]
|
||||
name = "blake2b_simd"
|
||||
version = "0.5.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "afa748e348ad3be8263be728124b24a24f268266f6f5d58af9d75f6a40b5c587"
|
||||
dependencies = [
|
||||
"arrayref",
|
||||
"arrayvec",
|
||||
"constant_time_eq",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "block-buffer"
|
||||
version = "0.7.3"
|
||||
|
|
@ -712,6 +729,12 @@ version = "0.4.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ce90df4c658c62f12d78f7508cf92f9173e5184a539c10bfe54a3107b3ffd0f2"
|
||||
|
||||
[[package]]
|
||||
name = "constant_time_eq"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
|
||||
|
||||
[[package]]
|
||||
name = "cookie"
|
||||
version = "0.14.2"
|
||||
|
|
@ -943,6 +966,27 @@ dependencies = [
|
|||
"generic-array 0.14.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dirs"
|
||||
version = "2.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "13aea89a5c93364a98e9b37b2fa237effbb694d5cfe01c5b70941f7eb087d5e3"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.10",
|
||||
"dirs-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dirs-sys"
|
||||
version = "0.3.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e93d7f5705de3e49895a2b5e0b8855a1c27f080192ae9c32a6432d50741a57a"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"redox_users",
|
||||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "discard"
|
||||
version = "1.0.4"
|
||||
|
|
@ -1790,6 +1834,7 @@ dependencies = [
|
|||
"http",
|
||||
"hubcaps",
|
||||
"indicatif",
|
||||
"lazy_static",
|
||||
"linkify",
|
||||
"log",
|
||||
"predicates",
|
||||
|
|
@ -1799,10 +1844,13 @@ dependencies = [
|
|||
"regex",
|
||||
"reqwest",
|
||||
"serde",
|
||||
"shellexpand",
|
||||
"structopt",
|
||||
"tempfile",
|
||||
"tokio 0.2.22",
|
||||
"toml",
|
||||
"url",
|
||||
"uuid",
|
||||
"wiremock",
|
||||
]
|
||||
|
||||
|
|
@ -2421,6 +2469,17 @@ version = "0.1.57"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
|
||||
|
||||
[[package]]
|
||||
name = "redox_users"
|
||||
version = "0.3.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "de0737333e7a9502c789a36d7c7fa6092a49895d4faa31ca5df163857ded2e9d"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
"redox_syscall",
|
||||
"rust-argon2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.4.2"
|
||||
|
|
@ -2522,6 +2581,18 @@ dependencies = [
|
|||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rust-argon2"
|
||||
version = "0.8.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4b18820d944b33caa75a71378964ac46f58517c92b6ae5f762636247c09e78fb"
|
||||
dependencies = [
|
||||
"base64 0.13.0",
|
||||
"blake2b_simd",
|
||||
"constant_time_eq",
|
||||
"crossbeam-utils 0.8.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustc-demangle"
|
||||
version = "0.1.16"
|
||||
|
|
@ -2698,6 +2769,15 @@ dependencies = [
|
|||
"lazy_static",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shellexpand"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a2b22262a9aaf9464d356f656fea420634f78c881c5eebd5ef5e66d8b9bc603"
|
||||
dependencies = [
|
||||
"dirs",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "signal-hook-registry"
|
||||
version = "1.2.1"
|
||||
|
|
|
|||
|
|
@ -30,6 +30,8 @@ quick-xml = "0.20.0"
|
|||
headers = "0.3.2"
|
||||
derive_builder = "0.9.0"
|
||||
deadpool = "0.6.0"
|
||||
shellexpand = "2.0"
|
||||
lazy_static = "1.1"
|
||||
|
||||
[dependencies.reqwest]
|
||||
features = ["gzip"]
|
||||
|
|
@ -43,3 +45,5 @@ version = "0.2"
|
|||
wiremock = "0.3"
|
||||
assert_cmd = "1.0"
|
||||
predicates = "1.0"
|
||||
uuid = { version = "0.8", features = ["v4"] }
|
||||
tempfile = "3.1"
|
||||
|
|
|
|||
28
README.md
28
README.md
|
|
@ -80,10 +80,32 @@ cargo install lychee
|
|||
|
||||
## Usage
|
||||
|
||||
Run it inside a repository with a `README.md`, or specify a file with
|
||||
Run it inside a repository with a `README.md`:
|
||||
|
||||
```
|
||||
lychee <yourfile>
|
||||
lychee
|
||||
```
|
||||
|
||||
You can also specify various types of inputs:
|
||||
|
||||
```
|
||||
# check links on a website:
|
||||
lychee https://endler.dev/
|
||||
|
||||
# check links in a remote file:
|
||||
lychee https://raw.githubusercontent.com/lycheeverse/lychee/master/README.md
|
||||
|
||||
# check links in local file(s):
|
||||
lychee README.md
|
||||
lychee test.html info.txt
|
||||
|
||||
# check links in local files (by shell glob):
|
||||
lychee ~/projects/*/README.md
|
||||
|
||||
# check links in local files (lychee supports advanced globbing and ~ expansion):
|
||||
lychee "~/projects/big_project/**/README.*"
|
||||
# ignore case when globbing, displaying progress and check result for each link:
|
||||
lychee --glob-ignore-case --progress --verbose "~/projects/**/[r]eadme.*"
|
||||
```
|
||||
|
||||
Optional (to avoid getting rate-limited): set an environment variable with your Github token
|
||||
|
|
@ -93,7 +115,7 @@ config file.
|
|||
### CLI exit codes
|
||||
|
||||
- `0` for success (all links checked successfully or excluded/skipped as configured)
|
||||
- `1` for any unexpected runtime failures or config errors
|
||||
- `1` for missing inputs and any unexpected runtime failures or config errors
|
||||
- `2` for link check failures (if any non-excluded link failed the check)
|
||||
|
||||
## Troubleshooting and workarounds
|
||||
|
|
|
|||
|
|
@ -1,3 +0,0 @@
|
|||
Test file: this link should be a valid link but return a HTTP 404 when followed.
|
||||
|
||||
http://httpbin.org/status/404
|
||||
306
src/collector.rs
306
src/collector.rs
|
|
@ -1,63 +1,283 @@
|
|||
use crate::extract::{extract_links, FileType};
|
||||
use crate::types::Uri;
|
||||
use anyhow::Result;
|
||||
use glob::glob;
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use glob::glob_with;
|
||||
use reqwest::Url;
|
||||
use std::{collections::HashSet, fs};
|
||||
use std::{ffi::OsStr, path::Path};
|
||||
use shellexpand::tilde;
|
||||
use std::collections::HashSet;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use tokio::fs::read_to_string;
|
||||
use tokio::io::{stdin, AsyncReadExt};
|
||||
|
||||
/// Detect if the given path points to a Markdown, HTML, or plaintext file.
|
||||
fn resolve_file_type_by_path<P: AsRef<Path>>(path: P) -> FileType {
|
||||
match path.as_ref().extension().and_then(OsStr::to_str) {
|
||||
Some("md") => FileType::Markdown,
|
||||
Some("html") => FileType::HTML,
|
||||
_ => FileType::Plaintext,
|
||||
const STDIN: &str = "-";
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[non_exhaustive]
|
||||
pub(crate) enum Input {
|
||||
RemoteUrl(Url),
|
||||
FsGlob { pattern: String, ignore_case: bool },
|
||||
FsPath(PathBuf),
|
||||
Stdin,
|
||||
String(String),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct InputContent {
|
||||
pub input: Input,
|
||||
pub file_type: FileType,
|
||||
pub content: String,
|
||||
}
|
||||
|
||||
impl InputContent {
|
||||
pub fn from_string(s: &str, file_type: FileType) -> Self {
|
||||
// TODO: consider using Cow (to avoid one .clone() for String types)
|
||||
Self {
|
||||
input: Input::String(s.to_owned()),
|
||||
file_type,
|
||||
content: s.to_owned(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetch all unique links from a vector of inputs
|
||||
impl Input {
|
||||
pub(crate) fn new(value: &str, glob_ignore_case: bool) -> Self {
|
||||
if value == STDIN {
|
||||
Self::Stdin
|
||||
} else {
|
||||
match Url::parse(&value) {
|
||||
Ok(url) => Self::RemoteUrl(url),
|
||||
Err(_) => {
|
||||
// this seems to be the only way to determine if this is a glob pattern
|
||||
let is_glob = glob::Pattern::escape(value) != value;
|
||||
|
||||
if is_glob {
|
||||
Self::FsGlob {
|
||||
pattern: value.to_owned(),
|
||||
ignore_case: glob_ignore_case,
|
||||
}
|
||||
} else {
|
||||
Self::FsPath(value.into())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get_contents(
|
||||
&self,
|
||||
file_type_hint: Option<FileType>,
|
||||
skip_missing: bool,
|
||||
) -> Result<Vec<InputContent>> {
|
||||
use Input::*;
|
||||
|
||||
match self {
|
||||
// TODO: should skip_missing also affect URLs?
|
||||
RemoteUrl(url) => Ok(vec![Self::url_contents(url).await?]),
|
||||
FsGlob {
|
||||
pattern,
|
||||
ignore_case,
|
||||
} => Ok(Self::glob_contents(pattern, *ignore_case).await?),
|
||||
FsPath(path) => {
|
||||
let content = Self::path_content(&path).await.with_context(|| {
|
||||
format!(
|
||||
"Failed to read file: `{}`",
|
||||
path.to_str().unwrap_or("<MALFORMED PATH>")
|
||||
)
|
||||
});
|
||||
match content {
|
||||
Ok(input_content) => Ok(vec![input_content]),
|
||||
Err(_) if skip_missing => Ok(vec![]),
|
||||
Err(arg) => Err(anyhow!(arg)),
|
||||
}
|
||||
}
|
||||
Stdin => Ok(vec![Self::stdin_content(file_type_hint).await?]),
|
||||
String(s) => Ok(vec![Self::string_content(s, file_type_hint)?]),
|
||||
}
|
||||
}
|
||||
|
||||
async fn url_contents(url: &Url) -> Result<InputContent> {
|
||||
let res = reqwest::get(url.clone()).await?;
|
||||
let content = res.text().await?;
|
||||
let input_content = InputContent {
|
||||
input: Input::RemoteUrl(url.clone()),
|
||||
file_type: FileType::from(url.as_str()),
|
||||
content,
|
||||
};
|
||||
|
||||
Ok(input_content)
|
||||
}
|
||||
|
||||
async fn glob_contents(path_glob: &str, ignore_case: bool) -> Result<Vec<InputContent>> {
|
||||
let mut contents = vec![];
|
||||
let glob_expanded = tilde(&path_glob);
|
||||
let mut match_opts = glob::MatchOptions::new();
|
||||
|
||||
match_opts.case_sensitive = !ignore_case;
|
||||
|
||||
for entry in glob_with(&glob_expanded, match_opts)? {
|
||||
match entry {
|
||||
Ok(path) => {
|
||||
let content = Self::path_content(&path).await?;
|
||||
contents.push(content);
|
||||
}
|
||||
Err(e) => println!("{:?}", e),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(contents)
|
||||
}
|
||||
|
||||
async fn path_content<P: Into<PathBuf> + AsRef<Path>>(path: P) -> Result<InputContent> {
|
||||
let input_content = InputContent {
|
||||
file_type: FileType::from(path.as_ref()),
|
||||
content: read_to_string(&path).await?,
|
||||
input: Input::FsPath(path.into()),
|
||||
};
|
||||
|
||||
Ok(input_content)
|
||||
}
|
||||
|
||||
async fn stdin_content(file_type_hint: Option<FileType>) -> Result<InputContent> {
|
||||
let mut content = String::new();
|
||||
let mut stdin = stdin();
|
||||
stdin.read_to_string(&mut content).await?;
|
||||
|
||||
let input_content = InputContent {
|
||||
input: Input::Stdin,
|
||||
file_type: file_type_hint.unwrap_or_default(),
|
||||
content,
|
||||
};
|
||||
|
||||
Ok(input_content)
|
||||
}
|
||||
|
||||
fn string_content(s: &str, file_type_hint: Option<FileType>) -> Result<InputContent> {
|
||||
Ok(InputContent::from_string(
|
||||
s,
|
||||
file_type_hint.unwrap_or_default(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
impl ToString for Input {
|
||||
fn to_string(&self) -> String {
|
||||
match self {
|
||||
Self::RemoteUrl(url) => url.to_string(),
|
||||
Self::FsGlob { pattern, .. } => pattern.clone(),
|
||||
Self::FsPath(p) => p.to_str().unwrap_or_default().to_owned(),
|
||||
Self::Stdin => STDIN.to_owned(),
|
||||
Self::String(s) => s.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetch all unique links from a slice of inputs
|
||||
/// All relative URLs get prefixed with `base_url` if given.
|
||||
pub(crate) async fn collect_links(
|
||||
inputs: Vec<String>,
|
||||
inputs: &[Input],
|
||||
base_url: Option<String>,
|
||||
skip_missing_inputs: bool,
|
||||
max_concurrency: usize,
|
||||
) -> Result<HashSet<Uri>> {
|
||||
let base_url = match base_url {
|
||||
Some(url) => Some(Url::parse(&url)?),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let mut links = HashSet::new();
|
||||
let (contents_tx, mut contents_rx) = tokio::sync::mpsc::channel(max_concurrency);
|
||||
|
||||
for input in inputs {
|
||||
match Url::parse(&input) {
|
||||
Ok(url) => {
|
||||
let path = String::from(url.path());
|
||||
let res = reqwest::get(url).await?;
|
||||
let content = res.text().await?;
|
||||
// extract input contents
|
||||
for input in inputs.iter().cloned() {
|
||||
let mut sender = contents_tx.clone();
|
||||
|
||||
links.extend(extract_links(
|
||||
resolve_file_type_by_path(path),
|
||||
&content,
|
||||
base_url.clone(),
|
||||
));
|
||||
}
|
||||
Err(_) => {
|
||||
// Assume we got a single file or a glob on our hands
|
||||
for entry in glob(&input)? {
|
||||
match entry {
|
||||
Ok(path) => {
|
||||
let content = fs::read_to_string(&path)?;
|
||||
links.extend(extract_links(
|
||||
resolve_file_type_by_path(&path),
|
||||
&content,
|
||||
base_url.clone(),
|
||||
));
|
||||
}
|
||||
Err(e) => println!("Error handling file pattern {}: {:?}", input, e),
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
tokio::spawn(async move {
|
||||
let contents = input.get_contents(None, skip_missing_inputs).await;
|
||||
sender.send(contents).await
|
||||
});
|
||||
}
|
||||
|
||||
// receiver will get None once all tasks are done
|
||||
drop(contents_tx);
|
||||
|
||||
// extract links from input contents
|
||||
let mut extract_links_handles = vec![];
|
||||
|
||||
while let Some(result) = contents_rx.recv().await {
|
||||
for input_content in result? {
|
||||
let base_url = base_url.clone();
|
||||
let handle =
|
||||
tokio::task::spawn_blocking(move || extract_links(&input_content, base_url));
|
||||
extract_links_handles.push(handle);
|
||||
}
|
||||
}
|
||||
|
||||
// Note: we could dispatch links to be checked as soon as we get them,
|
||||
// instead of building a HashSet with all links.
|
||||
// This optimization would speed up cases where there's
|
||||
// a lot of inputs and/or the inputs are large (e.g. big files).
|
||||
let mut collected_links = HashSet::new();
|
||||
|
||||
for handle in extract_links_handles {
|
||||
let links = handle.await?;
|
||||
collected_links.extend(links);
|
||||
}
|
||||
|
||||
Ok(collected_links)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test_utils::get_mock_server_with_content;
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
use std::str::FromStr;
|
||||
|
||||
const TEST_STRING: &str = "http://test-string.com";
|
||||
const TEST_URL: &str = "https://test-url.org";
|
||||
const TEST_FILE: &str = "https://test-file.io";
|
||||
const TEST_GLOB_1: &str = "https://test-glob-1.io";
|
||||
const TEST_GLOB_2_MAIL: &str = "test@glob-2.io";
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_collect_links() -> Result<()> {
|
||||
let dir = tempfile::tempdir()?;
|
||||
let file_path = dir.path().join("f");
|
||||
let file_glob_1_path = dir.path().join("glob-1");
|
||||
let file_glob_2_path = dir.path().join("glob-2");
|
||||
|
||||
let mut file = File::create(&file_path)?;
|
||||
let mut file_glob_1 = File::create(file_glob_1_path)?;
|
||||
let mut file_glob_2 = File::create(file_glob_2_path)?;
|
||||
|
||||
writeln!(file, "{}", TEST_FILE)?;
|
||||
writeln!(file_glob_1, "{}", TEST_GLOB_1)?;
|
||||
writeln!(file_glob_2, "{}", TEST_GLOB_2_MAIL)?;
|
||||
|
||||
let mock_server = get_mock_server_with_content(http::StatusCode::OK, Some(TEST_URL)).await;
|
||||
|
||||
let inputs = vec![
|
||||
Input::String(TEST_STRING.to_string()),
|
||||
Input::RemoteUrl(Url::from_str(&mock_server.uri())?),
|
||||
Input::FsPath(file_path),
|
||||
Input::FsGlob {
|
||||
pattern: dir.path().join("glob*").to_str().unwrap().to_string(),
|
||||
ignore_case: true,
|
||||
},
|
||||
];
|
||||
|
||||
let links = collect_links(&inputs, None, false, 8).await?;
|
||||
|
||||
let mut expected_links = HashSet::new();
|
||||
expected_links.insert(Uri::Website(Url::from_str(TEST_STRING)?));
|
||||
expected_links.insert(Uri::Website(Url::from_str(TEST_URL)?));
|
||||
expected_links.insert(Uri::Website(Url::from_str(TEST_FILE)?));
|
||||
expected_links.insert(Uri::Website(Url::from_str(TEST_GLOB_1)?));
|
||||
expected_links.insert(Uri::Mail(TEST_GLOB_2_MAIL.to_string()));
|
||||
|
||||
assert_eq!(links, expected_links);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Ok(links)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
use crate::collector::InputContent;
|
||||
use crate::types::Uri;
|
||||
use linkify::LinkFinder;
|
||||
use pulldown_cmark::{Event as MDEvent, Parser, Tag};
|
||||
|
|
@ -13,6 +14,27 @@ pub(crate) enum FileType {
|
|||
Plaintext,
|
||||
}
|
||||
|
||||
impl Default for FileType {
|
||||
fn default() -> Self {
|
||||
Self::Plaintext
|
||||
}
|
||||
}
|
||||
|
||||
impl<P: AsRef<Path>> From<P> for FileType {
|
||||
/// Detect if the given path points to a Markdown, HTML, or plaintext file.
|
||||
fn from(p: P) -> FileType {
|
||||
let path = p.as_ref();
|
||||
match path.extension() {
|
||||
Some(ext) => match ext {
|
||||
_ if ext == "md" => FileType::Markdown,
|
||||
_ if (ext == "htm" || ext == "html") => FileType::HTML,
|
||||
_ => FileType::Plaintext,
|
||||
},
|
||||
None => FileType::Plaintext,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Use LinkFinder here to offload the actual link searching
|
||||
fn find_links(input: &str) -> Vec<linkify::Link> {
|
||||
let finder = LinkFinder::new();
|
||||
|
|
@ -105,15 +127,11 @@ fn extract_links_from_plaintext(input: &str) -> Vec<String> {
|
|||
.collect()
|
||||
}
|
||||
|
||||
pub(crate) fn extract_links(
|
||||
file_type: FileType,
|
||||
input: &str,
|
||||
base_url: Option<Url>,
|
||||
) -> HashSet<Uri> {
|
||||
let links = match file_type {
|
||||
FileType::Markdown => extract_links_from_markdown(input),
|
||||
FileType::HTML => extract_links_from_html(input),
|
||||
FileType::Plaintext => extract_links_from_plaintext(input),
|
||||
pub(crate) fn extract_links(input_content: &InputContent, base_url: Option<Url>) -> HashSet<Uri> {
|
||||
let links = match input_content.file_type {
|
||||
FileType::Markdown => extract_links_from_markdown(&input_content.content),
|
||||
FileType::HTML => extract_links_from_html(&input_content.content),
|
||||
FileType::Plaintext => extract_links_from_plaintext(&input_content.content),
|
||||
};
|
||||
|
||||
// Only keep legit URLs. This sorts out things like anchors.
|
||||
|
|
@ -149,8 +167,7 @@ mod test {
|
|||
fn test_extract_markdown_links() {
|
||||
let input = "This is [a test](https://endler.dev). This is a relative link test [Relative Link Test](relative_link)";
|
||||
let links = extract_links(
|
||||
FileType::Markdown,
|
||||
input,
|
||||
&InputContent::from_string(input, FileType::Markdown),
|
||||
Some(Url::parse("https://github.com/hello-rust/lychee/").unwrap()),
|
||||
);
|
||||
assert_eq!(
|
||||
|
|
@ -178,8 +195,7 @@ mod test {
|
|||
</html>"#;
|
||||
|
||||
let links = extract_links(
|
||||
FileType::HTML,
|
||||
input,
|
||||
&InputContent::from_string(input, FileType::HTML),
|
||||
Some(Url::parse("https://github.com/hello-rust/").unwrap()),
|
||||
);
|
||||
|
||||
|
|
@ -196,14 +212,14 @@ mod test {
|
|||
#[test]
|
||||
fn test_skip_markdown_anchors() {
|
||||
let input = "This is [a test](#lol).";
|
||||
let links = extract_links(FileType::Markdown, input, None);
|
||||
let links = extract_links(&InputContent::from_string(input, FileType::Markdown), None);
|
||||
assert_eq!(links, HashSet::new())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_skip_markdown_internal_urls() {
|
||||
let input = "This is [a test](./internal).";
|
||||
let links = extract_links(FileType::Markdown, input, None);
|
||||
let links = extract_links(&InputContent::from_string(input, FileType::Markdown), None);
|
||||
assert_eq!(links, HashSet::new())
|
||||
}
|
||||
|
||||
|
|
@ -211,7 +227,7 @@ mod test {
|
|||
fn test_non_markdown_links() {
|
||||
let input =
|
||||
"https://endler.dev and https://hello-rust.show/foo/bar?lol=1 at test@example.com";
|
||||
let links = extract_links(FileType::Plaintext, input, None);
|
||||
let links = extract_links(&InputContent::from_string(input, FileType::Plaintext), None);
|
||||
let expected = HashSet::from_iter(
|
||||
[
|
||||
Uri::Website(Url::parse("https://endler.dev").unwrap()),
|
||||
|
|
|
|||
42
src/main.rs
42
src/main.rs
|
|
@ -19,12 +19,15 @@ mod options;
|
|||
mod stats;
|
||||
mod types;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test_utils;
|
||||
|
||||
use client::ClientBuilder;
|
||||
use client_pool::ClientPool;
|
||||
use collector::Input;
|
||||
use options::{Config, LycheeOptions};
|
||||
use stats::ResponseStats;
|
||||
use types::Response;
|
||||
use types::{Excludes, Status};
|
||||
use types::{Excludes, Response, Status};
|
||||
|
||||
/// A C-like enum that can be cast to `i32` and used as process exit code.
|
||||
enum ExitCode {
|
||||
|
|
@ -39,14 +42,13 @@ enum ExitCode {
|
|||
|
||||
fn main() -> Result<()> {
|
||||
pretty_env_logger::init();
|
||||
let opts = LycheeOptions::from_args();
|
||||
let mut opts = LycheeOptions::from_args();
|
||||
|
||||
// Load a potentially existing config file and merge it into the config from the CLI
|
||||
let cfg = if let Some(c) = Config::load_from_file(&opts.config_file)? {
|
||||
if let Some(c) = Config::load_from_file(&opts.config_file)? {
|
||||
opts.config.merge(c)
|
||||
} else {
|
||||
opts.config
|
||||
};
|
||||
}
|
||||
let cfg = &opts.config;
|
||||
|
||||
let mut runtime = match cfg.threads {
|
||||
Some(threads) => {
|
||||
|
|
@ -59,7 +61,7 @@ fn main() -> Result<()> {
|
|||
}
|
||||
None => tokio::runtime::Runtime::new()?,
|
||||
};
|
||||
let errorcode = runtime.block_on(run(cfg, opts.inputs))?;
|
||||
let errorcode = runtime.block_on(run(cfg, opts.inputs()))?;
|
||||
std::process::exit(errorcode);
|
||||
}
|
||||
|
||||
|
|
@ -76,7 +78,7 @@ fn show_progress(progress_bar: &Option<ProgressBar>, response: &Response, verbos
|
|||
};
|
||||
}
|
||||
|
||||
async fn run(cfg: Config, inputs: Vec<String>) -> Result<i32> {
|
||||
async fn run(cfg: &Config, inputs: Vec<Input>) -> Result<i32> {
|
||||
let mut headers = parse_headers(&cfg.headers)?;
|
||||
if let Some(auth) = &cfg.basic_auth {
|
||||
let auth_header = parse_basic_auth(&auth)?;
|
||||
|
|
@ -84,8 +86,8 @@ async fn run(cfg: Config, inputs: Vec<String>) -> Result<i32> {
|
|||
}
|
||||
|
||||
let accepted = cfg.accept.clone().and_then(|a| parse_statuscodes(&a).ok());
|
||||
let timeout = parse_timeout(&cfg.timeout)?;
|
||||
let max_concurrency = cfg.max_concurrency.parse()?;
|
||||
let timeout = parse_timeout(cfg.timeout);
|
||||
let max_concurrency = cfg.max_concurrency;
|
||||
let method: reqwest::Method = reqwest::Method::from_str(&cfg.method.to_uppercase())?;
|
||||
let includes = RegexSet::new(&cfg.include)?;
|
||||
let excludes = Excludes::from_options(&cfg);
|
||||
|
|
@ -94,18 +96,24 @@ async fn run(cfg: Config, inputs: Vec<String>) -> Result<i32> {
|
|||
.includes(includes)
|
||||
.excludes(excludes)
|
||||
.max_redirects(cfg.max_redirects)
|
||||
.user_agent(cfg.user_agent)
|
||||
.user_agent(cfg.user_agent.clone())
|
||||
.allow_insecure(cfg.insecure)
|
||||
.custom_headers(headers)
|
||||
.method(method)
|
||||
.timeout(timeout)
|
||||
.verbose(cfg.verbose)
|
||||
.github_token(cfg.github_token)
|
||||
.scheme(cfg.scheme)
|
||||
.github_token(cfg.github_token.clone())
|
||||
.scheme(cfg.scheme.clone())
|
||||
.accepted(accepted)
|
||||
.build()?;
|
||||
|
||||
let links = collector::collect_links(inputs, cfg.base_url.clone()).await?;
|
||||
let links = collector::collect_links(
|
||||
&inputs,
|
||||
cfg.base_url.clone(),
|
||||
cfg.skip_missing,
|
||||
max_concurrency,
|
||||
)
|
||||
.await?;
|
||||
let pb = if cfg.progress {
|
||||
Some(
|
||||
ProgressBar::new(links.len() as u64)
|
||||
|
|
@ -173,8 +181,8 @@ fn read_header(input: &str) -> Result<(String, String)> {
|
|||
Ok((elements[0].into(), elements[1].into()))
|
||||
}
|
||||
|
||||
fn parse_timeout<S: AsRef<str>>(timeout: S) -> Result<Duration> {
|
||||
Ok(Duration::from_secs(timeout.as_ref().parse::<u64>()?))
|
||||
fn parse_timeout(timeout: usize) -> Duration {
|
||||
Duration::from_secs(timeout as u64)
|
||||
}
|
||||
|
||||
fn parse_headers<T: AsRef<str>>(headers: &[T]) -> Result<HeaderMap> {
|
||||
|
|
|
|||
103
src/options.rs
103
src/options.rs
|
|
@ -1,12 +1,24 @@
|
|||
use crate::collector::Input;
|
||||
|
||||
use anyhow::{Error, Result};
|
||||
use lazy_static::lazy_static;
|
||||
use serde::Deserialize;
|
||||
use std::{fs, io::ErrorKind};
|
||||
use structopt::{clap::crate_version, StructOpt};
|
||||
|
||||
pub(crate) const USER_AGENT: &str = concat!("lychee/", crate_version!());
|
||||
const METHOD: &str = "get";
|
||||
const TIMEOUT: &str = "20";
|
||||
const MAX_CONCURRENCY: &str = "128";
|
||||
const TIMEOUT: usize = 20;
|
||||
const MAX_CONCURRENCY: usize = 128;
|
||||
const MAX_REDIRECTS: usize = 10;
|
||||
|
||||
// this exists because structopt requires `&str` type values for defaults
|
||||
// (we can't use e.g. `TIMEOUT` or `timeout()` which gets created for serde)
|
||||
lazy_static! {
|
||||
static ref TIMEOUT_STR: String = TIMEOUT.to_string();
|
||||
static ref MAX_CONCURRENCY_STR: String = MAX_CONCURRENCY.to_string();
|
||||
static ref MAX_REDIRECTS_STR: String = MAX_REDIRECTS.to_string();
|
||||
}
|
||||
|
||||
// Macro for generating default functions to be used by serde
|
||||
macro_rules! default_function {
|
||||
|
|
@ -19,6 +31,15 @@ macro_rules! default_function {
|
|||
};
|
||||
}
|
||||
|
||||
// Generate the functions for serde defaults
|
||||
default_function! {
|
||||
max_redirects: usize = MAX_REDIRECTS;
|
||||
max_concurrency: usize = MAX_CONCURRENCY;
|
||||
user_agent: String = USER_AGENT.to_string();
|
||||
timeout: usize = TIMEOUT;
|
||||
method: String = METHOD.to_string();
|
||||
}
|
||||
|
||||
// Macro for merging configuration values
|
||||
macro_rules! fold_in {
|
||||
( $cli:ident , $toml:ident ; $( $key:ident : $default:expr; )* ) => {
|
||||
|
|
@ -31,11 +52,16 @@ macro_rules! fold_in {
|
|||
}
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
#[structopt(name = "lychee", about = "A glorious link checker")]
|
||||
#[structopt(
|
||||
name = "lychee",
|
||||
about = "A glorious link checker.\n\nProject home page: https://github.com/lycheeverse/lychee"
|
||||
)]
|
||||
pub(crate) struct LycheeOptions {
|
||||
/// Input files
|
||||
#[structopt(default_value = "README.md")]
|
||||
pub inputs: Vec<String>,
|
||||
/// The inputs (where to get links to check from).
|
||||
/// These can be: files (e.g. `README.md`), file globs (e.g. `"~/git/*/README.md"`),
|
||||
/// remote URLs (e.g. `https://example.com/README.md`) or standard input (`-`).
|
||||
#[structopt(name = "inputs", default_value = "README.md")]
|
||||
raw_inputs: Vec<String>,
|
||||
|
||||
/// Configuration file to use
|
||||
#[structopt(short, long = "config", default_value = "./lychee.toml")]
|
||||
|
|
@ -45,6 +71,19 @@ pub(crate) struct LycheeOptions {
|
|||
pub config: Config,
|
||||
}
|
||||
|
||||
impl LycheeOptions {
|
||||
// This depends on config, which is why a method is required (we could
|
||||
// accept a `Vec<Input>` in `LycheeOptions` and do the conversion there,
|
||||
// but we'd get no access to `glob_ignore_case`.
|
||||
/// Get parsed inputs from options.
|
||||
pub(crate) fn inputs(&self) -> Vec<Input> {
|
||||
self.raw_inputs
|
||||
.iter()
|
||||
.map(|s| Input::new(s, self.config.glob_ignore_case))
|
||||
.collect()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, StructOpt)]
|
||||
pub struct Config {
|
||||
/// Verbose program output
|
||||
|
|
@ -58,14 +97,14 @@ pub struct Config {
|
|||
pub progress: bool,
|
||||
|
||||
/// Maximum number of allowed redirects
|
||||
#[structopt(short, long, default_value = "10")]
|
||||
#[serde(default)]
|
||||
#[structopt(short, long, default_value = &MAX_REDIRECTS_STR)]
|
||||
#[serde(default = "max_redirects")]
|
||||
pub max_redirects: usize,
|
||||
|
||||
/// Maximum number of concurrent network requests
|
||||
#[structopt(long, default_value = MAX_CONCURRENCY)]
|
||||
#[serde(default)]
|
||||
pub max_concurrency: String,
|
||||
#[structopt(long, default_value = &MAX_CONCURRENCY_STR)]
|
||||
#[serde(default = "max_concurrency")]
|
||||
pub max_concurrency: usize,
|
||||
|
||||
/// Number of threads to utilize.
|
||||
/// Defaults to number of cores available to the system
|
||||
|
|
@ -130,9 +169,9 @@ pub struct Config {
|
|||
pub accept: Option<String>,
|
||||
|
||||
/// Website timeout from connect to response finished
|
||||
#[structopt(short, long, default_value = TIMEOUT)]
|
||||
#[structopt(short, long, default_value = &TIMEOUT_STR)]
|
||||
#[serde(default = "timeout")]
|
||||
pub timeout: String,
|
||||
pub timeout: usize,
|
||||
|
||||
/// Request method
|
||||
// Using `-X` as a short param similar to curl
|
||||
|
|
@ -140,21 +179,30 @@ pub struct Config {
|
|||
#[serde(default = "method")]
|
||||
pub method: String,
|
||||
|
||||
#[structopt(short, long, help = "Base URL to check relative URls")]
|
||||
/// Base URL to check relative URLs
|
||||
#[structopt(short, long)]
|
||||
#[serde(default)]
|
||||
pub base_url: Option<String>,
|
||||
|
||||
#[structopt(long, help = "Basic authentication support. Ex 'username:password'")]
|
||||
/// Basic authentication support. E.g. `username:password`
|
||||
#[structopt(long)]
|
||||
#[serde(default)]
|
||||
pub basic_auth: Option<String>,
|
||||
|
||||
#[structopt(
|
||||
long,
|
||||
help = "GitHub API token to use when checking github.com links, to avoid rate limiting",
|
||||
env = "GITHUB_TOKEN"
|
||||
)]
|
||||
/// GitHub API token to use when checking github.com links, to avoid rate limiting
|
||||
#[structopt(long, env = "GITHUB_TOKEN")]
|
||||
#[serde(default)]
|
||||
pub github_token: Option<String>,
|
||||
|
||||
/// Skip missing input files (default is to error if they don't exist)
|
||||
#[structopt(long)]
|
||||
#[serde(default)]
|
||||
pub skip_missing: bool,
|
||||
|
||||
/// Ignore case when expanding filesystem path glob inputs
|
||||
#[structopt(long)]
|
||||
#[serde(default)]
|
||||
pub glob_ignore_case: bool,
|
||||
}
|
||||
|
||||
impl Config {
|
||||
|
|
@ -178,7 +226,7 @@ impl Config {
|
|||
}
|
||||
|
||||
/// Merge the configuration from TOML into the CLI configuration
|
||||
pub(crate) fn merge(mut self, toml: Config) -> Config {
|
||||
pub(crate) fn merge(&mut self, toml: Config) {
|
||||
fold_in! {
|
||||
// Destination and source configs
|
||||
self, toml;
|
||||
|
|
@ -186,7 +234,7 @@ impl Config {
|
|||
// Keys with defaults to assign
|
||||
verbose: false;
|
||||
progress: false;
|
||||
max_redirects: 10;
|
||||
max_redirects: MAX_REDIRECTS;
|
||||
max_concurrency: MAX_CONCURRENCY;
|
||||
threads: None;
|
||||
user_agent: USER_AGENT;
|
||||
|
|
@ -205,15 +253,8 @@ impl Config {
|
|||
base_url: None;
|
||||
basic_auth: None;
|
||||
github_token: None;
|
||||
skip_missing: false;
|
||||
glob_ignore_case: false;
|
||||
}
|
||||
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
// Generate the functions for serde defaults
|
||||
default_function! {
|
||||
user_agent: String = USER_AGENT.to_string();
|
||||
timeout: String = TIMEOUT.to_string();
|
||||
method: String = METHOD.to_string();
|
||||
}
|
||||
|
|
|
|||
38
src/test_utils.rs
Normal file
38
src/test_utils.rs
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
#![cfg(test)]
|
||||
|
||||
use http::StatusCode;
|
||||
use wiremock::matchers::path;
|
||||
use wiremock::{Mock, MockServer, ResponseTemplate};
|
||||
|
||||
// TODO: used in cli tests (as duplicate)
|
||||
#[allow(unused)]
|
||||
pub(crate) async fn get_mock_server<S>(response_code: S) -> MockServer
|
||||
where
|
||||
S: Into<StatusCode>,
|
||||
{
|
||||
get_mock_server_with_content(response_code, None).await
|
||||
}
|
||||
|
||||
pub(crate) async fn get_mock_server_with_content<S>(
|
||||
response_code: S,
|
||||
content: Option<&str>,
|
||||
) -> MockServer
|
||||
where
|
||||
S: Into<StatusCode>,
|
||||
{
|
||||
let mock_server = MockServer::start().await;
|
||||
let template = ResponseTemplate::new(response_code.into());
|
||||
|
||||
let template = if let Some(s) = content {
|
||||
template.set_body_string(s)
|
||||
} else {
|
||||
template
|
||||
};
|
||||
|
||||
Mock::given(path("/"))
|
||||
.respond_with(template)
|
||||
.mount(&mock_server)
|
||||
.await;
|
||||
|
||||
mock_server
|
||||
}
|
||||
|
|
@ -119,7 +119,7 @@ impl From<reqwest::Error> for Status {
|
|||
}
|
||||
|
||||
/// Exclude configuration for the link checker.
|
||||
/// You can ignore links based on
|
||||
/// You can ignore links based on regex patterns or pre-defined IP ranges.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Excludes {
|
||||
pub regex: Option<RegexSet>,
|
||||
|
|
|
|||
231
tests/cli.rs
231
tests/cli.rs
|
|
@ -1,20 +1,58 @@
|
|||
#[cfg(test)]
|
||||
mod cli {
|
||||
use anyhow::Result;
|
||||
use assert_cmd::Command;
|
||||
use http::StatusCode;
|
||||
use predicates::str::contains;
|
||||
use std::path::Path;
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
use std::path::{Path, PathBuf};
|
||||
use wiremock::matchers::path;
|
||||
use wiremock::{Mock, MockServer, ResponseTemplate};
|
||||
|
||||
fn main_command() -> Command {
|
||||
// this gets the "main" binary name (e.g. `lychee`)
|
||||
Command::cargo_bin(env!("CARGO_PKG_NAME")).expect("Couldn't get cargo package name")
|
||||
}
|
||||
|
||||
fn fixtures_path() -> PathBuf {
|
||||
Path::new(module_path!()).parent().unwrap().join("fixtures")
|
||||
}
|
||||
|
||||
// TODO: duplicate of test_utils
|
||||
async fn get_mock_server<S>(response_code: S) -> MockServer
|
||||
where
|
||||
S: Into<StatusCode>,
|
||||
{
|
||||
get_mock_server_with_content(response_code, None).await
|
||||
}
|
||||
|
||||
async fn get_mock_server_with_content<S>(response_code: S, content: Option<&str>) -> MockServer
|
||||
where
|
||||
S: Into<StatusCode>,
|
||||
{
|
||||
let mock_server = MockServer::start().await;
|
||||
let template = ResponseTemplate::new(response_code.into());
|
||||
|
||||
let template = if let Some(s) = content {
|
||||
template.set_body_string(s)
|
||||
} else {
|
||||
template
|
||||
};
|
||||
|
||||
Mock::given(path("/"))
|
||||
.respond_with(template)
|
||||
.mount(&mock_server)
|
||||
.await;
|
||||
|
||||
mock_server
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_exclude_all_private() {
|
||||
// this gets the "main" binary name (e.g. `lychee`)
|
||||
let mut cmd =
|
||||
Command::cargo_bin(env!("CARGO_PKG_NAME")).expect("Couldn't get cargo package name");
|
||||
let mut cmd = main_command();
|
||||
|
||||
let test_all_private_path = Path::new(module_path!())
|
||||
.parent()
|
||||
.unwrap()
|
||||
.join("fixtures")
|
||||
.join("TEST_ALL_PRIVATE.md");
|
||||
let test_all_private_path = fixtures_path().join("TEST_ALL_PRIVATE.md");
|
||||
|
||||
// assert that the command runs OK, and that it excluded all the links
|
||||
cmd.arg("--exclude-all-private")
|
||||
|
|
@ -31,14 +69,8 @@ mod cli {
|
|||
/// Test that a GitHub link can be checked without specifying the token.
|
||||
#[test]
|
||||
fn test_check_github_no_token() {
|
||||
let mut cmd =
|
||||
Command::cargo_bin(env!("CARGO_PKG_NAME")).expect("Couldn't get cargo package name");
|
||||
|
||||
let test_github_path = Path::new(module_path!())
|
||||
.parent()
|
||||
.unwrap()
|
||||
.join("fixtures")
|
||||
.join("TEST_GITHUB.md");
|
||||
let mut cmd = main_command();
|
||||
let test_github_path = fixtures_path().join("TEST_GITHUB.md");
|
||||
|
||||
cmd.arg("--verbose")
|
||||
.arg(test_github_path)
|
||||
|
|
@ -50,30 +82,27 @@ mod cli {
|
|||
.stdout(contains("Errors: 0"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_failure_404_link() {
|
||||
let mut cmd =
|
||||
Command::cargo_bin(env!("CARGO_PKG_NAME")).expect("Couldn't get cargo package name");
|
||||
#[tokio::test]
|
||||
async fn test_failure_404_link() {
|
||||
let mut cmd = main_command();
|
||||
let mock_server = get_mock_server(http::StatusCode::NOT_FOUND).await;
|
||||
let dir = tempfile::tempdir().expect("Failed to create tempdir");
|
||||
let file_path = dir.path().join("test.txt");
|
||||
let mut file = File::create(&file_path).expect("Failed to create tempfile");
|
||||
|
||||
let test_404_path = Path::new(module_path!())
|
||||
.parent()
|
||||
.unwrap()
|
||||
.join("fixtures")
|
||||
.join("TEST_404.md");
|
||||
writeln!(file, "{}", mock_server.uri()).expect("Failed to write to file");
|
||||
|
||||
cmd.arg(test_404_path).assert().failure().code(2);
|
||||
cmd.arg(file_path)
|
||||
.write_stdin(mock_server.uri())
|
||||
.assert()
|
||||
.failure()
|
||||
.code(2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_failure_github_404_no_token() {
|
||||
let mut cmd =
|
||||
Command::cargo_bin(env!("CARGO_PKG_NAME")).expect("Couldn't get cargo package name");
|
||||
|
||||
let test_github_404_path = Path::new(module_path!())
|
||||
.parent()
|
||||
.unwrap()
|
||||
.join("fixtures")
|
||||
.join("TEST_GITHUB_404.md");
|
||||
let mut cmd = main_command();
|
||||
let test_github_404_path = fixtures_path().join("TEST_GITHUB_404.md");
|
||||
|
||||
cmd.arg(test_github_404_path)
|
||||
.env_clear()
|
||||
|
|
@ -83,4 +112,136 @@ mod cli {
|
|||
.stdout(contains("https://github.com/mre/idiomatic-rust-doesnt-exist-man \
|
||||
(GitHub token not specified. To check GitHub links reliably, use `--github-token` flag / `GITHUB_TOKEN` env var.)"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_stdin_input() {
|
||||
let mut cmd = main_command();
|
||||
let mock_server = get_mock_server(http::StatusCode::OK).await;
|
||||
|
||||
cmd.arg("-")
|
||||
.write_stdin(mock_server.uri())
|
||||
.assert()
|
||||
.success();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_stdin_input_failure() {
|
||||
let mut cmd = main_command();
|
||||
let mock_server = get_mock_server(http::StatusCode::INTERNAL_SERVER_ERROR).await;
|
||||
|
||||
cmd.arg("-")
|
||||
.write_stdin(mock_server.uri())
|
||||
.assert()
|
||||
.failure()
|
||||
.code(2);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_stdin_input_multiple() {
|
||||
let mut cmd = main_command();
|
||||
let mock_server_a = get_mock_server(http::StatusCode::OK).await;
|
||||
let mock_server_b = get_mock_server(http::StatusCode::OK).await;
|
||||
|
||||
// this behavior (treating multiple `-` as separate inputs) is the same as most CLI tools
|
||||
// that accept `-` as stdin, e.g. `cat`, `bat`, `grep` etc.
|
||||
cmd.arg("-")
|
||||
.arg("-")
|
||||
.write_stdin(mock_server_a.uri())
|
||||
.write_stdin(mock_server_b.uri())
|
||||
.assert()
|
||||
.success();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_missing_file_error() {
|
||||
let mut cmd = main_command();
|
||||
let filename = format!("non-existing-file-{}", uuid::Uuid::new_v4().to_string());
|
||||
|
||||
cmd.arg(&filename)
|
||||
.assert()
|
||||
.failure()
|
||||
.code(1)
|
||||
.stderr(contains(format!(
|
||||
"Error: Failed to read file: `{}`",
|
||||
filename
|
||||
)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_missing_file_ok_if_skip_missing() {
|
||||
let mut cmd = main_command();
|
||||
let filename = format!("non-existing-file-{}", uuid::Uuid::new_v4().to_string());
|
||||
|
||||
cmd.arg(&filename).arg("--skip-missing").assert().success();
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_glob() -> Result<()> {
|
||||
// using Result to be able to use `?`
|
||||
let mut cmd = main_command();
|
||||
|
||||
let dir = tempfile::tempdir()?;
|
||||
let mock_server_a = get_mock_server(http::StatusCode::OK).await;
|
||||
let mock_server_b = get_mock_server(http::StatusCode::OK).await;
|
||||
let mut file_a = File::create(dir.path().join("a.md"))?;
|
||||
let mut file_b = File::create(dir.path().join("b.md"))?;
|
||||
|
||||
writeln!(file_a, "{}", mock_server_a.uri().as_str())?;
|
||||
writeln!(file_b, "{}", mock_server_b.uri().as_str())?;
|
||||
|
||||
cmd.arg(dir.path().join("*.md"))
|
||||
.arg("--verbose")
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(contains("Total: 2"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(target_os = "linux")] // MacOS and Windows have case-insensitive filesystems
|
||||
#[tokio::test]
|
||||
async fn test_glob_ignore_case() -> Result<()> {
|
||||
let mut cmd = main_command();
|
||||
|
||||
let dir = tempfile::tempdir()?;
|
||||
let mock_server_a = get_mock_server(http::StatusCode::OK).await;
|
||||
let mock_server_b = get_mock_server(http::StatusCode::OK).await;
|
||||
let mut file_a = File::create(dir.path().join("README.md"))?;
|
||||
let mut file_b = File::create(dir.path().join("readme.md"))?;
|
||||
|
||||
writeln!(file_a, "{}", mock_server_a.uri().as_str())?;
|
||||
writeln!(file_b, "{}", mock_server_b.uri().as_str())?;
|
||||
|
||||
cmd.arg(dir.path().join("[r]eadme.md"))
|
||||
.arg("--verbose")
|
||||
.arg("--glob-ignore-case")
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(contains("Total: 2"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_glob_recursive() -> Result<()> {
|
||||
let mut cmd = main_command();
|
||||
|
||||
let dir = tempfile::tempdir()?;
|
||||
let subdir_level_1 = tempfile::tempdir_in(&dir)?;
|
||||
let subdir_level_2 = tempfile::tempdir_in(&subdir_level_1)?;
|
||||
|
||||
let mock_server = get_mock_server(http::StatusCode::OK).await;
|
||||
let mut file = File::create(subdir_level_2.path().join("test.md"))?;
|
||||
|
||||
writeln!(file, "{}", mock_server.uri().as_str())?;
|
||||
|
||||
// ** should be a recursive glob
|
||||
cmd.arg(dir.path().join("**/*.md"))
|
||||
.arg("--verbose")
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(contains("Total: 1"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue