diff --git a/Cargo.lock b/Cargo.lock index b81f71a..2931082 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -500,6 +500,12 @@ dependencies = [ "tokio-core", ] +[[package]] +name = "glob" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574" + [[package]] name = "gumdrop" version = "0.8.0" @@ -826,6 +832,7 @@ dependencies = [ "anyhow", "futures 0.3.5", "github-rs", + "glob", "gumdrop", "http 0.1.21", "linkify", diff --git a/Cargo.toml b/Cargo.toml index 8123650..be17c5d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ version = "0.2.0" anyhow = "1.0.32" futures = "0.3" github-rs = "0.7.0" +glob = "*" gumdrop = "0.8.0" http = "0.1.21" linkify = "0.4.0" diff --git a/README.md b/README.md index 7e3df8e..b0e9053 100644 --- a/README.md +++ b/README.md @@ -26,15 +26,14 @@ lychee can... - disguise as a different user agent (like curl) - optionally ignore SSL certificate errors - run with a low memory/CPU footprint -- check multiple files at once +- check multiple files at once (supports globbing) - support checking links from any website URL - limit scheme (e.g. only check HTTPS links with "https") SOON: - automatically retry and backoff -- check relative and absolute paths -- support input files using wildcards +- check relative and absolute URLs - set timeout for HTTP requests in seconds. Disabled by default. 
- accept custom headers (see https://github.com/rust-lang/crates.io/issues/788) - use `HEAD` requests instead of `GET` to avoid network I/O diff --git a/fixtures/TEST.html b/fixtures/TEST.html new file mode 100644 index 0000000..4415bbc --- /dev/null +++ b/fixtures/TEST.html @@ -0,0 +1,15 @@ + + + Some more test links + + + + + diff --git a/src/checker.rs b/src/checker.rs index 66729ee..73a978a 100644 --- a/src/checker.rs +++ b/src/checker.rs @@ -151,8 +151,10 @@ impl Checker { return true; } } - if Some(url.scheme().to_string()) != self.scheme { - return true; + if let Some(scheme) = &self.scheme { + if url.scheme() != scheme { + return true; + } } false } diff --git a/src/main.rs b/src/main.rs index 5acad49..d4f1cb5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -2,6 +2,7 @@ extern crate log; use anyhow::Result; +use glob::glob; use regex::RegexSet; use std::env; use std::{collections::HashSet, fs}; @@ -96,14 +97,25 @@ async fn collect_links(inputs: Vec<String>) -> Result<HashSet<Url>> { let mut links = HashSet::new(); for input in inputs { - let content = match Url::parse(&input) { + match Url::parse(&input) { Ok(url) => { let res = reqwest::get(url).await?; - res.text().await? + let content = res.text().await?; + links.extend(extract_links(&content)); + } + Err(_) => { + // Assume we got a single file or a glob on our hands + for entry in glob(&input)? { + match entry { + Ok(path) => { + let content = fs::read_to_string(path)?; + links.extend(extract_links(&content)); + } + Err(e) => println!("{:?}", e), + } + } } - Err(_) => fs::read_to_string(input)?, }; - links.extend(extract_links(&content)); } Ok(links) }