Add globbing support

This commit is contained in:
Matthias Endler 2020-08-14 02:33:04 +02:00
parent e758056f60
commit 391144b2ff
6 changed files with 45 additions and 9 deletions

7
Cargo.lock generated
View file

@ -500,6 +500,12 @@ dependencies = [
"tokio-core",
]
[[package]]
name = "glob"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
[[package]]
name = "gumdrop"
version = "0.8.0"
@ -826,6 +832,7 @@ dependencies = [
"anyhow",
"futures 0.3.5",
"github-rs",
"glob",
"gumdrop",
"http 0.1.21",
"linkify",

View file

@ -12,6 +12,7 @@ version = "0.2.0"
anyhow = "1.0.32"
futures = "0.3"
github-rs = "0.7.0"
glob = "*"
gumdrop = "0.8.0"
http = "0.1.21"
linkify = "0.4.0"

View file

@ -26,15 +26,14 @@ lychee can...
- disguise as a different user agent (like curl)
- optionally ignore SSL certificate errors
- run with a low memory/CPU footprint
- check multiple files at once
- check multiple files at once (supports globbing)
- support checking links from any website URL
- limit scheme (e.g. only check HTTPS links with "https")
SOON:
- automatically retry and backoff
- check relative and absolute paths
- support input files using wildcards
- check relative and absolute URLs
- set timeout for HTTP requests in seconds. Disabled by default.
- accept custom headers (see https://github.com/rust-lang/crates.io/issues/788)
- use `HEAD` requests instead of `GET` to avoid network I/O

15
fixtures/TEST.html Normal file
View file

@ -0,0 +1,15 @@
<html>
<head>
<title>Some more test links</title>
</head>
<body>
<ul>
<li>
<a href="https://endler.dev/2020/rust-compile-times/"
>Matthias Endler</a
>
</li>
<li><a href="https://hello-rust.show/10/">Hello Rust</a></li>
</ul>
</body>
</html>

View file

@ -151,8 +151,10 @@ impl Checker {
return true;
}
}
if Some(url.scheme().to_string()) != self.scheme {
return true;
if let Some(scheme) = &self.scheme {
if url.scheme() != scheme {
return true;
}
}
false
}

View file

@ -2,6 +2,7 @@
extern crate log;
use anyhow::Result;
use glob::glob;
use regex::RegexSet;
use std::env;
use std::{collections::HashSet, fs};
@ -96,14 +97,25 @@ async fn collect_links(inputs: Vec<String>) -> Result<HashSet<Url>> {
let mut links = HashSet::new();
for input in inputs {
let content = match Url::parse(&input) {
match Url::parse(&input) {
Ok(url) => {
let res = reqwest::get(url).await?;
res.text().await?
let content = res.text().await?;
links.extend(extract_links(&content));
}
Err(_) => {
// Assume we got a single file or a glob on our hands
for entry in glob(&input)? {
match entry {
Ok(path) => {
let content = fs::read_to_string(path)?;
links.extend(extract_links(&content));
}
Err(e) => println!("{:?}", e),
}
}
}
Err(_) => fs::read_to_string(input)?,
};
links.extend(extract_links(&content));
}
Ok(links)
}