mirror of
https://github.com/Hopiu/lychee.git
synced 2026-03-30 03:10:25 +00:00
Add globbing support
This commit is contained in:
parent
e758056f60
commit
391144b2ff
6 changed files with 45 additions and 9 deletions
7
Cargo.lock
generated
7
Cargo.lock
generated
|
|
@ -500,6 +500,12 @@ dependencies = [
|
|||
"tokio-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
|
||||
|
||||
[[package]]
|
||||
name = "gumdrop"
|
||||
version = "0.8.0"
|
||||
|
|
@ -826,6 +832,7 @@ dependencies = [
|
|||
"anyhow",
|
||||
"futures 0.3.5",
|
||||
"github-rs",
|
||||
"glob",
|
||||
"gumdrop",
|
||||
"http 0.1.21",
|
||||
"linkify",
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ version = "0.2.0"
|
|||
anyhow = "1.0.32"
|
||||
futures = "0.3"
|
||||
github-rs = "0.7.0"
|
||||
glob = "*"
|
||||
gumdrop = "0.8.0"
|
||||
http = "0.1.21"
|
||||
linkify = "0.4.0"
|
||||
|
|
|
|||
|
|
@ -26,15 +26,14 @@ lychee can...
|
|||
- disguise as a different user agent (like curl)
|
||||
- optionally ignore SSL certificate errors
|
||||
- run with a low memory/CPU footprint
|
||||
- check multiple files at once
|
||||
- check multiple files at once (supports globbing)
|
||||
- support checking links from any website URL
|
||||
- limit scheme (e.g. only check HTTPS links with "https")
|
||||
|
||||
SOON:
|
||||
|
||||
- automatically retry and backoff
|
||||
- check relative and absolute paths
|
||||
- support input files using wildcards
|
||||
- check relative and absolute URLs
|
||||
- set timeout for HTTP requests in seconds. Disabled by default.
|
||||
- accept custom headers (see https://github.com/rust-lang/crates.io/issues/788)
|
||||
- use `HEAD` requests instead of `GET` to reduce network I/O
|
||||
|
|
|
|||
15
fixtures/TEST.html
Normal file
15
fixtures/TEST.html
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
<html>
|
||||
<head>
|
||||
<title>Some more test links</title>
|
||||
</head>
|
||||
<body>
|
||||
<ul>
|
||||
<li>
|
||||
<a href="https://endler.dev/2020/rust-compile-times/"
|
||||
>Matthias Endler</a
|
||||
>
|
||||
</li>
|
||||
<li><a href="https://hello-rust.show/10/">Hello Rust</a></li>
|
||||
</ul>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -151,8 +151,10 @@ impl Checker {
|
|||
return true;
|
||||
}
|
||||
}
|
||||
if Some(url.scheme().to_string()) != self.scheme {
|
||||
return true;
|
||||
if let Some(scheme) = &self.scheme {
|
||||
if url.scheme() != scheme {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
|
|
|||
20
src/main.rs
20
src/main.rs
|
|
@ -2,6 +2,7 @@
|
|||
extern crate log;
|
||||
|
||||
use anyhow::Result;
|
||||
use glob::glob;
|
||||
use regex::RegexSet;
|
||||
use std::env;
|
||||
use std::{collections::HashSet, fs};
|
||||
|
|
@ -96,14 +97,25 @@ async fn collect_links(inputs: Vec<String>) -> Result<HashSet<Url>> {
|
|||
let mut links = HashSet::new();
|
||||
|
||||
for input in inputs {
|
||||
let content = match Url::parse(&input) {
|
||||
match Url::parse(&input) {
|
||||
Ok(url) => {
|
||||
let res = reqwest::get(url).await?;
|
||||
res.text().await?
|
||||
let content = res.text().await?;
|
||||
links.extend(extract_links(&content));
|
||||
}
|
||||
Err(_) => {
|
||||
// Assume we got a single file or a glob on our hands
|
||||
for entry in glob(&input)? {
|
||||
match entry {
|
||||
Ok(path) => {
|
||||
let content = fs::read_to_string(path)?;
|
||||
links.extend(extract_links(&content));
|
||||
}
|
||||
Err(e) => println!("{:?}", e),
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(_) => fs::read_to_string(input)?,
|
||||
};
|
||||
links.extend(extract_links(&content));
|
||||
}
|
||||
Ok(links)
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue