mirror of
https://github.com/Hopiu/lychee.git
synced 2026-04-16 03:10:58 +00:00
Merge pull request #145 from lycheeverse/simple-client
Improve lychee ergonomics
This commit is contained in:
commit
b8f24bfa3b
13 changed files with 277 additions and 130 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
|
@ -1397,8 +1397,10 @@ dependencies = [
|
|||
"anyhow",
|
||||
"assert_cmd",
|
||||
"check-if-email-exists",
|
||||
"console",
|
||||
"deadpool",
|
||||
"derive_builder",
|
||||
"doc-comment",
|
||||
"futures",
|
||||
"glob",
|
||||
"headers",
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@ serde_json = "1.0.62"
|
|||
# https://github.com/Homebrew/homebrew-core/pull/70216
|
||||
ring = "0.16.19"
|
||||
pad = "0.1.6"
|
||||
console = "0.14.0"
|
||||
|
||||
[dependencies.reqwest]
|
||||
features = ["gzip"]
|
||||
|
|
@ -66,6 +67,7 @@ assert_cmd = "1.0.3"
|
|||
predicates = "1.0.7"
|
||||
uuid = { version = "0.8.2", features = ["v4"] }
|
||||
tempfile = "3.2.0"
|
||||
doc-comment = "0.3.3"
|
||||
|
||||
[features]
|
||||
vendored-openssl = ["openssl-sys/vendored"]
|
||||
|
|
|
|||
39
README.md
39
README.md
|
|
@ -89,13 +89,13 @@ You can run lychee directly from the commandline.
|
|||
|
||||
#### Using cargo
|
||||
|
||||
```
|
||||
```sh
|
||||
cargo install lychee
|
||||
```
|
||||
|
||||
#### Using the official Docker image
|
||||
|
||||
```
|
||||
```sh
|
||||
docker pull lycheeverse/lychee
|
||||
```
|
||||
|
||||
|
|
@ -108,13 +108,13 @@ You can download them from the [releases page](https://github.com/lycheeverse/ly
|
|||
|
||||
Run it inside a repository with a `README.md`:
|
||||
|
||||
```
|
||||
```sh
|
||||
lychee
|
||||
```
|
||||
|
||||
You can also specify various types of inputs:
|
||||
|
||||
```
|
||||
```sh
|
||||
# check links on a website:
|
||||
lychee https://endler.dev/
|
||||
|
||||
|
|
@ -149,7 +149,7 @@ token with no extra permissions is enough to be able to check public repos links
|
|||
There is an extensive list of commandline parameters to customize the behavior,
|
||||
see below for a full list.
|
||||
|
||||
```
|
||||
```sh
|
||||
USAGE:
|
||||
lychee [FLAGS] [OPTIONS] [--] [inputs]...
|
||||
|
||||
|
|
@ -205,27 +205,37 @@ ARGS:
|
|||
## Library usage
|
||||
|
||||
You can use lychee as a library for your own projects.
|
||||
Simply add it as a dependency and build your client:
|
||||
Here is a "hello world" example:
|
||||
|
||||
```rust
|
||||
use lychee::{Request, Input, ClientBuilder, Status};
|
||||
use lychee::Uri::Website;
|
||||
use url::Url;
|
||||
use std::error::Error;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn Error>> {
|
||||
let response = lychee::check("https://github.com/lycheeverse/lychee").await?;
|
||||
println!("{}", response);
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
|
||||
This is equivalent to the following snippet, in which we build our own client:
|
||||
|
||||
```rust
|
||||
use lychee::{ClientBuilder, Status};
|
||||
use std::error::Error;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn Error>> {
|
||||
let client = ClientBuilder::default().build()?;
|
||||
let url = Url::parse("https://github.com/lycheeverse/lychee")?;
|
||||
let response = client.check(Request::new(Website(url), Input::Stdin)).await;
|
||||
let response = client.check("https://github.com/lycheeverse/lychee").await?;
|
||||
assert!(matches!(response.status, Status::Ok(_)));
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
|
||||
The client is very customizable, e.g.
|
||||
The client builder is very customizable:
|
||||
|
||||
```rust
|
||||
```rust,ignore
|
||||
let client = lychee::ClientBuilder::default()
|
||||
.includes(includes)
|
||||
.excludes(excludes)
|
||||
|
|
@ -242,11 +252,12 @@ let client = lychee::ClientBuilder::default()
|
|||
.build()?;
|
||||
```
|
||||
|
||||
All options that you set will be used for all link checks.
|
||||
See the [builder documentation](https://docs.rs/lychee/latest/lychee/struct.ClientBuilder.html) for all options.
|
||||
|
||||
## GitHub Action usage
|
||||
|
||||
GitHub Action is available as a separate repository: [lycheeverse/lychee-action](https://github.com/lycheeverse/lychee-action)
|
||||
A GitHub Action that uses lychee is available as a separate repository: [lycheeverse/lychee-action](https://github.com/lycheeverse/lychee-action)
|
||||
which includes usage instructions.
|
||||
|
||||
## Troubleshooting and workarounds
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
use anyhow::{anyhow, Context, Result};
|
||||
use console::style;
|
||||
use headers::authorization::Basic;
|
||||
use headers::{Authorization, HeaderMap, HeaderMapExt, HeaderName};
|
||||
use indicatif::{ProgressBar, ProgressStyle};
|
||||
|
|
@ -15,7 +16,10 @@ mod stats;
|
|||
use crate::options::{Config, LycheeOptions};
|
||||
use crate::stats::ResponseStats;
|
||||
|
||||
use lychee::collector::{self, Input};
|
||||
use lychee::{
|
||||
collector::{self, Input},
|
||||
Status,
|
||||
};
|
||||
use lychee::{ClientBuilder, ClientPool, Response};
|
||||
|
||||
/// A C-like enum that can be cast to `i32` and used as process exit code.
|
||||
|
|
@ -49,7 +53,8 @@ fn run_main() -> Result<i32> {
|
|||
|
||||
let runtime = match cfg.threads {
|
||||
Some(threads) => {
|
||||
// We define our own runtime instead of the `tokio::main` attribute since we want to make the number of threads configurable
|
||||
// We define our own runtime instead of the `tokio::main` attribute
|
||||
// since we want to make the number of threads configurable
|
||||
tokio::runtime::Builder::new_multi_thread()
|
||||
.worker_threads(threads)
|
||||
.enable_all()
|
||||
|
|
@ -61,23 +66,37 @@ fn run_main() -> Result<i32> {
|
|||
runtime.block_on(run(cfg, opts.inputs()))
|
||||
}
|
||||
|
||||
fn color_response(response: &Response) -> String {
|
||||
let out = match response.status {
|
||||
Status::Ok(_) => style(response).green().bright(),
|
||||
Status::Redirected(_) => style(response),
|
||||
Status::Excluded => style(response).dim(),
|
||||
Status::Error(_) => style(response).yellow().bright(),
|
||||
Status::Timeout(_) => style(response).yellow().bright(),
|
||||
Status::Failed(_) => style(response).red().bright(),
|
||||
};
|
||||
out.to_string()
|
||||
}
|
||||
|
||||
fn show_progress(progress_bar: &Option<ProgressBar>, response: &Response, verbose: bool) {
|
||||
if (response.status.is_success() || response.status.is_excluded()) && !verbose {
|
||||
return;
|
||||
}
|
||||
// Regular println! interferes with progress bar
|
||||
if let Some(pb) = progress_bar {
|
||||
pb.inc(1);
|
||||
pb.println(response.to_string());
|
||||
pb.set_message(&response.to_string());
|
||||
if verbose {
|
||||
pb.println(color_response(response));
|
||||
}
|
||||
} else {
|
||||
println!("{}", response);
|
||||
if (response.status.is_success() || response.status.is_excluded()) && !verbose {
|
||||
return;
|
||||
}
|
||||
println!("{}", color_response(response));
|
||||
}
|
||||
}
|
||||
|
||||
fn fmt(stats: &ResponseStats, format: &Format) -> Result<String> {
|
||||
Ok(match format {
|
||||
Format::String => stats.to_string(),
|
||||
Format::JSON => serde_json::to_string_pretty(&stats)?,
|
||||
Format::Json => serde_json::to_string_pretty(&stats)?,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -122,14 +141,12 @@ async fn run(cfg: &Config, inputs: Vec<Input>) -> Result<i32> {
|
|||
.await?;
|
||||
|
||||
let pb = if cfg.progress {
|
||||
Some(
|
||||
ProgressBar::new(links.len() as u64)
|
||||
.with_style(
|
||||
ProgressStyle::default_bar()
|
||||
.template("{spinner:.green} [{elapsed_precise}] [{bar:40.cyan/blue}] {pos}/{len} ({eta}) {wide_msg}")
|
||||
.progress_chars("#>-")
|
||||
)
|
||||
)
|
||||
let bar =
|
||||
ProgressBar::new(links.len() as u64).with_style(ProgressStyle::default_bar().template(
|
||||
"{spinner:.red.bright} {pos}/{len:.dim} [{elapsed_precise}] {bar:25} {wide_msg}",
|
||||
));
|
||||
bar.enable_steady_tick(100);
|
||||
Some(bar)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
|
@ -164,7 +181,7 @@ async fn run(cfg: &Config, inputs: Vec<Input>) -> Result<i32> {
|
|||
// Note that print statements may interfere with the progress bar, so this
|
||||
// must go before printing the stats
|
||||
if let Some(pb) = &pb {
|
||||
pb.finish_and_clear();
|
||||
pb.finish_with_message("Done");
|
||||
}
|
||||
|
||||
let stats_formatted = fmt(&stats, &cfg.format)?;
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ const MAX_REDIRECTS: usize = 10;
|
|||
#[derive(Debug, Deserialize)]
|
||||
pub enum Format {
|
||||
String,
|
||||
JSON,
|
||||
Json,
|
||||
}
|
||||
|
||||
impl FromStr for Format {
|
||||
|
|
@ -24,7 +24,7 @@ impl FromStr for Format {
|
|||
fn from_str(format: &str) -> Result<Self, Self::Err> {
|
||||
match format {
|
||||
"string" => Ok(Format::String),
|
||||
"json" => Ok(Format::JSON),
|
||||
"json" => Ok(Format::Json),
|
||||
_ => Err(anyhow!("Could not parse format {}", format)),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ fn write_stat(f: &mut fmt::Formatter, title: &str, stat: usize) -> fmt::Result {
|
|||
|
||||
impl Display for ResponseStats {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let separator = "-".repeat(MAX_PADDING);
|
||||
let separator = "-".repeat(MAX_PADDING + 1);
|
||||
|
||||
writeln!(f, "📝 Summary")?;
|
||||
writeln!(f, "{}", separator)?;
|
||||
|
|
|
|||
166
src/client.rs
166
src/client.rs
|
|
@ -1,11 +1,11 @@
|
|||
use anyhow::{anyhow, Context, Result};
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use check_if_email_exists::{check_email, CheckEmailInput};
|
||||
use derive_builder::Builder;
|
||||
use headers::{HeaderMap, HeaderValue};
|
||||
use hubcaps::{Credentials, Github};
|
||||
use regex::{Regex, RegexSet};
|
||||
use reqwest::header;
|
||||
use std::net::IpAddr;
|
||||
use std::convert::TryInto;
|
||||
use std::{collections::HashSet, time::Duration};
|
||||
use tokio::time::sleep;
|
||||
use url::Url;
|
||||
|
|
@ -153,30 +153,32 @@ impl ClientBuilder {
|
|||
}
|
||||
|
||||
impl Client {
|
||||
pub async fn check(&self, request: Request) -> Response {
|
||||
pub async fn check<T: TryInto<Request>>(&self, request: T) -> Result<Response> {
|
||||
let request: Request = match request.try_into() {
|
||||
Ok(request) => request,
|
||||
Err(_e) => bail!("Invalid URI:"),
|
||||
};
|
||||
if self.excluded(&request) {
|
||||
return Response::new(request.uri, Status::Excluded, request.source);
|
||||
return Ok(Response::new(request.uri, Status::Excluded, request.source));
|
||||
}
|
||||
let status = match request.uri {
|
||||
Uri::Website(ref url) => self.check_website(&url).await,
|
||||
Uri::Mail(ref address) => {
|
||||
let valid = self.valid_mail(&address).await;
|
||||
if valid {
|
||||
// TODO: We should not be using a HTTP status code for mail
|
||||
Status::Ok(http::StatusCode::OK)
|
||||
} else {
|
||||
Status::Error(format!("Invalid mail address: {}", address))
|
||||
// TODO: We should not be using a HTTP status code for mail
|
||||
match self.valid_mail(&address).await {
|
||||
true => Status::Ok(http::StatusCode::OK),
|
||||
false => Status::Error(format!("Invalid mail address: {}", address)),
|
||||
}
|
||||
}
|
||||
};
|
||||
Response::new(request.uri, status, request.source)
|
||||
Ok(Response::new(request.uri, status, request.source))
|
||||
}
|
||||
|
||||
pub async fn check_website(&self, url: &Url) -> Status {
|
||||
let mut retries: i64 = 3;
|
||||
let mut wait: u64 = 1;
|
||||
let status = loop {
|
||||
let res = self.check_normal(&url).await;
|
||||
let res = self.check_default(&url).await;
|
||||
match res.is_success() {
|
||||
true => return res,
|
||||
false => {
|
||||
|
|
@ -216,7 +218,7 @@ impl Client {
|
|||
}
|
||||
}
|
||||
|
||||
async fn check_normal(&self, url: &Url) -> Status {
|
||||
async fn check_default(&self, url: &Url) -> Status {
|
||||
let request = self
|
||||
.reqwest_client
|
||||
.request(self.method.clone(), url.as_str());
|
||||
|
|
@ -251,63 +253,29 @@ impl Client {
|
|||
}
|
||||
}
|
||||
|
||||
fn in_regex_excludes(&self, input: &str) -> bool {
|
||||
if let Some(excludes) = &self.excludes.regex {
|
||||
if excludes.is_match(input) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn in_ip_excludes(&self, uri: &Uri) -> bool {
|
||||
if let Some(ipaddr) = uri.host_ip() {
|
||||
if self.excludes.loopback_ips && ipaddr.is_loopback() {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Note: in a pathological case, an IPv6 address can be IPv4-mapped
|
||||
// (IPv4 address embedded in a IPv6). We purposefully
|
||||
// don't deal with it here, and assume if an address is IPv6,
|
||||
// we shouldn't attempt to map it to IPv4.
|
||||
// See: https://tools.ietf.org/html/rfc4291#section-2.5.5.2
|
||||
if let IpAddr::V4(v4addr) = ipaddr {
|
||||
if self.excludes.private_ips && v4addr.is_private() {
|
||||
return true;
|
||||
}
|
||||
if self.excludes.link_local_ips && v4addr.is_link_local() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
pub fn is_mail_excluded(&self) -> bool {
|
||||
self.excludes.mail
|
||||
}
|
||||
|
||||
pub fn excluded(&self, request: &Request) -> bool {
|
||||
if matches!(request.uri, Uri::Mail(_)) && self.excludes.is_mail_excluded() {
|
||||
return true;
|
||||
}
|
||||
if self.excludes.ip(&request.uri) {
|
||||
return true;
|
||||
}
|
||||
if let Some(includes) = &self.includes {
|
||||
if includes.is_empty() {
|
||||
return false;
|
||||
}
|
||||
if includes.is_match(request.uri.as_str()) {
|
||||
// Includes take precedence over excludes
|
||||
return false;
|
||||
} else {
|
||||
// In case we have includes and no excludes,
|
||||
// skip everything that was not included
|
||||
if self.excludes.regex.is_none() {
|
||||
if self.excludes.is_empty() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if self.in_regex_excludes(request.uri.as_str()) {
|
||||
return true;
|
||||
}
|
||||
if matches!(request.uri, Uri::Mail(_)) {
|
||||
return self.is_mail_excluded();
|
||||
}
|
||||
if self.in_ip_excludes(&request.uri) {
|
||||
if self.excludes.regex(request.uri.as_str()) {
|
||||
return true;
|
||||
}
|
||||
if self.scheme.is_none() {
|
||||
|
|
@ -317,6 +285,14 @@ impl Client {
|
|||
}
|
||||
}
|
||||
|
||||
/// A convenience function to check a single URI
|
||||
/// This is the most simple link check and avoids having to create a client manually.
|
||||
/// For more complex scenarios, look into using the `ClientBuilder` instead.
|
||||
pub async fn check<T: TryInto<Request>>(request: T) -> Result<Response> {
|
||||
let client = ClientBuilder::default().build()?;
|
||||
Ok(client.check(request).await?)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::collector::Input;
|
||||
|
|
@ -366,8 +342,9 @@ mod test {
|
|||
let res = ClientBuilder::default()
|
||||
.build()
|
||||
.unwrap()
|
||||
.check(website_url(&mock_server.uri()))
|
||||
.await;
|
||||
.check(mock_server.uri())
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(matches!(res.status, Status::Failed(_)));
|
||||
}
|
||||
|
||||
|
|
@ -385,7 +362,8 @@ mod test {
|
|||
.build()
|
||||
.unwrap()
|
||||
.check(website_url(&mock_server.uri()))
|
||||
.await;
|
||||
.await
|
||||
.unwrap();
|
||||
let end = start.elapsed();
|
||||
|
||||
assert!(matches!(res.status, Status::Failed(_)));
|
||||
|
|
@ -414,6 +392,7 @@ mod test {
|
|||
.unwrap()
|
||||
.check(website_url("https://github.com/lycheeverse/lychee"))
|
||||
.await
|
||||
.unwrap()
|
||||
.status,
|
||||
Status::Ok(_)
|
||||
));
|
||||
|
|
@ -424,8 +403,9 @@ mod test {
|
|||
let res = ClientBuilder::default()
|
||||
.build()
|
||||
.unwrap()
|
||||
.check(website_url("https://github.com/lycheeverse/not-lychee"))
|
||||
.check("https://github.com/lycheeverse/not-lychee")
|
||||
.await
|
||||
.unwrap()
|
||||
.status;
|
||||
assert!(matches!(res, Status::Error(_)));
|
||||
}
|
||||
|
|
@ -444,6 +424,7 @@ mod test {
|
|||
.unwrap()
|
||||
.check(website_url(&mock_server.uri()))
|
||||
.await
|
||||
.unwrap()
|
||||
.status;
|
||||
assert!(matches!(res, Status::Ok(_)));
|
||||
}
|
||||
|
|
@ -453,8 +434,9 @@ mod test {
|
|||
let res = ClientBuilder::default()
|
||||
.build()
|
||||
.unwrap()
|
||||
.check(website_url("https://expired.badssl.com/"))
|
||||
.await;
|
||||
.check("https://expired.badssl.com/")
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(matches!(res.status, Status::Error(_)));
|
||||
|
||||
// Same, but ignore certificate error
|
||||
|
|
@ -462,8 +444,9 @@ mod test {
|
|||
.allow_insecure(true)
|
||||
.build()
|
||||
.unwrap()
|
||||
.check(website_url("https://expired.badssl.com/"))
|
||||
.await;
|
||||
.check("https://expired.badssl.com/")
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(matches!(res.status, Status::Ok(_)));
|
||||
}
|
||||
|
||||
|
|
@ -473,7 +456,8 @@ mod test {
|
|||
.build()
|
||||
.unwrap()
|
||||
.check(website_url("https://crates.io/crates/lychee"))
|
||||
.await;
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(matches!(res.status, Status::Failed(StatusCode::NOT_FOUND)));
|
||||
|
||||
// Try again, but with a custom header.
|
||||
|
|
@ -486,7 +470,8 @@ mod test {
|
|||
.build()
|
||||
.unwrap()
|
||||
.check(website_url("https://crates.io/crates/lychee"))
|
||||
.await;
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(matches!(res.status, Status::Ok(_)));
|
||||
}
|
||||
|
||||
|
|
@ -511,7 +496,7 @@ mod test {
|
|||
.build()
|
||||
.unwrap();
|
||||
|
||||
let resp = client.check(website_url(&mock_server.uri())).await;
|
||||
let resp = client.check(website_url(&mock_server.uri())).await.unwrap();
|
||||
assert!(matches!(resp.status, Status::Timeout(_)));
|
||||
}
|
||||
|
||||
|
|
@ -531,6 +516,49 @@ mod test {
|
|||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_includes_and_excludes_empty() {
|
||||
// This is the pre-configured, empty set of excludes for a client
|
||||
// In this case, only the requests matching the include set will be checked
|
||||
let exclude = Some(RegexSet::empty());
|
||||
let includes = RegexSet::empty();
|
||||
|
||||
let client = ClientBuilder::default()
|
||||
.includes(includes)
|
||||
.excludes(exclude)
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
client.excluded(&website_url("https://foo.github.com")),
|
||||
false
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_include_with_empty_exclude() {
|
||||
// This is the pre-configured, empty set of excludes for a client
|
||||
// In this case, only the requests matching the include set will be checked
|
||||
let exclude = Some(RegexSet::empty());
|
||||
let includes = RegexSet::new(&[r"foo.github.com"]).unwrap();
|
||||
|
||||
let client = ClientBuilder::default()
|
||||
.includes(includes)
|
||||
.excludes(exclude)
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
client.excluded(&website_url("https://foo.github.com")),
|
||||
false
|
||||
);
|
||||
assert_eq!(client.excluded(&website_url("https://github.com")), true);
|
||||
assert_eq!(
|
||||
client.excluded(&website_url("https://bar.github.com")),
|
||||
true
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_exclude_include_regex() {
|
||||
let exclude = Some(RegexSet::new(&[r"github.com"]).unwrap());
|
||||
|
|
|
|||
|
|
@ -25,8 +25,10 @@ impl ClientPool {
|
|||
let client = self.pool.get().await;
|
||||
let tx = self.tx.clone();
|
||||
tokio::spawn(async move {
|
||||
let resp = client.check(req).await;
|
||||
tx.send(resp).await.unwrap();
|
||||
let resp = client.check(req).await.expect("Invalid URI");
|
||||
tx.send(resp)
|
||||
.await
|
||||
.expect("Cannot send response to channel");
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,9 @@
|
|||
use std::net::IpAddr;
|
||||
|
||||
use regex::RegexSet;
|
||||
|
||||
use crate::Uri;
|
||||
|
||||
/// Exclude configuration for the link checker.
|
||||
/// You can ignore links based on regex patterns or pre-defined IP ranges.
|
||||
#[derive(Clone, Debug)]
|
||||
|
|
@ -27,3 +31,49 @@ impl Default for Excludes {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Excludes {
|
||||
pub fn regex(&self, input: &str) -> bool {
|
||||
if let Some(excludes) = &self.regex {
|
||||
if excludes.is_match(input) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
pub fn ip(&self, uri: &Uri) -> bool {
|
||||
if let Some(ipaddr) = uri.host_ip() {
|
||||
if self.loopback_ips && ipaddr.is_loopback() {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Note: in a pathological case, an IPv6 address can be IPv4-mapped
|
||||
// (IPv4 address embedded in a IPv6). We purposefully
|
||||
// don't deal with it here, and assume if an address is IPv6,
|
||||
// we shouldn't attempt to map it to IPv4.
|
||||
// See: https://tools.ietf.org/html/rfc4291#section-2.5.5.2
|
||||
if let IpAddr::V4(v4addr) = ipaddr {
|
||||
if self.private_ips && v4addr.is_private() {
|
||||
return true;
|
||||
}
|
||||
if self.link_local_ips && v4addr.is_link_local() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
pub fn is_mail_excluded(&self) -> bool {
|
||||
self.mail
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
match &self.regex {
|
||||
None => true,
|
||||
Some(regex_set) => regex_set.is_empty(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ use url::Url;
|
|||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum FileType {
|
||||
HTML,
|
||||
Html,
|
||||
Markdown,
|
||||
Plaintext,
|
||||
}
|
||||
|
|
@ -29,7 +29,7 @@ impl<P: AsRef<Path>> From<P> for FileType {
|
|||
match path.extension() {
|
||||
Some(ext) => match ext {
|
||||
_ if ext == "md" => FileType::Markdown,
|
||||
_ if (ext == "htm" || ext == "html") => FileType::HTML,
|
||||
_ if (ext == "htm" || ext == "html") => FileType::Html,
|
||||
_ => FileType::Plaintext,
|
||||
},
|
||||
None => FileType::Plaintext,
|
||||
|
|
@ -147,7 +147,7 @@ pub(crate) fn extract_links(
|
|||
) -> HashSet<Request> {
|
||||
let links = match input_content.file_type {
|
||||
FileType::Markdown => extract_links_from_markdown(&input_content.content),
|
||||
FileType::HTML => extract_links_from_html(&input_content.content),
|
||||
FileType::Html => extract_links_from_html(&input_content.content),
|
||||
FileType::Plaintext => extract_links_from_plaintext(&input_content.content),
|
||||
};
|
||||
|
||||
|
|
@ -234,7 +234,7 @@ mod test {
|
|||
</html>"#;
|
||||
|
||||
let links: HashSet<Uri> = extract_links(
|
||||
&InputContent::from_string(input, FileType::HTML),
|
||||
&InputContent::from_string(input, FileType::Html),
|
||||
Some(Url::parse("https://github.com/lycheeverse/").unwrap()),
|
||||
)
|
||||
.into_iter()
|
||||
|
|
@ -305,7 +305,7 @@ mod test {
|
|||
fn test_extract_html5_not_valid_xml() {
|
||||
let input = load_fixture("TEST_HTML5.html");
|
||||
let links: HashSet<Uri> =
|
||||
extract_links(&InputContent::from_string(&input, FileType::HTML), None)
|
||||
extract_links(&InputContent::from_string(&input, FileType::Html), None)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
|
@ -328,7 +328,7 @@ mod test {
|
|||
fn test_extract_html5_not_valid_xml_relative_links() {
|
||||
let input = load_fixture("TEST_HTML5.html");
|
||||
let links: HashSet<Uri> = extract_links(
|
||||
&InputContent::from_string(&input, FileType::HTML),
|
||||
&InputContent::from_string(&input, FileType::Html),
|
||||
Some(Url::parse("https://example.com").unwrap()),
|
||||
)
|
||||
.into_iter()
|
||||
|
|
@ -357,7 +357,7 @@ mod test {
|
|||
// this has been problematic with previous XML based parser
|
||||
let input = load_fixture("TEST_HTML5_LOWERCASE_DOCTYPE.html");
|
||||
let links: HashSet<Uri> =
|
||||
extract_links(&InputContent::from_string(&input, FileType::HTML), None)
|
||||
extract_links(&InputContent::from_string(&input, FileType::Html), None)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
|
@ -375,7 +375,7 @@ mod test {
|
|||
// minified HTML with some quirky elements such as href attribute values specified without quotes
|
||||
let input = load_fixture("TEST_HTML5_MINIFIED.html");
|
||||
let links: HashSet<Uri> =
|
||||
extract_links(&InputContent::from_string(&input, FileType::HTML), None)
|
||||
extract_links(&InputContent::from_string(&input, FileType::Html), None)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
|
@ -399,7 +399,7 @@ mod test {
|
|||
// malformed links shouldn't stop the parser from further parsing
|
||||
let input = load_fixture("TEST_HTML5_MALFORMED_LINKS.html");
|
||||
let links: HashSet<Uri> =
|
||||
extract_links(&InputContent::from_string(&input, FileType::HTML), None)
|
||||
extract_links(&InputContent::from_string(&input, FileType::Html), None)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
|
@ -419,7 +419,7 @@ mod test {
|
|||
// the element name shouldn't matter for attributes like href, src, cite etc
|
||||
let input = load_fixture("TEST_HTML5_CUSTOM_ELEMENTS.html");
|
||||
let links: HashSet<Uri> =
|
||||
extract_links(&InputContent::from_string(&input, FileType::HTML), None)
|
||||
extract_links(&InputContent::from_string(&input, FileType::Html), None)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
|
|
|||
33
src/lib.rs
33
src/lib.rs
|
|
@ -2,27 +2,43 @@
|
|||
|
||||
/**
|
||||
* `lychee` is a library for checking links.
|
||||
* The main struct of this crate is `ClientBuilder` which can be used to
|
||||
* configure and run your own link checker.
|
||||
*
|
||||
* "Hello world" example:
|
||||
* ```
|
||||
* use std::error::Error;
|
||||
*
|
||||
* use lychee::{Request, Input, ClientBuilder, Status};
|
||||
* use lychee::Uri::Website;
|
||||
* use url::Url;
|
||||
* #[tokio::main]
|
||||
* async fn main() -> Result<(), Box<dyn Error>> {
|
||||
* let response = lychee::check("https://github.com/lycheeverse/lychee").await?;
|
||||
* println!("{}", response);
|
||||
* Ok(())
|
||||
* }
|
||||
* ```
|
||||
*
|
||||
* For more specific use-cases you can build a lychee client yourself,
|
||||
* using the `ClientBuilder` which can be used to
|
||||
* configure and run your own link checker and grants full flexibility:
|
||||
*
|
||||
* ```
|
||||
* use lychee::{ClientBuilder, Status};
|
||||
* use std::error::Error;
|
||||
*
|
||||
* #[tokio::main]
|
||||
* async fn main() -> Result<(), Box<dyn Error>> {
|
||||
* let client = ClientBuilder::default().build()?;
|
||||
* let url = Url::parse("https://github.com/lycheeverse/lychee")?;
|
||||
* let response = client.check(Request::new(Website(url), Input::Stdin)).await;
|
||||
* let response = client.check("https://github.com/lycheeverse/lychee").await?;
|
||||
* assert!(matches!(response.status, Status::Ok(_)));
|
||||
* Ok(())
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
|
||||
#[cfg(doctest)]
|
||||
#[macro_use]
|
||||
extern crate doc_comment;
|
||||
|
||||
#[cfg(doctest)]
|
||||
doctest!("../README.md");
|
||||
|
||||
mod client;
|
||||
mod client_pool;
|
||||
mod excludes;
|
||||
|
|
@ -33,6 +49,7 @@ pub mod collector;
|
|||
pub mod extract;
|
||||
pub mod test_utils;
|
||||
|
||||
pub use client::check;
|
||||
pub use client::ClientBuilder;
|
||||
pub use client_pool::ClientPool;
|
||||
pub use collector::Input;
|
||||
|
|
|
|||
26
src/types.rs
26
src/types.rs
|
|
@ -21,18 +21,36 @@ impl Display for Request {
|
|||
}
|
||||
}
|
||||
|
||||
impl TryFrom<String> for Request {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(s: String) -> Result<Self, Self::Error> {
|
||||
let uri = Uri::try_from(s.as_str())?;
|
||||
Ok(Request::new(uri, Input::String(s)))
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&str> for Request {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(s: &str) -> Result<Self, Self::Error> {
|
||||
let uri = Uri::try_from(s)?;
|
||||
Ok(Request::new(uri, Input::String(s.to_owned())))
|
||||
}
|
||||
}
|
||||
|
||||
/// Specifies how requests to websites will be made
|
||||
pub(crate) enum RequestMethod {
|
||||
GET,
|
||||
HEAD,
|
||||
Get,
|
||||
Head,
|
||||
}
|
||||
|
||||
impl TryFrom<String> for RequestMethod {
|
||||
type Error = anyhow::Error;
|
||||
fn try_from(value: String) -> Result<Self, Self::Error> {
|
||||
match value.to_lowercase().as_ref() {
|
||||
"get" => Ok(RequestMethod::GET),
|
||||
"head" => Ok(RequestMethod::HEAD),
|
||||
"get" => Ok(RequestMethod::Get),
|
||||
"head" => Ok(RequestMethod::Head),
|
||||
_ => Err(anyhow!("Only `get` and `head` allowed, got {}", value)),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,13 +41,13 @@ mod readme {
|
|||
.expect("Invalid utf8 output for `--help`");
|
||||
let readme = load_readme_text();
|
||||
|
||||
const BACKTICKS_OFFSET: usize = 3;
|
||||
const BACKTICKS_OFFSET: usize = 5; // marker: ```sh
|
||||
const NEWLINE_OFFSET: usize = 1;
|
||||
|
||||
let usage_start = BACKTICKS_OFFSET
|
||||
+ NEWLINE_OFFSET
|
||||
+ readme
|
||||
.find("```\nUSAGE:\n")
|
||||
.find("```sh\nUSAGE:\n")
|
||||
.expect("Couldn't find USAGE section in README.md");
|
||||
|
||||
let usage_end = readme[usage_start..]
|
||||
Loading…
Reference in a new issue