mirror of
https://github.com/Hopiu/lychee.git
synced 2026-03-17 05:00:26 +00:00
Merge pull request #143 from lycheeverse/input-source
Show input source in status output
This commit is contained in:
commit
ae2d02b8a0
15 changed files with 512 additions and 255 deletions
12
Cargo.lock
generated
12
Cargo.lock
generated
|
|
@ -1392,7 +1392,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "lychee"
|
||||
version = "0.5.0"
|
||||
version = "0.6.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"assert_cmd",
|
||||
|
|
@ -1411,6 +1411,7 @@ dependencies = [
|
|||
"markup5ever",
|
||||
"markup5ever_rcdom",
|
||||
"openssl-sys",
|
||||
"pad",
|
||||
"predicates",
|
||||
"pulldown-cmark",
|
||||
"regex",
|
||||
|
|
@ -1690,6 +1691,15 @@ dependencies = [
|
|||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pad"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2ad9b889f1b12e0b9ee24db044b5129150d5eada288edc800f789928dc8c0e3"
|
||||
dependencies = [
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "parking"
|
||||
version = "2.0.0"
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ keywords = [
|
|||
license = "Apache-2.0/MIT"
|
||||
name = "lychee"
|
||||
repository = "https://github.com/lycheeverse/lychee"
|
||||
version = "0.5.0"
|
||||
version = "0.6.0"
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.38"
|
||||
|
|
@ -51,6 +51,7 @@ serde_json = "1.0.62"
|
|||
# This is necessary for the homebrew build
|
||||
# https://github.com/Homebrew/homebrew-core/pull/70216
|
||||
ring = "0.16.19"
|
||||
pad = "0.1.6"
|
||||
|
||||
[dependencies.reqwest]
|
||||
features = ["gzip"]
|
||||
|
|
|
|||
21
README.md
21
README.md
|
|
@ -174,7 +174,7 @@ OPTIONS:
|
|||
--basic-auth <basic-auth> Basic authentication support. E.g. `username:password`
|
||||
-c, --config <config-file> Configuration file to use [default: ./lychee.toml]
|
||||
--exclude <exclude>... Exclude URLs from checking (supports regex)
|
||||
-f, --format <format> Output file format of status report [default: string]
|
||||
-f, --format <format> Output file format of status report (json, string) [default: string]
|
||||
--github-token <github-token> GitHub API token to use when checking github.com links, to avoid rate
|
||||
limiting [env: GITHUB_TOKEN=]
|
||||
-h, --headers <headers>... Custom request headers
|
||||
|
|
@ -187,7 +187,7 @@ OPTIONS:
|
|||
-T, --threads <threads> Number of threads to utilize. Defaults to number of cores available to
|
||||
the system
|
||||
-t, --timeout <timeout> Website timeout from connect to response finished [default: 20]
|
||||
-u, --user-agent <user-agent> User agent [default: lychee/0.5.0]
|
||||
-u, --user-agent <user-agent> User agent [default: lychee/0.6.0]
|
||||
|
||||
ARGS:
|
||||
<inputs>... The inputs (where to get links to check from). These can be: files (e.g. `README.md`), file globs
|
||||
|
|
@ -208,12 +208,19 @@ You can use lychee as a library for your own projects.
|
|||
Simply add it as a dependency and build your client:
|
||||
|
||||
```rust
|
||||
use http::StatusCode
|
||||
use lychee::{Request, Input, ClientBuilder, Status};
|
||||
use lychee::Uri::Website;
|
||||
use url::Url;
|
||||
use std::error::Error;
|
||||
|
||||
let client = lychee::ClientBuilder::default().build()?;
|
||||
let url = Url::parse("https://github.com/lycheeverse/lychee")?;
|
||||
let response = client.check(Website(url)).await?;
|
||||
assert!(matches!(response.status, Status::Ok(_)));
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn Error>> {
|
||||
let client = ClientBuilder::default().build()?;
|
||||
let url = Url::parse("https://github.com/lycheeverse/lychee")?;
|
||||
let response = client.check(Request::new(Website(url), Input::Stdin)).await;
|
||||
assert!(matches!(response.status, Status::Ok(_)));
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
|
||||
The client is very customizable, e.g.
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ use crate::options::{Config, LycheeOptions};
|
|||
use crate::stats::ResponseStats;
|
||||
|
||||
use lychee::collector::{self, Input};
|
||||
use lychee::{ClientBuilder, ClientPool, Response, Status};
|
||||
use lychee::{ClientBuilder, ClientPool, Response};
|
||||
|
||||
/// A C-like enum that can be cast to `i32` and used as process exit code.
|
||||
enum ExitCode {
|
||||
|
|
@ -62,22 +62,22 @@ fn run_main() -> Result<i32> {
|
|||
}
|
||||
|
||||
fn show_progress(progress_bar: &Option<ProgressBar>, response: &Response, verbose: bool) {
|
||||
let message = status_message(&response, verbose);
|
||||
if (response.status.is_success() || response.status.is_excluded()) && !verbose {
|
||||
return;
|
||||
}
|
||||
// Regular println! interferes with progress bar
|
||||
if let Some(pb) = progress_bar {
|
||||
pb.inc(1);
|
||||
// regular println! interferes with progress bar
|
||||
if let Some(message) = message {
|
||||
pb.println(message);
|
||||
}
|
||||
} else if let Some(message) = message {
|
||||
println!("{}", message);
|
||||
};
|
||||
pb.println(response.to_string());
|
||||
} else {
|
||||
println!("{}", response);
|
||||
}
|
||||
}
|
||||
|
||||
fn fmt(stats: &ResponseStats, format: &Format) -> Result<String> {
|
||||
Ok(match format {
|
||||
Format::String => stats.to_string(),
|
||||
Format::JSON => serde_json::to_string(&stats)?,
|
||||
Format::JSON => serde_json::to_string_pretty(&stats)?,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
@ -120,6 +120,7 @@ async fn run(cfg: &Config, inputs: Vec<Input>) -> Result<i32> {
|
|||
max_concurrency,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let pb = if cfg.progress {
|
||||
Some(
|
||||
ProgressBar::new(links.len() as u64)
|
||||
|
|
@ -166,13 +167,11 @@ async fn run(cfg: &Config, inputs: Vec<Input>) -> Result<i32> {
|
|||
pb.finish_and_clear();
|
||||
}
|
||||
|
||||
if cfg.verbose {
|
||||
println!("\n{}", stats);
|
||||
}
|
||||
|
||||
let stats_formatted = fmt(&stats, &cfg.format)?;
|
||||
if let Some(output) = &cfg.output {
|
||||
fs::write(output, fmt(&stats, &cfg.format)?)
|
||||
.context("Cannot write status output to file")?;
|
||||
fs::write(output, stats_formatted).context("Cannot write status output to file")?;
|
||||
} else {
|
||||
println!("\n{}", stats_formatted);
|
||||
}
|
||||
|
||||
match stats.is_success() {
|
||||
|
|
@ -228,18 +227,6 @@ fn parse_basic_auth(auth: &str) -> Result<Authorization<Basic>> {
|
|||
Ok(Authorization::basic(params[0], params[1]))
|
||||
}
|
||||
|
||||
fn status_message(response: &Response, verbose: bool) -> Option<String> {
|
||||
match &response.status {
|
||||
Status::Ok(code) if verbose => Some(format!("✅ {} [{}]", response.uri, code)),
|
||||
Status::Redirected if verbose => Some(format!("🔀️ {}", response.uri)),
|
||||
Status::Excluded if verbose => Some(format!("👻 {}", response.uri)),
|
||||
Status::Failed(code) => Some(format!("🚫 {} [{}]", response.uri, code)),
|
||||
Status::Error(e) => Some(format!("⚡ {} ({})", response.uri, e)),
|
||||
Status::Timeout => Some(format!("⌛ {}", response.uri)),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
|
|
|||
|
|
@ -239,7 +239,7 @@ pub struct Config {
|
|||
#[serde(default)]
|
||||
pub output: Option<PathBuf>,
|
||||
|
||||
/// Output file format of status report
|
||||
/// Output file format of status report (json, string)
|
||||
#[structopt(short, long, default_value = "string")]
|
||||
#[serde(default)]
|
||||
pub format: Format,
|
||||
|
|
|
|||
|
|
@ -1,65 +1,153 @@
|
|||
use serde::{Deserialize, Serialize};
|
||||
use pad::{Alignment, PadStr};
|
||||
use serde::Serialize;
|
||||
|
||||
use std::{
|
||||
collections::HashSet,
|
||||
collections::{HashMap, HashSet},
|
||||
fmt::{self, Display},
|
||||
};
|
||||
|
||||
use lychee::{Response, Status::*, Uri};
|
||||
use lychee::{collector::Input, Response, Status::*};
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
// Maximum padding for each entry in the final statistics output
|
||||
const MAX_PADDING: usize = 20;
|
||||
|
||||
#[derive(Serialize)]
|
||||
pub struct ResponseStats {
|
||||
total: usize,
|
||||
successful: usize,
|
||||
failures: HashSet<Uri>,
|
||||
timeouts: HashSet<Uri>,
|
||||
redirects: HashSet<Uri>,
|
||||
excludes: HashSet<Uri>,
|
||||
errors: HashSet<Uri>,
|
||||
failures: usize,
|
||||
timeouts: usize,
|
||||
redirects: usize,
|
||||
excludes: usize,
|
||||
errors: usize,
|
||||
fail_map: HashMap<Input, HashSet<Response>>,
|
||||
}
|
||||
|
||||
impl ResponseStats {
|
||||
pub fn new() -> Self {
|
||||
let fail_map = HashMap::new();
|
||||
ResponseStats {
|
||||
total: 0,
|
||||
successful: 0,
|
||||
failures: HashSet::new(),
|
||||
timeouts: HashSet::new(),
|
||||
redirects: HashSet::new(),
|
||||
excludes: HashSet::new(),
|
||||
errors: HashSet::new(),
|
||||
failures: 0,
|
||||
timeouts: 0,
|
||||
redirects: 0,
|
||||
excludes: 0,
|
||||
errors: 0,
|
||||
fail_map,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add(&mut self, response: Response) {
|
||||
self.total += 1;
|
||||
let uri = response.uri;
|
||||
if !match response.status {
|
||||
Failed(_) => self.failures.insert(uri),
|
||||
Timeout => self.timeouts.insert(uri),
|
||||
Redirected => self.redirects.insert(uri),
|
||||
Excluded => self.excludes.insert(uri),
|
||||
Error(_) => self.errors.insert(uri),
|
||||
_ => false,
|
||||
} {
|
||||
self.successful += 1;
|
||||
match response.status {
|
||||
Failed(_) => self.failures += 1,
|
||||
Timeout(_) => self.timeouts += 1,
|
||||
Redirected(_) => self.redirects += 1,
|
||||
Excluded => self.excludes += 1,
|
||||
Error(_) => self.errors += 1,
|
||||
_ => self.successful += 1,
|
||||
}
|
||||
|
||||
if matches!(
|
||||
response.status,
|
||||
Failed(_) | Timeout(_) | Redirected(_) | Error(_)
|
||||
) {
|
||||
let fail = self.fail_map.entry(response.source.clone()).or_default();
|
||||
fail.insert(response);
|
||||
};
|
||||
}
|
||||
|
||||
pub fn is_success(&self) -> bool {
|
||||
self.total == self.successful + self.excludes.len()
|
||||
self.total == self.successful + self.excludes
|
||||
}
|
||||
}
|
||||
|
||||
fn write_stat(f: &mut fmt::Formatter, title: &str, stat: usize) -> fmt::Result {
|
||||
let fill = title.chars().count();
|
||||
f.write_str(title)?;
|
||||
f.write_str(
|
||||
&stat
|
||||
.to_string()
|
||||
.pad(MAX_PADDING - fill, '.', Alignment::Right, false),
|
||||
)?;
|
||||
f.write_str("\n")
|
||||
}
|
||||
|
||||
impl Display for ResponseStats {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let separator = "-".repeat(MAX_PADDING);
|
||||
|
||||
writeln!(f, "📝 Summary")?;
|
||||
writeln!(f, "-------------------")?;
|
||||
writeln!(f, "🔍 Total: {}", self.total)?;
|
||||
writeln!(f, "✅ Successful: {}", self.successful)?;
|
||||
writeln!(f, "⏳ Timeouts: {}", self.timeouts.len())?;
|
||||
writeln!(f, "🔀 Redirected: {}", self.redirects.len())?;
|
||||
writeln!(f, "👻 Excluded: {}", self.excludes.len())?;
|
||||
writeln!(f, "🚫 Errors: {}", self.errors.len() + self.failures.len())
|
||||
writeln!(f, "{}", separator)?;
|
||||
write_stat(f, "🔍 Total", self.total)?;
|
||||
write_stat(f, "✅ Successful", self.successful)?;
|
||||
write_stat(f, "⏳ Timeouts", self.timeouts)?;
|
||||
write_stat(f, "🔀 Redirected", self.redirects)?;
|
||||
write_stat(f, "👻 Excluded", self.excludes)?;
|
||||
write_stat(f, "🚫 Errors", self.errors + self.failures)?;
|
||||
|
||||
if !&self.fail_map.is_empty() {
|
||||
writeln!(f)?;
|
||||
}
|
||||
for (input, responses) in &self.fail_map {
|
||||
writeln!(f, "Input: {}", input)?;
|
||||
for response in responses {
|
||||
writeln!(
|
||||
f,
|
||||
" {} {}\n {}",
|
||||
response.status.icon(),
|
||||
response.uri,
|
||||
response.status
|
||||
)?
|
||||
}
|
||||
}
|
||||
writeln!(f)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test_super {
|
||||
use lychee::{test_utils::website, Status};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_stats() {
|
||||
let mut stats = ResponseStats::new();
|
||||
stats.add(Response {
|
||||
uri: website("http://example.com/ok"),
|
||||
status: Status::Ok(http::StatusCode::OK),
|
||||
source: Input::Stdin,
|
||||
});
|
||||
stats.add(Response {
|
||||
uri: website("http://example.com/failed"),
|
||||
status: Status::Failed(http::StatusCode::BAD_GATEWAY),
|
||||
source: Input::Stdin,
|
||||
});
|
||||
stats.add(Response {
|
||||
uri: website("http://example.com/redirect"),
|
||||
status: Status::Redirected(http::StatusCode::PERMANENT_REDIRECT),
|
||||
source: Input::Stdin,
|
||||
});
|
||||
let mut expected_map = HashMap::new();
|
||||
expected_map.insert(
|
||||
Input::Stdin,
|
||||
vec![
|
||||
Response {
|
||||
uri: website("http://example.com/failed"),
|
||||
status: Status::Failed(http::StatusCode::BAD_GATEWAY),
|
||||
source: Input::Stdin,
|
||||
},
|
||||
Response {
|
||||
uri: website("http://example.com/redirect"),
|
||||
status: Status::Redirected(http::StatusCode::PERMANENT_REDIRECT),
|
||||
source: Input::Stdin,
|
||||
},
|
||||
]
|
||||
.into_iter()
|
||||
.collect::<HashSet<_>>(),
|
||||
);
|
||||
assert_eq!(stats.fail_map, expected_map);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
127
src/client.rs
127
src/client.rs
|
|
@ -10,9 +10,9 @@ use std::{collections::HashSet, time::Duration};
|
|||
use tokio::time::sleep;
|
||||
use url::Url;
|
||||
|
||||
use crate::excludes::Excludes;
|
||||
use crate::types::{Response, Status};
|
||||
use crate::uri::Uri;
|
||||
use crate::{excludes::Excludes, Request};
|
||||
|
||||
const VERSION: &str = env!("CARGO_PKG_VERSION");
|
||||
const DEFAULT_MAX_REDIRECTS: usize = 5;
|
||||
|
|
@ -153,6 +153,52 @@ impl ClientBuilder {
|
|||
}
|
||||
|
||||
impl Client {
|
||||
pub async fn check(&self, request: Request) -> Response {
|
||||
if self.excluded(&request) {
|
||||
return Response::new(request.uri, Status::Excluded, request.source);
|
||||
}
|
||||
let status = match request.uri {
|
||||
Uri::Website(ref url) => self.check_website(&url).await,
|
||||
Uri::Mail(ref address) => {
|
||||
let valid = self.valid_mail(&address).await;
|
||||
if valid {
|
||||
// TODO: We should not be using a HTTP status code for mail
|
||||
Status::Ok(http::StatusCode::OK)
|
||||
} else {
|
||||
Status::Error(format!("Invalid mail address: {}", address))
|
||||
}
|
||||
}
|
||||
};
|
||||
Response::new(request.uri, status, request.source)
|
||||
}
|
||||
|
||||
pub async fn check_website(&self, url: &Url) -> Status {
|
||||
let mut retries: i64 = 3;
|
||||
let mut wait: u64 = 1;
|
||||
let status = loop {
|
||||
let res = self.check_normal(&url).await;
|
||||
match res.is_success() {
|
||||
true => return res,
|
||||
false => {
|
||||
if retries > 0 {
|
||||
retries -= 1;
|
||||
sleep(Duration::from_secs(wait)).await;
|
||||
wait *= 2;
|
||||
} else {
|
||||
break res;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
// Pull out the heavy weapons in case of a failed normal request.
|
||||
// This could be a Github URL and we run into the rate limiter.
|
||||
if let Ok((owner, repo)) = self.extract_github(url.as_str()) {
|
||||
return self.check_github(owner, repo).await;
|
||||
}
|
||||
|
||||
status
|
||||
}
|
||||
|
||||
async fn check_github(&self, owner: String, repo: String) -> Status {
|
||||
match &self.github {
|
||||
Some(github) => {
|
||||
|
|
@ -189,33 +235,6 @@ impl Client {
|
|||
Ok((owner.as_str().into(), repo.as_str().into()))
|
||||
}
|
||||
|
||||
pub async fn check_real(&self, url: &Url) -> Status {
|
||||
let mut retries: i64 = 3;
|
||||
let mut wait: u64 = 1;
|
||||
let status = loop {
|
||||
let res = self.check_normal(&url).await;
|
||||
match res.is_success() {
|
||||
true => return res,
|
||||
false => {
|
||||
if retries > 0 {
|
||||
retries -= 1;
|
||||
sleep(Duration::from_secs(wait)).await;
|
||||
wait *= 2;
|
||||
} else {
|
||||
break res;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
// Pull out the heavy weapons in case of a failed normal request.
|
||||
// This could be a Github URL and we run into the rate limiter.
|
||||
if let Ok((owner, repo)) = self.extract_github(url.as_str()) {
|
||||
return self.check_github(owner, repo).await;
|
||||
}
|
||||
|
||||
status
|
||||
}
|
||||
|
||||
pub async fn valid_mail(&self, address: &str) -> bool {
|
||||
let input = CheckEmailInput::new(vec![address.to_string()]);
|
||||
let results = check_email(&input).await;
|
||||
|
|
@ -269,9 +288,9 @@ impl Client {
|
|||
self.excludes.mail
|
||||
}
|
||||
|
||||
pub fn excluded(&self, uri: &Uri) -> bool {
|
||||
pub fn excluded(&self, request: &Request) -> bool {
|
||||
if let Some(includes) = &self.includes {
|
||||
if includes.is_match(uri.as_str()) {
|
||||
if includes.is_match(request.uri.as_str()) {
|
||||
// Includes take precedence over excludes
|
||||
return false;
|
||||
} else {
|
||||
|
|
@ -282,43 +301,26 @@ impl Client {
|
|||
}
|
||||
}
|
||||
}
|
||||
if self.in_regex_excludes(uri.as_str()) {
|
||||
if self.in_regex_excludes(request.uri.as_str()) {
|
||||
return true;
|
||||
}
|
||||
if matches!(uri, Uri::Mail(_)) {
|
||||
if matches!(request.uri, Uri::Mail(_)) {
|
||||
return self.is_mail_excluded();
|
||||
}
|
||||
if self.in_ip_excludes(&uri) {
|
||||
if self.in_ip_excludes(&request.uri) {
|
||||
return true;
|
||||
}
|
||||
if self.scheme.is_none() {
|
||||
return false;
|
||||
}
|
||||
uri.scheme() != self.scheme
|
||||
}
|
||||
|
||||
pub async fn check(&self, uri: Uri) -> Response {
|
||||
if self.excluded(&uri) {
|
||||
return Response::new(uri, Status::Excluded);
|
||||
}
|
||||
let status = match uri {
|
||||
Uri::Website(ref url) => self.check_real(&url).await,
|
||||
Uri::Mail(ref address) => {
|
||||
let valid = self.valid_mail(&address).await;
|
||||
if valid {
|
||||
// TODO: We should not be using a HTTP status code for mail
|
||||
Status::Ok(http::StatusCode::OK)
|
||||
} else {
|
||||
Status::Error(format!("Invalid mail address: {}", address))
|
||||
}
|
||||
}
|
||||
};
|
||||
Response::new(uri, status)
|
||||
request.uri.scheme() != self.scheme
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::collector::Input;
|
||||
|
||||
use super::*;
|
||||
use http::StatusCode;
|
||||
use std::time::{Duration, Instant};
|
||||
|
|
@ -345,8 +347,11 @@ mod test {
|
|||
const V6_MAPPED_V4_PRIVATE_CLASS_A: &str = "http://[::ffff:10.0.0.1]";
|
||||
const V6_MAPPED_V4_LINK_LOCAL: &str = "http://[::ffff:169.254.0.1]";
|
||||
|
||||
fn website_url(s: &str) -> Uri {
|
||||
Uri::Website(Url::parse(s).expect("Expected valid Website URI"))
|
||||
fn website_url(s: &str) -> Request {
|
||||
Request::new(
|
||||
Uri::Website(Url::parse(s).expect("Expected valid Website URI")),
|
||||
Input::Stdin,
|
||||
)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -507,7 +512,7 @@ mod test {
|
|||
.unwrap();
|
||||
|
||||
let resp = client.check(website_url(&mock_server.uri())).await;
|
||||
assert!(matches!(resp.status, Status::Timeout));
|
||||
assert!(matches!(resp.status, Status::Timeout(_)));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -558,11 +563,17 @@ mod test {
|
|||
assert_eq!(client.excluded(&website_url("http://github.com")), true);
|
||||
assert_eq!(client.excluded(&website_url("http://exclude.org")), true);
|
||||
assert_eq!(
|
||||
client.excluded(&Uri::Mail("mail@example.com".to_string())),
|
||||
client.excluded(&Request::new(
|
||||
Uri::Mail("mail@example.com".to_string()),
|
||||
Input::Stdin,
|
||||
)),
|
||||
true
|
||||
);
|
||||
assert_eq!(
|
||||
client.excluded(&Uri::Mail("foo@bar.dev".to_string())),
|
||||
client.excluded(&Request::new(
|
||||
Uri::Mail("foo@bar.dev".to_string()),
|
||||
Input::Stdin,
|
||||
)),
|
||||
false
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,19 +2,18 @@ use client::Client;
|
|||
use deadpool::unmanaged::Pool;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
use crate::uri;
|
||||
use crate::{client, types};
|
||||
|
||||
pub struct ClientPool {
|
||||
tx: mpsc::Sender<types::Response>,
|
||||
rx: mpsc::Receiver<uri::Uri>,
|
||||
rx: mpsc::Receiver<types::Request>,
|
||||
pool: deadpool::unmanaged::Pool<client::Client>,
|
||||
}
|
||||
|
||||
impl ClientPool {
|
||||
pub fn new(
|
||||
tx: mpsc::Sender<types::Response>,
|
||||
rx: mpsc::Receiver<uri::Uri>,
|
||||
rx: mpsc::Receiver<types::Request>,
|
||||
clients: Vec<Client>,
|
||||
) -> Self {
|
||||
let pool = Pool::from(clients);
|
||||
|
|
|
|||
|
|
@ -1,18 +1,21 @@
|
|||
use crate::extract::{extract_links, FileType};
|
||||
use crate::uri::Uri;
|
||||
use crate::{
|
||||
extract::{extract_links, FileType},
|
||||
Request,
|
||||
};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use glob::glob_with;
|
||||
use reqwest::Url;
|
||||
use serde::Serialize;
|
||||
use shellexpand::tilde;
|
||||
use std::collections::HashSet;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::{collections::HashSet, fmt::Display};
|
||||
use tokio::fs::read_to_string;
|
||||
use tokio::io::{stdin, AsyncReadExt};
|
||||
|
||||
const STDIN: &str = "-";
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
#[non_exhaustive]
|
||||
pub enum Input {
|
||||
RemoteUrl(Url),
|
||||
|
|
@ -22,6 +25,40 @@ pub enum Input {
|
|||
String(String),
|
||||
}
|
||||
|
||||
impl Serialize for Input {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
serializer.collect_str(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Input {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Input::RemoteUrl(url) => {
|
||||
write!(f, "{}", url)
|
||||
}
|
||||
Input::FsGlob {
|
||||
pattern,
|
||||
ignore_case: _,
|
||||
} => {
|
||||
write!(f, "{}", pattern)
|
||||
}
|
||||
Input::FsPath(path) => {
|
||||
write!(f, "{}", path.to_str().unwrap_or_default())
|
||||
}
|
||||
Input::Stdin => {
|
||||
write!(f, "stdin")
|
||||
}
|
||||
Input::String(_) => {
|
||||
write!(f, "raw input string")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct InputContent {
|
||||
pub input: Input,
|
||||
|
|
@ -157,18 +194,6 @@ impl Input {
|
|||
}
|
||||
}
|
||||
|
||||
impl ToString for Input {
|
||||
fn to_string(&self) -> String {
|
||||
match self {
|
||||
Self::RemoteUrl(url) => url.to_string(),
|
||||
Self::FsGlob { pattern, .. } => pattern.clone(),
|
||||
Self::FsPath(p) => p.to_str().unwrap_or_default().to_owned(),
|
||||
Self::Stdin => STDIN.to_owned(),
|
||||
Self::String(s) => s.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetch all unique links from a slice of inputs
|
||||
/// All relative URLs get prefixed with `base_url` if given.
|
||||
pub async fn collect_links(
|
||||
|
|
@ -176,7 +201,7 @@ pub async fn collect_links(
|
|||
base_url: Option<String>,
|
||||
skip_missing_inputs: bool,
|
||||
max_concurrency: usize,
|
||||
) -> Result<HashSet<Uri>> {
|
||||
) -> Result<HashSet<Request>> {
|
||||
let base_url = match base_url {
|
||||
Some(url) => Some(Url::parse(&url)?),
|
||||
_ => None,
|
||||
|
|
@ -213,7 +238,7 @@ pub async fn collect_links(
|
|||
// instead of building a HashSet with all links.
|
||||
// This optimization would speed up cases where there's
|
||||
// a lot of inputs and/or the inputs are large (e.g. big files).
|
||||
let mut collected_links = HashSet::new();
|
||||
let mut collected_links: HashSet<Request> = HashSet::new();
|
||||
|
||||
for handle in extract_links_handles {
|
||||
let links = handle.await?;
|
||||
|
|
@ -226,7 +251,10 @@ pub async fn collect_links(
|
|||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test_utils::get_mock_server_with_content;
|
||||
use crate::{
|
||||
test_utils::{get_mock_server_with_content, website},
|
||||
Uri,
|
||||
};
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
use std::str::FromStr;
|
||||
|
|
@ -264,13 +292,17 @@ mod test {
|
|||
},
|
||||
];
|
||||
|
||||
let links = collect_links(&inputs, None, false, 8).await?;
|
||||
let responses = collect_links(&inputs, None, false, 8).await?;
|
||||
let links = responses
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect::<HashSet<Uri>>();
|
||||
|
||||
let mut expected_links = HashSet::new();
|
||||
expected_links.insert(Uri::Website(Url::from_str(TEST_STRING)?));
|
||||
expected_links.insert(Uri::Website(Url::from_str(TEST_URL)?));
|
||||
expected_links.insert(Uri::Website(Url::from_str(TEST_FILE)?));
|
||||
expected_links.insert(Uri::Website(Url::from_str(TEST_GLOB_1)?));
|
||||
let mut expected_links: HashSet<Uri> = HashSet::new();
|
||||
expected_links.insert(website(TEST_STRING));
|
||||
expected_links.insert(website(TEST_URL));
|
||||
expected_links.insert(website(TEST_FILE));
|
||||
expected_links.insert(website(TEST_GLOB_1));
|
||||
expected_links.insert(Uri::Mail(TEST_GLOB_2_MAIL.to_string()));
|
||||
|
||||
assert_eq!(links, expected_links);
|
||||
|
|
|
|||
157
src/extract.rs
157
src/extract.rs
|
|
@ -1,5 +1,5 @@
|
|||
use crate::collector::InputContent;
|
||||
use crate::uri::Uri;
|
||||
use crate::{collector::InputContent, Request};
|
||||
use html5ever::parse_document;
|
||||
use html5ever::tendril::{StrTendril, TendrilSink};
|
||||
use linkify::LinkFinder;
|
||||
|
|
@ -141,7 +141,10 @@ fn extract_links_from_plaintext(input: &str) -> Vec<String> {
|
|||
.collect()
|
||||
}
|
||||
|
||||
pub(crate) fn extract_links(input_content: &InputContent, base_url: Option<Url>) -> HashSet<Uri> {
|
||||
pub(crate) fn extract_links(
|
||||
input_content: &InputContent,
|
||||
base_url: Option<Url>,
|
||||
) -> HashSet<Request> {
|
||||
let links = match input_content.file_type {
|
||||
FileType::Markdown => extract_links_from_markdown(&input_content.content),
|
||||
FileType::HTML => extract_links_from_html(&input_content.content),
|
||||
|
|
@ -150,28 +153,33 @@ pub(crate) fn extract_links(input_content: &InputContent, base_url: Option<Url>)
|
|||
|
||||
// Only keep legit URLs. This sorts out things like anchors.
|
||||
// Silently ignore the parse failures for now.
|
||||
let mut uris = HashSet::new();
|
||||
let mut requests: HashSet<Request> = HashSet::new();
|
||||
for link in links {
|
||||
match Uri::try_from(link.as_str()) {
|
||||
Ok(uri) => {
|
||||
uris.insert(uri);
|
||||
requests.insert(Request::new(uri, input_content.input.clone()));
|
||||
}
|
||||
Err(_) => {
|
||||
if !Path::new(&link).exists() {
|
||||
if let Some(base_url) = &base_url {
|
||||
if let Ok(new_url) = base_url.join(&link) {
|
||||
uris.insert(Uri::Website(new_url));
|
||||
requests.insert(Request::new(
|
||||
Uri::Website(new_url),
|
||||
input_content.input.clone(),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
uris
|
||||
requests
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::test_utils::website;
|
||||
|
||||
use super::*;
|
||||
use std::fs::File;
|
||||
use std::io::{BufReader, Read};
|
||||
|
|
@ -197,17 +205,18 @@ mod test {
|
|||
#[test]
|
||||
fn test_extract_markdown_links() {
|
||||
let input = "This is [a test](https://endler.dev). This is a relative link test [Relative Link Test](relative_link)";
|
||||
let links = extract_links(
|
||||
let links: HashSet<Uri> = extract_links(
|
||||
&InputContent::from_string(input, FileType::Markdown),
|
||||
Some(Url::parse("https://github.com/hello-rust/lychee/").unwrap()),
|
||||
);
|
||||
)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
assert_eq!(
|
||||
links,
|
||||
[
|
||||
Uri::Website(Url::parse("https://endler.dev").unwrap()),
|
||||
Uri::Website(
|
||||
Url::parse("https://github.com/hello-rust/lychee/relative_link").unwrap()
|
||||
)
|
||||
website("https://endler.dev"),
|
||||
website("https://github.com/hello-rust/lychee/relative_link"),
|
||||
]
|
||||
.iter()
|
||||
.cloned()
|
||||
|
|
@ -219,23 +228,28 @@ mod test {
|
|||
fn test_extract_html_links() {
|
||||
let input = r#"<html>
|
||||
<div class="row">
|
||||
<a href="https://github.com/hello-rust/lychee/">
|
||||
<a href="https://github.com/lycheeverse/lychee/">
|
||||
<a href="blob/master/README.md">README</a>
|
||||
</div>
|
||||
</html>"#;
|
||||
|
||||
let links = extract_links(
|
||||
let links: HashSet<Uri> = extract_links(
|
||||
&InputContent::from_string(input, FileType::HTML),
|
||||
Some(Url::parse("https://github.com/hello-rust/").unwrap()),
|
||||
);
|
||||
Some(Url::parse("https://github.com/lycheeverse/").unwrap()),
|
||||
)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
||||
assert_eq!(
|
||||
links
|
||||
.get(&Uri::Website(
|
||||
Url::parse("https://github.com/hello-rust/blob/master/README.md").unwrap()
|
||||
))
|
||||
.is_some(),
|
||||
true
|
||||
links,
|
||||
[
|
||||
website("https://github.com/lycheeverse/lychee/"),
|
||||
website("https://github.com/lycheeverse/blob/master/README.md"),
|
||||
]
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect::<HashSet<Uri>>(),
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -257,15 +271,21 @@ mod test {
|
|||
fn test_non_markdown_links() {
|
||||
let input =
|
||||
"https://endler.dev and https://hello-rust.show/foo/bar?lol=1 at test@example.com";
|
||||
let links = extract_links(&InputContent::from_string(input, FileType::Plaintext), None);
|
||||
let links: HashSet<Uri> =
|
||||
extract_links(&InputContent::from_string(input, FileType::Plaintext), None)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
||||
let expected = [
|
||||
Uri::Website(Url::parse("https://endler.dev").unwrap()),
|
||||
Uri::Website(Url::parse("https://hello-rust.show/foo/bar?lol=1").unwrap()),
|
||||
website("https://endler.dev"),
|
||||
website("https://hello-rust.show/foo/bar?lol=1"),
|
||||
Uri::Mail("test@example.com".to_string()),
|
||||
]
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
assert_eq!(links, expected)
|
||||
}
|
||||
|
||||
|
|
@ -284,14 +304,18 @@ mod test {
|
|||
#[test]
|
||||
fn test_extract_html5_not_valid_xml() {
|
||||
let input = load_fixture("TEST_HTML5.html");
|
||||
let links = extract_links(&InputContent::from_string(&input, FileType::HTML), None);
|
||||
let links: HashSet<Uri> =
|
||||
extract_links(&InputContent::from_string(&input, FileType::HTML), None)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
||||
let expected_links = [
|
||||
Uri::Website(Url::parse("https://example.com/head/home").unwrap()),
|
||||
Uri::Website(Url::parse("https://example.com/css/style_full_url.css").unwrap()),
|
||||
website("https://example.com/head/home"),
|
||||
website("https://example.com/css/style_full_url.css"),
|
||||
// the body links wouldn't be present if the file was parsed strictly as XML
|
||||
Uri::Website(Url::parse("https://example.com/body/a").unwrap()),
|
||||
Uri::Website(Url::parse("https://example.com/body/div_empty_a").unwrap()),
|
||||
website("https://example.com/body/a"),
|
||||
website("https://example.com/body/div_empty_a"),
|
||||
]
|
||||
.iter()
|
||||
.cloned()
|
||||
|
|
@ -303,20 +327,23 @@ mod test {
|
|||
#[test]
|
||||
fn test_extract_html5_not_valid_xml_relative_links() {
|
||||
let input = load_fixture("TEST_HTML5.html");
|
||||
let links = extract_links(
|
||||
let links: HashSet<Uri> = extract_links(
|
||||
&InputContent::from_string(&input, FileType::HTML),
|
||||
Some(Url::parse("https://example.com").unwrap()),
|
||||
);
|
||||
)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
||||
let expected_links = [
|
||||
Uri::Website(Url::parse("https://example.com/head/home").unwrap()),
|
||||
Uri::Website(Url::parse("https://example.com/images/icon.png").unwrap()),
|
||||
Uri::Website(Url::parse("https://example.com/css/style_relative_url.css").unwrap()),
|
||||
Uri::Website(Url::parse("https://example.com/css/style_full_url.css").unwrap()),
|
||||
Uri::Website(Url::parse("https://example.com/js/script.js").unwrap()),
|
||||
website("https://example.com/head/home"),
|
||||
website("https://example.com/images/icon.png"),
|
||||
website("https://example.com/css/style_relative_url.css"),
|
||||
website("https://example.com/css/style_full_url.css"),
|
||||
website("https://example.com/js/script.js"),
|
||||
// the body links wouldn't be present if the file was parsed strictly as XML
|
||||
Uri::Website(Url::parse("https://example.com/body/a").unwrap()),
|
||||
Uri::Website(Url::parse("https://example.com/body/div_empty_a").unwrap()),
|
||||
website("https://example.com/body/a"),
|
||||
website("https://example.com/body/div_empty_a"),
|
||||
]
|
||||
.iter()
|
||||
.cloned()
|
||||
|
|
@ -329,14 +356,16 @@ mod test {
|
|||
fn test_extract_html5_lowercase_doctype() {
|
||||
// this has been problematic with previous XML based parser
|
||||
let input = load_fixture("TEST_HTML5_LOWERCASE_DOCTYPE.html");
|
||||
let links = extract_links(&InputContent::from_string(&input, FileType::HTML), None);
|
||||
let links: HashSet<Uri> =
|
||||
extract_links(&InputContent::from_string(&input, FileType::HTML), None)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
||||
let expected_links = [Uri::Website(
|
||||
Url::parse("https://example.com/body/a").unwrap(),
|
||||
)]
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect();
|
||||
let expected_links = [website("https://example.com/body/a")]
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
assert_eq!(links, expected_links);
|
||||
}
|
||||
|
|
@ -345,14 +374,18 @@ mod test {
|
|||
fn test_extract_html5_minified() {
|
||||
// minified HTML with some quirky elements such as href attribute values specified without quotes
|
||||
let input = load_fixture("TEST_HTML5_MINIFIED.html");
|
||||
let links = extract_links(&InputContent::from_string(&input, FileType::HTML), None);
|
||||
let links: HashSet<Uri> =
|
||||
extract_links(&InputContent::from_string(&input, FileType::HTML), None)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
||||
let expected_links = [
|
||||
Uri::Website(Url::parse("https://example.com/").unwrap()),
|
||||
Uri::Website(Url::parse("https://example.com/favicon.ico").unwrap()),
|
||||
Uri::Website(Url::parse("https://fonts.externalsite.com").unwrap()),
|
||||
Uri::Website(Url::parse("https://example.com/docs/").unwrap()),
|
||||
Uri::Website(Url::parse("https://example.com/forum").unwrap()),
|
||||
website("https://example.com/"),
|
||||
website("https://example.com/favicon.ico"),
|
||||
website("https://fonts.externalsite.com"),
|
||||
website("https://example.com/docs/"),
|
||||
website("https://example.com/forum"),
|
||||
]
|
||||
.iter()
|
||||
.cloned()
|
||||
|
|
@ -365,7 +398,11 @@ mod test {
|
|||
fn test_extract_html5_malformed() {
|
||||
// malformed links shouldn't stop the parser from further parsing
|
||||
let input = load_fixture("TEST_HTML5_MALFORMED_LINKS.html");
|
||||
let links = extract_links(&InputContent::from_string(&input, FileType::HTML), None);
|
||||
let links: HashSet<Uri> =
|
||||
extract_links(&InputContent::from_string(&input, FileType::HTML), None)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
||||
let expected_links = [Uri::Website(
|
||||
Url::parse("https://example.com/valid").unwrap(),
|
||||
|
|
@ -381,13 +418,17 @@ mod test {
|
|||
fn test_extract_html5_custom_elements() {
|
||||
// the element name shouldn't matter for attributes like href, src, cite etc
|
||||
let input = load_fixture("TEST_HTML5_CUSTOM_ELEMENTS.html");
|
||||
let links = extract_links(&InputContent::from_string(&input, FileType::HTML), None);
|
||||
let links: HashSet<Uri> =
|
||||
extract_links(&InputContent::from_string(&input, FileType::HTML), None)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
||||
let expected_links = [
|
||||
Uri::Website(Url::parse("https://example.com/some-weird-element").unwrap()),
|
||||
Uri::Website(Url::parse("https://example.com/even-weirder-src").unwrap()),
|
||||
Uri::Website(Url::parse("https://example.com/even-weirder-href").unwrap()),
|
||||
Uri::Website(Url::parse("https://example.com/citations").unwrap()),
|
||||
website("https://example.com/some-weird-element"),
|
||||
website("https://example.com/even-weirder-src"),
|
||||
website("https://example.com/even-weirder-href"),
|
||||
website("https://example.com/citations"),
|
||||
]
|
||||
.iter()
|
||||
.cloned()
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@
|
|||
* "Hello world" example:
|
||||
* ```
|
||||
*
|
||||
* use lychee::{ClientBuilder, Status};
|
||||
* use lychee::{Request, Input, ClientBuilder, Status};
|
||||
* use lychee::Uri::Website;
|
||||
* use url::Url;
|
||||
* use std::error::Error;
|
||||
|
|
@ -17,7 +17,7 @@
|
|||
* async fn main() -> Result<(), Box<dyn Error>> {
|
||||
* let client = ClientBuilder::default().build()?;
|
||||
* let url = Url::parse("https://github.com/lycheeverse/lychee")?;
|
||||
* let response = client.check(Website(url)).await;
|
||||
* let response = client.check(Request::new(Website(url), Input::Stdin)).await;
|
||||
* assert!(matches!(response.status, Status::Ok(_)));
|
||||
* Ok(())
|
||||
* }
|
||||
|
|
@ -35,6 +35,7 @@ pub mod test_utils;
|
|||
|
||||
pub use client::ClientBuilder;
|
||||
pub use client_pool::ClientPool;
|
||||
pub use collector::Input;
|
||||
pub use excludes::Excludes;
|
||||
pub use types::*;
|
||||
pub use uri::Uri;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,10 @@
|
|||
use http::StatusCode;
|
||||
use reqwest::Url;
|
||||
use wiremock::matchers::path;
|
||||
use wiremock::{Mock, MockServer, ResponseTemplate};
|
||||
|
||||
use crate::Uri;
|
||||
|
||||
#[allow(unused)]
|
||||
pub async fn get_mock_server<S>(response_code: S) -> MockServer
|
||||
where
|
||||
|
|
@ -30,3 +33,7 @@ where
|
|||
|
||||
mock_server
|
||||
}
|
||||
|
||||
pub fn website(url: &str) -> Uri {
|
||||
Uri::Website(Url::parse(url).unwrap())
|
||||
}
|
||||
|
|
|
|||
110
src/types.rs
110
src/types.rs
|
|
@ -1,6 +1,25 @@
|
|||
use crate::uri::Uri;
|
||||
use crate::{collector::Input, uri::Uri};
|
||||
use anyhow::anyhow;
|
||||
use std::{collections::HashSet, convert::TryFrom};
|
||||
use serde::{Serialize, Serializer};
|
||||
use std::{collections::HashSet, convert::TryFrom, fmt::Display};
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
|
||||
pub struct Request {
|
||||
pub uri: Uri,
|
||||
pub source: Input,
|
||||
}
|
||||
|
||||
impl Request {
|
||||
pub fn new(uri: Uri, source: Input) -> Self {
|
||||
Request { uri, source }
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Request {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{} ({})", self.uri, self.source)
|
||||
}
|
||||
}
|
||||
|
||||
/// Specifies how requests to websites will be made
|
||||
pub(crate) enum RequestMethod {
|
||||
|
|
@ -19,35 +38,80 @@ impl TryFrom<String> for RequestMethod {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, PartialEq, Eq, Hash, Serialize)]
|
||||
pub struct Response {
|
||||
#[serde(flatten)]
|
||||
pub uri: Uri,
|
||||
pub status: Status,
|
||||
#[serde(skip)]
|
||||
pub source: Input,
|
||||
}
|
||||
|
||||
impl Response {
|
||||
pub fn new(uri: Uri, status: Status) -> Self {
|
||||
Response { uri, status }
|
||||
pub fn new(uri: Uri, status: Status, source: Input) -> Self {
|
||||
Response {
|
||||
uri,
|
||||
status,
|
||||
source,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Response {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let metadata = match &self.status {
|
||||
Status::Ok(code) | Status::Redirected(code) | Status::Failed(code) => {
|
||||
format!(" [{}]", code)
|
||||
}
|
||||
Status::Timeout(code) if code.is_some() => format!(" [{}]", code.unwrap()),
|
||||
Status::Error(e) => format!(" ({})", e),
|
||||
_ => "".to_string(),
|
||||
};
|
||||
write!(f, "{} {}{}", self.status.icon(), self.uri, metadata)
|
||||
}
|
||||
}
|
||||
|
||||
/// Response status of the request
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Hash, PartialEq, Eq)]
|
||||
pub enum Status {
|
||||
/// Request was successful
|
||||
Ok(http::StatusCode),
|
||||
/// Request failed with HTTP error code
|
||||
Failed(http::StatusCode),
|
||||
/// Request timed out
|
||||
Timeout,
|
||||
Timeout(Option<http::StatusCode>),
|
||||
/// Got redirected to different resource
|
||||
Redirected,
|
||||
Redirected(http::StatusCode),
|
||||
/// Resource was excluded from checking
|
||||
Excluded,
|
||||
/// Low-level error while loading resource
|
||||
Error(String),
|
||||
}
|
||||
|
||||
impl Display for Status {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let out = match self {
|
||||
Status::Ok(c) => format!("OK ({})", c),
|
||||
Status::Redirected(c) => format!("Redirect ({})", c),
|
||||
Status::Excluded => "Excluded".to_string(),
|
||||
Status::Failed(c) => format!("Failed ({})", c),
|
||||
Status::Error(e) => format!("Runtime error ({})", e),
|
||||
Status::Timeout(Some(c)) => format!("Timeout ({})", c),
|
||||
Status::Timeout(None) => "Timeout".to_string(),
|
||||
};
|
||||
write!(f, "{}", out)
|
||||
}
|
||||
}
|
||||
|
||||
impl Serialize for Status {
|
||||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||||
where
|
||||
S: Serializer,
|
||||
{
|
||||
serializer.collect_str(self)
|
||||
}
|
||||
}
|
||||
|
||||
impl Status {
|
||||
pub fn new(statuscode: http::StatusCode, accepted: Option<HashSet<http::StatusCode>>) -> Self {
|
||||
if let Some(true) = accepted.map(|a| a.contains(&statuscode)) {
|
||||
|
|
@ -55,7 +119,7 @@ impl Status {
|
|||
} else if statuscode.is_success() {
|
||||
Status::Ok(statuscode)
|
||||
} else if statuscode.is_redirection() {
|
||||
Status::Redirected
|
||||
Status::Redirected(statuscode)
|
||||
} else {
|
||||
Status::Failed(statuscode)
|
||||
}
|
||||
|
|
@ -64,12 +128,27 @@ impl Status {
|
|||
pub fn is_success(&self) -> bool {
|
||||
matches!(self, Status::Ok(_))
|
||||
}
|
||||
|
||||
pub fn is_excluded(&self) -> bool {
|
||||
matches!(self, Status::Excluded)
|
||||
}
|
||||
|
||||
pub fn icon(&self) -> &str {
|
||||
match self {
|
||||
Status::Ok(_) => "✅",
|
||||
Status::Redirected(_) => "🔀️",
|
||||
Status::Excluded => "👻",
|
||||
Status::Failed(_) => "🚫",
|
||||
Status::Error(_) => "⚡",
|
||||
Status::Timeout(_) => "⌛",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<reqwest::Error> for Status {
|
||||
fn from(e: reqwest::Error) -> Self {
|
||||
if e.is_timeout() {
|
||||
Status::Timeout
|
||||
Status::Timeout(e.status())
|
||||
} else {
|
||||
Status::Error(e.to_string())
|
||||
}
|
||||
|
|
@ -78,22 +157,19 @@ impl From<reqwest::Error> for Status {
|
|||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test_utils::website;
|
||||
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
|
||||
use url::Url;
|
||||
|
||||
#[test]
fn test_uri_host_ip_v4() {
    // A website whose host is an IPv4 literal reports that address.
    let uri = website("http://127.0.0.1");
    let ip = uri.host_ip().expect("Expected a valid IPv4");
    assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)));
}
|
||||
|
||||
#[test]
|
||||
fn test_uri_host_ip_v6() {
|
||||
let uri =
|
||||
Uri::Website(Url::parse("https://[2020::0010]").expect("Expected URI with valid IPv6"));
|
||||
let uri = website("https://[2020::0010]");
|
||||
let ip = uri.host_ip().expect("Expected a valid IPv6");
|
||||
assert_eq!(
|
||||
ip,
|
||||
|
|
@ -103,7 +179,7 @@ mod test {
|
|||
|
||||
#[test]
fn test_uri_host_ip_no_ip() {
    // A host name that is not an IP literal yields no address.
    let uri = website("https://some.cryptic/url");
    let ip = uri.host_ip();
    assert!(ip.is_none());
}
|
||||
|
|
|
|||
15
src/uri.rs
15
src/uri.rs
|
|
@ -65,7 +65,7 @@ impl Display for Uri {
|
|||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use reqwest::Url;
|
||||
use crate::test_utils::website;
|
||||
|
||||
use super::*;
|
||||
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
|
||||
|
|
@ -75,7 +75,7 @@ mod test {
|
|||
assert!(matches!(Uri::try_from(""), Err(_)));
|
||||
assert_eq!(
|
||||
Uri::try_from("http://example.com").unwrap(),
|
||||
Uri::Website(url::Url::parse("http://example.com").unwrap())
|
||||
website("http://example.com")
|
||||
);
|
||||
assert_eq!(
|
||||
Uri::try_from("mail@example.com").unwrap(),
|
||||
|
|
@ -89,16 +89,14 @@ mod test {
|
|||
|
||||
#[test]
fn test_uri_host_ip_v4() {
    // A website whose host is an IPv4 literal reports that address.
    let uri = website("http://127.0.0.1");
    let ip = uri.host_ip().expect("Expected a valid IPv4");
    assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)));
}
|
||||
|
||||
#[test]
|
||||
fn test_uri_host_ip_v6() {
|
||||
let uri =
|
||||
Uri::Website(Url::parse("https://[2020::0010]").expect("Expected URI with valid IPv6"));
|
||||
let uri = website("https://[2020::0010]");
|
||||
let ip = uri.host_ip().expect("Expected a valid IPv6");
|
||||
assert_eq!(
|
||||
ip,
|
||||
|
|
@ -108,15 +106,14 @@ mod test {
|
|||
|
||||
#[test]
fn test_uri_host_ip_no_ip() {
    // A host name that is not an IP literal yields no address.
    let uri = website("https://some.cryptic/url");
    let ip = uri.host_ip();
    assert!(ip.is_none());
}
|
||||
|
||||
#[test]
fn test_mail() {
    // NOTE(review): despite its name, this test builds a website URI with an
    // IPv4 host and checks `host_ip`, exactly like `test_uri_host_ip_v4`.
    // It does not exercise a mail address — confirm the intended coverage.
    let uri = website("http://127.0.0.1");
    let ip = uri.host_ip().expect("Expected a valid IPv4");
    assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)));
}
|
||||
|
|
|
|||
26
tests/cli.rs
26
tests/cli.rs
|
|
@ -30,10 +30,10 @@ mod cli {
|
|||
.arg(test_all_private_path)
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(contains("Total: 7"))
|
||||
.stdout(contains("Excluded: 7"))
|
||||
.stdout(contains("Successful: 0"))
|
||||
.stdout(contains("Errors: 0"));
|
||||
.stdout(contains("Total............7"))
|
||||
.stdout(contains("Excluded.........7"))
|
||||
.stdout(contains("Successful.......0"))
|
||||
.stdout(contains("Errors...........0"));
|
||||
}
|
||||
|
||||
/// Test that a GitHub link can be checked without specifying the token.
|
||||
|
|
@ -46,10 +46,10 @@ mod cli {
|
|||
.arg(test_github_path)
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(contains("Total: 1"))
|
||||
.stdout(contains("Excluded: 0"))
|
||||
.stdout(contains("Successful: 1"))
|
||||
.stdout(contains("Errors: 0"));
|
||||
.stdout(contains("Total............1"))
|
||||
.stdout(contains("Excluded.........0"))
|
||||
.stdout(contains("Successful.......1"))
|
||||
.stdout(contains("Errors...........0"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -164,7 +164,7 @@ mod cli {
|
|||
.arg("--verbose")
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(contains("Total: 2"));
|
||||
.stdout(contains("Total............2"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -188,7 +188,7 @@ mod cli {
|
|||
.arg("--glob-ignore-case")
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(contains("Total: 2"));
|
||||
.stdout(contains("Total............2"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -211,7 +211,7 @@ mod cli {
|
|||
.arg("--verbose")
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(contains("Total: 1"));
|
||||
.stdout(contains("Total............1"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -231,9 +231,9 @@ mod cli {
|
|||
.assert()
|
||||
.success();
|
||||
|
||||
let expected = r##"{"total":11,"successful":11,"failures":[],"timeouts":[],"redirects":[],"excludes":[],"errors":[]}"##;
|
||||
let expected = r##"{"total":11,"successful":11,"failures":0,"timeouts":0,"redirects":0,"excludes":0,"errors":0,"fail_map":{}}"##;
|
||||
let output = fs::read_to_string(&outfile)?;
|
||||
assert_eq!(output, expected);
|
||||
assert_eq!(output.split_whitespace().collect::<String>(), expected);
|
||||
fs::remove_file(outfile)?;
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue