mirror of
https://github.com/Hopiu/lychee.git
synced 2026-03-16 20:50:25 +00:00
Show input source in status output
If an error occurs during link checking, it is important to know where the error occurred. Therefore, the request and response objects now contain the input source as a field. This makes error tracking easier.
This commit is contained in:
parent
a633afc6b9
commit
4bec47904e
10 changed files with 263 additions and 118 deletions
10
Cargo.lock
generated
10
Cargo.lock
generated
|
|
@ -1411,6 +1411,7 @@ dependencies = [
|
|||
"markup5ever",
|
||||
"markup5ever_rcdom",
|
||||
"openssl-sys",
|
||||
"pad",
|
||||
"predicates",
|
||||
"pulldown-cmark",
|
||||
"regex",
|
||||
|
|
@ -1690,6 +1691,15 @@ dependencies = [
|
|||
"vcpkg",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pad"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2ad9b889f1b12e0b9ee24db044b5129150d5eada288edc800f789928dc8c0e3"
|
||||
dependencies = [
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "parking"
|
||||
version = "2.0.0"
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ serde_json = "1.0.62"
|
|||
# This is necessary for the homebrew build
|
||||
# https://github.com/Homebrew/homebrew-core/pull/70216
|
||||
ring = "0.16.19"
|
||||
pad = "0.1.6"
|
||||
|
||||
[dependencies.reqwest]
|
||||
features = ["gzip"]
|
||||
|
|
|
|||
|
|
@ -231,11 +231,14 @@ fn parse_basic_auth(auth: &str) -> Result<Authorization<Basic>> {
|
|||
fn status_message(response: &Response, verbose: bool) -> Option<String> {
|
||||
match &response.status {
|
||||
Status::Ok(code) if verbose => Some(format!("✅ {} [{}]", response.uri, code)),
|
||||
Status::Redirected if verbose => Some(format!("🔀️ {}", response.uri)),
|
||||
Status::Redirected(code) if verbose => Some(format!("🔀️ {} [{}]", response.uri, code)),
|
||||
Status::Excluded if verbose => Some(format!("👻 {}", response.uri)),
|
||||
Status::Failed(code) => Some(format!("🚫 {} [{}]", response.uri, code)),
|
||||
Status::Error(e) => Some(format!("⚡ {} ({})", response.uri, e)),
|
||||
Status::Timeout => Some(format!("⌛ {}", response.uri)),
|
||||
Status::Timeout(code) => match code {
|
||||
Some(c) => Some(format!("⌛ {} [{}]", response.uri, c)),
|
||||
None => Some(format!("⌛ {}", response.uri)),
|
||||
},
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,65 +1,98 @@
|
|||
use pad::{Alignment, PadStr};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use std::{
|
||||
collections::HashSet,
|
||||
collections::HashMap,
|
||||
fmt::{self, Display},
|
||||
};
|
||||
|
||||
use lychee::{Response, Status::*, Uri};
|
||||
use lychee::{collector::Input, Response, Status::*, Uri};
|
||||
|
||||
// Maximum padding for each entry in the final statistics output
|
||||
const MAX_PADDING: usize = 20;
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
pub struct ResponseStats {
|
||||
total: usize,
|
||||
successful: usize,
|
||||
failures: HashSet<Uri>,
|
||||
timeouts: HashSet<Uri>,
|
||||
redirects: HashSet<Uri>,
|
||||
excludes: HashSet<Uri>,
|
||||
errors: HashSet<Uri>,
|
||||
failures: usize,
|
||||
timeouts: usize,
|
||||
redirects: usize,
|
||||
excludes: usize,
|
||||
errors: usize,
|
||||
fail_map: HashMap<Input, Vec<Uri>>,
|
||||
}
|
||||
|
||||
impl ResponseStats {
|
||||
pub fn new() -> Self {
|
||||
let fail_map = HashMap::new();
|
||||
ResponseStats {
|
||||
total: 0,
|
||||
successful: 0,
|
||||
failures: HashSet::new(),
|
||||
timeouts: HashSet::new(),
|
||||
redirects: HashSet::new(),
|
||||
excludes: HashSet::new(),
|
||||
errors: HashSet::new(),
|
||||
failures: 0,
|
||||
timeouts: 0,
|
||||
redirects: 0,
|
||||
excludes: 0,
|
||||
errors: 0,
|
||||
fail_map,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn add(&mut self, response: Response) {
|
||||
self.total += 1;
|
||||
let uri = response.uri;
|
||||
if !match response.status {
|
||||
Failed(_) => self.failures.insert(uri),
|
||||
Timeout => self.timeouts.insert(uri),
|
||||
Redirected => self.redirects.insert(uri),
|
||||
Excluded => self.excludes.insert(uri),
|
||||
Error(_) => self.errors.insert(uri),
|
||||
_ => false,
|
||||
} {
|
||||
self.successful += 1;
|
||||
match response.status {
|
||||
Failed(_) => self.failures += 1,
|
||||
Timeout(_) => self.timeouts += 1,
|
||||
Redirected(_) => self.redirects += 1,
|
||||
Excluded => self.excludes += 1,
|
||||
Error(_) => self.errors += 1,
|
||||
_ => self.successful += 1,
|
||||
}
|
||||
|
||||
if matches!(response.status, Failed(_)) {
|
||||
let fail = self.fail_map.entry(response.source).or_default();
|
||||
fail.push(response.uri);
|
||||
};
|
||||
}
|
||||
|
||||
pub fn is_success(&self) -> bool {
|
||||
self.total == self.successful + self.excludes.len()
|
||||
self.total == self.successful + self.excludes
|
||||
}
|
||||
}
|
||||
|
||||
fn write_stat(f: &mut fmt::Formatter, title: &str, stat: usize) -> fmt::Result {
|
||||
let fill = title.chars().count();
|
||||
f.write_str(title)?;
|
||||
f.write_str(
|
||||
&stat
|
||||
.to_string()
|
||||
.pad(MAX_PADDING - fill, '.', Alignment::Right, false),
|
||||
)?;
|
||||
f.write_str("\n")
|
||||
}
|
||||
|
||||
impl Display for ResponseStats {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let separator = "-".repeat(MAX_PADDING);
|
||||
|
||||
writeln!(f, "📝 Summary")?;
|
||||
writeln!(f, "-------------------")?;
|
||||
writeln!(f, "🔍 Total: {}", self.total)?;
|
||||
writeln!(f, "✅ Successful: {}", self.successful)?;
|
||||
writeln!(f, "⏳ Timeouts: {}", self.timeouts.len())?;
|
||||
writeln!(f, "🔀 Redirected: {}", self.redirects.len())?;
|
||||
writeln!(f, "👻 Excluded: {}", self.excludes.len())?;
|
||||
writeln!(f, "🚫 Errors: {}", self.errors.len() + self.failures.len())
|
||||
writeln!(f, "{}", separator)?;
|
||||
write_stat(f, "🔍 Total", self.total)?;
|
||||
write_stat(f, "✅ Successful", self.successful)?;
|
||||
write_stat(f, "⏳ Timeouts", self.timeouts)?;
|
||||
write_stat(f, "🔀 Redirected", self.redirects)?;
|
||||
write_stat(f, "👻 Excluded", self.excludes)?;
|
||||
write_stat(f, "🚫 Errors", self.errors + self.failures)?;
|
||||
|
||||
if !&self.fail_map.is_empty() {
|
||||
writeln!(f, "")?;
|
||||
}
|
||||
for (input, uris) in &self.fail_map {
|
||||
writeln!(f, "❯❯ {}", input)?;
|
||||
for uri in uris {
|
||||
writeln!(f, " {}", uri)?
|
||||
}
|
||||
}
|
||||
writeln!(f, "")
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,9 +10,9 @@ use std::{collections::HashSet, time::Duration};
|
|||
use tokio::time::sleep;
|
||||
use url::Url;
|
||||
|
||||
use crate::excludes::Excludes;
|
||||
use crate::types::{Response, Status};
|
||||
use crate::uri::Uri;
|
||||
use crate::{excludes::Excludes, Request};
|
||||
|
||||
const VERSION: &str = env!("CARGO_PKG_VERSION");
|
||||
const DEFAULT_MAX_REDIRECTS: usize = 5;
|
||||
|
|
@ -269,9 +269,9 @@ impl Client {
|
|||
self.excludes.mail
|
||||
}
|
||||
|
||||
pub fn excluded(&self, uri: &Uri) -> bool {
|
||||
pub fn excluded(&self, request: &Request) -> bool {
|
||||
if let Some(includes) = &self.includes {
|
||||
if includes.is_match(uri.as_str()) {
|
||||
if includes.is_match(request.uri.as_str()) {
|
||||
// Includes take precedence over excludes
|
||||
return false;
|
||||
} else {
|
||||
|
|
@ -282,26 +282,26 @@ impl Client {
|
|||
}
|
||||
}
|
||||
}
|
||||
if self.in_regex_excludes(uri.as_str()) {
|
||||
if self.in_regex_excludes(request.uri.as_str()) {
|
||||
return true;
|
||||
}
|
||||
if matches!(uri, Uri::Mail(_)) {
|
||||
if matches!(request.uri, Uri::Mail(_)) {
|
||||
return self.is_mail_excluded();
|
||||
}
|
||||
if self.in_ip_excludes(&uri) {
|
||||
if self.in_ip_excludes(&request.uri) {
|
||||
return true;
|
||||
}
|
||||
if self.scheme.is_none() {
|
||||
return false;
|
||||
}
|
||||
uri.scheme() != self.scheme
|
||||
request.uri.scheme() != self.scheme
|
||||
}
|
||||
|
||||
pub async fn check(&self, uri: Uri) -> Response {
|
||||
if self.excluded(&uri) {
|
||||
return Response::new(uri, Status::Excluded);
|
||||
pub async fn check(&self, request: Request) -> Response {
|
||||
if self.excluded(&request) {
|
||||
return Response::new(request.uri, Status::Excluded, request.source);
|
||||
}
|
||||
let status = match uri {
|
||||
let status = match request.uri {
|
||||
Uri::Website(ref url) => self.check_real(&url).await,
|
||||
Uri::Mail(ref address) => {
|
||||
let valid = self.valid_mail(&address).await;
|
||||
|
|
@ -313,12 +313,14 @@ impl Client {
|
|||
}
|
||||
}
|
||||
};
|
||||
Response::new(uri, status)
|
||||
Response::new(request.uri, status, request.source)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::collector::Input;
|
||||
|
||||
use super::*;
|
||||
use http::StatusCode;
|
||||
use std::time::{Duration, Instant};
|
||||
|
|
@ -345,8 +347,11 @@ mod test {
|
|||
const V6_MAPPED_V4_PRIVATE_CLASS_A: &str = "http://[::ffff:10.0.0.1]";
|
||||
const V6_MAPPED_V4_LINK_LOCAL: &str = "http://[::ffff:169.254.0.1]";
|
||||
|
||||
fn website_url(s: &str) -> Uri {
|
||||
Uri::Website(Url::parse(s).expect("Expected valid Website URI"))
|
||||
fn website_url(s: &str) -> Request {
|
||||
Request::new(
|
||||
Uri::Website(Url::parse(s).expect("Expected valid Website URI")),
|
||||
Input::Stdin,
|
||||
)
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -507,7 +512,7 @@ mod test {
|
|||
.unwrap();
|
||||
|
||||
let resp = client.check(website_url(&mock_server.uri())).await;
|
||||
assert!(matches!(resp.status, Status::Timeout));
|
||||
assert!(matches!(resp.status, Status::Timeout(_)));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -558,11 +563,17 @@ mod test {
|
|||
assert_eq!(client.excluded(&website_url("http://github.com")), true);
|
||||
assert_eq!(client.excluded(&website_url("http://exclude.org")), true);
|
||||
assert_eq!(
|
||||
client.excluded(&Uri::Mail("mail@example.com".to_string())),
|
||||
client.excluded(&Request::new(
|
||||
Uri::Mail("mail@example.com".to_string()),
|
||||
Input::Stdin,
|
||||
)),
|
||||
true
|
||||
);
|
||||
assert_eq!(
|
||||
client.excluded(&Uri::Mail("foo@bar.dev".to_string())),
|
||||
client.excluded(&Request::new(
|
||||
Uri::Mail("foo@bar.dev".to_string()),
|
||||
Input::Stdin,
|
||||
)),
|
||||
false
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,19 +2,18 @@ use client::Client;
|
|||
use deadpool::unmanaged::Pool;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
use crate::uri;
|
||||
use crate::{client, types};
|
||||
|
||||
pub struct ClientPool {
|
||||
tx: mpsc::Sender<types::Response>,
|
||||
rx: mpsc::Receiver<uri::Uri>,
|
||||
rx: mpsc::Receiver<types::Request>,
|
||||
pool: deadpool::unmanaged::Pool<client::Client>,
|
||||
}
|
||||
|
||||
impl ClientPool {
|
||||
pub fn new(
|
||||
tx: mpsc::Sender<types::Response>,
|
||||
rx: mpsc::Receiver<uri::Uri>,
|
||||
rx: mpsc::Receiver<types::Request>,
|
||||
clients: Vec<Client>,
|
||||
) -> Self {
|
||||
let pool = Pool::from(clients);
|
||||
|
|
|
|||
|
|
@ -1,18 +1,21 @@
|
|||
use crate::extract::{extract_links, FileType};
|
||||
use crate::uri::Uri;
|
||||
use crate::{
|
||||
extract::{extract_links, FileType},
|
||||
Request,
|
||||
};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use glob::glob_with;
|
||||
use reqwest::Url;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use shellexpand::tilde;
|
||||
use std::collections::HashSet;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::{collections::HashSet, fmt::Display};
|
||||
use tokio::fs::read_to_string;
|
||||
use tokio::io::{stdin, AsyncReadExt};
|
||||
|
||||
const STDIN: &str = "-";
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
|
||||
#[non_exhaustive]
|
||||
pub enum Input {
|
||||
RemoteUrl(Url),
|
||||
|
|
@ -22,6 +25,31 @@ pub enum Input {
|
|||
String(String),
|
||||
}
|
||||
|
||||
impl Display for Input {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Input::RemoteUrl(url) => {
|
||||
write!(f, "{}", url)
|
||||
}
|
||||
Input::FsGlob {
|
||||
pattern,
|
||||
ignore_case: _,
|
||||
} => {
|
||||
write!(f, "{}", pattern)
|
||||
}
|
||||
Input::FsPath(path) => {
|
||||
write!(f, "{}", path.to_str().unwrap_or_default())
|
||||
}
|
||||
Input::Stdin => {
|
||||
write!(f, "stdin")
|
||||
}
|
||||
Input::String(_) => {
|
||||
write!(f, "raw input string")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct InputContent {
|
||||
pub input: Input,
|
||||
|
|
@ -157,18 +185,6 @@ impl Input {
|
|||
}
|
||||
}
|
||||
|
||||
impl ToString for Input {
|
||||
fn to_string(&self) -> String {
|
||||
match self {
|
||||
Self::RemoteUrl(url) => url.to_string(),
|
||||
Self::FsGlob { pattern, .. } => pattern.clone(),
|
||||
Self::FsPath(p) => p.to_str().unwrap_or_default().to_owned(),
|
||||
Self::Stdin => STDIN.to_owned(),
|
||||
Self::String(s) => s.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetch all unique links from a slice of inputs
|
||||
/// All relative URLs get prefixed with `base_url` if given.
|
||||
pub async fn collect_links(
|
||||
|
|
@ -176,7 +192,7 @@ pub async fn collect_links(
|
|||
base_url: Option<String>,
|
||||
skip_missing_inputs: bool,
|
||||
max_concurrency: usize,
|
||||
) -> Result<HashSet<Uri>> {
|
||||
) -> Result<HashSet<Request>> {
|
||||
let base_url = match base_url {
|
||||
Some(url) => Some(Url::parse(&url)?),
|
||||
_ => None,
|
||||
|
|
@ -213,7 +229,7 @@ pub async fn collect_links(
|
|||
// instead of building a HashSet with all links.
|
||||
// This optimization would speed up cases where there's
|
||||
// a lot of inputs and/or the inputs are large (e.g. big files).
|
||||
let mut collected_links = HashSet::new();
|
||||
let mut collected_links: HashSet<Request> = HashSet::new();
|
||||
|
||||
for handle in extract_links_handles {
|
||||
let links = handle.await?;
|
||||
|
|
@ -226,7 +242,7 @@ pub async fn collect_links(
|
|||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use crate::test_utils::get_mock_server_with_content;
|
||||
use crate::{test_utils::get_mock_server_with_content, Uri};
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
use std::str::FromStr;
|
||||
|
|
@ -264,9 +280,13 @@ mod test {
|
|||
},
|
||||
];
|
||||
|
||||
let links = collect_links(&inputs, None, false, 8).await?;
|
||||
let responses = collect_links(&inputs, None, false, 8).await?;
|
||||
let links = responses
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect::<HashSet<Uri>>();
|
||||
|
||||
let mut expected_links = HashSet::new();
|
||||
let mut expected_links: HashSet<Uri> = HashSet::new();
|
||||
expected_links.insert(Uri::Website(Url::from_str(TEST_STRING)?));
|
||||
expected_links.insert(Uri::Website(Url::from_str(TEST_URL)?));
|
||||
expected_links.insert(Uri::Website(Url::from_str(TEST_FILE)?));
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
use crate::collector::InputContent;
|
||||
use crate::uri::Uri;
|
||||
use crate::{collector::InputContent, Request};
|
||||
use html5ever::parse_document;
|
||||
use html5ever::tendril::{StrTendril, TendrilSink};
|
||||
use linkify::LinkFinder;
|
||||
|
|
@ -141,7 +141,10 @@ fn extract_links_from_plaintext(input: &str) -> Vec<String> {
|
|||
.collect()
|
||||
}
|
||||
|
||||
pub(crate) fn extract_links(input_content: &InputContent, base_url: Option<Url>) -> HashSet<Uri> {
|
||||
pub(crate) fn extract_links(
|
||||
input_content: &InputContent,
|
||||
base_url: Option<Url>,
|
||||
) -> HashSet<Request> {
|
||||
let links = match input_content.file_type {
|
||||
FileType::Markdown => extract_links_from_markdown(&input_content.content),
|
||||
FileType::HTML => extract_links_from_html(&input_content.content),
|
||||
|
|
@ -150,24 +153,27 @@ pub(crate) fn extract_links(input_content: &InputContent, base_url: Option<Url>)
|
|||
|
||||
// Only keep legit URLs. This sorts out things like anchors.
|
||||
// Silently ignore the parse failures for now.
|
||||
let mut uris = HashSet::new();
|
||||
let mut requests: HashSet<Request> = HashSet::new();
|
||||
for link in links {
|
||||
match Uri::try_from(link.as_str()) {
|
||||
Ok(uri) => {
|
||||
uris.insert(uri);
|
||||
requests.insert(Request::new(uri, input_content.input.clone()));
|
||||
}
|
||||
Err(_) => {
|
||||
if !Path::new(&link).exists() {
|
||||
if let Some(base_url) = &base_url {
|
||||
if let Ok(new_url) = base_url.join(&link) {
|
||||
uris.insert(Uri::Website(new_url));
|
||||
requests.insert(Request::new(
|
||||
Uri::Website(new_url),
|
||||
input_content.input.clone(),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
uris
|
||||
requests
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
@ -197,10 +203,13 @@ mod test {
|
|||
#[test]
|
||||
fn test_extract_markdown_links() {
|
||||
let input = "This is [a test](https://endler.dev). This is a relative link test [Relative Link Test](relative_link)";
|
||||
let links = extract_links(
|
||||
let links: HashSet<Uri> = extract_links(
|
||||
&InputContent::from_string(input, FileType::Markdown),
|
||||
Some(Url::parse("https://github.com/hello-rust/lychee/").unwrap()),
|
||||
);
|
||||
)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
assert_eq!(
|
||||
links,
|
||||
[
|
||||
|
|
@ -219,23 +228,30 @@ mod test {
|
|||
fn test_extract_html_links() {
|
||||
let input = r#"<html>
|
||||
<div class="row">
|
||||
<a href="https://github.com/hello-rust/lychee/">
|
||||
<a href="https://github.com/lycheeverse/lychee/">
|
||||
<a href="blob/master/README.md">README</a>
|
||||
</div>
|
||||
</html>"#;
|
||||
|
||||
let links = extract_links(
|
||||
let links: HashSet<Uri> = extract_links(
|
||||
&InputContent::from_string(input, FileType::HTML),
|
||||
Some(Url::parse("https://github.com/hello-rust/").unwrap()),
|
||||
);
|
||||
Some(Url::parse("https://github.com/lycheeverse/").unwrap()),
|
||||
)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
||||
assert_eq!(
|
||||
links
|
||||
.get(&Uri::Website(
|
||||
Url::parse("https://github.com/hello-rust/blob/master/README.md").unwrap()
|
||||
))
|
||||
.is_some(),
|
||||
true
|
||||
links,
|
||||
[
|
||||
Uri::Website(Url::parse("https://github.com/lycheeverse/lychee/").unwrap()),
|
||||
Uri::Website(
|
||||
Url::parse("https://github.com/lycheeverse/blob/master/README.md").unwrap()
|
||||
)
|
||||
]
|
||||
.iter()
|
||||
.cloned()
|
||||
.collect::<HashSet<Uri>>(),
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -257,7 +273,12 @@ mod test {
|
|||
fn test_non_markdown_links() {
|
||||
let input =
|
||||
"https://endler.dev and https://hello-rust.show/foo/bar?lol=1 at test@example.com";
|
||||
let links = extract_links(&InputContent::from_string(input, FileType::Plaintext), None);
|
||||
let links: HashSet<Uri> =
|
||||
extract_links(&InputContent::from_string(input, FileType::Plaintext), None)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
||||
let expected = [
|
||||
Uri::Website(Url::parse("https://endler.dev").unwrap()),
|
||||
Uri::Website(Url::parse("https://hello-rust.show/foo/bar?lol=1").unwrap()),
|
||||
|
|
@ -266,6 +287,7 @@ mod test {
|
|||
.iter()
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
assert_eq!(links, expected)
|
||||
}
|
||||
|
||||
|
|
@ -284,7 +306,11 @@ mod test {
|
|||
#[test]
|
||||
fn test_extract_html5_not_valid_xml() {
|
||||
let input = load_fixture("TEST_HTML5.html");
|
||||
let links = extract_links(&InputContent::from_string(&input, FileType::HTML), None);
|
||||
let links: HashSet<Uri> =
|
||||
extract_links(&InputContent::from_string(&input, FileType::HTML), None)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
||||
let expected_links = [
|
||||
Uri::Website(Url::parse("https://example.com/head/home").unwrap()),
|
||||
|
|
@ -303,10 +329,13 @@ mod test {
|
|||
#[test]
|
||||
fn test_extract_html5_not_valid_xml_relative_links() {
|
||||
let input = load_fixture("TEST_HTML5.html");
|
||||
let links = extract_links(
|
||||
let links: HashSet<Uri> = extract_links(
|
||||
&InputContent::from_string(&input, FileType::HTML),
|
||||
Some(Url::parse("https://example.com").unwrap()),
|
||||
);
|
||||
)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
||||
let expected_links = [
|
||||
Uri::Website(Url::parse("https://example.com/head/home").unwrap()),
|
||||
|
|
@ -329,7 +358,11 @@ mod test {
|
|||
fn test_extract_html5_lowercase_doctype() {
|
||||
// this has been problematic with previous XML based parser
|
||||
let input = load_fixture("TEST_HTML5_LOWERCASE_DOCTYPE.html");
|
||||
let links = extract_links(&InputContent::from_string(&input, FileType::HTML), None);
|
||||
let links: HashSet<Uri> =
|
||||
extract_links(&InputContent::from_string(&input, FileType::HTML), None)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
||||
let expected_links = [Uri::Website(
|
||||
Url::parse("https://example.com/body/a").unwrap(),
|
||||
|
|
@ -345,7 +378,11 @@ mod test {
|
|||
fn test_extract_html5_minified() {
|
||||
// minified HTML with some quirky elements such as href attribute values specified without quotes
|
||||
let input = load_fixture("TEST_HTML5_MINIFIED.html");
|
||||
let links = extract_links(&InputContent::from_string(&input, FileType::HTML), None);
|
||||
let links: HashSet<Uri> =
|
||||
extract_links(&InputContent::from_string(&input, FileType::HTML), None)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
||||
let expected_links = [
|
||||
Uri::Website(Url::parse("https://example.com/").unwrap()),
|
||||
|
|
@ -365,7 +402,11 @@ mod test {
|
|||
fn test_extract_html5_malformed() {
|
||||
// malformed links shouldn't stop the parser from further parsing
|
||||
let input = load_fixture("TEST_HTML5_MALFORMED_LINKS.html");
|
||||
let links = extract_links(&InputContent::from_string(&input, FileType::HTML), None);
|
||||
let links: HashSet<Uri> =
|
||||
extract_links(&InputContent::from_string(&input, FileType::HTML), None)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
||||
let expected_links = [Uri::Website(
|
||||
Url::parse("https://example.com/valid").unwrap(),
|
||||
|
|
@ -381,7 +422,11 @@ mod test {
|
|||
fn test_extract_html5_custom_elements() {
|
||||
// the element name shouldn't matter for attributes like href, src, cite etc
|
||||
let input = load_fixture("TEST_HTML5_CUSTOM_ELEMENTS.html");
|
||||
let links = extract_links(&InputContent::from_string(&input, FileType::HTML), None);
|
||||
let links: HashSet<Uri> =
|
||||
extract_links(&InputContent::from_string(&input, FileType::HTML), None)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
||||
let expected_links = [
|
||||
Uri::Website(Url::parse("https://example.com/some-weird-element").unwrap()),
|
||||
|
|
|
|||
39
src/types.rs
39
src/types.rs
|
|
@ -1,6 +1,24 @@
|
|||
use crate::uri::Uri;
|
||||
use crate::{collector::Input, uri::Uri};
|
||||
use anyhow::anyhow;
|
||||
use std::{collections::HashSet, convert::TryFrom};
|
||||
use std::{collections::HashSet, convert::TryFrom, fmt::Display};
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
|
||||
pub struct Request {
|
||||
pub uri: Uri,
|
||||
pub source: Input,
|
||||
}
|
||||
|
||||
impl Request {
|
||||
pub fn new(uri: Uri, source: Input) -> Self {
|
||||
Request { uri, source }
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Request {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{} ({})", self.uri, self.source)
|
||||
}
|
||||
}
|
||||
|
||||
/// Specifies how requests to websites will be made
|
||||
pub(crate) enum RequestMethod {
|
||||
|
|
@ -23,11 +41,16 @@ impl TryFrom<String> for RequestMethod {
|
|||
pub struct Response {
|
||||
pub uri: Uri,
|
||||
pub status: Status,
|
||||
pub source: Input,
|
||||
}
|
||||
|
||||
impl Response {
|
||||
pub fn new(uri: Uri, status: Status) -> Self {
|
||||
Response { uri, status }
|
||||
pub fn new(uri: Uri, status: Status, source: Input) -> Self {
|
||||
Response {
|
||||
uri,
|
||||
status,
|
||||
source,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -39,9 +62,9 @@ pub enum Status {
|
|||
/// Request failed with HTTP error code
|
||||
Failed(http::StatusCode),
|
||||
/// Request timed out
|
||||
Timeout,
|
||||
Timeout(Option<http::StatusCode>),
|
||||
/// Got redirected to different resource
|
||||
Redirected,
|
||||
Redirected(http::StatusCode),
|
||||
/// Resource was excluded from checking
|
||||
Excluded,
|
||||
/// Low-level error while loading resource
|
||||
|
|
@ -55,7 +78,7 @@ impl Status {
|
|||
} else if statuscode.is_success() {
|
||||
Status::Ok(statuscode)
|
||||
} else if statuscode.is_redirection() {
|
||||
Status::Redirected
|
||||
Status::Redirected(statuscode)
|
||||
} else {
|
||||
Status::Failed(statuscode)
|
||||
}
|
||||
|
|
@ -69,7 +92,7 @@ impl Status {
|
|||
impl From<reqwest::Error> for Status {
|
||||
fn from(e: reqwest::Error) -> Self {
|
||||
if e.is_timeout() {
|
||||
Status::Timeout
|
||||
Status::Timeout(e.status())
|
||||
} else {
|
||||
Status::Error(e.to_string())
|
||||
}
|
||||
|
|
|
|||
22
tests/cli.rs
22
tests/cli.rs
|
|
@ -30,10 +30,10 @@ mod cli {
|
|||
.arg(test_all_private_path)
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(contains("Total: 7"))
|
||||
.stdout(contains("Excluded: 7"))
|
||||
.stdout(contains("Successful: 0"))
|
||||
.stdout(contains("Errors: 0"));
|
||||
.stdout(contains("Total............7"))
|
||||
.stdout(contains("Excluded.........7"))
|
||||
.stdout(contains("Successful.......0"))
|
||||
.stdout(contains("Errors...........0"));
|
||||
}
|
||||
|
||||
/// Test that a GitHub link can be checked without specifying the token.
|
||||
|
|
@ -46,10 +46,10 @@ mod cli {
|
|||
.arg(test_github_path)
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(contains("Total: 1"))
|
||||
.stdout(contains("Excluded: 0"))
|
||||
.stdout(contains("Successful: 1"))
|
||||
.stdout(contains("Errors: 0"));
|
||||
.stdout(contains("Total............1"))
|
||||
.stdout(contains("Excluded.........0"))
|
||||
.stdout(contains("Successful.......1"))
|
||||
.stdout(contains("Errors...........0"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -164,7 +164,7 @@ mod cli {
|
|||
.arg("--verbose")
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(contains("Total: 2"));
|
||||
.stdout(contains("Total............2"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -211,7 +211,7 @@ mod cli {
|
|||
.arg("--verbose")
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(contains("Total: 1"));
|
||||
.stdout(contains("Total............1"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -231,7 +231,7 @@ mod cli {
|
|||
.assert()
|
||||
.success();
|
||||
|
||||
let expected = r##"{"total":11,"successful":11,"failures":[],"timeouts":[],"redirects":[],"excludes":[],"errors":[]}"##;
|
||||
let expected = r##"{"total":11,"successful":11,"failures":0,"timeouts":0,"redirects":0,"excludes":0,"errors":0,"fail_map":{}}"##;
|
||||
let output = fs::read_to_string(&outfile)?;
|
||||
assert_eq!(output, expected);
|
||||
fs::remove_file(outfile)?;
|
||||
|
|
|
|||
Loading…
Reference in a new issue