mirror of
https://github.com/Hopiu/lychee.git
synced 2026-03-16 20:50:25 +00:00
Wayback integration (#1003)
Adds support for suggesting archived URLs for broken links. Uses Wayback Machine as the archive provider.
This commit is contained in:
parent
f02576810b
commit
994b2852cd
14 changed files with 336 additions and 37 deletions
|
|
@ -34,4 +34,4 @@ ask for some early feedback by creating an issue yourself and asking for feedbac
|
|||
|
||||
## Thanks!
|
||||
|
||||
No matter how small, we appreciate very contribution. You're awesome!
|
||||
No matter how small, we appreciate every contribution. You're awesome!
|
||||
|
|
|
|||
54
Cargo.lock
generated
54
Cargo.lock
generated
|
|
@ -622,7 +622,7 @@ version = "4.1.9"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fddf67631444a3a3e3e5ac51c36a5e01335302de677bd78759eaa90ab1f46644"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"heck 0.4.0",
|
||||
"proc-macro-error",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
|
@ -1110,7 +1110,7 @@ version = "0.4.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "21cdad81446a7f7dc43f6a77409efeb9733d2fa65553efef6018ef257c959b73"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"heck 0.4.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.107",
|
||||
|
|
@ -1122,7 +1122,7 @@ version = "0.5.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c9720bba047d567ffc8a3cba48bf19126600e249ab7f128e9233e6376976a116"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"heck 0.4.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.107",
|
||||
|
|
@ -1518,6 +1518,15 @@ dependencies = [
|
|||
"http",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
|
||||
dependencies = [
|
||||
"unicode-segmentation",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "heck"
|
||||
version = "0.4.0"
|
||||
|
|
@ -1989,6 +1998,7 @@ dependencies = [
|
|||
"secrecy",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"strum",
|
||||
"supports-color",
|
||||
"tabled",
|
||||
"tempfile",
|
||||
|
|
@ -2924,6 +2934,12 @@ dependencies = [
|
|||
"windows-sys 0.42.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustversion"
|
||||
version = "1.0.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06"
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.12"
|
||||
|
|
@ -3172,7 +3188,7 @@ version = "0.7.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d5e79cdebbabaebb06a9bdbaedc7f159b410461f63611d4d0e3fb0fab8fed850"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"heck 0.4.0",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 1.0.107",
|
||||
|
|
@ -3235,6 +3251,28 @@ version = "0.10.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
|
||||
|
||||
[[package]]
|
||||
name = "strum"
|
||||
version = "0.23.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cae14b91c7d11c9a851d3fbc80a963198998c2a64eec840477fa92d8ce9b70bb"
|
||||
dependencies = [
|
||||
"strum_macros",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strum_macros"
|
||||
version = "0.23.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5bb0dc7ee9c15cea6199cde9a127fa16a4c5819af85395457ad72d68edc85a38"
|
||||
dependencies = [
|
||||
"heck 0.3.3",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustversion",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "supports-color"
|
||||
version = "2.0.0"
|
||||
|
|
@ -3284,7 +3322,7 @@ version = "0.5.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "beca1b4eaceb4f2755df858b88d9b9315b7ccfd1ffd0d7a48a52602301f01a57"
|
||||
dependencies = [
|
||||
"heck",
|
||||
"heck 0.4.0",
|
||||
"proc-macro-error",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
|
|
@ -3724,6 +3762,12 @@ dependencies = [
|
|||
"tinyvec",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-segmentation"
|
||||
version = "1.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
version = "0.1.10"
|
||||
|
|
|
|||
|
|
@ -250,6 +250,14 @@ Options:
|
|||
--dump
|
||||
Don't perform any link checking. Instead, dump all the links extracted from inputs that would be checked
|
||||
|
||||
--archive <ARCHIVE>
|
||||
Specify the use of a specific web archive. Can be used in combination with `--suggest`
|
||||
|
||||
[possible values: wayback]
|
||||
|
||||
--suggest
|
||||
Suggest link replacements for broken links, using a web archive. The web archive can be specified with `--archive`
|
||||
|
||||
-m, --max-redirects <MAX_REDIRECTS>
|
||||
Maximum number of allowed redirects
|
||||
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ secrecy = { version = "0.8.0", features = ["serde"] }
|
|||
supports-color = "2.0.0"
|
||||
log = "0.4.17"
|
||||
env_logger = "0.10.0"
|
||||
strum = {version = "0.23.0" , features = ["derive"] }
|
||||
|
||||
[dependencies.clap]
|
||||
version = "4.1.11"
|
||||
|
|
|
|||
42
lychee-bin/src/archive/mod.rs
Normal file
42
lychee-bin/src/archive/mod.rs
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
use reqwest::{Error, Url};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::fmt::Display;
|
||||
use strum::{Display, EnumIter, EnumString, EnumVariantNames};
|
||||
|
||||
use crate::color::{color, GREEN, PINK};
|
||||
|
||||
mod wayback;
|
||||
|
||||
#[derive(Debug, Serialize, Eq, Hash, PartialEq)]
|
||||
pub(crate) struct Suggestion {
|
||||
pub(crate) original: Url,
|
||||
pub(crate) suggestion: Url,
|
||||
}
|
||||
|
||||
impl Display for Suggestion {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
color!(f, PINK, "{}", self.original)?;
|
||||
write!(f, " ")?;
|
||||
color!(f, GREEN, "{}", self.suggestion)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[non_exhaustive]
|
||||
#[derive(Debug, Deserialize, Default, Clone, Display, EnumIter, EnumString, EnumVariantNames)]
|
||||
pub(crate) enum Archive {
|
||||
#[serde(rename = "wayback")]
|
||||
#[strum(serialize = "wayback", ascii_case_insensitive)]
|
||||
#[default]
|
||||
WaybackMachine,
|
||||
}
|
||||
|
||||
impl Archive {
|
||||
pub(crate) async fn get_link(&self, original: &Url) -> Result<Option<Url>, Error> {
|
||||
let function = match self {
|
||||
Archive::WaybackMachine => wayback::get_wayback_link,
|
||||
};
|
||||
|
||||
function(original).await
|
||||
}
|
||||
}
|
||||
79
lychee-bin/src/archive/wayback/mod.rs
Normal file
79
lychee-bin/src/archive/wayback/mod.rs
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
use once_cell::sync::Lazy;
|
||||
use serde::{Deserialize, Deserializer};
|
||||
|
||||
use http::StatusCode;
|
||||
use reqwest::{Error, Url};
|
||||
static WAYBACK_URL: Lazy<Url> =
|
||||
Lazy::new(|| Url::parse("https://archive.org/wayback/available").unwrap());
|
||||
|
||||
pub(crate) async fn get_wayback_link(url: &Url) -> Result<Option<Url>, Error> {
|
||||
let mut archive_url: Url = WAYBACK_URL.clone();
|
||||
archive_url.set_query(Some(&format!("url={url}")));
|
||||
|
||||
let response = reqwest::get(archive_url)
|
||||
.await?
|
||||
.json::<InternetArchiveResponse>()
|
||||
.await?;
|
||||
|
||||
Ok(response
|
||||
.archived_snapshots
|
||||
.closest
|
||||
.map(|closest| closest.url))
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Eq, PartialEq)]
|
||||
pub(crate) struct InternetArchiveResponse {
|
||||
pub(crate) url: Url,
|
||||
pub(crate) archived_snapshots: ArchivedSnapshots,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Eq, PartialEq)]
|
||||
pub(crate) struct ArchivedSnapshots {
|
||||
pub(crate) closest: Option<Closest>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, Eq, PartialEq)]
|
||||
pub(crate) struct Closest {
|
||||
#[serde(deserialize_with = "from_string")]
|
||||
pub(crate) status: StatusCode,
|
||||
pub(crate) available: bool,
|
||||
pub(crate) url: Url,
|
||||
pub(crate) timestamp: String,
|
||||
}
|
||||
|
||||
fn from_string<'d, D>(deserializer: D) -> Result<StatusCode, D::Error>
|
||||
where
|
||||
D: Deserializer<'d>,
|
||||
{
|
||||
let value: &str = Deserialize::deserialize(deserializer)?;
|
||||
let result = value.parse::<u16>().unwrap();
|
||||
Ok(StatusCode::from_u16(result).unwrap())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::archive::wayback::get_wayback_link;
    use reqwest::Error;

    /// A lookup for `https://example.com` is expected to yield a snapshot
    /// whose URL points at `web.archive.org`. Performs a real HTTP request.
    #[tokio::test]
    async fn wayback_suggestion() -> Result<(), Error> {
        let target = "https://example.com".try_into().unwrap();

        let suggestion = get_wayback_link(&target).await?.unwrap();
        assert!(suggestion.as_str().contains("web.archive.org"));

        Ok(())
    }

    /// A URL that is expected to have no snapshot must produce `None`.
    /// Performs a real HTTP request.
    #[tokio::test]
    async fn wayback_suggestion_unknown_url() -> Result<(), Error> {
        let target = "https://github.com/mre/idiomatic-rust-doesnt-exist-man"
            .try_into()
            .unwrap();

        let lookup = get_wayback_link(&target).await?;
        assert_eq!(lookup, None);

        Ok(())
    }
}
|
||||
|
|
@ -5,16 +5,19 @@ use std::time::Duration;
|
|||
|
||||
use indicatif::ProgressBar;
|
||||
use indicatif::ProgressStyle;
|
||||
use lychee_lib::Result;
|
||||
use lychee_lib::Status;
|
||||
use reqwest::Url;
|
||||
use tokio::sync::mpsc;
|
||||
use tokio_stream::wrappers::ReceiverStream;
|
||||
use tokio_stream::StreamExt;
|
||||
|
||||
use lychee_lib::Status;
|
||||
use lychee_lib::{Client, Request, Response};
|
||||
use lychee_lib::{InputSource, Result};
|
||||
|
||||
use crate::archive::{Archive, Suggestion};
|
||||
use crate::formatters::response::ResponseFormatter;
|
||||
use crate::verbosity::Verbosity;
|
||||
use crate::{cache::Cache, stats::ResponseStats, ExitCode};
|
||||
use lychee_lib::{Client, Request, Response};
|
||||
|
||||
use super::CommandParams;
|
||||
|
||||
|
|
@ -42,7 +45,7 @@ where
|
|||
let pb = if params.cfg.no_progress {
|
||||
None
|
||||
} else {
|
||||
Some(init_progress_bar())
|
||||
Some(init_progress_bar("Extracting links"))
|
||||
};
|
||||
|
||||
// Start receiving requests
|
||||
|
|
@ -68,12 +71,21 @@ where
|
|||
|
||||
// Wait until all responses are received
|
||||
let result = show_results_task.await?;
|
||||
let (pb, stats) = result?;
|
||||
let (pb, mut stats) = result?;
|
||||
|
||||
// Note that print statements may interfere with the progress bar, so this
|
||||
// must go before printing the stats
|
||||
if let Some(pb) = &pb {
|
||||
pb.finish_and_clear();
|
||||
pb.finish_with_message("Finished extracting links");
|
||||
}
|
||||
|
||||
if params.cfg.suggest {
|
||||
suggest_archived_links(
|
||||
params.cfg.archive.unwrap_or_default(),
|
||||
&mut stats,
|
||||
!params.cfg.no_progress,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
let code = if stats.is_success() {
|
||||
|
|
@ -84,6 +96,48 @@ where
|
|||
Ok((stats, cache_ref, code))
|
||||
}
|
||||
|
||||
async fn suggest_archived_links(archive: Archive, stats: &mut ResponseStats, show_progress: bool) {
|
||||
let failed_urls = &stats
|
||||
.fail_map
|
||||
.iter()
|
||||
.flat_map(|(source, set)| set.iter().map(|entry| (source, entry)).collect::<Vec<_>>())
|
||||
.filter(|(_, response)| {
|
||||
let uri = &response.uri;
|
||||
!(uri.is_data() || uri.is_mail() || uri.is_file())
|
||||
})
|
||||
.map(|(source, response)| (source, response.uri.as_str().try_into().unwrap()))
|
||||
.collect::<Vec<(&InputSource, Url)>>();
|
||||
|
||||
let bar = if show_progress {
|
||||
let bar = init_progress_bar("Searching for alternatives");
|
||||
bar.set_length(failed_urls.len() as u64);
|
||||
Some(bar)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
for (input, url) in failed_urls {
|
||||
if let Ok(Some(suggestion)) = archive.get_link(url).await {
|
||||
stats
|
||||
.suggestion_map
|
||||
.entry((*input).clone())
|
||||
.or_default()
|
||||
.insert(Suggestion {
|
||||
suggestion,
|
||||
original: url.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
if let Some(bar) = &bar {
|
||||
bar.inc(1);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(bar) = &bar {
|
||||
bar.finish_with_message("Finished searching for alternatives");
|
||||
}
|
||||
}
|
||||
|
||||
// drops the `send_req` channel on exit
|
||||
// required for the receiver task to end, which closes send_resp, which allows
|
||||
// the show_results_task to finish
|
||||
|
|
@ -125,7 +179,7 @@ async fn progress_bar_task(
|
|||
Ok((pb, stats))
|
||||
}
|
||||
|
||||
fn init_progress_bar() -> ProgressBar {
|
||||
fn init_progress_bar(initial_message: &'static str) -> ProgressBar {
|
||||
let bar = ProgressBar::new_spinner().with_style(
|
||||
ProgressStyle::with_template(
|
||||
"{spinner:.197.bright} {pos}/{len:.dim} ETA {eta} {bar:.dim} {wide_msg}",
|
||||
|
|
@ -133,7 +187,7 @@ fn init_progress_bar() -> ProgressBar {
|
|||
.expect("Valid progress bar"),
|
||||
);
|
||||
bar.set_length(0);
|
||||
bar.set_message("Extracting links");
|
||||
bar.set_message(initial_message);
|
||||
// report status _at least_ every 500ms
|
||||
bar.enable_steady_tick(Duration::from_millis(500));
|
||||
bar
|
||||
|
|
@ -232,6 +286,7 @@ fn show_progress(
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use log::info;
|
||||
|
||||
use lychee_lib::{CacheStatus, InputSource, ResponseBody, Uri};
|
||||
|
||||
use crate::formatters;
|
||||
|
|
|
|||
|
|
@ -59,6 +59,14 @@ impl Display for CompactResponseStats {
|
|||
for response in responses {
|
||||
writeln!(f, "{}", color_response(response))?;
|
||||
}
|
||||
|
||||
if let Some(suggestions) = &stats.suggestion_map.get(source) {
|
||||
writeln!(f, "\n\u{2139} Suggestions")?;
|
||||
for suggestion in *suggestions {
|
||||
writeln!(f, "{suggestion}")?;
|
||||
}
|
||||
}
|
||||
|
||||
writeln!(f)?;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -45,6 +45,13 @@ impl Display for DetailedResponseStats {
|
|||
write!(f, "\n\nErrors in {source}")?;
|
||||
for response in responses {
|
||||
write!(f, "\n{}", color_response(response))?;
|
||||
|
||||
if let Some(suggestions) = &stats.suggestion_map.get(source) {
|
||||
writeln!(f, "\nSuggestions in {source}")?;
|
||||
for suggestion in *suggestions {
|
||||
writeln!(f, "{suggestion}")?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,9 +1,12 @@
|
|||
use std::fmt::{self, Display};
|
||||
use std::{
|
||||
collections::{HashMap, HashSet},
|
||||
fmt::{self, Display},
|
||||
};
|
||||
|
||||
use super::StatsFormatter;
|
||||
use anyhow::Result;
|
||||
use http::StatusCode;
|
||||
use lychee_lib::{ResponseBody, Status};
|
||||
use lychee_lib::{InputSource, ResponseBody, Status};
|
||||
use std::fmt::Write;
|
||||
use tabled::{object::Segment, Alignment, Modify, Table, Tabled};
|
||||
|
||||
|
|
@ -94,28 +97,44 @@ impl Display for MarkdownResponseStats {
|
|||
writeln!(f)?;
|
||||
writeln!(f, "{}", stats_table(&self.0))?;
|
||||
|
||||
if !&stats.fail_map.is_empty() {
|
||||
writeln!(f)?;
|
||||
writeln!(f, "## Errors per input\n")?;
|
||||
for (source, responses) in &stats.fail_map {
|
||||
// Using leading newlines over trailing ones (e.g. `writeln!`)
|
||||
// lets us avoid extra newlines without any additional logic.
|
||||
writeln!(f, "### Errors in {source}\n")?;
|
||||
for response in responses {
|
||||
writeln!(
|
||||
f,
|
||||
"{}",
|
||||
markdown_response(response).map_err(|_e| fmt::Error)?
|
||||
)?;
|
||||
}
|
||||
writeln!(f)?;
|
||||
}
|
||||
}
|
||||
write_stats_per_input(f, "Errors", &stats.fail_map, |response| {
|
||||
markdown_response(response).map_err(|_e| fmt::Error)
|
||||
})?;
|
||||
|
||||
write_stats_per_input(f, "Suggestions", &stats.suggestion_map, |suggestion| {
|
||||
Ok(format!(
|
||||
"* {} --> {}",
|
||||
suggestion.original, suggestion.suggestion
|
||||
))
|
||||
})?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn write_stats_per_input<T, F>(
|
||||
f: &mut fmt::Formatter<'_>,
|
||||
name: &'static str,
|
||||
map: &HashMap<InputSource, HashSet<T>>,
|
||||
write_stat: F,
|
||||
) -> fmt::Result
|
||||
where
|
||||
T: Display,
|
||||
F: Fn(&T) -> Result<String, std::fmt::Error>,
|
||||
{
|
||||
if !&map.is_empty() {
|
||||
writeln!(f, "\n## {name} per input")?;
|
||||
for (source, responses) in map {
|
||||
writeln!(f, "\n### {name} in {source}\n")?;
|
||||
for response in responses {
|
||||
writeln!(f, "{}", write_stat(response)?)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) struct Markdown;
|
||||
|
||||
impl Markdown {
|
||||
|
|
@ -136,6 +155,9 @@ mod tests {
|
|||
|
||||
use http::StatusCode;
|
||||
use lychee_lib::{CacheStatus, InputSource, Response, ResponseBody, Status, Uri};
|
||||
use reqwest::Url;
|
||||
|
||||
use crate::archive::Suggestion;
|
||||
|
||||
use super::*;
|
||||
|
||||
|
|
@ -205,6 +227,14 @@ mod tests {
|
|||
},
|
||||
);
|
||||
stats.add(response);
|
||||
stats
|
||||
.suggestion_map
|
||||
.entry((InputSource::Stdin).clone())
|
||||
.or_default()
|
||||
.insert(Suggestion {
|
||||
suggestion: Url::parse("https://example.com/suggestion").unwrap(),
|
||||
original: Url::parse("https://example.com/original").unwrap(),
|
||||
});
|
||||
let summary = MarkdownResponseStats(stats);
|
||||
let expected = r#"## Summary
|
||||
|
||||
|
|
@ -224,6 +254,11 @@ mod tests {
|
|||
|
||||
* [404] [http://127.0.0.1/](http://127.0.0.1/) | Cached: Error (cached)
|
||||
|
||||
## Suggestions per input
|
||||
|
||||
### Suggestions in stdin
|
||||
|
||||
* https://example.com/original --> https://example.com/suggestion
|
||||
"#;
|
||||
assert_eq!(summary.to_string(), expected.to_string());
|
||||
}
|
||||
|
|
|
|||
|
|
@ -76,6 +76,7 @@ use ring as _; // required for apple silicon
|
|||
|
||||
use lychee_lib::Collector;
|
||||
|
||||
mod archive;
|
||||
mod cache;
|
||||
mod client;
|
||||
mod color;
|
||||
|
|
@ -109,7 +110,7 @@ enum ExitCode {
|
|||
}
|
||||
|
||||
/// Ignore lines starting with this marker in `.lycheeignore` files
|
||||
const LYCHEEINGORE_COMMENT_MARKER: &str = "#";
|
||||
const LYCHEEIGNORE_COMMENT_MARKER: &str = "#";
|
||||
|
||||
fn main() -> Result<()> {
|
||||
#[cfg(feature = "tokio-console")]
|
||||
|
|
@ -128,7 +129,7 @@ fn read_lines(file: &File) -> Result<Vec<String>> {
|
|||
Ok(lines
|
||||
.into_iter()
|
||||
.filter(|line| {
|
||||
!line.is_empty() && !line.trim_start().starts_with(LYCHEEINGORE_COMMENT_MARKER)
|
||||
!line.is_empty() && !line.trim_start().starts_with(LYCHEEIGNORE_COMMENT_MARKER)
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
use crate::archive::Archive;
|
||||
use crate::parse::{parse_base, parse_statuscodes};
|
||||
use crate::verbosity::Verbosity;
|
||||
use anyhow::{anyhow, Context, Error, Result};
|
||||
use clap::{arg, Parser};
|
||||
use clap::{arg, builder::TypedValueParser, Parser};
|
||||
use const_format::{concatcp, formatcp};
|
||||
use lychee_lib::{
|
||||
Base, Input, DEFAULT_MAX_REDIRECTS, DEFAULT_MAX_RETRIES, DEFAULT_RETRY_WAIT_TIME_SECS,
|
||||
|
|
@ -11,6 +12,7 @@ use secrecy::{ExposeSecret, SecretString};
|
|||
use serde::Deserialize;
|
||||
use std::path::Path;
|
||||
use std::{collections::HashSet, fs, path::PathBuf, str::FromStr, time::Duration};
|
||||
use strum::VariantNames;
|
||||
|
||||
pub(crate) const LYCHEE_IGNORE_FILE: &str = ".lycheeignore";
|
||||
pub(crate) const LYCHEE_CACHE_FILE: &str = ".lycheecache";
|
||||
|
|
@ -179,6 +181,18 @@ pub(crate) struct Config {
|
|||
#[serde(default)]
|
||||
pub(crate) dump: bool,
|
||||
|
||||
/// Specify the use of a specific web archive.
|
||||
/// Can be used in combination with `--suggest`
|
||||
#[arg(long, value_parser = clap::builder::PossibleValuesParser::new(Archive::VARIANTS).map(|s| s.parse::<Archive>().unwrap()))]
|
||||
#[serde(default)]
|
||||
pub(crate) archive: Option<Archive>,
|
||||
|
||||
/// Suggest link replacements for broken links, using a web archive.
|
||||
/// The web archive can be specified with `--archive`
|
||||
#[arg(long)]
|
||||
#[serde(default)]
|
||||
pub(crate) suggest: bool,
|
||||
|
||||
/// Maximum number of allowed redirects
|
||||
#[arg(short, long, default_value = &MAX_REDIRECTS_STR)]
|
||||
#[serde(default = "max_redirects")]
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
use crate::archive::Suggestion;
|
||||
use lychee_lib::{CacheStatus, InputSource, Response, ResponseBody, Status};
|
||||
use serde::Serialize;
|
||||
|
||||
|
|
@ -17,6 +18,7 @@ pub(crate) struct ResponseStats {
|
|||
pub(crate) cached: usize,
|
||||
pub(crate) success_map: HashMap<InputSource, HashSet<ResponseBody>>,
|
||||
pub(crate) fail_map: HashMap<InputSource, HashSet<ResponseBody>>,
|
||||
pub(crate) suggestion_map: HashMap<InputSource, HashSet<Suggestion>>,
|
||||
pub(crate) excluded_map: HashMap<InputSource, HashSet<ResponseBody>>,
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -78,6 +78,7 @@ mod cli {
|
|||
cached: usize,
|
||||
success_map: HashMap<InputSource, HashSet<ResponseBody>>,
|
||||
fail_map: HashMap<InputSource, HashSet<ResponseBody>>,
|
||||
suggestion_map: HashMap<InputSource, HashSet<ResponseBody>>,
|
||||
excluded_map: HashMap<InputSource, HashSet<ResponseBody>>,
|
||||
}
|
||||
|
||||
|
|
@ -97,6 +98,7 @@ mod cli {
|
|||
"cached": {},
|
||||
"success_map": {:?},
|
||||
"fail_map": {:?},
|
||||
"suggestion_map": {:?},
|
||||
"excluded_map": {:?}
|
||||
}}"#,
|
||||
self.detailed_stats,
|
||||
|
|
@ -110,6 +112,7 @@ mod cli {
|
|||
self.errors,
|
||||
self.cached,
|
||||
self.success_map,
|
||||
self.suggestion_map,
|
||||
self.fail_map,
|
||||
self.excluded_map
|
||||
)
|
||||
|
|
@ -508,7 +511,7 @@ mod cli {
|
|||
.assert()
|
||||
.success();
|
||||
|
||||
let expected = r#"{"detailed_stats":false,"total":11,"successful":11,"unknown":0,"unsupported":0,"timeouts":0,"redirects":0,"excludes":0,"errors":0,"cached":0,"success_map":{},"fail_map":{},"excluded_map":{}}"#;
|
||||
let expected = r#"{"detailed_stats":false,"total":11,"successful":11,"unknown":0,"unsupported":0,"timeouts":0,"redirects":0,"excludes":0,"errors":0,"cached":0,"success_map":{},"fail_map":{},"suggestion_map":{},"excluded_map":{}}"#;
|
||||
let output = fs::read_to_string(&outfile)?;
|
||||
assert_eq!(output.split_whitespace().collect::<String>(), expected);
|
||||
fs::remove_file(outfile)?;
|
||||
|
|
|
|||
Loading…
Reference in a new issue