mirror of
https://github.com/Hopiu/lychee.git
synced 2026-03-16 20:50:25 +00:00
feat: Add support for --dump-inputs (#1159)
* Add support for --dump-inputs * Add integration tests * Fix usage guide in README
This commit is contained in:
parent
f1817ead5e
commit
f53619a455
13 changed files with 158 additions and 4 deletions
|
|
@ -303,6 +303,9 @@ Options:
|
|||
--dump
|
||||
Don't perform any link checking. Instead, dump all the links extracted from inputs that would be checked
|
||||
|
||||
--dump-inputs
|
||||
Don't perform any link extraction and checking. Instead, dump all input sources from which links would be collected
|
||||
|
||||
--archive <ARCHIVE>
|
||||
Specify the use of a specific web archive. Can be used in combination with `--suggest`
|
||||
|
||||
|
|
|
|||
0
fixtures/dump_inputs/markdown.md
vendored
Normal file
0
fixtures/dump_inputs/markdown.md
vendored
Normal file
0
fixtures/dump_inputs/some_file.txt
vendored
Normal file
0
fixtures/dump_inputs/some_file.txt
vendored
Normal file
0
fixtures/dump_inputs/subfolder/example.bin
vendored
Normal file
0
fixtures/dump_inputs/subfolder/example.bin
vendored
Normal file
0
fixtures/dump_inputs/subfolder/file2.md
vendored
Normal file
0
fixtures/dump_inputs/subfolder/file2.md
vendored
Normal file
0
fixtures/dump_inputs/subfolder/test.html
vendored
Normal file
0
fixtures/dump_inputs/subfolder/test.html
vendored
Normal file
|
|
@ -37,8 +37,8 @@ where
|
|||
let requests = params.requests;
|
||||
tokio::pin!(requests);
|
||||
|
||||
if let Some(outfile) = &params.cfg.output {
|
||||
fs::File::create(outfile)?;
|
||||
if let Some(out_file) = &params.cfg.output {
|
||||
fs::File::create(out_file)?;
|
||||
}
|
||||
|
||||
let mut writer = create_writer(params.cfg.output)?;
|
||||
|
|
@ -70,6 +70,30 @@ where
|
|||
Ok(ExitCode::Success)
|
||||
}
|
||||
|
||||
/// Dump all input sources to stdout without extracting any links and checking
|
||||
/// them.
|
||||
pub(crate) async fn dump_inputs<S>(sources: S, output: Option<&PathBuf>) -> Result<ExitCode>
|
||||
where
|
||||
S: futures::Stream<Item = Result<String>>,
|
||||
{
|
||||
let sources = sources;
|
||||
tokio::pin!(sources);
|
||||
|
||||
if let Some(out_file) = output {
|
||||
fs::File::create(out_file)?;
|
||||
}
|
||||
|
||||
let mut writer = create_writer(output.cloned())?;
|
||||
|
||||
while let Some(source) = sources.next().await {
|
||||
let source = source?;
|
||||
|
||||
writeln!(writer, "{source}")?;
|
||||
}
|
||||
|
||||
Ok(ExitCode::Success)
|
||||
}
|
||||
|
||||
/// Dump request to stdout
|
||||
fn write(
|
||||
writer: &mut Box<dyn Write>,
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ pub(crate) mod dump;
|
|||
|
||||
pub(crate) use check::check;
|
||||
pub(crate) use dump::dump;
|
||||
pub(crate) use dump::dump_inputs;
|
||||
|
||||
use std::sync::Arc;
|
||||
|
||||
|
|
|
|||
|
|
@ -293,6 +293,13 @@ async fn run(opts: &LycheeOptions) -> Result<i32> {
|
|||
// File a bug if you rely on this envvar! It's going to go away eventually.
|
||||
.use_html5ever(std::env::var("LYCHEE_USE_HTML5EVER").map_or(false, |x| x == "1"));
|
||||
|
||||
if opts.config.dump_inputs {
|
||||
let sources = collector.collect_sources(inputs).await;
|
||||
let exit_code = commands::dump_inputs(sources, opts.config.output.as_ref()).await?;
|
||||
|
||||
return Ok(exit_code as i32);
|
||||
}
|
||||
|
||||
collector = if let Some(ref basic_auth) = opts.config.basic_auth {
|
||||
collector.basic_auth_extractor(BasicAuthExtractor::new(basic_auth)?)
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -181,6 +181,12 @@ pub(crate) struct Config {
|
|||
#[serde(default)]
|
||||
pub(crate) dump: bool,
|
||||
|
||||
/// Don't perform any link extraction and checking.
|
||||
/// Instead, dump all input sources from which links would be collected
|
||||
#[arg(long)]
|
||||
#[serde(default)]
|
||||
pub(crate) dump_inputs: bool,
|
||||
|
||||
/// Specify the use of a specific web archive.
|
||||
/// Can be used in combination with `--suggest`
|
||||
#[arg(long, value_parser = clap::builder::PossibleValuesParser::new(Archive::VARIANTS).map(|s| s.parse::<Archive>().unwrap()))]
|
||||
|
|
|
|||
|
|
@ -1347,4 +1347,74 @@ mod cli {
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// `--dump-inputs` with a `**/*.md` glob must list the Markdown fixtures.
#[test]
fn test_dump_inputs_glob_md() -> Result<()> {
    let md_glob = fixtures_path().join("**/*.md");

    let mut outcome = main_command()
        .arg("--dump-inputs")
        .arg(md_glob)
        .assert()
        .success();

    // Check each expected entry in turn, in the same order as before.
    for expected in [
        "fixtures/dump_inputs/subfolder/file2.md",
        "fixtures/dump_inputs/markdown.md",
    ] {
        outcome = outcome.stdout(contains(expected));
    }

    Ok(())
}
|
||||
|
||||
/// `--dump-inputs` with a catch-all glob must list every fixture entry,
/// including the subfolder itself.
#[test]
fn test_dump_inputs_glob_all() -> Result<()> {
    let any_glob = fixtures_path().join("**/*");

    let mut outcome = main_command()
        .arg("--dump-inputs")
        .arg(any_glob)
        .assert()
        .success();

    // Check each expected entry in turn, in the same order as before.
    for expected in [
        "fixtures/dump_inputs/subfolder/test.html",
        "fixtures/dump_inputs/subfolder/file2.md",
        "fixtures/dump_inputs/subfolder",
        "fixtures/dump_inputs/markdown.md",
        "fixtures/dump_inputs/subfolder/example.bin",
        "fixtures/dump_inputs/some_file.txt",
    ] {
        outcome = outcome.stdout(contains(expected));
    }

    Ok(())
}
|
||||
|
||||
/// A remote URL passed with `--dump-inputs` is echoed back on stdout.
#[test]
fn test_dump_inputs_url() -> Result<()> {
    let url = "https://example.com";

    main_command()
        .arg("--dump-inputs")
        .arg(url)
        .assert()
        .success()
        .stdout(contains(url));

    Ok(())
}
|
||||
|
||||
/// A plain filesystem path passed with `--dump-inputs` is echoed back on
/// stdout.
#[test]
fn test_dump_inputs_path() -> Result<()> {
    let dir = "fixtures";

    main_command()
        .arg("--dump-inputs")
        .arg(dir)
        .assert()
        .success()
        .stdout(contains(dir));

    Ok(())
}
|
||||
|
||||
/// The `-` input passed with `--dump-inputs` is reported as `Stdin`.
#[test]
fn test_dump_inputs_stdin() -> Result<()> {
    let outcome = main_command()
        .arg("--dump-inputs")
        .arg("-")
        .assert()
        .success();

    outcome.stdout(contains("Stdin"));

    Ok(())
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -63,6 +63,14 @@ impl Collector {
|
|||
self
|
||||
}
|
||||
|
||||
/// Collect all sources from a list of [`Input`]s. For further details,
|
||||
/// see also [`Input::get_sources`](crate::Input#method.get_sources).
|
||||
pub async fn collect_sources(self, inputs: Vec<Input>) -> impl Stream<Item = Result<String>> {
|
||||
stream::iter(inputs)
|
||||
.par_then_unordered(None, move |input| async move { input.get_sources().await })
|
||||
.flatten()
|
||||
}
|
||||
|
||||
/// Fetch all unique links from inputs
|
||||
/// All relative URLs get prefixed with `base` (if given).
|
||||
/// (This can be a directory or a base URL)
|
||||
|
|
|
|||
|
|
@ -260,6 +260,41 @@ impl Input {
|
|||
}
|
||||
}
|
||||
|
||||
/// Retrieve all sources from this input. The output depends on the type of
|
||||
/// input:
|
||||
///
|
||||
/// - Remote URLs are returned as is, in their full form
|
||||
/// - Filepath Glob Patterns are expanded and each matched entry is returned
|
||||
/// - Absolute or relative filepaths are returned as is
|
||||
/// - All other input types are not returned
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns an error if the globbing fails with the expanded pattern.
|
||||
pub async fn get_sources(self) -> impl Stream<Item = Result<String>> {
|
||||
try_stream! {
|
||||
match self.source {
|
||||
InputSource::RemoteUrl(url) => yield url.to_string(),
|
||||
InputSource::FsGlob { pattern, ignore_case } => {
|
||||
let glob_expanded = tilde(&pattern).to_string();
|
||||
let mut match_opts = glob::MatchOptions::new();
|
||||
|
||||
match_opts.case_sensitive = !ignore_case;
|
||||
|
||||
for entry in glob_with(&glob_expanded, match_opts)? {
|
||||
match entry {
|
||||
Ok(path) => yield path.to_string_lossy().to_string(),
|
||||
Err(e) => eprintln!("{e:?}")
|
||||
}
|
||||
}
|
||||
},
|
||||
InputSource::FsPath(path) => yield path.to_string_lossy().to_string(),
|
||||
InputSource::Stdin => yield "Stdin".into(),
|
||||
InputSource::String(_) => yield "Raw String".into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn url_contents(url: &Url) -> Result<InputContent> {
|
||||
// Assume HTML for default paths
|
||||
let file_type = if url.path().is_empty() || url.path() == "/" {
|
||||
|
|
@ -282,10 +317,10 @@ impl Input {
|
|||
|
||||
async fn glob_contents(
|
||||
&self,
|
||||
path_glob: &str,
|
||||
pattern: &str,
|
||||
ignore_case: bool,
|
||||
) -> impl Stream<Item = Result<InputContent>> + '_ {
|
||||
let glob_expanded = tilde(&path_glob).to_string();
|
||||
let glob_expanded = tilde(&pattern).to_string();
|
||||
let mut match_opts = glob::MatchOptions::new();
|
||||
|
||||
match_opts.case_sensitive = !ignore_case;
|
||||
|
|
|
|||
Loading…
Reference in a new issue