diff --git a/examples/collect_links/collect_links.rs b/examples/collect_links/collect_links.rs index fbff5f5..fc97cbd 100644 --- a/examples/collect_links/collect_links.rs +++ b/examples/collect_links/collect_links.rs @@ -14,8 +14,8 @@ async fn main() -> Result<()> { ]; let links = Collector::new( - None, // base_url - false, // don't skip missing inputs + None, // base_url + None, false, // don't skip missing inputs 10, // max concurrency ) .collect_links( diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs index 535fa1a..585159f 100644 --- a/lychee-bin/src/main.rs +++ b/lychee-bin/src/main.rs @@ -197,10 +197,15 @@ async fn run(cfg: &Config, inputs: Vec) -> Result { .client() .map_err(|e| anyhow!(e))?; - let links = Collector::new(cfg.base_url.clone(), cfg.skip_missing, max_concurrency) - .collect_links(&inputs) - .await - .map_err(|e| anyhow!(e))?; + let links = Collector::new( + cfg.base_url.clone(), + cfg.base_dir.clone(), + cfg.skip_missing, + max_concurrency, + ) + .collect_links(&inputs) + .await + .map_err(|e| anyhow!(e))?; let pb = if cfg.no_progress { None diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs index ab463df..07b62d2 100644 --- a/lychee-bin/src/options.rs +++ b/lychee-bin/src/options.rs @@ -218,7 +218,12 @@ pub(crate) struct Config { pub(crate) method: String, - /// Base URL to check relative URLs - #[structopt(short, long, parse(try_from_str))] + /// Base directory to resolve absolute local links (leading slash) + #[structopt(long, parse(try_from_str))] + #[serde(default)] + pub(crate) base_dir: Option, + + /// Base URL to check relative URLs + #[structopt(long, parse(try_from_str))] #[serde(default)] pub(crate) base_url: Option, diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs index 31416cc..232ddb0 100644 --- a/lychee-lib/src/collector.rs +++ b/lychee-lib/src/collector.rs @@ -1,11 +1,12 @@ use crate::{extract::extract_links, uri::Uri, Input, Request, Result}; use reqwest::Url; -use std::collections::HashSet; +use std::{collections::HashSet, path::PathBuf}; /// 
Collector keeps the state of link collection #[derive(Debug, Clone)] pub struct Collector { base_url: Option, + base_dir: Option, skip_missing_inputs: bool, max_concurrency: usize, cache: HashSet, @@ -14,9 +15,15 @@ pub struct Collector { impl Collector { /// Create a new collector with an empty cache #[must_use] - pub fn new(base_url: Option, skip_missing_inputs: bool, max_concurrency: usize) -> Self { + pub fn new( + base_url: Option, + base_dir: Option, + skip_missing_inputs: bool, + max_concurrency: usize, + ) -> Self { Collector { base_url, + base_dir, skip_missing_inputs, max_concurrency, cache: HashSet::new(), @@ -52,8 +59,10 @@ impl Collector { while let Some(result) = contents_rx.recv().await { for input_content in result? { let base_url = self.base_url.clone(); - let handle = - tokio::task::spawn_blocking(move || extract_links(&input_content, &base_url)); + let base_dir = self.base_dir.clone(); + let handle = tokio::task::spawn_blocking(move || { + extract_links(&input_content, &base_url, &base_dir) + }); extract_links_handles.push(handle); } } @@ -160,7 +169,7 @@ mod test { }, ]; - let responses = Collector::new(None, false, 8) + let responses = Collector::new(None, None, false, 8) .collect_links(&inputs) .await?; let mut links = responses.into_iter().map(|r| r.uri).collect::>(); diff --git a/lychee-lib/src/extract.rs b/lychee-lib/src/extract.rs index 1dd310b..5bfafae 100644 --- a/lychee-lib/src/extract.rs +++ b/lychee-lib/src/extract.rs @@ -11,6 +11,7 @@ use pulldown_cmark::{Event as MDEvent, Parser, Tag}; use url::Url; use crate::{ + fs_tree, types::{FileType, InputContent}, Input, Request, Result, Uri, }; @@ -106,6 +107,7 @@ fn extract_links_from_plaintext(input: &str) -> Vec { pub(crate) fn extract_links( input_content: &InputContent, base_url: &Option, + base_dir: &Option, ) -> Result> { let links = match input_content.file_type { FileType::Markdown => extract_links_from_markdown(&input_content.content), @@ -125,9 +127,9 @@ pub(crate) fn 
extract_links( input_content.input.clone(), )); } else if let Input::FsPath(root) = &input_content.input { - if let Ok(path) = crate::fs_tree::find(&root, &PathBuf::from(&link)) { + if let Ok(path) = fs_tree::find(&root, &PathBuf::from(&link), base_dir) { let input_content = Input::path_content(path)?; - requests.extend(extract_links(&input_content, base_url)?); + requests.extend(extract_links(&input_content, base_url, base_dir)?); } else { info!("Cannot find path to {} in filesystem", &link); } @@ -183,6 +185,7 @@ mod test { extract_links( &InputContent::from_string(input, file_type), &base_url.map(|u| Url::parse(u).unwrap()), + &None, ) // unwrap is fine here as this helper function is only used in tests .unwrap() diff --git a/lychee-lib/src/fs_tree.rs b/lychee-lib/src/fs_tree.rs index b3255ea..44c9791 100644 --- a/lychee-lib/src/fs_tree.rs +++ b/lychee-lib/src/fs_tree.rs @@ -1,18 +1,30 @@ use crate::{ErrorKind, Result}; use std::path::{Path, PathBuf}; -pub(crate) fn find(root: &Path, dst: &Path) -> Result { +pub(crate) fn find(src: &Path, dst: &Path, base_dir: &Option) -> Result { if dst.exists() { return Ok(dst.to_path_buf()); } if dst.is_dir() { return Err(ErrorKind::FileNotFound(dst.into())); } - // Find `dst` in the `root` path - if let Some(parent) = root.parent() { - let rel = parent.join(dst.to_path_buf()); - if rel.exists() { - return Ok(rel); + if dst.is_absolute() { + // Absolute local links (leading slash) require the base_dir to + // define the document root. 
+ if let Some(base_dir) = base_dir { + let absolute = base_dir.join(dst.to_path_buf()); + if absolute.exists() { + return Ok(absolute); + } + } + } + if dst.is_relative() { + // Find `dst` relative to the parent directory of `src` + if let Some(parent) = src.parent() { + let relative = parent.join(dst.to_path_buf()); + if relative.exists() { + return Ok(relative); + } } } Err(ErrorKind::FileNotFound(dst.to_path_buf())) @@ -33,7 +45,7 @@ mod test_fs_tree { let dir = tempfile::tempdir()?; let dst = dir.path().join("foo.html"); File::create(&dst)?; - assert_eq!(find(&dummy, &dst)?, dst); + assert_eq!(find(&dummy, &dst, &None)?, dst); Ok(()) } @@ -45,7 +57,7 @@ mod test_fs_tree { let dir = tempfile::tempdir()?; let dst = dir.path().join("./foo.html"); File::create(&dst)?; - assert_eq!(find(&root, &dst)?, dst); + assert_eq!(find(&root, &dst, &None)?, dst); Ok(()) } @@ -57,7 +69,7 @@ mod test_fs_tree { let dir = tempfile::tempdir()?; let dst = dir.path().join("./foo.html"); File::create(&dst)?; - assert_eq!(find(&root, &dst)?, dst); + assert_eq!(find(&root, &dst, &None)?, dst); Ok(()) } @@ -66,7 +78,7 @@ mod test_fs_tree { let root = PathBuf::from("index.html"); // This file does not exist let dst = PathBuf::from("./foo.html"); - assert!(find(&root, &dst).is_err()); + assert!(find(&root, &dst, &None).is_err()); Ok(()) } @@ -81,7 +93,22 @@ mod test_fs_tree { let dst = PathBuf::from("./foo.html"); let dst_absolute = dir.path().join("./foo.html"); File::create(&dst_absolute)?; - assert_eq!(find(&root, &dst)?, dst_absolute); + assert_eq!(find(&root, &dst, &None)?, dst_absolute); + Ok(()) + } + + // dummy + // ./foo.html + // valid base dir + #[test] + fn test_find_absolute_from_base_dir() -> Result<()> { + let dummy = PathBuf::new(); + let dir = tempfile::tempdir()?; + let dst = dir.path().join("foo.html"); + File::create(&dst)?; + let base_dir = dir.path().to_path_buf(); + let dst_absolute = base_dir.join(dst.to_path_buf()); + assert_eq!(find(&dummy, &dst, &Some(base_dir))?, dst_absolute); Ok(()) 
} @@ -94,7 +121,7 @@ mod test_fs_tree { // We create the absolute path to foo.html, // but we address it under its relative path let dst = PathBuf::from("./foo.html"); - assert!(find(&root, &dst).is_err()); + assert!(find(&root, &dst, &None).is_err()); Ok(()) } @@ -106,7 +133,7 @@ mod test_fs_tree { let dir = tempfile::tempdir()?; let dst = dir.path().join("foo.html"); File::create(&dst)?; - assert_eq!(find(&root, &dst)?, dst); + assert_eq!(find(&root, &dst, &None)?, dst); Ok(()) } @@ -118,7 +145,7 @@ mod test_fs_tree { let dir = tempfile::tempdir()?; let dst = dir.path().join("foo.html"); File::create(&dst)?; - assert_eq!(find(&root, &dst)?, dst); + assert_eq!(find(&root, &dst, &None)?, dst); Ok(()) } @@ -129,7 +156,7 @@ mod test_fs_tree { let root = PathBuf::from("/path/to/"); let dir = tempfile::tempdir()?; File::create(&dir)?; - assert!(find(&root, &dir.into_path()).is_err()); + assert!(find(&root, &dir.into_path(), &None).is_err()); Ok(()) } }