diff --git a/examples/collect_links/collect_links.rs b/examples/collect_links/collect_links.rs
index fbff5f5..fc97cbd 100644
--- a/examples/collect_links/collect_links.rs
+++ b/examples/collect_links/collect_links.rs
@@ -14,8 +14,8 @@ async fn main() -> Result<()> {
];
let links = Collector::new(
- None, // base_url
- false, // don't skip missing inputs
+ None, // base_url
+ None, false, // don't skip missing inputs
10, // max concurrency
)
.collect_links(
diff --git a/lychee-bin/src/main.rs b/lychee-bin/src/main.rs
index 535fa1a..585159f 100644
--- a/lychee-bin/src/main.rs
+++ b/lychee-bin/src/main.rs
@@ -197,10 +197,15 @@ async fn run(cfg: &Config, inputs: Vec) -> Result {
.client()
.map_err(|e| anyhow!(e))?;
- let links = Collector::new(cfg.base_url.clone(), cfg.skip_missing, max_concurrency)
- .collect_links(&inputs)
- .await
- .map_err(|e| anyhow!(e))?;
+ let links = Collector::new(
+ cfg.base_url.clone(),
+ cfg.base_dir.clone(),
+ cfg.skip_missing,
+ max_concurrency,
+ )
+ .collect_links(&inputs)
+ .await
+ .map_err(|e| anyhow!(e))?;
let pb = if cfg.no_progress {
None
diff --git a/lychee-bin/src/options.rs b/lychee-bin/src/options.rs
index ab463df..07b62d2 100644
--- a/lychee-bin/src/options.rs
+++ b/lychee-bin/src/options.rs
@@ -218,7 +218,12 @@ pub(crate) struct Config {
pub(crate) method: String,
- /// Base URL to check relative URLs
+ /// Base directory (document root) used to resolve absolute local links
- #[structopt(short, long, parse(try_from_str))]
+ #[structopt(long, parse(try_from_str))]
+ #[serde(default)]
+ pub(crate) base_dir: Option,
+
+ /// Base URL to check relative URLs
+ #[structopt(long, parse(try_from_str))]
#[serde(default)]
pub(crate) base_url: Option,
diff --git a/lychee-lib/src/collector.rs b/lychee-lib/src/collector.rs
index 31416cc..232ddb0 100644
--- a/lychee-lib/src/collector.rs
+++ b/lychee-lib/src/collector.rs
@@ -1,11 +1,12 @@
use crate::{extract::extract_links, uri::Uri, Input, Request, Result};
use reqwest::Url;
-use std::collections::HashSet;
+use std::{collections::HashSet, path::PathBuf};
/// Collector keeps the state of link collection
#[derive(Debug, Clone)]
pub struct Collector {
base_url: Option,
+ base_dir: Option,
skip_missing_inputs: bool,
max_concurrency: usize,
cache: HashSet,
@@ -14,9 +15,15 @@ pub struct Collector {
impl Collector {
/// Create a new collector with an empty cache
#[must_use]
- pub fn new(base_url: Option, skip_missing_inputs: bool, max_concurrency: usize) -> Self {
+ pub fn new(
+ base_url: Option,
+ base_dir: Option,
+ skip_missing_inputs: bool,
+ max_concurrency: usize,
+ ) -> Self {
Collector {
base_url,
+ base_dir,
skip_missing_inputs,
max_concurrency,
cache: HashSet::new(),
@@ -52,8 +59,10 @@ impl Collector {
while let Some(result) = contents_rx.recv().await {
for input_content in result? {
let base_url = self.base_url.clone();
- let handle =
- tokio::task::spawn_blocking(move || extract_links(&input_content, &base_url));
+ let base_dir = self.base_dir.clone();
+ let handle = tokio::task::spawn_blocking(move || {
+ extract_links(&input_content, &base_url, &base_dir)
+ });
extract_links_handles.push(handle);
}
}
@@ -160,7 +169,7 @@ mod test {
},
];
- let responses = Collector::new(None, false, 8)
+ let responses = Collector::new(None, None, false, 8)
.collect_links(&inputs)
.await?;
let mut links = responses.into_iter().map(|r| r.uri).collect::>();
diff --git a/lychee-lib/src/extract.rs b/lychee-lib/src/extract.rs
index 1dd310b..5bfafae 100644
--- a/lychee-lib/src/extract.rs
+++ b/lychee-lib/src/extract.rs
@@ -11,6 +11,7 @@ use pulldown_cmark::{Event as MDEvent, Parser, Tag};
use url::Url;
use crate::{
+ fs_tree,
types::{FileType, InputContent},
Input, Request, Result, Uri,
};
@@ -106,6 +107,7 @@ fn extract_links_from_plaintext(input: &str) -> Vec {
pub(crate) fn extract_links(
input_content: &InputContent,
base_url: &Option,
+ base_dir: &Option,
) -> Result> {
let links = match input_content.file_type {
FileType::Markdown => extract_links_from_markdown(&input_content.content),
@@ -125,9 +127,9 @@ pub(crate) fn extract_links(
input_content.input.clone(),
));
} else if let Input::FsPath(root) = &input_content.input {
- if let Ok(path) = crate::fs_tree::find(&root, &PathBuf::from(&link)) {
+ if let Ok(path) = fs_tree::find(&root, &PathBuf::from(&link), base_dir) {
let input_content = Input::path_content(path)?;
- requests.extend(extract_links(&input_content, base_url)?);
+ requests.extend(extract_links(&input_content, base_url, base_dir)?);
} else {
info!("Cannot find path to {} in filesystem", &link);
}
@@ -183,6 +185,7 @@ mod test {
extract_links(
&InputContent::from_string(input, file_type),
&base_url.map(|u| Url::parse(u).unwrap()),
+ &None,
)
// unwrap is fine here as this helper function is only used in tests
.unwrap()
diff --git a/lychee-lib/src/fs_tree.rs b/lychee-lib/src/fs_tree.rs
index b3255ea..44c9791 100644
--- a/lychee-lib/src/fs_tree.rs
+++ b/lychee-lib/src/fs_tree.rs
@@ -1,18 +1,30 @@
use crate::{ErrorKind, Result};
use std::path::{Path, PathBuf};
-pub(crate) fn find(root: &Path, dst: &Path) -> Result {
+pub(crate) fn find(src: &Path, dst: &Path, base_dir: &Option) -> Result {
if dst.exists() {
return Ok(dst.to_path_buf());
}
if dst.is_dir() {
return Err(ErrorKind::FileNotFound(dst.into()));
}
- // Find `dst` in the `root` path
- if let Some(parent) = root.parent() {
- let rel = parent.join(dst.to_path_buf());
- if rel.exists() {
- return Ok(rel);
+ if dst.is_absolute() {
+ // Absolute local links (leading slash) need base_dir as the document root.
+ // NOTE(review): `join` with an absolute `dst` yields `dst` itself; strip its root first.
+ if let Some(base_dir) = base_dir {
+ let absolute = base_dir.join(dst.to_path_buf());
+ if absolute.exists() {
+ return Ok(absolute);
+ }
+ }
+ }
+ if dst.is_relative() {
+ // Find `dst` relative to the directory containing `src`
+ if let Some(parent) = src.parent() {
+ let relative = parent.join(dst.to_path_buf());
+ if relative.exists() {
+ return Ok(relative);
+ }
}
}
Err(ErrorKind::FileNotFound(dst.to_path_buf()))
@@ -33,7 +45,7 @@ mod test_fs_tree {
let dir = tempfile::tempdir()?;
let dst = dir.path().join("foo.html");
File::create(&dst)?;
- assert_eq!(find(&dummy, &dst)?, dst);
+ assert_eq!(find(&dummy, &dst, &None)?, dst);
Ok(())
}
@@ -45,7 +57,7 @@ mod test_fs_tree {
let dir = tempfile::tempdir()?;
let dst = dir.path().join("./foo.html");
File::create(&dst)?;
- assert_eq!(find(&root, &dst)?, dst);
+ assert_eq!(find(&root, &dst, &None)?, dst);
Ok(())
}
@@ -57,7 +69,7 @@ mod test_fs_tree {
let dir = tempfile::tempdir()?;
let dst = dir.path().join("./foo.html");
File::create(&dst)?;
- assert_eq!(find(&root, &dst)?, dst);
+ assert_eq!(find(&root, &dst, &None)?, dst);
Ok(())
}
@@ -66,7 +78,7 @@ mod test_fs_tree {
let root = PathBuf::from("index.html");
// This file does not exist
let dst = PathBuf::from("./foo.html");
- assert!(find(&root, &dst).is_err());
+ assert!(find(&root, &dst, &None).is_err());
Ok(())
}
@@ -81,7 +93,22 @@ mod test_fs_tree {
let dst = PathBuf::from("./foo.html");
let dst_absolute = dir.path().join("./foo.html");
File::create(&dst_absolute)?;
- assert_eq!(find(&root, &dst)?, dst_absolute);
+ assert_eq!(find(&root, &dst, &None)?, dst_absolute);
+ Ok(())
+ }
+
// Empty `src`, an existing `dst` at <tempdir>/foo.html, and the tempdir as base dir.
// NOTE(review): `dst` exists, so `find` returns from its first `dst.exists()`
// check and the base_dir branch is never reached — TODO cover a missing `dst`.
#[test]
fn test_find_absolute_from_base_dir() -> Result<()> {
let dummy = PathBuf::new();
let dir = tempfile::tempdir()?;
let dst = dir.path().join("foo.html");
File::create(&dst)?;
let base_dir = dir.path().to_path_buf();
let dst_absolute = base_dir.join(dst.to_path_buf());
assert_eq!(find(&dummy, &dst, &Some(base_dir))?, dst_absolute);
Ok(())
}
@@ -94,7 +121,7 @@ mod test_fs_tree {
// We create the absolute path to foo.html,
// but we address it under its relative path
let dst = PathBuf::from("./foo.html");
- assert!(find(&root, &dst).is_err());
+ assert!(find(&root, &dst, &None).is_err());
Ok(())
}
@@ -106,7 +133,7 @@ mod test_fs_tree {
let dir = tempfile::tempdir()?;
let dst = dir.path().join("foo.html");
File::create(&dst)?;
- assert_eq!(find(&root, &dst)?, dst);
+ assert_eq!(find(&root, &dst, &None)?, dst);
Ok(())
}
@@ -118,7 +145,7 @@ mod test_fs_tree {
let dir = tempfile::tempdir()?;
let dst = dir.path().join("foo.html");
File::create(&dst)?;
- assert_eq!(find(&root, &dst)?, dst);
+ assert_eq!(find(&root, &dst, &None)?, dst);
Ok(())
}
@@ -129,7 +156,7 @@ mod test_fs_tree {
let root = PathBuf::from("/path/to/");
let dir = tempfile::tempdir()?;
File::create(&dir)?;
- assert!(find(&root, &dir.into_path()).is_err());
+ assert!(find(&root, &dir.into_path(), &None).is_err());
Ok(())
}
}