mirror of
https://github.com/Hopiu/lychee.git
synced 2026-03-17 05:00:26 +00:00
Add support for base_dir
This commit is contained in:
parent
d5bb7ee7d7
commit
f9bf52ef10
6 changed files with 78 additions and 29 deletions
|
|
@ -14,8 +14,8 @@ async fn main() -> Result<()> {
|
|||
];
|
||||
|
||||
let links = Collector::new(
|
||||
None, // base_url
|
||||
false, // don't skip missing inputs
|
||||
None, // base_url
|
||||
None, false, // don't skip missing inputs
|
||||
10, // max concurrency
|
||||
)
|
||||
.collect_links(
|
||||
|
|
|
|||
|
|
@ -197,10 +197,15 @@ async fn run(cfg: &Config, inputs: Vec<Input>) -> Result<i32> {
|
|||
.client()
|
||||
.map_err(|e| anyhow!(e))?;
|
||||
|
||||
let links = Collector::new(cfg.base_url.clone(), cfg.skip_missing, max_concurrency)
|
||||
.collect_links(&inputs)
|
||||
.await
|
||||
.map_err(|e| anyhow!(e))?;
|
||||
let links = Collector::new(
|
||||
cfg.base_url.clone(),
|
||||
cfg.base_dir.clone(),
|
||||
cfg.skip_missing,
|
||||
max_concurrency,
|
||||
)
|
||||
.collect_links(&inputs)
|
||||
.await
|
||||
.map_err(|e| anyhow!(e))?;
|
||||
|
||||
let pb = if cfg.no_progress {
|
||||
None
|
||||
|
|
|
|||
|
|
@ -218,7 +218,12 @@ pub(crate) struct Config {
|
|||
pub(crate) method: String,
|
||||
|
||||
/// Base URL to check relative URLs
|
||||
#[structopt(short, long, parse(try_from_str))]
|
||||
#[structopt(long, parse(try_from_str))]
|
||||
#[serde(default)]
|
||||
pub(crate) base_dir: Option<PathBuf>,
|
||||
|
||||
/// Base URL to check relative URLs
|
||||
#[structopt(long, parse(try_from_str))]
|
||||
#[serde(default)]
|
||||
pub(crate) base_url: Option<Url>,
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +1,12 @@
|
|||
use crate::{extract::extract_links, uri::Uri, Input, Request, Result};
|
||||
use reqwest::Url;
|
||||
use std::collections::HashSet;
|
||||
use std::{collections::HashSet, path::PathBuf};
|
||||
|
||||
/// Collector keeps the state of link collection
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Collector {
|
||||
base_url: Option<Url>,
|
||||
base_dir: Option<PathBuf>,
|
||||
skip_missing_inputs: bool,
|
||||
max_concurrency: usize,
|
||||
cache: HashSet<Uri>,
|
||||
|
|
@ -14,9 +15,15 @@ pub struct Collector {
|
|||
impl Collector {
|
||||
/// Create a new collector with an empty cache
|
||||
#[must_use]
|
||||
pub fn new(base_url: Option<Url>, skip_missing_inputs: bool, max_concurrency: usize) -> Self {
|
||||
pub fn new(
|
||||
base_url: Option<Url>,
|
||||
base_dir: Option<PathBuf>,
|
||||
skip_missing_inputs: bool,
|
||||
max_concurrency: usize,
|
||||
) -> Self {
|
||||
Collector {
|
||||
base_url,
|
||||
base_dir,
|
||||
skip_missing_inputs,
|
||||
max_concurrency,
|
||||
cache: HashSet::new(),
|
||||
|
|
@ -52,8 +59,10 @@ impl Collector {
|
|||
while let Some(result) = contents_rx.recv().await {
|
||||
for input_content in result? {
|
||||
let base_url = self.base_url.clone();
|
||||
let handle =
|
||||
tokio::task::spawn_blocking(move || extract_links(&input_content, &base_url));
|
||||
let base_dir = self.base_dir.clone();
|
||||
let handle = tokio::task::spawn_blocking(move || {
|
||||
extract_links(&input_content, &base_url, &base_dir)
|
||||
});
|
||||
extract_links_handles.push(handle);
|
||||
}
|
||||
}
|
||||
|
|
@ -160,7 +169,7 @@ mod test {
|
|||
},
|
||||
];
|
||||
|
||||
let responses = Collector::new(None, false, 8)
|
||||
let responses = Collector::new(None, None, false, 8)
|
||||
.collect_links(&inputs)
|
||||
.await?;
|
||||
let mut links = responses.into_iter().map(|r| r.uri).collect::<Vec<Uri>>();
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ use pulldown_cmark::{Event as MDEvent, Parser, Tag};
|
|||
use url::Url;
|
||||
|
||||
use crate::{
|
||||
fs_tree,
|
||||
types::{FileType, InputContent},
|
||||
Input, Request, Result, Uri,
|
||||
};
|
||||
|
|
@ -106,6 +107,7 @@ fn extract_links_from_plaintext(input: &str) -> Vec<String> {
|
|||
pub(crate) fn extract_links(
|
||||
input_content: &InputContent,
|
||||
base_url: &Option<Url>,
|
||||
base_dir: &Option<PathBuf>,
|
||||
) -> Result<HashSet<Request>> {
|
||||
let links = match input_content.file_type {
|
||||
FileType::Markdown => extract_links_from_markdown(&input_content.content),
|
||||
|
|
@ -125,9 +127,9 @@ pub(crate) fn extract_links(
|
|||
input_content.input.clone(),
|
||||
));
|
||||
} else if let Input::FsPath(root) = &input_content.input {
|
||||
if let Ok(path) = crate::fs_tree::find(&root, &PathBuf::from(&link)) {
|
||||
if let Ok(path) = fs_tree::find(&root, &PathBuf::from(&link), base_dir) {
|
||||
let input_content = Input::path_content(path)?;
|
||||
requests.extend(extract_links(&input_content, base_url)?);
|
||||
requests.extend(extract_links(&input_content, base_url, base_dir)?);
|
||||
} else {
|
||||
info!("Cannot find path to {} in filesystem", &link);
|
||||
}
|
||||
|
|
@ -183,6 +185,7 @@ mod test {
|
|||
extract_links(
|
||||
&InputContent::from_string(input, file_type),
|
||||
&base_url.map(|u| Url::parse(u).unwrap()),
|
||||
&None,
|
||||
)
|
||||
// unwrap is fine here as this helper function is only used in tests
|
||||
.unwrap()
|
||||
|
|
|
|||
|
|
@ -1,18 +1,30 @@
|
|||
use crate::{ErrorKind, Result};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
pub(crate) fn find(root: &Path, dst: &Path) -> Result<PathBuf> {
|
||||
pub(crate) fn find(src: &Path, dst: &Path, base_dir: &Option<PathBuf>) -> Result<PathBuf> {
|
||||
if dst.exists() {
|
||||
return Ok(dst.to_path_buf());
|
||||
}
|
||||
if dst.is_dir() {
|
||||
return Err(ErrorKind::FileNotFound(dst.into()));
|
||||
}
|
||||
// Find `dst` in the `root` path
|
||||
if let Some(parent) = root.parent() {
|
||||
let rel = parent.join(dst.to_path_buf());
|
||||
if rel.exists() {
|
||||
return Ok(rel);
|
||||
if dst.is_absolute() {
|
||||
// Absolute local links (leading slash) require the base_dir to
|
||||
// define the document root.
|
||||
if let Some(base_dir) = base_dir {
|
||||
let absolute = base_dir.join(dst.to_path_buf());
|
||||
if absolute.exists() {
|
||||
return Ok(absolute);
|
||||
}
|
||||
}
|
||||
}
|
||||
if dst.is_relative() {
|
||||
// Find `dst` relative to the parent directory of `src`
|
||||
if let Some(parent) = src.parent() {
|
||||
let relative = parent.join(dst.to_path_buf());
|
||||
if relative.exists() {
|
||||
return Ok(relative);
|
||||
}
|
||||
}
|
||||
}
|
||||
Err(ErrorKind::FileNotFound(dst.to_path_buf()))
|
||||
|
|
@ -33,7 +45,7 @@ mod test_fs_tree {
|
|||
let dir = tempfile::tempdir()?;
|
||||
let dst = dir.path().join("foo.html");
|
||||
File::create(&dst)?;
|
||||
assert_eq!(find(&dummy, &dst)?, dst);
|
||||
assert_eq!(find(&dummy, &dst, &None)?, dst);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
@ -45,7 +57,7 @@ mod test_fs_tree {
|
|||
let dir = tempfile::tempdir()?;
|
||||
let dst = dir.path().join("./foo.html");
|
||||
File::create(&dst)?;
|
||||
assert_eq!(find(&root, &dst)?, dst);
|
||||
assert_eq!(find(&root, &dst, &None)?, dst);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
@ -57,7 +69,7 @@ mod test_fs_tree {
|
|||
let dir = tempfile::tempdir()?;
|
||||
let dst = dir.path().join("./foo.html");
|
||||
File::create(&dst)?;
|
||||
assert_eq!(find(&root, &dst)?, dst);
|
||||
assert_eq!(find(&root, &dst, &None)?, dst);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
@ -66,7 +78,7 @@ mod test_fs_tree {
|
|||
let root = PathBuf::from("index.html");
|
||||
// This file does not exist
|
||||
let dst = PathBuf::from("./foo.html");
|
||||
assert!(find(&root, &dst).is_err());
|
||||
assert!(find(&root, &dst, &None).is_err());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
@ -81,7 +93,22 @@ mod test_fs_tree {
|
|||
let dst = PathBuf::from("./foo.html");
|
||||
let dst_absolute = dir.path().join("./foo.html");
|
||||
File::create(&dst_absolute)?;
|
||||
assert_eq!(find(&root, &dst)?, dst_absolute);
|
||||
assert_eq!(find(&root, &dst, &None)?, dst_absolute);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// dummy
|
||||
// ./foo.html
|
||||
// valid base dir
|
||||
#[test]
|
||||
fn test_find_absolute_from_base_dir() -> Result<()> {
|
||||
let dummy = PathBuf::new();
|
||||
let dir = tempfile::tempdir()?;
|
||||
let dst = dir.path().join("foo.html");
|
||||
File::create(&dst)?;
|
||||
let base_dir = dir.path().to_path_buf();
|
||||
let dst_absolute = base_dir.join(dst.to_path_buf());
|
||||
assert_eq!(find(&dummy, &dst, &Some(base_dir))?, dst_absolute);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
@ -94,7 +121,7 @@ mod test_fs_tree {
|
|||
// We create the absolute path to foo.html,
|
||||
// but we address it under its relative path
|
||||
let dst = PathBuf::from("./foo.html");
|
||||
assert!(find(&root, &dst).is_err());
|
||||
assert!(find(&root, &dst, &None).is_err());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
@ -106,7 +133,7 @@ mod test_fs_tree {
|
|||
let dir = tempfile::tempdir()?;
|
||||
let dst = dir.path().join("foo.html");
|
||||
File::create(&dst)?;
|
||||
assert_eq!(find(&root, &dst)?, dst);
|
||||
assert_eq!(find(&root, &dst, &None)?, dst);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
@ -118,7 +145,7 @@ mod test_fs_tree {
|
|||
let dir = tempfile::tempdir()?;
|
||||
let dst = dir.path().join("foo.html");
|
||||
File::create(&dst)?;
|
||||
assert_eq!(find(&root, &dst)?, dst);
|
||||
assert_eq!(find(&root, &dst, &None)?, dst);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
|
@ -129,7 +156,7 @@ mod test_fs_tree {
|
|||
let root = PathBuf::from("/path/to/");
|
||||
let dir = tempfile::tempdir()?;
|
||||
File::create(&dir)?;
|
||||
assert!(find(&root, &dir.into_path()).is_err());
|
||||
assert!(find(&root, &dir.into_path(), &None).is_err());
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue