Introduce --root-dir (#1576)

* windows

* Introduce --root-path

* lint

* lint

* Simplification

* Add unit tests

* Add integration test

* Sync docs

* Add missing comment to make CI happy

* Revert one of the Windows-specific changes because it was causing a test failure

* Support both options at the same time

* Revert a comment change that is no longer applicable

* Remove unused code

* Fix and simplification

* Integration test both at the same time

* Unit tests both at the same time

* Remove now redundant comment

* Revert windows-specific change, seems not needed after recent changes

* Use Collector::default()

* extract method and unit tests

* clippy

* clippy: &Option<A> -> Option<&A>

* Remove outdated comment

* Rename --root-path to --root-dir

* Restrict --root-dir to absolute paths for now

* Move root dir check
This commit is contained in:
Trask Stalnaker 2024-12-13 05:36:33 -08:00 committed by GitHub
parent 685b653d14
commit 6d0e94c799
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 432 additions and 191 deletions

View file

@ -480,6 +480,9 @@ Options:
-b, --base <BASE>
Base URL or website root directory to check relative URLs e.g. <https://example.com> or `/path/to/public`
--root-dir <ROOT_DIR>
Root path to use when checking absolute local links, must be an absolute path
--basic-auth <BASIC_AUTH>
Basic authentication support. E.g. `http://example.com username:password`

View file

@ -21,7 +21,7 @@ async fn main() -> Result<()> {
},
];
let links = Collector::new(None) // base
let links = Collector::default()
.skip_missing_inputs(false) // don't skip missing inputs? (default=false)
.skip_hidden(false) // skip hidden files? (default=true)
.skip_ignored(false) // skip files that are ignored by git? (default=true)

View file

@ -0,0 +1,8 @@
<html>
<head>
<title>About</title>
</head>
<body>
<h1 id="fragment">About</h1>
</body>
</html>

View file

@ -0,0 +1,34 @@
<html>
<head>
<title>Index</title>
</head>
<body>
<h1>Index Title</h1>
<a id="good"></a>
<p>
<ul>
<li>
<a href="/nested">home</a>
</li>
<li>
<a href="/nested/about">About</a>
</li>
<li>
<a href="/nested/another page">About</a>
</li>
<li>
<a href="/nested/about/index.html#fragment">Fragment</a>
</li>
<li>
<a href="/nested/about/index.html#missing">Missing</a>
</li>
<li>
<a href="#good">Good</a>
</li>
<li>
<a href="#bad">Bad</a>
</li>
</ul>
</p>
</body>
</html>

View file

@ -192,7 +192,7 @@ async fn progress_bar_task(
while let Some(response) = recv_resp.recv().await {
show_progress(
&mut io::stderr(),
&pb,
pb.as_ref(),
&response,
formatter.as_ref(),
&verbose,
@ -331,7 +331,7 @@ fn ignore_cache(uri: &Uri, status: &Status, cache_exclude_status: &HashSet<u16>)
fn show_progress(
output: &mut dyn Write,
progress_bar: &Option<ProgressBar>,
progress_bar: Option<&ProgressBar>,
response: &Response,
formatter: &dyn ResponseFormatter,
verbose: &Verbosity,
@ -401,7 +401,7 @@ mod tests {
let formatter = get_response_formatter(&options::OutputMode::Plain);
show_progress(
&mut buf,
&None,
None,
&response,
formatter.as_ref(),
&Verbosity::default(),
@ -423,7 +423,7 @@ mod tests {
let formatter = get_response_formatter(&options::OutputMode::Plain);
show_progress(
&mut buf,
&None,
None,
&response,
formatter.as_ref(),
&Verbosity::debug(),

View file

@ -288,7 +288,7 @@ fn underlying_io_error_kind(error: &Error) -> Option<io::ErrorKind> {
async fn run(opts: &LycheeOptions) -> Result<i32> {
let inputs = opts.inputs()?;
let mut collector = Collector::new(opts.config.base.clone())
let mut collector = Collector::new(opts.config.root_dir.clone(), opts.config.base.clone())?
.skip_missing_inputs(opts.config.skip_missing)
.skip_hidden(!opts.config.hidden)
.skip_ignored(!opts.config.no_ignore)

View file

@ -445,6 +445,12 @@ separated list of accepted status codes. This example will accept 200, 201,
#[serde(default)]
pub(crate) base: Option<Base>,
/// Root path to use when checking absolute local links,
/// must be an absolute path
#[arg(long)]
#[serde(default)]
pub(crate) root_dir: Option<PathBuf>,
/// Basic authentication support. E.g. `http://example.com username:password`
#[arg(long)]
#[serde(default)]

View file

@ -393,6 +393,42 @@ mod cli {
.stdout(contains("3 OK"));
}
#[test]
fn test_resolve_paths_from_root_dir() {
// Check `nested/index.html` offline, with fragment checking enabled and
// `--root-dir` pointing at the `resolve_paths_from_root_dir` fixture directory.
let mut cmd = main_command();
let dir = fixtures_path().join("resolve_paths_from_root_dir");
cmd.arg("--offline")
.arg("--include-fragments")
.arg("--root-dir")
.arg(&dir)
.arg(dir.join("nested").join("index.html"))
.env_clear()
.assert()
// The run is expected to fail: 2 of the 7 links must be reported as errors.
.failure()
.stdout(contains("7 Total"))
.stdout(contains("5 OK"))
.stdout(contains("2 Errors"));
}
#[test]
fn test_resolve_paths_from_root_dir_and_base_url() {
// Use both options at once: `--root-dir` is the literal `/resolve_paths`,
// while `--base` is the fixtures directory that contains `resolve_paths/`.
let mut cmd = main_command();
let dir = fixtures_path();
cmd.arg("--offline")
.arg("--root-dir")
.arg("/resolve_paths")
.arg("--base")
.arg(&dir)
.arg(dir.join("resolve_paths").join("index.html"))
.env_clear()
.assert()
// All 3 links in the input are expected to check out.
.success()
.stdout(contains("3 Total"))
.stdout(contains("3 OK"));
}
#[test]
fn test_youtube_quirk() {
let url = "https://www.youtube.com/watch?v=NlKuICiT470&list=PLbWDhxwM_45mPVToqaIZNbZeIzFchsKKQ&index=7";

View file

@ -1,3 +1,4 @@
use crate::ErrorKind;
use crate::InputSource;
use crate::{
basic_auth::BasicAuthExtractor, extract::Extractor, types::uri::raw::RawUri, utils::request,
@ -9,6 +10,7 @@ use futures::{
StreamExt,
};
use par_stream::ParStreamExt;
use std::path::PathBuf;
/// Collector keeps the state of link collection
/// It drives the link extraction from inputs
@ -21,13 +23,12 @@ pub struct Collector {
skip_hidden: bool,
include_verbatim: bool,
use_html5ever: bool,
root_dir: Option<PathBuf>,
base: Option<Base>,
}
impl Collector {
/// Create a new collector with an empty cache
#[must_use]
pub const fn new(base: Option<Base>) -> Self {
impl Default for Collector {
fn default() -> Self {
Collector {
basic_auth_extractor: None,
skip_missing_inputs: false,
@ -35,9 +36,35 @@ impl Collector {
use_html5ever: false,
skip_hidden: true,
skip_ignored: true,
base,
root_dir: None,
base: None,
}
}
}
impl Collector {
/// Create a new collector with an empty cache
///
/// # Errors
///
/// Returns an `Err` if the `root_dir` is not an absolute path
pub fn new(root_dir: Option<PathBuf>, base: Option<Base>) -> Result<Self> {
if let Some(root_dir) = &root_dir {
if root_dir.is_relative() {
return Err(ErrorKind::RootDirMustBeAbsolute(root_dir.clone()));
}
}
Ok(Collector {
basic_auth_extractor: None,
skip_missing_inputs: false,
include_verbatim: false,
use_html5ever: false,
skip_hidden: true,
skip_ignored: true,
root_dir,
base,
})
}
/// Skip missing input files (default is to error if they don't exist)
#[must_use]
@ -119,12 +146,19 @@ impl Collector {
})
.flatten()
.par_then_unordered(None, move |(content, base)| {
let root_dir = self.root_dir.clone();
let basic_auth_extractor = self.basic_auth_extractor.clone();
async move {
let content = content?;
let extractor = Extractor::new(self.use_html5ever, self.include_verbatim);
let uris: Vec<RawUri> = extractor.extract(&content);
let requests = request::create(uris, &content, &base, &basic_auth_extractor);
let requests = request::create(
uris,
&content.source,
root_dir.as_ref(),
base.as_ref(),
basic_auth_extractor.as_ref(),
);
Result::Ok(stream::iter(requests.into_iter().map(Ok)))
}
})
@ -148,17 +182,25 @@ mod tests {
};
// Helper function to run the collector on the given inputs
async fn collect(inputs: Vec<Input>, base: Option<Base>) -> HashSet<Uri> {
let responses = Collector::new(base).collect_links(inputs);
responses.map(|r| r.unwrap().uri).collect().await
async fn collect(
inputs: Vec<Input>,
root_dir: Option<PathBuf>,
base: Option<Base>,
) -> Result<HashSet<Uri>> {
let responses = Collector::new(root_dir, base)?.collect_links(inputs);
Ok(responses.map(|r| r.unwrap().uri).collect().await)
}
// Helper function for collecting verbatim links
async fn collect_verbatim(inputs: Vec<Input>, base: Option<Base>) -> HashSet<Uri> {
let responses = Collector::new(base)
async fn collect_verbatim(
inputs: Vec<Input>,
root_dir: Option<PathBuf>,
base: Option<Base>,
) -> Result<HashSet<Uri>> {
let responses = Collector::new(root_dir, base)?
.include_verbatim(true)
.collect_links(inputs);
responses.map(|r| r.unwrap().uri).collect().await
Ok(responses.map(|r| r.unwrap().uri).collect().await)
}
const TEST_STRING: &str = "http://test-string.com";
@ -246,7 +288,7 @@ mod tests {
},
];
let links = collect_verbatim(inputs, None).await;
let links = collect_verbatim(inputs, None, None).await.ok().unwrap();
let expected_links = HashSet::from_iter([
website(TEST_STRING),
@ -269,7 +311,7 @@ mod tests {
file_type_hint: Some(FileType::Markdown),
excluded_paths: None,
};
let links = collect(vec![input], Some(base)).await;
let links = collect(vec![input], None, Some(base)).await.ok().unwrap();
let expected_links = HashSet::from_iter([
website("https://endler.dev"),
@ -295,7 +337,7 @@ mod tests {
file_type_hint: Some(FileType::Html),
excluded_paths: None,
};
let links = collect(vec![input], Some(base)).await;
let links = collect(vec![input], None, Some(base)).await.ok().unwrap();
let expected_links = HashSet::from_iter([
website("https://github.com/lycheeverse/lychee/"),
@ -324,7 +366,7 @@ mod tests {
file_type_hint: Some(FileType::Html),
excluded_paths: None,
};
let links = collect(vec![input], Some(base)).await;
let links = collect(vec![input], None, Some(base)).await.ok().unwrap();
let expected_links = HashSet::from_iter([
website("https://example.com/static/image.png"),
@ -351,7 +393,7 @@ mod tests {
excluded_paths: None,
};
let links = collect(vec![input], Some(base)).await;
let links = collect(vec![input], None, Some(base)).await.ok().unwrap();
let expected = HashSet::from_iter([
website("https://localhost.com/@/internal.md"),
@ -373,7 +415,7 @@ mod tests {
file_type_hint: Some(FileType::Html),
excluded_paths: None,
};
let links = collect(vec![input], Some(base)).await;
let links = collect(vec![input], None, Some(base)).await.ok().unwrap();
let expected_links = HashSet::from_iter([
// the body links wouldn't be present if the file was parsed strictly as XML
@ -406,7 +448,7 @@ mod tests {
excluded_paths: None,
};
let links = collect(vec![input], None).await;
let links = collect(vec![input], None, None).await.ok().unwrap();
let expected_urls = HashSet::from_iter([
website("https://github.com/lycheeverse/lychee/"),
@ -425,7 +467,7 @@ mod tests {
file_type_hint: None,
excluded_paths: None,
};
let links = collect(vec![input], None).await;
let links = collect(vec![input], None, None).await.ok().unwrap();
let expected_links = HashSet::from_iter([mail("user@example.com")]);
@ -468,7 +510,7 @@ mod tests {
},
];
let links = collect(inputs, None).await;
let links = collect(inputs, None, None).await.ok().unwrap();
let expected_links = HashSet::from_iter([
website(&format!(
@ -502,7 +544,7 @@ mod tests {
excluded_paths: None,
};
let links = collect(vec![input], Some(base)).await;
let links = collect(vec![input], None, Some(base)).await.ok().unwrap();
let expected_links = HashSet::from_iter([
path("/path/to/root/index.html"),

View file

@ -71,6 +71,7 @@ pub mod remap;
/// local IPs or e-mail addresses
pub mod filter;
/// Test utilities
#[cfg(test)]
#[macro_use]
pub mod test_utils;

View file

@ -30,15 +30,6 @@ impl Base {
}
}
/// Return the directory if the base is local
#[must_use]
pub(crate) fn dir(&self) -> Option<PathBuf> {
match self {
Self::Remote(_) => None,
Self::Local(d) => Some(d.clone()),
}
}
pub(crate) fn from_source(source: &InputSource) -> Option<Base> {
match &source {
InputSource::RemoteUrl(url) => {

View file

@ -94,6 +94,10 @@ pub enum ErrorKind {
#[error("Cannot convert path '{0}' to a URI")]
InvalidPathToUri(String),
/// Root dir must be an absolute path
#[error("Root dir must be an absolute path: '{0}'")]
RootDirMustBeAbsolute(PathBuf),
/// The given URI type is not supported
#[error("Unsupported URI type: '{0}'")]
UnsupportedUriType(String),
@ -310,6 +314,7 @@ impl Hash for ErrorKind {
Self::InvalidBase(base, e) => (base, e).hash(state),
Self::InvalidBaseJoin(s) => s.hash(state),
Self::InvalidPathToUri(s) => s.hash(state),
Self::RootDirMustBeAbsolute(s) => s.hash(state),
Self::UnsupportedUriType(s) => s.hash(state),
Self::InvalidUrlRemap(remap) => (remap).hash(state),
Self::InvalidHeader(e) => e.to_string().hash(state),

View file

@ -54,7 +54,6 @@ impl<P: AsRef<Path>> From<P> for FileType {
}
/// Helper function to check if a path is likely a URL.
fn is_url(path: &Path) -> bool {
path.to_str()
.and_then(|s| Url::parse(s).ok())

View file

@ -1,4 +1,4 @@
use crate::{Base, ErrorKind, Result};
use crate::{ErrorKind, Result};
use cached::proc_macro::cached;
use once_cell::sync::Lazy;
use path_clean::PathClean;
@ -9,11 +9,6 @@ use std::path::{Path, PathBuf};
static CURRENT_DIR: Lazy<PathBuf> =
Lazy::new(|| env::current_dir().expect("cannot get current dir from environment"));
/// Returns the base if it is a valid `PathBuf`
fn get_base_dir(base: &Option<Base>) -> Option<PathBuf> {
base.as_ref().and_then(Base::dir)
}
/// Create an absolute path out of a `PathBuf`.
///
/// The `clean` method is relatively expensive
@ -29,55 +24,33 @@ pub(crate) fn absolute_path(path: PathBuf) -> PathBuf {
.clean()
}
/// Get the directory name of a given `Path`.
fn dirname(src: &'_ Path) -> Option<&'_ Path> {
if src.is_file() {
return src.parent();
}
Some(src)
}
/// Resolve `dst` that was linked to from within `src`
///
/// Returns `Ok(None)` for an absolute local link when `ignore_absolute_local_links` is `true`
pub(crate) fn resolve(src: &Path, dst: &Path, base: &Option<Base>) -> Result<Option<PathBuf>> {
pub(crate) fn resolve(
src: &Path,
dst: &PathBuf,
ignore_absolute_local_links: bool,
) -> Result<Option<PathBuf>> {
let resolved = match dst {
relative if dst.is_relative() => {
// Find `dst` in the parent directory of `src`
let Some(parent) = src.parent() else {
return Err(ErrorKind::InvalidFile(relative.to_path_buf()));
return Err(ErrorKind::InvalidFile(relative.clone()));
};
parent.join(relative)
}
absolute if dst.is_absolute() => {
// Absolute local links (leading slash) require the `base_url` to
// define the document root. Silently ignore the link in case the
// `base_url` is not defined.
let Some(base) = get_base_dir(base) else {
if ignore_absolute_local_links {
return Ok(None);
};
let Some(dir) = dirname(&base) else {
return Err(ErrorKind::InvalidBase(
base.display().to_string(),
"The given directory cannot be a base".to_string(),
));
};
join(dir.to_path_buf(), absolute)
}
PathBuf::from(absolute)
}
_ => return Err(ErrorKind::InvalidFile(dst.to_path_buf())),
_ => return Err(ErrorKind::InvalidFile(dst.clone())),
};
Ok(Some(absolute_path(resolved)))
}
/// A cumbersome way to concatenate paths without checking their
/// existence on disk. See <https://github.com/rust-lang/rust/issues/16507>
fn join(base: PathBuf, dst: &Path) -> PathBuf {
let mut abs = base.into_os_string();
let target_str = dst.as_os_str();
abs.push(target_str);
PathBuf::from(abs)
}
/// Check if `child` is a subdirectory/file inside `parent`
///
/// Note that `contains(parent, parent)` will return `true`
@ -110,7 +83,7 @@ mod test_path {
let dummy = PathBuf::from("index.html");
let abs_path = PathBuf::from("./foo.html");
assert_eq!(
resolve(&dummy, &abs_path, &None)?,
resolve(&dummy, &abs_path, true)?,
Some(env::current_dir().unwrap().join("foo.html"))
);
Ok(())
@ -123,7 +96,7 @@ mod test_path {
let dummy = PathBuf::from("./index.html");
let abs_path = PathBuf::from("./foo.html");
assert_eq!(
resolve(&dummy, &abs_path, &None)?,
resolve(&dummy, &abs_path, true)?,
Some(env::current_dir().unwrap().join("foo.html"))
);
Ok(())
@ -136,43 +109,12 @@ mod test_path {
let abs_index = PathBuf::from("/path/to/index.html");
let abs_path = PathBuf::from("./foo.html");
assert_eq!(
resolve(&abs_index, &abs_path, &None)?,
resolve(&abs_index, &abs_path, true)?,
Some(PathBuf::from("/path/to/foo.html"))
);
Ok(())
}
// dummy
// foo.html
// valid base dir
#[test]
fn test_resolve_absolute_from_base_dir() -> Result<()> {
let dummy = PathBuf::new();
let abs_path = PathBuf::from("/foo.html");
let base = Some(Base::Local(PathBuf::from("/some/absolute/base/dir")));
assert_eq!(
resolve(&dummy, &abs_path, &base)?,
Some(PathBuf::from("/some/absolute/base/dir/foo.html"))
);
Ok(())
}
// /path/to/index.html
// /other/path/to/foo.html
#[test]
fn test_resolve_absolute_from_absolute() -> Result<()> {
let abs_index = PathBuf::from("/path/to/index.html");
let abs_path = PathBuf::from("/other/path/to/foo.html");
let base = Some(Base::Local(PathBuf::from("/some/absolute/base/dir")));
assert_eq!(
resolve(&abs_index, &abs_path, &base)?,
Some(PathBuf::from(
"/some/absolute/base/dir/other/path/to/foo.html"
))
);
Ok(())
}
#[test]
fn test_contains() {
let parent_dir = tempfile::tempdir().unwrap();

View file

@ -8,14 +8,14 @@ use std::{
use crate::{
basic_auth::BasicAuthExtractor,
types::{uri::raw::RawUri, InputContent, InputSource},
types::{uri::raw::RawUri, InputSource},
utils::{path, url},
Base, BasicAuthCredentials, ErrorKind, Request, Result, Uri,
};
/// Extract basic auth credentials for a given URL.
fn extract_credentials(
extractor: &Option<BasicAuthExtractor>,
extractor: Option<&BasicAuthExtractor>,
uri: &Uri,
) -> Option<BasicAuthCredentials> {
extractor.as_ref().and_then(|ext| ext.matches(uri))
@ -25,10 +25,11 @@ fn extract_credentials(
fn create_request(
raw_uri: &RawUri,
source: &InputSource,
base: &Option<Base>,
extractor: &Option<BasicAuthExtractor>,
root_dir: Option<&PathBuf>,
base: Option<&Base>,
extractor: Option<&BasicAuthExtractor>,
) -> Result<Request> {
let uri = try_parse_into_uri(raw_uri, source, base)?;
let uri = try_parse_into_uri(raw_uri, source, root_dir, base)?;
let source = truncate_source(source);
let element = raw_uri.element.clone();
let attribute = raw_uri.attribute.clone();
@ -48,8 +49,13 @@ fn create_request(
/// to create a valid URI.
/// - If a URI cannot be created from the file path.
/// - If the source is not a file path (i.e. the URI type is not supported).
fn try_parse_into_uri(raw_uri: &RawUri, source: &InputSource, base: &Option<Base>) -> Result<Uri> {
let text = raw_uri.text.clone();
fn try_parse_into_uri(
raw_uri: &RawUri,
source: &InputSource,
root_dir: Option<&PathBuf>,
base: Option<&Base>,
) -> Result<Uri> {
let text = prepend_root_dir_if_absolute_local_link(&raw_uri.text, root_dir);
let uri = match Uri::try_from(raw_uri.clone()) {
Ok(uri) => uri,
Err(_) => match base {
@ -58,7 +64,9 @@ fn try_parse_into_uri(raw_uri: &RawUri, source: &InputSource, base: &Option<Base
None => return Err(ErrorKind::InvalidBaseJoin(text.clone())),
},
None => match source {
InputSource::FsPath(root) => create_uri_from_file_path(root, &text, base)?,
InputSource::FsPath(root) => {
create_uri_from_file_path(root, &text, root_dir.is_none())?
}
_ => return Err(ErrorKind::UnsupportedUriType(text)),
},
},
@ -81,7 +89,7 @@ pub(crate) fn is_anchor(text: &str) -> bool {
fn create_uri_from_file_path(
file_path: &Path,
link_text: &str,
base: &Option<Base>,
ignore_absolute_local_links: bool,
) -> Result<Uri> {
let target_path = if is_anchor(link_text) {
// For anchors, we need to append the anchor to the file name.
@ -94,7 +102,9 @@ fn create_uri_from_file_path(
} else {
link_text.to_string()
};
let Ok(constructed_url) = resolve_and_create_url(file_path, &target_path, base) else {
let Ok(constructed_url) =
resolve_and_create_url(file_path, &target_path, ignore_absolute_local_links)
else {
return Err(ErrorKind::InvalidPathToUri(target_path));
};
Ok(Uri {
@ -125,17 +135,16 @@ fn truncate_source(source: &InputSource) -> InputSource {
/// it will not be added to the `HashSet`.
pub(crate) fn create(
uris: Vec<RawUri>,
input_content: &InputContent,
base: &Option<Base>,
extractor: &Option<BasicAuthExtractor>,
source: &InputSource,
root_dir: Option<&PathBuf>,
base: Option<&Base>,
extractor: Option<&BasicAuthExtractor>,
) -> HashSet<Request> {
let base = base
.clone()
.or_else(|| Base::from_source(&input_content.source));
let base = base.cloned().or_else(|| Base::from_source(source));
uris.into_iter()
.filter_map(|raw_uri| {
match create_request(&raw_uri, &input_content.source, &base, extractor) {
match create_request(&raw_uri, source, root_dir, base.as_ref(), extractor) {
Ok(request) => Some(request),
Err(e) => {
warn!("Error creating request: {:?}", e);
@ -160,7 +169,7 @@ pub(crate) fn create(
fn resolve_and_create_url(
src_path: &Path,
dest_path: &str,
base_uri: &Option<Base>,
ignore_absolute_local_links: bool,
) -> Result<Url> {
let (dest_path, fragment) = url::remove_get_params_and_separate_fragment(dest_path);
@ -168,8 +177,11 @@ fn resolve_and_create_url(
// This addresses the issue mentioned in the original comment about double-encoding
let decoded_dest = percent_decode_str(dest_path).decode_utf8()?;
let Ok(Some(resolved_path)) = path::resolve(src_path, &PathBuf::from(&*decoded_dest), base_uri)
else {
let Ok(Some(resolved_path)) = path::resolve(
src_path,
&PathBuf::from(&*decoded_dest),
ignore_absolute_local_links,
) else {
return Err(ErrorKind::InvalidPathToUri(decoded_dest.to_string()));
};
@ -181,10 +193,20 @@ fn resolve_and_create_url(
Ok(url)
}
/// Prefix an absolute local link with the configured root directory.
///
/// A link counts as "absolute local" when `text` starts with `/`. In that
/// case the textual form of `root_dir` is put in front of it, so `/about`
/// with a root of `/site` becomes `/site/about`. Trailing path separators on
/// `root_dir` are dropped first, so that `/site/` (or the filesystem root
/// `/`) does not produce a doubled slash such as `/site//about` or `//about`.
///
/// `text` is returned unchanged when it does not start with `/`, when no
/// `root_dir` is given, or when `root_dir` is not valid UTF-8.
fn prepend_root_dir_if_absolute_local_link(text: &str, root_dir: Option<&PathBuf>) -> String {
    if !text.starts_with('/') {
        return text.to_string();
    }
    match root_dir.and_then(|dir| dir.to_str()) {
        Some(root) => {
            // `text` already brings its own leading slash.
            let root = root.trim_end_matches(std::path::is_separator);
            format!("{root}{text}")
        }
        None => text.to_string(),
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::types::FileType;
#[test]
fn test_is_anchor() {
@ -195,28 +217,17 @@ mod tests {
#[test]
fn test_create_uri_from_path() {
let result =
resolve_and_create_url(&PathBuf::from("/README.md"), "test+encoding", &None).unwrap();
resolve_and_create_url(&PathBuf::from("/README.md"), "test+encoding", true).unwrap();
assert_eq!(result.as_str(), "file:///test+encoding");
}
fn create_input(content: &str, file_type: FileType) -> InputContent {
InputContent {
content: content.to_string(),
file_type,
source: InputSource::String(content.to_string()),
}
}
#[test]
fn test_relative_url_resolution() {
let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
let input = create_input(
r#"<a href="relative.html">Relative Link</a>"#,
FileType::Html,
);
let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = InputSource::String(String::new());
let uris = vec![RawUri::from("relative.html")];
let requests = create(uris, &input, &base, &None);
let requests = create(uris, &source, None, Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(requests
@ -226,14 +237,11 @@ mod tests {
#[test]
fn test_absolute_url_resolution() {
let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
let input = create_input(
r#"<a href="https://another.com/page">Absolute Link</a>"#,
FileType::Html,
);
let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = InputSource::String(String::new());
let uris = vec![RawUri::from("https://another.com/page")];
let requests = create(uris, &input, &base, &None);
let requests = create(uris, &source, None, Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(requests
@ -243,14 +251,11 @@ mod tests {
#[test]
fn test_root_relative_url_resolution() {
let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
let input = create_input(
r#"<a href="/root-relative">Root Relative Link</a>"#,
FileType::Html,
);
let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = InputSource::String(String::new());
let uris = vec![RawUri::from("/root-relative")];
let requests = create(uris, &input, &base, &None);
let requests = create(uris, &source, None, Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(requests
@ -260,14 +265,11 @@ mod tests {
#[test]
fn test_parent_directory_url_resolution() {
let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
let input = create_input(
r#"<a href="../parent">Parent Directory Link</a>"#,
FileType::Html,
);
let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = InputSource::String(String::new());
let uris = vec![RawUri::from("../parent")];
let requests = create(uris, &input, &base, &None);
let requests = create(uris, &source, None, Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(requests
@ -277,11 +279,156 @@ mod tests {
#[test]
fn test_fragment_url_resolution() {
let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
let input = create_input(r##"<a href="#fragment">Fragment Link</a>"##, FileType::Html);
let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = InputSource::String(String::new());
let uris = vec![RawUri::from("#fragment")];
let requests = create(uris, &input, &base, &None);
let requests = create(uris, &source, None, Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(requests
.iter()
.any(|r| r.uri.url.as_str() == "https://example.com/path/page.html#fragment"));
}
#[test]
fn test_relative_url_resolution_from_root_dir() {
    // A relative link is not affected by the root dir: it resolves next to
    // the source file.
    let root_dir = PathBuf::from("/tmp/lychee");
    let source = InputSource::FsPath(PathBuf::from("/some/page.html"));

    let requests = create(
        vec![RawUri::from("relative.html")],
        &source,
        Some(&root_dir),
        None,
        None,
    );

    assert_eq!(requests.len(), 1);
    assert!(requests
        .iter()
        .any(|request| request.uri.url.as_str() == "file:///some/relative.html"));
}
#[test]
fn test_absolute_url_resolution_from_root_dir() {
    // A full URL is kept exactly as written, root dir or not.
    let root_dir = PathBuf::from("/tmp/lychee");
    let source = InputSource::FsPath(PathBuf::from("/some/page.html"));

    let requests = create(
        vec![RawUri::from("https://another.com/page")],
        &source,
        Some(&root_dir),
        None,
        None,
    );

    assert_eq!(requests.len(), 1);
    assert!(requests
        .iter()
        .any(|request| request.uri.url.as_str() == "https://another.com/page"));
}
#[test]
fn test_root_relative_url_resolution_from_root_dir() {
    // A link with a leading slash is looked up below the root dir.
    let root_dir = PathBuf::from("/tmp/lychee");
    let source = InputSource::FsPath(PathBuf::from("/some/page.html"));

    let requests = create(
        vec![RawUri::from("/root-relative")],
        &source,
        Some(&root_dir),
        None,
        None,
    );

    assert_eq!(requests.len(), 1);
    assert!(requests
        .iter()
        .any(|request| request.uri.url.as_str() == "file:///tmp/lychee/root-relative"));
}
#[test]
fn test_parent_directory_url_resolution_from_root_dir() {
    // `..` walks up from the source file's directory; the root dir is not
    // involved.
    let root_dir = PathBuf::from("/tmp/lychee");
    let source = InputSource::FsPath(PathBuf::from("/some/page.html"));

    let requests = create(
        vec![RawUri::from("../parent")],
        &source,
        Some(&root_dir),
        None,
        None,
    );

    assert_eq!(requests.len(), 1);
    assert!(requests
        .iter()
        .any(|request| request.uri.url.as_str() == "file:///parent"));
}
#[test]
fn test_fragment_url_resolution_from_root_dir() {
    // A bare fragment points back into the source file itself.
    let root_dir = PathBuf::from("/tmp/lychee");
    let source = InputSource::FsPath(PathBuf::from("/some/page.html"));

    let requests = create(
        vec![RawUri::from("#fragment")],
        &source,
        Some(&root_dir),
        None,
        None,
    );

    assert_eq!(requests.len(), 1);
    assert!(requests
        .iter()
        .any(|request| request.uri.url.as_str() == "file:///some/page.html#fragment"));
}
#[test]
fn test_relative_url_resolution_from_root_dir_and_base_url() {
    // With a base URL present, a relative link is joined onto the base; the
    // root dir plays no part.
    let root_dir = PathBuf::from("/tmp/lychee");
    let base = Base::try_from("https://example.com/path/page.html").unwrap();
    let source = InputSource::FsPath(PathBuf::from("/some/page.html"));

    let requests = create(
        vec![RawUri::from("relative.html")],
        &source,
        Some(&root_dir),
        Some(&base),
        None,
    );

    assert_eq!(requests.len(), 1);
    assert!(requests
        .iter()
        .any(|request| request.uri.url.as_str() == "https://example.com/path/relative.html"));
}
#[test]
fn test_absolute_url_resolution_from_root_dir_and_base_url() {
    // A full URL is kept exactly as written even when both options are set.
    let root_dir = PathBuf::from("/tmp/lychee");
    let base = Base::try_from("https://example.com/path/page.html").unwrap();
    let source = InputSource::FsPath(PathBuf::from("/some/page.html"));

    let requests = create(
        vec![RawUri::from("https://another.com/page")],
        &source,
        Some(&root_dir),
        Some(&base),
        None,
    );

    assert_eq!(requests.len(), 1);
    assert!(requests
        .iter()
        .any(|request| request.uri.url.as_str() == "https://another.com/page"));
}
#[test]
fn test_root_relative_url_resolution_from_root_dir_and_base_url() {
    // A leading-slash link first gets the root dir prepended and is then
    // joined onto the base URL.
    let root_dir = PathBuf::from("/tmp/lychee");
    let base = Base::try_from("https://example.com/path/page.html").unwrap();
    let source = InputSource::FsPath(PathBuf::from("/some/page.html"));

    let requests = create(
        vec![RawUri::from("/root-relative")],
        &source,
        Some(&root_dir),
        Some(&base),
        None,
    );

    assert_eq!(requests.len(), 1);
    assert!(requests.iter().any(|request| {
        request.uri.url.as_str() == "https://example.com/tmp/lychee/root-relative"
    }));
}
#[test]
fn test_parent_directory_url_resolution_from_root_dir_and_base_url() {
    // `..` is resolved against the base URL's path; the root dir is not
    // involved.
    let root_dir = PathBuf::from("/tmp/lychee");
    let base = Base::try_from("https://example.com/path/page.html").unwrap();
    let source = InputSource::FsPath(PathBuf::from("/some/page.html"));

    let requests = create(
        vec![RawUri::from("../parent")],
        &source,
        Some(&root_dir),
        Some(&base),
        None,
    );

    assert_eq!(requests.len(), 1);
    assert!(requests
        .iter()
        .any(|request| request.uri.url.as_str() == "https://example.com/parent"));
}
#[test]
fn test_fragment_url_resolution_from_root_dir_and_base_url() {
let root_dir = PathBuf::from("/tmp/lychee");
let base = Base::try_from("https://example.com/path/page.html").unwrap();
let source = InputSource::FsPath(PathBuf::from("/some/page.html"));
let uris = vec![RawUri::from("#fragment")];
let requests = create(uris, &source, Some(&root_dir), Some(&base), None);
assert_eq!(requests.len(), 1);
assert!(requests
@ -291,14 +438,10 @@ mod tests {
#[test]
fn test_no_base_url_resolution() {
let base = None;
let input = create_input(
r#"<a href="https://example.com/page">Absolute Link</a>"#,
FileType::Html,
);
let source = InputSource::String(String::new());
let uris = vec![RawUri::from("https://example.com/page")];
let requests = create(uris, &input, &base, &None);
let requests = create(uris, &source, None, None, None);
assert_eq!(requests.len(), 1);
assert!(requests
@ -308,11 +451,17 @@ mod tests {
#[test]
fn test_create_request_from_relative_file_path() {
let base = Some(Base::Local(PathBuf::from("/tmp/lychee")));
let base = Base::Local(PathBuf::from("/tmp/lychee"));
let input_source = InputSource::FsPath(PathBuf::from("page.html"));
let actual =
create_request(&RawUri::from("file.html"), &input_source, &base, &None).unwrap();
let actual = create_request(
&RawUri::from("file.html"),
&input_source,
None,
Some(&base),
None,
)
.unwrap();
assert_eq!(
actual,
@ -330,15 +479,16 @@ mod tests {
#[test]
fn test_create_request_from_absolute_file_path() {
let base = Some(Base::Local(PathBuf::from("/tmp/lychee")));
let base = Base::Local(PathBuf::from("/tmp/lychee"));
let input_source = InputSource::FsPath(PathBuf::from("/tmp/lychee/page.html"));
// Use an absolute path that's outside the base directory
let actual = create_request(
&RawUri::from("/usr/local/share/doc/example.html"),
&input_source,
&base,
&None,
None,
Some(&base),
None,
)
.unwrap();
@ -358,29 +508,53 @@ mod tests {
#[test]
fn test_parse_relative_path_into_uri() {
let base = Some(Base::Local(PathBuf::from("/tmp/lychee")));
let input = create_input(
r#"<a href="relative.html">Relative Link</a>"#,
FileType::Html,
);
let base = Base::Local(PathBuf::from("/tmp/lychee"));
let source = InputSource::String(String::new());
let raw_uri = RawUri::from("relative.html");
let uri = try_parse_into_uri(&raw_uri, &input.source, &base).unwrap();
let uri = try_parse_into_uri(&raw_uri, &source, None, Some(&base)).unwrap();
assert_eq!(uri.url.as_str(), "file:///tmp/lychee/relative.html");
}
#[test]
fn test_parse_absolute_path_into_uri() {
let base = Some(Base::Local(PathBuf::from("/tmp/lychee")));
let input = create_input(
r#"<a href="/absolute.html">Absolute Link</a>"#,
FileType::Html,
);
let base = Base::Local(PathBuf::from("/tmp/lychee"));
let source = InputSource::String(String::new());
let raw_uri = RawUri::from("absolute.html");
let uri = try_parse_into_uri(&raw_uri, &input.source, &base).unwrap();
let uri = try_parse_into_uri(&raw_uri, &source, None, Some(&base)).unwrap();
assert_eq!(uri.url.as_str(), "file:///tmp/lychee/absolute.html");
}
#[test]
fn test_prepend_with_absolute_local_link_and_root_dir() {
    // An absolute local link gets the root dir put in front of it.
    let root_dir = PathBuf::from("/root");
    assert_eq!(
        prepend_root_dir_if_absolute_local_link("/absolute/path", Some(&root_dir)),
        "/root/absolute/path"
    );
}
#[test]
fn test_prepend_with_absolute_local_link_and_no_root_dir() {
    // Without a root dir the absolute link comes back untouched.
    assert_eq!(
        prepend_root_dir_if_absolute_local_link("/absolute/path", None),
        "/absolute/path"
    );
}
#[test]
fn test_prepend_with_relative_link_and_root_dir() {
    // A relative link is never prefixed, even when a root dir is set.
    let root_dir = PathBuf::from("/root");
    assert_eq!(
        prepend_root_dir_if_absolute_local_link("relative/path", Some(&root_dir)),
        "relative/path"
    );
}
#[test]
fn test_prepend_with_relative_link_and_no_root_dir() {
    // Neither condition holds, so the link comes back untouched.
    assert_eq!(
        prepend_root_dir_if_absolute_local_link("relative/path", None),
        "relative/path"
    );
}
}