Fix resolving absolute paths

The previous solution didn't resolve to absolute paths;
it merely removed components like `.` and `..`.
This commit is contained in:
Matthias 2021-09-03 01:42:57 +02:00
parent dd3205a87c
commit b7c129c431
5 changed files with 29 additions and 37 deletions

7
Cargo.lock generated
View file

@ -1394,6 +1394,7 @@ dependencies = [
"log",
"markup5ever_rcdom",
"openssl-sys",
"path-clean",
"pretty_assertions",
"pulldown-cmark",
"regex",
@ -1718,6 +1719,12 @@ dependencies = [
"winapi",
]
[[package]]
name = "path-clean"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ecba01bf2678719532c5e3059e0b5f0811273d94b397088b82e3bd0a78c78fdd"
[[package]]
name = "pem"
version = "0.8.3"

View file

@ -148,11 +148,15 @@ lychee ~/projects/*/README.md
# check links in local files (lychee supports advanced globbing and ~ expansion):
lychee "~/projects/big_project/**/README.*"
# ignore case when globbing and check result for each link:
lychee --glob-ignore-case --verbose "~/projects/**/[r]eadme.*"
# check links from epub file (requires atool: https://www.nongnu.org/atool)
acat -F zip {file.epub} "*.xhtml" "*.html" | lychee -
# check links in directory; block network requests
lychee --offline path/to/directory
```
### GitHub token

View file

@ -41,6 +41,7 @@ tokio = { version = "1.6.0", features = ["full"] }
typed-builder = "0.9.1"
url = { version = "2.2.2", features = ["serde"] }
log = "0.4.14"
path-clean = "0.1.0"
[dev-dependencies]
doc-comment = "0.3.3"

View file

@ -39,7 +39,7 @@ pub(crate) fn extract_links(
// Silently ignore anchor links for now
continue;
}
let uri = create_uri(root, base, &link)?;
let uri = create_uri_from_path(root, base, &link)?;
Request::new(Uri { inner: uri }, input_content.input.clone())
} else {
info!("Handling of {} not implemented yet", &link);
@ -122,7 +122,7 @@ fn extract_links_from_plaintext(input: &str) -> Vec<String> {
.collect()
}
fn create_uri(root: &PathBuf, base: &Option<Base>, link: &str) -> Result<Url> {
fn create_uri_from_path(root: &PathBuf, base: &Option<Base>, link: &str) -> Result<Url> {
let link = url::remove_get_params(&link);
let path = path::resolve(root, &PathBuf::from(&link), base)?;
Ok(Url::from_file_path(&path).map_err(|_e| ErrorKind::InvalidPath(path))?)

View file

@ -1,45 +1,25 @@
use crate::{Base, ErrorKind, Result};
use std::path::{Component, Path, PathBuf};
use path_clean::PathClean;
use std::env;
use std::path::{Path, PathBuf};
// Returns the directory reported by `Base::dir` for the given base.
// Yields `None` when no base is set, or when `Base::dir` itself returns `None`.
fn get_base_dir(base: &Option<Base>) -> Option<PathBuf> {
    // Bail out early with `None` if there is no base to inspect.
    let base = base.as_ref()?;
    Base::dir(base)
}
/// Normalize a path, removing things like `.` and `..`.
///
/// CAUTION: This does not resolve symlinks (unlike
/// [`std::fs::canonicalize`]). This may cause incorrect or surprising
/// behavior at times. This should be used carefully. Unfortunately,
/// [`std::fs::canonicalize`] can be hard to use correctly, since it can often
/// fail, or on Windows returns annoying device paths. This is a problem Cargo
/// needs to improve on.
///
/// Taken from [`cargo`](https://github.com/rust-lang/cargo/blob/fede83ccf973457de319ba6fa0e36ead454d2e20/src/cargo/util/paths.rs#L61)
pub(crate) fn normalize(path: &Path) -> PathBuf {
let mut components = path.components().peekable();
// https://stackoverflow.com/a/54817755/270334
pub(crate) fn absolute_path(path: impl AsRef<Path>) -> Result<PathBuf> {
let path = path.as_ref();
let mut ret = components.peek().copied().map_or_else(PathBuf::new, |c| {
components.next();
PathBuf::from(c.as_os_str())
});
for component in components {
match component {
Component::Prefix(..) => unreachable!(),
Component::RootDir => {
ret.push(component.as_os_str());
}
Component::CurDir => {}
Component::ParentDir => {
ret.pop();
}
Component::Normal(c) => {
ret.push(c);
}
}
let absolute_path = if path.is_absolute() {
path.to_path_buf()
} else {
env::current_dir()?.join(path)
}
ret
.clean();
Ok(absolute_path)
}
// Get the parent directory of a given `Path`.
@ -59,7 +39,7 @@ pub(crate) fn resolve(src: &Path, dst: &Path, base: &Option<Base>) -> Result<Pat
// Find `dst` in the parent directory of `src`
if let Some(parent) = src.parent() {
let rel_path = parent.join(dst.to_path_buf());
return Ok(normalize(&rel_path));
return Ok(absolute_path(&rel_path)?);
}
}
if dst.is_absolute() {
@ -73,7 +53,7 @@ pub(crate) fn resolve(src: &Path, dst: &Path, base: &Option<Base>) -> Result<Pat
)
})?;
let abs_path = join(dirname(&base), dst);
return Ok(normalize(&abs_path));
return Ok(absolute_path(&abs_path)?);
}
Err(ErrorKind::FileNotFound(dst.to_path_buf()))
}