mirror of
https://github.com/Hopiu/lychee.git
synced 2026-05-02 02:44:45 +00:00
Fix resolving absolute paths
The previous solution did not actually resolve paths to absolute paths; it only normalized them by removing components such as `.` and `..`.
This commit is contained in:
parent
dd3205a87c
commit
b7c129c431
5 changed files with 29 additions and 37 deletions
7
Cargo.lock
generated
7
Cargo.lock
generated
|
|
@ -1394,6 +1394,7 @@ dependencies = [
|
|||
"log",
|
||||
"markup5ever_rcdom",
|
||||
"openssl-sys",
|
||||
"path-clean",
|
||||
"pretty_assertions",
|
||||
"pulldown-cmark",
|
||||
"regex",
|
||||
|
|
@ -1718,6 +1719,12 @@ dependencies = [
|
|||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "path-clean"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ecba01bf2678719532c5e3059e0b5f0811273d94b397088b82e3bd0a78c78fdd"
|
||||
|
||||
[[package]]
|
||||
name = "pem"
|
||||
version = "0.8.3"
|
||||
|
|
|
|||
|
|
@ -148,11 +148,15 @@ lychee ~/projects/*/README.md
|
|||
|
||||
# check links in local files (lychee supports advanced globbing and ~ expansion):
|
||||
lychee "~/projects/big_project/**/README.*"
|
||||
|
||||
# ignore case when globbing and check result for each link:
|
||||
lychee --glob-ignore-case --verbose "~/projects/**/[r]eadme.*"
|
||||
|
||||
# check links from epub file (requires atool: https://www.nongnu.org/atool)
|
||||
acat -F zip {file.epub} "*.xhtml" "*.html" | lychee -
|
||||
|
||||
# check links in directory; block network requests
|
||||
lychee --offline path/to/directory
|
||||
```
|
||||
|
||||
### GitHub token
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ tokio = { version = "1.6.0", features = ["full"] }
|
|||
typed-builder = "0.9.1"
|
||||
url = { version = "2.2.2", features = ["serde"] }
|
||||
log = "0.4.14"
|
||||
path-clean = "0.1.0"
|
||||
|
||||
[dev-dependencies]
|
||||
doc-comment = "0.3.3"
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ pub(crate) fn extract_links(
|
|||
// Silently ignore anchor links for now
|
||||
continue;
|
||||
}
|
||||
let uri = create_uri(root, base, &link)?;
|
||||
let uri = create_uri_from_path(root, base, &link)?;
|
||||
Request::new(Uri { inner: uri }, input_content.input.clone())
|
||||
} else {
|
||||
info!("Handling of {} not implemented yet", &link);
|
||||
|
|
@ -122,7 +122,7 @@ fn extract_links_from_plaintext(input: &str) -> Vec<String> {
|
|||
.collect()
|
||||
}
|
||||
|
||||
fn create_uri(root: &PathBuf, base: &Option<Base>, link: &str) -> Result<Url> {
|
||||
fn create_uri_from_path(root: &PathBuf, base: &Option<Base>, link: &str) -> Result<Url> {
|
||||
let link = url::remove_get_params(&link);
|
||||
let path = path::resolve(root, &PathBuf::from(&link), base)?;
|
||||
Ok(Url::from_file_path(&path).map_err(|_e| ErrorKind::InvalidPath(path))?)
|
||||
|
|
|
|||
|
|
@ -1,45 +1,25 @@
|
|||
use crate::{Base, ErrorKind, Result};
|
||||
use std::path::{Component, Path, PathBuf};
|
||||
use path_clean::PathClean;
|
||||
use std::env;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
// Returns the base if it is a valid `PathBuf`
|
||||
fn get_base_dir(base: &Option<Base>) -> Option<PathBuf> {
|
||||
base.as_ref().and_then(Base::dir)
|
||||
}
|
||||
|
||||
/// Normalize a path, removing things like `.` and `..`.
|
||||
///
|
||||
/// CAUTION: This does not resolve symlinks (unlike
|
||||
/// [`std::fs::canonicalize`]). This may cause incorrect or surprising
|
||||
/// behavior at times. This should be used carefully. Unfortunately,
|
||||
/// [`std::fs::canonicalize`] can be hard to use correctly, since it can often
|
||||
/// fail, or on Windows returns annoying device paths. This is a problem Cargo
|
||||
/// needs to improve on.
|
||||
///
|
||||
/// Taken from [`cargo`](https://github.com/rust-lang/cargo/blob/fede83ccf973457de319ba6fa0e36ead454d2e20/src/cargo/util/paths.rs#L61)
|
||||
pub(crate) fn normalize(path: &Path) -> PathBuf {
|
||||
let mut components = path.components().peekable();
|
||||
// https://stackoverflow.com/a/54817755/270334
|
||||
pub(crate) fn absolute_path(path: impl AsRef<Path>) -> Result<PathBuf> {
|
||||
let path = path.as_ref();
|
||||
|
||||
let mut ret = components.peek().copied().map_or_else(PathBuf::new, |c| {
|
||||
components.next();
|
||||
PathBuf::from(c.as_os_str())
|
||||
});
|
||||
|
||||
for component in components {
|
||||
match component {
|
||||
Component::Prefix(..) => unreachable!(),
|
||||
Component::RootDir => {
|
||||
ret.push(component.as_os_str());
|
||||
}
|
||||
Component::CurDir => {}
|
||||
Component::ParentDir => {
|
||||
ret.pop();
|
||||
}
|
||||
Component::Normal(c) => {
|
||||
ret.push(c);
|
||||
}
|
||||
}
|
||||
let absolute_path = if path.is_absolute() {
|
||||
path.to_path_buf()
|
||||
} else {
|
||||
env::current_dir()?.join(path)
|
||||
}
|
||||
ret
|
||||
.clean();
|
||||
|
||||
Ok(absolute_path)
|
||||
}
|
||||
|
||||
// Get the parent directory of a given `Path`.
|
||||
|
|
@ -59,7 +39,7 @@ pub(crate) fn resolve(src: &Path, dst: &Path, base: &Option<Base>) -> Result<Pat
|
|||
// Find `dst` in the parent directory of `src`
|
||||
if let Some(parent) = src.parent() {
|
||||
let rel_path = parent.join(dst.to_path_buf());
|
||||
return Ok(normalize(&rel_path));
|
||||
return Ok(absolute_path(&rel_path)?);
|
||||
}
|
||||
}
|
||||
if dst.is_absolute() {
|
||||
|
|
@ -73,7 +53,7 @@ pub(crate) fn resolve(src: &Path, dst: &Path, base: &Option<Base>) -> Result<Pat
|
|||
)
|
||||
})?;
|
||||
let abs_path = join(dirname(&base), dst);
|
||||
return Ok(normalize(&abs_path));
|
||||
return Ok(absolute_path(&abs_path)?);
|
||||
}
|
||||
Err(ErrorKind::FileNotFound(dst.to_path_buf()))
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue