mirror of
https://github.com/Hopiu/lychee.git
synced 2026-04-17 11:50:59 +00:00
Avoid double-encoding already encoded destination paths
E.g. `web%20site` becomes `web site`. That's because Url::from_file_path will encode the full URL in the end. This behavior cannot be configured. See https://github.com/lycheeverse/lychee/pull/262#issuecomment-915245411
This commit is contained in:
parent
a75cae54b1
commit
93948d7367
6 changed files with 32 additions and 13 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
|
@ -1395,6 +1395,7 @@ dependencies = [
|
|||
"markup5ever_rcdom",
|
||||
"openssl-sys",
|
||||
"path-clean",
|
||||
"percent-encoding",
|
||||
"pretty_assertions",
|
||||
"pulldown-cmark",
|
||||
"regex",
|
||||
|
|
|
|||
|
|
@ -160,8 +160,8 @@ mod cli {
|
|||
.env_clear()
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(contains("Total............4"))
|
||||
.stdout(contains("Successful.......4"));
|
||||
.stdout(contains("Total............3"))
|
||||
.stdout(contains("Successful.......3"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -42,6 +42,7 @@ typed-builder = "0.9.1"
|
|||
url = { version = "2.2.2", features = ["serde"] }
|
||||
log = "0.4.14"
|
||||
path-clean = "0.1.0"
|
||||
percent-encoding = "2.1.0"
|
||||
|
||||
[dev-dependencies]
|
||||
doc-comment = "0.3.3"
|
||||
|
|
|
|||
|
|
@ -262,7 +262,7 @@ impl Client {
|
|||
return Status::Ok(StatusCode::OK);
|
||||
}
|
||||
}
|
||||
ErrorKind::InvalidFileUri(uri.clone()).into()
|
||||
ErrorKind::InvalidFilePath(uri.clone()).into()
|
||||
}
|
||||
|
||||
pub async fn check_mail(&self, uri: &Uri) -> Status {
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ use html5ever::{
|
|||
};
|
||||
use log::info;
|
||||
use markup5ever_rcdom::{Handle, NodeData, RcDom};
|
||||
use percent_encoding::percent_decode_str;
|
||||
use pulldown_cmark::{Event as MDEvent, Parser, Tag};
|
||||
use reqwest::Url;
|
||||
|
||||
|
|
@ -122,10 +123,16 @@ fn extract_links_from_plaintext(input: &str) -> Vec<String> {
|
|||
.collect()
|
||||
}
|
||||
|
||||
fn create_uri_from_path(root: &Path, base: &Option<Base>, link: &str) -> Result<Url> {
|
||||
let link = url::remove_get_params_and_fragment(link);
|
||||
let path = path::resolve(root, &PathBuf::from(&link), base)?;
|
||||
Url::from_file_path(&path).map_err(|_e| ErrorKind::InvalidPath(path))
|
||||
fn create_uri_from_path(src: &Path, base: &Option<Base>, dst: &str) -> Result<Url> {
|
||||
let dst = url::remove_get_params_and_fragment(dst);
|
||||
// Avoid double-encoding already encoded destination paths by removing any
|
||||
// potential encoding (e.g. `web%20site` becomes `web site`).
|
||||
// That's because Url::from_file_path will encode the full URL in the end.
|
||||
// This behavior cannot be configured.
|
||||
// See https://github.com/lycheeverse/lychee/pull/262#issuecomment-915245411
|
||||
let decoded = percent_decode_str(dst).decode_utf8()?.to_string();
|
||||
let path = path::resolve(src, &PathBuf::from(decoded), base)?;
|
||||
Url::from_file_path(&path).map_err(|_e| ErrorKind::InvalidUrl(path))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
|||
|
|
@ -14,6 +14,8 @@ pub enum ErrorKind {
|
|||
// reading files
|
||||
/// Any form of I/O error occurred while reading from a given path.
|
||||
IoError(Option<PathBuf>, std::io::Error),
|
||||
/// Errors which can occur when attempting to interpret a sequence of u8 as a string
|
||||
Utf8Error(std::str::Utf8Error),
|
||||
/// Network error when trying to connect to an endpoint via reqwest.
|
||||
ReqwestError(reqwest::Error),
|
||||
/// Network error when trying to connect to an endpoint via hubcaps.
|
||||
|
|
@ -21,9 +23,9 @@ pub enum ErrorKind {
|
|||
/// The given string can not be parsed into a valid URL, e-mail address, or file path
|
||||
UrlParseError(String, (url::ParseError, Option<fast_chemail::ParseError>)),
|
||||
/// The given URI cannot be converted to a file path
|
||||
InvalidFileUri(Uri),
|
||||
InvalidFilePath(Uri),
|
||||
/// The given path cannot be converted to a URI
|
||||
InvalidPath(PathBuf),
|
||||
InvalidUrl(PathBuf),
|
||||
/// The given mail address is unreachable
|
||||
UnreachableEmailAddress(Uri),
|
||||
/// The given header could not be parsed.
|
||||
|
|
@ -74,8 +76,9 @@ impl Hash for ErrorKind {
|
|||
Self::HubcapsError(e) => e.to_string().hash(state),
|
||||
Self::FileNotFound(e) => e.to_string_lossy().hash(state),
|
||||
Self::UrlParseError(s, e) => (s, e.type_id()).hash(state),
|
||||
Self::InvalidPath(p) => p.hash(state),
|
||||
Self::InvalidFileUri(u) | Self::UnreachableEmailAddress(u) | Self::InsecureURL(u) => {
|
||||
Self::InvalidUrl(p) => p.hash(state),
|
||||
Self::Utf8Error(e) => e.to_string().hash(state),
|
||||
Self::InvalidFilePath(u) | Self::UnreachableEmailAddress(u) | Self::InsecureURL(u) => {
|
||||
u.hash(state);
|
||||
}
|
||||
Self::InvalidBase(base, e) => (base, e).hash(state),
|
||||
|
|
@ -109,8 +112,8 @@ impl Display for ErrorKind {
|
|||
Self::UrlParseError(s, (url_err, None)) => {
|
||||
write!(f, "Cannot parse {} as website url ({})", s, url_err)
|
||||
}
|
||||
Self::InvalidFileUri(u) => write!(f, "Invalid file URI: {}", u),
|
||||
Self::InvalidPath(p) => write!(f, "Invalid path: {}", p.display()),
|
||||
Self::InvalidFilePath(u) => write!(f, "Invalid file URI: {}", u),
|
||||
Self::InvalidUrl(p) => write!(f, "Invalid path: {}", p.display()),
|
||||
Self::UnreachableEmailAddress(uri) => write!(f, "Unreachable mail address: {}", uri),
|
||||
Self::InvalidHeader(e) => e.fmt(f),
|
||||
Self::InvalidGlobPattern(e) => e.fmt(f),
|
||||
|
|
@ -124,6 +127,7 @@ impl Display for ErrorKind {
|
|||
uri
|
||||
),
|
||||
Self::InvalidBase(base, e) => write!(f, "Error with base dir `{}` : {}", base, e),
|
||||
Self::Utf8Error(e) => e.fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -143,6 +147,12 @@ impl From<(PathBuf, std::io::Error)> for ErrorKind {
|
|||
}
|
||||
}
|
||||
|
||||
impl From<std::str::Utf8Error> for ErrorKind {
|
||||
fn from(e: std::str::Utf8Error) -> Self {
|
||||
Self::Utf8Error(e)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<std::io::Error> for ErrorKind {
|
||||
fn from(e: std::io::Error) -> Self {
|
||||
Self::IoError(None, e)
|
||||
|
|
|
|||
Loading…
Reference in a new issue