mirror of
https://github.com/Hopiu/lychee.git
synced 2026-03-16 20:50:25 +00:00
Add a way to handle "pretty URLs", i.e. URIs without .html extension (#1422)
In many circumstances (GitHub Pages, Apache configured with MultiViews, etc.), web servers process URIs by appending the `.html` file extension when no file is found at the path specified by the URI but a `.html` file corresponding to that path _is_ found. To allow Lychee to use the fast, offline method of checking such files locally via the `file://` scheme, let's handle this scenario gracefully by adding the `--fallback-extensions=html` option. Note: This new option can take a list of file extensions to use; the first one for which a corresponding file is found is then used. Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
This commit is contained in:
parent
255164ce25
commit
8c6eee9b5f
7 changed files with 83 additions and 5 deletions
|
|
@ -389,6 +389,13 @@ Options:
|
|||
--remap <REMAP>
|
||||
Remap URI matching pattern to different URI
|
||||
|
||||
--fallback-extensions <FALLBACK_EXTENSIONS>
|
||||
Test the specified file extensions for URIs when checking files locally.
|
||||
Multiple extensions can be separated by commas. Extensions will be checked in
|
||||
order of appearance.
|
||||
|
||||
Example: --fallback-extensions html,htm,php,asp,aspx,jsp,cgi
|
||||
|
||||
--header <HEADER>
|
||||
Custom request header
|
||||
|
||||
|
|
|
|||
10
fixtures/fallback-extensions/index.html
vendored
Normal file
10
fixtures/fallback-extensions/index.html
vendored
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>For Testing pretty URLs</title>
|
||||
</head>
|
||||
<body>
|
||||
<a href="other">other</a>
|
||||
</body>
|
||||
</html>
|
||||
10
fixtures/fallback-extensions/other.htm
vendored
Normal file
10
fixtures/fallback-extensions/other.htm
vendored
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title>For Testing pretty URLs</title>
|
||||
</head>
|
||||
<body>
|
||||
<a href="index">index</a>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -75,6 +75,7 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc<CookieStoreMutex>>) -
|
|||
.require_https(cfg.require_https)
|
||||
.cookie_jar(cookie_jar.cloned())
|
||||
.include_fragments(cfg.include_fragments)
|
||||
.fallback_extensions(cfg.fallback_extensions.clone())
|
||||
.build()
|
||||
.client()
|
||||
.context("Failed to create request client")
|
||||
|
|
|
|||
|
|
@ -300,6 +300,19 @@ pub(crate) struct Config {
|
|||
#[arg(long)]
|
||||
pub(crate) remap: Vec<String>,
|
||||
|
||||
/// Automatically append file extensions to `file://` URIs as needed
|
||||
#[serde(default)]
|
||||
#[arg(
|
||||
long,
|
||||
value_delimiter = ',',
|
||||
long_help = "Test the specified file extensions for URIs when checking files locally.
|
||||
Multiple extensions can be separated by commas. Extensions will be checked in
|
||||
order of appearance.
|
||||
|
||||
Example: --fallback-extensions html,htm,php,asp,aspx,jsp,cgi"
|
||||
)]
|
||||
pub(crate) fallback_extensions: Vec<String>,
|
||||
|
||||
/// Custom request header
|
||||
#[arg(long)]
|
||||
#[serde(default)]
|
||||
|
|
@ -439,6 +452,7 @@ impl Config {
|
|||
exclude_loopback: false;
|
||||
exclude_mail: false;
|
||||
remap: Vec::<String>::new();
|
||||
fallback_extensions: Vec::<String>::new();
|
||||
header: Vec::<String>::new();
|
||||
timeout: DEFAULT_TIMEOUT_SECS;
|
||||
retry_wait_time: DEFAULT_RETRY_WAIT_TIME_SECS;
|
||||
|
|
|
|||
|
|
@ -1556,4 +1556,17 @@ mod cli {
|
|||
// 3 failures because of missing fragments
|
||||
.stdout(contains("3 Errors"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fallback_extensions() {
|
||||
let mut cmd = main_command();
|
||||
let input = fixtures_path().join("fallback-extensions");
|
||||
|
||||
cmd.arg("--verbose")
|
||||
.arg("--fallback-extensions=htm,html")
|
||||
.arg(input)
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(contains("0 Errors"));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -95,6 +95,9 @@ pub struct ClientBuilder {
|
|||
/// make sure rules don't conflict with each other.
|
||||
remaps: Option<Remaps>,
|
||||
|
||||
/// Automatically append file extensions to `file://` URIs as needed
|
||||
fallback_extensions: Vec<String>,
|
||||
|
||||
/// Links matching this set of regular expressions are **always** checked.
|
||||
///
|
||||
/// This has higher precedence over [`ClientBuilder::excludes`], **but**
|
||||
|
|
@ -384,6 +387,7 @@ impl ClientBuilder {
|
|||
reqwest_client,
|
||||
github_client,
|
||||
remaps: self.remaps,
|
||||
fallback_extensions: self.fallback_extensions,
|
||||
filter,
|
||||
max_retries: self.max_retries,
|
||||
retry_wait_time: self.retry_wait_time,
|
||||
|
|
@ -412,6 +416,9 @@ pub struct Client {
|
|||
/// Optional remapping rules for URIs matching pattern.
|
||||
remaps: Option<Remaps>,
|
||||
|
||||
/// Automatically append file extensions to `file://` URIs as needed
|
||||
fallback_extensions: Vec<String>,
|
||||
|
||||
/// Rules to decide whether each link should be checked or ignored.
|
||||
filter: Filter,
|
||||
|
||||
|
|
@ -655,14 +662,30 @@ impl Client {
|
|||
let Ok(path) = uri.url.to_file_path() else {
|
||||
return ErrorKind::InvalidFilePath(uri.clone()).into();
|
||||
};
|
||||
if !path.exists() {
|
||||
|
||||
if path.exists() {
|
||||
if self.include_fragments {
|
||||
return self.check_fragment(&path, uri).await;
|
||||
}
|
||||
return Status::Ok(StatusCode::OK);
|
||||
}
|
||||
|
||||
if path.extension().is_some() {
|
||||
return ErrorKind::InvalidFilePath(uri.clone()).into();
|
||||
}
|
||||
if self.include_fragments {
|
||||
self.check_fragment(&path, uri).await
|
||||
} else {
|
||||
Status::Ok(StatusCode::OK)
|
||||
|
||||
// if the path has no file extension, try to append some
|
||||
let mut path_buf = path.clone();
|
||||
for ext in &self.fallback_extensions {
|
||||
path_buf.set_extension(ext);
|
||||
if path_buf.exists() {
|
||||
if self.include_fragments {
|
||||
return self.check_fragment(&path_buf, uri).await;
|
||||
}
|
||||
return Status::Ok(StatusCode::OK);
|
||||
}
|
||||
}
|
||||
ErrorKind::InvalidFilePath(uri.clone()).into()
|
||||
}
|
||||
|
||||
/// Checks a `file` URI's fragment.
|
||||
|
|
|
|||
Loading…
Reference in a new issue