mirror of
https://github.com/Hopiu/lychee.git
synced 2026-04-30 09:54:46 +00:00
Harden URL detection and extend verbatim elements (#899)
Previously remote URLs were incorrectly detected because the string representation of a path is different than the path itself, causing the `http` prefix match to be insufficient. This resulted in unexpected side-effects, such as the incorrect detection of verbatim mode for remote URLs. The check now got improved and unit tests were added to avoid future breakage. On top of that, missing verbatim elements were added
This commit is contained in:
parent
f19ba8b5c4
commit
5654b7c317
5 changed files with 135 additions and 19 deletions
28
fixtures/TEST_VERBATIM.html
vendored
Normal file
28
fixtures/TEST_VERBATIM.html
vendored
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
<!-- Test URLs in verbatim HTML elements -->
|
||||
|
||||
<html>
|
||||
<head>
|
||||
<title>Verbatim HTML</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Verbatim HTML</h1>
|
||||
<p>Some verbatim HTML elements:</p>
|
||||
|
||||
<pre>http://www.example.com/pre</pre>
|
||||
|
||||
<code>http://www.example.com/code</code>
|
||||
|
||||
<samp> http://www.example.com/samp </samp>
|
||||
|
||||
<kbd>http://www.example.com/kbd</kbd>
|
||||
|
||||
<var>http://www.example.com/var</var>
|
||||
|
||||
<address>http://www.example.com/address</address>
|
||||
|
||||
<script>
|
||||
// http://www.example.com/script
|
||||
"http://www.example.com/script";
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
|
|
@ -19,6 +19,7 @@ mod cli {
|
|||
// constant.
|
||||
const LYCHEE_CACHE_FILE: &str = ".lycheecache";
|
||||
|
||||
/// Helper macro to create a mock server which returns a custom status code.
|
||||
macro_rules! mock_server {
|
||||
($status:expr $(, $func:tt ($($arg:expr),*))*) => {{
|
||||
let mock_server = wiremock::MockServer::start().await;
|
||||
|
|
@ -29,11 +30,25 @@ mod cli {
|
|||
}};
|
||||
}
|
||||
|
||||
/// Helper macro to create a mock server which returns a 200 OK and a custom response body.
|
||||
macro_rules! mock_response {
|
||||
($body:expr) => {{
|
||||
let mock_server = wiremock::MockServer::start().await;
|
||||
let template = wiremock::ResponseTemplate::new(200).set_body_string($body);
|
||||
wiremock::Mock::given(wiremock::matchers::method("GET"))
|
||||
.respond_with(template)
|
||||
.mount(&mock_server)
|
||||
.await;
|
||||
mock_server
|
||||
}};
|
||||
}
|
||||
|
||||
/// Gets the "main" binary name (e.g. `lychee`)
|
||||
fn main_command() -> Command {
|
||||
// this gets the "main" binary name (e.g. `lychee`)
|
||||
Command::cargo_bin(env!("CARGO_PKG_NAME")).expect("Couldn't get cargo package name")
|
||||
}
|
||||
|
||||
/// Helper function to get the path to the fixtures directory.
|
||||
fn fixtures_path() -> PathBuf {
|
||||
Path::new(env!("CARGO_MANIFEST_DIR"))
|
||||
.parent()
|
||||
|
|
@ -82,6 +97,7 @@ mod cli {
|
|||
}
|
||||
}
|
||||
|
||||
/// Helper macro to test the output of the JSON format.
|
||||
macro_rules! test_json_output {
|
||||
($test_file:expr, $expected:expr $(, $arg:expr)*) => {{
|
||||
let mut cmd = main_command();
|
||||
|
|
@ -846,6 +862,20 @@ mod cli {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_verbatim_skipped_by_default() -> Result<()> {
|
||||
let mut cmd = main_command();
|
||||
let input = fixtures_path().join("TEST_CODE_BLOCKS.md");
|
||||
|
||||
cmd.arg(input)
|
||||
.arg("--dump")
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(is_empty());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_include_verbatim() -> Result<()> {
|
||||
let mut cmd = main_command();
|
||||
|
|
@ -863,13 +893,15 @@ mod cli {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_exclude_verbatim() -> Result<()> {
|
||||
#[tokio::test]
|
||||
async fn test_verbatim_skipped_by_default_via_remote_url() -> Result<()> {
|
||||
let mut cmd = main_command();
|
||||
let input = fixtures_path().join("TEST_CODE_BLOCKS.md");
|
||||
let file = fixtures_path().join("TEST_VERBATIM.html");
|
||||
let body = fs::read_to_string(file)?;
|
||||
let mock_server = mock_response!(body);
|
||||
|
||||
cmd.arg(input)
|
||||
.arg("--dump")
|
||||
cmd.arg("--dump")
|
||||
.arg(mock_server.uri())
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(is_empty());
|
||||
|
|
@ -877,6 +909,28 @@ mod cli {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_include_verbatim_via_remote_url() -> Result<()> {
|
||||
let mut cmd = main_command();
|
||||
let file = fixtures_path().join("TEST_VERBATIM.html");
|
||||
let body = fs::read_to_string(file)?;
|
||||
let mock_server = mock_response!(body);
|
||||
|
||||
cmd.arg("--include-verbatim")
|
||||
.arg("--dump")
|
||||
.arg(mock_server.uri())
|
||||
.assert()
|
||||
.success()
|
||||
.stdout(contains("http://www.example.com/pre"))
|
||||
.stdout(contains("http://www.example.com/code"))
|
||||
.stdout(contains("http://www.example.com/samp"))
|
||||
.stdout(contains("http://www.example.com/kbd"))
|
||||
.stdout(contains("http://www.example.com/var"))
|
||||
.stdout(contains("http://www.example.com/address"))
|
||||
.stdout(contains("http://www.example.com/script"));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_require_https() -> Result<()> {
|
||||
let mut cmd = main_command();
|
||||
|
|
|
|||
|
|
@ -17,7 +17,18 @@ use plaintext::extract_plaintext;
|
|||
pub(crate) fn is_verbatim_elem(name: &str) -> bool {
|
||||
matches!(
|
||||
name,
|
||||
"code" | "listing" | "plaintext" | "samp" | "script" | "textarea" | "xmp" | "pre"
|
||||
"address"
|
||||
| "code"
|
||||
| "kbd"
|
||||
| "listing"
|
||||
| "noscript"
|
||||
| "plaintext"
|
||||
| "pre"
|
||||
| "samp"
|
||||
| "script"
|
||||
| "textarea"
|
||||
| "var"
|
||||
| "xmp"
|
||||
)
|
||||
}
|
||||
|
||||
|
|
@ -108,6 +119,7 @@ mod tests {
|
|||
assert!(is_verbatim_elem("pre"));
|
||||
assert!(is_verbatim_elem("code"));
|
||||
assert!(is_verbatim_elem("listing"));
|
||||
assert!(is_verbatim_elem("script"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ pub(crate) fn website(url: &str) -> Uri {
|
|||
Uri::from(Url::parse(url).expect("Expected valid Website URI"))
|
||||
}
|
||||
|
||||
/// Creates a mail URI from a string
|
||||
pub(crate) fn mail(address: &str) -> Uri {
|
||||
if address.starts_with("mailto:") {
|
||||
Url::parse(address)
|
||||
|
|
@ -49,6 +50,7 @@ pub(crate) fn mail(address: &str) -> Uri {
|
|||
.into()
|
||||
}
|
||||
|
||||
/// Loads a fixture from the `fixtures` directory
|
||||
pub(crate) fn load_fixture(filename: &str) -> String {
|
||||
let fixture_path = Path::new(env!("CARGO_MANIFEST_DIR"))
|
||||
.parent()
|
||||
|
|
|
|||
|
|
@ -19,18 +19,22 @@ impl Default for FileType {
|
|||
|
||||
impl<P: AsRef<Path>> From<P> for FileType {
|
||||
/// Detect if the given path points to a Markdown, HTML, or plaintext file.
|
||||
//
|
||||
// Assume HTML in case of no extension.
|
||||
//
|
||||
// This is only reasonable for URLs, not paths on disk. For example,
|
||||
// a file named `README` without an extension is more likely to be a
|
||||
// plaintext file.
|
||||
//
|
||||
// A better solution would be to also implement `From<Url> for
|
||||
// FileType`. Unfortunately that's not possible without refactoring, as
|
||||
// `AsRef<Path>` could be implemented for `Url` in the future, which is
|
||||
// why `From<Url> for FileType` is not allowed (orphan rule).
|
||||
//
|
||||
// As a workaround, we check if the scheme is `http` or `https` and
|
||||
// assume HTML in that case.
|
||||
fn from(p: P) -> FileType {
|
||||
let path = p.as_ref();
|
||||
// Assume HTML in case of no extension.
|
||||
// Note: this is only reasonable for URLs; not paths on disk.
|
||||
// For example, `README` without an extension is more likely to be a plaintext file.
|
||||
// A better solution would be to also implement `From<Url> for FileType`.
|
||||
// Unfortunately that's not possible without refactoring, as
|
||||
// `AsRef<Path>` could be implemented for `Url` in the future, which is why
|
||||
// `From<Url> for FileType` is not allowed.
|
||||
// As a workaround, we check if we got a known web-protocol
|
||||
let is_url = path.starts_with("http");
|
||||
|
||||
match path
|
||||
.extension()
|
||||
.and_then(std::ffi::OsStr::to_str)
|
||||
|
|
@ -42,12 +46,19 @@ impl<P: AsRef<Path>> From<P> for FileType {
|
|||
FileType::Markdown
|
||||
}
|
||||
Some("htm" | "html") => FileType::Html,
|
||||
None if is_url => FileType::Html,
|
||||
_ => FileType::Plaintext,
|
||||
None if is_url(path) => FileType::Html,
|
||||
_ => FileType::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Helper function to check if a path is likely a URL.
|
||||
fn is_url(path: &Path) -> bool {
|
||||
path.to_str().map_or(false, |s| {
|
||||
s.starts_with("http://") || s.starts_with("https://")
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
|
@ -73,4 +84,13 @@ mod tests {
|
|||
FileType::Html
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_url() {
|
||||
assert!(is_url(Path::new("http://foo.com")));
|
||||
assert!(is_url(Path::new("https://foo.com")));
|
||||
assert!(!is_url(Path::new("foo.com")));
|
||||
assert!(!is_url(Path::new("foo")));
|
||||
assert!(!is_url(Path::new("foo/bar")));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue