From c9b707ea740d6c184c7012ba347ab6a16515af86 Mon Sep 17 00:00:00 2001 From: Hugo McNally <45573837+HU90m@users.noreply.github.com> Date: Fri, 5 Jan 2024 14:46:09 +0000 Subject: [PATCH] Decode percent escapes in fragments (#1275) * Added test to check a fragment with a utf8 character --- fixtures/fragments/file1.md | 2 ++ fixtures/fragments/file2.md | 3 +++ lychee-bin/tests/cli.rs | 7 +++++-- lychee-lib/src/utils/fragment_checker.rs | 6 ++++-- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/fixtures/fragments/file1.md b/fixtures/fragments/file1.md index fb3642a..55f2498 100644 --- a/fixtures/fragments/file1.md +++ b/fixtures/fragments/file1.md @@ -40,3 +40,5 @@ Therefore we put the test into a code block for now to prevent false positives. # Kebab Case Fragment [Link to another file type](empty_file#fragment) + +##### Lets wear a hat: être diff --git a/fixtures/fragments/file2.md b/fixtures/fragments/file2.md index 76ac82e..643aeee 100644 --- a/fixtures/fragments/file2.md +++ b/fixtures/fragments/file2.md @@ -5,3 +5,6 @@ This is a test file for the fragment loader. 
### Some other heading with custom id {#custom-id} #### Fragment 1 + +[hats](file1.md#lets-wear-a-hat-être) + diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index 171a4b4..cbd5c96 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -1440,6 +1440,9 @@ mod cli { .stderr(contains("fixtures/fragments/file1.md#missing-fragment")) .stderr(contains("fixtures/fragments/file2.md#fragment-1")) .stderr(contains("fixtures/fragments/file1.md#kebab-case-fragment")) + .stderr(contains( + "fixtures/fragments/file1.md#lets-wear-a-hat-%C3%AAtre", + )) .stderr(contains("fixtures/fragments/file2.md#missing-fragment")) .stderr(contains("fixtures/fragments/empty_file#fragment")) .stderr(contains("fixtures/fragments/file.html#a-word")) @@ -1448,8 +1451,8 @@ mod cli { .stderr(contains( "fixtures/fragments/file1.md#kebab-case-fragment-1", )) - .stdout(contains("13 Total")) - .stdout(contains("10 OK")) + .stdout(contains("14 Total")) + .stdout(contains("11 OK")) // 3 failures because of missing fragments .stdout(contains("3 Errors")); } diff --git a/lychee-lib/src/utils/fragment_checker.rs b/lychee-lib/src/utils/fragment_checker.rs index 6b49cf7..57016e1 100644 --- a/lychee-lib/src/utils/fragment_checker.rs +++ b/lychee-lib/src/utils/fragment_checker.rs @@ -9,6 +9,7 @@ use crate::{ types::FileType, Result, }; +use percent_encoding::percent_decode_str; use tokio::{fs, sync::Mutex}; use url::Url; @@ -46,6 +47,7 @@ impl FragmentChecker { let Some(fragment) = url.fragment() else { return Ok(true); }; + let fragment = percent_decode_str(fragment).decode_utf8()?; let url_without_frag = Self::remove_fragment(url.clone()); let extractor = match FileType::from(path) { @@ -57,9 +59,9 @@ impl FragmentChecker { Entry::Vacant(entry) => { let content = fs::read_to_string(path).await?; let file_frags = extractor(&content); - Ok(entry.insert(file_frags).contains(fragment)) + Ok(entry.insert(file_frags).contains(&fragment as &str)) } - Entry::Occupied(entry) => 
Ok(entry.get().contains(fragment)), + Entry::Occupied(entry) => Ok(entry.get().contains(&fragment as &str)), } }