mirror of
https://github.com/Hopiu/lychee.git
synced 2026-05-01 10:24:47 +00:00
feat: add 'user-content-' prefix to support github markdown fragment (#1750)
This commit is contained in:
parent
81f2605118
commit
02f6f5cb49
3 changed files with 64 additions and 10 deletions
2
fixtures/fragments/file.html
vendored
2
fixtures/fragments/file.html
vendored
|
|
@ -24,7 +24,7 @@
|
|||
<a href="#in-the-end">doesn't exist</a><br>
|
||||
<a href="#">To the top</a><br>
|
||||
<a href="#top">To the top alt</a><br>
|
||||
<a href="https://github.com/lycheeverse/lychee#user-content-table-of-contents">To the lychee readme license fragment.</a>
|
||||
<a href="https://github.com/lycheeverse/lychee#table-of-contents">To the lychee readme license fragment.</a>
|
||||
</section>
|
||||
</body>
|
||||
</html>
|
||||
|
|
|
|||
|
|
@ -1890,7 +1890,7 @@ mod cli {
|
|||
.stderr(contains("fixtures/fragments/file.html#top"))
|
||||
.stderr(contains("fixtures/fragments/file2.md#top"))
|
||||
.stderr(contains(
|
||||
"https://github.com/lycheeverse/lychee#user-content-table-of-contents",
|
||||
"https://github.com/lycheeverse/lychee#table-of-contents",
|
||||
))
|
||||
.stderr(contains(
|
||||
"https://github.com/lycheeverse/lychee#non-existent-anchor",
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
use std::{
|
||||
borrow::Cow,
|
||||
collections::{HashMap, HashSet, hash_map::Entry},
|
||||
path::Path,
|
||||
sync::Arc,
|
||||
|
|
@ -29,6 +30,61 @@ impl FragmentInput {
|
|||
}
|
||||
}
|
||||
|
||||
/// A fragment builder that expands the given fragments into a list of candidates.
///
/// The candidates are split over two vectors; a lookup succeeds when any entry
/// of either vector is found in the set of fragments extracted from a document.
|
||||
struct FragmentBuilder {
|
||||
/// The fragment exactly as requested, followed by a `user-content-`-prefixed
/// copy when the URL's host is recognised as GitHub.
variants: Vec<String>,
|
||||
/// Percent-decoded forms of `variants` (additionally lowercased for Markdown
/// input). An entry is stored only when decoding or lowercasing produced a
/// new string, so an unchanged variant is not stored a second time.
decoded: Vec<String>,
|
||||
}
|
||||
|
||||
impl FragmentBuilder {
|
||||
fn new(fragment: &str, url: &Url, file_type: FileType) -> Result<Self> {
|
||||
let mut variants = vec![fragment.into()];
|
||||
// For GitHub links, add "user-content-" prefix to the fragments.
|
||||
// The following cases cannot be handled unless we simulate with a headless browser:
|
||||
// - markdown files from any specific path (includes "blob/master/README.md")
|
||||
// - "issuecomment" fragments from the GitHub issue pages
|
||||
if url
|
||||
.host_str()
|
||||
.is_some_and(|host| host.ends_with("github.com"))
|
||||
{
|
||||
variants.push(format!("user-content-{fragment}"));
|
||||
}
|
||||
|
||||
// Only store the percent-decoded variants if it's different from the original
|
||||
// fragment. This avoids storing and comparing the same fragment twice.
|
||||
let mut decoded = Vec::new();
|
||||
for frag in &variants {
|
||||
let mut require_alloc = false;
|
||||
let mut fragment_decoded: Cow<'_, str> = match percent_decode_str(frag).decode_utf8()? {
|
||||
Cow::Borrowed(s) => s.into(),
|
||||
Cow::Owned(s) => {
|
||||
require_alloc = true;
|
||||
s.into()
|
||||
}
|
||||
};
|
||||
if file_type == FileType::Markdown {
|
||||
let lowercase = fragment_decoded.to_lowercase();
|
||||
if lowercase != fragment_decoded {
|
||||
fragment_decoded = lowercase.into();
|
||||
require_alloc = true;
|
||||
}
|
||||
}
|
||||
if require_alloc {
|
||||
decoded.push(fragment_decoded.into());
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Self { variants, decoded })
|
||||
}
|
||||
|
||||
fn any_matches(&self, fragments: &HashSet<String>) -> bool {
|
||||
self.variants
|
||||
.iter()
|
||||
.chain(self.decoded.iter())
|
||||
.any(|frag| fragments.contains(frag))
|
||||
}
|
||||
}
|
||||
|
||||
/// Holds a cache of fragments for a given URL.
|
||||
///
|
||||
/// Fragments, also known as anchors, are used to link to a specific
|
||||
|
|
@ -67,7 +123,7 @@ impl FragmentChecker {
|
|||
if fragment.is_empty() || fragment.eq_ignore_ascii_case("top") {
|
||||
return Ok(true);
|
||||
}
|
||||
let mut fragment_decoded = percent_decode_str(fragment).decode_utf8()?;
|
||||
|
||||
let url_without_frag = Self::remove_fragment(url.clone());
|
||||
|
||||
let FragmentInput { content, file_type } = input;
|
||||
|
|
@ -76,20 +132,18 @@ impl FragmentChecker {
|
|||
FileType::Html => extract_html_fragments,
|
||||
FileType::Plaintext => return Ok(true),
|
||||
};
|
||||
if file_type == FileType::Markdown {
|
||||
fragment_decoded = fragment_decoded.to_lowercase().into();
|
||||
}
|
||||
|
||||
let fragment_candidates = FragmentBuilder::new(fragment, url, file_type)?;
|
||||
match self.cache.lock().await.entry(url_without_frag) {
|
||||
Entry::Vacant(entry) => {
|
||||
let file_frags = extractor(&content);
|
||||
let contains_fragment =
|
||||
file_frags.contains(fragment) || file_frags.contains(&fragment_decoded as &str);
|
||||
let contains_fragment = fragment_candidates.any_matches(&file_frags);
|
||||
entry.insert(file_frags);
|
||||
Ok(contains_fragment)
|
||||
}
|
||||
Entry::Occupied(entry) => {
|
||||
Ok(entry.get().contains(fragment)
|
||||
|| entry.get().contains(&fragment_decoded as &str))
|
||||
let file_frags = entry.get();
|
||||
Ok(fragment_candidates.any_matches(file_frags))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue