Support underscores in Markdown URLs (#1555)

This commit is contained in:
Matthias Endler 2024-11-07 14:54:42 +01:00 committed by GitHub
parent 6e3219eb55
commit 6b53695be6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1,7 +1,7 @@
//! Extract links and fragments from markdown documents
use std::collections::{HashMap, HashSet};
use pulldown_cmark::{CowStr, Event, LinkType, Options, Parser, Tag, TagEnd};
use pulldown_cmark::{CowStr, Event, LinkType, Options, Parser, Tag, TagEnd, TextMergeStream};
use crate::{extract::plaintext::extract_raw_uri_from_plaintext, types::uri::raw::RawUri};
@ -19,7 +19,7 @@ pub(crate) fn extract_markdown(input: &str, include_verbatim: bool) -> Vec<RawUr
// which is why we keep track of entries and exits while traversing the input.
let mut inside_code_block = false;
let parser = Parser::new_ext(input, md_extensions());
let parser = TextMergeStream::new(Parser::new_ext(input, md_extensions()));
parser
.filter_map(|event| match event {
// A link.
@ -349,4 +349,28 @@ $$
let uris = extract_markdown(markdown, true);
assert_eq!(uris, expected);
}
/// A bare URL containing a lone `_` path segment in the middle must be
/// extracted as one complete `RawUri`, with no element or attribute set.
#[test]
fn test_underscore_in_urls_middle() {
    let input = r"https://example.com/_/foo";

    // Verbatim extraction is enabled (second argument is `true`).
    let found = extract_markdown(input, true);

    let wanted = vec![RawUri {
        text: String::from("https://example.com/_/foo"),
        element: None,
        attribute: None,
    }];
    assert_eq!(found, wanted);
}
/// A bare URL whose final path segment is a lone `_` must be extracted in
/// full, trailing underscore included, as one `RawUri` with no element or
/// attribute set.
#[test]
fn test_underscore_in_urls_end() {
    let input = r"https://example.com/_";

    // Verbatim extraction is enabled (second argument is `true`).
    let found = extract_markdown(input, true);

    let wanted = vec![RawUri {
        text: String::from("https://example.com/_"),
        element: None,
        attribute: None,
    }];
    assert_eq!(found, wanted);
}
}