mirror of
https://github.com/Hopiu/lychee.git
synced 2026-03-16 20:50:25 +00:00
Switch to linkify to cover non-Markdown links
This commit is contained in:
parent
fb517dab03
commit
6e0f559b25
3 changed files with 17 additions and 9 deletions
10
Cargo.lock
generated
10
Cargo.lock
generated
|
|
@ -717,6 +717,15 @@ version = "0.2.71"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9457b06509d27052635f90d6466700c65095fdf75409b3fbdd903e988b886f49"
|
||||
|
||||
[[package]]
|
||||
name = "linkify"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "03951527dd24d2c59f407502e7d88e0948ef06fac23335b556a4c2bc03c22096"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lock_api"
|
||||
version = "0.3.4"
|
||||
|
|
@ -753,6 +762,7 @@ dependencies = [
|
|||
"any",
|
||||
"anyhow",
|
||||
"github-rs",
|
||||
"linkify",
|
||||
"log",
|
||||
"pico-args",
|
||||
"pretty_env_logger",
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ version = "0.1.0"
|
|||
any = "*"
|
||||
anyhow = "*"
|
||||
github-rs = "0.7.0"
|
||||
linkify = "*"
|
||||
log = "0.4"
|
||||
pico-args = "0.3.3"
|
||||
pretty_env_logger = "0.4"
|
||||
|
|
|
|||
|
|
@ -1,19 +1,16 @@
|
|||
use pulldown_cmark::{Event, Parser, Tag};
|
||||
use linkify::{LinkFinder, LinkKind};
|
||||
|
||||
use std::collections::HashSet;
|
||||
use url::Url;
|
||||
|
||||
pub(crate) fn extract_links(md: &str) -> HashSet<Url> {
|
||||
let mut links: Vec<String> = Vec::new();
|
||||
Parser::new(md).for_each(|event| match event {
|
||||
Event::Start(Tag::Link(_, link, _)) => links.push(link.into_string()),
|
||||
Event::Start(Tag::Image(_, link, _)) => links.push(link.into_string()),
|
||||
_ => (),
|
||||
});
|
||||
pub(crate) fn extract_links(input: &str) -> HashSet<Url> {
|
||||
let finder = LinkFinder::new();
|
||||
let links: Vec<_> = finder.links(input).collect();
|
||||
|
||||
// Only keep legit URLs. This sorts out things like anchors.
|
||||
// Silently ignore the parse failures for now.
|
||||
// TODO: Log errors in verbose mode
|
||||
let links: HashSet<Url> = links.iter().flat_map(|l| Url::parse(&l)).collect();
|
||||
let links: HashSet<Url> = links.iter().flat_map(|l| Url::parse(l.as_str())).collect();
|
||||
debug!("Testing links: {:#?}", links);
|
||||
|
||||
links
|
||||
|
|
|
|||
Loading…
Reference in a new issue