Switch to linkify to also cover non-Markdown links

This commit is contained in:
Matthias Endler 2020-08-09 23:12:25 +02:00
parent fb517dab03
commit 6e0f559b25
3 changed files with 17 additions and 9 deletions

10
Cargo.lock generated
View file

@ -717,6 +717,15 @@ version = "0.2.71"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9457b06509d27052635f90d6466700c65095fdf75409b3fbdd903e988b886f49"
[[package]]
name = "linkify"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "03951527dd24d2c59f407502e7d88e0948ef06fac23335b556a4c2bc03c22096"
dependencies = [
"memchr",
]
[[package]]
name = "lock_api"
version = "0.3.4"
@ -753,6 +762,7 @@ dependencies = [
"any",
"anyhow",
"github-rs",
"linkify",
"log",
"pico-args",
"pretty_env_logger",

View file

@ -12,6 +12,7 @@ version = "0.1.0"
any = "*"
anyhow = "*"
github-rs = "0.7.0"
linkify = "*"
log = "0.4"
pico-args = "0.3.3"
pretty_env_logger = "0.4"

View file

@ -1,19 +1,16 @@
use pulldown_cmark::{Event, Parser, Tag};
use linkify::{LinkFinder, LinkKind};
use std::collections::HashSet;
use url::Url;
pub(crate) fn extract_links(md: &str) -> HashSet<Url> {
let mut links: Vec<String> = Vec::new();
Parser::new(md).for_each(|event| match event {
Event::Start(Tag::Link(_, link, _)) => links.push(link.into_string()),
Event::Start(Tag::Image(_, link, _)) => links.push(link.into_string()),
_ => (),
});
pub(crate) fn extract_links(input: &str) -> HashSet<Url> {
let finder = LinkFinder::new();
let links: Vec<_> = finder.links(input).collect();
// Only keep legit URLs. This sorts out things like anchors.
// Silently ignore the parse failures for now.
// TODO: Log errors in verbose mode
let links: HashSet<Url> = links.iter().flat_map(|l| Url::parse(&l)).collect();
let links: HashSet<Url> = links.iter().flat_map(|l| Url::parse(l.as_str())).collect();
debug!("Testing links: {:#?}", links);
links