Mailto support (#138)

* Add mailto support and use try_from for parsing URLs
* Clean up and document code
This commit is contained in:
Matthias 2021-02-12 10:25:33 +01:00 committed by GitHub
parent 0b148bf5e6
commit 702909c4ab
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 43 additions and 10 deletions

View file

@ -22,3 +22,4 @@ https://example.com
https://www.peerlyst.com/posts/a-list-of-static-analysis-tools-for-c-c-peerlyst
test@example.com
mailto:test2@example.com

View file

@ -5,8 +5,8 @@ use html5ever::tendril::{StrTendril, TendrilSink};
use linkify::LinkFinder;
use markup5ever_rcdom::{Handle, NodeData, RcDom};
use pulldown_cmark::{Event as MDEvent, Parser, Tag};
use std::collections::HashSet;
use std::path::Path;
use std::{collections::HashSet, convert::TryFrom};
use url::Url;
#[derive(Clone, Debug)]
@ -152,14 +152,12 @@ pub(crate) fn extract_links(input_content: &InputContent, base_url: Option<Url>)
// Silently ignore the parse failures for now.
let mut uris = HashSet::new();
for link in links {
match Url::parse(&link) {
Ok(url) => {
uris.insert(Uri::Website(url));
match Uri::try_from(link.as_str()) {
Ok(uri) => {
uris.insert(uri);
}
Err(_) => {
if link.contains('@') {
uris.insert(Uri::Mail(link));
} else if !Path::new(&link).exists() {
if !Path::new(&link).exists() {
if let Some(base_url) = &base_url {
if let Ok(new_url) = base_url.join(&link) {
uris.insert(Uri::Website(new_url));

View file

@ -1,4 +1,4 @@
use anyhow::Result;
use anyhow::{bail, Result};
use serde::{Deserialize, Serialize};
use std::net::IpAddr;
use std::{convert::TryFrom, fmt::Display};
@ -44,7 +44,16 @@ impl TryFrom<&str> for Uri {
type Error = anyhow::Error;
/// Converts a string slice into a `Uri`.
///
/// A leading `mailto:` prefix is removed first. The remainder is then
/// classified in this order:
///
/// 1. If `Url::parse` accepts it, the result is `Uri::Website`.
/// 2. Otherwise, if it contains an `@`, the result is `Uri::Mail` holding
///    the prefix-stripped string.
///
/// Because URL parsing is attempted first, an input that is a valid URL and
/// also contains an `@` is classified as `Uri::Website`.
///
/// # Errors
///
/// Returns an error with the message "Cannot convert to Uri" when the input
/// neither parses as a URL nor contains an `@`.
fn try_from(s: &str) -> Result<Self> {
// NOTE(review): the next line looks like the pre-change body that the diff
// view shows next to its replacement; the statements below supersede it.
// Confirm against the committed file.
Ok(Uri::Website(Url::parse(s)?))
// Remove the `mailto` scheme if it exists
// to avoid parsing it as a website URL.
// NOTE(review): `trim_start_matches` removes every repeated leading
// occurrence of the pattern, so "mailto:mailto:a@b" also ends up as "a@b";
// `strip_prefix` would remove at most one. Confirm which is intended.
let s = s.trim_start_matches("mailto:");
// Try the URL interpretation first.
if let Ok(uri) = Url::parse(s) {
return Ok(Uri::Website(uri));
};
// Not a parseable URL: anything containing an `@` is treated as a mail address.
if s.contains('@') {
return Ok(Uri::Mail(s.to_string()));
};
// Neither a URL nor something that looks like a mail address.
bail!("Cannot convert to Uri")
}
}
@ -61,6 +70,23 @@ mod test {
use super::*;
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
#[test]
fn test_uri_from_str() {
    // An empty string must be rejected.
    assert!(Uri::try_from("").is_err());

    // A plain HTTP URL is classified as a website.
    let website = Uri::try_from("http://example.com").unwrap();
    let expected_website = Uri::Website(url::Url::parse("http://example.com").unwrap());
    assert_eq!(website, expected_website);

    // A bare mail address is classified as mail.
    let bare_mail = Uri::try_from("mail@example.com").unwrap();
    assert_eq!(bare_mail, Uri::Mail("mail@example.com".to_string()));

    // The `mailto:` prefix is dropped from the stored address.
    let prefixed_mail = Uri::try_from("mailto:mail@example.com").unwrap();
    assert_eq!(prefixed_mail, Uri::Mail("mail@example.com".to_string()));
}
#[test]
fn test_uri_host_ip_v4() {
let uri =
@ -86,4 +112,12 @@ mod test {
let ip = uri.host_ip();
assert!(ip.is_none());
}
#[test]
fn test_mail() {
    // NOTE(review): despite its name, this test builds no mail address; it
    // checks `host_ip()` on a website URL with an IPv4 host. Confirm whether
    // a `Uri::Mail` case was intended here.
    let parsed = Url::parse("http://127.0.0.1").expect("Expected URI with valid IPv4");
    let website = Uri::Website(parsed);
    let host = website.host_ip().expect("Expected a valid IPv4");
    let loopback = IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1));
    assert_eq!(host, loopback);
}
}

View file

@ -231,7 +231,7 @@ mod cli {
.assert()
.success();
let expected = r##"{"total":10,"successful":10,"failures":[],"timeouts":[],"redirects":[],"excludes":[],"errors":[]}"##;
let expected = r##"{"total":11,"successful":11,"failures":[],"timeouts":[],"redirects":[],"excludes":[],"errors":[]}"##;
let output = fs::read_to_string(&outfile)?;
assert_eq!(output, expected);
fs::remove_file(outfile)?;