Clean up error handling and configure the reqwest client

This commit is contained in:
Matthias Endler 2020-08-08 00:06:17 +02:00
parent a58b3e1232
commit dc7af2d74e
3 changed files with 58 additions and 22 deletions

14
Cargo.lock generated
View file

@ -30,6 +30,18 @@ dependencies = [
"memchr",
]
[[package]]
name = "any"
version = "0.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b2aad95aa1e482901aa0ed78757536ce2ebbfb0ca0f9452ab8aaf78969f758ad"
[[package]]
name = "anyhow"
version = "1.0.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6b602bfe940d21c130f3895acd65221e8a61270debe89d628b9cb4e3ccb8569b"
[[package]]
name = "async-compression"
version = "0.3.5"
@ -738,6 +750,8 @@ dependencies = [
name = "lychee"
version = "0.1.0"
dependencies = [
"any",
"anyhow",
"github-rs",
"log",
"pico-args",

View file

@ -8,17 +8,18 @@ license = "Apache-2.0/MIT"
name = "lychee"
repository = "https://github.com/hello-rust/lychee"
version = "0.1.0"
[dependencies]
any = "*"
anyhow = "*"
github-rs = "0.7.0"
log = "0.4"
pico-args = "0.3.3"
pretty_env_logger = "0.4"
pulldown-cmark = "0.7.2"
regex = "1.3.9"
serde_json = "1.0.56"
url = "2.1.1"
log = "0.4"
pretty_env_logger = "0.4"
[dependencies.reqwest]
version = "0.10.7"
features = ["blocking", "gzip"]
version = "0.10.7"

View file

@ -1,29 +1,46 @@
#[macro_use]
extern crate log;
use anyhow::{Context, Result};
use github_rs::client::{Executor, Github};
use github_rs::StatusCode;
use pulldown_cmark::{Event, Parser, Tag};
use regex::Regex;
use reqwest::header::{self, HeaderValue};
use serde_json::Value;
use std::env;
use std::fs;
use std::{env, error::Error};
use url::Url;
struct Checker {
client: Github,
reqwest_client: reqwest::blocking::Client,
gh_client: Github,
}
impl Checker {
/// Creates a new link checker
pub fn new(token: String) -> Self {
let client = Github::new(token).unwrap();
Checker { client }
pub fn try_new(token: String) -> Result<Self> {
let mut headers = header::HeaderMap::new();
// Faking the user agent is necessary for some websites, unfortunately.
// Otherwise we get a 403 from the firewall (e.g. Sucuri/Cloudproxy on ldra.com).
headers.insert(header::USER_AGENT, HeaderValue::from_str("curl/7.71.1")?);
headers.insert(header::TRANSFER_ENCODING, HeaderValue::from_str("chunked")?);
let reqwest_client = reqwest::blocking::ClientBuilder::new()
.gzip(true)
.default_headers(headers)
.build()?;
let gh_client = Github::new(token).unwrap();
Ok(Checker {
reqwest_client,
gh_client,
})
}
fn check_github(&self, owner: String, repo: String) -> bool {
let (_headers, status, _json) = self
.client
.gh_client
.get()
.repos()
.owner(&owner)
@ -34,7 +51,11 @@ impl Checker {
}
fn check_normal(&self, url: &Url) -> bool {
let res = reqwest::blocking::get(url.as_str());
let res = self.reqwest_client.get(url.as_str()).send();
if res.is_err() {
warn!("Cannot send request: {:?}", res);
return false;
}
if let Ok(res) = res {
if res.status().is_success() {
true
@ -48,11 +69,11 @@ impl Checker {
}
}
fn extract_github(&self, url: &str) -> Result<(String, String), Box<dyn Error>> {
fn extract_github(&self, url: &str) -> Result<(String, String)> {
let re = Regex::new(r"github\.com/([^/]*)/([^/]*)")?;
let caps = re.captures(&url).ok_or("Invalid capture")?;
let owner = caps.get(1).ok_or("Cannot capture owner")?;
let repo = caps.get(2).ok_or("Cannot capture repo")?;
let caps = re.captures(&url).context("Invalid capture")?;
let owner = caps.get(1).context("Cannot capture owner")?;
let repo = caps.get(2).context("Cannot capture repo")?;
Ok((owner.as_str().into(), repo.as_str().into()))
}
@ -91,7 +112,7 @@ struct Args {
input: Option<String>,
}
fn main() -> Result<(), Box<dyn Error>> {
fn main() -> Result<()> {
pretty_env_logger::init();
let mut args = pico_args::Arguments::from_env();
@ -100,7 +121,7 @@ fn main() -> Result<(), Box<dyn Error>> {
input: args.opt_value_from_str(["-i", "--input"])?,
};
let checker = Checker::new(env::var("GITHUB_TOKEN")?);
let checker = Checker::try_new(env::var("GITHUB_TOKEN")?)?;
let md = fs::read_to_string(args.input.unwrap_or("README.md".into()))?;
let links: Vec<Url> = extract_links(&md);
@ -129,7 +150,7 @@ mod test {
#[test]
fn test_is_github() {
let checker = Checker::new("foo".into());
let checker = Checker::try_new("foo".into()).unwrap();
assert_eq!(
checker
.extract_github("https://github.com/mre/idiomatic-rust")
@ -140,7 +161,7 @@ mod test {
#[test]
fn test_github() {
let checker = Checker::new(env::var("GITHUB_TOKEN").unwrap());
let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap()).unwrap();
assert_eq!(
checker.check(&Url::parse("https://github.com/mre/idiomatic-rust").unwrap()),
true
@ -149,7 +170,7 @@ mod test {
#[test]
fn test_github_nonexistent() {
let checker = Checker::new(env::var("GITHUB_TOKEN").unwrap());
let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap()).unwrap();
assert_eq!(
checker.check(
&Url::parse("https://github.com/mre/idiomatic-rust-doesnt-exist-man").unwrap()
@ -160,14 +181,14 @@ mod test {
#[test]
fn test_non_github() {
let checker = Checker::new(env::var("GITHUB_TOKEN").unwrap());
let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap()).unwrap();
let valid = checker.check(&Url::parse("https://endler.dev").unwrap());
assert_eq!(valid, true);
}
#[test]
fn test_non_github_nonexistent() {
let checker = Checker::new(env::var("GITHUB_TOKEN").unwrap());
let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap()).unwrap();
let valid = checker.check(&Url::parse("https://endler.dev/abcd").unwrap());
assert_eq!(valid, false);
}