From dc7af2d74e9a3b66dff63f5510d4f7465d38ca81 Mon Sep 17 00:00:00 2001 From: Matthias Endler Date: Sat, 8 Aug 2020 00:06:17 +0200 Subject: [PATCH] Clean up error handling and configure reqwests --- Cargo.lock | 14 +++++++++++++ Cargo.toml | 9 +++++---- src/main.rs | 57 ++++++++++++++++++++++++++++++++++++----------------- 3 files changed, 58 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4652ac1..cce2f4a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -30,6 +30,18 @@ dependencies = [ "memchr", ] +[[package]] +name = "any" +version = "0.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2aad95aa1e482901aa0ed78757536ce2ebbfb0ca0f9452ab8aaf78969f758ad" + +[[package]] +name = "anyhow" +version = "1.0.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b602bfe940d21c130f3895acd65221e8a61270debe89d628b9cb4e3ccb8569b" + [[package]] name = "async-compression" version = "0.3.5" @@ -738,6 +750,8 @@ dependencies = [ name = "lychee" version = "0.1.0" dependencies = [ + "any", + "anyhow", "github-rs", "log", "pico-args", diff --git a/Cargo.toml b/Cargo.toml index 13f687b..de52605 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,17 +8,18 @@ license = "Apache-2.0/MIT" name = "lychee" repository = "https://github.com/hello-rust/lychee" version = "0.1.0" - [dependencies] +any = "*" +anyhow = "*" github-rs = "0.7.0" +log = "0.4" pico-args = "0.3.3" +pretty_env_logger = "0.4" pulldown-cmark = "0.7.2" regex = "1.3.9" serde_json = "1.0.56" url = "2.1.1" -log = "0.4" -pretty_env_logger = "0.4" [dependencies.reqwest] -version = "0.10.7" features = ["blocking", "gzip"] +version = "0.10.7" diff --git a/src/main.rs b/src/main.rs index 1b29a55..c57c75e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,29 +1,46 @@ #[macro_use] extern crate log; +use anyhow::{Context, Result}; use github_rs::client::{Executor, Github}; use github_rs::StatusCode; use pulldown_cmark::{Event, Parser, Tag}; use regex::Regex; +use reqwest::header::{self, HeaderValue}; use serde_json::Value; +use std::env; use std::fs; -use std::{env, error::Error}; use url::Url; struct Checker { - client: Github, + reqwest_client: reqwest::blocking::Client, + gh_client: Github, } impl Checker { /// Creates a new link checker - pub fn new(token: String) -> Self { - let client = Github::new(token).unwrap(); - Checker { client } + pub fn try_new(token: String) -> Result { + let mut headers = header::HeaderMap::new(); + // Faking the user agent is necessary for some websites, unfortunately. + // Otherwise we get a 403 from the firewall (e.g. Sucuri/Cloudproxy on ldra.com). + headers.insert(header::USER_AGENT, HeaderValue::from_str("curl/7.71.1")?); + headers.insert(header::TRANSFER_ENCODING, HeaderValue::from_str("chunked")?); + + let reqwest_client = reqwest::blocking::ClientBuilder::new() + .gzip(true) + .default_headers(headers) + .build()?; + + let gh_client = Github::new(token).unwrap(); + Ok(Checker { + reqwest_client, + gh_client, + }) } fn check_github(&self, owner: String, repo: String) -> bool { let (_headers, status, _json) = self - .client + .gh_client .get() .repos() .owner(&owner) @@ -34,7 +51,11 @@ impl Checker { } fn check_normal(&self, url: &Url) -> bool { - let res = reqwest::blocking::get(url.as_str()); + let res = self.reqwest_client.get(url.as_str()).send(); + if res.is_err() { + warn!("Cannot send request: {:?}", res); + return false; + } if let Ok(res) = res { if res.status().is_success() { true @@ -48,11 +69,11 @@ impl Checker { } } - fn extract_github(&self, url: &str) -> Result<(String, String), Box> { + fn extract_github(&self, url: &str) -> Result<(String, String)> { let re = Regex::new(r"github\.com/([^/]*)/([^/]*)")?; - let caps = re.captures(&url).ok_or("Invalid capture")?; - let owner = caps.get(1).ok_or("Cannot capture owner")?; - let repo = caps.get(2).ok_or("Cannot capture repo")?; + let caps = re.captures(&url).context("Invalid capture")?; + let owner = caps.get(1).context("Cannot capture owner")?; + let repo = caps.get(2).context("Cannot capture repo")?; Ok((owner.as_str().into(), repo.as_str().into())) } @@ -91,7 +112,7 @@ struct Args { input: Option, } -fn main() -> Result<(), Box> { +fn main() -> Result<()> { pretty_env_logger::init(); let mut args = pico_args::Arguments::from_env(); @@ -100,7 +121,7 @@ fn main() -> Result<(), Box> { input: args.opt_value_from_str(["-i", "--input"])?, }; - let checker = Checker::new(env::var("GITHUB_TOKEN")?); + let checker = Checker::try_new(env::var("GITHUB_TOKEN")?)?; let md = fs::read_to_string(args.input.unwrap_or("README.md".into()))?; let links: Vec = extract_links(&md); @@ -129,7 +150,7 @@ mod test { #[test] fn test_is_github() { - let checker = Checker::new("foo".into()); + let checker = Checker::try_new("foo".into()).unwrap(); assert_eq!( checker .extract_github("https://github.com/mre/idiomatic-rust") @@ -140,7 +161,7 @@ mod test { #[test] fn test_github() { - let checker = Checker::new(env::var("GITHUB_TOKEN").unwrap()); + let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap()).unwrap(); assert_eq!( checker.check(&Url::parse("https://github.com/mre/idiomatic-rust").unwrap()), true @@ -149,7 +170,7 @@ mod test { #[test] fn test_github_nonexistent() { - let checker = Checker::new(env::var("GITHUB_TOKEN").unwrap()); + let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap()).unwrap(); assert_eq!( checker.check( &Url::parse("https://github.com/mre/idiomatic-rust-doesnt-exist-man").unwrap() @@ -160,14 +181,14 @@ mod test { #[test] fn test_non_github() { - let checker = Checker::new(env::var("GITHUB_TOKEN").unwrap()); + let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap()).unwrap(); let valid = checker.check(&Url::parse("https://endler.dev").unwrap()); assert_eq!(valid, true); } #[test] fn test_non_github_nonexistent() { - let checker = Checker::new(env::var("GITHUB_TOKEN").unwrap()); + let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap()).unwrap(); let valid = checker.check(&Url::parse("https://endler.dev/abcd").unwrap()); assert_eq!(valid, false); }