Make GITHUB_TOKEN optional (#22)

* Make GITHUB_TOKEN optional

This also makes it possible to pass the token in via CLI args.

* Add missing test fixture file

* Normalize exit codes and GitHub checking behavior

The exit code is now defined as 1 for unexpected or config errors,
and 2 for link check failures.

GitHub checking behavior has been tweaked to generate errors if
a GitHub-specific check cannot be performed because of a missing
token.

* Remove short flag for github token
This commit is contained in:
Paweł Romanowski 2020-10-26 23:31:31 +01:00 committed by GitHub
parent ac79314c03
commit 326683f4eb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 157 additions and 24 deletions

View file

@ -7,8 +7,9 @@
## What?
This thing was created from [Hello Rust Episode
10](https://hello-rust.show/10/). It's a link checker that treats Github links
specially by using a `GITHUB_TOKEN` to avoid getting blocked by the rate
10](https://hello-rust.show/10/). It's a link checker.
For GitHub links, it can optionally use a `GITHUB_TOKEN` to avoid getting blocked by the rate
limiter.
![Lychee demo](./assets/lychee.gif)
@ -78,7 +79,9 @@ This comparison is made on a best-effort basis. Please create a PR to fix outdat
cargo install lychee
```
Set an environment variable with your token like so `GITHUB_TOKEN=xxxx`.
Optional (to avoid being rate limited for GitHub links): set an environment variable with your token
like so `GITHUB_TOKEN=xxxx`, or use the `--github-token` CLI option. This can also be set in the
config file.
Run it inside a repository with a `README.md` or specify a file with
@ -86,6 +89,12 @@ Run it inside a repository with a `README.md` or specify a file with
lychee <yourfile>
```
### CLI exit codes
- `0` for success (all links checked successfully or excluded/skipped as configured)
- `1` for any unexpected runtime failures or config errors
- `2` for link check failures (if any non-excluded link failed the check)
## Comparison
Collecting other link checkers here to crush them in comparison. :P

3
fixtures/TEST_404.md Normal file
View file

@ -0,0 +1,3 @@
Test file: this link should be a valid link but return an HTTP 404 when followed.
http://httpbin.org/status/404

3
fixtures/TEST_GITHUB.md Normal file
View file

@ -0,0 +1,3 @@
Test file: contains a single GitHub URL.
Lychee: https://github.com/hello-rust/lychee

View file

@ -0,0 +1,3 @@
Test file: contains a single **invalid** (e.g. 404) GitHub URL.
Lychee: https://github.com/mre/idiomatic-rust-doesnt-exist-man

View file

@ -60,6 +60,10 @@ impl Status {
/// Returns `true` if this status is the `Status::Ok` variant, whatever value it carries.
pub fn is_success(&self) -> bool {
matches!(self, Status::Ok(_))
}
/// Returns `true` if this status is the `Status::Excluded` variant.
pub fn is_excluded(&self) -> bool {
matches!(self, Status::Excluded)
}
}
impl From<reqwest::Error> for Status {
@ -110,7 +114,7 @@ impl Default for Excludes {
/// otherwise a normal HTTP client.
pub(crate) struct Checker<'a> {
reqwest_client: reqwest::Client,
github: Github,
github: Option<Github>,
includes: Option<RegexSet>,
excludes: Excludes,
scheme: Option<String>,
@ -126,7 +130,7 @@ impl<'a> Checker<'a> {
// of arguments is short
#[allow(clippy::too_many_arguments)]
pub fn try_new(
token: String,
github_token: Option<String>,
includes: Option<RegexSet>,
excludes: Excludes,
max_redirects: usize,
@ -160,7 +164,13 @@ impl<'a> Checker<'a> {
let reqwest_client = builder.build()?;
let github = Github::new(user_agent, Credentials::Token(token))?;
let github = match github_token {
Some(token) => {
let github = Github::new(user_agent, Credentials::Token(token))?;
Some(github)
}
None => None,
};
let scheme = scheme.map(|s| s.to_lowercase());
@ -178,11 +188,20 @@ impl<'a> Checker<'a> {
}
/// Checks the repository `owner`/`repo` through the GitHub API client.
///
/// When a client is configured (`self.github` is `Some`), the repository is
/// fetched: a successful lookup yields `Status::Ok` with HTTP status `OK`, and
/// a failed lookup yields `Status::Error` carrying the formatted error.
/// When no client is configured, no lookup is attempted and a `Status::Error`
/// is returned that tells the user to supply a token.
async fn check_github(&self, owner: String, repo: String) -> Status {
// NOTE(review): the unconditional lookup on the next five lines looks like the
// pre-change body kept by the diff view; the `match &self.github` form below
// is the one described above — confirm.
info!("Check Github: {}/{}", owner, repo);
let repo = self.github.repo(owner, repo).get().await;
match repo {
Err(e) => Status::Error(format!("{}", e)),
Ok(_) => Status::Ok(http::StatusCode::OK),
match &self.github {
Some(github) => {
info!("Check Github: {}/{}", owner, repo);
let repo = github.repo(owner, repo).get().await;
match repo {
Err(e) => Status::Error(format!("{}", e)),
Ok(_) => Status::Ok(http::StatusCode::OK),
}
}
// No client was built, so the check cannot be performed; report that as an error.
None => Status::Error(
"GitHub token not specified. To check GitHub links reliably, \
use `--github-token` flag / `GITHUB_TOKEN` env var."
.to_string(),
),
}
}
@ -232,6 +251,7 @@ impl<'a> Checker<'a> {
if let Ok((owner, repo)) = self.extract_github(url.as_str()) {
return self.check_github(owner, repo).await;
}
status
}
@ -404,7 +424,7 @@ mod test {
fn get_checker(allow_insecure: bool, custom_headers: HeaderMap) -> Checker<'static> {
let checker = Checker::try_new(
"DUMMY_GITHUB_TOKEN".to_string(),
None,
None,
Excludes::default(),
5,
@ -445,7 +465,10 @@ mod test {
let end = start.elapsed();
assert!(matches!(res, Status::Failed(_)));
assert!(matches!(end.as_secs(), 7));
// on slow connections, this might take a bit longer than nominal backed-off timeout (7 secs)
assert!(end.as_secs() >= 7);
assert!(end.as_secs() <= 8);
}
#[test]
@ -534,7 +557,7 @@ mod test {
.await;
let checker = Checker::try_new(
"DUMMY_GITHUB_TOKEN".to_string(),
None,
None,
Excludes::default(),
5,
@ -561,7 +584,7 @@ mod test {
let includes = Some(RegexSet::new(&[r"foo.github.com"]).unwrap());
let checker = Checker::try_new(
"DUMMY_GITHUB_TOKEN".to_string(),
None,
includes,
Excludes::default(),
5,
@ -593,7 +616,7 @@ mod test {
let includes = Some(RegexSet::new(&[r"foo.github.com"]).unwrap());
let checker = Checker::try_new(
"DUMMY_GITHUB_TOKEN".to_string(),
None,
includes,
excludes,
5,
@ -626,7 +649,7 @@ mod test {
Some(RegexSet::new(&[r"github.com", r"[a-z]+\.(org|net)", r"@example.com"]).unwrap());
let checker = Checker::try_new(
"DUMMY_GITHUB_TOKEN".to_string(),
None,
None,
excludes,
5,

View file

@ -1,14 +1,13 @@
#[macro_use]
extern crate log;
use anyhow::anyhow;
use anyhow::Result;
use anyhow::{anyhow, Result};
use futures::future::join_all;
use headers::authorization::Basic;
use headers::{Authorization, HeaderMap, HeaderMapExt, HeaderName};
use indicatif::{ProgressBar, ProgressStyle};
use regex::RegexSet;
use std::{collections::HashSet, convert::TryInto, env, time::Duration};
use std::{collections::HashSet, convert::TryInto, time::Duration};
use structopt::StructOpt;
mod checker;
@ -20,6 +19,17 @@ use checker::{Checker, Excludes, Status};
use extract::Uri;
use options::{Config, LycheeOptions};
/// A C-like enum that can be cast to `i32` and used as process exit code.
enum ExitCode {
/// Exit code `0`.
Success = 0,
/// Exit code `1`.
///
/// NOTE: exit code 1 is used for any `Result::Err` bubbled up to `main()` using the `?` operator.
/// For now, 1 acts as a catch-all for everything non-link related (including config errors),
/// until we find a way to structure the error code handling better.
#[allow(unused)]
UnexpectedFailure = 1,
/// Exit code `2`.
LinkCheckFailure = 2,
}
fn print_summary(found: &HashSet<Uri>, results: &[Status]) {
let found = found.len();
let excluded: usize = results
@ -96,7 +106,7 @@ async fn run(cfg: Config, inputs: Vec<String>) -> Result<i32> {
None
};
let checker = Checker::try_new(
env::var("GITHUB_TOKEN")?,
cfg.github_token,
includes,
excludes,
cfg.max_redirects,
@ -123,7 +133,12 @@ async fn run(cfg: Config, inputs: Vec<String>) -> Result<i32> {
print_summary(&links, &results);
}
Ok(results.iter().all(|r| r.is_success()) as i32)
let success = results.iter().all(|r| r.is_success() || r.is_excluded());
match success {
true => Ok(ExitCode::Success as i32),
false => Ok(ExitCode::LinkCheckFailure as i32),
}
}
fn read_header(input: String) -> Result<(String, String)> {

View file

@ -142,6 +142,14 @@ pub(crate) struct Config {
#[structopt(long, help = "Basic autentication support. Ex 'username:password'")]
#[serde(default)]
pub basic_auth: Option<String>,
#[structopt(
long,
help = "GitHub API token to use when checking github.com links, to avoid rate limiting",
env = "GITHUB_TOKEN"
)]
#[serde(default)]
pub github_token: Option<String>,
}
impl Config {
@ -193,6 +201,7 @@ impl Config {
method: METHOD;
base_url: None;
basic_auth: None;
github_token: None;
}
self

View file

@ -17,8 +17,7 @@ mod cli {
.join("TEST_ALL_PRIVATE.md");
// assert that the command runs OK, and that it excluded all the links
cmd.env("GITHUB_TOKEN", "invalid-token")
.arg("--exclude-all-private")
cmd.arg("--exclude-all-private")
.arg("--verbose")
.arg(test_all_private_path)
.assert()
@ -28,4 +27,73 @@ mod cli {
.stdout(contains("Successful: 0"))
.stdout(contains("Errors: 0"));
}
/// Test that a GitHub link can be checked without specifying the token.
///
/// `GITHUB_TOKEN` is explicitly removed from the child's environment. The
/// `--github-token` option also reads that variable, so a token exported in the
/// developer's or CI shell would otherwise be inherited and this test would no
/// longer exercise the token-less path.
#[test]
fn test_check_github_no_token() {
    let mut cmd =
        Command::cargo_bin(env!("CARGO_PKG_NAME")).expect("Couldn't get cargo package name");
    let test_github_path = Path::new(module_path!())
        .parent()
        .unwrap()
        .join("fixtures")
        .join("TEST_GITHUB.md");

    cmd.env_remove("GITHUB_TOKEN")
        .arg("--verbose")
        .arg(test_github_path)
        .assert()
        .success()
        .stdout(contains("Found: 1"))
        .stdout(contains("Excluded: 0"))
        .stdout(contains("Successful: 1"))
        .stdout(contains("Errors: 0"));
}
/// An unsupported `--method` value must make the binary fail with exit code 1
/// and print the expected error message on stderr.
#[test]
fn test_failure_invalid_method() {
    let mut lychee =
        Command::cargo_bin(env!("CARGO_PKG_NAME")).expect("Couldn't get cargo package name");

    let expected_stderr = contains("Error: Only `get` and `head` allowed, got invalid-method");

    let outcome = lychee.arg("--method=invalid-method").assert();
    outcome.failure().code(1).stderr(expected_stderr);
}
/// Checking the `TEST_404.md` fixture must make the binary fail with exit code 2.
#[test]
fn test_failure_404_link() {
    let mut lychee =
        Command::cargo_bin(env!("CARGO_PKG_NAME")).expect("Couldn't get cargo package name");

    // Fixture path, built the same way as in the other tests of this module.
    let fixture = Path::new(module_path!())
        .parent()
        .unwrap()
        .join("fixtures")
        .join("TEST_404.md");

    let outcome = lychee.arg(fixture).assert();
    outcome.failure().code(2);
}
/// A GitHub link that cannot be checked must fail with exit code 2 and, when no
/// token is available, report the "GitHub token not specified" hint on stdout.
///
/// `GITHUB_TOKEN` is explicitly removed from the child's environment. The
/// asserted message is only produced when no token reaches the binary, so a
/// token inherited from the calling shell would make this test fail spuriously.
#[test]
fn test_failure_github_404_no_token() {
    let mut cmd =
        Command::cargo_bin(env!("CARGO_PKG_NAME")).expect("Couldn't get cargo package name");
    let test_github_404_path = Path::new(module_path!())
        .parent()
        .unwrap()
        .join("fixtures")
        .join("TEST_GITHUB_404.md");

    cmd.env_remove("GITHUB_TOKEN")
        .arg(test_github_404_path)
        .assert()
        .failure()
        .code(2)
        .stdout(contains("https://github.com/mre/idiomatic-rust-doesnt-exist-man \
        (GitHub token not specified. To check GitHub links reliably, use `--github-token` flag / `GITHUB_TOKEN` env var.)"));
}
}