diff --git a/README.md b/README.md index 2199d03..1278137 100644 --- a/README.md +++ b/README.md @@ -7,8 +7,9 @@ ## What? This thing was created from [Hello Rust Episode -10](https://hello-rust.show/10/). It's a link checker that treats Github links -specially by using a `GITHUB_TOKEN` to avoid getting blocked by the rate +10](https://hello-rust.show/10/). It's a link checker. + +For GitHub links, it can optionally use a `GITHUB_TOKEN` to avoid getting blocked by the rate limiter. ![Lychee demo](./assets/lychee.gif) @@ -78,7 +79,9 @@ This comparison is made on a best-effort basis. Please create a PR to fix outdat cargo install lychee ``` -Set an environment variable with your token like so `GITHUB_TOKEN=xxxx`. +Optional (to avoid being rate limited for GitHub links): set an environment variable with your token +like so `GITHUB_TOKEN=xxxx`, or use the `--github-token` CLI option. This can also be set in the +config file. Run it inside a repository with a `README.md` or specify a file with @@ -86,6 +89,12 @@ Run it inside a repository with a `README.md` or specify a file with lychee ``` +### CLI exit codes + +- `0` for success (all links checked successfully or excluded/skipped as configured) +- `1` for any unexpected runtime failures or config errors +- `2` for link check failures (if any non-excluded link failed the check) + ## Comparison Collecting other link checkers here to crush them in comparison. :P diff --git a/fixtures/TEST_404.md b/fixtures/TEST_404.md new file mode 100644 index 0000000..e2b546a --- /dev/null +++ b/fixtures/TEST_404.md @@ -0,0 +1,3 @@ +Test file: this link should be a valid link but return an HTTP 404 when followed. + +http://httpbin.org/status/404 diff --git a/fixtures/TEST_GITHUB.md b/fixtures/TEST_GITHUB.md new file mode 100644 index 0000000..85a0492 --- /dev/null +++ b/fixtures/TEST_GITHUB.md @@ -0,0 +1,3 @@ +Test file: contains a single GitHub URL. 
+ +Lychee: https://github.com/hello-rust/lychee diff --git a/fixtures/TEST_GITHUB_404.md b/fixtures/TEST_GITHUB_404.md new file mode 100644 index 0000000..ba0ffbc --- /dev/null +++ b/fixtures/TEST_GITHUB_404.md @@ -0,0 +1,3 @@ +Test file: contains a single **invalid** (e.g. 404) GitHub URL. + +Lychee: https://github.com/mre/idiomatic-rust-doesnt-exist-man diff --git a/src/checker.rs b/src/checker.rs index 78d414c..3f695d0 100644 --- a/src/checker.rs +++ b/src/checker.rs @@ -60,6 +60,10 @@ impl Status { pub fn is_success(&self) -> bool { matches!(self, Status::Ok(_)) } + + pub fn is_excluded(&self) -> bool { + matches!(self, Status::Excluded) + } } impl From for Status { @@ -110,7 +114,7 @@ impl Default for Excludes { /// otherwise a normal HTTP client. pub(crate) struct Checker<'a> { reqwest_client: reqwest::Client, - github: Github, + github: Option, includes: Option, excludes: Excludes, scheme: Option, @@ -126,7 +130,7 @@ impl<'a> Checker<'a> { // of arguments is short #[allow(clippy::too_many_arguments)] pub fn try_new( - token: String, + github_token: Option, includes: Option, excludes: Excludes, max_redirects: usize, @@ -160,7 +164,13 @@ impl<'a> Checker<'a> { let reqwest_client = builder.build()?; - let github = Github::new(user_agent, Credentials::Token(token))?; + let github = match github_token { + Some(token) => { + let github = Github::new(user_agent, Credentials::Token(token))?; + Some(github) + } + None => None, + }; let scheme = scheme.map(|s| s.to_lowercase()); @@ -178,11 +188,20 @@ impl<'a> Checker<'a> { } async fn check_github(&self, owner: String, repo: String) -> Status { - info!("Check Github: {}/{}", owner, repo); - let repo = self.github.repo(owner, repo).get().await; - match repo { - Err(e) => Status::Error(format!("{}", e)), - Ok(_) => Status::Ok(http::StatusCode::OK), + match &self.github { + Some(github) => { + info!("Check Github: {}/{}", owner, repo); + let repo = github.repo(owner, repo).get().await; + match repo { + Err(e) => 
Status::Error(format!("{}", e)), + Ok(_) => Status::Ok(http::StatusCode::OK), + } + } + None => Status::Error( + "GitHub token not specified. To check GitHub links reliably, \ + use `--github-token` flag / `GITHUB_TOKEN` env var." + .to_string(), + ), } } @@ -232,6 +251,7 @@ impl<'a> Checker<'a> { if let Ok((owner, repo)) = self.extract_github(url.as_str()) { return self.check_github(owner, repo).await; } + status } @@ -404,7 +424,7 @@ mod test { fn get_checker(allow_insecure: bool, custom_headers: HeaderMap) -> Checker<'static> { let checker = Checker::try_new( - "DUMMY_GITHUB_TOKEN".to_string(), + None, None, Excludes::default(), 5, @@ -445,7 +465,10 @@ mod test { let end = start.elapsed(); assert!(matches!(res, Status::Failed(_))); - assert!(matches!(end.as_secs(), 7)); + + // on slow connections, this might take a bit longer than nominal backed-off timeout (7 secs) + assert!(end.as_secs() >= 7); + assert!(end.as_secs() <= 8); } #[test] @@ -534,7 +557,7 @@ mod test { .await; let checker = Checker::try_new( - "DUMMY_GITHUB_TOKEN".to_string(), + None, None, Excludes::default(), 5, @@ -561,7 +584,7 @@ mod test { let includes = Some(RegexSet::new(&[r"foo.github.com"]).unwrap()); let checker = Checker::try_new( - "DUMMY_GITHUB_TOKEN".to_string(), + None, includes, Excludes::default(), 5, @@ -593,7 +616,7 @@ mod test { let includes = Some(RegexSet::new(&[r"foo.github.com"]).unwrap()); let checker = Checker::try_new( - "DUMMY_GITHUB_TOKEN".to_string(), + None, includes, excludes, 5, @@ -626,7 +649,7 @@ mod test { Some(RegexSet::new(&[r"github.com", r"[a-z]+\.(org|net)", r"@example.com"]).unwrap()); let checker = Checker::try_new( - "DUMMY_GITHUB_TOKEN".to_string(), + None, None, excludes, 5, diff --git a/src/main.rs b/src/main.rs index fce9aa0..bf44c7f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,14 +1,13 @@ #[macro_use] extern crate log; -use anyhow::anyhow; -use anyhow::Result; +use anyhow::{anyhow, Result}; use futures::future::join_all; use 
headers::authorization::Basic; use headers::{Authorization, HeaderMap, HeaderMapExt, HeaderName}; use indicatif::{ProgressBar, ProgressStyle}; use regex::RegexSet; -use std::{collections::HashSet, convert::TryInto, env, time::Duration}; +use std::{collections::HashSet, convert::TryInto, time::Duration}; use structopt::StructOpt; mod checker; @@ -20,6 +19,17 @@ use checker::{Checker, Excludes, Status}; use extract::Uri; use options::{Config, LycheeOptions}; +/// A C-like enum that can be cast to `i32` and used as process exit code. +enum ExitCode { + Success = 0, + // NOTE: exit code 1 is used for any `Result::Err` bubbled up to `main()` using the `?` operator. + // For now, 1 acts as a catch-all for everything non-link related (including config errors), + // until we find a way to structure the error code handling better. + #[allow(unused)] + UnexpectedFailure = 1, + LinkCheckFailure = 2, +} + fn print_summary(found: &HashSet, results: &[Status]) { let found = found.len(); let excluded: usize = results @@ -96,7 +106,7 @@ async fn run(cfg: Config, inputs: Vec) -> Result { None }; let checker = Checker::try_new( - env::var("GITHUB_TOKEN")?, + cfg.github_token, includes, excludes, cfg.max_redirects, @@ -123,7 +133,12 @@ async fn run(cfg: Config, inputs: Vec) -> Result { print_summary(&links, &results); } - Ok(results.iter().all(|r| r.is_success()) as i32) + let success = results.iter().all(|r| r.is_success() || r.is_excluded()); + + match success { + true => Ok(ExitCode::Success as i32), + false => Ok(ExitCode::LinkCheckFailure as i32), + } } fn read_header(input: String) -> Result<(String, String)> { diff --git a/src/options.rs b/src/options.rs index a39aa5a..d6fdf3f 100644 --- a/src/options.rs +++ b/src/options.rs @@ -142,6 +142,14 @@ pub(crate) struct Config { #[structopt(long, help = "Basic autentication support. 
Ex 'username:password'")] #[serde(default)] pub basic_auth: Option, + + #[structopt( + long, + help = "GitHub API token to use when checking github.com links, to avoid rate limiting", + env = "GITHUB_TOKEN" + )] + #[serde(default)] + pub github_token: Option, } impl Config { @@ -193,6 +201,7 @@ impl Config { method: METHOD; base_url: None; basic_auth: None; + github_token: None; } self diff --git a/tests/cli.rs b/tests/cli.rs index 5b6d23e..8a05c17 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -17,8 +17,7 @@ mod cli { .join("TEST_ALL_PRIVATE.md"); // assert that the command runs OK, and that it excluded all the links - cmd.env("GITHUB_TOKEN", "invalid-token") - .arg("--exclude-all-private") + cmd.arg("--exclude-all-private") .arg("--verbose") .arg(test_all_private_path) .assert() @@ -28,4 +27,73 @@ mod cli { .stdout(contains("Successful: 0")) .stdout(contains("Errors: 0")); } + + /// Test that a GitHub link can be checked without specifying the token. + #[test] + fn test_check_github_no_token() { + let mut cmd = + Command::cargo_bin(env!("CARGO_PKG_NAME")).expect("Couldn't get cargo package name"); + + let test_github_path = Path::new(module_path!()) + .parent() + .unwrap() + .join("fixtures") + .join("TEST_GITHUB.md"); + + cmd.arg("--verbose") + .arg(test_github_path) + .assert() + .success() + .stdout(contains("Found: 1")) + .stdout(contains("Excluded: 0")) + .stdout(contains("Successful: 1")) + .stdout(contains("Errors: 0")); + } + + #[test] + fn test_failure_invalid_method() { + let mut cmd = + Command::cargo_bin(env!("CARGO_PKG_NAME")).expect("Couldn't get cargo package name"); + + cmd.arg("--method=invalid-method") + .assert() + .failure() + .code(1) + .stderr(contains( + "Error: Only `get` and `head` allowed, got invalid-method", + )); + } + + #[test] + fn test_failure_404_link() { + let mut cmd = + Command::cargo_bin(env!("CARGO_PKG_NAME")).expect("Couldn't get cargo package name"); + + let test_404_path = Path::new(module_path!()) + .parent() + .unwrap() 
+ .join("fixtures") + .join("TEST_404.md"); + + cmd.arg(test_404_path).assert().failure().code(2); + } + + #[test] + fn test_failure_github_404_no_token() { + let mut cmd = + Command::cargo_bin(env!("CARGO_PKG_NAME")).expect("Couldn't get cargo package name"); + + let test_github_404_path = Path::new(module_path!()) + .parent() + .unwrap() + .join("fixtures") + .join("TEST_GITHUB_404.md"); + + cmd.arg(test_github_404_path) + .assert() + .failure() + .code(2) + .stdout(contains("https://github.com/mre/idiomatic-rust-doesnt-exist-man \ + (GitHub token not specified. To check GitHub links reliably, use `--github-token` flag / `GITHUB_TOKEN` env var.)")); + } }