2021-04-22 22:27:12 +00:00
|
|
|
//! `lychee` is a fast, asynchronous, resource-friendly link checker.
|
|
|
|
|
//! It is able to find broken hyperlinks and mail addresses inside Markdown,
|
|
|
|
|
//! HTML, `reStructuredText`, and any other format.
|
|
|
|
|
//!
|
|
|
|
|
//! The lychee binary is a wrapper around lychee-lib, which provides
|
|
|
|
|
//! convenience functions for calling lychee from the command-line.
|
|
|
|
|
//!
|
|
|
|
|
//! Run it inside a repository with a `README.md`:
|
|
|
|
|
//! ```sh
|
|
|
|
|
//! lychee
|
|
|
|
|
//! ```
|
|
|
|
|
//!
|
|
|
|
|
//! You can also specify various types of inputs:
|
|
|
|
|
//!
|
|
|
|
|
//! Check links on a website:
|
|
|
|
|
//!
|
|
|
|
|
//! ```sh
|
|
|
|
|
//! lychee https://endler.dev/
|
|
|
|
|
//! ```
|
|
|
|
|
//!
|
|
|
|
|
//! Check links in a remote file:
|
|
|
|
|
//! ```sh
|
|
|
|
|
//! lychee https://raw.githubusercontent.com/lycheeverse/lychee/master/README.md
|
|
|
|
|
//! ```
|
|
|
|
|
//!
|
|
|
|
|
//! Check links in local file(s):
|
|
|
|
|
//! ```sh
|
|
|
|
|
//! lychee README.md
|
|
|
|
|
//! lychee test.html info.txt
|
|
|
|
|
//! ```
|
|
|
|
|
//!
|
|
|
|
|
//! Check links in local files (by shell glob):
|
|
|
|
|
//! ```sh
|
|
|
|
|
//! lychee ~/projects/*/README.md
|
|
|
|
|
//! ```
|
|
|
|
|
//!
|
|
|
|
|
//! Check links in local files (lychee supports advanced globbing and `~` expansion):
|
|
|
|
|
//! ```sh
|
|
|
|
|
//! lychee "~/projects/big_project/**/README.*"
|
|
|
|
|
//! ```
|
|
|
|
|
//!
|
|
|
|
|
//! Ignore case when globbing and check result for each link:
|
|
|
|
|
//! ```sh
|
|
|
|
|
//! lychee --glob-ignore-case --verbose "~/projects/**/[r]eadme.*"
|
|
|
|
|
//! ```
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
#![warn(clippy::all, clippy::pedantic)]
|
|
|
|
|
#![warn(
|
|
|
|
|
absolute_paths_not_starting_with_crate,
|
2021-09-12 16:10:23 +00:00
|
|
|
rustdoc::invalid_html_tags,
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
missing_copy_implementations,
|
|
|
|
|
missing_debug_implementations,
|
|
|
|
|
semicolon_in_expressions_from_macros,
|
|
|
|
|
unreachable_pub,
|
|
|
|
|
unused_extern_crates,
|
|
|
|
|
variant_size_differences,
|
|
|
|
|
clippy::missing_const_for_fn
|
|
|
|
|
)]
|
|
|
|
|
#![deny(anonymous_parameters, macro_use_extern_crate, pointer_structural_match)]
|
2021-04-22 22:27:12 +00:00
|
|
|
#![deny(missing_docs)]
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
|
2021-12-01 17:25:11 +00:00
|
|
|
use lychee_lib::Collector;
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
// required for apple silicon
|
|
|
|
|
use ring as _;
|
|
|
|
|
|
2022-03-02 22:39:54 +00:00
|
|
|
use anyhow::{Context, Error, Result};
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
use openssl_sys as _; // required for vendored-openssl feature
|
2021-12-01 17:25:11 +00:00
|
|
|
use ring as _;
|
|
|
|
|
use std::fs::{self, File};
|
2022-03-02 22:39:54 +00:00
|
|
|
use std::io::{self, BufRead, BufReader, ErrorKind, Write};
|
2022-01-14 14:25:51 +00:00
|
|
|
use std::sync::Arc;
|
2020-10-21 00:10:25 +00:00
|
|
|
use structopt::StructOpt;
|
2020-08-09 20:48:02 +00:00
|
|
|
|
2022-01-14 14:25:51 +00:00
|
|
|
mod cache;
|
2021-12-01 17:25:11 +00:00
|
|
|
mod client;
|
2021-11-17 23:44:48 +00:00
|
|
|
mod color;
|
2021-12-01 17:25:11 +00:00
|
|
|
mod commands;
|
2020-08-14 09:43:45 +00:00
|
|
|
mod options;
|
2021-10-04 23:37:43 +00:00
|
|
|
mod parse;
|
2020-11-24 20:30:06 +00:00
|
|
|
mod stats;
|
2022-01-14 14:25:51 +00:00
|
|
|
mod time;
|
2021-11-17 23:44:48 +00:00
|
|
|
mod writer;
|
2020-08-04 22:32:37 +00:00
|
|
|
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
use crate::{
|
2022-01-14 14:25:51 +00:00
|
|
|
cache::{Cache, StoreExt},
|
|
|
|
|
options::{Config, Format, LycheeOptions, LYCHEE_CACHE_FILE, LYCHEE_IGNORE_FILE},
|
2021-11-17 23:44:48 +00:00
|
|
|
stats::ResponseStats,
|
|
|
|
|
writer::StatsWriter,
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
};
|
2020-08-04 22:32:37 +00:00
|
|
|
|
2020-10-26 22:31:31 +00:00
|
|
|
/// Exit codes of the lychee process.
///
/// This is a C-like enum: every variant carries an explicit discriminant and
/// can be cast to `i32` to be used as the process exit code.
enum ExitCode {
    Success = 0,
    // NOTE: exit code 1 is what the process reports for any `Result::Err`
    // that bubbles up to `main()` through the `?` operator. For now it is the
    // catch-all for everything that is not link related (config errors
    // included), until the error code handling gets a better structure.
    // The variant is never constructed explicitly, hence the `allow`.
    #[allow(unused)]
    UnexpectedFailure = 1,
    LinkCheckFailure = 2,
}
|
|
|
|
|
|
2020-08-14 09:48:55 +00:00
|
|
|
fn main() -> Result<()> {
|
2021-09-12 16:10:23 +00:00
|
|
|
#[cfg(feature = "tokio-console")]
|
|
|
|
|
console_subscriber::init();
|
2021-02-08 10:04:01 +00:00
|
|
|
// std::process::exit doesn't guarantee that all destructors will be ran,
|
2021-12-01 17:25:11 +00:00
|
|
|
// therefore we wrap "main" code in another function to ensure that.
|
2021-02-08 10:04:01 +00:00
|
|
|
// See: https://doc.rust-lang.org/stable/std/process/fn.exit.html
|
|
|
|
|
// Also see: https://www.youtube.com/watch?v=zQC8T71Y8e4
|
|
|
|
|
let exit_code = run_main()?;
|
|
|
|
|
std::process::exit(exit_code);
|
|
|
|
|
}
|
|
|
|
|
|
2022-01-14 14:25:51 +00:00
|
|
|
/// Read lines from file; ignore empty lines
|
2021-11-23 00:39:53 +00:00
|
|
|
fn read_lines(file: &File) -> Result<Vec<String>> {
|
|
|
|
|
let lines: Vec<_> = BufReader::new(file).lines().collect::<Result<_, _>>()?;
|
|
|
|
|
Ok(lines.into_iter().filter(|line| !line.is_empty()).collect())
|
|
|
|
|
}
|
|
|
|
|
|
2021-12-01 17:25:11 +00:00
|
|
|
/// Merge all provided config options into one
|
|
|
|
|
/// This includes a potential config file, command-line- and environment variables
|
|
|
|
|
fn load_config() -> Result<LycheeOptions> {
|
2020-12-02 22:28:37 +00:00
|
|
|
let mut opts = LycheeOptions::from_args();
|
2020-08-14 09:48:55 +00:00
|
|
|
|
2020-10-21 00:10:25 +00:00
|
|
|
// Load a potentially existing config file and merge it into the config from the CLI
|
2020-12-02 22:28:37 +00:00
|
|
|
if let Some(c) = Config::load_from_file(&opts.config_file)? {
|
2021-09-03 00:18:58 +00:00
|
|
|
opts.config.merge(c);
|
2020-12-02 22:28:37 +00:00
|
|
|
}
|
2021-09-01 15:37:31 +00:00
|
|
|
|
2021-11-23 00:39:53 +00:00
|
|
|
if let Ok(lycheeignore) = File::open(LYCHEE_IGNORE_FILE) {
|
|
|
|
|
opts.config.exclude.append(&mut read_lines(&lycheeignore)?);
|
|
|
|
|
}
|
|
|
|
|
|
2021-09-01 15:37:31 +00:00
|
|
|
// Load excludes from file
|
|
|
|
|
for path in &opts.config.exclude_file {
|
2021-09-03 00:24:02 +00:00
|
|
|
let file = File::open(path)?;
|
2021-11-23 00:39:53 +00:00
|
|
|
opts.config.exclude.append(&mut read_lines(&file)?);
|
2021-09-01 15:37:31 +00:00
|
|
|
}
|
|
|
|
|
|
2021-12-01 17:25:11 +00:00
|
|
|
Ok(opts)
|
|
|
|
|
}
|
2020-10-21 00:10:25 +00:00
|
|
|
|
2022-01-14 14:25:51 +00:00
|
|
|
#[must_use]
|
|
|
|
|
/// Load cache (if exists and is still valid)
|
|
|
|
|
/// This returns an `Option` as starting without a cache is a common scenario
|
|
|
|
|
/// and we silently discard errors on purpose
|
|
|
|
|
fn load_cache(cfg: &Config) -> Option<Cache> {
|
|
|
|
|
if !cfg.cache {
|
|
|
|
|
return None;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Discard entire cache if it hasn't been updated since `max_cache_age`.
|
|
|
|
|
// This is an optimization, which avoids iterating over the file and
|
|
|
|
|
// checking the age of each entry.
|
|
|
|
|
match fs::metadata(LYCHEE_CACHE_FILE) {
|
|
|
|
|
Err(_e) => {
|
|
|
|
|
// No cache found; silently start with empty cache
|
|
|
|
|
return None;
|
|
|
|
|
}
|
|
|
|
|
Ok(metadata) => {
|
|
|
|
|
let modified = metadata.modified().ok()?;
|
|
|
|
|
let elapsed = modified.elapsed().ok()?;
|
|
|
|
|
if elapsed > cfg.max_cache_age {
|
2022-03-02 22:39:54 +00:00
|
|
|
eprintln!(
|
2022-01-14 14:25:51 +00:00
|
|
|
"Cache is too old (age: {}, max age: {}). Discarding",
|
|
|
|
|
humantime::format_duration(elapsed),
|
|
|
|
|
humantime::format_duration(cfg.max_cache_age)
|
|
|
|
|
);
|
|
|
|
|
return None;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let cache = Cache::load(LYCHEE_CACHE_FILE, cfg.max_cache_age.as_secs());
|
|
|
|
|
match cache {
|
|
|
|
|
Ok(cache) => Some(cache),
|
|
|
|
|
Err(e) => {
|
2022-03-02 22:39:54 +00:00
|
|
|
eprintln!("Error while loading cache: {e}. Continuing without.");
|
2022-01-14 14:25:51 +00:00
|
|
|
None
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Set up runtime and call lychee entrypoint
|
2021-12-01 17:25:11 +00:00
|
|
|
fn run_main() -> Result<i32> {
|
2022-03-02 22:39:54 +00:00
|
|
|
use std::process::exit;
|
|
|
|
|
|
2021-12-01 17:25:11 +00:00
|
|
|
let opts = load_config()?;
|
|
|
|
|
let runtime = match opts.config.threads {
|
2020-08-14 09:48:55 +00:00
|
|
|
Some(threads) => {
|
2021-02-18 22:33:14 +00:00
|
|
|
// We define our own runtime instead of the `tokio::main` attribute
|
|
|
|
|
// since we want to make the number of threads configurable
|
2021-01-06 23:10:58 +00:00
|
|
|
tokio::runtime::Builder::new_multi_thread()
|
|
|
|
|
.worker_threads(threads)
|
2020-08-14 09:48:55 +00:00
|
|
|
.enable_all()
|
|
|
|
|
.build()?
|
|
|
|
|
}
|
|
|
|
|
None => tokio::runtime::Runtime::new()?,
|
|
|
|
|
};
|
2021-02-08 10:04:01 +00:00
|
|
|
|
2022-03-02 22:39:54 +00:00
|
|
|
match runtime.block_on(run(&opts)) {
|
|
|
|
|
Err(e) if Some(ErrorKind::BrokenPipe) == underlying_io_error_kind(&e) => {
|
|
|
|
|
exit(ExitCode::Success as i32);
|
|
|
|
|
}
|
|
|
|
|
res => res,
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Check if the given error can be traced back to an `io::ErrorKind`
|
|
|
|
|
/// This is helpful for troubleshooting the root cause of an error.
|
|
|
|
|
/// Code is taken from the anyhow documentation.
|
|
|
|
|
fn underlying_io_error_kind(error: &Error) -> Option<io::ErrorKind> {
|
|
|
|
|
for cause in error.chain() {
|
|
|
|
|
if let Some(io_error) = cause.downcast_ref::<io::Error>() {
|
|
|
|
|
return Some(io_error.kind());
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
None
|
2020-08-14 09:48:55 +00:00
|
|
|
}
|
|
|
|
|
|
2022-01-14 14:25:51 +00:00
|
|
|
/// Run lychee on the given inputs
|
2021-12-01 17:25:11 +00:00
|
|
|
async fn run(opts: &LycheeOptions) -> Result<i32> {
|
|
|
|
|
let inputs = opts.inputs();
|
2022-02-07 21:54:47 +00:00
|
|
|
let requests = Collector::new(opts.config.base.clone())
|
|
|
|
|
.skip_missing_inputs(opts.config.skip_missing)
|
|
|
|
|
// File a bug if you rely on this envvar! It's going to go away eventually.
|
|
|
|
|
.use_html5ever(std::env::var("LYCHEE_USE_HTML5EVER").map_or(false, |x| x == "1"))
|
2021-12-07 10:52:40 +00:00
|
|
|
.collect_links(inputs)
|
|
|
|
|
.await;
|
2020-10-26 08:23:45 +00:00
|
|
|
|
2021-12-01 17:25:11 +00:00
|
|
|
let client = client::create(&opts.config)?;
|
2020-10-26 08:23:45 +00:00
|
|
|
|
2021-12-01 17:25:11 +00:00
|
|
|
let exit_code = if opts.config.dump {
|
|
|
|
|
commands::dump(client, requests, opts.config.verbose).await?
|
Major refactor of codebase (#208)
- The binary component and library component are separated as two
packages in the same workspace.
- `lychee` is the binary component, in `lychee-bin/*`.
- `lychee-lib` is the library component, in `lychee-lib/*`.
- Users can now install only the `lychee-lib`, instead of both
components, that would require fewer dependencies and faster
compilation.
- Dependencies for each component are adjusted and updated. E.g.,
no CLI dependencies for `lychee-lib`.
- CLI tests are only moved to `lychee`, as it has nothing to do
with the library component.
- `Status::Error` is refactored to contain dedicated error enum,
`ErrorKind`.
- The motivation is to delay the formatting of errors to strings.
Note that `e.to_string()` is not necessarily cheap (though
trivial in many cases). The formatting is no delayed until the
error is needed to be displayed to users. So in some cases, if
the error is never used, it means that it won't be formatted at
all.
- Replaced `regex` based matching with one of the following:
- Simple string equality test in the case of 'false positivie'.
- URL parsing based test, in the case of extracting repository and
user name for GitHub links.
- Either cases would be much more efficient than `regex` based
matching. First, there's no need to construct a state machine for
regex. Second, URL is already verified and parsed on its creation,
and extracting its components is fairly cheap. Also, this removes
the dependency on `lazy-static` in `lychee-lib`.
- `types` module now has a sub-directory, and its components are now
separated into their own modules (in that sub-directory).
- `lychee-lib::test_utils` module is only compiled for tests.
- `wiremock` is moved to `dev-dependency` as it's only needed for
`test` modules.
- Dependencies are listed in alphabetical order.
- Imports are organized in the following fashion:
- Imports from `std`
- Imports from 3rd-party crates, and `lychee-lib`.
- Imports from `crate::*` or `super::*`.
- No glob import.
- I followed suggestion from `cargo clippy`, with `clippy::all` and
`clippy:pedantic`.
Co-authored-by: Lucius Hu <lebensterben@users.noreply.github.com>
2021-04-14 23:24:11 +00:00
|
|
|
} else {
|
2022-01-14 14:25:51 +00:00
|
|
|
let cache = load_cache(&opts.config).unwrap_or_default();
|
|
|
|
|
let cache = Arc::new(cache);
|
|
|
|
|
let (stats, cache, exit_code) =
|
|
|
|
|
commands::check(client, cache, requests, &opts.config).await?;
|
2021-12-01 17:25:11 +00:00
|
|
|
write_stats(stats, &opts.config)?;
|
2022-01-14 14:25:51 +00:00
|
|
|
|
|
|
|
|
if opts.config.cache {
|
|
|
|
|
cache.store(LYCHEE_CACHE_FILE)?;
|
|
|
|
|
}
|
|
|
|
|
exit_code
|
2020-10-10 04:31:28 +00:00
|
|
|
};
|
2020-11-24 20:30:06 +00:00
|
|
|
|
2021-12-01 17:25:11 +00:00
|
|
|
Ok(exit_code as i32)
|
|
|
|
|
}
|
2020-10-10 04:31:28 +00:00
|
|
|
|
2021-12-01 17:25:11 +00:00
|
|
|
/// Write final statistics to stdout or to file
|
|
|
|
|
fn write_stats(stats: ResponseStats, cfg: &Config) -> Result<()> {
|
2021-11-17 23:44:48 +00:00
|
|
|
let writer: Box<dyn StatsWriter> = match cfg.format {
|
|
|
|
|
Format::Compact => Box::new(writer::Compact::new()),
|
|
|
|
|
Format::Detailed => Box::new(writer::Detailed::new()),
|
|
|
|
|
Format::Json => Box::new(writer::Json::new()),
|
|
|
|
|
Format::Markdown => Box::new(writer::Markdown::new()),
|
|
|
|
|
};
|
2021-09-20 10:12:35 +00:00
|
|
|
|
2021-11-17 23:44:48 +00:00
|
|
|
let is_empty = stats.is_empty();
|
|
|
|
|
let formatted = writer.write(stats)?;
|
2021-09-20 10:12:35 +00:00
|
|
|
|
2020-12-14 00:15:14 +00:00
|
|
|
if let Some(output) = &cfg.output {
|
2021-09-20 10:12:35 +00:00
|
|
|
fs::write(output, formatted).context("Cannot write status output to file")?;
|
2021-02-17 11:22:31 +00:00
|
|
|
} else {
|
2021-11-17 23:44:48 +00:00
|
|
|
if cfg.verbose && !is_empty {
|
2021-03-14 18:59:52 +00:00
|
|
|
// separate summary from the verbose list of links above
|
2022-03-02 22:39:54 +00:00
|
|
|
writeln!(io::stdout())?;
|
2021-03-14 18:59:52 +00:00
|
|
|
}
|
|
|
|
|
// we assume that the formatted stats don't have a final newline
|
2022-03-02 22:39:54 +00:00
|
|
|
writeln!(io::stdout(), "{formatted}")?;
|
2020-10-26 22:31:31 +00:00
|
|
|
}
|
2021-09-20 10:12:35 +00:00
|
|
|
Ok(())
|
2020-08-04 22:32:37 +00:00
|
|
|
}
|