mirror of
https://github.com/Hopiu/lychee.git
synced 2026-05-12 15:53:11 +00:00
Add support for excluding URLs
This commit is contained in:
parent
1d235b578b
commit
d4a3b09790
4 changed files with 17 additions and 6 deletions
|
|
@ -18,7 +18,7 @@ linkify = "*"
|
|||
log = "0.4"
|
||||
pretty_env_logger = "0.4"
|
||||
pulldown-cmark = "0.7.2"
|
||||
regex = "1.3.9"
|
||||
regex = "*"
|
||||
serde_json = "1.0.56"
|
||||
url = "2.1.1"
|
||||
|
||||
|
|
|
|||
|
|
@ -22,10 +22,10 @@ lychee can...
|
|||
- handle gzip compression
|
||||
- fake user agents (required for some firewalls)
|
||||
- skip non-links like anchors or relative URLs
|
||||
- exclude some websites with regular expressions
|
||||
- SOON: automatically retry and backoff
|
||||
- SOON: optionally ignore SSL certificate errors
|
||||
- SOON: optionally handle redirects
|
||||
- SOON: exclude some websites with regular expressions
|
||||
|
||||
## How?
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
use anyhow::{Context, Result};
|
||||
use github_rs::client::{Executor, Github};
|
||||
use github_rs::StatusCode;
|
||||
use regex::Regex;
|
||||
use regex::{Regex, RegexSet};
|
||||
use reqwest::header::{self, HeaderValue};
|
||||
use serde_json::Value;
|
||||
use url::Url;
|
||||
|
|
@ -11,12 +11,13 @@ use url::Url;
|
|||
/// Link checker state: an HTTP client, a GitHub API client, the set of
/// exclusion patterns and a verbosity flag.
pub(crate) struct Checker {
|
||||
/// HTTP client (`reqwest::Client`).
reqwest_client: reqwest::Client,
|
||||
/// GitHub API client (`github_rs::client::Github`).
gh_client: Github,
|
||||
/// Regular expressions for URLs to exclude: when any pattern matches a URL,
/// `check` returns `true` for it without performing the real check.
excludes: RegexSet,
|
||||
/// Verbosity flag supplied to `try_new`.
/// NOTE(review): how it affects output is not visible in this hunk — confirm.
verbose: bool,
|
||||
}
|
||||
|
||||
impl Checker {
|
||||
/// Creates a new link checker
|
||||
pub fn try_new(token: String, verbose: bool) -> Result<Self> {
|
||||
pub fn try_new(token: String, excludes: RegexSet, verbose: bool) -> Result<Self> {
|
||||
let mut headers = header::HeaderMap::new();
|
||||
// Faking the user agent is necessary for some websites, unfortunately.
|
||||
// Otherwise we get a 403 from the firewall (e.g. Sucuri/Cloudproxy on ldra.com).
|
||||
|
|
@ -32,6 +33,7 @@ impl Checker {
|
|||
Ok(Checker {
|
||||
reqwest_client,
|
||||
gh_client,
|
||||
excludes,
|
||||
verbose,
|
||||
})
|
||||
}
|
||||
|
|
@ -89,6 +91,12 @@ impl Checker {
|
|||
}
|
||||
|
||||
pub async fn check(&self, url: &Url) -> bool {
|
||||
// TODO: Indicate that the URL was skipped in the return value.
|
||||
// (Perhaps we want to return an enum value here: Status::Skipped)
|
||||
if self.excludes.is_match(url.as_str()) {
|
||||
return true;
|
||||
}
|
||||
|
||||
let ret = self.check_real(&url).await;
|
||||
match ret {
|
||||
true => {
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
extern crate log;
|
||||
|
||||
use anyhow::Result;
|
||||
use regex::RegexSet;
|
||||
use std::env;
|
||||
use std::fs;
|
||||
|
||||
|
|
@ -27,7 +28,7 @@ struct LycheeOptions {
|
|||
|
||||
// Accumulate all exclusions in a vector
|
||||
#[options(help = "Exclude URLs from checking (supports regex)")]
|
||||
excludes: Vec<String>,
|
||||
exclude: Vec<String>,
|
||||
}
|
||||
|
||||
#[tokio::main]
|
||||
|
|
@ -36,7 +37,9 @@ async fn main() -> Result<()> {
|
|||
|
||||
let opts = LycheeOptions::parse_args_default_or_exit();
|
||||
|
||||
let checker = Checker::try_new(env::var("GITHUB_TOKEN")?, opts.verbose)?;
|
||||
let excludes = RegexSet::new(opts.exclude).unwrap();
|
||||
|
||||
let checker = Checker::try_new(env::var("GITHUB_TOKEN")?, excludes, opts.verbose)?;
|
||||
let md = fs::read_to_string(opts.input.unwrap_or_else(|| "README.md".into()))?;
|
||||
let links = extract_links(&md);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue