Make user-agent configurable

This commit is contained in:
Matthias Endler 2020-08-12 13:10:15 +02:00
parent 156f2b03c2
commit cd79f72d2d
3 changed files with 24 additions and 11 deletions

View file

@ -23,6 +23,7 @@ lychee can...
- skip non-links like anchors or relative URLs
- exclude some websites with regular expressions
- handle a configurable number of redirects
- disguise itself as a different user agent (like curl)
- SOON: automatically retry and backoff
- SOON: optionally ignore SSL certificate errors

View file

@ -70,12 +70,13 @@ impl Checker {
token: String,
excludes: Option<RegexSet>,
max_redirects: usize,
user_agent: String,
verbose: bool,
) -> Result<Self> {
let mut headers = header::HeaderMap::new();
// Faking the user agent is necessary for some websites, unfortunately.
// Otherwise we get a 403 from the firewall (e.g. Sucuri/Cloudproxy on ldra.com).
headers.insert(header::USER_AGENT, HeaderValue::from_str("curl/7.71.1")?);
headers.insert(header::USER_AGENT, HeaderValue::from_str(&user_agent)?);
headers.insert(header::TRANSFER_ENCODING, HeaderValue::from_str("chunked")?);
let reqwest_client = reqwest::ClientBuilder::new()
@ -191,10 +192,21 @@ mod test {
use std::env;
use url::Url;
/// Builds the `Checker` shared by the tests in this module.
///
/// The GitHub token is read from the `GITHUB_TOKEN` environment variable.
/// The remaining arguments are fixed: no exclude patterns, a maximum of 5
/// redirects, the `curl/7.71.1` user agent, and verbose output disabled.
///
/// # Panics
///
/// Panics if `GITHUB_TOKEN` is unset (or not valid Unicode), or if
/// `Checker::try_new` returns an error.
fn get_checker() -> Checker {
    Checker::try_new(
        env::var("GITHUB_TOKEN").expect("GITHUB_TOKEN must be set to run these tests"),
        None,
        5,
        "curl/7.71.1".to_string(),
        false,
    )
    .expect("failed to construct Checker for tests")
}
#[tokio::test]
async fn test_nonexistent() {
let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap(), None, 5, false).unwrap();
let res = checker
let res = get_checker()
.check(&Url::parse("https://endler.dev/abcd").unwrap())
.await;
assert!(matches!(res, CheckStatus::Failed(_)));
@ -202,9 +214,8 @@ mod test {
#[test]
fn test_is_github() {
let checker = Checker::try_new("foo".into(), None, 5, false).unwrap();
assert_eq!(
checker
get_checker()
.extract_github("https://github.com/mre/idiomatic-rust")
.unwrap(),
("mre".into(), "idiomatic-rust".into())
@ -212,9 +223,8 @@ mod test {
}
#[tokio::test]
async fn test_github() {
let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap(), None, 5, false).unwrap();
assert!(matches!(
checker
get_checker()
.check(&Url::parse("https://github.com/mre/idiomatic-rust").unwrap())
.await,
CheckStatus::OK
@ -223,8 +233,7 @@ mod test {
#[tokio::test]
async fn test_github_nonexistent() {
let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap(), None, 5, false).unwrap();
let res = checker
let res = get_checker()
.check(&Url::parse("https://github.com/mre/idiomatic-rust-doesnt-exist-man").unwrap())
.await;
assert!(matches!(res, CheckStatus::FailedGithub(_)));
@ -232,8 +241,7 @@ mod test {
#[tokio::test]
async fn test_non_github() {
let checker = Checker::try_new(env::var("GITHUB_TOKEN").unwrap(), None, 5, false).unwrap();
let res = checker
let res = get_checker()
.check(&Url::parse("https://endler.dev").unwrap())
.await;
assert!(matches!(res, CheckStatus::OK));

View file

@ -29,6 +29,9 @@ struct LycheeOptions {
#[options(help = "Maximum number of allowed redirects", default = "10")]
max_redirects: usize,
#[options(help = "User agent", default = "curl/7.71.1")]
user_agent: String,
// Accumulate all exclusions in a vector
#[options(help = "Exclude URLs from checking (supports regex)")]
exclude: Vec<String>,
@ -46,6 +49,7 @@ async fn main() -> Result<()> {
env::var("GITHUB_TOKEN")?,
Some(excludes),
opts.max_redirects,
opts.user_agent,
opts.verbose,
)?;
let md = fs::read_to_string(opts.input.unwrap_or_else(|| "README.md".into()))?;