mirror of
https://github.com/Hopiu/lychee.git
synced 2026-04-11 08:50:59 +00:00
Add simple, standalone client
Adds a new function `lychee::check()`, which removes a lot of boilerplate for simple cases. Adjusted the code, tests, and documentation. The downside is that `check` now returns a Result, so we have to use `?` to get to the response. That's because we have to account for the case where the given string is not a valid URI.
This commit is contained in:
parent
ae2d02b8a0
commit
16cd67331a
8 changed files with 118 additions and 59 deletions
27
README.md
27
README.md
|
|
@ -205,25 +205,35 @@ ARGS:
|
|||
## Library usage
|
||||
|
||||
You can use lychee as a library for your own projects.
|
||||
Simply add it as a dependency and build your client:
|
||||
Here is a "hello world" example:
|
||||
|
||||
```rust
|
||||
use lychee::{Request, Input, ClientBuilder, Status};
|
||||
use lychee::Uri::Website;
|
||||
use url::Url;
|
||||
use std::error::Error;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn Error>> {
|
||||
let response = lychee::check("https://github.com/lycheeverse/lychee").await?;
|
||||
println!("{}", response);
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
|
||||
This is equivalent to the following snippet, in which we build our own client:
|
||||
|
||||
```rust
|
||||
use lychee::{ClientBuilder, Status};
|
||||
use std::error::Error;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<(), Box<dyn Error>> {
|
||||
let client = ClientBuilder::default().build()?;
|
||||
let url = Url::parse("https://github.com/lycheeverse/lychee")?;
|
||||
let response = client.check(Request::new(Website(url), Input::Stdin)).await;
|
||||
let response = client.check("https://github.com/lycheeverse/lychee").await?;
|
||||
assert!(matches!(response.status, Status::Ok(_)));
|
||||
Ok(())
|
||||
}
|
||||
```
|
||||
|
||||
The client is very customizable, e.g.
|
||||
The client builder is very customizable:
|
||||
|
||||
```rust
|
||||
let client = lychee::ClientBuilder::default()
|
||||
|
|
@ -242,11 +252,12 @@ let client = lychee::ClientBuilder::default()
|
|||
.build()?;
|
||||
```
|
||||
|
||||
All options that you set will be used for all link checks.
|
||||
See the [builder documentation](https://docs.rs/lychee/latest/lychee/struct.ClientBuilder.html) for all options.
|
||||
|
||||
## GitHub Action usage
|
||||
|
||||
GitHub Action is available as a separate repository: [lycheeverse/lychee-action](https://github.com/lycheeverse/lychee-action)
|
||||
A GitHub Action that uses lychee is available as a separate repository: [lycheeverse/lychee-action](https://github.com/lycheeverse/lychee-action)
|
||||
which includes usage instructions.
|
||||
|
||||
## Troubleshooting and workarounds
|
||||
|
|
|
|||
|
|
@ -77,7 +77,7 @@ fn show_progress(progress_bar: &Option<ProgressBar>, response: &Response, verbos
|
|||
fn fmt(stats: &ResponseStats, format: &Format) -> Result<String> {
|
||||
Ok(match format {
|
||||
Format::String => stats.to_string(),
|
||||
Format::JSON => serde_json::to_string_pretty(&stats)?,
|
||||
Format::Json => serde_json::to_string_pretty(&stats)?,
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ const MAX_REDIRECTS: usize = 10;
|
|||
#[derive(Debug, Deserialize)]
|
||||
pub enum Format {
|
||||
String,
|
||||
JSON,
|
||||
Json,
|
||||
}
|
||||
|
||||
impl FromStr for Format {
|
||||
|
|
@ -24,7 +24,7 @@ impl FromStr for Format {
|
|||
fn from_str(format: &str) -> Result<Self, Self::Err> {
|
||||
match format {
|
||||
"string" => Ok(Format::String),
|
||||
"json" => Ok(Format::JSON),
|
||||
"json" => Ok(Format::Json),
|
||||
_ => Err(anyhow!("Could not parse format {}", format)),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
use anyhow::{anyhow, Context, Result};
|
||||
use anyhow::{anyhow, bail, Context, Result};
|
||||
use check_if_email_exists::{check_email, CheckEmailInput};
|
||||
use derive_builder::Builder;
|
||||
use headers::{HeaderMap, HeaderValue};
|
||||
use hubcaps::{Credentials, Github};
|
||||
use regex::{Regex, RegexSet};
|
||||
use reqwest::header;
|
||||
use std::net::IpAddr;
|
||||
use std::{collections::HashSet, time::Duration};
|
||||
use std::{convert::TryInto, net::IpAddr};
|
||||
use tokio::time::sleep;
|
||||
use url::Url;
|
||||
|
||||
|
|
@ -153,30 +153,32 @@ impl ClientBuilder {
|
|||
}
|
||||
|
||||
impl Client {
|
||||
pub async fn check(&self, request: Request) -> Response {
|
||||
pub async fn check<T: TryInto<Request>>(&self, request: T) -> Result<Response> {
|
||||
let request: Request = match request.try_into() {
|
||||
Ok(request) => request,
|
||||
Err(_e) => bail!("Invalid URI:"),
|
||||
};
|
||||
if self.excluded(&request) {
|
||||
return Response::new(request.uri, Status::Excluded, request.source);
|
||||
return Ok(Response::new(request.uri, Status::Excluded, request.source));
|
||||
}
|
||||
let status = match request.uri {
|
||||
Uri::Website(ref url) => self.check_website(&url).await,
|
||||
Uri::Mail(ref address) => {
|
||||
let valid = self.valid_mail(&address).await;
|
||||
if valid {
|
||||
// TODO: We should not be using a HTTP status code for mail
|
||||
Status::Ok(http::StatusCode::OK)
|
||||
} else {
|
||||
Status::Error(format!("Invalid mail address: {}", address))
|
||||
// TODO: We should not be using a HTTP status code for mail
|
||||
match self.valid_mail(&address).await {
|
||||
true => Status::Ok(http::StatusCode::OK),
|
||||
false => Status::Error(format!("Invalid mail address: {}", address)),
|
||||
}
|
||||
}
|
||||
};
|
||||
Response::new(request.uri, status, request.source)
|
||||
Ok(Response::new(request.uri, status, request.source))
|
||||
}
|
||||
|
||||
pub async fn check_website(&self, url: &Url) -> Status {
|
||||
let mut retries: i64 = 3;
|
||||
let mut wait: u64 = 1;
|
||||
let status = loop {
|
||||
let res = self.check_normal(&url).await;
|
||||
let res = self.check_default(&url).await;
|
||||
match res.is_success() {
|
||||
true => return res,
|
||||
false => {
|
||||
|
|
@ -216,7 +218,7 @@ impl Client {
|
|||
}
|
||||
}
|
||||
|
||||
async fn check_normal(&self, url: &Url) -> Status {
|
||||
async fn check_default(&self, url: &Url) -> Status {
|
||||
let request = self
|
||||
.reqwest_client
|
||||
.request(self.method.clone(), url.as_str());
|
||||
|
|
@ -317,6 +319,14 @@ impl Client {
|
|||
}
|
||||
}
|
||||
|
||||
/// A convenience function to check a single URI.
|
||||
/// This is the simplest way to check a link and avoids having to create a client manually.
|
||||
/// For more complex scenarios, look into using the `ClientBuilder` instead.
|
||||
pub async fn check<T: TryInto<Request>>(request: T) -> Result<Response> {
|
||||
let client = ClientBuilder::default().build()?;
|
||||
Ok(client.check(request).await?)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::collector::Input;
|
||||
|
|
@ -366,8 +376,9 @@ mod test {
|
|||
let res = ClientBuilder::default()
|
||||
.build()
|
||||
.unwrap()
|
||||
.check(website_url(&mock_server.uri()))
|
||||
.await;
|
||||
.check(mock_server.uri())
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(matches!(res.status, Status::Failed(_)));
|
||||
}
|
||||
|
||||
|
|
@ -385,7 +396,8 @@ mod test {
|
|||
.build()
|
||||
.unwrap()
|
||||
.check(website_url(&mock_server.uri()))
|
||||
.await;
|
||||
.await
|
||||
.unwrap();
|
||||
let end = start.elapsed();
|
||||
|
||||
assert!(matches!(res.status, Status::Failed(_)));
|
||||
|
|
@ -414,6 +426,7 @@ mod test {
|
|||
.unwrap()
|
||||
.check(website_url("https://github.com/lycheeverse/lychee"))
|
||||
.await
|
||||
.unwrap()
|
||||
.status,
|
||||
Status::Ok(_)
|
||||
));
|
||||
|
|
@ -424,8 +437,9 @@ mod test {
|
|||
let res = ClientBuilder::default()
|
||||
.build()
|
||||
.unwrap()
|
||||
.check(website_url("https://github.com/lycheeverse/not-lychee"))
|
||||
.check("https://github.com/lycheeverse/not-lychee")
|
||||
.await
|
||||
.unwrap()
|
||||
.status;
|
||||
assert!(matches!(res, Status::Error(_)));
|
||||
}
|
||||
|
|
@ -444,6 +458,7 @@ mod test {
|
|||
.unwrap()
|
||||
.check(website_url(&mock_server.uri()))
|
||||
.await
|
||||
.unwrap()
|
||||
.status;
|
||||
assert!(matches!(res, Status::Ok(_)));
|
||||
}
|
||||
|
|
@ -453,8 +468,9 @@ mod test {
|
|||
let res = ClientBuilder::default()
|
||||
.build()
|
||||
.unwrap()
|
||||
.check(website_url("https://expired.badssl.com/"))
|
||||
.await;
|
||||
.check("https://expired.badssl.com/")
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(matches!(res.status, Status::Error(_)));
|
||||
|
||||
// Same, but ignore certificate error
|
||||
|
|
@ -462,8 +478,9 @@ mod test {
|
|||
.allow_insecure(true)
|
||||
.build()
|
||||
.unwrap()
|
||||
.check(website_url("https://expired.badssl.com/"))
|
||||
.await;
|
||||
.check("https://expired.badssl.com/")
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(matches!(res.status, Status::Ok(_)));
|
||||
}
|
||||
|
||||
|
|
@ -473,7 +490,8 @@ mod test {
|
|||
.build()
|
||||
.unwrap()
|
||||
.check(website_url("https://crates.io/crates/lychee"))
|
||||
.await;
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(matches!(res.status, Status::Failed(StatusCode::NOT_FOUND)));
|
||||
|
||||
// Try again, but with a custom header.
|
||||
|
|
@ -486,7 +504,8 @@ mod test {
|
|||
.build()
|
||||
.unwrap()
|
||||
.check(website_url("https://crates.io/crates/lychee"))
|
||||
.await;
|
||||
.await
|
||||
.unwrap();
|
||||
assert!(matches!(res.status, Status::Ok(_)));
|
||||
}
|
||||
|
||||
|
|
@ -511,7 +530,7 @@ mod test {
|
|||
.build()
|
||||
.unwrap();
|
||||
|
||||
let resp = client.check(website_url(&mock_server.uri())).await;
|
||||
let resp = client.check(website_url(&mock_server.uri())).await.unwrap();
|
||||
assert!(matches!(resp.status, Status::Timeout(_)));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,8 +25,10 @@ impl ClientPool {
|
|||
let client = self.pool.get().await;
|
||||
let tx = self.tx.clone();
|
||||
tokio::spawn(async move {
|
||||
let resp = client.check(req).await;
|
||||
tx.send(resp).await.unwrap();
|
||||
let resp = client.check(req).await.expect("Invalid URI");
|
||||
tx.send(resp)
|
||||
.await
|
||||
.expect("Cannot send response to channel");
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ use url::Url;
|
|||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum FileType {
|
||||
HTML,
|
||||
Html,
|
||||
Markdown,
|
||||
Plaintext,
|
||||
}
|
||||
|
|
@ -29,7 +29,7 @@ impl<P: AsRef<Path>> From<P> for FileType {
|
|||
match path.extension() {
|
||||
Some(ext) => match ext {
|
||||
_ if ext == "md" => FileType::Markdown,
|
||||
_ if (ext == "htm" || ext == "html") => FileType::HTML,
|
||||
_ if (ext == "htm" || ext == "html") => FileType::Html,
|
||||
_ => FileType::Plaintext,
|
||||
},
|
||||
None => FileType::Plaintext,
|
||||
|
|
@ -147,7 +147,7 @@ pub(crate) fn extract_links(
|
|||
) -> HashSet<Request> {
|
||||
let links = match input_content.file_type {
|
||||
FileType::Markdown => extract_links_from_markdown(&input_content.content),
|
||||
FileType::HTML => extract_links_from_html(&input_content.content),
|
||||
FileType::Html => extract_links_from_html(&input_content.content),
|
||||
FileType::Plaintext => extract_links_from_plaintext(&input_content.content),
|
||||
};
|
||||
|
||||
|
|
@ -234,7 +234,7 @@ mod test {
|
|||
</html>"#;
|
||||
|
||||
let links: HashSet<Uri> = extract_links(
|
||||
&InputContent::from_string(input, FileType::HTML),
|
||||
&InputContent::from_string(input, FileType::Html),
|
||||
Some(Url::parse("https://github.com/lycheeverse/").unwrap()),
|
||||
)
|
||||
.into_iter()
|
||||
|
|
@ -305,7 +305,7 @@ mod test {
|
|||
fn test_extract_html5_not_valid_xml() {
|
||||
let input = load_fixture("TEST_HTML5.html");
|
||||
let links: HashSet<Uri> =
|
||||
extract_links(&InputContent::from_string(&input, FileType::HTML), None)
|
||||
extract_links(&InputContent::from_string(&input, FileType::Html), None)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
|
@ -328,7 +328,7 @@ mod test {
|
|||
fn test_extract_html5_not_valid_xml_relative_links() {
|
||||
let input = load_fixture("TEST_HTML5.html");
|
||||
let links: HashSet<Uri> = extract_links(
|
||||
&InputContent::from_string(&input, FileType::HTML),
|
||||
&InputContent::from_string(&input, FileType::Html),
|
||||
Some(Url::parse("https://example.com").unwrap()),
|
||||
)
|
||||
.into_iter()
|
||||
|
|
@ -357,7 +357,7 @@ mod test {
|
|||
// this has been problematic with previous XML based parser
|
||||
let input = load_fixture("TEST_HTML5_LOWERCASE_DOCTYPE.html");
|
||||
let links: HashSet<Uri> =
|
||||
extract_links(&InputContent::from_string(&input, FileType::HTML), None)
|
||||
extract_links(&InputContent::from_string(&input, FileType::Html), None)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
|
@ -375,7 +375,7 @@ mod test {
|
|||
// minified HTML with some quirky elements such as href attribute values specified without quotes
|
||||
let input = load_fixture("TEST_HTML5_MINIFIED.html");
|
||||
let links: HashSet<Uri> =
|
||||
extract_links(&InputContent::from_string(&input, FileType::HTML), None)
|
||||
extract_links(&InputContent::from_string(&input, FileType::Html), None)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
|
@ -399,7 +399,7 @@ mod test {
|
|||
// malformed links shouldn't stop the parser from further parsing
|
||||
let input = load_fixture("TEST_HTML5_MALFORMED_LINKS.html");
|
||||
let links: HashSet<Uri> =
|
||||
extract_links(&InputContent::from_string(&input, FileType::HTML), None)
|
||||
extract_links(&InputContent::from_string(&input, FileType::Html), None)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
|
@ -419,7 +419,7 @@ mod test {
|
|||
// the element name shouldn't matter for attributes like href, src, cite etc
|
||||
let input = load_fixture("TEST_HTML5_CUSTOM_ELEMENTS.html");
|
||||
let links: HashSet<Uri> =
|
||||
extract_links(&InputContent::from_string(&input, FileType::HTML), None)
|
||||
extract_links(&InputContent::from_string(&input, FileType::Html), None)
|
||||
.into_iter()
|
||||
.map(|r| r.uri)
|
||||
.collect();
|
||||
|
|
|
|||
25
src/lib.rs
25
src/lib.rs
|
|
@ -2,22 +2,30 @@
|
|||
|
||||
/**
|
||||
* `lychee` is a library for checking links.
|
||||
* The main struct of this crate is `ClientBuilder` which can be used to
|
||||
* configure and run your own link checker.
|
||||
*
|
||||
* "Hello world" example:
|
||||
* ```
|
||||
* use std::error::Error;
|
||||
*
|
||||
* use lychee::{Request, Input, ClientBuilder, Status};
|
||||
* use lychee::Uri::Website;
|
||||
* use url::Url;
|
||||
* #[tokio::main]
|
||||
* async fn main() -> Result<(), Box<dyn Error>> {
|
||||
* let response = lychee::check("https://github.com/lycheeverse/lychee").await?;
|
||||
* println!("{}", response);
|
||||
* Ok(())
|
||||
* }
|
||||
* ```
|
||||
*
|
||||
* For more specific use-cases you can build a lychee client yourself,
|
||||
* using the `ClientBuilder` which can be used to
|
||||
* configure and run your own link checker and grants full flexibility:
|
||||
*
|
||||
* ```
|
||||
* use lychee::{ClientBuilder, Status};
|
||||
* use std::error::Error;
|
||||
*
|
||||
* #[tokio::main]
|
||||
* async fn main() -> Result<(), Box<dyn Error>> {
|
||||
* let client = ClientBuilder::default().build()?;
|
||||
* let url = Url::parse("https://github.com/lycheeverse/lychee")?;
|
||||
* let response = client.check(Request::new(Website(url), Input::Stdin)).await;
|
||||
* let response = client.check("https://github.com/lycheeverse/lychee").await?;
|
||||
* assert!(matches!(response.status, Status::Ok(_)));
|
||||
* Ok(())
|
||||
* }
|
||||
|
|
@ -33,6 +41,7 @@ pub mod collector;
|
|||
pub mod extract;
|
||||
pub mod test_utils;
|
||||
|
||||
pub use client::check;
|
||||
pub use client::ClientBuilder;
|
||||
pub use client_pool::ClientPool;
|
||||
pub use collector::Input;
|
||||
|
|
|
|||
26
src/types.rs
26
src/types.rs
|
|
@ -21,18 +21,36 @@ impl Display for Request {
|
|||
}
|
||||
}
|
||||
|
||||
impl TryFrom<String> for Request {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(s: String) -> Result<Self, Self::Error> {
|
||||
let uri = Uri::try_from(s.as_str())?;
|
||||
Ok(Request::new(uri, Input::String(s)))
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&str> for Request {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(s: &str) -> Result<Self, Self::Error> {
|
||||
let uri = Uri::try_from(s)?;
|
||||
Ok(Request::new(uri, Input::String(s.to_owned())))
|
||||
}
|
||||
}
|
||||
|
||||
/// Specifies how requests to websites will be made
|
||||
pub(crate) enum RequestMethod {
|
||||
GET,
|
||||
HEAD,
|
||||
Get,
|
||||
Head,
|
||||
}
|
||||
|
||||
impl TryFrom<String> for RequestMethod {
|
||||
type Error = anyhow::Error;
|
||||
fn try_from(value: String) -> Result<Self, Self::Error> {
|
||||
match value.to_lowercase().as_ref() {
|
||||
"get" => Ok(RequestMethod::GET),
|
||||
"head" => Ok(RequestMethod::HEAD),
|
||||
"get" => Ok(RequestMethod::Get),
|
||||
"head" => Ok(RequestMethod::Head),
|
||||
_ => Err(anyhow!("Only `get` and `head` allowed, got {}", value)),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue