diff --git a/README.md b/README.md index eecb1ec..3814f04 100644 --- a/README.md +++ b/README.md @@ -205,25 +205,35 @@ ARGS: ## Library usage You can use lychee as a library for your own projects. -Simply add it as a dependency and build your client: +Here is a "hello world" example: ```rust -use lychee::{Request, Input, ClientBuilder, Status}; -use lychee::Uri::Website; -use url::Url; +use std::error::Error; + +#[tokio::main] +async fn main() -> Result<(), Box> { + let response = lychee::check("https://github.com/lycheeverse/lychee").await?; + println!("{}", response); + Ok(()) +} +``` + +This is equivalent to the following snippet, in which we build our own client: + +```rust +use lychee::{ClientBuilder, Status}; use std::error::Error; #[tokio::main] async fn main() -> Result<(), Box> { let client = ClientBuilder::default().build()?; - let url = Url::parse("https://github.com/lycheeverse/lychee")?; - let response = client.check(Request::new(Website(url), Input::Stdin)).await; + let response = client.check("https://github.com/lycheeverse/lychee").await?; assert!(matches!(response.status, Status::Ok(_))); Ok(()) } ``` -The client is very customizable, e.g. +The client builder is very customizable: ```rust let client = lychee::ClientBuilder::default() @@ -242,11 +252,12 @@ let client = lychee::ClientBuilder::default() .build()?; ``` +All options that you set will be used for all link checks. See the [builder documentation](https://docs.rs/lychee/latest/lychee/struct.ClientBuilder.html) for all options. ## GitHub Action usage -GitHub Action is available as a separate repository: [lycheeverse/lychee-action](https://github.com/lycheeverse/lychee-action) +A GitHub Action that uses lychee is available as a separate repository: [lycheeverse/lychee-action](https://github.com/lycheeverse/lychee-action) which includes usage instructions. ## Troubleshooting and workarounds diff --git a/src/bin/lychee/main.rs b/src/bin/lychee/main.rs index 5f8b97b..e6be371 100644 --- a/src/bin/lychee/main.rs +++ b/src/bin/lychee/main.rs @@ -77,7 +77,7 @@ fn show_progress(progress_bar: &Option, response: &Response, verbos fn fmt(stats: &ResponseStats, format: &Format) -> Result { Ok(match format { Format::String => stats.to_string(), - Format::JSON => serde_json::to_string_pretty(&stats)?, + Format::Json => serde_json::to_string_pretty(&stats)?, }) } diff --git a/src/bin/lychee/options.rs b/src/bin/lychee/options.rs index 17cb80f..5227d0e 100644 --- a/src/bin/lychee/options.rs +++ b/src/bin/lychee/options.rs @@ -16,7 +16,7 @@ const MAX_REDIRECTS: usize = 10; #[derive(Debug, Deserialize)] pub enum Format { String, - JSON, + Json, } impl FromStr for Format { @@ -24,7 +24,7 @@ impl FromStr for Format { fn from_str(format: &str) -> Result { match format { "string" => Ok(Format::String), - "json" => Ok(Format::JSON), + "json" => Ok(Format::Json), _ => Err(anyhow!("Could not parse format {}", format)), } } diff --git a/src/client.rs b/src/client.rs index 23a9b36..3530dee 100644 --- a/src/client.rs +++ b/src/client.rs @@ -1,12 +1,12 @@ -use anyhow::{anyhow, Context, Result}; +use anyhow::{anyhow, bail, Context, Result}; use check_if_email_exists::{check_email, CheckEmailInput}; use derive_builder::Builder; use headers::{HeaderMap, HeaderValue}; use hubcaps::{Credentials, Github}; use regex::{Regex, RegexSet}; use reqwest::header; -use std::net::IpAddr; use std::{collections::HashSet, time::Duration}; +use std::{convert::TryInto, net::IpAddr}; use tokio::time::sleep; use url::Url; @@ -153,30 +153,32 @@ impl ClientBuilder { } impl Client { - pub async fn check(&self, request: Request) -> Response { + pub async fn check>(&self, request: T) -> Result { + let request: Request = match request.try_into() { + Ok(request) => request, + Err(_e) => bail!("Invalid URI:"), + }; if self.excluded(&request) { - return Response::new(request.uri, Status::Excluded, request.source); + return Ok(Response::new(request.uri, Status::Excluded, request.source)); } let status = match request.uri { Uri::Website(ref url) => self.check_website(&url).await, Uri::Mail(ref address) => { - let valid = self.valid_mail(&address).await; - if valid { - // TODO: We should not be using a HTTP status code for mail - Status::Ok(http::StatusCode::OK) - } else { - Status::Error(format!("Invalid mail address: {}", address)) + // TODO: We should not be using a HTTP status code for mail + match self.valid_mail(&address).await { + true => Status::Ok(http::StatusCode::OK), + false => Status::Error(format!("Invalid mail address: {}", address)), } } }; - Response::new(request.uri, status, request.source) + Ok(Response::new(request.uri, status, request.source)) } pub async fn check_website(&self, url: &Url) -> Status { let mut retries: i64 = 3; let mut wait: u64 = 1; let status = loop { - let res = self.check_normal(&url).await; + let res = self.check_default(&url).await; match res.is_success() { true => return res, false => { @@ -216,7 +218,7 @@ impl Client { } } - async fn check_normal(&self, url: &Url) -> Status { + async fn check_default(&self, url: &Url) -> Status { let request = self .reqwest_client .request(self.method.clone(), url.as_str()); @@ -317,6 +319,14 @@ impl Client { } } +/// A convenience function to check a single URI +/// This is the most simple link check and avoids having to create a client manually. +/// For more complex scenarios, look into using the `ClientBuilder` instead. +pub async fn check>(request: T) -> Result { + let client = ClientBuilder::default().build()?; + Ok(client.check(request).await?) +} + #[cfg(test)] mod test { use crate::collector::Input; @@ -366,8 +376,9 @@ mod test { let res = ClientBuilder::default() .build() .unwrap() - .check(website_url(&mock_server.uri())) - .await; + .check(mock_server.uri()) + .await + .unwrap(); assert!(matches!(res.status, Status::Failed(_))); } @@ -385,7 +396,8 @@ mod test { .build() .unwrap() .check(website_url(&mock_server.uri())) - .await; + .await + .unwrap(); let end = start.elapsed(); assert!(matches!(res.status, Status::Failed(_))); @@ -414,6 +426,7 @@ mod test { .unwrap() .check(website_url("https://github.com/lycheeverse/lychee")) .await + .unwrap() .status, Status::Ok(_) )); @@ -424,8 +437,9 @@ mod test { let res = ClientBuilder::default() .build() .unwrap() - .check(website_url("https://github.com/lycheeverse/not-lychee")) + .check("https://github.com/lycheeverse/not-lychee") .await + .unwrap() .status; assert!(matches!(res, Status::Error(_))); } @@ -444,6 +458,7 @@ mod test { .unwrap() .check(website_url(&mock_server.uri())) .await + .unwrap() .status; assert!(matches!(res, Status::Ok(_))); } @@ -453,8 +468,9 @@ mod test { let res = ClientBuilder::default() .build() .unwrap() - .check(website_url("https://expired.badssl.com/")) - .await; + .check("https://expired.badssl.com/") + .await + .unwrap(); assert!(matches!(res.status, Status::Error(_))); // Same, but ignore certificate error @@ -462,8 +478,9 @@ mod test { .allow_insecure(true) .build() .unwrap() - .check(website_url("https://expired.badssl.com/")) - .await; + .check("https://expired.badssl.com/") + .await + .unwrap(); assert!(matches!(res.status, Status::Ok(_))); } @@ -473,7 +490,8 @@ mod test { .build() .unwrap() .check(website_url("https://crates.io/crates/lychee")) - .await; + .await + .unwrap(); assert!(matches!(res.status, Status::Failed(StatusCode::NOT_FOUND))); // Try again, but with a custom header. @@ -486,7 +504,8 @@ mod test { .build() .unwrap() .check(website_url("https://crates.io/crates/lychee")) - .await; + .await + .unwrap(); assert!(matches!(res.status, Status::Ok(_))); } @@ -511,7 +530,7 @@ mod test { .build() .unwrap(); - let resp = client.check(website_url(&mock_server.uri())).await; + let resp = client.check(website_url(&mock_server.uri())).await.unwrap(); assert!(matches!(resp.status, Status::Timeout(_))); } diff --git a/src/client_pool.rs b/src/client_pool.rs index 223baa4..56e58b3 100644 --- a/src/client_pool.rs +++ b/src/client_pool.rs @@ -25,8 +25,10 @@ impl ClientPool { let client = self.pool.get().await; let tx = self.tx.clone(); tokio::spawn(async move { - let resp = client.check(req).await; - tx.send(resp).await.unwrap(); + let resp = client.check(req).await.expect("Invalid URI"); + tx.send(resp) + .await + .expect("Cannot send response to channel"); }); } } diff --git a/src/extract.rs b/src/extract.rs index caf30da..07fe944 100644 --- a/src/extract.rs +++ b/src/extract.rs @@ -11,7 +11,7 @@ use url::Url; #[derive(Clone, Debug)] pub enum FileType { - HTML, + Html, Markdown, Plaintext, } @@ -29,7 +29,7 @@ impl> From

for FileType { match path.extension() { Some(ext) => match ext { _ if ext == "md" => FileType::Markdown, - _ if (ext == "htm" || ext == "html") => FileType::HTML, + _ if (ext == "htm" || ext == "html") => FileType::Html, _ => FileType::Plaintext, }, None => FileType::Plaintext, @@ -147,7 +147,7 @@ pub(crate) fn extract_links( ) -> HashSet { let links = match input_content.file_type { FileType::Markdown => extract_links_from_markdown(&input_content.content), - FileType::HTML => extract_links_from_html(&input_content.content), + FileType::Html => extract_links_from_html(&input_content.content), FileType::Plaintext => extract_links_from_plaintext(&input_content.content), }; @@ -234,7 +234,7 @@ mod test { "#; let links: HashSet = extract_links( - &InputContent::from_string(input, FileType::HTML), + &InputContent::from_string(input, FileType::Html), Some(Url::parse("https://github.com/lycheeverse/").unwrap()), ) .into_iter() @@ -305,7 +305,7 @@ mod test { fn test_extract_html5_not_valid_xml() { let input = load_fixture("TEST_HTML5.html"); let links: HashSet = - extract_links(&InputContent::from_string(&input, FileType::HTML), None) + extract_links(&InputContent::from_string(&input, FileType::Html), None) .into_iter() .map(|r| r.uri) .collect(); @@ -328,7 +328,7 @@ mod test { fn test_extract_html5_not_valid_xml_relative_links() { let input = load_fixture("TEST_HTML5.html"); let links: HashSet = extract_links( - &InputContent::from_string(&input, FileType::HTML), + &InputContent::from_string(&input, FileType::Html), Some(Url::parse("https://example.com").unwrap()), ) .into_iter() @@ -357,7 +357,7 @@ mod test { // this has been problematic with previous XML based parser let input = load_fixture("TEST_HTML5_LOWERCASE_DOCTYPE.html"); let links: HashSet = - extract_links(&InputContent::from_string(&input, FileType::HTML), None) + extract_links(&InputContent::from_string(&input, FileType::Html), None) .into_iter() .map(|r| r.uri) .collect(); @@ -375,7 +375,7 @@ mod test { // minified HTML with some quirky elements such as href attribute values specified without quotes let input = load_fixture("TEST_HTML5_MINIFIED.html"); let links: HashSet = - extract_links(&InputContent::from_string(&input, FileType::HTML), None) + extract_links(&InputContent::from_string(&input, FileType::Html), None) .into_iter() .map(|r| r.uri) .collect(); @@ -399,7 +399,7 @@ mod test { // malformed links shouldn't stop the parser from further parsing let input = load_fixture("TEST_HTML5_MALFORMED_LINKS.html"); let links: HashSet = - extract_links(&InputContent::from_string(&input, FileType::HTML), None) + extract_links(&InputContent::from_string(&input, FileType::Html), None) .into_iter() .map(|r| r.uri) .collect(); @@ -419,7 +419,7 @@ mod test { // the element name shouldn't matter for attributes like href, src, cite etc let input = load_fixture("TEST_HTML5_CUSTOM_ELEMENTS.html"); let links: HashSet = - extract_links(&InputContent::from_string(&input, FileType::HTML), None) + extract_links(&InputContent::from_string(&input, FileType::Html), None) .into_iter() .map(|r| r.uri) .collect(); diff --git a/src/lib.rs b/src/lib.rs index 32079c1..8345d59 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,22 +2,30 @@ /** * `lychee` is a library for checking links. -* The main struct of this crate is `ClientBuilder` which can be used to -* configure and run your own link checker. -* * "Hello world" example: * ``` +* use std::error::Error; * -* use lychee::{Request, Input, ClientBuilder, Status}; -* use lychee::Uri::Website; -* use url::Url; +* #[tokio::main] +* async fn main() -> Result<(), Box> { +* let response = lychee::check("https://github.com/lycheeverse/lychee").await?; +* println!("{}", response); +* Ok(()) +* } +* ``` +* +* For more specific use-cases you can build a lychee client yourself, +* using the `ClientBuilder` which can be used to +* configure and run your own link checker and grants full flexibility: +* +* ``` +* use lychee::{ClientBuilder, Status}; * use std::error::Error; * * #[tokio::main] * async fn main() -> Result<(), Box> { * let client = ClientBuilder::default().build()?; -* let url = Url::parse("https://github.com/lycheeverse/lychee")?; -* let response = client.check(Request::new(Website(url), Input::Stdin)).await; +* let response = client.check("https://github.com/lycheeverse/lychee").await?; * assert!(matches!(response.status, Status::Ok(_))); * Ok(()) * } @@ -33,6 +41,7 @@ pub mod collector; pub mod extract; pub mod test_utils; +pub use client::check; pub use client::ClientBuilder; pub use client_pool::ClientPool; pub use collector::Input; diff --git a/src/types.rs b/src/types.rs index d2040e6..9d75157 100644 --- a/src/types.rs +++ b/src/types.rs @@ -21,18 +21,36 @@ impl Display for Request { } } +impl TryFrom for Request { + type Error = anyhow::Error; + + fn try_from(s: String) -> Result { + let uri = Uri::try_from(s.as_str())?; + Ok(Request::new(uri, Input::String(s))) + } +} + +impl TryFrom<&str> for Request { + type Error = anyhow::Error; + + fn try_from(s: &str) -> Result { + let uri = Uri::try_from(s)?; + Ok(Request::new(uri, Input::String(s.to_owned()))) + } +} + /// Specifies how requests to websites will be made pub(crate) enum RequestMethod { - GET, - HEAD, + Get, + Head, } impl TryFrom for RequestMethod { type Error = anyhow::Error; fn try_from(value: String) -> Result { match value.to_lowercase().as_ref() { - "get" => Ok(RequestMethod::GET), - "head" => Ok(RequestMethod::HEAD), + "get" => Ok(RequestMethod::Get), + "head" => Ok(RequestMethod::Head), _ => Err(anyhow!("Only `get` and `head` allowed, got {}", value)), } }