chore!: improve client and remap modules (#913)

`lychee_lib::client`:

- Improved documentation.
- Added a log message in `ClientBuilder::client()` when the provided user-agent
  overrides the one defined in the provided custom headers.
- Removed unnecessary error handling in `Client::check()` when setting HTTPS
  scheme because all failure cases should occur when checking this URL the first
  time already.
- Removed unnecessary error handling in `Client::remap()` since
  `lychee_lib::remap::Remaps::remap()` doesn't return a `Result` anymore.
- Fixed potential integer overflow in `Client::check_website()` when the wait
  time between retries doubles, by using `std::time::Duration::saturating_mul`
  instead.
- Renamed `invalid()` to `validate_url()`.

`lychee_lib::remap`:

- Improved documentation, in particular, clarified (in the comment) that it's
  URLs not URIs being remapped.
- Changed `Remaps::remap()` so it takes `&mut Url` instead of `Uri` as its
  argument, and doesn't return a `Result` as a result.
    - Using `Url` instead of `Uri` because it aligns with the concept of
      remapping locations rather than identifiers.
    - Mutating the URL directly instead of returning a new one because it's
      more straightforward.
    - There is no error handling because we don't convert from URL to URI
      anymore. Furthermore, this always succeeded in the first place so we
      never needed error handling.
- Added an implementation of `IntoIterator` for `&'a Remaps` and the convenience
  method `Remaps::iter`. (Their mutable or moving counterparts are deliberately
  avoided because we don't want library users to modify or consume the
  remapping rules after their instantiation.)

`lychee_lib::error`:

- Renamed `ErrorKind::InvalidUriRemap` to `InvalidUrlRemap` and improved
  its error message.

Changes to other modules are minor and only serve to accompany the
aforementioned changes.
This commit is contained in:
Lucius Hu 2023-01-16 13:14:09 -05:00 committed by GitHub
parent f88c21fc5a
commit e2406089ad
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 241 additions and 155 deletions

View file

@ -48,7 +48,7 @@ where
let mut request = request?;
// Apply URI remappings (if any)
request.uri = params.client.remap(request.uri)?;
params.client.remap(&mut request.uri);
// Avoid panic on broken pipe.
// See https://github.com/rust-lang/rust/issues/46016

View file

@ -39,7 +39,7 @@ use crate::{
pub const DEFAULT_MAX_REDIRECTS: usize = 5;
/// Default number of retries before a request is deemed as failed, 3.
pub const DEFAULT_MAX_RETRIES: u64 = 3;
/// Default wait time in seconds between requests, 1.
/// Default wait time in seconds between retries, 1.
pub const DEFAULT_RETRY_WAIT_TIME_SECS: usize = 1;
/// Default timeout in seconds before a request is deemed as failed, 20.
pub const DEFAULT_TIMEOUT_SECS: usize = 20;
@ -47,10 +47,12 @@ pub const DEFAULT_TIMEOUT_SECS: usize = 20;
pub const DEFAULT_USER_AGENT: &str = concat!("lychee/", env!("CARGO_PKG_VERSION"));
// Constants currently not configurable by the user.
/// A timeout for only the connect phase of a Client.
/// A timeout for only the connect phase of a [`Client`].
const CONNECT_TIMEOUT: u64 = 10;
/// TCP keepalive
/// See <https://tldp.org/HOWTO/TCP-Keepalive-HOWTO/overview.html> for more info
/// TCP keepalive.
///
/// See <https://tldp.org/HOWTO/TCP-Keepalive-HOWTO/overview.html> for more
/// information.
const TCP_KEEPALIVE: u64 = 60;
/// Builder for [`Client`].
@ -61,7 +63,8 @@ const TCP_KEEPALIVE: u64 = 60;
#[builder(builder_method_doc = "
Create a builder for building `ClientBuilder`.
On the builder call, call methods with same name as its fields to set their values.
On the builder call, call methods with the same names as its fields to set their
values.
Finally, call `.build()` to create the instance of `ClientBuilder`.
")]
@ -70,28 +73,32 @@ pub struct ClientBuilder {
///
/// This allows much more request before getting rate-limited.
///
/// ## Rate-limiting Defaults
/// # Rate-limiting Defaults
///
/// As of Feb 2022, it's 60 per hour without GitHub token v.s.
/// 5000 per hour with token.
github_token: Option<SecretString>,
/// Remap URIs matching a pattern to a different URI
/// Remap URIs matching a pattern to a different URI.
///
/// This makes it possible to remap any HTTP/HTTPS endpoint to a different
/// HTTP/HTTPS endpoint. This feature could also be used to proxy
/// HTTP/HTTPS one. This feature could also be used to proxy
/// certain requests.
///
/// # Usage Notes
///
/// Use with caution because a large set of remapping rules may cause
/// performance issues. Furthermore rules are executed in order and multiple
/// mappings for the same URI are allowed, so there is no guarantee that the
/// rules may not conflict with each other.
/// performance issues.
///
/// Furthermore rules are executed sequentially and multiple mappings for
/// the same URI are allowed, so it is up to the library user's discretion to
/// make sure rules don't conflict with each other.
remaps: Option<Remaps>,
/// Links matching this set of regular expressions are **always** checked.
///
/// This has higher precedence over [`ClientBuilder::excludes`], **but**
/// has lower precedence over any other `exclude_` fields or
/// has lower precedence compared to any other `exclude_` fields or
/// [`ClientBuilder::schemes`] below.
includes: Option<RegexSet>,
@ -109,7 +116,7 @@ pub struct ClientBuilder {
/// When `true`, exclude private IP addresses.
///
/// ## IPv4
/// # IPv4
///
/// The private address ranges are defined in [IETF RFC 1918] and include:
///
@ -117,16 +124,17 @@ pub struct ClientBuilder {
/// - `172.16.0.0/12`
/// - `192.168.0.0/16`
///
/// ## IPv6
/// # IPv6
///
/// The address is a unique local address (`fc00::/7`).
///
/// This property is defined in [IETF RFC 4193].
///
/// ## Note
/// # Note
///
/// Unicast site-local network was defined in [IETF RFC 4291], but was fully deprecated in
/// [IETF RFC 3879]. So it is **NOT** considered as private on this purpose.
/// Unicast site-local network was defined in [IETF RFC 4291], but was fully
/// deprecated in [IETF RFC 3879]. So it is **NOT** considered as private on
/// this purpose.
///
/// [IETF RFC 1918]: https://tools.ietf.org/html/rfc1918
/// [IETF RFC 4193]: https://tools.ietf.org/html/rfc4193
@ -136,17 +144,19 @@ pub struct ClientBuilder {
/// When `true`, exclude link-local IPs.
///
/// ## IPv4
/// # IPv4
///
/// The address is `169.254.0.0/16`.
///
/// This property is defined by [IETF RFC 3927].
///
/// ## IPv6
/// # IPv6
///
/// The address is a unicast address with link-local scope, as defined in [RFC 4291].
/// The address is a unicast address with link-local scope, as defined in
/// [RFC 4291].
///
/// A unicast address has link-local scope if it has the prefix `fe80::/10`, as per [RFC 4291 section 2.4].
/// A unicast address has link-local scope if it has the prefix `fe80::/10`,
/// as per [RFC 4291 section 2.4].
///
/// [IETF RFC 3927]: https://tools.ietf.org/html/rfc3927
/// [RFC 4291]: https://tools.ietf.org/html/rfc4291
@ -155,15 +165,16 @@ pub struct ClientBuilder {
/// When `true`, exclude loopback IP addresses.
///
/// ## IPv4
/// # IPv4
///
/// This is a loopback address (`127.0.0.0/8`).
///
/// This property is defined by [IETF RFC 1122].
///
/// ## IPv6
/// # IPv6
///
/// This is the loopback address (`::1`), as defined in [IETF RFC 4291 section 2.5.3].
/// This is the loopback address (`::1`), as defined in
/// [IETF RFC 4291 section 2.5.3].
///
/// [IETF RFC 1122]: https://tools.ietf.org/html/rfc1122
/// [IETF RFC 4291 section 2.5.3]: https://tools.ietf.org/html/rfc4291#section-2.5.3
@ -173,16 +184,24 @@ pub struct ClientBuilder {
exclude_mail: bool,
/// Maximum number of redirects per request before returning an error.
///
/// Defaults to [`DEFAULT_MAX_REDIRECTS`].
#[builder(default = DEFAULT_MAX_REDIRECTS)]
max_redirects: usize,
/// Maximum number of retries per request before returning an error.
///
/// Defaults to [`DEFAULT_MAX_RETRIES`].
#[builder(default = DEFAULT_MAX_RETRIES)]
max_retries: u64,
/// User-agent used for checking links.
///
/// *NOTE*: This may be helpful for bypassing certain firewalls.
/// Defaults to [`DEFAULT_USER_AGENT`].
///
/// # Notes
///
/// This may be helpful for bypassing certain firewalls.
// Faking the user agent is necessary for some websites, unfortunately.
// Otherwise we get a 403 from the firewall (e.g. Sucuri/Cloudproxy on ldra.com).
#[builder(default_code = "String::from(DEFAULT_USER_AGENT)")]
@ -190,22 +209,25 @@ pub struct ClientBuilder {
/// When `true`, accept invalid SSL certificates.
///
/// ## Warning
/// # Warning
///
/// You should think very carefully before using this method. If
/// invalid certificates are trusted, any certificate for any site
/// will be trusted for use. This includes expired certificates. This
/// introduces significant vulnerabilities, and should only be used
/// as a last resort.
/// You should think very carefully before allowing invalid SSL
/// certificates. It will accept any certificate for any site to be trusted
/// including expired certificates. This introduces significant
/// vulnerabilities, and should only be used as a last resort.
// TODO: We should add a warning message in CLI. (Lucius, Jan 2023)
allow_insecure: bool,
/// When non-empty, only links with matched URI schemes are checked.
/// Otherwise, this has no effect.
/// Set of accepted URL schemes.
///
/// Only links with matched URI schemes are checked. This has no effect when
/// it's empty.
schemes: HashSet<String>,
/// Sets the default [headers] for every request. See also [here].
/// Default [headers] for every request.
///
/// This allows working around validation issues on some websites.
/// This allows working around validation issues on some websites. See also
/// [here] for usage examples.
///
/// [headers]: https://docs.rs/http/latest/http/header/struct.HeaderName.html
/// [here]: https://docs.rs/reqwest/latest/reqwest/struct.ClientBuilder.html#method.default_headers
@ -220,17 +242,29 @@ pub struct ClientBuilder {
/// Unmatched return codes/ status codes are deemed as errors.
accepted: Option<HashSet<StatusCode>>,
/// Response timeout per request.
/// Response timeout per request in seconds.
timeout: Option<Duration>,
/// Initial time between retries of failed requests
/// Initial time between retries of failed requests.
///
/// The wait time will increase using an exponential backoff mechanism
retry_wait_time: Option<Duration>,
/// Defaults to [`DEFAULT_RETRY_WAIT_TIME_SECS`].
///
/// # Notes
///
/// For each request, the wait time increases using an exponential backoff
/// mechanism. For example, if the value is 1 second, then it waits for
/// 2 ^ (N-1) seconds before the N-th retry.
///
/// This prevents spending too much system resources on slow responders and
/// prioritizes other requests.
#[builder(default_code = "Duration::from_secs(DEFAULT_RETRY_WAIT_TIME_SECS as u64)")]
retry_wait_time: Duration,
/// Requires using HTTPS when it's available.
/// When `true`, requires using HTTPS when it's available.
///
/// This would treat unencrypted links as errors when HTTPS is available.
/// It has no effect on non-HTTP schemes or if the URL doesn't support
/// HTTPS.
require_https: bool,
}
@ -248,25 +282,31 @@ impl ClientBuilder {
/// # Errors
///
/// Returns an `Err` if:
/// - The user-agent is invalid.
/// - The request client cannot be created.
/// See [here](https://docs.rs/reqwest/latest/reqwest/struct.ClientBuilder.html#errors).
/// - The Github client cannot be created.
/// - The user-agent contains characters other than ASCII 32-127.
/// - The reqwest client cannot be instantiated. This occurs if a TLS
/// backend cannot be initialized or the resolver fails to load the system
/// configuration. See [here].
/// - The Github client cannot be created. Since the implementation also
/// uses reqwest under the hood, this errors in the same circumstances as
/// the last one.
///
/// [here]: https://docs.rs/reqwest/latest/reqwest/struct.ClientBuilder.html#errors
pub fn client(self) -> Result<Client> {
let Self {
github_token,
remaps,
includes,
excludes,
user_agent,
schemes,
custom_headers: mut headers,
method,
accepted,
..
} = self;
headers.insert(header::USER_AGENT, HeaderValue::from_str(&user_agent)?);
if let Some(prev_user_agent) =
headers.insert(header::USER_AGENT, HeaderValue::try_from(&user_agent)?)
{
// TODO: make this configurable according to verbosity (Lucius, Jan 2023)
println!(
"Found user-agent in headers: {}. Overriding it with {user_agent}.",
prev_user_agent.to_str().unwrap_or("<EFBFBD>"),
);
};
headers.insert(
header::TRANSFER_ENCODING,
@ -288,20 +328,22 @@ impl ClientBuilder {
.build()
.map_err(ErrorKind::NetworkRequest)?;
let github_client = match github_token.as_ref().map(ExposeSecret::expose_secret) {
let github_client = match self.github_token.as_ref().map(ExposeSecret::expose_secret) {
Some(token) if !token.is_empty() => Some(
Octocrab::builder()
.personal_token(token.clone())
.build()
// this is essentially the same reqwest::ClientBuilder::build error
// see https://docs.rs/octocrab/0.18.1/src/octocrab/lib.rs.html#360-364
.map_err(ErrorKind::BuildGithubClient)?,
),
_ => None,
};
let filter = Filter {
includes: includes.map(|regex| Includes { regex }),
excludes: excludes.map(|regex| Excludes { regex }),
schemes,
includes: self.includes.map(|regex| Includes { regex }),
excludes: self.excludes.map(|regex| Excludes { regex }),
schemes: self.schemes,
// exclude_all_private option turns on all "private" excludes,
// including private IPs, link-local IPs and loopback IPs
exclude_private_ips: self.exclude_all_private || self.exclude_private_ips,
@ -310,21 +352,17 @@ impl ClientBuilder {
exclude_mail: self.exclude_mail,
};
let retry_wait_time = self
.retry_wait_time
.unwrap_or_else(|| Duration::from_secs(DEFAULT_RETRY_WAIT_TIME_SECS as u64));
let quirks = Quirks::default();
Ok(Client {
reqwest_client,
github_client,
remaps,
remaps: self.remaps,
filter,
max_retries: self.max_retries,
retry_wait_time,
method,
accepted,
retry_wait_time: self.retry_wait_time,
method: self.method,
accepted: self.accepted,
require_https: self.require_https,
quirks,
})
@ -333,16 +371,17 @@ impl ClientBuilder {
/// Handles incoming requests and returns responses.
///
/// See [`ClientBuilder`] which contains sane defaults for all configuration options.
/// See [`ClientBuilder`] which contains sane defaults for all configuration
/// options.
#[derive(Debug, Clone)]
pub struct Client {
/// Underlying `reqwest` client instance that handles the HTTP requests.
reqwest_client: reqwest::Client,
/// Github client.
/// Optional GitHub client that handles communications with GitHub.
github_client: Option<Octocrab>,
/// Optional remapping rules for URIs matching pattern
/// Optional remapping rules for URIs matching pattern.
remaps: Option<Remaps>,
/// Rules to decided whether each link would be checked or ignored.
@ -351,7 +390,8 @@ pub struct Client {
/// Maximum number of retries per request before returning an error.
max_retries: u64,
/// Initial time between retries of failed requests
/// Initial wait time between retries of failed requests. This doubles after
/// each failure.
retry_wait_time: Duration,
/// HTTP method used for requests, e.g. `GET` or `HEAD`.
@ -374,38 +414,48 @@ pub struct Client {
}
impl Client {
/// Check a single request
/// Check a single request.
///
/// `request` can be either a [`Request`] or a type that can be converted
/// into it. In any case, it must represent a valid URI.
///
/// # Errors
///
/// This returns an `Err` if
/// - `request` is invalid.
/// - The URI of the request is invalid.
/// - Encrypted connection for a HTTP URL is available but unused.
/// (Only checked when `Client::require_https` is `true`.)
/// Returns an `Err` if:
/// - `request` does not represent a valid URI.
/// - Encrypted connection for a HTTP URL is available but unused. (Only
/// checked when `Client::require_https` is `true`.)
#[allow(clippy::missing_panics_doc)]
pub async fn check<T, E>(&self, request: T) -> Result<Response>
where
Request: TryFrom<T, Error = E>,
ErrorKind: From<E>,
{
let Request { uri, source, .. } = request.try_into()?;
let Request {
ref mut uri,
source,
..
} = request.try_into()?;
let uri = self.remap(uri)?;
self.remap(uri);
// TODO: Allow filtering based on element and attribute
let status = if self.filter.is_excluded(&uri) {
let status = if self.is_excluded(uri) {
Status::Excluded
} else if uri.is_file() {
self.check_file(&uri)
self.check_file(uri)
} else if uri.is_mail() {
self.check_mail(&uri).await
self.check_mail(uri).await
} else {
match self.check_website(&uri).await {
match self.check_website(uri).await {
Status::Ok(code) if self.require_https && uri.scheme() == "http" => {
let mut https_uri = uri.clone();
https_uri
.set_scheme("https")
.map_err(|_| ErrorKind::InvalidURI(uri.clone()))?;
{
// here `uri` must be valid, otherwise `check_website` won't
// return `Ok`, thus `set_scheme` won't fail
debug_assert!(!https_uri.url.cannot_be_a_base());
https_uri.set_scheme("https").unwrap();
}
if self.check_website(&https_uri).await.is_success() {
Status::Error(ErrorKind::InsecureURL(https_uri))
} else {
@ -419,15 +469,11 @@ impl Client {
Ok(Response::new(uri.clone(), status, source))
}
/// Remap URI using the client-defined remap patterns
///
/// # Errors
///
/// Returns an error if the remapping value is not a URI
pub fn remap(&self, uri: Uri) -> Result<Uri> {
match self.remaps {
Some(ref remaps) => remaps.remap(uri),
None => Ok(uri),
/// Remap `uri` using the client-defined remapping rules.
pub fn remap(&self, uri: &mut Uri) {
// TODO: this should be logged (Lucius, Jan 2023)
if let Some(ref remaps) = self.remaps {
remaps.remap(&mut uri.url);
}
}
@ -442,7 +488,7 @@ impl Client {
/// Unsupported schemes will be ignored
pub async fn check_website(&self, uri: &Uri) -> Status {
// Workaround for upstream reqwest panic
if invalid(&uri.url) {
if validate_url(&uri.url) {
if matches!(uri.scheme(), "http" | "https") {
// This is a truly invalid URI with a known scheme.
// If we pass that to reqwest it would panic.
@ -456,21 +502,22 @@ impl Client {
}
let mut retries: u64 = 0;
let mut wait = self.retry_wait_time;
let mut wait_time = self.retry_wait_time;
let mut status = self.check_default(uri).await;
while retries < self.max_retries {
if status.is_success() {
return status;
}
sleep(wait).await;
sleep(wait_time).await;
retries += 1;
wait *= 2;
wait_time = wait_time.saturating_mul(2);
status = self.check_default(uri).await;
}
// Pull out the heavy machinery in case of a failed normal request.
// This could be a GitHub URL and we ran into the rate limiter.
// TODO: We should first try to parse the URI as GitHub URI first (Lucius, Jan 2023)
if let Ok(github_uri) = GithubUri::try_from(uri) {
let status = self.check_github(github_uri).await;
// Only return Github status in case of success
@ -494,10 +541,7 @@ impl Client {
/// A better approach would be to download the file through the API or
/// clone the repo, but we chose the pragmatic approach.
async fn check_github(&self, uri: GithubUri) -> Status {
let client = match &self.github_client {
Some(client) => client,
None => return ErrorKind::MissingGitHubToken.into(),
};
let Some(client) = &self.github_client else { return ErrorKind::MissingGitHubToken.into() };
let repo = match client.repos(&uri.owner, &uri.repo).get().await {
Ok(repo) => repo,
Err(e) => return ErrorKind::GithubRequest(e).into(),
@ -572,19 +616,21 @@ impl Client {
// got merged.
// It is exactly the same check that reqwest runs internally, but unfortunately
// it `unwrap`s (and panics!) instead of returning an error, which we could handle.
fn invalid(url: &Url) -> bool {
url.as_str().parse::<http::Uri>().is_err()
fn validate_url(url: &Url) -> bool {
http::Uri::try_from(url.as_str()).is_err()
}
/// A convenience function to check a single URI.
///
/// This provides the simplest link check utility without having to create a [`Client`].
/// For more complex scenarios, see documentation of [`ClientBuilder`] instead.
/// This provides the simplest link check utility without having to create a
/// [`Client`]. For more complex scenarios, see documentation of
/// [`ClientBuilder`] instead.
///
/// # Errors
///
/// Returns an `Err` if:
/// - The request client cannot be built (see [`ClientBuilder::client`] for failure cases).
/// - The request client cannot be built (see [`ClientBuilder::client`] for
/// failure cases).
/// - The request cannot be checked (see [`Client::check`] for failure cases).
pub async fn check<T, E>(request: T) -> Result<Response>
where
@ -633,9 +679,9 @@ mod tests {
assert!(res.status().is_failure());
// on slow connections, this might take a bit longer than nominal backed-off timeout (7 secs)
assert!(end.as_secs() >= 7);
assert!(end.as_secs() <= 8);
// on slow connections, this might take a bit longer than nominal
// backed-off timeout (7 secs)
assert!((7..=8).contains(&end.as_secs()));
}
#[tokio::test]

View file

@ -60,8 +60,6 @@ mod types;
/// Functionality to extract URIs from inputs
pub mod extract;
/// Remapping rules which allow to map URIs matching a pattern to a different
/// URI. Use in moderation as there are no safety- or performance guarantees.
pub mod remap;
/// Filters are a way to define behavior when encountering

View file

@ -1,20 +1,39 @@
//! Remapping rules which allow to map URLs matching a pattern to a different
//! URL.
//!
//! # Notes
//! Use in moderation as there are no sanity or performance guarantees.
//!
//! - There is no constraint on remapping rules upon instantiation or during
//! remapping. In particular, rules are checked sequentially so later rules
//! might contradict with earlier ones if they both match a URL.
//! - A large rule set has a performance impact because the client needs to
//! match every link against all rules.
// Notes on terminology:
// The major difference between URI (Uniform Resource Identifier) and
// URL (Uniform Resource Locator) is that the former is an identifier for
// resources and the latter is a locator.
// We are not interested in differentiating resources by names and the purpose of
// remapping is to provide an alternative **location** in certain
// circumstances. Thus the documentation should be about remapping URLs
// (locations), not remapping URIs (identities).
use std::ops::Index;
use crate::{ErrorKind, Result};
use regex::Regex;
use reqwest::Url;
use crate::Uri;
use crate::ErrorKind;
/// Remaps allow mapping from a URI pattern to a specified URI
/// Rules that remap matching URL patterns.
///
/// Some use-cases are
/// - Testing URIs prior to production deployment
/// - Testing URIs behind a proxy
/// Some use-cases are:
/// - Testing URLs prior to production deployment.
/// - Testing URLs behind a proxy.
///
/// Be careful when using this feature because checking every link against a
/// large set of regular expressions has a performance impact. Also there are no
/// constraints on the URI mapping, so the rules might contradict each other.
/// # Notes
/// See the module-level documentation for usage notes.
#[derive(Debug, Clone)]
pub struct Remaps(Vec<(Regex, Url)>);
@ -25,28 +44,28 @@ impl Remaps {
Self(patterns)
}
/// Remap URI using the client-defined remap patterns
///
/// # Errors
///
/// Returns an error if the remapping value is not a valid URI
pub fn remap(&self, uri: Uri) -> Result<Uri> {
let mut uri = uri;
for (pattern, new_uri) in &self.0 {
if pattern.is_match(uri.as_str()) {
uri = Uri::try_from(new_uri.clone())?;
}
}
Ok(uri)
/// Returns an iterator over the rules.
///
/// Each item is a reference to a `(Regex, Url)` pair, yielded in the order
/// in which the rules are stored.
// `iter_mut` is deliberately avoided.
pub fn iter(&self) -> std::slice::Iter<(Regex, Url)> {
self.0.iter()
}
/// Returns `true` if there are no remappings defined.
/// Remap URL against remapping rules.
///
/// Rules are tried in order. Whenever a rule's pattern matches the current
/// value of `url`, `url` is overwritten in place with a clone of that rule's
/// target URL. Later rules are matched against the already remapped value,
/// so when several rules match, the last matching one determines the result.
/// If no rule matches, `url` is left untouched.
pub fn remap(&self, url: &mut Url) {
for (pattern, new_url) in self {
if pattern.is_match(url.as_str()) {
// The whole URL is replaced; no partial substitution is performed.
*url = new_url.clone();
}
}
}
/// Returns `true` if there is no remapping rule defined.
#[must_use]
pub fn is_empty(&self) -> bool {
self.0.is_empty()
}
/// Get the number of defined remap rules
/// Get the number of remapping rules.
#[must_use]
pub fn len(&self) -> usize {
self.0.len()
@ -64,13 +83,24 @@ impl Index<usize> for Remaps {
impl TryFrom<&[String]> for Remaps {
type Error = ErrorKind;
/// Try to convert a slice of `String`s to remapping rules.
///
/// Each string should contain a Regex pattern and a URL, separated by
/// whitespaces.
///
/// # Errors
///
/// Returns an `Err` if:
/// - Any string in the slice is not of the form `REGEX URL`.
/// - REGEX is not a valid regular expression.
/// - URL is not a valid URL.
fn try_from(remaps: &[String]) -> std::result::Result<Self, Self::Error> {
let mut parsed = Vec::new();
for remap in remaps {
let params: Vec<_> = remap.split_whitespace().collect();
if params.len() != 2 {
return Err(ErrorKind::InvalidUriRemap(remap.to_string()));
return Err(ErrorKind::InvalidUrlRemap(remap.to_string()));
}
let pattern = Regex::new(params[0])?;
@ -83,6 +113,18 @@ impl TryFrom<&[String]> for Remaps {
}
}
// Implementations of the mutable iterator and the moving iterator are
// deliberately avoided; only iteration by shared reference is provided.
impl<'a> IntoIterator for &'a Remaps {
type Item = &'a (Regex, Url);
type IntoIter = std::slice::Iter<'a, (Regex, Url)>;
// Borrows the inner rule list and yields each `(Regex, Url)` pair by
// reference, which is what allows `for (pattern, url) in &remaps`.
fn into_iter(self) -> Self::IntoIter {
self.0.iter()
}
}
#[cfg(test)]
mod tests {
use super::*;
@ -90,37 +132,37 @@ mod tests {
#[test]
fn test_remap() {
let pattern = Regex::new("https://example.com").unwrap();
let uri = Uri::try_from("http://127.0.0.1:8080").unwrap();
let remaps = Remaps::new(vec![(pattern, uri.clone().url)]);
let new_url = Url::try_from("http://127.0.0.1:8080").unwrap();
let remaps = Remaps::new(vec![(pattern, new_url.clone())]);
let input = Uri::try_from("https://example.com").unwrap();
let remapped = remaps.remap(input).unwrap();
let mut input = Url::try_from("https://example.com").unwrap();
remaps.remap(&mut input);
assert_eq!(remapped, uri);
assert_eq!(input, new_url);
}
#[test]
fn test_remap_path() {
let pattern = Regex::new("../../issues").unwrap();
let uri = Uri::try_from("https://example.com").unwrap();
let remaps = Remaps::new(vec![(pattern, uri.clone().url)]);
let new_url = Url::try_from("https://example.com").unwrap();
let remaps = Remaps::new(vec![(pattern, new_url.clone())]);
let input = Uri::try_from("file://../../issues").unwrap();
let remapped = remaps.remap(input).unwrap();
let mut input = Url::try_from("file://../../issues").unwrap();
remaps.remap(&mut input);
assert_eq!(remapped, uri);
assert_eq!(input, new_url);
}
#[test]
fn test_remap_skip() {
let pattern = Regex::new("https://example.com").unwrap();
let uri = Uri::try_from("http://127.0.0.1:8080").unwrap();
let remaps = Remaps::new(vec![(pattern, uri.url)]);
let new_url = Url::try_from("http://127.0.0.1:8080").unwrap();
let remaps = Remaps::new(vec![(pattern, new_url)]);
let input = Uri::try_from("https://unrelated.example.com").unwrap();
let remapped = remaps.remap(input.clone()).unwrap();
let mut input = Url::try_from("https://unrelated.example.com").unwrap();
remaps.remap(&mut input);
// URI was not modified
assert_eq!(remapped, input);
// URL was not modified
assert_eq!(input, input);
}
}

View file

@ -67,8 +67,8 @@ pub enum ErrorKind {
#[error("Error with base dir `{0}` : {1}")]
InvalidBase(String, String),
/// The given input can not be parsed into a valid URI remapping
#[error("Error handling URI remap expression. Cannot parse into URI remapping: `{0}`")]
InvalidUriRemap(String),
#[error("Cannot parse into URI remapping, must be a Regex pattern and a URL separated by whitespaces: `{0}`")]
InvalidUrlRemap(String),
/// The given path does not resolve to a valid file
#[error("Cannot find local file {0}")]
FileNotFound(PathBuf),
@ -201,7 +201,7 @@ impl Hash for ErrorKind {
Self::UnreachableEmailAddress(u, ..) => u.hash(state),
Self::InsecureURL(u, ..) => u.hash(state),
Self::InvalidBase(base, e) => (base, e).hash(state),
Self::InvalidUriRemap(remap) => (remap).hash(state),
Self::InvalidUrlRemap(remap) => (remap).hash(state),
Self::InvalidHeader(e) => e.to_string().hash(state),
Self::InvalidGlobPattern(e) => e.to_string().hash(state),
Self::InvalidStatusCode(c) => c.hash(state),