lychee/lychee-bin/src/parse.rs

122 lines
3.9 KiB
Rust
Raw Normal View History

use anyhow::{anyhow, Context, Result};
use headers::{authorization::Basic, Authorization, HeaderMap, HeaderName};
use lychee_lib::{remap::Remaps, Base};
use std::{collections::HashSet, time::Duration};
/// Split a single HTTP header into a (key, value) tuple
fn read_header(input: &str) -> Result<(String, String)> {
let elements: Vec<_> = input.split('=').collect();
if elements.len() != 2 {
return Err(anyhow!(
"Header value must be of the form key=value, got {}",
input
));
}
Ok((elements[0].into(), elements[1].into()))
}
/// Parse seconds into a `Duration`
pub(crate) const fn parse_duration_secs(secs: usize) -> Duration {
Duration::from_secs(secs as u64)
}
/// Parse HTTP headers into a `HeaderMap`
pub(crate) fn parse_headers<T: AsRef<str>>(headers: &[T]) -> Result<HeaderMap> {
let mut out = HeaderMap::new();
for header in headers {
let (key, val) = read_header(header.as_ref())?;
Improve concurrency with streams (#330) * Move to from vec to streams Previously we collected all inputs in one vector before checking the links, which is not ideal. Especially when reading many inputs (e.g. by using a glob pattern), this could cause issues like running out of file handles. By moving to streams we avoid that scenario. This is also the first step towards improving performance for many inputs. To stay as close to the pre-stream behaviour, we want to stop processing as soon as an Err value appears in the stream. This is easiest when the stream is consumed in the main thread. Previously, the stream was consumed in a tokio task and the main thread waited for responses. Now, a tokio task waits for responses (and displays them/registers response stats) and the main thread sends links to the ClientPool. To ensure that the main thread waits for all responses to have arrived before finishing the ProgressBar and printing the stats, it waits for the show_results_task to finish. * Return collected links as Stream * Initialize ProgressBar without length because we can't know the amount of links without blocking * Handle stream results in main thread, not in task * Add basic directory support using jwalk * Add test for HTTP protocol file type (http://) * Remove deadpool (once again): Replaced with `futures::StreamExt::for_each_concurrent`. * Refactor main; fix tests * Move commands into separate submodule * Simplify input handling * Simplify collector * Remove unnecessary unwrap * Simplify main * cleanup check * clean up dump command * Handle requests in parallel * Fix formatting and lints Co-authored-by: Timo Freiberg <self@timofreiberg.com>
2021-12-01 17:25:11 +00:00
out.insert(HeaderName::from_bytes(key.as_bytes())?, val.parse()?);
}
Ok(out)
}
/// Parse URI remaps
pub(crate) fn parse_remaps(remaps: &[String]) -> Result<Remaps> {
Remaps::try_from(remaps)
.context("Remaps must be of the form '<pattern> <uri>' (separated by whitespace)")
}
/// Parse a HTTP basic auth header into username and password
pub(crate) fn parse_basic_auth(auth: &str) -> Result<Authorization<Basic>> {
let params: Vec<_> = auth.split(':').collect();
if params.len() != 2 {
return Err(anyhow!(
"Basic auth value must be of the form username:password, got {}",
auth
));
}
Ok(Authorization::basic(params[0], params[1]))
}
pub(crate) fn parse_base(src: &str) -> Result<Base, lychee_lib::ErrorKind> {
Base::try_from(src)
}
/// Parse HTTP status codes into a set of `StatusCode`
///
/// Note that this function does not convert the status codes into
/// `StatusCode` but rather into `u16` to avoid the need for
/// `http` as a dependency and to support custom status codes, which are
/// necessary for some websites, which don't adhere to the HTTP spec or IANA.
pub(crate) fn parse_statuscodes(accept: &str) -> Result<HashSet<u16>> {
let mut statuscodes = HashSet::new();
for code in accept.split(',') {
let code: u16 = code.parse::<u16>()?;
statuscodes.insert(code);
}
Ok(statuscodes)
}
#[cfg(test)]
2022-06-03 18:13:07 +00:00
mod tests {
use std::collections::HashSet;
use headers::{HeaderMap, HeaderMapExt};
use regex::Regex;
use reqwest::{header, Url};
use super::*;
#[test]
fn test_parse_custom_headers() {
let mut custom = HeaderMap::new();
custom.insert(header::ACCEPT, "text/html".parse().unwrap());
assert_eq!(parse_headers(&["accept=text/html"]).unwrap(), custom);
}
#[test]
fn test_parse_statuscodes() {
let actual = parse_statuscodes("200,204,301").unwrap();
let expected = IntoIterator::into_iter([200, 204, 301]).collect::<HashSet<_>>();
assert_eq!(actual, expected);
}
#[test]
fn test_parse_basic_auth() {
let mut expected = HeaderMap::new();
expected.insert(
header::AUTHORIZATION,
"Basic YWxhZGluOmFicmV0ZXNlc2Ftbw==".parse().unwrap(),
);
let mut actual = HeaderMap::new();
let auth_header = parse_basic_auth("aladin:abretesesamo").unwrap();
actual.typed_insert(auth_header);
assert_eq!(expected, actual);
}
#[test]
fn test_parse_remap() {
let remaps =
parse_remaps(&["https://example.com http://127.0.0.1:8080".to_string()]).unwrap();
assert_eq!(remaps.len(), 1);
let (pattern, url) = remaps[0].to_owned();
assert_eq!(
pattern.to_string(),
Regex::new("https://example.com").unwrap().to_string()
);
assert_eq!(url, Url::try_from("http://127.0.0.1:8080").unwrap());
}
}