Add support for relative links (#1489)

This commit introduces several improvements to the file checking process and URI handling:

- Extract file checking logic into separate `Checker` structs (`FileChecker`, `WebsiteChecker`, `MailChecker`)
- Improve handling of relative and absolute file paths
- Enhance URI parsing and creation from file paths
- Refactor `create_request` function for better clarity and error handling

These changes provide better support for resolving relative links, handling different base URLs, and working with file paths.

Fixes https://github.com/lycheeverse/lychee/issues/1296 and https://github.com/lycheeverse/lychee/issues/1480
This commit is contained in:
Matthias Endler 2024-10-26 04:07:37 +02:00 committed by GitHub
parent 87d5b56e4f
commit 3094bbca33
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
21 changed files with 1133 additions and 522 deletions

View file

@ -0,0 +1,8 @@
<html>
<head>
<title>About</title>
</head>
<body>
<h1 id="fragment">About</h1>
</body>
</html>

View file

21
fixtures/resolve_paths/index.html vendored Normal file
View file

@ -0,0 +1,21 @@
<html>
<head>
<title>Index</title>
</head>
<body>
<h1>Index Title</h1>
<p>
<ul>
<li>
<a href="/">home</a>
</li>
<li>
<a href="/about">About</a>
</li>
<li>
<a href="/another page">About</a>
</li>
</ul>
</p>
</body>
</html>

View file

@ -55,6 +55,7 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc<CookieStoreMutex>>) -
ClientBuilder::builder()
.remaps(remaps)
.base(cfg.base.clone())
.includes(includes)
.excludes(excludes)
.exclude_all_private(cfg.exclude_all_private)

View file

@ -429,14 +429,12 @@ mod tests {
#[tokio::test]
async fn test_invalid_url() {
// Run a normal request with an invalid Url
let client = ClientBuilder::builder().build().client().unwrap();
let request = Request::try_from("http://\"").unwrap();
let response = check_url(&client, request).await;
assert!(response.status().is_error());
let uri = Uri::try_from("http://\"").unwrap();
let response = client.check_website(&uri, None).await.unwrap();
assert!(matches!(
response.status(),
Status::Error(ErrorKind::InvalidURI(_))
response,
Status::Unsupported(ErrorKind::BuildRequestClient(_))
));
}

View file

@ -65,12 +65,18 @@ mod tests {
}
}
#[cfg(test)]
/// Helper function to strip ANSI color codes for tests
fn strip_ansi_codes(s: &str) -> String {
console::strip_ansi_codes(s).to_string()
}
#[test]
fn test_format_response_with_ok_status() {
let formatter = ColorFormatter;
let body = mock_response_body(Status::Ok(StatusCode::OK), "https://example.com");
assert_eq!(
formatter.format_response(&body),
strip_ansi_codes(&formatter.format_response(&body)),
" [200] https://example.com/"
);
}
@ -83,7 +89,7 @@ mod tests {
"https://example.com/404",
);
assert_eq!(
formatter.format_response(&body),
strip_ansi_codes(&formatter.format_response(&body)),
" [ERROR] https://example.com/404"
);
}

View file

@ -267,17 +267,17 @@ mod cli {
#[test]
fn test_resolve_paths() {
let mut cmd = main_command();
let offline_dir = fixtures_path().join("offline");
let dir = fixtures_path().join("resolve_paths");
cmd.arg("--offline")
.arg("--base")
.arg(&offline_dir)
.arg(offline_dir.join("index.html"))
.arg(&dir)
.arg(dir.join("index.html"))
.env_clear()
.assert()
.success()
.stdout(contains("4 Total"))
.stdout(contains("4 OK"));
.stdout(contains("3 Total"))
.stdout(contains("3 OK"));
}
#[test]
@ -944,13 +944,17 @@ mod cli {
// check content of cache file
let data = fs::read_to_string(&cache_file)?;
if data.is_empty() {
println!("Cache file is empty!");
}
assert!(data.contains(&format!("{}/,200", mock_server_ok.uri())));
assert!(!data.contains(&format!("{}/,204", mock_server_no_content.uri())));
assert!(!data.contains(&format!("{}/,429", mock_server_too_many_requests.uri())));
// clear the cache file
fs::remove_file(&cache_file)?;
Ok(())
}
@ -1216,8 +1220,9 @@ mod cli {
Ok(())
}
/// If base-dir is not set, don't throw an error in case we encounter
/// an absolute local link within a file (e.g. `/about`).
/// If `base-dir` is not set, don't throw an error in case we encounter
/// an absolute local link (e.g. `/about`) within a file.
/// Instead, simply ignore the link.
#[test]
fn test_ignore_absolute_local_links_without_base() -> Result<()> {
let mut cmd = main_command();
@ -1409,9 +1414,7 @@ mod cli {
.arg("./NOT-A-REAL-TEST-FIXTURE.md")
.assert()
.failure()
.stderr(contains(
"Cannot find local file ./NOT-A-REAL-TEST-FIXTURE.md",
));
.stderr(contains("Invalid file path: ./NOT-A-REAL-TEST-FIXTURE.md"));
Ok(())
}
@ -1667,4 +1670,46 @@ mod cli {
.success()
.stdout(contains("0 Errors"));
}
/// Test relative paths
///
/// Imagine a web server hosting a site with the following structure:
/// root
/// └── test
///     ├── index.html
///     └── next.html
///
/// where `root/test/index.html` contains `<a href="next.html">next</a>`
/// When checking the link in `root/test/index.html` we should be able to
/// resolve the relative path to `root/test/next.html`
///
/// Note that the relative path is not resolved to the root of the server
/// but relative to the file that contains the link.
#[tokio::test]
async fn test_resolve_relative_paths_in_subfolder() -> Result<()> {
    let mock_server = wiremock::MockServer::start().await;

    // The page under test: contains a single relative link.
    let body = r#"<a href="next.html">next</a>"#;
    wiremock::Mock::given(wiremock::matchers::method("GET"))
        .and(wiremock::matchers::path("/test/index.html"))
        .respond_with(wiremock::ResponseTemplate::new(200).set_body_string(body))
        .mount(&mock_server)
        .await;

    // The target of the relative link; must resolve to `/test/next.html`,
    // i.e. relative to the containing file, not to the server root.
    wiremock::Mock::given(wiremock::matchers::method("GET"))
        .and(wiremock::matchers::path("/test/next.html"))
        .respond_with(wiremock::ResponseTemplate::new(200))
        .mount(&mock_server)
        .await;

    let mut cmd = main_command();
    cmd.arg("--verbose")
        .arg(format!("{}/test/index.html", mock_server.uri()))
        .assert()
        .success()
        .stdout(contains("1 Total"))
        .stdout(contains("0 Errors"));

    Ok(())
}
}

View file

@ -1,80 +0,0 @@
use crate::{
chain::{ChainResult, Handler},
retry::RetryExt,
Status,
};
use async_trait::async_trait;
use http::StatusCode;
use reqwest::Request;
use std::{collections::HashSet, time::Duration};
#[derive(Debug, Clone)]
pub(crate) struct Checker {
    /// Initial wait time between retries; doubles after each failed attempt.
    retry_wait_time: Duration,
    /// Maximum number of retries per request before returning an error.
    max_retries: u64,
    /// Underlying HTTP client used to execute the requests.
    reqwest_client: reqwest::Client,
    /// Optional set of status codes that are additionally accepted as success.
    accepted: Option<HashSet<StatusCode>>,
}
impl Checker {
    /// Construct a `Checker` from its configuration values.
    pub(crate) const fn new(
        retry_wait_time: Duration,
        max_retries: u64,
        reqwest_client: reqwest::Client,
        accepted: Option<HashSet<StatusCode>>,
    ) -> Self {
        Self {
            retry_wait_time,
            max_retries,
            reqwest_client,
            accepted,
        }
    }

    /// Retry requests up to `max_retries` times
    /// with an exponential backoff.
    pub(crate) async fn retry_request(&self, request: Request) -> Status {
        // First attempt happens unconditionally; only failures that are
        // worth retrying enter the backoff loop below.
        let mut status = self.check_default(clone_unwrap(&request)).await;
        let mut backoff = self.retry_wait_time;

        for _ in 0..self.max_retries {
            if status.is_success() || !status.should_retry() {
                break;
            }
            tokio::time::sleep(backoff).await;
            backoff = backoff.saturating_mul(2);
            status = self.check_default(clone_unwrap(&request)).await;
        }

        status
    }

    /// Perform a single request via [reqwest](https://github.com/seanmonstar/reqwest)
    /// and translate the outcome into a [`Status`].
    async fn check_default(&self, request: Request) -> Status {
        match self.reqwest_client.execute(request).await {
            Err(err) => err.into(),
            Ok(ref response) => Status::new(response, self.accepted.clone()),
        }
    }
}
/// Clones a `reqwest::Request`.
///
/// # Safety
///
/// This panics if the request cannot be cloned. This should only happen if the
/// request body is a `reqwest` stream. We disable the `stream` feature, so the
/// body should never be a stream.
///
/// See <https://github.com/seanmonstar/reqwest/blob/de5dbb1ab849cc301dcefebaeabdf4ce2e0f1e53/src/async_impl/body.rs#L168>
fn clone_unwrap(request: &Request) -> Request {
    // `try_clone` returns `None` only for streaming bodies, which are
    // impossible here (see safety note above), hence the `expect`.
    request.try_clone().expect("Failed to clone request: body was a stream, which should be impossible with `stream` feature disabled")
}
#[async_trait]
impl Handler<Request, Status> for Checker {
    /// Terminal chain handler: execute the (retried) request and
    /// finish the chain with the resulting status.
    async fn handle(&mut self, input: Request) -> ChainResult<Request, Status> {
        ChainResult::Done(self.retry_request(input).await)
    }
}

View file

@ -0,0 +1,179 @@
use http::StatusCode;
use log::warn;
use std::path::{Path, PathBuf};
use crate::{utils::fragment_checker::FragmentChecker, Base, ErrorKind, Status, Uri};
/// A utility for checking the existence and validity of file-based URIs.
///
/// `FileChecker` resolves and validates file paths, handling both absolute and relative paths.
/// It supports base path resolution, fallback extensions for files without extensions,
/// and optional fragment checking for HTML files.
#[derive(Debug, Clone)]
pub(crate) struct FileChecker {
    /// Base path or URL used for resolving relative paths.
    base: Option<Base>,
    /// List of file extensions to try (in order) if the original path doesn't exist.
    fallback_extensions: Vec<String>,
    /// Whether to check for the existence of fragments (e.g., `#section-id`) in HTML files.
    include_fragments: bool,
    /// Utility for performing (and caching) fragment checks in HTML files.
    fragment_checker: FragmentChecker,
}
impl FileChecker {
    /// Creates a new `FileChecker` with the given configuration.
    ///
    /// # Arguments
    ///
    /// * `base` - Optional base path or URL for resolving relative paths.
    /// * `fallback_extensions` - List of extensions to try (in order) if the original file is not found.
    /// * `include_fragments` - Whether to check for fragment existence in HTML files.
    pub(crate) fn new(
        base: Option<Base>,
        fallback_extensions: Vec<String>,
        include_fragments: bool,
    ) -> Self {
        Self {
            base,
            fallback_extensions,
            include_fragments,
            fragment_checker: FragmentChecker::new(),
        }
    }

    /// Checks the given file URI for existence and validity.
    ///
    /// This method resolves the URI to a file path, checks if the file exists,
    /// and optionally checks for the existence of fragments in HTML files.
    ///
    /// # Arguments
    ///
    /// * `uri` - The URI to check.
    ///
    /// # Returns
    ///
    /// Returns a `Status` indicating the result of the check.
    pub(crate) async fn check(&self, uri: &Uri) -> Status {
        let Ok(path) = uri.url.to_file_path() else {
            return ErrorKind::InvalidFilePath(uri.clone()).into();
        };

        let resolved_path = self.resolve_path(&path);
        self.check_path(&resolved_path, uri).await
    }

    /// Resolves the given path using the base path, if one is set.
    ///
    /// Absolute paths (e.g. `/about`) are re-rooted below the base;
    /// relative paths are joined onto the base.
    ///
    /// # Arguments
    ///
    /// * `path` - The path to resolve.
    ///
    /// # Returns
    ///
    /// Returns the resolved path as a `PathBuf`.
    fn resolve_path(&self, path: &Path) -> PathBuf {
        if let Some(Base::Local(base_path)) = &self.base {
            if path.is_absolute() {
                let absolute_base_path = if base_path.is_relative() {
                    // NOTE(review): if `current_dir()` fails, this silently
                    // falls back to an empty path, so resolution degrades to
                    // the bare base path — TODO confirm this is intended.
                    std::env::current_dir().unwrap_or_default().join(base_path)
                } else {
                    base_path.clone()
                };

                // Strip the leading `/` so the absolute link is interpreted
                // relative to the base root (e.g. `/about` -> `<base>/about`).
                let stripped = path.strip_prefix("/").unwrap_or(path);
                absolute_base_path.join(stripped)
            } else {
                base_path.join(path)
            }
        } else {
            path.to_path_buf()
        }
    }

    /// Checks if the given path exists and performs additional checks if necessary.
    ///
    /// # Arguments
    ///
    /// * `path` - The path to check.
    /// * `uri` - The original URI, used for error reporting.
    ///
    /// # Returns
    ///
    /// Returns a `Status` indicating the result of the check.
    async fn check_path(&self, path: &Path, uri: &Uri) -> Status {
        if path.exists() {
            return self.check_existing_path(path, uri).await;
        }

        self.check_with_fallback_extensions(path, uri).await
    }

    /// Checks an existing path, optionally verifying fragments for HTML files.
    ///
    /// # Arguments
    ///
    /// * `path` - The path to check.
    /// * `uri` - The original URI, used for error reporting.
    ///
    /// # Returns
    ///
    /// Returns a `Status` indicating the result of the check.
    async fn check_existing_path(&self, path: &Path, uri: &Uri) -> Status {
        if self.include_fragments {
            self.check_fragment(path, uri).await
        } else {
            Status::Ok(StatusCode::OK)
        }
    }

    /// Attempts to find a file by trying different extensions specified in `fallback_extensions`.
    ///
    /// Only called after the original path was found not to exist, so it
    /// goes straight to trying the fallback extensions.
    ///
    /// # Arguments
    ///
    /// * `path` - The original path to check.
    /// * `uri` - The original URI, used for error reporting.
    ///
    /// # Returns
    ///
    /// Returns a `Status` indicating the result of the check.
    async fn check_with_fallback_extensions(&self, path: &Path, uri: &Uri) -> Status {
        let mut path_buf = path.to_path_buf();

        // Try each fallback extension in turn. `set_extension` replaces any
        // existing extension, so every candidate is tried independently of
        // the previous one.
        for ext in &self.fallback_extensions {
            path_buf.set_extension(ext);
            if path_buf.exists() {
                return self.check_existing_path(&path_buf, uri).await;
            }
        }

        ErrorKind::InvalidFilePath(uri.clone()).into()
    }

    /// Checks for the existence of a fragment in an HTML file.
    ///
    /// # Arguments
    ///
    /// * `path` - The path to the HTML file.
    /// * `uri` - The original URI, containing the fragment to check.
    ///
    /// # Returns
    ///
    /// Returns a `Status` indicating the result of the fragment check.
    async fn check_fragment(&self, path: &Path, uri: &Uri) -> Status {
        match self.fragment_checker.check(path, &uri.url).await {
            Ok(true) => Status::Ok(StatusCode::OK),
            Ok(false) => ErrorKind::InvalidFragment(uri.clone()).into(),
            Err(err) => {
                // Fragment checking is best-effort: on I/O or parse errors
                // we only warn and treat the link as valid.
                warn!("Skipping fragment check due to the following error: {err}");
                Status::Ok(StatusCode::OK)
            }
        }
    }
}

View file

@ -0,0 +1,57 @@
#[cfg(all(feature = "email-check", feature = "native-tls"))]
use http::StatusCode;
#[cfg(all(feature = "email-check", feature = "native-tls"))]
use crate::ErrorKind;
use crate::{Status, Uri};
#[cfg(all(feature = "email-check", feature = "native-tls"))]
use check_if_email_exists::{check_email, CheckEmailInput, Reachable};
#[cfg(all(feature = "email-check", feature = "native-tls"))]
use crate::types::mail;
/// A utility for checking the validity of email addresses.
///
/// `MailChecker` is responsible for validating email addresses,
/// optionally performing reachability checks when the appropriate
/// features are enabled.
#[derive(Debug, Clone)]
pub(crate) struct MailChecker {}
impl MailChecker {
    /// Creates a new `MailChecker`.
    pub(crate) const fn new() -> Self {
        Self {}
    }

    /// Check a mail address, or equivalently a `mailto` URI.
    ///
    /// Query parameters (e.g. `contact@example.com?subject="Hello"`) are not
    /// part of the mail address itself; they are passed on to a mail client
    /// and therefore ignored by this check.
    #[cfg(all(feature = "email-check", feature = "native-tls"))]
    pub(crate) async fn check_mail(&self, uri: &Uri) -> Status {
        self.perform_email_check(uri).await
    }

    /// Without the `email-check` and `native-tls` features, mail addresses
    /// are not checked at all and are simply reported as excluded.
    #[cfg(not(all(feature = "email-check", feature = "native-tls")))]
    pub(crate) async fn check_mail(&self, _uri: &Uri) -> Status {
        Status::Excluded
    }

    /// Perform the actual reachability check for a mail address.
    #[cfg(all(feature = "email-check", feature = "native-tls"))]
    async fn perform_email_check(&self, uri: &Uri) -> Status {
        let input = CheckEmailInput::new(uri.url.path().to_string());
        let output = check_email(&input).await;
        match output.is_reachable {
            Reachable::Invalid => {
                ErrorKind::UnreachableEmailAddress(uri.clone(), mail::error_from_output(&output))
                    .into()
            }
            _ => Status::Ok(StatusCode::OK),
        }
    }
}

View file

@ -0,0 +1,7 @@
//! Checker Module
//!
//! This module contains all checkers, which are responsible for checking the status of a URL.
pub(crate) mod file;
pub(crate) mod mail;
pub(crate) mod website;

View file

@ -0,0 +1,226 @@
use crate::{
chain::{Chain, ChainResult, ClientRequestChains, Handler, RequestChain},
quirks::Quirks,
retry::RetryExt,
types::uri::github::GithubUri,
BasicAuthCredentials, ErrorKind, Status, Uri,
};
use async_trait::async_trait;
use http::StatusCode;
use octocrab::Octocrab;
use reqwest::Request;
use std::{collections::HashSet, time::Duration};
#[derive(Debug, Clone)]
pub(crate) struct WebsiteChecker {
    /// Request method used for making requests.
    method: reqwest::Method,

    /// The HTTP client used for requests.
    reqwest_client: reqwest::Client,

    /// GitHub client used for requests.
    github_client: Option<Octocrab>,

    /// The chain of plugins to be executed on each request.
    plugin_request_chain: RequestChain,

    /// Maximum number of retries per request before returning an error.
    max_retries: u64,

    /// Initial wait time between retries of failed requests. This doubles after
    /// each failure.
    retry_wait_time: Duration,

    /// Set of accepted return codes / status codes.
    ///
    /// Unmatched return codes / status codes are deemed as errors.
    accepted: Option<HashSet<StatusCode>>,

    /// Requires using HTTPS when it's available.
    ///
    /// This would treat unencrypted links as errors when HTTPS is available.
    require_https: bool,
}
impl WebsiteChecker {
#[allow(clippy::too_many_arguments)]
pub(crate) const fn new(
method: reqwest::Method,
retry_wait_time: Duration,
max_retries: u64,
reqwest_client: reqwest::Client,
accepted: Option<HashSet<StatusCode>>,
github_client: Option<Octocrab>,
require_https: bool,
plugin_request_chain: RequestChain,
) -> Self {
Self {
method,
reqwest_client,
github_client,
plugin_request_chain,
max_retries,
retry_wait_time,
accepted,
require_https,
}
}
/// Retry requests up to `max_retries` times
/// with an exponential backoff.
pub(crate) async fn retry_request(&self, request: Request) -> Status {
let mut retries: u64 = 0;
let mut wait_time = self.retry_wait_time;
let mut status = self.check_default(clone_unwrap(&request)).await;
while retries < self.max_retries {
if status.is_success() || !status.should_retry() {
return status;
}
retries += 1;
tokio::time::sleep(wait_time).await;
wait_time = wait_time.saturating_mul(2);
status = self.check_default(clone_unwrap(&request)).await;
}
status
}
/// Check a URI using [reqwest](https://github.com/seanmonstar/reqwest).
async fn check_default(&self, request: Request) -> Status {
match self.reqwest_client.execute(request).await {
Ok(ref response) => Status::new(response, self.accepted.clone()),
Err(e) => e.into(),
}
}
/// Checks the given URI of a website.
///
/// # Errors
///
/// This returns an `Err` if
/// - The URI is invalid.
/// - The request failed.
/// - The response status code is not accepted.
/// - The URI cannot be converted to HTTPS.
pub(crate) async fn check_website(
&self,
uri: &Uri,
credentials: Option<BasicAuthCredentials>,
) -> Result<Status, ErrorKind> {
let default_chain: RequestChain = Chain::new(vec![
Box::<Quirks>::default(),
Box::new(credentials),
Box::new(self.clone()),
]);
match self.check_website_inner(uri, &default_chain).await {
Status::Ok(code) if self.require_https && uri.scheme() == "http" => {
if self
.check_website_inner(&uri.to_https()?, &default_chain)
.await
.is_success()
{
Ok(Status::Error(ErrorKind::InsecureURL(uri.to_https()?)))
} else {
Ok(Status::Ok(code))
}
}
s => Ok(s),
}
}
/// Checks the given URI of a website.
///
/// Unsupported schemes will be ignored
///
/// Note: we use `inner` to improve compile times by avoiding monomorphization
///
/// # Errors
///
/// This returns an `Err` if
/// - The URI is invalid.
/// - The request failed.
/// - The response status code is not accepted.
async fn check_website_inner(&self, uri: &Uri, default_chain: &RequestChain) -> Status {
let request = self
.reqwest_client
.request(self.method.clone(), uri.as_str())
.build();
let request = match request {
Ok(r) => r,
Err(e) => return e.into(),
};
let status = ClientRequestChains::new(vec![&self.plugin_request_chain, default_chain])
.traverse(request)
.await;
self.handle_github(status, uri).await
}
// Pull out the heavy machinery in case of a failed normal request.
// This could be a GitHub URL and we ran into the rate limiter.
// TODO: We should try to parse the URI as GitHub URI first (Lucius, Jan 2023)
async fn handle_github(&self, status: Status, uri: &Uri) -> Status {
if status.is_success() {
return status;
}
if let Ok(github_uri) = GithubUri::try_from(uri) {
let status = self.check_github(github_uri).await;
if status.is_success() {
return status;
}
}
status
}
/// Check a `uri` hosted on `GitHub` via the GitHub API.
///
/// # Caveats
///
/// Files inside private repositories won't get checked and instead would
/// be reported as valid if the repository itself is reachable through the
/// API.
///
/// A better approach would be to download the file through the API or
/// clone the repo, but we chose the pragmatic approach.
async fn check_github(&self, uri: GithubUri) -> Status {
let Some(client) = &self.github_client else {
return ErrorKind::MissingGitHubToken.into();
};
let repo = match client.repos(&uri.owner, &uri.repo).get().await {
Ok(repo) => repo,
Err(e) => return ErrorKind::GithubRequest(Box::new(e)).into(),
};
if let Some(true) = repo.private {
return Status::Ok(StatusCode::OK);
} else if let Some(endpoint) = uri.endpoint {
return ErrorKind::InvalidGithubUrl(format!("{}/{}/{endpoint}", uri.owner, uri.repo))
.into();
}
Status::Ok(StatusCode::OK)
}
}
/// Clones a `reqwest::Request`.
///
/// # Safety
///
/// This panics if the request cannot be cloned. This should only happen if the
/// request body is a `reqwest` stream. We disable the `stream` feature, so the
/// body should never be a stream.
///
/// See <https://github.com/seanmonstar/reqwest/blob/de5dbb1ab849cc301dcefebaeabdf4ce2e0f1e53/src/async_impl/body.rs#L168>
fn clone_unwrap(request: &Request) -> Request {
    // `try_clone` returns `None` only for streaming bodies, which are
    // impossible here (see safety note above), hence the `expect`.
    request.try_clone().expect("Failed to clone request: body was a stream, which should be impossible with `stream` feature disabled")
}
#[async_trait]
impl Handler<Request, Status> for WebsiteChecker {
    /// Terminal chain handler: execute the (retried) request and
    /// finish the chain with the resulting status.
    async fn handle(&mut self, input: Request) -> ChainResult<Request, Status> {
        ChainResult::Done(self.retry_request(input).await)
    }
}

View file

@ -15,8 +15,6 @@
)]
use std::{collections::HashSet, path::Path, sync::Arc, time::Duration};
#[cfg(all(feature = "email-check", feature = "native-tls"))]
use check_if_email_exists::{check_email, CheckEmailInput, Reachable};
use http::{
header::{HeaderMap, HeaderValue},
StatusCode,
@ -24,25 +22,21 @@ use http::{
use log::{debug, warn};
use octocrab::Octocrab;
use regex::RegexSet;
use reqwest::{header, redirect, Url};
use reqwest::{header, redirect};
use reqwest_cookie_store::CookieStoreMutex;
use secrecy::{ExposeSecret, SecretString};
use typed_builder::TypedBuilder;
use crate::{
chain::{Chain, ClientRequestChains, RequestChain},
checker::Checker,
chain::RequestChain,
checker::file::FileChecker,
checker::{mail::MailChecker, website::WebsiteChecker},
filter::{Excludes, Filter, Includes},
quirks::Quirks,
remap::Remaps,
types::uri::github::GithubUri,
utils::fragment_checker::FragmentChecker,
ErrorKind, Request, Response, Result, Status, Uri,
Base, BasicAuthCredentials, ErrorKind, Request, Response, Result, Status, Uri,
};
#[cfg(all(feature = "email-check", feature = "native-tls"))]
use crate::types::mail;
/// Default number of redirects before a request is deemed as failed, 5.
pub const DEFAULT_MAX_REDIRECTS: usize = 5;
/// Default number of retries before a request is deemed as failed, 3.
@ -248,6 +242,12 @@ pub struct ClientBuilder {
/// Response timeout per request in seconds.
timeout: Option<Duration>,
/// Base for resolving paths.
///
/// E.g. if the base is `/home/user/` and the path is `file.txt`, the
/// resolved path would be `/home/user/file.txt`.
base: Option<Base>,
/// Initial time between retries of failed requests.
///
/// Defaults to [`DEFAULT_RETRY_WAIT_TIME_SECS`].
@ -383,20 +383,28 @@ impl ClientBuilder {
include_mail: self.include_mail,
};
Ok(Client {
let website_checker = WebsiteChecker::new(
self.method,
self.retry_wait_time,
self.max_retries,
reqwest_client,
self.accepted,
github_client,
self.require_https,
self.plugin_request_chain,
);
Ok(Client {
remaps: self.remaps,
fallback_extensions: self.fallback_extensions,
filter,
max_retries: self.max_retries,
retry_wait_time: self.retry_wait_time,
method: self.method,
accepted: self.accepted,
require_https: self.require_https,
include_fragments: self.include_fragments,
email_checker: MailChecker::new(),
website_checker,
file_checker: FileChecker::new(
self.base,
self.fallback_extensions,
self.include_fragments,
),
fragment_checker: FragmentChecker::new(),
plugin_request_chain: self.plugin_request_chain,
})
}
}
@ -407,50 +415,23 @@ impl ClientBuilder {
/// options.
#[derive(Debug, Clone)]
pub struct Client {
/// Underlying `reqwest` client instance that handles the HTTP requests.
reqwest_client: reqwest::Client,
/// Optional GitHub client that handles communications with GitHub.
github_client: Option<Octocrab>,
/// Optional remapping rules for URIs matching pattern.
remaps: Option<Remaps>,
/// Automatically append file extensions to `file://` URIs as needed
fallback_extensions: Vec<String>,
/// Rules to decided whether each link should be checked or ignored.
filter: Filter,
/// Maximum number of retries per request before returning an error.
max_retries: u64,
/// A checker for website URLs.
website_checker: WebsiteChecker,
/// Initial wait time between retries of failed requests. This doubles after
/// each failure.
retry_wait_time: Duration,
/// A checker for file URLs.
file_checker: FileChecker,
/// HTTP method used for requests, e.g. `GET` or `HEAD`.
///
/// The same method will be used for all links.
method: reqwest::Method,
/// Set of accepted return codes / status codes.
///
/// Unmatched return codes/ status codes are deemed as errors.
accepted: Option<HashSet<StatusCode>>,
/// Requires using HTTPS when it's available.
///
/// This would treat unencrypted links as errors when HTTPS is available.
require_https: bool,
/// Enable the checking of fragments in links.
include_fragments: bool,
/// A checker for email URLs.
email_checker: MailChecker,
/// Caches Fragments
fragment_checker: FragmentChecker,
plugin_request_chain: RequestChain,
}
impl Client {
@ -463,7 +444,7 @@ impl Client {
///
/// Returns an `Err` if:
/// - `request` does not represent a valid URI.
/// - Encrypted connection for a HTTP URL is available but unused. (Only
/// - Encrypted connection for a HTTP URL is available but unused. (Only
/// checked when `Client::require_https` is `true`.)
#[allow(clippy::missing_panics_doc)]
pub async fn check<T, E>(&self, request: T) -> Result<Response>
@ -493,27 +474,22 @@ impl Client {
return Ok(Response::new(uri.clone(), Status::Excluded, source));
}
let default_chain: RequestChain = Chain::new(vec![
Box::<Quirks>::default(),
Box::new(credentials),
Box::new(Checker::new(
self.retry_wait_time,
self.max_retries,
self.reqwest_client.clone(),
self.accepted.clone(),
)),
]);
let status = match uri.scheme() {
// We don't check tel: URIs
_ if uri.is_tel() => Status::Excluded,
_ if uri.is_file() => self.check_file(uri).await,
_ if uri.is_mail() => self.check_mail(uri).await,
_ if uri.is_tel() => Status::Excluded,
_ => self.check_website(uri, default_chain).await?,
_ => self.check_website(uri, credentials).await?,
};
Ok(Response::new(uri.clone(), status, source))
}
/// Check a single file using the file checker.
pub async fn check_file(&self, uri: &Uri) -> Status {
self.file_checker.check(uri).await
}
/// Remap `uri` using the client-defined remapping rules.
///
/// # Errors
@ -541,151 +517,17 @@ impl Client {
/// - The request failed.
/// - The response status code is not accepted.
/// - The URI cannot be converted to HTTPS.
pub async fn check_website(&self, uri: &Uri, default_chain: RequestChain) -> Result<Status> {
match self.check_website_inner(uri, &default_chain).await {
Status::Ok(code) if self.require_https && uri.scheme() == "http" => {
if self
.check_website_inner(&uri.to_https()?, &default_chain)
.await
.is_success()
{
Ok(Status::Error(ErrorKind::InsecureURL(uri.to_https()?)))
} else {
// HTTPS is not available for this URI,
// so the original HTTP URL is fine.
Ok(Status::Ok(code))
}
}
s => Ok(s),
}
pub async fn check_website(
&self,
uri: &Uri,
credentials: Option<BasicAuthCredentials>,
) -> Result<Status> {
self.website_checker.check_website(uri, credentials).await
}
/// Checks the given URI of a website.
///
/// Unsupported schemes will be ignored
///
/// # Errors
///
/// This returns an `Err` if
/// - The URI is invalid.
/// - The request failed.
/// - The response status code is not accepted.
pub async fn check_website_inner(&self, uri: &Uri, default_chain: &RequestChain) -> Status {
// Workaround for upstream reqwest panic
if validate_url(&uri.url) {
if matches!(uri.scheme(), "http" | "https") {
// This is a truly invalid URI with a known scheme.
// If we pass that to reqwest it would panic.
return Status::Error(ErrorKind::InvalidURI(uri.clone()));
}
// This is merely a URI with a scheme that is not supported by
// reqwest yet. It would be safe to pass that to reqwest and it
// wouldn't panic, but it's also unnecessary, because it would
// simply return an error.
return Status::Unsupported(ErrorKind::InvalidURI(uri.clone()));
}
let request = self
.reqwest_client
.request(self.method.clone(), uri.as_str())
.build();
let request = match request {
Ok(r) => r,
Err(e) => return e.into(),
};
let status = ClientRequestChains::new(vec![&self.plugin_request_chain, default_chain])
.traverse(request)
.await;
self.handle_github(status, uri).await
}
// Pull out the heavy machinery in case of a failed normal request.
// This could be a GitHub URL and we ran into the rate limiter.
// TODO: We should try to parse the URI as GitHub URI first (Lucius, Jan 2023)
async fn handle_github(&self, status: Status, uri: &Uri) -> Status {
if status.is_success() {
return status;
}
if let Ok(github_uri) = GithubUri::try_from(uri) {
let status = self.check_github(github_uri).await;
// Only return GitHub status in case of success
// Otherwise return the original error, which has more information
if status.is_success() {
return status;
}
}
status
}
/// Check a `uri` hosted on `GitHub` via the GitHub API.
///
/// # Caveats
///
/// Files inside private repositories won't get checked and instead would
/// be reported as valid if the repository itself is reachable through the
/// API.
///
/// A better approach would be to download the file through the API or
/// clone the repo, but we chose the pragmatic approach.
async fn check_github(&self, uri: GithubUri) -> Status {
let Some(client) = &self.github_client else {
return ErrorKind::MissingGitHubToken.into();
};
let repo = match client.repos(&uri.owner, &uri.repo).get().await {
Ok(repo) => repo,
Err(e) => return ErrorKind::GithubRequest(Box::new(e)).into(),
};
if let Some(true) = repo.private {
// The private repo exists. Assume a given endpoint exists as well
// (e.g. `issues` in `github.com/org/private/issues`). This is not
// always the case but simplifies the check.
return Status::Ok(StatusCode::OK);
} else if let Some(endpoint) = uri.endpoint {
// The URI returned a non-200 status code from a normal request and
// now we find that this public repo is reachable through the API,
// so that must mean the full URI (which includes the additional
// endpoint) must be invalid.
return ErrorKind::InvalidGithubUrl(format!("{}/{}/{endpoint}", uri.owner, uri.repo))
.into();
}
// Found public repo without endpoint
Status::Ok(StatusCode::OK)
}
/// Check a `file` URI.
pub async fn check_file(&self, uri: &Uri) -> Status {
let Ok(path) = uri.url.to_file_path() else {
return ErrorKind::InvalidFilePath(uri.clone()).into();
};
if path.exists() {
if self.include_fragments {
return self.check_fragment(&path, uri).await;
}
return Status::Ok(StatusCode::OK);
}
if path.extension().is_some() {
return ErrorKind::InvalidFilePath(uri.clone()).into();
}
// if the path has no file extension, try to append some
let mut path_buf = path.clone();
for ext in &self.fallback_extensions {
path_buf.set_extension(ext);
if path_buf.exists() {
if self.include_fragments {
return self.check_fragment(&path_buf, uri).await;
}
return Status::Ok(StatusCode::OK);
}
}
ErrorKind::InvalidFilePath(uri.clone()).into()
/// Checks a `mailto` URI.
pub async fn check_mail(&self, uri: &Uri) -> Status {
self.email_checker.check_mail(uri).await
}
/// Checks a `file` URI's fragment.
@ -699,43 +541,6 @@ impl Client {
}
}
}
/// Check a mail address, or equivalently a `mailto` URI.
///
/// URIs may contain query parameters (e.g. `contact@example.com?subject="Hello"`),
/// which are ignored by this check. They are not part of the mail address
/// and instead passed to a mail client.
#[cfg(all(feature = "email-check", feature = "native-tls"))]
pub async fn check_mail(&self, uri: &Uri) -> Status {
// `path()` of a `mailto:` URL is the bare address (query params excluded).
let address = uri.url.path().to_string();
let input = CheckEmailInput::new(address);
let result = &(check_email(&input).await);
// Only a definitively invalid verdict is treated as an error; "risky" or
// unknown reachability is accepted.
if let Reachable::Invalid = result.is_reachable {
ErrorKind::UnreachableEmailAddress(uri.clone(), mail::error_from_output(result)).into()
} else {
Status::Ok(StatusCode::OK)
}
}
/// Check a mail address, or equivalently a `mailto` URI.
///
/// This implementation simply excludes all email addresses.
/// It is compiled in when the `email-check`/`native-tls` features are
/// disabled, so mail links are never reported as errors in that build.
#[cfg(not(all(feature = "email-check", feature = "native-tls")))]
#[allow(clippy::unused_async)]
pub async fn check_mail(&self, _uri: &Uri) -> Status {
Status::Excluded
}
}
// Check if the given `Url` would cause `reqwest` to panic.
// This is a workaround for https://github.com/lycheeverse/lychee/issues/539
// and can be removed once https://github.com/seanmonstar/reqwest/pull/1399
// got merged.
// It is exactly the same check that reqwest runs internally, but unfortunately
// it `unwrap`s (and panics!) instead of returning an error, which we could handle.
//
// NOTE: despite the name, this returns `true` when the URL is *invalid*
// (i.e. when reqwest would panic on it).
fn validate_url(url: &Url) -> bool {
http::Uri::try_from(url.as_str()).is_err()
}
/// A shorthand function to check a single URI.
@ -777,7 +582,7 @@ mod tests {
chain::{ChainResult, Handler, RequestChain},
mock_server,
test_utils::get_mock_client_response,
Request, Status, Uri,
ErrorKind, Request, Status, Uri,
};
#[tokio::test]
@ -1026,9 +831,14 @@ mod tests {
#[tokio::test]
async fn test_avoid_reqwest_panic() {
let client = ClientBuilder::builder().build().client().unwrap();
// This request will fail, but it won't panic
// This request will result in an Unsupported status, but it won't panic
let res = client.check("http://\"").await.unwrap();
assert!(res.status().is_error());
assert!(matches!(
res.status(),
Status::Unsupported(ErrorKind::BuildRequestClient(_))
));
assert!(res.status().is_unsupported());
}
#[tokio::test]

View file

@ -1,3 +1,4 @@
use crate::InputSource;
use crate::{
basic_auth::BasicAuthExtractor, extract::Extractor, types::uri::raw::RawUri, utils::request,
Base, Input, Request, Result,
@ -99,27 +100,31 @@ impl Collector {
///
/// Will return `Err` if links cannot be extracted from an input
pub fn collect_links(self, inputs: Vec<Input>) -> impl Stream<Item = Result<Request>> {
let base = self.base;
let skip_missing_inputs = self.skip_missing_inputs;
let skip_hidden = self.skip_hidden;
let skip_ignored = self.skip_ignored;
let global_base = self.base;
stream::iter(inputs)
.par_then_unordered(None, move |input| async move {
input.get_contents(
self.skip_missing_inputs,
self.skip_hidden,
self.skip_ignored,
)
.par_then_unordered(None, move |input| {
let default_base = global_base.clone();
async move {
let base = match &input.source {
InputSource::RemoteUrl(url) => Base::try_from(url.as_str()).ok(),
_ => default_base,
};
input
.get_contents(skip_missing_inputs, skip_hidden, skip_ignored)
.map(move |content| (content, base.clone()))
}
})
.flatten()
.par_then_unordered(None, move |content| {
// send to parallel worker
let base = base.clone();
.par_then_unordered(None, move |(content, base)| {
let basic_auth_extractor = self.basic_auth_extractor.clone();
async move {
let content = content?;
let extractor = Extractor::new(self.use_html5ever, self.include_verbatim);
let uris: Vec<RawUri> = extractor.extract(&content);
let requests = request::create(uris, &content, &base, &basic_auth_extractor)?;
let requests = request::create(uris, &content, &base, &basic_auth_extractor);
Result::Ok(stream::iter(requests.into_iter().map(Ok)))
}
})
@ -137,7 +142,7 @@ mod tests {
use super::*;
use crate::{
mock_server,
test_utils::{load_fixture, mail, website},
test_utils::{load_fixture, mail, path, website},
types::{FileType, Input, InputSource},
Result, Uri,
};
@ -426,4 +431,85 @@ mod tests {
assert_eq!(links, expected_links);
}
// Each remote input must resolve its relative links against its *own* URL,
// not a single global base: `relative.html` under `/foo/` and `/bar/`
// must yield two distinct absolute links.
#[tokio::test]
async fn test_multiple_remote_urls() {
let mock_server_1 = mock_server!(
StatusCode::OK,
set_body_string(r#"<a href="relative.html">Link</a>"#)
);
let mock_server_2 = mock_server!(
StatusCode::OK,
set_body_string(r#"<a href="relative.html">Link</a>"#)
);
let inputs = vec![
Input {
source: InputSource::RemoteUrl(Box::new(
Url::parse(&format!(
"{}/foo/index.html",
mock_server_1.uri().trim_end_matches('/')
))
.unwrap(),
)),
file_type_hint: Some(FileType::Html),
excluded_paths: None,
},
Input {
source: InputSource::RemoteUrl(Box::new(
Url::parse(&format!(
"{}/bar/index.html",
mock_server_2.uri().trim_end_matches('/')
))
.unwrap(),
)),
file_type_hint: Some(FileType::Html),
excluded_paths: None,
},
];
let links = collect(inputs, None).await;
let expected_links = HashSet::from_iter([
website(&format!(
"{}/foo/relative.html",
mock_server_1.uri().trim_end_matches('/')
)),
website(&format!(
"{}/bar/relative.html",
mock_server_2.uri().trim_end_matches('/')
)),
]);
assert_eq!(links, expected_links);
}
// With a local base directory, relative links resolve under the base while
// root-absolute links (`/another.html`) keep their absolute path unchanged.
#[tokio::test]
async fn test_file_path_with_base() {
let base = Base::try_from("/path/to/root").unwrap();
assert_eq!(base, Base::Local("/path/to/root".into()));
let input = Input {
source: InputSource::String(
r#"
<a href="index.html">Index</a>
<a href="about.html">About</a>
<a href="/another.html">Another</a>
"#
.into(),
),
file_type_hint: Some(FileType::Html),
excluded_paths: None,
};
let links = collect(vec![input], Some(base)).await;
let expected_links = HashSet::from_iter([
path("/path/to/root/index.html"),
path("/path/to/root/about.html"),
path("/another.html"),
]);
assert_eq!(links, expected_links);
}
}

View file

@ -39,6 +39,16 @@ pub(crate) fn website(url: &str) -> Uri {
Uri::from(Url::parse(url).expect("Expected valid Website URI"))
}
/// Convert a filesystem path into a [`Uri`] with the `file` scheme.
///
/// # Panic
///
/// Panics when the given path is not absolute, so this helper is meant for
/// test code only.
pub(crate) fn path<P: AsRef<Path>>(path: P) -> Uri {
    let url = Url::from_file_path(path.as_ref()).expect("Expected valid File URI");
    Uri::from(url)
}
/// Creates a mail URI from a string
pub(crate) fn mail(address: &str) -> Uri {
if address.starts_with("mailto:") {

View file

@ -23,7 +23,10 @@ impl Base {
pub(crate) fn join(&self, link: &str) -> Option<Url> {
match self {
Self::Remote(url) => url.join(link).ok(),
Self::Local(_) => None,
Self::Local(path) => {
let full_path = path.join(link);
Url::from_file_path(full_path).ok()
}
}
}
@ -36,18 +39,17 @@ impl Base {
}
}
pub(crate) fn from_source(source: &InputSource) -> Option<Url> {
pub(crate) fn from_source(source: &InputSource) -> Option<Base> {
match &source {
InputSource::RemoteUrl(url) => {
// TODO: This should be refactored.
// Cases like https://user:pass@example.com are not handled
// We can probably use the original URL and just replace the
// path component in the caller of this function
if let Some(port) = url.port() {
Url::parse(&format!("{}://{}:{port}", url.scheme(), url.host_str()?)).ok()
} else {
Url::parse(&format!("{}://{}", url.scheme(), url.host_str()?)).ok()
}
// Create a new URL with just the scheme, host, and port
let mut base_url = url.clone();
base_url.set_path("");
base_url.set_query(None);
base_url.set_fragment(None);
// We keep the username and password intact
Some(Base::Remote(*base_url))
}
// other inputs do not have a URL to extract a base
_ => None,
@ -101,6 +103,16 @@ mod test_base {
assert!(Base::try_from("data:text/plain,Hello?World#").is_err());
}
// Local base paths are accepted verbatim — absolute, trailing-slash,
// and relative forms all become `Base::Local` without normalization.
#[test]
fn test_valid_local_path_string_as_base() -> Result<()> {
let cases = vec!["/tmp/lychee", "/tmp/lychee/", "tmp/lychee/"];
for case in cases {
assert_eq!(Base::try_from(case)?, Base::Local(PathBuf::from(case)));
}
Ok(())
}
#[test]
fn test_valid_local() -> Result<()> {
let dir = tempfile::tempdir().unwrap();
@ -123,7 +135,7 @@ mod test_base {
let url = Url::parse(url).unwrap();
let source = InputSource::RemoteUrl(Box::new(url.clone()));
let base = Base::from_source(&source);
let expected = Url::parse(expected).unwrap();
let expected = Base::Remote(Url::parse(expected).unwrap());
assert_eq!(base, Some(expected));
}
}

View file

@ -86,12 +86,24 @@ pub enum ErrorKind {
#[error("Error with base dir `{0}` : {1}")]
InvalidBase(String, String),
/// Cannot join the given text with the base URL
#[error("Cannot join '{0}' with the base URL")]
InvalidBaseJoin(String),
/// Cannot convert the given path to a URI
#[error("Cannot convert path '{0}' to a URI")]
InvalidPathToUri(String),
/// The given URI type is not supported
#[error("Unsupported URI type: '{0}'")]
UnsupportedUriType(String),
/// The given input can not be parsed into a valid URI remapping
#[error("Error remapping URL: `{0}`")]
InvalidUrlRemap(String),
/// The given path does not resolve to a valid file
#[error("Cannot find local file {0}")]
#[error("Invalid file path: {0}")]
InvalidFile(PathBuf),
/// Error while traversing an input directory
@ -246,6 +258,13 @@ impl PartialEq for ErrorKind {
}
(Self::Cookies(e1), Self::Cookies(e2)) => e1 == e2,
(Self::InvalidFile(p1), Self::InvalidFile(p2)) => p1 == p2,
(Self::InvalidFilePath(u1), Self::InvalidFilePath(u2)) => u1 == u2,
(Self::InvalidFragment(u1), Self::InvalidFragment(u2)) => u1 == u2,
(Self::InvalidUrlFromPath(p1), Self::InvalidUrlFromPath(p2)) => p1 == p2,
(Self::InvalidBase(b1, e1), Self::InvalidBase(b2, e2)) => b1 == b2 && e1 == e2,
(Self::InvalidUrlRemap(r1), Self::InvalidUrlRemap(r2)) => r1 == r2,
(Self::EmptyUrl, Self::EmptyUrl) => true,
_ => false,
}
}
@ -281,6 +300,9 @@ impl Hash for ErrorKind {
Self::UnreachableEmailAddress(u, ..) => u.hash(state),
Self::InsecureURL(u, ..) => u.hash(state),
Self::InvalidBase(base, e) => (base, e).hash(state),
Self::InvalidBaseJoin(s) => s.hash(state),
Self::InvalidPathToUri(s) => s.hash(state),
Self::UnsupportedUriType(s) => s.hash(state),
Self::InvalidUrlRemap(remap) => (remap).hash(state),
Self::InvalidHeader(e) => e.to_string().hash(state),
Self::InvalidGlobPattern(e) => e.to_string().hash(state),

View file

@ -5,7 +5,7 @@ use thiserror::Error;
use crate::{types::accept::AcceptRange, AcceptRangeError};
#[derive(Debug, Error)]
#[derive(Debug, Error, PartialEq)]
pub enum StatusCodeSelectorError {
#[error("invalid/empty input")]
InvalidInput,

View file

@ -19,12 +19,6 @@ pub struct RawUri {
pub attribute: Option<String>,
}
impl RawUri {
    // Taken from https://github.com/getzola/zola/blob/master/components/link_checker/src/lib.rs
    /// Whether the raw link text is a bare in-page anchor (begins with `#`).
    pub(crate) fn is_anchor(&self) -> bool {
        matches!(self.text.chars().next(), Some('#'))
    }
}
impl Display for RawUri {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{} (Attribute: {:?})", self.text, self.attribute)
@ -40,17 +34,3 @@ impl From<&str> for RawUri {
}
}
}
#[cfg(test)]
mod tests {
use super::*;
// Covers both the positive case (leading `#`) and a `#` appearing
// mid-string, which must not count as an anchor.
#[test]
fn test_is_anchor() {
let raw_uri = RawUri::from("#anchor");
assert!(raw_uri.is_anchor());
let raw_uri = RawUri::from("notan#anchor");
assert!(!raw_uri.is_anchor());
}
}

View file

@ -378,4 +378,9 @@ mod tests {
website("https://example.com")
);
}
// A `file://` URL must be recognized as a file URI.
#[test]
fn test_file_uri() {
assert!(Uri::try_from("file:///path/to/file").unwrap().is_file());
}
}

View file

@ -1,4 +1,4 @@
use log::info;
use log::warn;
use percent_encoding::percent_decode_str;
use reqwest::Url;
use std::{
@ -13,156 +13,374 @@ use crate::{
Base, BasicAuthCredentials, ErrorKind, Request, Result, Uri,
};
const MAX_TRUNCATED_STR_LEN: usize = 100;
/// Extract basic auth credentials for a given URL.
fn credentials(extractor: &Option<BasicAuthExtractor>, uri: &Uri) -> Option<BasicAuthCredentials> {
/// Extract basic auth credentials for a given URI.
///
/// Returns `None` when no extractor is configured or no rule matches.
fn extract_credentials(
    extractor: &Option<BasicAuthExtractor>,
    uri: &Uri,
) -> Option<BasicAuthCredentials> {
    // Propagate `None` early when no extractor is configured.
    extractor.as_ref()?.matches(uri)
}
/// Create a request from a raw URI.
///
/// Parses (or resolves) the raw URI against `source`/`base`, truncates the
/// source label, and attaches basic-auth credentials when available.
fn create_request(
    raw_uri: &RawUri,
    source: &InputSource,
    base: &Option<Base>,
    extractor: &Option<BasicAuthExtractor>,
) -> Result<Request> {
    let uri = try_parse_into_uri(raw_uri, source, base)?;
    let credentials = extract_credentials(extractor, &uri);
    Ok(Request::new(
        uri,
        truncate_source(source),
        raw_uri.element.clone(),
        raw_uri.attribute.clone(),
        credentials,
    ))
}
/// Try to parse the raw URI into a `Uri`.
///
/// If the raw URI is not a valid URI, create a URI by joining the base URL with the text.
/// If the base URL is not available, create a URI from the file path.
///
/// # Errors
///
/// - If the text (the unparsed URI represented as a `String`) cannot be joined with the base
///   to create a valid URI.
/// - If a URI cannot be created from the file path.
/// - If the source is not a file path (i.e. the URI type is not supported).
fn try_parse_into_uri(raw_uri: &RawUri, source: &InputSource, base: &Option<Base>) -> Result<Uri> {
    // Fast path: the text already parses as a complete URI on its own.
    if let Ok(uri) = Uri::try_from(raw_uri.clone()) {
        return Ok(uri);
    }
    let text = raw_uri.text.clone();
    // Otherwise resolve against the base, if one is available.
    if let Some(base_url) = base {
        return match base_url.join(&text) {
            Some(url) => Ok(Uri { url }),
            None => Err(ErrorKind::InvalidBaseJoin(text.clone())),
        };
    }
    // Without a base, only file-system sources can anchor a relative link.
    match source {
        InputSource::FsPath(root) => create_uri_from_file_path(root, &text, base),
        _ => Err(ErrorKind::UnsupportedUriType(text)),
    }
}
// Taken from https://github.com/getzola/zola/blob/master/components/link_checker/src/lib.rs
/// Returns `true` when the link text is a bare in-page anchor (starts with `#`).
pub(crate) fn is_anchor(text: &str) -> bool {
    matches!(text.as_bytes().first(), Some(b'#'))
}
/// Create a URI from a file path.
///
/// For bare anchors (`#foo`) the fragment is re-attached to the source file's
/// name before resolution; other link text is resolved as-is.
///
/// # Errors
///
/// - If the link text is an anchor and the file name cannot be extracted from the file path.
/// - If the path cannot be resolved.
/// - If the resolved path cannot be converted to a URL.
fn create_uri_from_file_path(
    file_path: &Path,
    link_text: &str,
    base: &Option<Base>,
) -> Result<Uri> {
    let target_path = if is_anchor(link_text) {
        // Anchors point back into the current file, so prepend its file name.
        let file_name = file_path
            .file_name()
            .and_then(|name| name.to_str())
            .ok_or_else(|| ErrorKind::InvalidFile(file_path.to_path_buf()))?;
        format!("{file_name}{link_text}")
    } else {
        link_text.to_string()
    };
    match resolve_and_create_url(file_path, &target_path, base) {
        Ok(url) => Ok(Uri { url }),
        Err(_) => Err(ErrorKind::InvalidPathToUri(target_path)),
    }
}
/// Truncate the source in case it gets too long.
///
/// Only string inputs need truncation; every other input kind is just a
/// cheap "label" (an enum variant) and is cloned as-is.
// TODO: This would not be necessary if we used `Cow` for the source.
fn truncate_source(source: &InputSource) -> InputSource {
    const MAX_TRUNCATED_STR_LEN: usize = 100;
    if let InputSource::String(s) = source {
        InputSource::String(s.chars().take(MAX_TRUNCATED_STR_LEN).collect())
    } else {
        source.clone()
    }
}
/// Create requests out of the collected URLs.
/// Only keeps "valid" URLs. This filters out anchors for example.
///
/// If a URLs is ignored (because of the current settings),
/// it will not be added to the `HashSet`.
pub(crate) fn create(
uris: Vec<RawUri>,
input_content: &InputContent,
base: &Option<Base>,
extractor: &Option<BasicAuthExtractor>,
) -> Result<HashSet<Request>> {
let base_url = Base::from_source(&input_content.source);
) -> HashSet<Request> {
let base = base
.clone()
.or_else(|| Base::from_source(&input_content.source));
let requests: Result<Vec<Option<Request>>> = uris
.into_iter()
.map(|raw_uri| {
let is_anchor = raw_uri.is_anchor();
let text = raw_uri.text.clone();
let element = raw_uri.element.clone();
let attribute = raw_uri.attribute.clone();
// Truncate the source in case it gets too long Ideally we should
// avoid the initial String allocation for `source` altogether
let source = match &input_content.source {
InputSource::String(s) => {
InputSource::String(s.chars().take(MAX_TRUNCATED_STR_LEN).collect())
uris.into_iter()
.filter_map(|raw_uri| {
match create_request(&raw_uri, &input_content.source, &base, extractor) {
Ok(request) => Some(request),
Err(e) => {
warn!("Error creating request: {:?}", e);
None
}
// Cloning is cheap here
c => c.clone(),
};
if let Ok(uri) = Uri::try_from(raw_uri) {
let credentials = credentials(extractor, &uri);
Ok(Some(Request::new(
uri,
source,
element,
attribute,
credentials,
)))
} else if let Some(url) = base.as_ref().and_then(|u| u.join(&text)) {
let uri = Uri { url };
let credentials = credentials(extractor, &uri);
Ok(Some(Request::new(
uri,
source,
element,
attribute,
credentials,
)))
} else if let InputSource::FsPath(root) = &input_content.source {
let path = if is_anchor {
match root.file_name() {
Some(file_name) => match file_name.to_str() {
Some(valid_str) => valid_str.to_string() + &text,
None => return Err(ErrorKind::InvalidFile(root.clone())),
},
None => return Err(ErrorKind::InvalidFile(root.clone())),
}
} else {
text
};
if let Some(url) = create_uri_from_path(root, &path, base)? {
let uri = Uri { url };
let credentials = credentials(extractor, &uri);
Ok(Some(Request::new(
uri,
source,
element,
attribute,
credentials,
)))
} else {
// In case we cannot create a URI from a path but we didn't receive an error,
// it means that some preconditions were not met, e.g. the `base_url` wasn't set.
Ok(None)
}
} else if let Some(url) = construct_url(&base_url, &text) {
if base.is_some() {
Ok(None)
} else {
let uri = Uri { url: url? };
let credentials = credentials(extractor, &uri);
Ok(Some(Request::new(
uri,
source,
element,
attribute,
credentials,
)))
}
} else {
info!("Handling of `{}` not implemented yet", text);
Ok(None)
}
})
.collect();
let requests: Vec<Request> = requests?.into_iter().flatten().collect();
Ok(HashSet::from_iter(requests))
.collect()
}
/// Join `text` onto the optional base URL.
///
/// Returns `None` when no base is set; otherwise `Some` with the join result,
/// mapping a parse failure to `ErrorKind::ParseUrl`.
fn construct_url(base: &Option<Url>, text: &str) -> Option<Result<Url>> {
base.as_ref().map(|base| {
base.join(text)
.map_err(|e| ErrorKind::ParseUrl(e, format!("{base}{text}")))
})
}
/// Create a URI from a path
///
/// `src_path` is the path of the source file.
/// `dest_path` is the path being linked to.
/// The optional `base_uri` specifies the base URI to resolve the destination path against.
///
/// # Errors
///
/// - If the percent-decoded destination path cannot be decoded as UTF-8.
/// - The path cannot be resolved
/// - The resolved path cannot be converted to a URL.
fn resolve_and_create_url(
src_path: &Path,
dest_path: &str,
base_uri: &Option<Base>,
) -> Result<Url> {
let (dest_path, fragment) = url::remove_get_params_and_separate_fragment(dest_path);
fn create_uri_from_path(src: &Path, dst: &str, base: &Option<Base>) -> Result<Option<Url>> {
let (dst, frag) = url::remove_get_params_and_separate_fragment(dst);
// Avoid double-encoding already encoded destination paths by removing any
// potential encoding (e.g. `web%20site` becomes `web site`).
// That's because Url::from_file_path will encode the full URL in the end.
// This behavior cannot be configured.
// See https://github.com/lycheeverse/lychee/pull/262#issuecomment-915245411
// TODO: This is not a perfect solution.
// Ideally, only `src` and `base` should be URL encoded (as is done by
// `from_file_path` at the moment) while `dst` gets left untouched and simply
// appended to the end.
let decoded = percent_decode_str(dst).decode_utf8()?;
let resolved = path::resolve(src, &PathBuf::from(&*decoded), base)?;
match resolved {
Some(path) => Url::from_file_path(&path)
.map(|mut url| {
url.set_fragment(frag);
url
})
.map(Some)
.map_err(|_e| ErrorKind::InvalidUrlFromPath(path)),
None => Ok(None),
}
// Decode the destination path to avoid double-encoding
// This addresses the issue mentioned in the original comment about double-encoding
let decoded_dest = percent_decode_str(dest_path).decode_utf8()?;
let Ok(Some(resolved_path)) = path::resolve(src_path, &PathBuf::from(&*decoded_dest), base_uri)
else {
return Err(ErrorKind::InvalidPathToUri(decoded_dest.to_string()));
};
let Ok(mut url) = Url::from_file_path(&resolved_path) else {
return Err(ErrorKind::InvalidUrlFromPath(resolved_path.clone()));
};
url.set_fragment(fragment);
Ok(url)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::types::FileType;
// A `#` is only an anchor marker when it is the first character.
#[test]
fn test_is_anchor() {
assert!(is_anchor("#anchor"));
assert!(!is_anchor("notan#anchor"));
}
#[test]
fn test_create_uri_from_path() {
let result =
create_uri_from_path(&PathBuf::from("/README.md"), "test+encoding", &None).unwrap();
assert_eq!(result.unwrap().as_str(), "file:///test+encoding");
resolve_and_create_url(&PathBuf::from("/README.md"), "test+encoding", &None).unwrap();
assert_eq!(result.as_str(), "file:///test+encoding");
}
/// Build an `InputContent` from a string, mirroring the content into the
/// `String` input source for the tests below.
fn create_input(content: &str, file_type: FileType) -> InputContent {
    let source = InputSource::String(content.to_string());
    InputContent {
        content: content.to_string(),
        file_type,
        source,
    }
}
// A sibling-relative link resolves into the base URL's directory.
#[test]
fn test_relative_url_resolution() {
let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
let input = create_input(
r#"<a href="relative.html">Relative Link</a>"#,
FileType::Html,
);
let uris = vec![RawUri::from("relative.html")];
let requests = create(uris, &input, &base, &None);
assert_eq!(requests.len(), 1);
assert!(requests
.iter()
.any(|r| r.uri.url.as_str() == "https://example.com/path/relative.html"));
}
// A fully-qualified URL ignores the base entirely.
#[test]
fn test_absolute_url_resolution() {
let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
let input = create_input(
r#"<a href="https://another.com/page">Absolute Link</a>"#,
FileType::Html,
);
let uris = vec![RawUri::from("https://another.com/page")];
let requests = create(uris, &input, &base, &None);
assert_eq!(requests.len(), 1);
assert!(requests
.iter()
.any(|r| r.uri.url.as_str() == "https://another.com/page"));
}
// A root-relative link (leading `/`) resolves against the host root,
// discarding the base URL's path component.
#[test]
fn test_root_relative_url_resolution() {
let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
let input = create_input(
r#"<a href="/root-relative">Root Relative Link</a>"#,
FileType::Html,
);
let uris = vec![RawUri::from("/root-relative")];
let requests = create(uris, &input, &base, &None);
assert_eq!(requests.len(), 1);
assert!(requests
.iter()
.any(|r| r.uri.url.as_str() == "https://example.com/root-relative"));
}
// `..` navigates one directory up from the base URL's path.
#[test]
fn test_parent_directory_url_resolution() {
let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
let input = create_input(
r#"<a href="../parent">Parent Directory Link</a>"#,
FileType::Html,
);
let uris = vec![RawUri::from("../parent")];
let requests = create(uris, &input, &base, &None);
assert_eq!(requests.len(), 1);
assert!(requests
.iter()
.any(|r| r.uri.url.as_str() == "https://example.com/parent"));
}
// A bare fragment keeps the full base URL and appends the fragment.
#[test]
fn test_fragment_url_resolution() {
let base = Some(Base::try_from("https://example.com/path/page.html").unwrap());
let input = create_input(r##"<a href="#fragment">Fragment Link</a>"##, FileType::Html);
let uris = vec![RawUri::from("#fragment")];
let requests = create(uris, &input, &base, &None);
assert_eq!(requests.len(), 1);
assert!(requests
.iter()
.any(|r| r.uri.url.as_str() == "https://example.com/path/page.html#fragment"));
}
// Absolute URLs still work without any base configured.
#[test]
fn test_no_base_url_resolution() {
let base = None;
let input = create_input(
r#"<a href="https://example.com/page">Absolute Link</a>"#,
FileType::Html,
);
let uris = vec![RawUri::from("https://example.com/page")];
let requests = create(uris, &input, &base, &None);
assert_eq!(requests.len(), 1);
assert!(requests
.iter()
.any(|r| r.uri.url.as_str() == "https://example.com/page"));
}
// A relative link from a file input resolves under the local base directory.
#[test]
fn test_create_request_from_relative_file_path() {
let base = Some(Base::Local(PathBuf::from("/tmp/lychee")));
let input_source = InputSource::FsPath(PathBuf::from("page.html"));
let actual =
create_request(&RawUri::from("file.html"), &input_source, &base, &None).unwrap();
assert_eq!(
actual,
Request::new(
Uri {
url: Url::from_file_path("/tmp/lychee/file.html").unwrap()
},
input_source,
None,
None,
None,
)
);
}
// An absolute file link outside the base directory is kept verbatim:
// `Path::join` with an absolute component replaces the base path.
#[test]
fn test_create_request_from_absolute_file_path() {
let base = Some(Base::Local(PathBuf::from("/tmp/lychee")));
let input_source = InputSource::FsPath(PathBuf::from("/tmp/lychee/page.html"));
// Use an absolute path that's outside the base directory
let actual = create_request(
&RawUri::from("/usr/local/share/doc/example.html"),
&input_source,
&base,
&None,
)
.unwrap();
assert_eq!(
actual,
Request::new(
Uri {
url: Url::from_file_path("/usr/local/share/doc/example.html").unwrap()
},
input_source,
None,
None,
None,
)
);
}
// A relative link with a local base resolves to a file URI under the base.
#[test]
fn test_parse_relative_path_into_uri() {
let base = Some(Base::Local(PathBuf::from("/tmp/lychee")));
let input = create_input(
r#"<a href="relative.html">Relative Link</a>"#,
FileType::Html,
);
let raw_uri = RawUri::from("relative.html");
let uri = try_parse_into_uri(&raw_uri, &input.source, &base).unwrap();
assert_eq!(uri.url.as_str(), "file:///tmp/lychee/relative.html");
}
// Reviewer note: this test previously parsed `RawUri::from("absolute.html")`
// — a *relative* path — which duplicated `test_parse_relative_path_into_uri`
// and never exercised the absolute-path case its name (and its own HTML
// fixture, `/absolute.html`) promises.
#[test]
fn test_parse_absolute_path_into_uri() {
    let base = Some(Base::Local(PathBuf::from("/tmp/lychee")));
    let input = create_input(
        r#"<a href="/absolute.html">Absolute Link</a>"#,
        FileType::Html,
    );
    // Use the absolute form so the test matches its name and the HTML above.
    let raw_uri = RawUri::from("/absolute.html");
    let uri = try_parse_into_uri(&raw_uri, &input.source, &base).unwrap();
    // `Path::join` with an absolute component discards the local base, so the
    // resulting file URI points at the absolute path itself — consistent with
    // the `/another.html` expectation in the collector's base test.
    assert_eq!(uri.url.as_str(), "file:///absolute.html");
}
}