From 208fa80aa6c5fc08762f93cfea0339b2d4a64fb6 Mon Sep 17 00:00:00 2001 From: Keming Date: Sat, 24 May 2025 03:50:26 +0800 Subject: [PATCH] fix: only check the fragment when it's a file (#1713) * fix: only check the fragment when it's a file * add dir fragment test * Clean up unused fragment_check in Client --------- Signed-off-by: Keming Co-authored-by: Matthias --- fixtures/fragments/empty_dir/.gitkeep | 0 fixtures/fragments/file1.md | 2 ++ lychee-bin/tests/cli.rs | 4 ++-- lychee-lib/src/checker/file.rs | 7 +++--- lychee-lib/src/client.rs | 27 ++---------------------- lychee-lib/src/utils/fragment_checker.rs | 6 ++++-- 6 files changed, 14 insertions(+), 32 deletions(-) create mode 100644 fixtures/fragments/empty_dir/.gitkeep diff --git a/fixtures/fragments/empty_dir/.gitkeep b/fixtures/fragments/empty_dir/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/fixtures/fragments/file1.md b/fixtures/fragments/file1.md index 389e75b..d4ec6f5 100644 --- a/fixtures/fragments/file1.md +++ b/fixtures/fragments/file1.md @@ -65,3 +65,5 @@ without related HTML element. Browser will scroll to the top of the page. ##### Lets wear a hat: ĂȘtre A link to the non-existing fragment: [try](https://github.com/lycheeverse/lychee#non-existent-anchor). + +Skip the fragment check for directories like: [empty](empty_dir/). diff --git a/lychee-bin/tests/cli.rs b/lychee-bin/tests/cli.rs index ae6e7c8..b5cc108 100644 --- a/lychee-bin/tests/cli.rs +++ b/lychee-bin/tests/cli.rs @@ -1843,8 +1843,8 @@ mod cli { .stderr(contains( "https://github.com/lycheeverse/lychee#non-existent-anchor", )) - .stdout(contains("27 Total")) - .stdout(contains("22 OK")) + .stdout(contains("28 Total")) + .stdout(contains("23 OK")) // 4 failures because of missing fragments .stdout(contains("5 Errors")); } diff --git a/lychee-lib/src/checker/file.rs b/lychee-lib/src/checker/file.rs index 60c823f..8327067 100644 --- a/lychee-lib/src/checker/file.rs +++ b/lychee-lib/src/checker/file.rs @@ -123,7 +123,8 @@ impl FileChecker { /// /// Returns a `Status` indicating the result of the check. async fn check_existing_path(&self, path: &Path, uri: &Uri) -> Status { - if self.include_fragments { + // only files can contain content with fragments + if self.include_fragments && path.is_file() { self.check_fragment(path, uri).await } else { Status::Ok(StatusCode::OK) @@ -175,12 +176,12 @@ impl FileChecker { Ok(true) => Status::Ok(StatusCode::OK), Ok(false) => ErrorKind::InvalidFragment(uri.clone()).into(), Err(err) => { - warn!("Skipping fragment check due to the following error: {err}"); + warn!("Skipping fragment check for {uri} due to the following error: {err}"); Status::Ok(StatusCode::OK) } }, Err(err) => { - warn!("Skipping fragment check due to the following error: {err}"); + warn!("Skipping fragment check for {uri} due to the following error: {err}"); Status::Ok(StatusCode::OK) } } diff --git a/lychee-lib/src/client.rs b/lychee-lib/src/client.rs index fc281b4..f80259c 100644 --- a/lychee-lib/src/client.rs +++ b/lychee-lib/src/client.rs @@ -13,13 +13,13 @@ clippy::default_trait_access, clippy::used_underscore_binding )] -use std::{collections::HashSet, path::Path, sync::Arc, time::Duration}; +use std::{collections::HashSet, sync::Arc, time::Duration}; use http::{ header::{HeaderMap, HeaderValue}, StatusCode, }; -use log::{debug, warn}; +use log::debug; use octocrab::Octocrab; use regex::RegexSet; use reqwest::{header, redirect, tls}; @@ -32,7 +32,6 @@ use crate::{ checker::{file::FileChecker, mail::MailChecker, website::WebsiteChecker}, filter::{Excludes, Filter, Includes}, remap::Remaps, - utils::fragment_checker::{FragmentChecker, FragmentInput}, Base, BasicAuthCredentials, ErrorKind, Request, Response, Result, Status, Uri, }; @@ -410,7 +409,6 @@ impl ClientBuilder { self.fallback_extensions, self.include_fragments, ), - fragment_checker: FragmentChecker::new(), }) } } @@ -435,9 +433,6 @@ pub struct Client { /// A checker for email URLs. email_checker: MailChecker, - - /// Caches Fragments - fragment_checker: FragmentChecker, } impl Client { @@ -535,24 +530,6 @@ impl Client { pub async fn check_mail(&self, uri: &Uri) -> Status { self.email_checker.check_mail(uri).await } - - /// Checks a `file` URI's fragment. - pub async fn check_fragment(&self, path: &Path, uri: &Uri) -> Status { - match FragmentInput::from_path(path).await { - Ok(input) => match self.fragment_checker.check(input, &uri.url).await { - Ok(true) => Status::Ok(StatusCode::OK), - Ok(false) => ErrorKind::InvalidFragment(uri.clone()).into(), - Err(err) => { - warn!("Skipping fragment check due to the following error: {err}"); - Status::Ok(StatusCode::OK) - } - }, - Err(err) => { - warn!("Skipping fragment check due to the following error: {err}"); - Status::Ok(StatusCode::OK) - } - } - } } /// A shorthand function to check a single URI. diff --git a/lychee-lib/src/utils/fragment_checker.rs b/lychee-lib/src/utils/fragment_checker.rs index c86b1f4..fdcd998 100644 --- a/lychee-lib/src/utils/fragment_checker.rs +++ b/lychee-lib/src/utils/fragment_checker.rs @@ -6,7 +6,7 @@ use std::{ use crate::{ extract::{html::html5gum::extract_html_fragments, markdown::extract_markdown_fragments}, - types::FileType, + types::{ErrorKind, FileType}, Result, }; use percent_encoding::percent_decode_str; @@ -21,7 +21,9 @@ pub(crate) struct FragmentInput { impl FragmentInput { pub(crate) async fn from_path(path: &Path) -> Result { - let content = fs::read_to_string(path).await?; + let content = fs::read_to_string(path) + .await + .map_err(|err| ErrorKind::ReadFileInput(err, path.to_path_buf()))?; let file_type = FileType::from(path); Ok(Self { content, file_type }) }