Merge branch 'master' of github.com:lycheeverse/lychee

This commit is contained in:
Matthias 2022-06-21 10:03:35 +02:00
commit 0ad2bc51b7
8 changed files with 175 additions and 56 deletions

4
.devcontainer/Dockerfile Normal file
View file

@ -0,0 +1,4 @@
# See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.236.0/containers/rust/.devcontainer/base.Dockerfile
# [Choice] Debian OS version (use bullseye on local arm64/Apple Silicon): buster, bullseye
ARG VARIANT="buster"
FROM mcr.microsoft.com/vscode/devcontainers/rust:0-${VARIANT}

View file

@ -0,0 +1,57 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at:
// https://github.com/microsoft/vscode-dev-containers/tree/v0.236.0/containers/rust
{
"name": "Rust",
"build": {
"dockerfile": "Dockerfile",
"args": {
// Use the VARIANT arg to pick a Debian OS version: buster, bullseye
// Use bullseye when running locally on arm64/Apple Silicon.
"VARIANT": "bullseye"
}
},
"runArgs": [
"--cap-add=SYS_PTRACE",
"--security-opt",
"seccomp=unconfined"
],
// Configure tool-specific properties.
"customizations": {
// Configure properties specific to VS Code.
"vscode": {
// Set *default* container specific settings.json values on container create.
"settings": {
"lldb.executable": "/usr/bin/lldb",
// Tell VS Code not to watch files under ./target
"files.watcherExclude": {
"**/target/**": true
},
"rust-analyzer.checkOnSave.command": "clippy"
},
// Add the IDs of extensions you want installed when the container is created.
"extensions": [
"vadimcn.vscode-lldb",
"mutantdino.resourcemonitor",
"matklad.rust-analyzer",
"tamasfe.even-better-toml",
"serayuzgur.crates",
"vscodevim.vim"
]
}
},
// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],
// Use 'postCreateCommand' to run commands after the container is created.
// "postCreateCommand": "rustc --version",
// Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
"remoteUser": "vscode",
"features": {
"git": "latest",
"github-cli": "latest"
}
}

132
Cargo.lock generated
View file

@ -37,9 +37,9 @@ dependencies = [
[[package]]
name = "anyhow"
version = "1.0.57"
version = "1.0.58"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08f9b8508dccb7687a1d6c4ce66b2b0ecef467c94667de27d8d7fe1f8d2a9cdc"
checksum = "bb07d2053ccdbe10e2af2995a2f116c1330396493dc1269f6a91d0ae82e19704"
[[package]]
name = "arc-swap"
@ -267,7 +267,7 @@ dependencies = [
"futures-io",
"futures-util",
"pin-utils",
"trust-dns-resolver",
"trust-dns-resolver 0.20.4",
]
[[package]]
@ -450,9 +450,9 @@ checksum = "c1db59621ec70f09c5e9b597b220c7a2b43611f4710dc03ceb8748637775692c"
[[package]]
name = "cached"
version = "0.34.0"
version = "0.34.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aadf76ddea74bab35ebeb8f1eb115b9bc04eaee42d8acc0d5f477dee6b176c9a"
checksum = "12f5cd208ba696f870238022d81ca1d80ed9d696fd62341c747f2d8f6ecdd9fe"
dependencies = [
"async-trait",
"async_once",
@ -525,7 +525,7 @@ dependencies = [
"reqwest",
"serde",
"serde_json",
"trust-dns-proto",
"trust-dns-proto 0.20.4",
]
[[package]]
@ -964,6 +964,18 @@ dependencies = [
"syn",
]
[[package]]
name = "enum-as-inner"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "21cdad81446a7f7dc43f6a77409efeb9733d2fa65553efef6018ef257c959b73"
dependencies = [
"heck 0.4.0",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "event-listener"
version = "2.5.2"
@ -1274,7 +1286,7 @@ dependencies = [
"indexmap",
"slab",
"tokio",
"tokio-util 0.7.2",
"tokio-util",
"tracing",
]
@ -1580,11 +1592,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f7e2f18aece9709094573a9f24f483c4f65caa4298e2f7ae1b71cc65d853fad7"
dependencies = [
"socket2 0.3.19",
"widestring",
"widestring 0.4.3",
"winapi",
"winreg 0.6.2",
]
[[package]]
name = "ipconfig"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "723519edce41262b05d4143ceb95050e4c614f483e78e9fd9e39a8275a84ad98"
dependencies = [
"socket2 0.4.4",
"widestring 0.5.1",
"winapi",
"winreg 0.7.0",
]
[[package]]
name = "ipnet"
version = "2.5.0"
@ -2610,9 +2634,9 @@ dependencies = [
[[package]]
name = "reqwest"
version = "0.11.10"
version = "0.11.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46a1f7aa4f35e5e8b4160449f51afc758f0ce6454315a9fa7d0d113e958c41eb"
checksum = "b75aa69a3f06bbcc66ede33af2af253c6f7a86b1ca0033f60c580a27074fbf92"
dependencies = [
"async-compression",
"base64",
@ -2639,8 +2663,9 @@ dependencies = [
"tokio",
"tokio-native-tls",
"tokio-socks",
"tokio-util 0.6.10",
"trust-dns-resolver",
"tokio-util",
"tower-service",
"trust-dns-resolver 0.21.2",
"url",
"wasm-bindgen",
"wasm-bindgen-futures",
@ -3290,20 +3315,6 @@ dependencies = [
"tokio",
]
[[package]]
name = "tokio-util"
version = "0.6.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "36943ee01a6d67977dd3f84a5a1d2efeb4ada3a1ae771cadfaa535d9d9fc6507"
dependencies = [
"bytes",
"futures-core",
"futures-sink",
"log",
"pin-project-lite",
"tokio",
]
[[package]]
name = "tokio-util"
version = "0.7.2"
@ -3390,7 +3401,32 @@ dependencies = [
"async-trait",
"cfg-if",
"data-encoding",
"enum-as-inner",
"enum-as-inner 0.3.4",
"futures-channel",
"futures-io",
"futures-util",
"idna",
"ipnet",
"lazy_static",
"log",
"rand 0.8.5",
"smallvec",
"thiserror",
"tinyvec",
"tokio",
"url",
]
[[package]]
name = "trust-dns-proto"
version = "0.21.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c31f240f59877c3d4bb3b3ea0ec5a6a0cff07323580ff8c7a605cd7d08b255d"
dependencies = [
"async-trait",
"cfg-if",
"data-encoding",
"enum-as-inner 0.4.0",
"futures-channel",
"futures-io",
"futures-util",
@ -3414,7 +3450,7 @@ checksum = "ecae383baad9995efaa34ce8e57d12c3f305e545887472a492b838f4b5cfb77a"
dependencies = [
"cfg-if",
"futures-util",
"ipconfig",
"ipconfig 0.2.2",
"lazy_static",
"log",
"lru-cache",
@ -3422,8 +3458,27 @@ dependencies = [
"resolv-conf",
"smallvec",
"thiserror",
"trust-dns-proto 0.20.4",
]
[[package]]
name = "trust-dns-resolver"
version = "0.21.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4ba72c2ea84515690c9fcef4c6c660bb9df3036ed1051686de84605b74fd558"
dependencies = [
"cfg-if",
"futures-util",
"ipconfig 0.3.0",
"lazy_static",
"log",
"lru-cache",
"parking_lot 0.12.0",
"resolv-conf",
"smallvec",
"thiserror",
"tokio",
"trust-dns-proto",
"trust-dns-proto 0.21.2",
]
[[package]]
@ -3524,9 +3579,9 @@ checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9"
[[package]]
name = "uuid"
version = "1.1.1"
version = "1.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c6d5d669b51467dcf7b2f1a796ce0f955f05f01cafda6c19d6e95f730df29238"
checksum = "dd6469f4314d5f1ffec476e05f17cc9a78bc7a27a6a857842170bdf8d6f98d2f"
dependencies = [
"getrandom 0.2.6",
]
@ -3710,6 +3765,12 @@ version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c168940144dd21fd8046987c16a46a33d5fc84eec29ef9dcddc2ac9e31526b7c"
[[package]]
name = "widestring"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "17882f045410753661207383517a6f62ec3dbeb6a4ed2acce01f0728238d1983"
[[package]]
name = "winapi"
version = "0.3.9"
@ -3793,6 +3854,15 @@ dependencies = [
"winapi",
]
[[package]]
name = "winreg"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0120db82e8a1e0b9fb3345a539c478767c0048d842860994d96113d5b667bd69"
dependencies = [
"winapi",
]
[[package]]
name = "winreg"
version = "0.10.1"

View file

@ -12,4 +12,4 @@ lychee-lib = { path = "../../lychee-lib", version = "0.10.0" }
tokio = { version = "1.19.2", features = ["full"] }
regex = "1.5.6"
http = "0.2.8"
reqwest = { version = "0.11.10", features = ["gzip"] }
reqwest = { version = "0.11.11", features = ["gzip"] }

View file

@ -13,4 +13,4 @@ tokio = { version = "1.19.2", features = ["full"] }
regex = "1.5.6"
http = "0.2.8"
tokio-stream = "0.1.9"
reqwest = { version = "0.11.10", features = ["gzip"] }
reqwest = { version = "0.11.11", features = ["gzip"] }

View file

@ -18,7 +18,7 @@ version = "0.10.0"
[dependencies]
lychee-lib = { path = "../lychee-lib", version = "0.10.0", default-features = false }
anyhow = "1.0.57"
anyhow = "1.0.58"
console = "0.15.0"
const_format = "0.2.24"
headers = "0.3.7"
@ -27,7 +27,7 @@ indicatif = "0.16.2"
openssl-sys = "0.9.74"
pad = "0.1.6"
regex = "1.5.6"
reqwest = { version = "0.11.10", features = ["gzip"] }
reqwest = { version = "0.11.11", features = ["gzip"] }
# Make build work on Apple Silicon.
# See https://github.com/briansmith/ring/issues/1163
# This is necessary for the homebrew build
@ -54,7 +54,7 @@ log = "0.4.17"
assert_cmd = "2.0.4"
predicates = "2.1.1"
tempfile = "3.3.0"
uuid = { version = "1.1.1", features = ["v4"] }
uuid = { version = "1.1.2", features = ["v4"] }
wiremock = "0.5.13"
tracing-subscriber = { version = "0.3.11", default-features = false, features = ["fmt", "registry", "env-filter"] }

View file

@ -27,7 +27,7 @@ pulldown-cmark = "0.9.1"
regex = "1.5.6"
# Use trust-dns to avoid lookup failures on high concurrency
# https://github.com/seanmonstar/reqwest/issues/296
reqwest = { version = "0.11.10", features = ["gzip", "trust-dns"] }
reqwest = { version = "0.11.11", features = ["gzip", "trust-dns"] }
# Make build work on Apple Silicon.
# See https://github.com/briansmith/ring/issues/1163
# This is necessary for the homebrew build
@ -43,7 +43,7 @@ path-clean = "0.1.0"
percent-encoding = "2.1.0"
async-stream = "0.3.3"
jwalk = "0.6.0"
cached = "0.34.0"
cached = "0.34.1"
once_cell = "1.12.0"
thiserror = "1.0.31"
futures = "0.3.21"

View file

@ -1,5 +1,3 @@
use std::collections::HashSet;
use crate::types::{uri::raw::RawUri, FileType, InputContent};
mod html5ever;
@ -8,26 +6,16 @@ mod markdown;
mod plaintext;
use markdown::extract_markdown;
use once_cell::sync::Lazy;
use plaintext::extract_plaintext;
/// Names of the HTML elements whose content is deemed verbatim
/// (i.e. preformatted).
///
/// These will be excluded from link checking by default.
static VERBATIM_ELEMENTS: Lazy<HashSet<String>> = Lazy::new(|| {
    // Owned `String`s are produced from the literal tag names so the set
    // matches the declared `HashSet<String>` element type.
    ["pre", "code", "textarea", "samp", "xmp", "plaintext", "listing"]
        .iter()
        .copied()
        .map(String::from)
        .collect()
});
/// Check if the given element is in the list of preformatted ("verbatim") tags.
///
/// Elements with these tags are excluded from link checking by default.
///
/// The comparison is exact and case-sensitive: `name` must be one of the
/// lowercase tag names listed below, with no surrounding whitespace.
///
/// Returns `true` for a verbatim tag name and `false` for anything else,
/// including the empty string.
pub(crate) fn is_verbatim_elem(name: &str) -> bool {
    // A plain pattern match needs no lazily initialised set and no allocation.
    matches!(
        name,
        "pre" | "code" | "textarea" | "samp" | "xmp" | "plaintext" | "listing"
    )
}
/// A handler for extracting links from various input formats like Markdown and