mirror of
https://github.com/Hopiu/lychee.git
synced 2026-03-17 05:00:26 +00:00
Updated pulldown-cmark dependency and fixed maths parsing (#1473)
* Update pulldown-cmark version to 0.11.0
* Fix markdown math parsing
* Fix lints
* Disable flaky wayback test

---------

Co-authored-by: Matthias <matthias@endler.dev>
This commit is contained in:
parent
66ee7ef93c
commit
4bb8a61545
11 changed files with 289 additions and 79 deletions
6
.gitignore
vendored
6
.gitignore
vendored
|
|
@ -10,11 +10,10 @@ Cargo.lock
|
|||
# These are backup files generated by rustfmt
|
||||
**/*.rs.bk
|
||||
|
||||
# IntelliJ generated files
|
||||
# IDE generated files
|
||||
*.idea
|
||||
|
||||
# VSCode files
|
||||
.vscode
|
||||
.zed
|
||||
|
||||
# Lychee-specific files
|
||||
.lycheecache
|
||||
|
|
@ -24,3 +23,4 @@ Cargo.lock
|
|||
|
||||
# Other
|
||||
cookies.json
|
||||
|
||||
|
|
|
|||
230
Cargo.lock
generated
230
Cargo.lock
generated
|
|
@ -455,6 +455,53 @@ version = "1.2.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f1fdabc7756949593fe60f30ec81974b613357de856987752631dea1e3394c80"
|
||||
|
||||
[[package]]
|
||||
name = "axum"
|
||||
version = "0.7.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"axum-core",
|
||||
"bytes",
|
||||
"futures-util",
|
||||
"http 1.1.0",
|
||||
"http-body 1.0.0",
|
||||
"http-body-util",
|
||||
"itoa",
|
||||
"matchit",
|
||||
"memchr",
|
||||
"mime",
|
||||
"percent-encoding",
|
||||
"pin-project-lite",
|
||||
"rustversion",
|
||||
"serde",
|
||||
"sync_wrapper 1.0.1",
|
||||
"tower",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "axum-core"
|
||||
version = "0.4.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a15c63fd72d41492dc4f497196f5da1fb04fb7529e631d73630d1b491e47a2e3"
|
||||
dependencies = [
|
||||
"async-trait",
|
||||
"bytes",
|
||||
"futures-util",
|
||||
"http 1.1.0",
|
||||
"http-body 1.0.0",
|
||||
"http-body-util",
|
||||
"mime",
|
||||
"pin-project-lite",
|
||||
"rustversion",
|
||||
"sync_wrapper 0.1.2",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "backtrace"
|
||||
version = "0.3.71"
|
||||
|
|
@ -579,6 +626,12 @@ version = "0.6.8"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
|
||||
|
||||
[[package]]
|
||||
name = "bytes"
|
||||
version = "1.6.0"
|
||||
|
|
@ -801,6 +854,45 @@ dependencies = [
|
|||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "console-api"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "86ed14aa9c9f927213c6e4f3ef75faaad3406134efe84ba2cb7983431d5f0931"
|
||||
dependencies = [
|
||||
"futures-core",
|
||||
"prost",
|
||||
"prost-types",
|
||||
"tonic",
|
||||
"tracing-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "console-subscriber"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2e3a111a37f3333946ebf9da370ba5c5577b18eb342ec683eb488dd21980302"
|
||||
dependencies = [
|
||||
"console-api",
|
||||
"crossbeam-channel",
|
||||
"crossbeam-utils",
|
||||
"futures-task",
|
||||
"hdrhistogram",
|
||||
"humantime",
|
||||
"hyper-util",
|
||||
"prost",
|
||||
"prost-types",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"thread_local",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tonic",
|
||||
"tracing",
|
||||
"tracing-core",
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "const_format"
|
||||
version = "0.2.32"
|
||||
|
|
@ -1764,6 +1856,19 @@ dependencies = [
|
|||
"allocator-api2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hdrhistogram"
|
||||
version = "7.5.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "765c9198f173dd59ce26ff9f95ef0aafd0a0fe01fb9d72841bc5066a4c06511d"
|
||||
dependencies = [
|
||||
"base64 0.21.7",
|
||||
"byteorder",
|
||||
"flate2",
|
||||
"nom",
|
||||
"num-traits",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "headers"
|
||||
version = "0.4.0"
|
||||
|
|
@ -1956,9 +2061,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "hyper"
|
||||
version = "1.3.1"
|
||||
version = "1.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fe575dd17d0862a9a33781c8c4696a55c320909004a67a00fb286ba8b1bc496d"
|
||||
checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"futures-channel",
|
||||
|
|
@ -1983,7 +2088,7 @@ checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c"
|
|||
dependencies = [
|
||||
"futures-util",
|
||||
"http 1.1.0",
|
||||
"hyper 1.3.1",
|
||||
"hyper 1.4.1",
|
||||
"hyper-util",
|
||||
"log",
|
||||
"rustls",
|
||||
|
|
@ -2000,7 +2105,7 @@ version = "0.5.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3203a961e5c83b6f5498933e78b6b263e208c197b63e9c6c53cc82ffd3f63793"
|
||||
dependencies = [
|
||||
"hyper 1.3.1",
|
||||
"hyper 1.4.1",
|
||||
"hyper-util",
|
||||
"pin-project-lite",
|
||||
"tokio",
|
||||
|
|
@ -2028,7 +2133,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0"
|
|||
dependencies = [
|
||||
"bytes",
|
||||
"http-body-util",
|
||||
"hyper 1.3.1",
|
||||
"hyper 1.4.1",
|
||||
"hyper-util",
|
||||
"native-tls",
|
||||
"tokio",
|
||||
|
|
@ -2038,16 +2143,16 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "hyper-util"
|
||||
version = "0.1.3"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ca38ef113da30126bbff9cd1705f9273e15d45498615d138b0c20279ac7a76aa"
|
||||
checksum = "cde7055719c54e36e95e8719f95883f22072a48ede39db7fc17a4e1d5281e9b9"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"futures-channel",
|
||||
"futures-util",
|
||||
"http 1.1.0",
|
||||
"http-body 1.0.0",
|
||||
"hyper 1.3.1",
|
||||
"hyper 1.4.1",
|
||||
"pin-project-lite",
|
||||
"socket2 0.5.6",
|
||||
"tokio",
|
||||
|
|
@ -2378,6 +2483,7 @@ dependencies = [
|
|||
"assert_cmd",
|
||||
"clap",
|
||||
"console",
|
||||
"console-subscriber",
|
||||
"const_format",
|
||||
"csv",
|
||||
"dashmap",
|
||||
|
|
@ -2430,7 +2536,7 @@ dependencies = [
|
|||
"html5ever",
|
||||
"html5gum",
|
||||
"http 1.1.0",
|
||||
"hyper 1.3.1",
|
||||
"hyper 1.4.1",
|
||||
"ip_network",
|
||||
"jwalk",
|
||||
"linkify",
|
||||
|
|
@ -2512,6 +2618,12 @@ version = "0.1.10"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2532096657941c2fea9c289d370a250971c689d4f143798ff67113ec042024a5"
|
||||
|
||||
[[package]]
|
||||
name = "matchit"
|
||||
version = "0.7.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94"
|
||||
|
||||
[[package]]
|
||||
name = "md5"
|
||||
version = "0.7.0"
|
||||
|
|
@ -2683,7 +2795,7 @@ dependencies = [
|
|||
"http 1.1.0",
|
||||
"http-body 1.0.0",
|
||||
"http-body-util",
|
||||
"hyper 1.3.1",
|
||||
"hyper 1.4.1",
|
||||
"hyper-rustls",
|
||||
"hyper-timeout",
|
||||
"hyper-util",
|
||||
|
|
@ -3128,6 +3240,38 @@ dependencies = [
|
|||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prost"
|
||||
version = "0.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e13db3d3fde688c61e2446b4d843bc27a7e8af269a69440c0308021dc92333cc"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"prost-derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prost-derive"
|
||||
version = "0.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "18bec9b0adc4eba778b33684b7ba3e7137789434769ee3ce3930463ef904cfca"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"itertools",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.60",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "prost-types"
|
||||
version = "0.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cee5168b05f49d4b0ca581206eb14a7b22fafd963efe729ac48eb03266e25cc2"
|
||||
dependencies = [
|
||||
"prost",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "psl-types"
|
||||
version = "2.0.11"
|
||||
|
|
@ -3146,16 +3290,23 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "pulldown-cmark"
|
||||
version = "0.9.6"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b"
|
||||
checksum = "8746739f11d39ce5ad5c2520a9b75285310dbfe78c541ccf832d38615765aec0"
|
||||
dependencies = [
|
||||
"bitflags 2.5.0",
|
||||
"getopts",
|
||||
"memchr",
|
||||
"pulldown-cmark-escape",
|
||||
"unicase",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pulldown-cmark-escape"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae"
|
||||
|
||||
[[package]]
|
||||
name = "pwned"
|
||||
version = "0.5.0"
|
||||
|
|
@ -3334,7 +3485,7 @@ dependencies = [
|
|||
"serde",
|
||||
"serde_json",
|
||||
"serde_urlencoded",
|
||||
"sync_wrapper",
|
||||
"sync_wrapper 0.1.2",
|
||||
"system-configuration",
|
||||
"tokio",
|
||||
"tokio-native-tls",
|
||||
|
|
@ -3365,7 +3516,7 @@ dependencies = [
|
|||
"http 1.1.0",
|
||||
"http-body 1.0.0",
|
||||
"http-body-util",
|
||||
"hyper 1.3.1",
|
||||
"hyper 1.4.1",
|
||||
"hyper-rustls",
|
||||
"hyper-tls 0.6.0",
|
||||
"hyper-util",
|
||||
|
|
@ -3384,7 +3535,7 @@ dependencies = [
|
|||
"serde",
|
||||
"serde_json",
|
||||
"serde_urlencoded",
|
||||
"sync_wrapper",
|
||||
"sync_wrapper 0.1.2",
|
||||
"system-configuration",
|
||||
"tokio",
|
||||
"tokio-native-tls",
|
||||
|
|
@ -3989,6 +4140,12 @@ version = "0.1.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160"
|
||||
|
||||
[[package]]
|
||||
name = "sync_wrapper"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394"
|
||||
|
||||
[[package]]
|
||||
name = "system-configuration"
|
||||
version = "0.5.1"
|
||||
|
|
@ -4165,6 +4322,7 @@ dependencies = [
|
|||
"signal-hook-registry",
|
||||
"socket2 0.5.6",
|
||||
"tokio-macros",
|
||||
"tracing",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
|
|
@ -4271,6 +4429,36 @@ dependencies = [
|
|||
"winnow",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tonic"
|
||||
version = "0.12.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "38659f4a91aba8598d27821589f5db7dddd94601e7a01b1e485a50e5484c7401"
|
||||
dependencies = [
|
||||
"async-stream",
|
||||
"async-trait",
|
||||
"axum",
|
||||
"base64 0.22.0",
|
||||
"bytes",
|
||||
"h2 0.4.4",
|
||||
"http 1.1.0",
|
||||
"http-body 1.0.0",
|
||||
"http-body-util",
|
||||
"hyper 1.4.1",
|
||||
"hyper-timeout",
|
||||
"hyper-util",
|
||||
"percent-encoding",
|
||||
"pin-project",
|
||||
"prost",
|
||||
"socket2 0.5.6",
|
||||
"tokio",
|
||||
"tokio-stream",
|
||||
"tower",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tower"
|
||||
version = "0.4.13"
|
||||
|
|
@ -4279,8 +4467,11 @@ checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c"
|
|||
dependencies = [
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"indexmap 1.9.3",
|
||||
"pin-project",
|
||||
"pin-project-lite",
|
||||
"rand",
|
||||
"slab",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tower-layer",
|
||||
|
|
@ -4350,6 +4541,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"valuable",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -4524,6 +4716,12 @@ dependencies = [
|
|||
"getrandom",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "valuable"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
|
||||
|
||||
[[package]]
|
||||
name = "value-bag"
|
||||
version = "1.8.1"
|
||||
|
|
@ -4885,7 +5083,7 @@ dependencies = [
|
|||
"futures",
|
||||
"http 1.1.0",
|
||||
"http-body-util",
|
||||
"hyper 1.3.1",
|
||||
"hyper 1.4.1",
|
||||
"hyper-util",
|
||||
"log",
|
||||
"once_cell",
|
||||
|
|
|
|||
36
README.md
36
README.md
|
|
@ -301,7 +301,7 @@ Arguments:
|
|||
Options:
|
||||
-c, --config <CONFIG_FILE>
|
||||
Configuration file to use
|
||||
|
||||
|
||||
[default: lychee.toml]
|
||||
|
||||
-v, --verbose...
|
||||
|
|
@ -319,7 +319,7 @@ Options:
|
|||
|
||||
--max-cache-age <MAX_CACHE_AGE>
|
||||
Discard all cached requests older than this duration
|
||||
|
||||
|
||||
[default: 1d]
|
||||
|
||||
--dump
|
||||
|
|
@ -330,7 +330,7 @@ Options:
|
|||
|
||||
--archive <ARCHIVE>
|
||||
Specify the use of a specific web archive. Can be used in combination with `--suggest`
|
||||
|
||||
|
||||
[possible values: wayback]
|
||||
|
||||
--suggest
|
||||
|
|
@ -338,17 +338,17 @@ Options:
|
|||
|
||||
-m, --max-redirects <MAX_REDIRECTS>
|
||||
Maximum number of allowed redirects
|
||||
|
||||
|
||||
[default: 5]
|
||||
|
||||
--max-retries <MAX_RETRIES>
|
||||
Maximum number of retries per request
|
||||
|
||||
|
||||
[default: 3]
|
||||
|
||||
--max-concurrency <MAX_CONCURRENCY>
|
||||
Maximum number of concurrent network requests
|
||||
|
||||
|
||||
[default: 128]
|
||||
|
||||
-T, --threads <THREADS>
|
||||
|
|
@ -356,7 +356,7 @@ Options:
|
|||
|
||||
-u, --user-agent <USER_AGENT>
|
||||
User agent
|
||||
|
||||
|
||||
[default: lychee/x.y.z]
|
||||
|
||||
-i, --insecure
|
||||
|
|
@ -406,7 +406,7 @@ Options:
|
|||
Test the specified file extensions for URIs when checking files locally.
|
||||
Multiple extensions can be separated by commas. Extensions will be checked in
|
||||
order of appearance.
|
||||
|
||||
|
||||
Example: --fallback-extensions html,htm,php,asp,aspx,jsp,cgi
|
||||
|
||||
--header <HEADER>
|
||||
|
|
@ -414,20 +414,20 @@ Options:
|
|||
|
||||
-a, --accept <ACCEPT>
|
||||
A List of accepted status codes for valid links
|
||||
|
||||
|
||||
The following accept range syntax is supported: [start]..[=]end|code. Some valid
|
||||
examples are:
|
||||
|
||||
|
||||
- 200..=204
|
||||
- 200..204
|
||||
- ..=204
|
||||
- ..204
|
||||
- 200
|
||||
|
||||
|
||||
Use "lychee --accept '200..=204, 429, 500' <inputs>..." to provide a comma-
|
||||
separated list of accepted status codes. This example will accept 200, 201,
|
||||
202, 203, 204, 429, and 500 as valid status codes.
|
||||
|
||||
|
||||
[default: 100..=103,200..=299]
|
||||
|
||||
--include-fragments
|
||||
|
|
@ -435,17 +435,17 @@ Options:
|
|||
|
||||
-t, --timeout <TIMEOUT>
|
||||
Website timeout in seconds from connect to response finished
|
||||
|
||||
|
||||
[default: 20]
|
||||
|
||||
-r, --retry-wait-time <RETRY_WAIT_TIME>
|
||||
Minimum wait time in seconds between retries of failed requests
|
||||
|
||||
|
||||
[default: 1]
|
||||
|
||||
-X, --method <METHOD>
|
||||
Request method
|
||||
|
||||
|
||||
[default: get]
|
||||
|
||||
-b, --base <BASE>
|
||||
|
|
@ -456,7 +456,7 @@ Options:
|
|||
|
||||
--github-token <GITHUB_TOKEN>
|
||||
GitHub API token to use when checking github.com links, to avoid rate limiting
|
||||
|
||||
|
||||
[env: GITHUB_TOKEN]
|
||||
|
||||
--skip-missing
|
||||
|
|
@ -473,13 +473,13 @@ Options:
|
|||
|
||||
--mode <MODE>
|
||||
Set the output display mode. Determines how results are presented in the terminal
|
||||
|
||||
|
||||
[default: color]
|
||||
[possible values: plain, color, emoji]
|
||||
|
||||
-f, --format <FORMAT>
|
||||
Output format of final status report
|
||||
|
||||
|
||||
[default: compact]
|
||||
[possible values: compact, detailed, json, markdown, raw]
|
||||
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ anyhow = "1.0.82"
|
|||
assert-json-diff = "2.0.2"
|
||||
clap = { version = "4.5.4", features = ["env", "derive"] }
|
||||
console = "0.15.8"
|
||||
console-subscriber = { version = "0.4.0", optional = true }
|
||||
const_format = "0.2.32"
|
||||
csv = "1.3.0"
|
||||
dashmap = { version = "5.5.3", features = ["serde"] }
|
||||
|
|
@ -68,17 +69,8 @@ tracing-subscriber = { version = "0.3.18", default-features = false, features =
|
|||
uuid = { version = "1.8.0", features = ["v4"] }
|
||||
wiremock = "0.6.0"
|
||||
|
||||
# console-subscriber is not yet published to crates.io
|
||||
# Users have to uncomment this section and the feature below and build lychee
|
||||
# locally
|
||||
# TODO: Remove this git revision pin after publication
|
||||
#[dependencies.console-subscriber]
|
||||
#optional = true
|
||||
#git = "https://github.com/tokio-rs/console"
|
||||
#rev = "926de99ce4cbfd02c87190f9ec5f1c60b5c305d5"
|
||||
|
||||
[features]
|
||||
#tokio-console = ["console-subscriber", "tracing-subscriber/registry"]
|
||||
tokio-console = ["console-subscriber", "tracing-subscriber/registry"]
|
||||
|
||||
# Compile and statically link a copy of OpenSSL.
|
||||
vendored-openssl = ["openssl-sys/vendored"]
|
||||
|
|
@ -109,4 +101,4 @@ required-features = ["check_example_domains"]
|
|||
[package.metadata.binstall]
|
||||
pkg-url = "{ repo }/releases/download/v{ version }/{ name }-v{ version }-{ target }{ archive-suffix }"
|
||||
bin-dir = "{ bin }{ binary-ext }"
|
||||
pkg-fmt = "tgz"
|
||||
pkg-fmt = "tgz"
|
||||
|
|
@ -60,7 +60,11 @@ mod tests {
|
|||
use std::{error::Error as StdError, time::Duration};
|
||||
use tokio::time::sleep;
|
||||
|
||||
// This test is currently ignored because it is flaky.
|
||||
// The Wayback Machine does not always return a suggestion.
|
||||
// We can consider mocking the endpoint in the future.
|
||||
#[tokio::test]
|
||||
#[ignore]
|
||||
async fn wayback_suggestion() -> Result<(), Box<dyn StdError>> {
|
||||
let target_url = "https://example.com".parse::<Url>()?;
|
||||
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@
|
|||
variant_size_differences,
|
||||
clippy::missing_const_for_fn
|
||||
)]
|
||||
#![deny(anonymous_parameters, macro_use_extern_crate, pointer_structural_match)]
|
||||
#![deny(anonymous_parameters, macro_use_extern_crate)]
|
||||
#![deny(missing_docs)]
|
||||
|
||||
use std::fs::{self, File};
|
||||
|
|
|
|||
|
|
@ -274,7 +274,7 @@ mod cli {
|
|||
cmd.arg("--offline")
|
||||
.arg("--base")
|
||||
.arg(&offline_dir)
|
||||
.arg(&offline_dir.join("index.html"))
|
||||
.arg(offline_dir.join("index.html"))
|
||||
.env_clear()
|
||||
.assert()
|
||||
.success()
|
||||
|
|
@ -1130,7 +1130,7 @@ mod cli {
|
|||
let offline_dir = fixtures_path().join("offline");
|
||||
|
||||
cmd.arg("--offline")
|
||||
.arg(&offline_dir.join("index.html"))
|
||||
.arg(offline_dir.join("index.html"))
|
||||
.env_clear()
|
||||
.assert()
|
||||
.success()
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ once_cell = "1.19.0"
|
|||
openssl-sys = { version = "0.9.102", optional = true }
|
||||
path-clean = "1.0.1"
|
||||
percent-encoding = "2.3.1"
|
||||
pulldown-cmark = "0.9.6"
|
||||
pulldown-cmark = "0.11.0"
|
||||
regex = "1.10.4"
|
||||
# Use trust-dns to avoid lookup failures on high concurrency
|
||||
# https://github.com/seanmonstar/reqwest/issues/296
|
||||
|
|
|
|||
|
|
@ -1,25 +1,31 @@
|
|||
//! Extract links and fragments from markdown documents
|
||||
use std::collections::{HashMap, HashSet};
|
||||
|
||||
use pulldown_cmark::{Event, Options, Parser, Tag};
|
||||
use pulldown_cmark::{CowStr, Event, Options, Parser, Tag, TagEnd};
|
||||
|
||||
use crate::{extract::plaintext::extract_plaintext, types::uri::raw::RawUri};
|
||||
|
||||
use super::html::html5gum::{extract_html, extract_html_fragments};
|
||||
|
||||
/// Returns the default markdown extensions used by lychee.
|
||||
/// Sadly `|` is not const for `Options` so we can't use a const global.
|
||||
fn md_extensions() -> Options {
|
||||
Options::ENABLE_HEADING_ATTRIBUTES | Options::ENABLE_MATH
|
||||
}
|
||||
|
||||
/// Extract unparsed URL strings from a Markdown string.
|
||||
pub(crate) fn extract_markdown(input: &str, include_verbatim: bool) -> Vec<RawUri> {
|
||||
// In some cases it is undesirable to extract links from within code blocks,
|
||||
// which is why we keep track of entries and exits while traversing the input.
|
||||
let mut inside_code_block = false;
|
||||
|
||||
let parser = Parser::new(input);
|
||||
let parser = Parser::new_ext(input, md_extensions());
|
||||
parser
|
||||
.filter_map(|event| match event {
|
||||
// A link. Only the destination URL is used here; the other fields (link type, title, …) are ignored.
|
||||
Event::Start(Tag::Link(_, uri, _)) => {
|
||||
Event::Start(Tag::Link { dest_url, .. }) => {
|
||||
Some(vec![RawUri {
|
||||
text: uri.to_string(),
|
||||
text: dest_url.to_string(),
|
||||
// Emulate `<a href="...">` tag here to be compatible with
|
||||
// HTML links. We might consider using the actual Markdown
|
||||
// `LinkType` for better granularity in the future
|
||||
|
|
@ -28,9 +34,9 @@ pub(crate) fn extract_markdown(input: &str, include_verbatim: bool) -> Vec<RawUr
|
|||
}])
|
||||
}
|
||||
// An image. Only the destination URL is used here; the other fields (link type, title, …) are ignored.
|
||||
Event::Start(Tag::Image(_, uri, _)) => {
|
||||
Event::Start(Tag::Image { dest_url, .. }) => {
|
||||
Some(vec![RawUri {
|
||||
text: uri.to_string(),
|
||||
text: dest_url.to_string(),
|
||||
// Emulate `<img src="...">` tag here to be compatible with
|
||||
// HTML links. We might consider using the actual Markdown
|
||||
// `LinkType` for better granularity in the future
|
||||
|
|
@ -43,7 +49,7 @@ pub(crate) fn extract_markdown(input: &str, include_verbatim: bool) -> Vec<RawUr
|
|||
inside_code_block = true;
|
||||
None
|
||||
}
|
||||
Event::End(Tag::CodeBlock(_)) => {
|
||||
Event::End(TagEnd::CodeBlock) => {
|
||||
inside_code_block = false;
|
||||
None
|
||||
}
|
||||
|
|
@ -58,7 +64,7 @@ pub(crate) fn extract_markdown(input: &str, include_verbatim: bool) -> Vec<RawUr
|
|||
}
|
||||
|
||||
// An HTML node
|
||||
Event::Html(html) => {
|
||||
Event::Html(html) | Event::InlineHtml(html) => {
|
||||
// This won't exclude verbatim links right now, because HTML gets passed in chunks
|
||||
// by pulldown_cmark. So excluding `<pre>` and `<code>` is not handled right now.
|
||||
Some(extract_html(&html, include_verbatim))
|
||||
|
|
@ -89,37 +95,39 @@ pub(crate) fn extract_markdown(input: &str, include_verbatim: bool) -> Vec<RawUr
|
|||
/// It means a single heading such as `## Frag 1 {#frag-2}` would generate two fragments.
|
||||
pub(crate) fn extract_markdown_fragments(input: &str) -> HashSet<String> {
|
||||
let mut in_heading = false;
|
||||
let mut heading = String::new();
|
||||
let mut heading_text = String::new();
|
||||
let mut heading_id: Option<CowStr<'_>> = None;
|
||||
let mut id_generator = HeadingIdGenerator::default();
|
||||
|
||||
let mut out = HashSet::new();
|
||||
|
||||
for event in Parser::new_ext(input, Options::ENABLE_HEADING_ATTRIBUTES) {
|
||||
for event in Parser::new_ext(input, md_extensions()) {
|
||||
match event {
|
||||
Event::Start(Tag::Heading(..)) => {
|
||||
Event::Start(Tag::Heading { id, .. }) => {
|
||||
heading_id = id;
|
||||
in_heading = true;
|
||||
}
|
||||
Event::End(Tag::Heading(_level, id, _classes)) => {
|
||||
if let Some(frag) = id {
|
||||
Event::End(TagEnd::Heading(_)) => {
|
||||
if let Some(frag) = heading_id.take() {
|
||||
out.insert(frag.to_string());
|
||||
}
|
||||
|
||||
if !heading.is_empty() {
|
||||
let id = id_generator.generate(&heading);
|
||||
if !heading_text.is_empty() {
|
||||
let id = id_generator.generate(&heading_text);
|
||||
out.insert(id);
|
||||
heading.clear();
|
||||
heading_text.clear();
|
||||
}
|
||||
|
||||
in_heading = false;
|
||||
}
|
||||
Event::Text(text) | Event::Code(text) => {
|
||||
if in_heading {
|
||||
heading.push_str(&text);
|
||||
heading_text.push_str(&text);
|
||||
};
|
||||
}
|
||||
|
||||
// An HTML node
|
||||
Event::Html(html) => {
|
||||
Event::Html(html) | Event::InlineHtml(html) => {
|
||||
out.extend(extract_html_fragments(&html));
|
||||
}
|
||||
|
||||
|
|
@ -289,4 +297,15 @@ Some pre-formatted http://pre.com
|
|||
);
|
||||
check("Many spaces", "many----------spaces");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_mardown_math() {
|
||||
let input = r"
|
||||
$$
|
||||
[\psi](\mathbf{L})
|
||||
$$
|
||||
";
|
||||
let uris = extract_markdown(input, true);
|
||||
assert!(uris.is_empty());
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@
|
|||
variant_size_differences,
|
||||
clippy::missing_const_for_fn
|
||||
)]
|
||||
#![deny(anonymous_parameters, macro_use_extern_crate, pointer_structural_match)]
|
||||
#![deny(anonymous_parameters, macro_use_extern_crate)]
|
||||
#![deny(missing_docs)]
|
||||
#![allow(clippy::module_name_repetitions)]
|
||||
|
||||
|
|
|
|||
|
|
@ -224,15 +224,12 @@ impl Status {
|
|||
None
|
||||
}
|
||||
}
|
||||
Status::Cached(cache_status) => match cache_status {
|
||||
CacheStatus::Ok(code) | CacheStatus::Error(Some(code)) => {
|
||||
match StatusCode::from_u16(*code) {
|
||||
Ok(code) => Some(code),
|
||||
Err(_) => None,
|
||||
}
|
||||
Status::Cached(CacheStatus::Ok(code) | CacheStatus::Error(Some(code))) => {
|
||||
match StatusCode::from_u16(*code) {
|
||||
Ok(code) => Some(code),
|
||||
Err(_) => None,
|
||||
}
|
||||
_ => None,
|
||||
},
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue