mirror of
https://github.com/Hopiu/lychee.git
synced 2026-04-27 08:24:47 +00:00
This splits up the code into a `lib` and a `bin` to make the runtime usable from other crates. Co-authored-by: Paweł Romanowski <pawroman@pawroman.dev>
This commit is contained in:
parent
b6dbf038d0
commit
b7ab4abb0d
16 changed files with 472 additions and 345 deletions
182
Cargo.lock
generated
182
Cargo.lock
generated
|
|
@ -26,47 +26,47 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "aes"
|
||||
version = "0.4.0"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f7001367fde4c768a19d1029f0a8be5abd9308e1119846d5bd9ad26297b8faf5"
|
||||
checksum = "884391ef1066acaa41e766ba8f596341b96e93ce34f9a43e7d24bf0a0eaf0561"
|
||||
dependencies = [
|
||||
"aes-soft",
|
||||
"aesni",
|
||||
"block-cipher",
|
||||
"cipher",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aes-gcm"
|
||||
version = "0.6.0"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "86f5007801316299f922a6198d1d09a0bae95786815d066d5880d13f7c45ead1"
|
||||
checksum = "5278b5fabbb9bd46e24aa69b2fdea62c99088e0a950a9be40e3e0101298f88da"
|
||||
dependencies = [
|
||||
"aead",
|
||||
"aes",
|
||||
"block-cipher",
|
||||
"cipher",
|
||||
"ctr",
|
||||
"ghash",
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aes-soft"
|
||||
version = "0.4.0"
|
||||
version = "0.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4925647ee64e5056cf231608957ce7c81e12d6d6e316b9ce1404778cc1d35fa7"
|
||||
checksum = "be14c7498ea50828a38d0e24a765ed2effe92a705885b57d029cd67d45744072"
|
||||
dependencies = [
|
||||
"block-cipher",
|
||||
"byteorder",
|
||||
"opaque-debug 0.2.3",
|
||||
"cipher",
|
||||
"opaque-debug 0.3.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aesni"
|
||||
version = "0.7.0"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d050d39b0b7688b3a3254394c3e30a9d66c41dcf9b05b0e2dbdc623f6505d264"
|
||||
checksum = "ea2e11f5e94c2f7d386164cc2aa1f97823fed6f259e486940a71c174dd01b0ce"
|
||||
dependencies = [
|
||||
"block-cipher",
|
||||
"opaque-debug 0.2.3",
|
||||
"cipher",
|
||||
"opaque-debug 0.3.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -203,9 +203,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "async-h1"
|
||||
version = "2.1.2"
|
||||
version = "2.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7ca2b5cfe1804f48bb8dfb1b2391e6e9a3fbf89e07514dce3bddb03eb4d529db"
|
||||
checksum = "3fd9a5f3dbb5065856974e08c2ac24e6f81da6e39d2328de1c03a9a2b34ffb01"
|
||||
dependencies = [
|
||||
"async-std",
|
||||
"byte-pool",
|
||||
|
|
@ -379,9 +379,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "base-x"
|
||||
version = "0.2.6"
|
||||
version = "0.2.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1b20b618342cf9891c292c4f5ac2cde7287cc5c87e87e9c769d617793607dec1"
|
||||
checksum = "a4521f3e3d031370679b3b140beb36dfe4801b09ac77e30c61941f97df3ef28b"
|
||||
|
||||
[[package]]
|
||||
name = "base64"
|
||||
|
|
@ -514,15 +514,6 @@ dependencies = [
|
|||
"generic-array 0.14.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "block-cipher"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fa136449e765dc7faa244561ccae839c394048667929af599b5d931ebe7b7f10"
|
||||
dependencies = [
|
||||
"generic-array 0.14.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "block-padding"
|
||||
version = "0.1.5"
|
||||
|
|
@ -652,6 +643,15 @@ dependencies = [
|
|||
"time 0.1.43",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cipher"
|
||||
version = "0.2.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "12f8e7987cbd042a63249497f41aed09f8e65add917ea6566effbc56578d6801"
|
||||
dependencies = [
|
||||
"generic-array 0.14.4",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "2.33.3"
|
||||
|
|
@ -737,9 +737,9 @@ checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc"
|
|||
|
||||
[[package]]
|
||||
name = "cookie"
|
||||
version = "0.14.2"
|
||||
version = "0.14.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1373a16a4937bc34efec7b391f9c1500c30b8478a701a4f44c9165cc0475a6e0"
|
||||
checksum = "784ad0fbab4f3e9cef09f20e0aea6000ae08d2cb98ac4c0abc53df18803d702f"
|
||||
dependencies = [
|
||||
"aes-gcm",
|
||||
"base64 0.12.3",
|
||||
|
|
@ -748,7 +748,7 @@ dependencies = [
|
|||
"percent-encoding",
|
||||
"rand",
|
||||
"sha2",
|
||||
"time 0.2.22",
|
||||
"time 0.2.23",
|
||||
"version_check",
|
||||
]
|
||||
|
||||
|
|
@ -785,12 +785,12 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "crossbeam-channel"
|
||||
version = "0.4.3"
|
||||
version = "0.4.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "09ee0cc8804d5393478d743b035099520087a5186f3b93fa58cec08fa62407b6"
|
||||
checksum = "b153fe7cbef478c567df0f972e02e6d736db11affe43dfc9c56a9374d1adfb87"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.10",
|
||||
"crossbeam-utils 0.7.2",
|
||||
"maybe-uninit",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -854,14 +854,23 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "crypto-mac"
|
||||
version = "0.8.0"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b584a330336237c1eecd3e94266efb216c56ed91225d634cb2991c5f3fd1aeab"
|
||||
checksum = "4857fd85a0c34b3c3297875b747c1e02e06b6a0ea32dd892d8192b9ce0813ea6"
|
||||
dependencies = [
|
||||
"generic-array 0.14.4",
|
||||
"subtle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ctr"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fb4a30d54f7443bf3d6191dcd486aca19e67cb3c49fa7a06a319966346707e7f"
|
||||
dependencies = [
|
||||
"cipher",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "darling"
|
||||
version = "0.10.2"
|
||||
|
|
@ -1032,19 +1041,6 @@ dependencies = [
|
|||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "env_logger"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "44533bbbb3bb3c1fa17d9f2e4e38bbbaf8396ba82193c4cb1b6445d711445d36"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"humantime",
|
||||
"log",
|
||||
"regex",
|
||||
"termcolor",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "event-listener"
|
||||
version = "2.4.0"
|
||||
|
|
@ -1424,9 +1420,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "hkdf"
|
||||
version = "0.9.0"
|
||||
version = "0.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fe1149865383e4526a43aee8495f9a325f0b806c63ce6427d06336a590abbbc9"
|
||||
checksum = "51ab2f639c231793c5f6114bdb9bbe50a7dbbfcd7c7c6bd8475dec2d991e964f"
|
||||
dependencies = [
|
||||
"digest 0.9.0",
|
||||
"hmac",
|
||||
|
|
@ -1434,9 +1430,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "hmac"
|
||||
version = "0.8.1"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "126888268dcc288495a26bf004b38c5fdbb31682f992c84ceb046a1f0fe38840"
|
||||
checksum = "c1441c6b1e930e2817404b5046f1f989899143a12bf92de603b69f4e0aee1e15"
|
||||
dependencies = [
|
||||
"crypto-mac",
|
||||
"digest 0.9.0",
|
||||
|
|
@ -1486,13 +1482,16 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "http-types"
|
||||
version = "2.5.0"
|
||||
version = "2.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "50e703a631784b7881751ebff731cd645eb4c7f9b6288c19178ba9e1c4788d39"
|
||||
checksum = "6f316f6a06306570e899238d3b85375f350cfceda60ec47807c4164d6e169e58"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"async-channel",
|
||||
"async-std",
|
||||
"base64 0.13.0",
|
||||
"cookie",
|
||||
"futures-lite",
|
||||
"http",
|
||||
"infer",
|
||||
"pin-project-lite 0.1.7",
|
||||
|
|
@ -1532,15 +1531,6 @@ dependencies = [
|
|||
"url",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "humantime"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df004cfca50ef23c36850aaaa59ad52cc70d0e90243c3c7737a4dd32dc7a3c4f"
|
||||
dependencies = [
|
||||
"quick-error",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hyper"
|
||||
version = "0.13.7"
|
||||
|
|
@ -1643,9 +1633,12 @@ checksum = "64e9829a50b42bb782c1df523f78d332fe371b10c661e78b7a3c34b0198e9fac"
|
|||
|
||||
[[package]]
|
||||
name = "instant"
|
||||
version = "0.1.6"
|
||||
version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5b141fdc7836c525d4d594027d318c84161ca17aaf8113ab1f81ab93ae897485"
|
||||
checksum = "61124eeebbd69b8190558df225adf7e4caafce0d743919e5d6b19652314ec5ec"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "iovec"
|
||||
|
|
@ -1794,9 +1787,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "lock_api"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "28247cc5a5be2f05fbcd76dd0cf2c7d3b5400cb978a28042abcd4fa0b3f8261c"
|
||||
checksum = "dd96ffd135b2fd7b973ac026d28085defbe8983df057ced3eb4f2130b0831312"
|
||||
dependencies = [
|
||||
"scopeguard",
|
||||
]
|
||||
|
|
@ -1836,9 +1829,7 @@ dependencies = [
|
|||
"indicatif",
|
||||
"lazy_static",
|
||||
"linkify",
|
||||
"log",
|
||||
"predicates",
|
||||
"pretty_env_logger",
|
||||
"pulldown-cmark",
|
||||
"quick-xml",
|
||||
"regex",
|
||||
|
|
@ -1898,9 +1889,9 @@ checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400"
|
|||
|
||||
[[package]]
|
||||
name = "memoffset"
|
||||
version = "0.5.5"
|
||||
version = "0.5.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c198b026e1bbf08a937e94c6c60f9ec4a2267f5b0d2eec9c1b21b061ce2be55f"
|
||||
checksum = "043175f069eda7b85febe4a74abbaeff828d9f8b448515d3151a14a3542811aa"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
]
|
||||
|
|
@ -2168,9 +2159,9 @@ checksum = "427c3892f9e783d91cc128285287e70a59e206ca452770ece88a76f7a3eddd72"
|
|||
|
||||
[[package]]
|
||||
name = "parking_lot"
|
||||
version = "0.11.0"
|
||||
version = "0.11.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a4893845fa2ca272e647da5d0e46660a314ead9c2fdd9a883aabc32e481a8733"
|
||||
checksum = "6d7744ac029df22dca6284efe4e898991d28e3085c706c972bcd7da4a27a15eb"
|
||||
dependencies = [
|
||||
"instant",
|
||||
"lock_api",
|
||||
|
|
@ -2288,11 +2279,11 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "polyval"
|
||||
version = "0.4.1"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a5884790f1ce3553ad55fec37b5aaac5882e0e845a2612df744d6c85c9bf046c"
|
||||
checksum = "b3fd900a291ceb8b99799cc8cd3d1d3403a51721e015bc533528b2ceafcc443c"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.10",
|
||||
"cfg-if 1.0.0",
|
||||
"universal-hash",
|
||||
]
|
||||
|
||||
|
|
@ -2331,16 +2322,6 @@ dependencies = [
|
|||
"treeline",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pretty_env_logger"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "926d36b9553851b8b0005f1275891b392ee4d2d833852c417ed025477350fb9d"
|
||||
dependencies = [
|
||||
"env_logger",
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro-error"
|
||||
version = "1.0.4"
|
||||
|
|
@ -2749,12 +2730,12 @@ checksum = "2579985fda508104f7587689507983eadd6a6e84dd35d6d115361f530916fa0d"
|
|||
|
||||
[[package]]
|
||||
name = "sha2"
|
||||
version = "0.9.1"
|
||||
version = "0.9.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2933378ddfeda7ea26f48c555bdad8bb446bf8a3d17832dc83e380d444cfb8c1"
|
||||
checksum = "6e7aab86fe2149bad8c507606bdb3f4ef5e7b2380eb92350f56122cca72a42a8"
|
||||
dependencies = [
|
||||
"block-buffer 0.9.0",
|
||||
"cfg-if 0.1.10",
|
||||
"cfg-if 1.0.0",
|
||||
"cpuid-bool",
|
||||
"digest 0.9.0",
|
||||
"opaque-debug 0.3.0",
|
||||
|
|
@ -2837,9 +2818,9 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
|
|||
|
||||
[[package]]
|
||||
name = "standback"
|
||||
version = "0.2.11"
|
||||
version = "0.2.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f4e0831040d2cf2bdfd51b844be71885783d489898a192f254ae25d57cce725c"
|
||||
checksum = "cf906c8b8fc3f6ecd1046e01da1d8ddec83e48c8b08b84dcc02b585a6bedf5a8"
|
||||
dependencies = [
|
||||
"version_check",
|
||||
]
|
||||
|
|
@ -2966,15 +2947,6 @@ dependencies = [
|
|||
"winapi 0.3.9",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "termcolor"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bb6bfa289a4d7c5766392812c0a1f4c1ba45afa1ad47803c11e1f407d846d75f"
|
||||
dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "terminal_size"
|
||||
version = "0.1.13"
|
||||
|
|
@ -3044,9 +3016,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "time"
|
||||
version = "0.2.22"
|
||||
version = "0.2.23"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "55b7151c9065e80917fbf285d9a5d1432f60db41d170ccafc749a136b41a93af"
|
||||
checksum = "bcdaeea317915d59b2b4cd3b5efcd156c309108664277793f5351700c02ce98b"
|
||||
dependencies = [
|
||||
"const_fn",
|
||||
"libc",
|
||||
|
|
@ -3228,9 +3200,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "tracing-serde"
|
||||
version = "0.1.1"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b6ccba2f8f16e0ed268fc765d9b7ff22e965e7185d32f8f1ec8294fe17d86e79"
|
||||
checksum = "fb65ea441fbb84f9f6748fd496cf7f63ec9af5bca94dd86456978d055e8eb28b"
|
||||
dependencies = [
|
||||
"serde",
|
||||
"tracing-core",
|
||||
|
|
|
|||
11
Cargo.toml
11
Cargo.toml
|
|
@ -4,6 +4,13 @@ description = "A glorious link checker"
|
|||
documentation = "https://github.com/lycheeverse/lychee/blob/master/README.md"
|
||||
edition = "2018"
|
||||
homepage = "https://github.com/lycheeverse/lychee"
|
||||
keywords = [
|
||||
"link",
|
||||
"checker",
|
||||
"cli",
|
||||
"link-checker",
|
||||
"validator",
|
||||
]
|
||||
license = "Apache-2.0/MIT"
|
||||
name = "lychee"
|
||||
repository = "https://github.com/lycheeverse/lychee"
|
||||
|
|
@ -16,8 +23,6 @@ glob = "0.3"
|
|||
http = "0.2"
|
||||
hubcaps = "0.6"
|
||||
linkify = "0.4.0"
|
||||
log = "0.4"
|
||||
pretty_env_logger = "0.4"
|
||||
regex = "1.4.2"
|
||||
url = "2.2.0"
|
||||
check-if-email-exists = "0.8.15"
|
||||
|
|
@ -32,6 +37,7 @@ derive_builder = "0.9.0"
|
|||
deadpool = "0.6.0"
|
||||
shellexpand = "2.0"
|
||||
lazy_static = "1.1"
|
||||
wiremock = "0.3.0"
|
||||
|
||||
[dependencies.reqwest]
|
||||
features = ["gzip"]
|
||||
|
|
@ -42,7 +48,6 @@ features = ["full"]
|
|||
version = "0.2"
|
||||
|
||||
[dev-dependencies]
|
||||
wiremock = "0.3"
|
||||
assert_cmd = "1.0"
|
||||
predicates = "1.0"
|
||||
uuid = { version = "0.8", features = ["v4"] }
|
||||
|
|
|
|||
187
README.md
187
README.md
|
|
@ -10,43 +10,45 @@ limiter.
|
|||
|
||||
## Features
|
||||
|
||||
This comparison is made on a best-effort basis. Please create a PR to fix outdated information.
|
||||
This comparison is made on a best-effort basis. Please create a PR to fix
|
||||
outdated information.
|
||||
|
||||
| | lychee | [awesome_bot] | [muffet] | [broken-link-checker] | [linkinator] | [linkchecker] | [markdown-link-check] | [fink] |
|
||||
| -------------------- | ------- | ----------- | -------- | ------------------- | ---------- | ----------- | ------------------- | ------ |
|
||||
| Language | Rust | Ruby | Go | JS | TypeScript | Python | JS | PHP |
|
||||
| Async/Parallel | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] |
|
||||
| Static binary | ![yes] | ![no] | ![yes] | ![no] | ![no] | ️ ![no] | ![no] | ![no] |
|
||||
| Markdown files | ![yes] | ![yes] | ![no] | ![no] | ![no] | ![no] | ️ ![yes] | ![no] |
|
||||
| HTML files | ![yes] | ![no] | ![no] | ![yes] | ![yes] | ![no] | ![no] | ![no] |
|
||||
| Text files | ![yes] | ![no] | ![no] | ![no] | ![no] | ![no] | ![no] | ![no] |
|
||||
| Website support | ![yes] | ![no] | ![yes] | ![yes] | ![yes] | ![yes] | ![no] | ![yes] |
|
||||
| Chunked encodings | ![yes] | ![maybe] | ![maybe] | ![maybe] | ![maybe] | ![no] | ![yes] | ![yes] |
|
||||
| GZIP compression | ![yes] | ![maybe] | ![maybe] | ![yes] | ![maybe] | ![yes] | ![maybe] | ![no] |
|
||||
| Basic Auth | ![yes] | ![no] | ![no] | ![yes] | ![no] | ![yes] | ![no] | ![no] |
|
||||
| Custom user agent | ![yes] | ![no] | ![no] | ![yes] | ![no] | ![yes] | ![no] | ![no] |
|
||||
| Relative URLs | ![yes] | ![yes] | ![no] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] |
|
||||
| Skip relative URLs | ![yes] | ![no] | ![no] | ![maybe] | ![no] | ![no] | ![no] | ![no] |
|
||||
| Include patterns | ![yes]️ | ![yes] | ![no] | ![yes] | ![no] | ![no] | ![no] | ![no] |
|
||||
| Exclude patterns | ![yes] | ![no] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] |
|
||||
| Handle redirects | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] |
|
||||
| Ignore insecure SSL | ![yes] | ![yes] | ![yes] | ![no] | ![no] | ![yes] | ![no] | ![yes] |
|
||||
| File globbing | ![yes] | ![yes] | ![no] | ![no] | ![yes] | ![no] | ![yes] | ![no] |
|
||||
| Limit scheme | ![yes] | ![no] | ![no] | ![yes] | ![no] | ![yes] | ![no] | ![no] |
|
||||
| [Custom headers] | ![yes] | ![no] | ![yes] | ![no] | ![no] | ![no] | ![yes] | ![yes] |
|
||||
| Summary | ![yes] | ![yes] | ![yes] | ![maybe] | ![yes] | ![yes] | ![no] | ![yes] |
|
||||
| `HEAD` requests | ![yes] | ![yes] | ![no] | ![yes] | ![yes] | ![yes] | ![no] | ![no] |
|
||||
| Colored output | ![yes] | ![maybe] | ![yes] | ![maybe] | ![yes] | ![yes] | ![no] | ![yes] |
|
||||
| [Filter status code] | ![yes] | ![yes] | ![no] | ![no] | ![no] | ![no] | ![yes] | ![no] |
|
||||
| Custom timeout | ![yes] | ![yes] | ![yes] | ![no] | ![yes] | ![yes] | ![no] | ![yes] |
|
||||
| E-mail links | ![yes] | ![no] | ![no] | ![no] | ![no] | ![yes] | ![no] | ![no] |
|
||||
| Progress bar | ![yes] | ![yes] | ![no] | ![no] | ![no] | ![yes] | ![yes] | ![yes] |
|
||||
| Retry and backoff | ![yes] | ![no] | ![no] | ![no] | ![yes] | ![no] | ![yes] | ![no] |
|
||||
| Skip private domains | ![yes] | ![no] | ![no] | ![no] | ![no] | ![no] | ![no] | ![no] |
|
||||
| [Use as lib] | ![no] | ![yes] | ![no] | ![yes] | ![yes] | ![no] | ![yes] | ![no] |
|
||||
| Quiet mode | ![yes] | ![no] | ![no] | ![no] | ![yes] | ![yes] | ![yes] | ![yes] |
|
||||
| Amazing lychee logo | ![yes] | ![no] | ![no] | ![no] | ![no] | ![no] | ![no] | ![no] |
|
||||
| Config file | ![yes] | ![no] | ![no] | ![no] | ![yes] | ![yes] | ![yes] | ![no] |
|
||||
| | lychee | [awesome_bot] | [muffet] | [broken-link-checker] | [linkinator] | [linkchecker] | [markdown-link-check] | [fink] |
|
||||
| -------------------- | ------- | ------------- | -------- | --------------------- | ------------ | ------------- | --------------------- | ------ |
|
||||
| Language | Rust | Ruby | Go | JS | TypeScript | Python | JS | PHP |
|
||||
| Async/Parallel | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] |
|
||||
| Static binary | ![yes] | ![no] | ![yes] | ![no] | ![no] | ️ ![no] | ![no] | ![no] |
|
||||
| Markdown files | ![yes] | ![yes] | ![no] | ![no] | ![no] | ![no] | ️ ![yes] | ![no] |
|
||||
| HTML files | ![yes] | ![no] | ![no] | ![yes] | ![yes] | ![no] | ![no] | ![no] |
|
||||
| Text files | ![yes] | ![no] | ![no] | ![no] | ![no] | ![no] | ![no] | ![no] |
|
||||
| Website support | ![yes] | ![no] | ![yes] | ![yes] | ![yes] | ![yes] | ![no] | ![yes] |
|
||||
| Chunked encodings | ![yes] | ![maybe] | ![maybe] | ![maybe] | ![maybe] | ![no] | ![yes] | ![yes] |
|
||||
| GZIP compression | ![yes] | ![maybe] | ![maybe] | ![yes] | ![maybe] | ![yes] | ![maybe] | ![no] |
|
||||
| Basic Auth | ![yes] | ![no] | ![no] | ![yes] | ![no] | ![yes] | ![no] | ![no] |
|
||||
| Custom user agent | ![yes] | ![no] | ![no] | ![yes] | ![no] | ![yes] | ![no] | ![no] |
|
||||
| Relative URLs | ![yes] | ![yes] | ![no] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] |
|
||||
| Skip relative URLs | ![yes] | ![no] | ![no] | ![maybe] | ![no] | ![no] | ![no] | ![no] |
|
||||
| Include patterns | ![yes]️ | ![yes] | ![no] | ![yes] | ![no] | ![no] | ![no] | ![no] |
|
||||
| Exclude patterns | ![yes] | ![no] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] |
|
||||
| Handle redirects | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] | ![yes] |
|
||||
| Ignore insecure SSL | ![yes] | ![yes] | ![yes] | ![no] | ![no] | ![yes] | ![no] | ![yes] |
|
||||
| File globbing | ![yes] | ![yes] | ![no] | ![no] | ![yes] | ![no] | ![yes] | ![no] |
|
||||
| Limit scheme | ![yes] | ![no] | ![no] | ![yes] | ![no] | ![yes] | ![no] | ![no] |
|
||||
| [Custom headers] | ![yes] | ![no] | ![yes] | ![no] | ![no] | ![no] | ![yes] | ![yes] |
|
||||
| Summary | ![yes] | ![yes] | ![yes] | ![maybe] | ![yes] | ![yes] | ![no] | ![yes] |
|
||||
| `HEAD` requests | ![yes] | ![yes] | ![no] | ![yes] | ![yes] | ![yes] | ![no] | ![no] |
|
||||
| Colored output | ![yes] | ![maybe] | ![yes] | ![maybe] | ![yes] | ![yes] | ![no] | ![yes] |
|
||||
| [Filter status code] | ![yes] | ![yes] | ![no] | ![no] | ![no] | ![no] | ![yes] | ![no] |
|
||||
| Custom timeout | ![yes] | ![yes] | ![yes] | ![no] | ![yes] | ![yes] | ![no] | ![yes] |
|
||||
| E-mail links | ![yes] | ![no] | ![no] | ![no] | ![no] | ![yes] | ![no] | ![no] |
|
||||
| Progress bar | ![yes] | ![yes] | ![no] | ![no] | ![no] | ![yes] | ![yes] | ![yes] |
|
||||
| Retry and backoff | ![yes] | ![no] | ![no] | ![no] | ![yes] | ![no] | ![yes] | ![no] |
|
||||
| Skip private domains | ![yes] | ![no] | ![no] | ![no] | ![no] | ![no] | ![no] | ![no] |
|
||||
| [Use as lib] | ![yes] | ![yes] | ![no] | ![yes] | ![yes] | ![no] | ![yes] | ![no] |
|
||||
| Quiet mode | ![yes] | ![no] | ![no] | ![no] | ![yes] | ![yes] | ![yes] | ![yes] |
|
||||
| Config file | ![yes] | ![no] | ![no] | ![no] | ![yes] | ![yes] | ![yes] | ![no] |
|
||||
| Use as library | ![yes] | ![no] | ![no] | ![no] | ![no] | ![no] | ![no] | ![no] |
|
||||
| Amazing lychee logo | ![yes] | ![no] | ![no] | ![no] | ![no] | ![no] | ![no] | ![no] |
|
||||
|
||||
[awesome_bot]: https://github.com/dkhamsing/awesome_bot
|
||||
[muffet]: https://github.com/raviqqe/muffet
|
||||
|
|
@ -69,11 +71,17 @@ We'd be thankful for any contribution. \
|
|||
We try to keep the issue-tracker up-to-date so you can quickly find a task to work on.
|
||||
|
||||
Try one of these links to get started:
|
||||
* [good first issues](https://github.com/lycheeverse/lychee/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22)
|
||||
* [help wanted](https://github.com/lycheeverse/lychee/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22)
|
||||
|
||||
## Installation
|
||||
- [good first issues](https://github.com/lycheeverse/lychee/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22)
|
||||
- [help wanted](https://github.com/lycheeverse/lychee/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22)
|
||||
|
||||
## Using the Commandline Client
|
||||
|
||||
You can run lychee directly from the commandline.
|
||||
|
||||
### Installation
|
||||
|
||||
#### Using cargo
|
||||
|
||||
### Using cargo
|
||||
|
||||
|
|
@ -81,13 +89,13 @@ Try one of these links to get started:
|
|||
cargo install lychee
|
||||
```
|
||||
|
||||
### Using the official Docker image
|
||||
#### Using the official Docker image
|
||||
|
||||
```
|
||||
docker pull lycheeverse/lychee
|
||||
```
|
||||
|
||||
## Usage
|
||||
## Commandline usage
|
||||
|
||||
Run it inside a repository with a `README.md`:
|
||||
|
||||
|
|
@ -117,16 +125,96 @@ lychee "~/projects/big_project/**/README.*"
|
|||
lychee --glob-ignore-case --progress --verbose "~/projects/**/[r]eadme.*"
|
||||
```
|
||||
|
||||
Optional (to avoid getting rate-limited): set an environment variable with your Github token
|
||||
like so `GITHUB_TOKEN=xxxx`, or use the `--github-token` CLI option. It can also be set in the
|
||||
config file.
|
||||
Optionally (to avoid getting rate-limited) you can set an environment variable
|
||||
with your GitHub token like so `GITHUB_TOKEN=xxxx`, or use the `--github-token`
|
||||
CLI option. It can also be set in the config file. There is an extensive list
|
||||
of commandline parameters to customize the behavior.
|
||||
|
||||
### CLI exit codes
|
||||
### Commandline Parameters
|
||||
|
||||
```
|
||||
USAGE:
|
||||
lychee [FLAGS] [OPTIONS] [--] [inputs]...
|
||||
|
||||
FLAGS:
|
||||
-E, --exclude-all-private Exclude all private IPs from checking. Equivalent to `--exclude-private --exclude-link-
|
||||
local --exclude-loopback`
|
||||
--exclude-link-local Exclude link-local IP address range from checking
|
||||
--exclude-loopback Exclude loopback IP address range from checking
|
||||
--exclude-private Exclude private IP address ranges from checking
|
||||
--glob-ignore-case Ignore case when expanding filesystem path glob inputs
|
||||
--help Prints help information
|
||||
-i, --insecure Proceed for server connections considered insecure (invalid TLS)
|
||||
-p, --progress Show progress
|
||||
--skip-missing Skip missing input files (default is to error if they don't exist)
|
||||
-V, --version Prints version information
|
||||
-v, --verbose Verbose program output
|
||||
|
||||
OPTIONS:
|
||||
-a, --accept <accept> Comma-separated list of accepted status codes for valid links
|
||||
-b, --base-url <base-url> Base URL to check relative URLs
|
||||
--basic-auth <basic-auth> Basic authentication support. E.g. `username:password`
|
||||
-c, --config <config-file> Configuration file to use [default: ./lychee.toml]
|
||||
--exclude <exclude>... Exclude URLs from checking (supports regex)
|
||||
--github-token <github-token> GitHub API token to use when checking github.com links, to avoid rate
|
||||
limiting [env: GITHUB_TOKEN=]
|
||||
-h, --headers <headers>... Custom request headers
|
||||
--include <include>... URLs to check (supports regex). Has preference over all excludes
|
||||
--max-concurrency <max-concurrency> Maximum number of concurrent network requests [default: 128]
|
||||
-m, --max-redirects <max-redirects> Maximum number of allowed redirects [default: 10]
|
||||
-X, --method <method> Request method [default: get]
|
||||
-s, --scheme <scheme> Only test links with the given scheme (e.g. https)
|
||||
-T, --threads <threads> Number of threads to utilize. Defaults to number of cores available to
|
||||
the system
|
||||
-t, --timeout <timeout> Website timeout from connect to response finished [default: 20]
|
||||
-u, --user-agent <user-agent> User agent [default: lychee/0.3.1]
|
||||
|
||||
ARGS:
|
||||
<inputs>... The inputs (where to get links to check from). These can be: files (e.g. `README.md`), file globs
|
||||
(e.g. `"~/git/*/README.md"`), remote URLs (e.g. `https://example.com/README.md`) or standard
|
||||
input (`-`) [default: README.md]
|
||||
```
|
||||
|
||||
### Exit codes
|
||||
|
||||
- `0` for success (all links checked successfully or excluded/skipped as configured)
|
||||
- `1` for missing inputs and any unexpected runtime failures or config errors
|
||||
- `2` for link check failures (if any non-excluded link failed the check)
|
||||
|
||||
## Library usage
|
||||
|
||||
You can use lychee as a library for your own projects.
|
||||
Simply add it as a dependency and build your client:
|
||||
|
||||
```rust
|
||||
use lychee::ClientBuilder;
|
||||
use http::StatusCode;
|
||||
|
||||
let client = ClientBuilder::default().build()?;
|
||||
let url = Url::parse("https://github.com/lycheeverse/lychee")?;
|
||||
let response = client.check(Website(url)).await?;
|
||||
assert!(matches!(response.status, Status::Ok(_)));
|
||||
```
|
||||
|
||||
The client is very customizable, e.g.
|
||||
|
||||
```rust
|
||||
let client = ClientBuilder::default()
|
||||
.includes(includes)
|
||||
.excludes(excludes)
|
||||
.max_redirects(cfg.max_redirects)
|
||||
.user_agent(cfg.user_agent)
|
||||
.allow_insecure(cfg.insecure)
|
||||
.custom_headers(headers)
|
||||
.method(method)
|
||||
.timeout(timeout)
|
||||
.verbose(cfg.verbose)
|
||||
.github_token(cfg.github_token)
|
||||
.scheme(cfg.scheme)
|
||||
.accepted(accepted)
|
||||
.build()?;
|
||||
```
|
||||
|
||||
## Troubleshooting and workarounds
|
||||
|
||||
We collect a list of common workarounds for various websites in our [troubleshooting guide](./TROUBLESHOOTING.md).
|
||||
|
|
@ -136,8 +224,11 @@ We collect a list of common workarounds for various websites in our [troubleshoo
|
|||
- https://github.com/analysis-tools-dev/static-analysis (soon)
|
||||
- https://github.com/mre/idiomatic-rust (soon)
|
||||
|
||||
If you are using lychee for your project, we'd be delighted to hear about it.
|
||||
|
||||
## Credits
|
||||
|
||||
The first prototype of lychee was built in [episode 10 of Hello Rust](https://hello-rust.show/10/).
|
||||
Thanks to all Github- and Patreon sponsors for supporting the development since the beginning.
|
||||
Also, thanks to all the great contributors who have since made this project more mature.
|
||||
The first prototype of lychee was built in [episode 10 of Hello
|
||||
Rust](https://hello-rust.show/10/). Thanks to all GitHub and Patreon sponsors
|
||||
for supporting the development since the beginning. Also, thanks to all the
|
||||
great contributors who have since made this project more mature.
|
||||
|
|
|
|||
|
|
@ -1,6 +1,3 @@
|
|||
#[macro_use]
|
||||
extern crate log;
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use headers::authorization::Basic;
|
||||
use headers::{Authorization, HeaderMap, HeaderMapExt, HeaderName};
|
||||
|
|
@ -11,23 +8,14 @@ use std::{collections::HashSet, time::Duration};
|
|||
use structopt::StructOpt;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
mod client;
|
||||
mod client_pool;
|
||||
mod collector;
|
||||
mod extract;
|
||||
mod options;
|
||||
mod stats;
|
||||
mod types;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test_utils;
|
||||
use crate::options::{Config, LycheeOptions};
|
||||
use crate::stats::ResponseStats;
|
||||
|
||||
use client::ClientBuilder;
|
||||
use client_pool::ClientPool;
|
||||
use collector::Input;
|
||||
use options::{Config, LycheeOptions};
|
||||
use stats::ResponseStats;
|
||||
use types::{Excludes, Response, Status};
|
||||
use lychee::collector::{self, Input};
|
||||
use lychee::{ClientBuilder, ClientPool, Response, Status};
|
||||
|
||||
/// A C-like enum that can be cast to `i32` and used as process exit code.
|
||||
enum ExitCode {
|
||||
|
|
@ -41,7 +29,6 @@ enum ExitCode {
|
|||
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
pretty_env_logger::init();
|
||||
let mut opts = LycheeOptions::from_args();
|
||||
|
||||
// Load a potentially existing config file and merge it into the config from the CLI
|
||||
|
|
@ -89,19 +76,22 @@ async fn run(cfg: &Config, inputs: Vec<Input>) -> Result<i32> {
|
|||
let timeout = parse_timeout(cfg.timeout);
|
||||
let max_concurrency = cfg.max_concurrency;
|
||||
let method: reqwest::Method = reqwest::Method::from_str(&cfg.method.to_uppercase())?;
|
||||
let includes = RegexSet::new(&cfg.include)?;
|
||||
let excludes = Excludes::from_options(&cfg);
|
||||
let include = RegexSet::new(&cfg.include)?;
|
||||
let exclude = RegexSet::new(&cfg.exclude)?;
|
||||
|
||||
let client = ClientBuilder::default()
|
||||
.includes(includes)
|
||||
.excludes(excludes)
|
||||
.includes(include)
|
||||
.excludes(exclude)
|
||||
.exclude_all_private(cfg.exclude_all_private)
|
||||
.exclude_private_ips(cfg.exclude_private)
|
||||
.exclude_link_local_ips(cfg.exclude_link_local)
|
||||
.exclude_loopback_ips(cfg.exclude_loopback)
|
||||
.max_redirects(cfg.max_redirects)
|
||||
.user_agent(cfg.user_agent.clone())
|
||||
.allow_insecure(cfg.insecure)
|
||||
.custom_headers(headers)
|
||||
.method(method)
|
||||
.timeout(timeout)
|
||||
.verbose(cfg.verbose)
|
||||
.github_token(cfg.github_token.clone())
|
||||
.scheme(cfg.scheme.clone())
|
||||
.accepted(accepted)
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
use crate::collector::Input;
|
||||
use lychee::collector::Input;
|
||||
|
||||
use anyhow::{Error, Result};
|
||||
use lazy_static::lazy_static;
|
||||
|
|
@ -3,9 +3,7 @@ use std::{
|
|||
fmt::{self, Display},
|
||||
};
|
||||
|
||||
use crate::types::Response;
|
||||
use crate::types::Status::*;
|
||||
use crate::types::Uri;
|
||||
use lychee::{Response, Status::*, Uri};
|
||||
|
||||
pub struct ResponseStats {
|
||||
total: usize,
|
||||
|
|
@ -1,7 +1,3 @@
|
|||
use crate::{
|
||||
options::USER_AGENT,
|
||||
types::{Excludes, Response, Status, Uri},
|
||||
};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use check_if_email_exists::{check_email, CheckEmailInput};
|
||||
use derive_builder::Builder;
|
||||
|
|
@ -14,6 +10,11 @@ use std::{collections::HashSet, time::Duration};
|
|||
use tokio::time::delay_for;
|
||||
use url::Url;
|
||||
|
||||
use crate::excludes::Excludes;
|
||||
use crate::types::{Response, Status};
|
||||
use crate::uri::Uri;
|
||||
|
||||
const VERSION: &str = env!("CARGO_PKG_VERSION");
|
||||
const DEFAULT_MAX_REDIRECTS: usize = 5;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
|
|
@ -34,21 +35,59 @@ pub struct Client {
|
|||
#[builder(setter(into))]
|
||||
#[builder(name = "ClientBuilder")]
|
||||
pub struct ClientBuilderInternal {
|
||||
/// Set an optional Github token.
|
||||
/// This allows for more requests before
|
||||
/// getting rate-limited.
|
||||
github_token: Option<String>,
|
||||
/// Check links matching this set of regular expressions
|
||||
includes: Option<RegexSet>,
|
||||
excludes: Excludes,
|
||||
/// Exclude links matching this set of regular expressions
|
||||
excludes: Option<RegexSet>,
|
||||
/// Exclude all private network addresses
|
||||
exclude_all_private: bool,
|
||||
/// Exclude private IP addresses
|
||||
exclude_private_ips: bool,
|
||||
/// Exclude link-local IPs
|
||||
exclude_link_local_ips: bool,
|
||||
/// Exclude loopback IP addresses (e.g. 127.0.0.1)
|
||||
exclude_loopback_ips: bool,
|
||||
/// Maximum number of redirects before returning error
|
||||
max_redirects: usize,
|
||||
/// User agent used for checking links
|
||||
user_agent: String,
|
||||
/// Ignore SSL errors
|
||||
allow_insecure: bool,
|
||||
/// Allowed URI scheme (e.g. https, http).
|
||||
/// This excludes all links from checking, which
|
||||
/// don't specify that scheme in the URL.
|
||||
scheme: Option<String>,
|
||||
/// Map of headers to send to each resource.
|
||||
/// This allows working around validation issues
|
||||
/// on some websites.
|
||||
custom_headers: HeaderMap,
|
||||
/// Request method (e.g. `GET` or `HEAD`)
|
||||
method: reqwest::Method,
|
||||
/// Set of accepted return codes / status codes
|
||||
accepted: Option<HashSet<http::StatusCode>>,
|
||||
/// Response timeout per request
|
||||
timeout: Option<Duration>,
|
||||
verbose: bool,
|
||||
}
|
||||
|
||||
impl ClientBuilder {
|
||||
fn build_excludes(&mut self) -> Excludes {
|
||||
// exclude_all_private option turns on all "private" excludes,
|
||||
// including private IPs, link-local IPs and loopback IPs
|
||||
let enable_exclude = |opt| opt || self.exclude_all_private.unwrap_or_default();
|
||||
|
||||
Excludes {
|
||||
regex: self.excludes.clone().unwrap_or_default(),
|
||||
private_ips: enable_exclude(self.exclude_private_ips.unwrap_or_default()),
|
||||
link_local_ips: enable_exclude(self.exclude_link_local_ips.unwrap_or_default()),
|
||||
loopback_ips: enable_exclude(self.exclude_loopback_ips.unwrap_or_default()),
|
||||
}
|
||||
}
|
||||
|
||||
/// The build method instantiates the client.
|
||||
pub fn build(&mut self) -> Result<Client> {
|
||||
let mut headers = HeaderMap::new();
|
||||
|
||||
|
|
@ -57,7 +96,7 @@ impl ClientBuilder {
|
|||
let user_agent = self
|
||||
.user_agent
|
||||
.clone()
|
||||
.unwrap_or_else(|| USER_AGENT.to_string());
|
||||
.unwrap_or_else(|| format!("lychee/{}", VERSION));
|
||||
|
||||
headers.insert(header::USER_AGENT, HeaderValue::from_str(&user_agent)?);
|
||||
headers.insert(header::TRANSFER_ENCODING, HeaderValue::from_str("chunked")?);
|
||||
|
|
@ -102,7 +141,7 @@ impl ClientBuilder {
|
|||
reqwest_client,
|
||||
github,
|
||||
includes: self.includes.clone().unwrap_or(None),
|
||||
excludes: self.excludes.clone().unwrap_or_default(),
|
||||
excludes: self.build_excludes(),
|
||||
scheme,
|
||||
method: self.method.clone().unwrap_or(reqwest::Method::GET),
|
||||
accepted: self.accepted.clone().unwrap_or(None),
|
||||
|
|
@ -114,7 +153,6 @@ impl Client {
|
|||
async fn check_github(&self, owner: String, repo: String) -> Status {
|
||||
match &self.github {
|
||||
Some(github) => {
|
||||
info!("Check Github: {}/{}", owner, repo);
|
||||
let repo = github.repo(owner, repo).get().await;
|
||||
match repo {
|
||||
Err(e) => Status::Error(format!("{}", e)),
|
||||
|
|
@ -136,10 +174,7 @@ impl Client {
|
|||
let res = request.send().await;
|
||||
match res {
|
||||
Ok(response) => Status::new(response.status(), self.accepted.clone()),
|
||||
Err(e) => {
|
||||
warn!("Invalid response: {:?}", e);
|
||||
e.into()
|
||||
}
|
||||
Err(e) => e.into(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -483,13 +518,12 @@ mod test {
|
|||
|
||||
#[tokio::test]
|
||||
async fn test_exclude_include_regex() {
|
||||
let mut excludes = Excludes::default();
|
||||
excludes.regex = Some(RegexSet::new(&[r"github.com"]).unwrap());
|
||||
let exclude = Some(RegexSet::new(&[r"github.com"]).unwrap());
|
||||
let includes = RegexSet::new(&[r"foo.github.com"]).unwrap();
|
||||
|
||||
let client = ClientBuilder::default()
|
||||
.includes(includes)
|
||||
.excludes(excludes)
|
||||
.excludes(exclude)
|
||||
.build()
|
||||
.unwrap();
|
||||
|
||||
|
|
@ -506,11 +540,10 @@ mod test {
|
|||
|
||||
#[tokio::test]
|
||||
async fn test_exclude_regex() {
|
||||
let mut excludes = Excludes::default();
|
||||
excludes.regex =
|
||||
let exclude =
|
||||
Some(RegexSet::new(&[r"github.com", r"[a-z]+\.(org|net)", r"@example.com"]).unwrap());
|
||||
|
||||
let client = ClientBuilder::default().excludes(excludes).build().unwrap();
|
||||
let client = ClientBuilder::default().excludes(exclude).build().unwrap();
|
||||
|
||||
assert_eq!(client.excluded(&website_url("http://github.com")), true);
|
||||
assert_eq!(client.excluded(&website_url("http://exclude.org")), true);
|
||||
|
|
|
|||
|
|
@ -2,18 +2,19 @@ use client::Client;
|
|||
use deadpool::unmanaged::Pool;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
use crate::uri;
|
||||
use crate::{client, types};
|
||||
|
||||
pub struct ClientPool {
|
||||
tx: mpsc::Sender<types::Response>,
|
||||
rx: mpsc::Receiver<types::Uri>,
|
||||
rx: mpsc::Receiver<uri::Uri>,
|
||||
pool: deadpool::unmanaged::Pool<client::Client>,
|
||||
}
|
||||
|
||||
impl ClientPool {
|
||||
pub fn new(
|
||||
tx: mpsc::Sender<types::Response>,
|
||||
rx: mpsc::Receiver<types::Uri>,
|
||||
rx: mpsc::Receiver<uri::Uri>,
|
||||
clients: Vec<Client>,
|
||||
) -> Self {
|
||||
let pool = Pool::from(clients);
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
use crate::extract::{extract_links, FileType};
|
||||
use crate::types::Uri;
|
||||
use crate::uri::Uri;
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use glob::glob_with;
|
||||
use reqwest::Url;
|
||||
|
|
@ -14,7 +14,7 @@ const STDIN: &str = "-";
|
|||
|
||||
#[derive(Debug, Clone)]
|
||||
#[non_exhaustive]
|
||||
pub(crate) enum Input {
|
||||
pub enum Input {
|
||||
RemoteUrl(Url),
|
||||
FsGlob { pattern: String, ignore_case: bool },
|
||||
FsPath(PathBuf),
|
||||
|
|
@ -23,7 +23,7 @@ pub(crate) enum Input {
|
|||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct InputContent {
|
||||
pub struct InputContent {
|
||||
pub input: Input,
|
||||
pub file_type: FileType,
|
||||
pub content: String,
|
||||
|
|
@ -41,7 +41,7 @@ impl InputContent {
|
|||
}
|
||||
|
||||
impl Input {
|
||||
pub(crate) fn new(value: &str, glob_ignore_case: bool) -> Self {
|
||||
pub fn new(value: &str, glob_ignore_case: bool) -> Self {
|
||||
if value == STDIN {
|
||||
Self::Stdin
|
||||
} else {
|
||||
|
|
@ -174,7 +174,7 @@ impl ToString for Input {
|
|||
|
||||
/// Fetch all unique links from a slice of inputs
|
||||
/// All relative URLs get prefixed with `base_url` if given.
|
||||
pub(crate) async fn collect_links(
|
||||
pub async fn collect_links(
|
||||
inputs: &[Input],
|
||||
base_url: Option<String>,
|
||||
skip_missing_inputs: bool,
|
||||
|
|
|
|||
26
src/excludes.rs
Normal file
26
src/excludes.rs
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
use regex::RegexSet;
|
||||
|
||||
/// Exclude configuration for the link checker.
|
||||
/// You can ignore links based on regex patterns or pre-defined IP ranges.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Excludes {
|
||||
pub regex: Option<RegexSet>,
|
||||
/// Example: 192.168.0.1
|
||||
pub private_ips: bool,
|
||||
/// Example: 169.254.0.0
|
||||
pub link_local_ips: bool,
|
||||
/// For IPv4: 127.0.0.1/8
|
||||
/// For IPv6: ::1/128
|
||||
pub loopback_ips: bool,
|
||||
}
|
||||
|
||||
impl Default for Excludes {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
regex: None,
|
||||
private_ips: false,
|
||||
link_local_ips: false,
|
||||
loopback_ips: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
use crate::collector::InputContent;
|
||||
use crate::types::Uri;
|
||||
use crate::uri::Uri;
|
||||
use linkify::LinkFinder;
|
||||
use pulldown_cmark::{Event as MDEvent, Parser, Tag};
|
||||
use quick_xml::{events::Event as HTMLEvent, Reader};
|
||||
|
|
@ -8,7 +8,7 @@ use std::path::Path;
|
|||
use url::Url;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) enum FileType {
|
||||
pub enum FileType {
|
||||
HTML,
|
||||
Markdown,
|
||||
Plaintext,
|
||||
|
|
|
|||
40
src/lib.rs
Normal file
40
src/lib.rs
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
#[deny(missing_docs)]
|
||||
|
||||
/**
|
||||
* `lychee` is a library for checking links.
|
||||
* The main struct of this crate is `ClientBuilder` which can be used to
|
||||
* configure and run your own link checker.
|
||||
*
|
||||
* "Hello world" example:
|
||||
* ```
|
||||
*
|
||||
* use lychee::{ClientBuilder, Status};
|
||||
* use lychee::Uri::Website;
|
||||
* use url::Url;
|
||||
* use std::error::Error;
|
||||
*
|
||||
* #[tokio::main]
|
||||
* async fn main() -> Result<(), Box<dyn Error>> {
|
||||
* let client = ClientBuilder::default().build()?;
|
||||
* let url = Url::parse("https://github.com/lycheeverse/lychee")?;
|
||||
* let response = client.check(Website(url)).await;
|
||||
* assert!(matches!(response.status, Status::Ok(_)));
|
||||
* Ok(())
|
||||
* }
|
||||
* ```
|
||||
*/
|
||||
mod client;
|
||||
mod client_pool;
|
||||
mod excludes;
|
||||
mod types;
|
||||
mod uri;
|
||||
|
||||
pub mod collector;
|
||||
pub mod extract;
|
||||
pub mod test_utils;
|
||||
|
||||
pub use client::ClientBuilder;
|
||||
pub use client_pool::ClientPool;
|
||||
pub use excludes::Excludes;
|
||||
pub use types::*;
|
||||
pub use uri::Uri;
|
||||
|
|
@ -1,22 +1,16 @@
|
|||
#![cfg(test)]
|
||||
|
||||
use http::StatusCode;
|
||||
use wiremock::matchers::path;
|
||||
use wiremock::{Mock, MockServer, ResponseTemplate};
|
||||
|
||||
// TODO: used in cli tests (as duplicate)
|
||||
#[allow(unused)]
|
||||
pub(crate) async fn get_mock_server<S>(response_code: S) -> MockServer
|
||||
pub async fn get_mock_server<S>(response_code: S) -> MockServer
|
||||
where
|
||||
S: Into<StatusCode>,
|
||||
{
|
||||
get_mock_server_with_content(response_code, None).await
|
||||
}
|
||||
|
||||
pub(crate) async fn get_mock_server_with_content<S>(
|
||||
response_code: S,
|
||||
content: Option<&str>,
|
||||
) -> MockServer
|
||||
pub async fn get_mock_server_with_content<S>(response_code: S, content: Option<&str>) -> MockServer
|
||||
where
|
||||
S: Into<StatusCode>,
|
||||
{
|
||||
|
|
|
|||
89
src/types.rs
89
src/types.rs
|
|
@ -1,48 +1,6 @@
|
|||
use crate::options::Config;
|
||||
use crate::uri::Uri;
|
||||
use anyhow::anyhow;
|
||||
use regex::RegexSet;
|
||||
use std::net::IpAddr;
|
||||
use std::{collections::HashSet, convert::TryFrom, fmt::Display};
|
||||
use url::Url;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum Uri {
|
||||
Website(Url),
|
||||
Mail(String),
|
||||
}
|
||||
|
||||
impl Uri {
|
||||
pub fn as_str(&self) -> &str {
|
||||
match self {
|
||||
Uri::Website(url) => url.as_str(),
|
||||
Uri::Mail(address) => address.as_str(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn scheme(&self) -> Option<String> {
|
||||
match self {
|
||||
Uri::Website(url) => Some(url.scheme().to_string()),
|
||||
Uri::Mail(_address) => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn host_ip(&self) -> Option<IpAddr> {
|
||||
match self {
|
||||
Self::Website(url) => match url.host()? {
|
||||
url::Host::Ipv4(v4_addr) => Some(v4_addr.into()),
|
||||
url::Host::Ipv6(v6_addr) => Some(v6_addr.into()),
|
||||
_ => None,
|
||||
},
|
||||
Self::Mail(_) => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Uri {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.as_str())
|
||||
}
|
||||
}
|
||||
use std::{collections::HashSet, convert::TryFrom};
|
||||
|
||||
/// Specifies how requests to websites will be made
|
||||
pub(crate) enum RequestMethod {
|
||||
|
|
@ -118,50 +76,11 @@ impl From<reqwest::Error> for Status {
|
|||
}
|
||||
}
|
||||
|
||||
/// Exclude configuration for the link checker.
|
||||
/// You can ignore links based on regex patterns or pre-defined IP ranges.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Excludes {
|
||||
pub regex: Option<RegexSet>,
|
||||
/// Example: 192.168.0.1
|
||||
pub private_ips: bool,
|
||||
/// Example: 169.254.0.0
|
||||
pub link_local_ips: bool,
|
||||
/// For IPv4: 127.0.0.1/8
|
||||
/// For IPv6: ::1/128
|
||||
pub loopback_ips: bool,
|
||||
}
|
||||
|
||||
impl Excludes {
|
||||
pub fn from_options(config: &Config) -> Self {
|
||||
// exclude_all_private option turns on all "private" excludes,
|
||||
// including private IPs, link-local IPs and loopback IPs
|
||||
let enable_exclude = |opt| opt || config.exclude_all_private;
|
||||
|
||||
Self {
|
||||
regex: RegexSet::new(&config.exclude).ok(),
|
||||
private_ips: enable_exclude(config.exclude_private),
|
||||
link_local_ips: enable_exclude(config.exclude_link_local),
|
||||
loopback_ips: enable_exclude(config.exclude_loopback),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Excludes {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
regex: None,
|
||||
private_ips: false,
|
||||
link_local_ips: false,
|
||||
loopback_ips: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
use std::net::{Ipv4Addr, Ipv6Addr};
|
||||
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
|
||||
use url::Url;
|
||||
|
||||
#[test]
|
||||
fn test_uri_host_ip_v4() {
|
||||
|
|
|
|||
88
src/uri.rs
Normal file
88
src/uri.rs
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
use anyhow::Result;
|
||||
use std::net::IpAddr;
|
||||
use std::{convert::TryFrom, fmt::Display};
|
||||
use url::Url;
|
||||
|
||||
/// Lychee's own representation of a URI, which encapsulates all supported formats
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum Uri {
|
||||
/// Website URL
|
||||
Website(Url),
|
||||
/// Mail address
|
||||
Mail(String),
|
||||
}
|
||||
|
||||
impl Uri {
|
||||
pub fn as_str(&self) -> &str {
|
||||
match self {
|
||||
Uri::Website(url) => url.as_str(),
|
||||
Uri::Mail(address) => address.as_str(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn scheme(&self) -> Option<String> {
|
||||
match self {
|
||||
Uri::Website(url) => Some(url.scheme().to_string()),
|
||||
Uri::Mail(_address) => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn host_ip(&self) -> Option<IpAddr> {
|
||||
match self {
|
||||
Self::Website(url) => match url.host()? {
|
||||
url::Host::Ipv4(v4_addr) => Some(v4_addr.into()),
|
||||
url::Host::Ipv6(v6_addr) => Some(v6_addr.into()),
|
||||
_ => None,
|
||||
},
|
||||
Self::Mail(_) => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<&str> for Uri {
|
||||
type Error = anyhow::Error;
|
||||
|
||||
fn try_from(s: &str) -> Result<Self> {
|
||||
Ok(Uri::Website(Url::parse(s)?))
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for Uri {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.as_str())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use reqwest::Url;
|
||||
|
||||
use super::*;
|
||||
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
|
||||
|
||||
#[test]
|
||||
fn test_uri_host_ip_v4() {
|
||||
let uri =
|
||||
Uri::Website(Url::parse("http://127.0.0.1").expect("Expected URI with valid IPv4"));
|
||||
let ip = uri.host_ip().expect("Expected a valid IPv4");
|
||||
assert_eq!(ip, IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_uri_host_ip_v6() {
|
||||
let uri =
|
||||
Uri::Website(Url::parse("https://[2020::0010]").expect("Expected URI with valid IPv6"));
|
||||
let ip = uri.host_ip().expect("Expected a valid IPv6");
|
||||
assert_eq!(
|
||||
ip,
|
||||
IpAddr::V6(Ipv6Addr::new(0x2020, 0, 0, 0, 0, 0, 0, 0x10))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_uri_host_ip_no_ip() {
|
||||
let uri = Uri::Website(Url::parse("https://some.cryptic/url").expect("Expected valid URI"));
|
||||
let ip = uri.host_ip();
|
||||
assert!(ip.is_none());
|
||||
}
|
||||
}
|
||||
54
tests/cli.rs
54
tests/cli.rs
|
|
@ -2,13 +2,11 @@
|
|||
mod cli {
|
||||
use anyhow::Result;
|
||||
use assert_cmd::Command;
|
||||
use http::StatusCode;
|
||||
use lychee::test_utils;
|
||||
use predicates::str::contains;
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
use std::path::{Path, PathBuf};
|
||||
use wiremock::matchers::path;
|
||||
use wiremock::{Mock, MockServer, ResponseTemplate};
|
||||
|
||||
fn main_command() -> Command {
|
||||
// this gets the "main" binary name (e.g. `lychee`)
|
||||
|
|
@ -19,35 +17,6 @@ mod cli {
|
|||
Path::new(module_path!()).parent().unwrap().join("fixtures")
|
||||
}
|
||||
|
||||
// TODO: duplicate of test_utils
|
||||
async fn get_mock_server<S>(response_code: S) -> MockServer
|
||||
where
|
||||
S: Into<StatusCode>,
|
||||
{
|
||||
get_mock_server_with_content(response_code, None).await
|
||||
}
|
||||
|
||||
async fn get_mock_server_with_content<S>(response_code: S, content: Option<&str>) -> MockServer
|
||||
where
|
||||
S: Into<StatusCode>,
|
||||
{
|
||||
let mock_server = MockServer::start().await;
|
||||
let template = ResponseTemplate::new(response_code.into());
|
||||
|
||||
let template = if let Some(s) = content {
|
||||
template.set_body_string(s)
|
||||
} else {
|
||||
template
|
||||
};
|
||||
|
||||
Mock::given(path("/"))
|
||||
.respond_with(template)
|
||||
.mount(&mock_server)
|
||||
.await;
|
||||
|
||||
mock_server
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_exclude_all_private() {
|
||||
let mut cmd = main_command();
|
||||
|
|
@ -85,7 +54,7 @@ mod cli {
|
|||
#[tokio::test]
|
||||
async fn test_failure_404_link() {
|
||||
let mut cmd = main_command();
|
||||
let mock_server = get_mock_server(http::StatusCode::NOT_FOUND).await;
|
||||
let mock_server = test_utils::get_mock_server(http::StatusCode::NOT_FOUND).await;
|
||||
let dir = tempfile::tempdir().expect("Failed to create tempdir");
|
||||
let file_path = dir.path().join("test.txt");
|
||||
let mut file = File::create(&file_path).expect("Failed to create tempfile");
|
||||
|
|
@ -116,7 +85,7 @@ mod cli {
|
|||
#[tokio::test]
|
||||
async fn test_stdin_input() {
|
||||
let mut cmd = main_command();
|
||||
let mock_server = get_mock_server(http::StatusCode::OK).await;
|
||||
let mock_server = test_utils::get_mock_server(http::StatusCode::OK).await;
|
||||
|
||||
cmd.arg("-")
|
||||
.write_stdin(mock_server.uri())
|
||||
|
|
@ -127,7 +96,8 @@ mod cli {
|
|||
#[tokio::test]
|
||||
async fn test_stdin_input_failure() {
|
||||
let mut cmd = main_command();
|
||||
let mock_server = get_mock_server(http::StatusCode::INTERNAL_SERVER_ERROR).await;
|
||||
let mock_server =
|
||||
test_utils::get_mock_server(http::StatusCode::INTERNAL_SERVER_ERROR).await;
|
||||
|
||||
cmd.arg("-")
|
||||
.write_stdin(mock_server.uri())
|
||||
|
|
@ -139,8 +109,8 @@ mod cli {
|
|||
#[tokio::test]
|
||||
async fn test_stdin_input_multiple() {
|
||||
let mut cmd = main_command();
|
||||
let mock_server_a = get_mock_server(http::StatusCode::OK).await;
|
||||
let mock_server_b = get_mock_server(http::StatusCode::OK).await;
|
||||
let mock_server_a = test_utils::get_mock_server(http::StatusCode::OK).await;
|
||||
let mock_server_b = test_utils::get_mock_server(http::StatusCode::OK).await;
|
||||
|
||||
// this behavior (treating multiple `-` as separate inputs) is the same as most CLI tools
|
||||
// that accept `-` as stdin, e.g. `cat`, `bat`, `grep` etc.
|
||||
|
|
@ -181,8 +151,8 @@ mod cli {
|
|||
let mut cmd = main_command();
|
||||
|
||||
let dir = tempfile::tempdir()?;
|
||||
let mock_server_a = get_mock_server(http::StatusCode::OK).await;
|
||||
let mock_server_b = get_mock_server(http::StatusCode::OK).await;
|
||||
let mock_server_a = test_utils::get_mock_server(http::StatusCode::OK).await;
|
||||
let mock_server_b = test_utils::get_mock_server(http::StatusCode::OK).await;
|
||||
let mut file_a = File::create(dir.path().join("a.md"))?;
|
||||
let mut file_b = File::create(dir.path().join("b.md"))?;
|
||||
|
||||
|
|
@ -204,8 +174,8 @@ mod cli {
|
|||
let mut cmd = main_command();
|
||||
|
||||
let dir = tempfile::tempdir()?;
|
||||
let mock_server_a = get_mock_server(http::StatusCode::OK).await;
|
||||
let mock_server_b = get_mock_server(http::StatusCode::OK).await;
|
||||
let mock_server_a = test_utils::get_mock_server(http::StatusCode::OK).await;
|
||||
let mock_server_b = test_utils::get_mock_server(http::StatusCode::OK).await;
|
||||
let mut file_a = File::create(dir.path().join("README.md"))?;
|
||||
let mut file_b = File::create(dir.path().join("readme.md"))?;
|
||||
|
||||
|
|
@ -230,7 +200,7 @@ mod cli {
|
|||
let subdir_level_1 = tempfile::tempdir_in(&dir)?;
|
||||
let subdir_level_2 = tempfile::tempdir_in(&subdir_level_1)?;
|
||||
|
||||
let mock_server = get_mock_server(http::StatusCode::OK).await;
|
||||
let mock_server = test_utils::get_mock_server(http::StatusCode::OK).await;
|
||||
let mut file = File::create(subdir_level_2.path().join("test.md"))?;
|
||||
|
||||
writeln!(file, "{}", mock_server.uri().as_str())?;
|
||||
|
|
|
|||
Loading…
Reference in a new issue