mirror of
https://github.com/Hopiu/lychee.git
synced 2026-03-16 20:50:25 +00:00
This avoids creating a DOM tree for link extraction and instead uses a `TokenSink` for on-the-fly extraction. In hyperfine benchmarks it was about 10-25% faster than master. Old: 4.557 s ± 0.404 s New: 3.832 s ± 0.131 s The performance fluctuates a little less as well. Some missing element/attribute pairs were also added, which contain links according to the HTML spec. These occur very rarely, but it's good to parse them for completeness' sake. Furthermore, this change tries to clean up a lot of papercuts around our types. We now differentiate between a `RawUri` (stringy-types) and a `Uri`, which is a properly parsed URI type. The extractor now only deals with extracting `RawUri`s while the collector creates the request objects.
48 lines
1.4 KiB
Rust
48 lines
1.4 KiB
Rust
use lychee_lib::{ClientBuilder, Request, Result};
|
|
use std::convert::TryFrom;
|
|
use tokio::sync::mpsc;
|
|
use tokio_stream::wrappers::ReceiverStream;
|
|
|
|
/// Capacity of the request and response channels, and the maximum number of
/// link checks that are run concurrently.
const CONCURRENT_REQUESTS: usize = 4;
|
|
|
|
#[tokio::main]
|
|
async fn main() -> Result<()> {
|
|
// These channels are used to send requests and receive responses to and
|
|
// from lychee
|
|
let (send_req, recv_req) = mpsc::channel(CONCURRENT_REQUESTS);
|
|
let (send_resp, mut recv_resp) = mpsc::channel(CONCURRENT_REQUESTS);
|
|
|
|
// Add as many requests as you like
|
|
let requests = vec![Request::try_from("https://example.org")?];
|
|
|
|
// Queue requests
|
|
tokio::spawn(async move {
|
|
for request in requests {
|
|
send_req.send(request).await.unwrap();
|
|
}
|
|
});
|
|
|
|
// Create a default lychee client
|
|
let client = ClientBuilder::default().client()?;
|
|
|
|
// Start receiving requests
|
|
// Requests get streamed into the client and run concurrently
|
|
tokio::spawn(async move {
|
|
futures::StreamExt::for_each_concurrent(
|
|
ReceiverStream::new(recv_req),
|
|
CONCURRENT_REQUESTS,
|
|
|req| async {
|
|
let resp = client.check(req).await.unwrap();
|
|
send_resp.send(resp).await.unwrap();
|
|
},
|
|
)
|
|
.await;
|
|
});
|
|
|
|
// Finally, listen to incoming responses from lychee
|
|
while let Some(response) = recv_resp.recv().await {
|
|
println!("{}", response);
|
|
}
|
|
|
|
Ok(())
|
|
}
|