diff --git a/Cargo.lock b/Cargo.lock index 34cc9bf..a5395b2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1394,9 +1394,9 @@ dependencies = [ [[package]] name = "html5gum" -version = "0.4.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dad48b66db55322add2819ae1d7bda0c32f3415269a08330679dbc8b0afeb30" +checksum = "0e57347b3219b14dab25d1c10c1f29dfd93eb31f87060c79cd78c2634e7b594c" dependencies = [ "jetscii", ] diff --git a/lychee-lib/Cargo.toml b/lychee-lib/Cargo.toml index 690f126..06f5fdd 100644 --- a/lychee-lib/Cargo.toml +++ b/lychee-lib/Cargo.toml @@ -49,7 +49,7 @@ thiserror = "1.0.31" futures = "0.3.21" lazy_static = "1.4.0" html5ever = "0.26.0" -html5gum = "0.4.0" +html5gum = "0.5.1" octocrab = "0.16.0" ip_network = "0.4.1" secrecy = "0.8.0" diff --git a/lychee-lib/src/extract/html5gum.rs b/lychee-lib/src/extract/html5gum.rs index 8b5278c..e641375 100644 --- a/lychee-lib/src/extract/html5gum.rs +++ b/lychee-lib/src/extract/html5gum.rs @@ -1,4 +1,4 @@ -use html5gum::{Emitter, Error, Tokenizer}; +use html5gum::{Emitter, Error, State, Tokenizer}; use super::is_verbatim_elem; use super::plaintext::extract_plaintext; @@ -178,14 +178,24 @@ impl Emitter for &mut LinkExtractor { fn init_end_tag(&mut self) { self.init_start_tag(); + self.current_element_is_closing = true; } fn init_comment(&mut self) { self.flush_current_characters(); } - fn emit_current_tag(&mut self) { + fn emit_current_tag(&mut self) -> Option<State> { + let next_state = if self.current_element_is_closing { + None + } else { + self.last_start_element.clear(); + self.last_start_element.extend(&self.current_element_name); + html5gum::naive_next_state(&self.current_element_name) + }; + self.flush_old_attribute(); + next_state } fn emit_current_doctype(&mut self) {}