From b5f0b22f7a34df49713a03a93552c4ae9681695b Mon Sep 17 00:00:00 2001 From: Evan Lloyd New-Schmidt Date: Tue, 3 Oct 2023 16:26:03 -0400 Subject: [PATCH] Only modify attributes on attached elements I've seen a 10-20% speedup on larger articles. Signed-off-by: Evan Lloyd New-Schmidt --- src/html.rs | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/html.rs b/src/html.rs index e5def92..4e3853f 100644 --- a/src/html.rs +++ b/src/html.rs @@ -326,10 +326,26 @@ fn remove_empty_sections(document: &mut Html) { } fn remove_attrs(document: &mut Html) { - // TODO: See if finding and skipping detached nodes is significantly faster. let mut to_remove = Vec::new(); - for node in document.tree.values_mut() { - let Node::Element(el) = node else { continue }; + + let all_elements: Vec<_> = document + .tree + .root() + .descendants() + .filter_map(ElementRef::wrap) + .map(|el| el.id()) + .collect(); + + trace!("Removing attributes on {} elements", all_elements.len()); + + for id in all_elements { + let Some(mut node) = document.tree.get_mut(id) else { + trace!("Invalid id: {:?}", id); + continue; + }; + let Node::Element(el) = node.value() else { + continue; + }; if el.name() == "span" { for attr in ["style", "class"]