Remove empty sections after other removals
Signed-off-by: Evan Lloyd New-Schmidt <evan@new-schmidt.com>
This commit is contained in:
parent
cc3ae9b629
commit
58f32b43fd
2 changed files with 35 additions and 3 deletions
35
src/html.rs
35
src/html.rs
|
@ -125,6 +125,8 @@ pub fn simplify_html(document: &mut Html, lang: &str) {
|
|||
}
|
||||
remove_ids(document, to_remove.drain(..));
|
||||
|
||||
remove_empty_sections(document);
|
||||
|
||||
remove_comments(document);
|
||||
|
||||
expand_links(document);
|
||||
|
@ -152,6 +154,39 @@ fn remove_comments(document: &mut Html) {
|
|||
remove_ids(document, to_remove.drain(..));
|
||||
}
|
||||
|
||||
fn remove_empty_sections(document: &mut Html) {
|
||||
let mut to_remove = Vec::new();
|
||||
for el in document.select(&HEADERS) {
|
||||
// TODO: does select match on detached nodes?
|
||||
let Some(parent) = el.parent() else { continue; };
|
||||
|
||||
if !parent
|
||||
.value()
|
||||
.as_element()
|
||||
.map(|p| p.name() == "section")
|
||||
.unwrap_or_default()
|
||||
{
|
||||
trace!("Skipping header without section name: {:?}", parent);
|
||||
continue;
|
||||
}
|
||||
|
||||
if el
|
||||
.next_siblings()
|
||||
.filter_map(ElementRef::wrap)
|
||||
.all(|e| is_empty_or_whitespace(&e) || HEADERS.matches(&e))
|
||||
{
|
||||
trace!(
|
||||
"Removing empty section {} {:?}",
|
||||
el.value().name(),
|
||||
el.text().collect::<String>()
|
||||
);
|
||||
to_remove.push(parent.id());
|
||||
}
|
||||
}
|
||||
|
||||
remove_ids(document, to_remove);
|
||||
}
|
||||
|
||||
fn remove_attrs(document: &mut Html) {
|
||||
// TODO: See if finding and skipping detached nodes is significantly faster.
|
||||
let mut to_remove = Vec::new();
|
||||
|
|
|
@ -92,7 +92,4 @@
|
|||
<p>
|
||||
Archaeologists have found the earliest anatomically modern humans in Europe in the Crimean Mountains' Buran-Kaya caves. The fossils are 32,000 years old, with the artifacts linked to the Gravettian culture. The fossils have cut marks suggesting a post-mortem defleshing ritual.
|
||||
</p>
|
||||
<h2 id="Gallery">
|
||||
Gallery
|
||||
</h2>
|
||||
</html>
|
Loading…
Add table
Reference in a new issue