diff --git a/src/html.rs b/src/html.rs index 35f855b..238f243 100644 --- a/src/html.rs +++ b/src/html.rs @@ -52,10 +52,12 @@ pub fn simplify(html: &str, lang: &str) -> String { if bad_sections.contains(&title.trim()) { to_remove.push(header.id()); let header_level = header.value().name(); + trace!("Removing section for header {header_level} {title:?}"); // Strip trailing nodes. for sibling in header.next_siblings() { if let Some(element) = sibling.value().as_element() { if element.name() == header_level { + trace!("Stopping removal at {}", element.name(),); // TODO: Should this check for a higher level? break; } diff --git a/src/main.rs b/src/main.rs index db31011..ee1fa5b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,6 +3,7 @@ use std::{ io::{stdin, stdout, BufReader, Read, Write}, num::NonZeroUsize, path::PathBuf, + time::Instant, }; use clap::{CommandFactory, Parser, Subcommand}; @@ -89,7 +90,18 @@ fn main() -> anyhow::Result<()> { let mut input = String::new(); stdin().read_to_string(&mut input)?; + let start = Instant::now(); let output = om_wikiparser::html::simplify(&input, &lang); + let stop = Instant::now(); + let time = stop.duration_since(start); + + { + let input_size = input.len() as isize; + let output_size = output.len() as isize; + let difference = input_size - output_size; + let scale = input_size as f64 / output_size as f64; + info!("Reduced size by {difference} bytes ({scale:.4}x) in {time:?}"); + } stdout().write_all(output.as_bytes())?;