From 0a0317538cf80af9f29fb6223adc4a7c2e3df9c2 Mon Sep 17 00:00:00 2001 From: Evan Lloyd New-Schmidt Date: Fri, 23 Jun 2023 11:30:55 -0400 Subject: [PATCH] Rewrite comments as sentences for readability Signed-off-by: Evan Lloyd New-Schmidt --- src/html.rs | 10 +++++----- src/main.rs | 4 ++-- src/wm/mod.rs | 2 +- src/wm/page.rs | 10 +++++----- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/html.rs b/src/html.rs index d970eef..3c8d185 100644 --- a/src/html.rs +++ b/src/html.rs @@ -26,11 +26,11 @@ pub fn simplify(html: &str, lang: &str) -> String { let mut to_remove = Vec::new(); - // remove sections + // Remove configured sections and all trailing elements until next section. if let Some(bad_sections) = CONFIG.sections_to_remove.get(lang) { for header in document.select(&HEADERS) { - // TODO: should this join all text nodes? + // TODO: Should this join all text nodes? let Some(title) = header.text().next() else { continue }; @@ -38,11 +38,11 @@ pub fn simplify(html: &str, lang: &str) -> String { if bad_sections.contains(&title.trim()) { to_remove.push(header.id()); let header_level = header.value().name(); - // strip trailing nodes + // Strip trailing nodes. for sibling in header.next_siblings() { if let Some(element) = sibling.value().as_element() { if element.name() == header_level { - // TODO: should this check for a higher level? + // TODO: Should this check for a higher level? break; } } @@ -60,7 +60,7 @@ pub fn simplify(html: &str, lang: &str) -> String { warn!("No sections to remove configured for lang {lang:?}"); } - // remove elements with no text that isn't whitespace + // Remove elements with no text that isn't whitespace. 
for element in document .root_element() diff --git a/src/main.rs b/src/main.rs index f085e3b..6e1c393 100644 --- a/src/main.rs +++ b/src/main.rs @@ -93,8 +93,8 @@ fn main() -> anyhow::Result<()> { info!("Processing dump"); let dump = stdin().lock(); - // TODO: compare different deserialization methods - // docs warn against using a reader directly, and it's slower than tar can decompress the dump + // TODO: Compare different deserialization methods. + // The docs warn against using a reader directly, and it's slower than tar can decompress the dump. // let stream = serde_json::Deserializer::from_reader(dump).into_iter::(); let stream = dump.lines().map(|r| { r.map_err(anyhow::Error::new) diff --git a/src/wm/mod.rs b/src/wm/mod.rs index 4e8cf02..07967f1 100644 --- a/src/wm/mod.rs +++ b/src/wm/mod.rs @@ -119,7 +119,7 @@ pub struct WikipediaTitleNorm { impl WikipediaTitleNorm { fn normalize_title(title: &str) -> String { - // TODO: compare with generator url creation + // TODO: Compare with map generator url creation, ensure covers all cases. title.replace(' ', "_") } diff --git a/src/wm/page.rs b/src/wm/page.rs index c118b5a..d680be5 100644 --- a/src/wm/page.rs +++ b/src/wm/page.rs @@ -3,18 +3,18 @@ use serde::Deserialize; // TODO: consolidate into single struct /// Deserialized Wikimedia Enterprise API Article /// -/// For all available fields, see https://enterprise.wikimedia.com/docs/data-dictionary/ +/// For all available fields, see <https://enterprise.wikimedia.com/docs/data-dictionary/>. #[allow(dead_code)] // TODO: reevaluate fields #[derive(Deserialize)] pub struct Page { - // TODO: check if CoW has a performance impact + // TODO: Check if CoW has a performance impact. pub name: String, pub date_modified: String, pub in_language: Language, #[serde(default)] pub url: String, pub main_entity: Option, - // TODO: see what impact parsing/unescaping/allocating this has + // TODO: See what impact parsing/unescaping/allocating this has. 
pub article_body: ArticleBody, #[serde(default)] pub redirects: Vec, @@ -27,12 +27,12 @@ pub struct Wikidata { #[derive(Deserialize)] pub struct ArticleBody { - // TODO: look into RawValue to lazily parse/allocate this: + // TODO: Look into RawValue to lazily parse/allocate this: // https://docs.rs/serde_json/latest/serde_json/value/struct.RawValue.html pub html: String, } -#[allow(dead_code)] // TODO: reevaluate fields +#[allow(dead_code)] // TODO: Reevaluate fields. #[derive(Deserialize)] pub struct Redirect { pub url: String,