diff --git a/.github/workflows/rust-checks.yml b/.github/workflows/rust-checks.yml index a7ac273..279d944 100644 --- a/.github/workflows/rust-checks.yml +++ b/.github/workflows/rust-checks.yml @@ -3,9 +3,9 @@ name: Rust Checks on: pull_request: paths-ignore: - - .gitignore - - LICENSE - - README.md + - .gitignore + - LICENSE + - README.md jobs: test: diff --git a/.github/workflows/shell-checks.yml b/.github/workflows/shell-checks.yml new file mode 100644 index 0000000..b09a46b --- /dev/null +++ b/.github/workflows/shell-checks.yml @@ -0,0 +1,18 @@ +name: Shell Checks + +on: + pull_request: + paths: + - "**.sh" + +jobs: + test: + name: shellcheck + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Run shellcheck + run: | + shellcheck --version + shellcheck -x *.sh diff --git a/README.md b/README.md index d1a94ad..60ecb21 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,9 @@ _Extracts articles from [Wikipedia database dumps](https://en.wikipedia.org/wiki/Wikipedia:Database_download) for embedding into the `mwm` map files created by [the Organic Maps generator](https://github.com/organicmaps/organicmaps/blob/master/tools/python/maps_generator/README.md)._ +Extracted articles are identified by Wikipedia article titles in url or text form (language-specific), and [Wikidata QIDs](https://www.wikidata.org/wiki/Wikidata:Glossary#QID) (language-agnostic). +OpenStreetMap commonly stores these as [`wikipedia*=`](https://wiki.openstreetmap.org/wiki/Key:wikipedia) and [`wikidata=`](https://wiki.openstreetmap.org/wiki/Key:wikidata) tags on objects. + ## Configuring [`article_processing_config.json`](article_processing_config.json) should be updated when adding a new language. @@ -9,6 +12,11 @@ It defines article sections that are not important for users and should be remov ## Usage +To use with the map generator, see the [`run.sh` script](run.sh) and its own help documentation. +It handles preparing the inputs, using multiple dumps, and re-running to convert titles to QIDs and extract them across languages. + +To run the wikiparser manually or for development, see below. + First, install [the rust language tools](https://www.rust-lang.org/) For best performance, use `--release` when building or running. @@ -19,7 +27,7 @@ Alternatively, build it with `cargo build --release`, which places the binary in Run the program with the `--help` flag to see all supported arguments. -```shell +``` $ cargo run --release -- --help Extract article HTML from Wikipedia Enterprise HTML dumps. @@ -57,10 +65,11 @@ It takes as inputs: - A file of Wikipedia article titles to extract, one per line (e.g. `https://$LANG.wikipedia.org/wiki/$ARTICLE_TITLE`), passed as a CLI flag `--wikipedia-urls`. - A directory to write the extracted articles to, as a CLI argument. -As an example of usage with the map generator: +As an example of manual usage with the map generator: - Assuming this program is installed to `$PATH` as `om-wikiparser`. - Download [the dumps in the desired languages](https://dumps.wikimedia.org/other/enterprise_html/runs/) (Use the files with the format `${LANG}wiki-NS0-${DATE}-ENTERPRISE-HTML.json.tar.gz`). Set `DUMP_DOWNLOAD_DIR` to the location they are downloaded. +- Run a maps build with descriptions enabled to generate the `id_to_wikidata.csv` and `wiki_urls.txt` files. 
 - Run the following from within the `intermediate_data` subdirectory of the maps build directory:
 
 ```shell
diff --git a/build.rs b/build.rs
new file mode 100644
index 0000000..7777e48
--- /dev/null
+++ b/build.rs
@@ -0,0 +1,32 @@
+use std::process::Command;
+
+/// Pass git-describe through CARGO_GIT_VERSION env variable
+///
+/// NOTE: Cargo.toml still needs to be updated on releases
+fn set_version_from_git() {
+    let cmd = Command::new("git")
+        .arg("describe")
+        .arg("--always")
+        .arg("--dirty")
+        .arg("--tags")
+        .output();
+
+    match cmd {
+        Ok(output) if output.status.success() => {
+            let version = String::from_utf8_lossy(&output.stdout);
+            let version = version.trim();
+            println!("cargo:rustc-env=CARGO_GIT_VERSION={}", version);
+            // rerun when git checks out another ref or any ref changes
+            println!("cargo:rerun-if-changed=.git/refs/");
+            println!("cargo:rerun-if-changed=.git/HEAD");
+        }
+        _ => {
+            // crates.io builds without git, so ignore here
+            eprintln!("git describe failed; ignoring");
+        }
+    }
+}
+
+fn main() {
+    set_version_from_git();
+}
diff --git a/lib.sh b/lib.sh
new file mode 100644
index 0000000..1c8746c
--- /dev/null
+++ b/lib.sh
@@ -0,0 +1,7 @@
+# Shared functions for scripts
+# shellcheck shell=bash
+
+# Write message to stderr with a timestamp and line ending.
+log () {
+    echo -e "$(date '+%Y-%m-%dT%H:%M:%SZ')" "$@" >&2
+}
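The `log` helper in `lib.sh` above prefixes a timestamp and writes to stderr. A minimal usage sketch (the message text and timestamp are illustrative only):

```shell
# Sketch: source the shared helpers, then report progress on stderr.
source lib.sh
log "Building wikiparser"  # emits e.g. "2023-07-01T12:00:00Z Building wikiparser" on stderr
```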
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..4767b77
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,152 @@
+#! /usr/bin/env bash
+# shellcheck disable=SC2016 # Backticks not used as expansions in documentation.
+USAGE='Usage: ./run.sh [-h] <BUILD_DIR> <DUMP_FILE> [<DUMP_FILE> ...]
+
+A convenience script to run the wikiparser with the maps generator as a drop-in replacement for the descriptions scraper.
+
+Arguments:
+    <BUILD_DIR>   An existing directory to place descriptions in.
+                  The `id_to_wikidata.csv` and `wiki_urls.txt` files output by the
+                  maps generator must be placed in this directory before running.
+                  The extracted articles will be placed in a `descriptions`
+                  subdirectory within this directory.
+                  The `intermediate_data` subfolder of a maps build directory may
+                  be used for this. The same folder may be used for multiple runs.
+    <DUMP_FILE>   A wikipedia enterprise html dump. These take the form of
+                  `enwiki-NS0-20230401-ENTERPRISE-HTML.json.tar.gz`. Multiple
+                  dumps in the same language SHOULD NOT be provided, and will
+                  result in inconsistent data.
+
+Options:
+    -h      Print this help screen
+
+1. Builds wikiparser.
+2. Extracts wikidata ids and wikipedia urls from generator intermediate files `id_to_wikidata.csv` and `wiki_urls.txt`.
+3. Runs wikiparser in parallel for all input dump files (NOTE: this currently starts 2 processes for each dump file).
+
+For information on running the wikiparser manually, see README.md.
+
+For more information on the map generator, see
+<https://github.com/organicmaps/organicmaps/blob/master/tools/python/maps_generator/README.md>.
+'
+
+set -euo pipefail
+# set -x
+
+# Parse options.
+while getopts "h" opt
+do
+    case $opt in
+    h)  echo -n "$USAGE" >&2; exit 0;;
+    ?)  echo "$USAGE" | head -n1 >&2; exit 1;;
+    esac
+done
+shift $((OPTIND - 1))
+
+if [ -z "${2-}" ]; then
+    echo "BUILD_DIR and at least one DUMP_FILE are required" >&2
+    echo -n "$USAGE" >&2
+    exit 1
+fi
+
+# Process and canonicalize all path arguments before changing directories.
+
+BUILD_DIR=$(readlink -f -- "$1")
+shift
+if [ ! -d "$BUILD_DIR" ]; then
+    echo "BUILD_DIR '$BUILD_DIR' does not exist or is not a directory" >&2
+    exit 1
+fi
+
+DUMP_FILES=()
+while (( $# > 0 )); do
+    dump_file="$(readlink -f -- "$1")"
+    if [ ! -f "$dump_file" ]; then
+        echo "DUMP_FILE '$dump_file' does not exist or is not a file" >&2
+        exit 1
+    fi
+    DUMP_FILES+=("$dump_file")
+    shift
+done
+
+# Ensure we're running in the directory of this script.
+SCRIPT_PATH=$(dirname "$0")
+cd "$SCRIPT_PATH"
+SCRIPT_PATH=$(pwd)
+
+# only load library after changing to script directory
+source lib.sh
+
+log "Using maps build directory '$BUILD_DIR'"
+
+if ! command -v "cargo" > /dev/null; then
+    echo -e "'cargo' is not installed, cannot build wikiparser.\nSee <https://www.rust-lang.org/>." >&2
+    exit 1
+fi
+
+log "Building wikiparser"
+cargo build --release
+wikiparser=$(pwd)/target/release/om-wikiparser
+
+log "Changing to maps build dir '$BUILD_DIR'"
+cd "$BUILD_DIR"
+
+log "Transforming intermediate generator data"
+for intermediate_file in id_to_wikidata.csv wiki_urls.txt; do
+    if [ ! -e "$intermediate_file" ]; then
+        echo -e "Cannot find intermediate generator file '$intermediate_file' in maps build dir '$BUILD_DIR/'\nWas the descriptions step run?" >&2
+        exit 1
+    fi
+done
+
+cut -f 2 id_to_wikidata.csv > wikidata_ids.txt
+tail -n +2 wiki_urls.txt | cut -f 3 > wikipedia_urls.txt
+
+# Enable backtraces in errors and panics.
+export RUST_BACKTRACE=1
+# Set log level.
+export RUST_LOG=om_wikiparser=info
+
+# Begin extraction.
+OUTPUT_DIR=$(pwd)/descriptions
+if [ ! -e "$OUTPUT_DIR" ]; then
+    mkdir "$OUTPUT_DIR"
+fi
+log "Extracting articles to '$OUTPUT_DIR'"
+
+kill_jobs() {
+    pids=$(jobs -p)
+    if [ -n "$pids" ]; then
+        log "Killing background jobs"
+        # shellcheck disable=SC2086 # PIDs are intentionally expanded.
+        kill $pids
+        log "Waiting for background jobs to stop"
+        wait
+    fi
+}
+
+trap 'kill_jobs' SIGINT SIGTERM EXIT
+
+for dump in "${DUMP_FILES[@]}"; do
+    log "Extracting '$dump'"
+    tar xzOf "$dump" | "$wikiparser" \
+        --wikidata-ids wikidata_ids.txt \
+        --wikipedia-urls wikipedia_urls.txt \
+        --write-new-ids new_qids.txt \
+        "$OUTPUT_DIR" &
+done
+
+wait
+
+log "Beginning extraction of discovered QIDs"
+
+# Extract new qids from other dumps in parallel.
+for dump in "${DUMP_FILES[@]}"; do
+    tar xzOf "$dump" | "$wikiparser" \
+        --wikidata-ids new_qids.txt \
+        "$OUTPUT_DIR" &
+done
+
+wait
+
+log "Finished"
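For reference, a hypothetical invocation of `run.sh` as documented in its usage text above (all paths are illustrative, not taken from this patch):

```shell
# Sketch: pass the maps build's intermediate_data directory plus one dump per language.
./run.sh ~/maps_build/intermediate_data \
    ~/dumps/enwiki-NS0-20230401-ENTERPRISE-HTML.json.tar.gz \
    ~/dumps/dewiki-NS0-20230401-ENTERPRISE-HTML.json.tar.gz
```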
diff --git a/src/html.rs b/src/html.rs
index 24d1d4f..35f855b 100644
--- a/src/html.rs
+++ b/src/html.rs
@@ -22,6 +22,19 @@ static CONFIG: Lazy> = Lazy::new(|| {
 static HEADERS: Lazy<Selector> =
     Lazy::new(|| Selector::parse("h1, h2, h3, h4, h5, h6, h7").unwrap());
 
+/// Elements that should always be kept, regardless of other metrics.
+static ELEMENT_ALLOW_LIST: Lazy<Selector> = Lazy::new(|| {
+    Selector::parse(
+        &[
+            // Meta tags that affect rendering.
+            "head > meta[charset]",
+            "head > meta[http-equiv]",
+        ]
+        .join(", "),
+    )
+    .unwrap()
+});
+
 pub fn simplify(html: &str, lang: &str) -> String {
     let mut document = Html::parse_document(html);
 
@@ -53,8 +66,6 @@ pub fn simplify(html: &str, lang: &str) -> String {
         }
 
         remove_ids(&mut document, to_remove.drain(..));
-    } else {
-        warn!("No sections to remove configured for lang {lang:?}");
     }
 
     for el in document
@@ -62,7 +73,7 @@
         .descendants()
         .filter_map(ElementRef::wrap)
     {
-        if is_image(&el) || is_empty_or_whitespace(&el) {
+        if (is_image(&el) || is_empty_or_whitespace(&el)) && !ELEMENT_ALLOW_LIST.matches(&el) {
             to_remove.push(el.id());
         }
     }
diff --git a/src/main.rs b/src/main.rs
index 637e9b5..f95d656 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -15,11 +15,24 @@ use om_wikiparser::{
     wm::{parse_wikidata_file, parse_wikipedia_file, Page, WikipediaTitleNorm},
 };
 
+/// Get the version returned by `git describe`, e.g.:
+/// - `v2.0` if a git tag
+/// - the commit hash `034ac04` if not a tag
+/// - `034ac04-dirty` if uncommitted changes are present,
+/// or the crate version if not available (if installed from crates.io).
+///
+/// See `build.rs` file for more info.
+fn version() -> &'static str {
+    option_env!("CARGO_GIT_VERSION")
+        .or(option_env!("CARGO_PKG_VERSION"))
+        .unwrap_or("unknown")
+}
+
 /// Extract article HTML from Wikipedia Enterprise HTML dumps.
 ///
 /// Expects an uncompressed dump connected to stdin.
 #[derive(Parser)]
-#[command(version)]
+#[command(version = crate::version())]
 struct Args {
     /// Directory to write the extracted articles to.
     output_dir: PathBuf,
@@ -38,6 +51,7 @@ struct Args {
     ///
     /// Use this to save the QIDs of articles you know the url of, but not the QID.
     /// The same path can later be passed to the `--wikidata-ids` option to extract them from another language's dump.
+    /// Writes are atomically appended to the file, so the same path may be used by multiple concurrent instances.
     #[arg(long, requires("wikipedia_urls"))]
     write_new_ids: Option<PathBuf>,
 }
@@ -173,6 +187,8 @@ fn main() -> anyhow::Result<()> {
             .exit()
     }
 
+    info!("{} {}", Args::command().get_name(), version());
+
     let wikipedia_titles = if let Some(path) = args.wikipedia_urls {
         info!("Loading article urls from {path:?}");
         let urls = parse_wikipedia_file(path)?;
@@ -191,6 +207,15 @@
         Default::default()
     };
 
+    // NOTE: For atomic writes to the same file across threads/processes:
+    // - The file needs to be opened in APPEND mode (`.append(true)`).
+    // - Each write needs to be a single syscall (for Rust, use `format!` for formatting before calling `write!`, or `write!` to a `String` first).
+    // - Each write needs to be under `PIPE_BUF` size (see `man write(3)`), usually 4kb on Linux.
+    //
+    // For more information, see:
+    // - `man write(3posix)`: https://www.man7.org/linux/man-pages/man3/write.3p.html
+    // - `std::fs::OpenOptions::append`: https://doc.rust-lang.org/std/fs/struct.OpenOptions.html#method.append
+    // - https://stackoverflow.com/questions/1154446/is-file-append-atomic-in-unix
     let mut write_new_ids = args
         .write_new_ids
         .as_ref()
@@ -240,10 +265,14 @@
             continue;
         }
 
+        // Write matched new QIDs back to file.
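The `main.rs` changes below wire the `build.rs` git-describe string into clap's version flag; a hypothetical way to check the result (the printed version string is illustrative only):

```shell
# Sketch: the binary should now report the git-describe output instead of only the crate version.
$ cargo run --release -- --version
om-wikiparser v0.1.0-3-g034ac04-dirty
```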
         if let (Some(f), Some(qid)) = (&mut write_new_ids, &qid) {
             if !is_wikidata_match && !matching_titles.is_empty() {
                 debug!("Writing new id {} for article {:?}", qid, page.name);
-                writeln!(f, "{}", qid).with_context(|| {
+                // NOTE: Write to string buffer first to have a single atomic write syscall.
+                // See `write_new_ids` for more info.
+                let line = format!("{}\n", qid);
+                write!(f, "{}", line).with_context(|| {
                     format!(
                         "writing new id to file {:?}",
                         args.write_new_ids.as_ref().unwrap()
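The atomic-append notes above are what make the concurrent `--write-new-ids` usage in `run.sh` safe; a hypothetical manual equivalent (dump names and paths are illustrative only):

```shell
# Sketch: two dumps processed in parallel, both appending discovered QIDs to the same file.
# Each QID is written with a single append-mode write under PIPE_BUF, so lines do not interleave.
tar xzOf enwiki-NS0-20230401-ENTERPRISE-HTML.json.tar.gz | om-wikiparser \
    --wikipedia-urls wikipedia_urls.txt \
    --write-new-ids new_qids.txt \
    descriptions/ &
tar xzOf dewiki-NS0-20230401-ENTERPRISE-HTML.json.tar.gz | om-wikiparser \
    --wikipedia-urls wikipedia_urls.txt \
    --write-new-ids new_qids.txt \
    descriptions/ &
wait
```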