diff --git a/Cargo.lock b/Cargo.lock
index 68dd02e..c814b23 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -524,6 +524,7 @@ dependencies = [
  "clap",
  "env_logger",
  "log",
+ "once_cell",
  "scraper",
  "serde",
  "serde_json",
@@ -533,9 +534,9 @@ dependencies = [
 
 [[package]]
 name = "once_cell"
-version = "1.17.2"
+version = "1.18.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9670a07f94779e00908f3e686eab508878ebb390ba6e604d3a284c00e8d0487b"
+checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
 
 [[package]]
 name = "parking_lot"
diff --git a/Cargo.toml b/Cargo.toml
index 3cc52c7..09f05f4 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,6 +12,7 @@ anyhow = { version = "1.0.71", features = ["backtrace"] }
 clap = { version = "4.3.2", features = ["derive"] }
 env_logger = "0.10.0"
 log = "0.4.18"
+once_cell = "1.18.0"
 scraper = "0.16.0"
 serde = { version = "1.0.163", features = ["derive"] }
 serde_json = "1.0.96"
diff --git a/README.md b/README.md
index 2c6ee28..9c95dd0 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,8 @@
 # wikiparser
 
 _Extracts articles from [Wikipedia database dumps](https://en.wikipedia.org/wiki/Wikipedia:Database_download) for embedding into the `mwm` map files created by [the Organic Maps generator](https://github.com/organicmaps/organicmaps/blob/master/tools/python/maps_generator/README.md)._
+
+## Usage
+
+[`article_processing_config.json`](article_processing_config.json) should be updated when adding a new language.
+It defines article sections that are not important for users and should be removed.
diff --git a/article_processing_config.json b/article_processing_config.json
new file mode 100644
index 0000000..2222efb
--- /dev/null
+++ b/article_processing_config.json
@@ -0,0 +1,44 @@
+{
+    "sections_to_remove": {
+        "de": [
+            "Anmerkungen",
+            "Anmerkungen und Einzelnachweise",
+            "Einzelbelege",
+            "Einzelnachweise",
+            "Filme",
+            "Literatur",
+            "Siehe auch",
+            "Weblinks"
+        ],
+        "en": [
+            "Bibliography",
+            "External links",
+            "Further reading",
+            "References",
+            "See also",
+            "Sources"
+        ],
+        "es": [
+            "Enlaces externos",
+            "Referencias",
+            "Véase también",
+            "Vínculos de interés"
+        ],
+        "fr": [
+            "Articles connexes",
+            "Bibliographie",
+            "Lien externe",
+            "Liens externes",
+            "Notes et références",
+            "Références",
+            "Voir aussi"
+        ],
+        "ru": [
+            "Библиография",
+            "Литература",
+            "Примечания",
+            "См. также",
+            "Ссылки"
+        ]
+    }
+}
diff --git a/src/bin/simplify_html.rs b/src/bin/simplify_html.rs
index d24c7f5..54fae4e 100644
--- a/src/bin/simplify_html.rs
+++ b/src/bin/simplify_html.rs
@@ -10,7 +10,7 @@ fn main() -> anyhow::Result<()> {
     let mut input = String::new();
     stdin().read_to_string(&mut input)?;
 
-    let output = simplify(&input);
+    let output = simplify(&input, "en");
 
     stdout().write_all(output.as_bytes())?;
 
diff --git a/src/html.rs b/src/html.rs
index 9143021..d970eef 100644
--- a/src/html.rs
+++ b/src/html.rs
@@ -1,49 +1,63 @@
+use std::collections::{BTreeMap, BTreeSet};
+
+use once_cell::sync::Lazy;
 use scraper::{ElementRef, Html, Selector};
+use serde::Deserialize;
 
-pub fn simplify(html: &str) -> String {
-    // TODO: handle multiple languages
-    let bad_sections = [
-        "External links",
-        "Sources",
-        "See also",
-        "Bibliography",
-        "Further reading",
-        "References",
-    ];
+#[derive(Debug, Deserialize)]
+struct Config<'a> {
+    #[serde(borrow)]
+    sections_to_remove: BTreeMap<&'a str, BTreeSet<&'a str>>,
+}
+
+static CONFIG: Lazy<Config<'static>> = Lazy::new(|| {
+    serde_json::from_str(include_str!(concat!(
+        env!("CARGO_MANIFEST_DIR"),
+        "/article_processing_config.json"
+    )))
+    .expect("\"article_processing_config.json\" is either invalid json or the wrong structure")
+});
+
+static HEADERS: Lazy<Selector> =
+    Lazy::new(|| Selector::parse("h1, h2, h3, h4, h5, h6, h7").unwrap());
+
+pub fn simplify(html: &str, lang: &str) -> String {
     let mut document = Html::parse_document(html);
 
-    // TODO: evaluate this only once
-    let headers = Selector::parse("h1, h2, h3, h4, h5, h6, h7").unwrap();
-
     let mut to_remove = Vec::new();
 
     // remove sections
-    for header in document.select(&headers) {
-        // TODO: should this join all text nodes?
-        let Some(title) = header.text().next() else {
-            continue
-        };
-
-        if bad_sections.contains(&title) {
-            to_remove.push(header.id());
-            let header_level = header.value().name();
-            // strip trailing nodes
-            for sibling in header.next_siblings() {
-                if let Some(element) = sibling.value().as_element() {
-                    if element.name() == header_level {
-                        // TODO: should this check for a higher level?
-                        break;
+
+    if let Some(bad_sections) = CONFIG.sections_to_remove.get(lang) {
+        for header in document.select(&HEADERS) {
+            // TODO: should this join all text nodes?
+            let Some(title) = header.text().next() else {
+                continue
+            };
+
+            if bad_sections.contains(&title.trim()) {
+                to_remove.push(header.id());
+                let header_level = header.value().name();
+                // strip trailing nodes
+                for sibling in header.next_siblings() {
+                    if let Some(element) = sibling.value().as_element() {
+                        if element.name() == header_level {
+                            // TODO: should this check for a higher level?
+                            break;
+                        }
                     }
+                    to_remove.push(sibling.id());
                 }
-                to_remove.push(sibling.id());
             }
         }
-    }
 
-    for id in to_remove.drain(..) {
-        if let Some(mut node) = document.tree.get_mut(id) {
-            node.detach();
+        for id in to_remove.drain(..) {
+            if let Some(mut node) = document.tree.get_mut(id) {
+                node.detach();
+            }
         }
+    } else {
+        warn!("No sections to remove configured for lang {lang:?}");
     }
 
     // remove elements with no text that isn't whitespace
@@ -66,3 +80,13 @@ pub fn simplify(html: &str) -> String {
 
     document.html()
 }
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn static_config_parses() {
+        assert!(!CONFIG.sections_to_remove.is_empty());
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 5648444..15063e9 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,2 +1,5 @@
 pub mod html;
 pub mod wm;
+
+#[macro_use]
+extern crate log;
diff --git a/src/main.rs b/src/main.rs
index 41f61b3..f085e3b 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -9,9 +9,9 @@
 // --wikipedia-urls /tmp/wikipedia_urls.txt \
 // output_dir
 use std::{
-    fs::File,
+    fs::{create_dir, File},
     io::{stdin, BufRead, Write},
-    path::PathBuf,
+    path::{Path, PathBuf},
 };
 
 use anyhow::bail;
@@ -33,6 +33,37 @@ struct Args {
     wikipedia_urls: Option<PathBuf>,
 }
 
+fn write(dir: impl AsRef<Path>, page: Page) -> anyhow::Result<()> {
+    let Some(qid) = page.main_entity.map(|e| e.identifier) else {
+        // TODO: handle and still write
+        bail!("Page in list but without wikidata qid: {:?} ({})", page.name, page.url);
+    };
+
+    let mut filename = dir.as_ref().to_owned();
+    filename.push(qid);
+    filename.push(&page.in_language.identifier);
+    filename.set_extension("html");
+
+    debug!("{:?}: {:?}", page.name, filename);
+
+    if filename.exists() {
+        debug!("Exists, skipping");
+        return Ok(());
+    }
+
+    let subfolder = filename.parent().unwrap();
+    if !subfolder.exists() {
+        create_dir(subfolder)?;
+    }
+
+    let html = simplify(&page.article_body.html, &page.in_language.identifier);
+
+    let mut file = File::create(&filename)?;
+    file.write_all(html.as_bytes())?;
+
+    Ok(())
+}
+
 fn main() -> anyhow::Result<()> {
     env_logger::Builder::new()
         .filter_level(log::LevelFilter::Info)
@@ -79,24 +110,9 @@ fn main() -> anyhow::Result<()> {
             continue;
         }
 
-        let Some(qid) = page.main_entity.map(|e| e.identifier) else {
-            warn!("Page in list but without wikidata qid: {:?}", page.name);
-            continue;
-        };
-
-        let filename = args.output_dir.join(qid).with_extension("html");
-
-        debug!("{:?}: {:?}", page.name, filename);
-
-        if filename.exists() {
-            debug!("Exists, skipping");
-            continue;
+        if let Err(e) = write(&args.output_dir, page) {
+            error!("Error writing article: {}", e);
         }
-
-        let html = simplify(&page.article_body.html);
-
-        let mut file = File::create(filename)?;
-        file.write_all(html.as_bytes())?;
     }
 
     Ok(())
diff --git a/src/wm/mod.rs b/src/wm/mod.rs
index 00f433c..4e8cf02 100644
--- a/src/wm/mod.rs
+++ b/src/wm/mod.rs
@@ -58,15 +58,14 @@ pub fn is_wikipedia_match(
     titles: &HashSet<WikipediaTitleNorm>,
     page: &Page,
 ) -> Option<WikipediaTitleNorm> {
-    // TODO: handle multiple languages
-    let title = WikipediaTitleNorm::from_title(&page.name, "en");
+    let title = WikipediaTitleNorm::from_title(&page.name, &page.in_language.identifier);
 
     if titles.get(&title).is_some() {
         return Some(title);
     }
 
     for redirect in &page.redirects {
-        let title = WikipediaTitleNorm::from_title(&redirect.name, "en");
+        let title = WikipediaTitleNorm::from_title(&redirect.name, &page.in_language.identifier);
 
         if titles.get(&title).is_some() {
             return Some(title);
diff --git a/src/wm/page.rs b/src/wm/page.rs
index b830fd9..c118b5a 100644
--- a/src/wm/page.rs
+++ b/src/wm/page.rs
@@ -1,5 +1,6 @@
 use serde::Deserialize;
 
+// TODO: consolidate into single struct
 /// Deserialized Wikimedia Enterprise API Article
 ///
 /// For all available fields, see https://enterprise.wikimedia.com/docs/data-dictionary/
@@ -9,6 +10,7 @@ pub struct Page {
     // TODO: check if CoW has a performance impact
     pub name: String,
     pub date_modified: String,
+    pub in_language: Language,
     #[serde(default)]
     pub url: String,
     pub main_entity: Option<Wikidata>,
@@ -25,6 +27,8 @@ pub struct Wikidata {
 
 #[derive(Deserialize)]
 pub struct ArticleBody {
+    // TODO: look into RawValue to lazily parse/allocate this:
+    // https://docs.rs/serde_json/latest/serde_json/value/struct.RawValue.html
     pub html: String,
 }
 
@@ -34,3 +38,8 @@ pub struct Redirect {
     pub url: String,
     pub name: String,
 }
+
+#[derive(Deserialize)]
+pub struct Language {
+    pub identifier: String,
+}
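
A minimal sketch (not part of the patch above) of how the new language-aware `simplify` is called, assuming the library crate is importable as `om_wikiparser`; the actual crate name may differ.

```rust
// `om_wikiparser` is an assumed crate name; use the name from Cargo.toml.
use om_wikiparser::html::simplify;

fn main() {
    let html = r#"<html><body>
        <h2>Coffee</h2><p>Kept: this heading is not listed in the config.</p>
        <h2>References</h2><p>Dropped: "References" is in the "en" list.</p>
    </body></html>"#;

    // The language identifier selects the matching entry in
    // article_processing_config.json; for an unconfigured language the
    // function logs a warning and only the whitespace-element cleanup runs.
    let simplified = simplify(html, "en");
    println!("{simplified}");
}
```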