Add OSM object metadata

- Add @otype and @version columns to the `get-tags` output.
- Parse @otype, @oname, and @version columns in osm tagfiles.
- Attach and output available metadata in the `tag-errors` command.

OSM ids are only unique within each object type (node, way, or relation),
so the object type should be saved as well. Including the edit version
will make it easier to see if a mis-tagged object is outdated.

Signed-off-by: Evan Lloyd New-Schmidt <evan@new-schmidt.com>
This commit is contained in:
Evan Lloyd New-Schmidt 2023-08-25 14:05:26 -04:00 committed by Evan Lloyd New-Schmidt
parent a584498c65
commit faf4b760b2
5 changed files with 176 additions and 32 deletions

View file

@ -4,11 +4,14 @@ use std::{
thread,
};
use om_wikiparser::osm::{Id, Kind, Version};
use osmpbf::{BlobDecode, BlobReader, Element};
use rayon::prelude::*;
struct Record {
id: String,
id: Id,
kind: Kind,
version: Option<Version>,
wikidata: String,
wikipedia: String,
}
@ -45,33 +48,47 @@ fn write(recv: mpsc::Receiver<Record>) -> anyhow::Result<usize> {
let mut output = csv::WriterBuilder::new()
.delimiter(b'\t')
.from_writer(stdout().lock());
output.write_record(["@id", "wikidata", "wikipedia"])?;
output.write_record(["@id", "@otype", "@version", "wikidata", "wikipedia"])?;
let mut count = 0;
for Record {
id,
kind,
version,
wikidata,
wikipedia,
} in recv
{
output.write_record([id, wikidata, wikipedia])?;
output.write_record([
id.to_string(),
kind.otype().to_string(),
version.map(|v| v.to_string()).unwrap_or_default(),
wikidata,
wikipedia,
])?;
count += 1;
}
Ok(count)
}
#[rustfmt::skip]
fn extract_tags(el: Element) -> Option<Record> {
match el {
Element::Node(n) => make_record(n.id(), n.tags()),
Element::DenseNode(n) => make_record(n.id(), n.tags()),
Element::Way(w) => make_record(w.id(), w.tags()),
Element::Relation(r) => make_record(r.id(), r.tags()),
Element::Node(n) => make_record(Kind::Node, n.id(), n.info().version(), n.tags()),
Element::DenseNode(n) => make_record(Kind::Node, n.id(), n.info().map(|i| i.version()), n.tags()),
Element::Way(w) => make_record(Kind::Way, w.id(), w.info().version(), w.tags()),
Element::Relation(r) => make_record(Kind::Relation, r.id(), r.info().version(), r.tags()),
}
}
fn make_record<'i>(id: i64, tags: impl 'i + Iterator<Item = (&'i str, &'i str)>) -> Option<Record> {
fn make_record<'i>(
kind: Kind,
id: Id,
version: Option<Version>,
tags: impl 'i + Iterator<Item = (&'i str, &'i str)>,
) -> Option<Record> {
let mut wikipedia = String::new();
let mut wikidata = String::new();
@ -88,7 +105,9 @@ fn make_record<'i>(id: i64, tags: impl 'i + Iterator<Item = (&'i str, &'i str)>)
}
Some(Record {
id: id.to_string(),
id,
kind,
version,
wikipedia,
wikidata,
})

View file

@ -1,4 +1,5 @@
pub mod html;
pub mod osm;
pub mod wm;
#[macro_use]

View file

@ -12,6 +12,7 @@ use std::{
use anyhow::Context;
use clap::{CommandFactory, Parser, Subcommand};
use om_wikiparser::osm;
#[macro_use]
extern crate log;
@ -51,6 +52,7 @@ enum Cmd {
/// Path to a TSV file that contains one or more of `wikidata`, `wikipedia` columns.
///
/// This can be generated with the `get-tags` command or `osmconvert --csv-headline --csv 'wikidata wikipedia'`.
/// If `@id`, `@version`, and `@otype` or `@oname` columns are present, they will be added to the output for additional context.
#[arg(value_name = "FILE.tsv")]
osm_tags: PathBuf,
},
@ -128,22 +130,46 @@ fn main() -> anyhow::Result<()> {
let mut writer = csv::WriterBuilder::new()
.delimiter(b'\t')
.from_writer(stdout().lock());
writer.write_record(["line", "kind", "osm_id", "error", "value"])?;
writer.write_record(["line", "object", "version", "key", "error", "value"])?;
for error in errors {
use om_wikiparser::wm::ParseErrorKind::*;
let kind = error.kind.to_string();
let id = error
let key = match error.kind {
Title(_) => "wikipedia",
Qid(_) => "wikidata",
Tsv(_) => "",
};
// Url or id.
let object = error
.osm_id
.as_ref()
.map(ToString::to_string)
.map(|id| {
error
.osm_type
.and_then(|obj| osm::make_url(obj, id))
.unwrap_or_else(|| id.to_string())
})
.unwrap_or_default();
let version = error.osm_version.map(|v| v.to_string()).unwrap_or_default();
// Capture error chain.
let e: anyhow::Error = match error.kind {
Title(e) => e.into(),
Qid(e) => e.into(),
Tsv(e) => e.into(),
};
let msg = e.to_string();
writer.write_record([&error.line.to_string(), &kind, &id, &msg, &error.text])?;
let msg = format!("{:#}", e);
writer.write_record([
&error.line.to_string(),
&object,
&version,
key,
&msg,
&error.text,
])?;
}
Ok(())

66
src/osm.rs Normal file
View file

@ -0,0 +1,66 @@
//! OpenStreetMap types
/// OSM Object Id
///
/// Negative values indicate an updated/created object that has not been sent to the server.
///
/// See <https://wiki.openstreetmap.org/wiki/Elements#Common_attributes>
pub type Id = i64;
/// OSM Object Version
///
/// See <https://wiki.openstreetmap.org/wiki/Elements#Common_attributes>
pub type Version = i32;
/// OSM Object Type
///
/// Distinguishes nodes, ways, and relations. An [`Id`] on its own does not
/// say which of the three it refers to, so the two are carried together.
///
/// The enum is fieldless, so it is `Copy`: passing it by value (as
/// [`make_url`] does) never moves it out of the struct that owns it.
///
/// See <https://wiki.openstreetmap.org/wiki/Elements>
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Kind {
    /// A single point.
    Node,
    /// An ordered list of nodes.
    Way,
    /// A group of other elements.
    Relation,
}
/// Build the `osm.org` URL for the object of type `obj` with the given `id`.
///
/// Returns `None` for negative ids and `Some("https://osm.org/<oname>/<id>")`
/// otherwise, where `<oname>` is `obj.oname()`.
pub fn make_url(obj: Kind, id: Id) -> Option<String> {
    // Only non-negative ids get a URL.
    (id >= 0).then(|| format!("https://osm.org/{}/{id}", obj.oname()))
}
impl Kind {
pub fn from_otype(otype: u8) -> Option<Self> {
match otype {
0 => Some(Kind::Node),
1 => Some(Kind::Way),
2 => Some(Kind::Relation),
_ => None,
}
}
pub fn from_oname(oname: &str) -> Option<Self> {
match oname.trim() {
"node" => Some(Kind::Node),
"way" => Some(Kind::Way),
"relation" => Some(Kind::Relation),
_ => None,
}
}
pub fn otype(&self) -> u8 {
match self {
Kind::Node => 0,
Kind::Way => 1,
Kind::Relation => 2,
}
}
pub fn oname(&self) -> &'static str {
match self {
Kind::Node => "node",
Kind::Way => "way",
Kind::Relation => "relation",
}
}
}

View file

@ -10,6 +10,8 @@ pub use title::*;
mod qid;
pub use qid::*;
use crate::osm;
/// Read from a file of urls on each line.
pub fn parse_wikidata_file(path: impl AsRef<OsStr>) -> anyhow::Result<HashSet<Qid>> {
let contents = fs::read_to_string(path.as_ref())?;
@ -73,11 +75,17 @@ pub fn parse_osm_tag_file(
let mut qid_col = None;
let mut title_col = None;
let mut osm_id_col = None;
let mut osm_otype_col = None;
let mut osm_oname_col = None;
let mut osm_version_col = None;
for (column, title) in rdr.headers()?.iter().enumerate() {
match title {
"wikidata" => qid_col = Some(column),
"wikipedia" => title_col = Some(column),
"@id" => osm_id_col = Some(column),
"@otype" => osm_otype_col = Some(column),
"@oname" => osm_oname_col = Some(column),
"@version" => osm_version_col = Some(column),
_ => (),
}
}
@ -97,16 +105,28 @@ pub fn parse_osm_tag_file(
bail!(e)
}
push_error(ParseLineError {
kind: e.into(),
text: String::new(),
line: rdr.position().line(),
osm_id: None,
kind: e.into(),
osm_type: None,
osm_version: None,
});
continue;
}
}
let osm_id = osm_id_col.and_then(|i| row[i].parse().ok());
let parse_metadata = || {
(
osm_id_col.and_then(|i| row[i].trim().parse::<osm::Id>().ok()),
// Prefer otype, use oname if not available
osm_otype_col
.and_then(|i| row[i].trim().parse().ok())
.and_then(osm::Kind::from_otype)
.or_else(|| osm_oname_col.and_then(|i| osm::Kind::from_oname(&row[i]))),
osm_version_col.and_then(|i| row[i].trim().parse::<osm::Version>().ok()),
)
};
let qid = &row[qid_col].trim();
if !qid.is_empty() {
@ -114,12 +134,17 @@ pub fn parse_osm_tag_file(
Ok(qid) => {
qids.insert(qid);
}
Err(e) => push_error(ParseLineError {
text: qid.to_string(),
line: rdr.position().line(),
osm_id,
kind: e.into(),
}),
Err(e) => {
let (osm_id, osm_type, osm_version) = parse_metadata();
push_error(ParseLineError {
kind: e.into(),
text: qid.to_string(),
line: rdr.position().line(),
osm_id,
osm_type,
osm_version,
})
}
}
}
@ -129,12 +154,17 @@ pub fn parse_osm_tag_file(
Ok(title) => {
titles.insert(title);
}
Err(e) => push_error(ParseLineError {
text: title.to_string(),
line: rdr.position().line(),
osm_id,
kind: e.into(),
}),
Err(e) => {
let (osm_id, osm_type, osm_version) = parse_metadata();
push_error(ParseLineError {
kind: e.into(),
text: title.to_string(),
line: rdr.position().line(),
osm_id,
osm_type,
osm_version,
})
}
}
}
}
@ -154,10 +184,12 @@ pub enum ParseErrorKind {
#[derive(Debug)]
pub struct ParseLineError {
pub kind: ParseErrorKind,
pub text: String,
pub line: u64,
pub osm_id: Option<usize>,
pub kind: ParseErrorKind,
pub osm_id: Option<osm::Id>,
pub osm_type: Option<osm::Kind>,
pub osm_version: Option<osm::Version>,
}
impl Display for ParseLineError {