Add OSM object metadata
- Add @otype and @version columns to the `get-tags` output.
- Parse @otype, @oname, and @version columns in osm tagfiles.
- Attach and output available metadata in the `tag-errors` command.

OSM ids are not shared across nodes, ways, and relations, so the object type should be saved as well. Including the edit version will make it easier to see if a mis-tagged object is outdated.

Signed-off-by: Evan Lloyd New-Schmidt <evan@new-schmidt.com>
This commit is contained in:
parent
a584498c65
commit
faf4b760b2
5 changed files with 176 additions and 32 deletions
|
@ -4,11 +4,14 @@ use std::{
|
|||
thread,
|
||||
};
|
||||
|
||||
use om_wikiparser::osm::{Id, Kind, Version};
|
||||
use osmpbf::{BlobDecode, BlobReader, Element};
|
||||
use rayon::prelude::*;
|
||||
|
||||
struct Record {
|
||||
id: String,
|
||||
id: Id,
|
||||
kind: Kind,
|
||||
version: Option<Version>,
|
||||
wikidata: String,
|
||||
wikipedia: String,
|
||||
}
|
||||
|
@ -45,33 +48,47 @@ fn write(recv: mpsc::Receiver<Record>) -> anyhow::Result<usize> {
|
|||
let mut output = csv::WriterBuilder::new()
|
||||
.delimiter(b'\t')
|
||||
.from_writer(stdout().lock());
|
||||
output.write_record(["@id", "wikidata", "wikipedia"])?;
|
||||
output.write_record(["@id", "@otype", "@version", "wikidata", "wikipedia"])?;
|
||||
|
||||
let mut count = 0;
|
||||
|
||||
for Record {
|
||||
id,
|
||||
kind,
|
||||
version,
|
||||
wikidata,
|
||||
wikipedia,
|
||||
} in recv
|
||||
{
|
||||
output.write_record([id, wikidata, wikipedia])?;
|
||||
output.write_record([
|
||||
id.to_string(),
|
||||
kind.otype().to_string(),
|
||||
version.map(|v| v.to_string()).unwrap_or_default(),
|
||||
wikidata,
|
||||
wikipedia,
|
||||
])?;
|
||||
count += 1;
|
||||
}
|
||||
|
||||
Ok(count)
|
||||
}
|
||||
|
||||
#[rustfmt::skip]
|
||||
fn extract_tags(el: Element) -> Option<Record> {
|
||||
match el {
|
||||
Element::Node(n) => make_record(n.id(), n.tags()),
|
||||
Element::DenseNode(n) => make_record(n.id(), n.tags()),
|
||||
Element::Way(w) => make_record(w.id(), w.tags()),
|
||||
Element::Relation(r) => make_record(r.id(), r.tags()),
|
||||
Element::Node(n) => make_record(Kind::Node, n.id(), n.info().version(), n.tags()),
|
||||
Element::DenseNode(n) => make_record(Kind::Node, n.id(), n.info().map(|i| i.version()), n.tags()),
|
||||
Element::Way(w) => make_record(Kind::Way, w.id(), w.info().version(), w.tags()),
|
||||
Element::Relation(r) => make_record(Kind::Relation, r.id(), r.info().version(), r.tags()),
|
||||
}
|
||||
}
|
||||
|
||||
fn make_record<'i>(id: i64, tags: impl 'i + Iterator<Item = (&'i str, &'i str)>) -> Option<Record> {
|
||||
fn make_record<'i>(
|
||||
kind: Kind,
|
||||
id: Id,
|
||||
version: Option<Version>,
|
||||
tags: impl 'i + Iterator<Item = (&'i str, &'i str)>,
|
||||
) -> Option<Record> {
|
||||
let mut wikipedia = String::new();
|
||||
let mut wikidata = String::new();
|
||||
|
||||
|
@ -88,7 +105,9 @@ fn make_record<'i>(id: i64, tags: impl 'i + Iterator<Item = (&'i str, &'i str)>)
|
|||
}
|
||||
|
||||
Some(Record {
|
||||
id: id.to_string(),
|
||||
id,
|
||||
kind,
|
||||
version,
|
||||
wikipedia,
|
||||
wikidata,
|
||||
})
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
pub mod html;
|
||||
pub mod osm;
|
||||
pub mod wm;
|
||||
|
||||
#[macro_use]
|
||||
|
|
40
src/main.rs
40
src/main.rs
|
@ -12,6 +12,7 @@ use std::{
|
|||
|
||||
use anyhow::Context;
|
||||
use clap::{CommandFactory, Parser, Subcommand};
|
||||
use om_wikiparser::osm;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
|
||||
|
@ -51,6 +52,7 @@ enum Cmd {
|
|||
/// Path to a TSV file that contains one or more of `wikidata`, `wikipedia` columns.
|
||||
///
|
||||
/// This can be generated with the `get-tags` command or `osmconvert --csv-headline --csv 'wikidata wikipedia'`.
|
||||
/// If `@id`, `@version`, and `@otype` or `@oname` columns are present, they will be added to the output for additional context.
|
||||
#[arg(value_name = "FILE.tsv")]
|
||||
osm_tags: PathBuf,
|
||||
},
|
||||
|
@ -128,22 +130,46 @@ fn main() -> anyhow::Result<()> {
|
|||
let mut writer = csv::WriterBuilder::new()
|
||||
.delimiter(b'\t')
|
||||
.from_writer(stdout().lock());
|
||||
writer.write_record(["line", "kind", "osm_id", "error", "value"])?;
|
||||
|
||||
writer.write_record(["line", "object", "version", "key", "error", "value"])?;
|
||||
|
||||
for error in errors {
|
||||
use om_wikiparser::wm::ParseErrorKind::*;
|
||||
let kind = error.kind.to_string();
|
||||
let id = error
|
||||
let key = match error.kind {
|
||||
Title(_) => "wikipedia",
|
||||
Qid(_) => "wikidata",
|
||||
Tsv(_) => "",
|
||||
};
|
||||
|
||||
// Url or id.
|
||||
let object = error
|
||||
.osm_id
|
||||
.as_ref()
|
||||
.map(ToString::to_string)
|
||||
.map(|id| {
|
||||
error
|
||||
.osm_type
|
||||
.and_then(|obj| osm::make_url(obj, id))
|
||||
.unwrap_or_else(|| id.to_string())
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
let version = error.osm_version.map(|v| v.to_string()).unwrap_or_default();
|
||||
|
||||
// Capture error chain.
|
||||
let e: anyhow::Error = match error.kind {
|
||||
Title(e) => e.into(),
|
||||
Qid(e) => e.into(),
|
||||
Tsv(e) => e.into(),
|
||||
};
|
||||
let msg = e.to_string();
|
||||
writer.write_record([&error.line.to_string(), &kind, &id, &msg, &error.text])?;
|
||||
let msg = format!("{:#}", e);
|
||||
|
||||
writer.write_record([
|
||||
&error.line.to_string(),
|
||||
&object,
|
||||
&version,
|
||||
key,
|
||||
&msg,
|
||||
&error.text,
|
||||
])?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
|
66
src/osm.rs
Normal file
66
src/osm.rs
Normal file
|
@ -0,0 +1,66 @@
|
|||
//! OpenStreetMap types
|
||||
|
||||
/// OSM Object Id
|
||||
///
|
||||
/// Negative values indicate an updated/created object that has not been sent to the server.
|
||||
///
|
||||
/// See <https://wiki.openstreetmap.org/wiki/Elements#Common_attributes>
|
||||
pub type Id = i64;
|
||||
|
||||
/// OSM Object Version
|
||||
///
|
||||
/// See <https://wiki.openstreetmap.org/wiki/Elements#Common_attributes>
|
||||
pub type Version = i32;
|
||||
|
||||
/// OSM Object Type
///
/// A small, fieldless enum: it is `Copy` so it can be passed by value (e.g. to
/// `make_url`) without giving up the original, and `Hash` so it can be used as
/// (part of) a map or set key.
///
/// See <https://wiki.openstreetmap.org/wiki/Elements>
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Kind {
    /// A node object.
    Node,
    /// A way object.
    Way,
    /// A relation object.
    Relation,
}
|
||||
|
||||
/// Build the `https://osm.org/<oname>/<id>` URL for an object.
///
/// The path segment for the object type comes from [`Kind::oname`].
///
/// Returns `None` when `id` is negative; zero and positive ids always
/// produce a URL.
pub fn make_url(obj: Kind, id: Id) -> Option<String> {
    if id >= 0 {
        Some(format!("https://osm.org/{}/{id}", obj.oname()))
    } else {
        // Negative ids get no URL.
        None
    }
}
|
||||
|
||||
impl Kind {
|
||||
pub fn from_otype(otype: u8) -> Option<Self> {
|
||||
match otype {
|
||||
0 => Some(Kind::Node),
|
||||
1 => Some(Kind::Way),
|
||||
2 => Some(Kind::Relation),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_oname(oname: &str) -> Option<Self> {
|
||||
match oname.trim() {
|
||||
"node" => Some(Kind::Node),
|
||||
"way" => Some(Kind::Way),
|
||||
"relation" => Some(Kind::Relation),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn otype(&self) -> u8 {
|
||||
match self {
|
||||
Kind::Node => 0,
|
||||
Kind::Way => 1,
|
||||
Kind::Relation => 2,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn oname(&self) -> &'static str {
|
||||
match self {
|
||||
Kind::Node => "node",
|
||||
Kind::Way => "way",
|
||||
Kind::Relation => "relation",
|
||||
}
|
||||
}
|
||||
}
|
|
@ -10,6 +10,8 @@ pub use title::*;
|
|||
mod qid;
|
||||
pub use qid::*;
|
||||
|
||||
use crate::osm;
|
||||
|
||||
/// Read from a file of urls on each line.
|
||||
pub fn parse_wikidata_file(path: impl AsRef<OsStr>) -> anyhow::Result<HashSet<Qid>> {
|
||||
let contents = fs::read_to_string(path.as_ref())?;
|
||||
|
@ -73,11 +75,17 @@ pub fn parse_osm_tag_file(
|
|||
let mut qid_col = None;
|
||||
let mut title_col = None;
|
||||
let mut osm_id_col = None;
|
||||
let mut osm_otype_col = None;
|
||||
let mut osm_oname_col = None;
|
||||
let mut osm_version_col = None;
|
||||
for (column, title) in rdr.headers()?.iter().enumerate() {
|
||||
match title {
|
||||
"wikidata" => qid_col = Some(column),
|
||||
"wikipedia" => title_col = Some(column),
|
||||
"@id" => osm_id_col = Some(column),
|
||||
"@otype" => osm_otype_col = Some(column),
|
||||
"@oname" => osm_oname_col = Some(column),
|
||||
"@version" => osm_version_col = Some(column),
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
@ -97,16 +105,28 @@ pub fn parse_osm_tag_file(
|
|||
bail!(e)
|
||||
}
|
||||
push_error(ParseLineError {
|
||||
kind: e.into(),
|
||||
text: String::new(),
|
||||
line: rdr.position().line(),
|
||||
osm_id: None,
|
||||
kind: e.into(),
|
||||
osm_type: None,
|
||||
osm_version: None,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
let osm_id = osm_id_col.and_then(|i| row[i].parse().ok());
|
||||
let parse_metadata = || {
|
||||
(
|
||||
osm_id_col.and_then(|i| row[i].trim().parse::<osm::Id>().ok()),
|
||||
// Prefer otype, use oname if not available
|
||||
osm_otype_col
|
||||
.and_then(|i| row[i].trim().parse().ok())
|
||||
.and_then(osm::Kind::from_otype)
|
||||
.or_else(|| osm_oname_col.and_then(|i| osm::Kind::from_oname(&row[i]))),
|
||||
osm_version_col.and_then(|i| row[i].trim().parse::<osm::Version>().ok()),
|
||||
)
|
||||
};
|
||||
|
||||
let qid = &row[qid_col].trim();
|
||||
if !qid.is_empty() {
|
||||
|
@ -114,12 +134,17 @@ pub fn parse_osm_tag_file(
|
|||
Ok(qid) => {
|
||||
qids.insert(qid);
|
||||
}
|
||||
Err(e) => push_error(ParseLineError {
|
||||
text: qid.to_string(),
|
||||
line: rdr.position().line(),
|
||||
osm_id,
|
||||
kind: e.into(),
|
||||
}),
|
||||
Err(e) => {
|
||||
let (osm_id, osm_type, osm_version) = parse_metadata();
|
||||
push_error(ParseLineError {
|
||||
kind: e.into(),
|
||||
text: qid.to_string(),
|
||||
line: rdr.position().line(),
|
||||
osm_id,
|
||||
osm_type,
|
||||
osm_version,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -129,12 +154,17 @@ pub fn parse_osm_tag_file(
|
|||
Ok(title) => {
|
||||
titles.insert(title);
|
||||
}
|
||||
Err(e) => push_error(ParseLineError {
|
||||
text: title.to_string(),
|
||||
line: rdr.position().line(),
|
||||
osm_id,
|
||||
kind: e.into(),
|
||||
}),
|
||||
Err(e) => {
|
||||
let (osm_id, osm_type, osm_version) = parse_metadata();
|
||||
push_error(ParseLineError {
|
||||
kind: e.into(),
|
||||
text: title.to_string(),
|
||||
line: rdr.position().line(),
|
||||
osm_id,
|
||||
osm_type,
|
||||
osm_version,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -154,10 +184,12 @@ pub enum ParseErrorKind {
|
|||
|
||||
#[derive(Debug)]
|
||||
pub struct ParseLineError {
|
||||
pub kind: ParseErrorKind,
|
||||
pub text: String,
|
||||
pub line: u64,
|
||||
pub osm_id: Option<usize>,
|
||||
pub kind: ParseErrorKind,
|
||||
pub osm_id: Option<osm::Id>,
|
||||
pub osm_type: Option<osm::Kind>,
|
||||
pub osm_version: Option<osm::Version>,
|
||||
}
|
||||
|
||||
impl Display for ParseLineError {
|
||||
|
|
Loading…
Add table
Reference in a new issue