commit 1fccd16fc21e35230d1684372f6af488cc6ac98d Author: Ilya Zverev Date: Wed Oct 4 15:58:24 2017 +0300 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9c602a0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +__pycache__/ +*.log +*.osm +validate.sh diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..67db858 --- /dev/null +++ b/LICENSE @@ -0,0 +1,175 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. diff --git a/README.md b/README.md new file mode 100644 index 0000000..0262ee8 --- /dev/null +++ b/README.md @@ -0,0 +1,24 @@ +# Subway Preprocessor + +Here you see a list of scripts that can be used for preprocessing all the metro +systems in the world from OpenStreetMap. `subway_structure.py` produces +a list of disjunct systems that can be used for routing and for displaying +of metro maps. + +## How To Validate + +* Download or update a planet file in o5m format (using `osmconvert` and `osmupdate`). +* Use `filter_all_subways.sh` to extract a portion of data for all subways. 
+* Run `mapsme_subways.py -x filtered_data.osm` to build metro structures and receive a validation log. +* Run `validation_to_html.py` on that log to create readable HTML tables. + +## Adding Stop Areas To OSM + +To quickly add `stop_area` relations for the entire city, use the `make_stop_areas.py` script +from the `stop_area` directory. Give it a bounding box or a `.json` file downloaded from Overpass API. +It will produce a JOSM XML file that you should manually check in JOSM. After that +just upload it. + +## Author and License + +All scripts were written by Ilya Zverev for MAPS.ME. Published under Apache License 2.0. diff --git a/download_all_subways.sh b/download_all_subways.sh new file mode 100755 index 0000000..2797520 --- /dev/null +++ b/download_all_subways.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# Still times out, do not use unless you want to be blocked for some hours on Overpass API +TIMEOUT=2000 +QUERY='[out:json][timeout:'$TIMEOUT'];(rel["route"="subway"];rel["route"="light_rail"];rel["public_transport"="stop_area"];rel["public_transport"="stop_area_group"];node["station"="subway"];node["station"="light_rail"];node["railway"="subway_entrance"];);(._;>;);out body center qt;' +http http://overpass-api.de/api/interpreter "data==$QUERY" --timeout $TIMEOUT > subways-$(date +%y%m%d).json +http https://overpass-api.de/api/status | grep available diff --git a/filter_all_subways.sh b/filter_all_subways.sh new file mode 100755 index 0000000..1f7d980 --- /dev/null +++ b/filter_all_subways.sh @@ -0,0 +1,4 @@ +#!/bin/bash +[ $# -lt 1 ] && echo 'Usage: $0 []' && exit 1 +OSMFILTER=${2-./osmfilter} +"$OSMFILTER" "$1" --keep= --keep-relations="route=subway or route=light_rail or route=monorail or route_master=subway or route_master=light_rail or route_master=monorail or public_transport=stop_area or public_transport=stop_area_group" --keep-nodes="station=subway or station=light_rail or station=monorail or railway=subway_entrance" --drop-author -o=subways-$(date +%y%m%d).osm 
diff --git a/mapsme_subways.py b/mapsme_subways.py new file mode 100755 index 0000000..697bf38 --- /dev/null +++ b/mapsme_subways.py @@ -0,0 +1,230 @@ +#!/usr/bin/env python3 +import argparse +import json +import logging +import os +import sys +import time +import urllib.parse +import urllib.request +from subway_structure import download_cities + + +def overpass_request(bboxes=None): + query = '[out:json][timeout:1000];(' + if bboxes is None: + bboxes = [None] + for bbox in bboxes: + bbox_part = '' if not bbox else '({})'.format(','.join(bbox)) + for t, k, v in (('rel', 'route', 'subway'), + ('rel', 'route', 'light_rail'), + ('rel', 'route_master', 'subway'), + ('rel', 'route_master', 'light_rail'), + ('rel', 'public_transport', 'stop_area'), + ('rel', 'public_transport', 'stop_area_group'), + ('node', 'railway', 'subway_entrance')): + query += '{}["{}"="{}"]{};'.format(t, k, v, bbox_part) + query += ');(._;>);out body center qt;' + logging.debug('Query: %s', query) + url = 'http://overpass-api.de/api/interpreter?data={}'.format(urllib.parse.quote(query)) + response = urllib.request.urlopen(url, timeout=1000) + if response.getcode() != 200: + raise Exception('Failed to query Overpass API: HTTP {}'.format(response.getcode())) + return json.load(response)['elements'] + + +def multi_overpass(bboxes): + if not bboxes: + return overpass_request(None) + SLICE_SIZE = 10 + result = [] + for i in range(0, len(bboxes) + SLICE_SIZE - 1, SLICE_SIZE): + if i > 0: + time.sleep(5) + result.append(overpass_request(bboxes[i:i+SLICE_SIZE])) + return result + + +def load_xml(f): + try: + from lxml import etree + except ImportError: + import xml.etree.ElementTree as etree + + elements = [] + nodes = {} + for event, element in etree.iterparse(f): + if element.tag in ('node', 'way', 'relation'): + el = {'type': element.tag, 'id': int(element.get('id'))} + if element.tag == 'node': + for n in ('lat', 'lon'): + el[n] = float(element.get(n)) + nodes[el['id']] = (el['lat'], el['lon']) + 
tags = {} + nd = [] + members = [] + for sub in element: + if sub.tag == 'tag': + tags[sub.get('k')] = sub.get('v') + elif sub.tag == 'nd': + nd.append(int(sub.get('ref'))) + elif sub.tag == 'member': + members.append({'type': sub.get('type'), + 'ref': int(sub.get('ref')), + 'role': sub.get('role', '')}) + if tags: + el['tags'] = tags + if nd: + el['nodes'] = nd + if members: + el['members'] = members + elements.append(el) + element.clear() + logging.info('Read %s elements, now finding centers of ways and relations', len(elements)) + + # Now make centers, assuming relations go after ways + ways = {} + relations = {} + for el in elements: + if el['type'] == 'way' and 'nodes' in el: + center = [0, 0] + count = 0 + for nd in el['nodes']: + if nd in nodes: + center[0] += nodes[nd][0] + center[1] += nodes[nd][1] + count += 1 + if count > 0: + el['center'] = {'lat': center[0]/count, 'lon': center[1]/count} + ways[el['id']] = (el['center']['lat'], el['center']['lon']) + elif el['type'] == 'relation' and 'members' in el: + center = [0, 0] + count = 0 + for m in el['members']: + if m['type'] == 'node' and m['ref'] in nodes: + center[0] += nodes[m['ref']][0] + center[1] += nodes[m['ref']][1] + count += 1 + elif m['type'] == 'way' and m['ref'] in ways: + center[0] += ways[m['ref']][0] + center[1] += ways[m['ref']][1] + count += 1 + if count > 0: + el['center'] = {'lat': center[0]/count, 'lon': center[1]/count} + relations[el['id']] = (el['center']['lat'], el['center']['lon']) + + # Iterating again, now filling relations that contain only relations + for el in elements: + if el['type'] == 'relation' and 'members' in el: + center = [0, 0] + count = 0 + for m in el['members']: + if m['type'] == 'node' and m['ref'] in nodes: + center[0] += nodes[m['ref']][0] + center[1] += nodes[m['ref']][1] + count += 1 + elif m['type'] == 'way' and m['ref'] in ways: + center[0] += ways[m['ref']][0] + center[1] += ways[m['ref']][1] + count += 1 + elif m['type'] == 'relation' and m['ref'] in 
relations: + center[0] += relations[m['ref']][0] + center[1] += relations[m['ref']][1] + count += 1 + if count > 0: + el['center'] = {'lat': center[0]/count, 'lon': center[1]/count} + relations[el['id']] = (el['center']['lat'], el['center']['lon']) + return elements + + +def merge_mapsme_networks(networks): + result = {} + for k in ('stops', 'transfers', 'networks'): + result[k] = sum([n[k] for n in networks], []) + return result + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument( + '-i', '--source', help='File to write backup of OSM data, or to read data from') + parser.add_argument('-x', '--xml', help='OSM extract with routes, to read data from') + parser.add_argument( + '-b', '--bbox', action='store_true', + help='Use city boundaries to query Overpass API instead of querying the world') + parser.add_argument('-q', '--quiet', action='store_true', help='Show only warnings and errors') + parser.add_argument('-c', '--city', help='Validate only a single city') + parser.add_argument('-o', '--output', help='JSON file for MAPS.ME') + parser.add_argument('-n', '--networks', type=argparse.FileType('w'), help='File to write the networks statistics') + options = parser.parse_args() + + if options.quiet: + log_level = logging.WARNING + else: + log_level = logging.INFO + logging.basicConfig(level=logging.INFO, datefmt='%H:%M:%S', + format='%(asctime)s %(levelname)-7s %(message)s') + + # Downloading cities from Google Spreadsheets + cities = download_cities() + if options.city: + cities = [c for c in cities if c.name == options.city] + logging.info('Read %s metro networks', len(cities)) + if not cities: + sys.exit(2) + + # Reading cached json, loading XML or querying Overpass API + if options.source and os.path.exists(options.source): + logging.info('Reading %s', options.source) + with open(options.source, 'r') as f: + osm = json.load(f) + if 'elements' in osm: + osm = osm['elements'] + elif options.xml: + logging.info('Reading %s', 
options.xml) + osm = load_xml(options.xml) + if options.source: + with open(options.source, 'w') as f: + json.dump(osm, f) + else: + if options.bbox: + bboxes = [c.bbox for c in cities] + else: + bboxes = None + logging.info('Downloading data from Overpass API') + osm = multi_overpass(bboxes) + if options.source: + with open(options.source, 'w') as f: + json.dump(osm, f) + logging.info('Downloaded %s elements, sorting by city', len(osm)) + + # Sorting elements by city and prepare a dict + for el in osm: + for c in cities: + if c.contains(el): + c.add(el) + + logging.info('Building routes for each city') + good_cities = [] + for c in cities: + c.extract_routes() + c.validate() + if c.errors == 0: + good_cities.append(c) + + logging.info('%s good cities: %s', len(good_cities), ', '.join([c.name for c in good_cities])) + + if options.networks: + from collections import Counter + for c in cities: + networks = Counter() + for r in c.routes.values(): + networks[str(r.network)] += 1 + print('{}: {}'.format(c.name, '; '.join( + ['{} ({})'.format(k, v) for k, v in networks.items()])), file=options.networks) + + # Finally, preparing a JSON file for MAPS.ME + if options.output: + networks = [c.for_mapsme() for c in cities] + with open(options.output, 'w') as f: + json.dump(merge_mapsme_networks(networks), f) diff --git a/stop_areas/make_stop_areas.py b/stop_areas/make_stop_areas.py new file mode 100755 index 0000000..b804d72 --- /dev/null +++ b/stop_areas/make_stop_areas.py @@ -0,0 +1,180 @@ +#!/usr/bin/env python3 +import json +from lxml import etree +import sys +import kdtree +import math +import re +import urllib.parse +import urllib.request + + +QUERY = """ +[out:json][timeout:250][bbox:{{bbox}}]; +( + node["railway"="subway_entrance"]; + node["station"="subway"]; + node["station"="light_rail"]; + node["public_transport"="stop_position"]["train"="yes"]; + node["public_transport"="stop_position"]["subway"="yes"]; + way["station"="subway"]; + relation["station"="subway"]; + 
way["railway"="platform"]; + relation["railway"="platform"]; + relation[route="subway"]; + relation[route="light_rail"]; +); +(._;>;); +(._;rel(bn);); +out meta center qt; +""" + + +def el_id(el): + return el['type'][0] + str(el.get('id', el.get('ref', ''))) + + +class StationWrapper: + def __init__(self, st): + self.coords = (st['lon'], st['lat']) + self.station = st + + def __len__(self): + return 2 + + def __getitem__(self, i): + return self.coords[i] + + def distance(self, other): + """Calculate distance in meters.""" + dx = math.radians(self[0] - other['lon']) * math.cos( + 0.5 * math.radians(self[1] + other['lat'])) + dy = math.radians(self[1] - other['lat']) + return 6378137 * math.sqrt(dx*dx + dy*dy) + + +def overpass_request(bbox): + url = 'http://overpass-api.de/api/interpreter?data={}'.format( + urllib.parse.quote(QUERY.replace('{{bbox}}', bbox))) + response = urllib.request.urlopen(url, timeout=1000) + if response.getcode() != 200: + raise Exception('Failed to query Overpass API: HTTP {}'.format(response.getcode())) + return json.load(response)['elements'] + + +def add_stop_areas(src): + if not src: + raise Exception('Empty dataset provided to add_stop_areas') + + # Add station=* tags to stations in subway and light_rail routes + stations = {} + for el in src: + if 'tags' in el and el['tags'].get('railway', None) == 'station': + stations[el_id(el)] = el + + for el in src: + if (el['type'] == 'relation' and 'tags' in el and + el['tags'].get('route', None) in ('subway', 'light_rail')): + for m in el['members']: + st = stations.get(el_id(m), None) + if st and 'station' not in st['tags']: + st['tags']['station'] = el['tags']['route'] + st['modified'] = True + + # Create a kd-tree out of subway stations + stations = kdtree.create(dimensions=2) + for el in src: + if 'tags' in el and el['tags'].get('station', None) in ('subway', 'light_rail'): + stations.add(StationWrapper(el)) + + # Populate a list of nearby subway exits and platforms for each station + 
MAX_DISTANCE = 300 # meters + stop_areas = {} + for el in src: + if 'tags' not in el: + continue + if (el['tags'].get('railway', None) not in ('subway_entrance', 'platform') and + el['tags'].get('public_transport', None) not in ('platform', 'stop_position')): + continue + coords = el.get('center', el) + station = stations.search_nn((coords['lon'], coords['lat']))[0].data + if station.distance(coords) < MAX_DISTANCE: + k = (station.station['id'], station.station['tags']['name']) + # Disregard exits and platforms that are differently named + if el['tags'].get('name', k[1]) == k[1]: + if k not in stop_areas: + stop_areas[k] = {el_id(station.station): station.station} + stop_areas[k][el_id(el)] = el + + # Find existing stop_area relations for stations and remove these stations + for el in src: + if el['type'] == 'relation' and el['tags'].get('public_transport', None) == 'stop_area': + found = False + for m in el['members']: + if found: + break + for st in stop_areas: + if el_id(m) in stop_areas[st]: + del stop_areas[st] + found = True + break + + # Create OSM XML for new stop_area relations + root = etree.Element('osm', version='0.6') + rid = -1 + for st, members in stop_areas.items(): + rel = etree.SubElement(root, 'relation', id=str(rid)) + rid -= 1 + etree.SubElement(rel, 'tag', k='type', v='public_transport') + etree.SubElement(rel, 'tag', k='public_transport', v='stop_area') + etree.SubElement(rel, 'tag', k='name', v=st[1]) + for m in members.values(): + if m['tags'].get('railway', m['tags'].get('public_transport', None)) == 'platform': + role = 'platform' + elif m['tags'].get('public_transport', None) == 'stop_position': + role = 'stop' + else: + role = '' + etree.SubElement(rel, 'member', ref=str(m['id']), type=m['type'], role=role) + + # Add all downloaded elements + for el in src: + obj = etree.SubElement(root, el['type']) + for a in ('id', 'type', 'user', 'uid', 'version', 'changeset', 'timestamp', 'lat', 'lon'): + if a in el: + obj.set(a, str(el[a])) + if 
'modified' in el: + obj.set('action', 'modify') + if 'tags' in el: + for k, v in el['tags'].items(): + etree.SubElement(obj, 'tag', k=k, v=v) + if 'members' in el: + for m in el['members']: + etree.SubElement(obj, 'member', ref=str(m['ref']), + type=m['type'], role=m.get('role', '')) + if 'nodes' in el: + for n in el['nodes']: + etree.SubElement(obj, 'nd', ref=str(n)) + + return etree.tostring(root, pretty_print=True) + + +if __name__ == '__main__': + if len(sys.argv) < 2: + print('Read a JSON from Overpass and output JOSM OSM XML with added stop_area relations') + print('Usage: {} {|} [output.osm]'.format(sys.argv[0])) + sys.exit(1) + + if re.match(r'', sys.argv[1]): + src = overpass_request(sys.argv[1]) + else: + with open(sys.argv[1], 'r') as f: + src = json.load(f)['elements'] + + result = add_stop_areas(src) + + if len(sys.argv) < 3: + print(result.decode('utf-8')) + else: + with open(sys.argv[2], 'wb') as f: + f.write(result) diff --git a/stop_areas/serve.py b/stop_areas/serve.py new file mode 100755 index 0000000..264516c --- /dev/null +++ b/stop_areas/serve.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python3 +from flask import Flask, request, make_response, render_template +from make_stop_areas import add_stop_areas, overpass_request + +app = Flask(__name__) +app.debug = True + + +@app.route('/') +def form(): + return render_template('index.html') + + +@app.route('/process', methods=['GET']) +def convert(): + bbox = request.args.get('bbox').split(',') + bbox_r = ','.join([bbox[i] for i in (1, 0, 3, 2)]) + src = overpass_request(bbox_r) + if not src: + return 'No data from overpass, sorry.' 
+ result = add_stop_areas(src) + response = make_response(result) + response.headers['Content-Disposition'] = 'attachment; filename="stop_areas.osm"' + return response + +if __name__ == '__main__': + app.run() diff --git a/stop_areas/templates/index.html b/stop_areas/templates/index.html new file mode 100644 index 0000000..e9b7738 --- /dev/null +++ b/stop_areas/templates/index.html @@ -0,0 +1,68 @@ + + + + + + Make Stop Areas + + + + + + + +
+ + + diff --git a/subway_structure.py b/subway_structure.py new file mode 100644 index 0000000..457e33d --- /dev/null +++ b/subway_structure.py @@ -0,0 +1,408 @@ +import csv +import logging +import math +import urllib.parse +import urllib.request + + +SPREADSHEET_ID = '1-UHDzfBwHdeyFxgC5cE_MaNQotF3-Y0r1nW9IwpIEj8' +MAX_DISTANCE_NEARBY = 150 # in meters + + +def el_id(el): + if 'type' not in el: + raise Exception('What is this element? {}'.format(el)) + return el['type'][0] + str(el.get('id', el.get('ref', ''))) + + +def el_center(el): + if 'lat' in el: + return (el['lon'], el['lat']) + elif 'center' in el: + if el['center']['lat'] == 0.0: + # Some relations don't have centers. We need route_masters and stop_area_groups. + if el['type'] == 'relation' and 'tags' in el and ( + el['tags'].get('type', None) == 'route_master' or + el['tags'].get('public_transport', None) == 'stop_area_group'): + return None + return (el['center']['lon'], el['center']['lat']) + return None + + +def distance(p1, p2): + if p1 is None or p2 is None: + return None + dx = math.radians(p1[0] - p2[0]) * math.cos( + 0.5 * math.radians(p1[1] + p2[1])) + dy = math.radians(p1[1] - p2[1]) + return 6378137 * math.sqrt(dx*dx + dy*dy) + + +class Station: + @staticmethod + def is_station(el): + if el.get('tags', {}).get('railway', None) != 'station': + return False + if 'construction' in el['tags'] or 'proposed' in el['tags']: + return False + if (el['tags'].get('station', None) not in ('subway', 'light_rail') and + el['tags'].get('subway', None) != 'yes' and + el['tags'].get('light_rail', None) != 'yes'): + return False + return True + + def __init__(self, el, city): + """Call this with a railway=station node.""" + if el.get('tags', {}).get('railway', None) != 'station': + raise Exception( + 'Station object should be instantiated from a station node. 
Got: {}'.format(el)) + if not Station.is_station(el): + raise Exception('Processing only subway and light rail stations') + + if el['type'] != 'node': + city.warn('Station is not a node', el) + self.element = el + self.is_light = (el['tags'].get('station', None) == 'light_rail' or + el['tags'].get('light_rail', None) == 'yes') + self.id = el_id(el) + self.elements = set([self.id]) + if self.id in city.stations: + city.error('Station {} {} is listed in two stop_areas, first one:'.format( + el['type'], el['id']), city.stations[self.id].element) + + # Find a stop_area relation + self.stop_area = None + nearby = [] + center = el_center(el) + if center is None: + raise Exception('Could not find center of {}'.format(el)) + for d in city.elements.values(): + if 'tags' not in d: + continue + # If it's a stop_area relation containing "el", record it + if d['type'] == 'relation' and d['tags'].get('public_transport', None) == 'stop_area': + for m in d['members']: + if m['type'] == el['type'] and m['ref'] == el['id']: + self.stop_area = d + break + # Otherwise record all platforms, stops and entrances nearby + elif d['type'] != 'relation' and ( + d['tags'].get('railway', None) in ('platform', 'subway_entrance') or + d['tags'].get('public_transport', None) in ('platform', 'stop_position')): + # Take care to not add other stations + if 'station' not in d['tags']: + d_center = el_center(d) + if d_center is not None and distance(center, d_center) <= MAX_DISTANCE_NEARBY: + nearby.append(d) + if self.stop_area: + break + + if self.stop_area: + # If we have a stop area, add all elements from it + self.elements.add(el_id(self.stop_area)) + for m in self.stop_area['members']: + k = el_id(m) + if k in city.elements: + self.elements.add(k) + else: + # Otherwise add nearby entrances and stop positions + for k in nearby: + self.elements.add(el_id(k)) + + # TODO: Set name, colour etc. 
+ self.name = el['tags'].get('name', 'Unknown') + self.colour = el['tags'].get('colour', None) + + def contains(self, el): + return el_id(el) in self.elements + + +class Route: + """The longest route for a city with a unique ref.""" + @staticmethod + def is_route(el): + if el['type'] != 'relation' or el.get('tags', {}).get('type', None) != 'route': + return False + if 'members' not in el: + return False + if el['tags'].get('route', None) not in ('subway', 'light_rail'): + return False + if 'construction' in el['tags'] or 'proposed' in el['tags']: + return False + if 'ref' not in el['tags'] and 'name' not in el['tags']: + return False + return True + + @staticmethod + def get_network(relation): + return relation['tags'].get('network', relation['tags'].get('operator', None)) + + def __init__(self, relation, city): + if not Route.is_route(relation): + raise Exception('The relation does not seem a route: {}'.format(relation)) + self.element = relation + self.id = el_id(relation) + if 'ref' not in relation['tags']: + city.warn('Missing ref on a route', relation) + self.ref = relation['tags'].get('ref', relation['tags'].get('name', None)) + if 'colour' not in relation['tags']: + city.warn('Missing colour on a route', relation) + self.colour = relation['tags'].get('colour', None) + self.network = Route.get_network(relation) + self.is_light = relation['tags']['route'] == 'light_rail' + self.rails = [] + self.stops = [] + enough_stops = False + for m in relation['members']: + k = el_id(m) + if k in city.stations: + st = city.stations[k] + if not self.stops or self.stops[-1] != st: + if enough_stops: + if st not in self.stops: + city.warn('Inconsistent platform-stop "{}" in route'.format(st.name), + relation) + elif st not in self.stops: + self.stops.append(st) + if self.is_light and not st.is_light: + city.warn('Subway station "{}" in light rail route'.format(st.name), + relation) + elif st.is_light and not self.is_light: + city.warn('Light rail station "{}" in subway 
class RouteMaster:
    """A group of route variants sharing one ref (usually the two directions).

    Keeps every variant in self.routes and tracks the variant with the most
    stops in self.best, which is later used for station counting.
    """

    def __init__(self, route):
        self.routes = [route]
        self.best = route
        self.ref = route.ref
        self.network = route.network
        self.is_light = route.is_light

    def add(self, route, city):
        """Adds a route variant after checking it is compatible with the master.

        Reports mismatching network/ref via the city logger; an is_light
        mismatch is a hard error and the route is not added at all.
        """
        if route.network != self.network:
            city.error('Route has different network ("{}") from master "{}"'.format(
                route.network, self.network), route.element)
        if route.ref != self.ref:
            city.warn('Route "{}" has different ref from master "{}"'.format(
                route.ref, self.ref), route.element)
        if route.is_light != self.is_light:
            city.error('Incompatible is_light flag: master has {} and route has {}'.format(
                self.is_light, route.is_light), route.element)
            # Mixing subway and light rail variants would corrupt line
            # counting, so refuse the route.
            return
        self.routes.append(route)
        # The longest variant becomes the reference route of the master.
        if len(route.stops) > len(self.best.stops):
            self.best = route

    def __len__(self):
        return len(self.routes)

    def __getitem__(self, i):
        # Was mistakenly named __get__ (the descriptor protocol method),
        # which made indexing a RouteMaster fail; __getitem__ enables
        # master[i] as intended.
        return self.routes[i]
    def warn(self, message, el=None):
        # Counts and logs a non-fatal data problem for this city.
        self.warnings += 1
        self.log(logging.WARNING, message, el)

    def error(self, message, el=None):
        # Counts and logs a data error; errors feed into validation totals.
        self.errors += 1
        self.log(logging.ERROR, message, el)

    def make_transfer(self, sag):
        """Records an interchange from a stop_area_group relation.

        All members must resolve to known stations, otherwise the whole
        group is silently skipped (it likely spans outside this city).
        """
        transfer = set()
        for m in sag['members']:
            k = el_id(m)
            if k not in self.stations:
                return
            transfer.add(self.stations[k])
        if transfer:
            self.transfers.append(transfer)

    def extract_routes(self):
        """Builds stations, route masters and transfers from self.elements.

        Must be called after all elements have been fed through add();
        populates self.stations, self.station_ids, self.routes and
        self.transfers.
        """
        # First pass: wrap every station element and index it by each member
        # of its stop area, so route members can resolve to a Station.
        for el in self.elements.values():
            if Station.is_station(el):
                station = Station(el, self)
                self.station_ids.add(station.id)
                # All elements of the station's stop_area map to the same
                # Station object.
                for e in station.elements:
                    self.stations[e] = station

        # Second pass: group route relations into RouteMaster objects,
        # keyed by the route_master relation id or, lacking one, by ref;
        # also collect stop_area_group relations as transfers.
        for el in self.elements.values():
            if Route.is_route(el):
                # Skip routes of foreign operators when the city spreadsheet
                # lists specific networks.
                if self.networks and Route.get_network(el) not in self.networks:
                    continue
                route = Route(el, self)
                k = self.masters.get(route.id, route.ref)
                if k not in self.routes:
                    self.routes[k] = RouteMaster(route)
                else:
                    self.routes[k].add(route, self)

            if (el['type'] == 'relation' and
                    el.get('tags', {}).get('public_transport', None) == 'stop_area_group'):
                self.make_transfer(el)
def download_cities():
    """Downloads the reference city list from the Google spreadsheet.

    Returns a list of City objects built from rows that have a bounding
    box in column 8; warns about duplicate city names in the sheet.
    Raises an exception when the spreadsheet cannot be fetched.
    """
    url = 'https://docs.google.com/spreadsheets/d/{}/export?format=csv'.format(SPREADSHEET_ID)
    # Use the response as a context manager so the connection is closed
    # even if reading or decoding fails.
    with urllib.request.urlopen(url) as response:
        if response.getcode() != 200:
            raise Exception('Failed to download cities spreadsheet: HTTP {}'.format(
                response.getcode()))
        data = response.read().decode('utf-8')
    r = csv.reader(data.splitlines())
    next(r)  # skipping the header
    names = set()
    cities = []
    for row in r:
        # Column 7 holds the bounding box; rows without it are reference
        # entries and produce no City.
        if len(row) > 7 and row[7]:
            cities.append(City(row))
            name = row[0].strip()
            if name in names:
                logging.warning('Duplicate city name in the google spreadsheet: %s', row[0])
            names.add(name)
    return cities
--git a/v2h_templates.py b/v2h_templates.py new file mode 100644 index 0000000..a90f4ad --- /dev/null +++ b/v2h_templates.py @@ -0,0 +1,143 @@ +# These are templates for validation_to_html.py +# Variables should be in curly braces + +STYLE = ''' + +''' + +INDEX_HEADER = ''' + + + +Subway Validator + +(s) + + +

Subway Validation Results

+ +'''.replace('(s)', STYLE) + +INDEX_CONTINENT = ''' + + + + + + + + + + + + + + + + + + + + + + +{content} +''' + +INDEX_COUNTRY = ''' + + + + + + + + + + + +''' + +INDEX_FOOTER = ''' +
 
ContinentCountryGood CitiesSubway LinesLight Rail LinesStationsInterchangesErrorsWarnings
{continent}{good_cities} / {total_cities}{subwayl_found} / {subwayl_expected}{lightrl_found} / {lightrl_expected}{stations_found} / {stations_expected}{transfers_found} / {transfers_expected}{num_errors}{num_warnings}
 {country}{good_cities} / {total_cities}{subwayl_found} / {subwayl_expected}{lightrl_found} / {lightrl_expected}{stations_found} / {stations_expected}{transfers_found} / {transfers_expected}{num_errors}{num_warnings}
+

Produced by Subway Preprocessor on {date}. +See this spreadsheet for the reference metro statistics and +this wiki page for a list +of all metro systems.

+ + +''' + +COUNTRY_HEADER = ''' + + + +Subway Validator: {country} + +(s) + + +

Subway Validation Results for {country}

+

Return to the countries list.

+ + + + + + + + + +'''.replace('(s)', STYLE) + +COUNTRY_CITY = ''' + + + + + + + + + +''' + +COUNTRY_FOOTER = ''' +
CitySubway LinesLight Rail LinesStationsInterchangesUnused Entrances
{city}sub: {subwayl_found} / {subwayl_expected}lr: {lightrl_found} / {lightrl_expected}st: {stations_found} / {stations_expected}int: {transfers_found} / {transfers_expected}e: {unused_entrances}
+
+{errors} +
+{warnings} +
+
+

Produced by Subway Preprocessor on {date}.

class CityData:
    """Aggregated validation counters for one city (or a sum of cities).

    Instances support "+" so per-city data can be summed into country and
    continent totals, and format(), which substitutes {key} values and
    {=key} equality flags into an HTML template.
    """
    # Validator error messages that carry found/expected numbers.
    REGEXPS = (
        ('subwayl', re.compile(r'Found (\d+) subway lines, expected (\d+)')),
        ('lightrl', re.compile(r'Found (\d+) light rail.*expected (\d+)')),
        ('stations', re.compile(r'Found (\d+) stations.*expected (\d+)')),
        ('transfers', re.compile(r'Found (\d+) interch.*expected (\d+)')),
    )

    def __init__(self, city=None):
        # A falsy city makes an all-zero accumulator for summing.
        self.city = city is not None
        # Keep the message lists unconditional so summed accumulators
        # cannot hit an AttributeError.
        self.errors = []
        self.warnings = []
        if city:
            self.country = city.country
            self.continent = city.continent
        self.data = {
            'stations_expected': city.num_stations if city else 0,
            'subwayl_expected': city.num_lines if city else 0,
            'lightrl_expected': city.num_light_lines if city else 0,
            'transfers_expected': city.num_interchanges if city else 0,
            'unused_entrances': 0,
            'good_cities': 1 if city else 0,
            'total_cities': 1 if city else 0,
            'num_errors': 0,
            'num_warnings': 0,
        }
        # Until the log proves otherwise, assume found == expected.
        for k, _ in CityData.REGEXPS:
            self.data[k + '_found'] = self.data[k + '_expected']

    def __getitem__(self, i):
        # Was mistakenly named __get__ (descriptor protocol); __getitem__
        # makes data[key] item access work as intended.
        return self.data[i]

    def __setitem__(self, i, value):
        # Counterpart fix: was __set__, which never applied to item access.
        self.data[i] = value

    def __add__(self, other):
        """Returns a new CityData with per-key sums of both counters."""
        d = CityData()
        for k in d.data:
            d.data[k] = self.data[k] + other.data[k]
        return d

    def format(self, s):
        """Substitutes {key} values and {=key} '1'/'0' equality flags in s."""
        def test_eq(v1, v2):
            return '1' if v1 == v2 else '0'

        for k in self.data:
            s = s.replace('{' + k + '}', str(self.data[k]))
        for k in ('subwayl', 'lightrl', 'stations', 'transfers'):
            s = s.replace('{=' + k + '}',
                          test_eq(self.data[k + '_found'], self.data[k + '_expected']))
        s = s.replace('{=cities}',
                      test_eq(self.data['good_cities'], self.data['total_cities']))
        s = s.replace('{=entrances}', test_eq(self.data['unused_entrances'], 0))
        for k in ('errors', 'warnings'):
            s = s.replace('{=' + k + '}', test_eq(self.data['num_' + k], 0))
        return s

    def add_warning(self, msg):
        # Warnings do not disqualify the city from the "good" count.
        self.warnings.append(msg)
        self.data['num_warnings'] += 1

    def add_error(self, msg):
        """Records an error and harvests found/expected numbers from it."""
        for k, reg in CityData.REGEXPS:
            m = reg.search(msg)
            if m:
                self.data[k + '_found'] = int(m[1])
                self.data[k + '_expected'] = int(m[2])
        m = re.search(r'Found (\d+) unused subway e', msg)
        if m:
            self.data['unused_entrances'] = int(m[1])
        self.errors.append(msg)
        self.data['num_errors'] += 1
        # Any error disqualifies the city from the "good" count.
        self.data['good_cities'] = 0
Exception('City {} not found in the cities list'.format(city_name)) + city = data[city_name] + if level == 'WARNING': + city.add_warning(msg) + elif level == 'ERROR': + city.add_error(msg) + +countries = {} +continents = {} +c_by_c = {} # continent → set of countries +for c in data.values(): + countries[c.country] = c + countries.get(c.country, CityData()) + continents[c.continent] = c + continents.get(c.continent, CityData()) + if c.continent not in c_by_c: + c_by_c[c.continent] = set() + c_by_c[c.continent].add(c.country) + +date = datetime.datetime.now().strftime('%d.%m.%Y %H:%M') +path = '.' if len(sys.argv) < 3 else sys.argv[2] +index = open(os.path.join(path, 'index.html'), 'w') +index.write(tmpl(INDEX_HEADER)) + +for continent in sorted(continents.keys()): + content = '' + for country in sorted(c_by_c[continent]): + country_file_name = country.lower().replace(' ', '-') + '.html' + content += tmpl(INDEX_COUNTRY, countries[country], file=country_file_name, + country=country, continent=continent) + country_file = open(os.path.join(path, country_file_name), 'w') + country_file.write(tmpl(COUNTRY_HEADER, country=country, continent=continent)) + for name, city in sorted(data.items()): + if city.country == country: + e = '
'.join([osm_links(esc(e)) for e in city.errors]) + w = '
'.join([osm_links(esc(w)) for w in city.warnings]) + country_file.write(tmpl(COUNTRY_CITY, city, + city=name, country=country, continent=continent, + errors=e, warnings=w)) + country_file.write(tmpl(COUNTRY_FOOTER, country=country, continent=continent)) + country_file.close() + index.write(tmpl(INDEX_CONTINENT, continents[continent], + content=content, continent=continent)) + +index.write(tmpl(INDEX_FOOTER)) +index.close()