From effb90042c6975009260361e120e7309f36866fa Mon Sep 17 00:00:00 2001 From: Ilya Zverev Date: Sat, 4 Jun 2016 12:21:32 +0300 Subject: [PATCH] [mwm.py] Multilang strings --- tools/python/mwm/dump_mwm.py | 23 ++++++++++++----------- tools/python/mwm/mwm.py | 35 ++++++++++++++++++++++++++++++++--- 2 files changed, 44 insertions(+), 14 deletions(-) diff --git a/tools/python/mwm/dump_mwm.py b/tools/python/mwm/dump_mwm.py index 7493f1c423..251c5cd03c 100755 --- a/tools/python/mwm/dump_mwm.py +++ b/tools/python/mwm/dump_mwm.py @@ -1,33 +1,34 @@ #!/usr/bin/python import sys, os.path, random +import json from mwm import MWM if len(sys.argv) < 2: - print 'Dumps some MWM structures.' - print 'Usage: {0} '.format(sys.argv[0]) - sys.exit(1) + print 'Dumps some MWM structures.' + print 'Usage: {0} '.format(sys.argv[0]) + sys.exit(1) mwm = MWM(open(sys.argv[1], 'rb')) mwm.read_types(os.path.join(os.path.dirname(sys.argv[0]), '..', '..', '..', 'data', 'types.txt')) print 'Tags:' tvv = sorted([(k, v[0], v[1]) for k, v in mwm.tags.iteritems()], key=lambda x: x[1]) for tv in tvv: - print ' {0:<8}: offs {1:9} len {2:8}'.format(tv[0], tv[1], tv[2]) + print ' {0:<8}: offs {1:9} len {2:8}'.format(tv[0], tv[1], tv[2]) print 'Version:', mwm.read_version() print 'Header:', mwm.read_header() print 'Metadata count:', len(mwm.read_metadata()) cross = mwm.read_crossmwm() if cross: - print 'Outgoing points:', len(cross['out']), 'incoming:', len(cross['in']) - print 'Outgoing regions:', set(cross['neighbours']) + print 'Outgoing points:', len(cross['out']), 'incoming:', len(cross['in']) + print 'Outgoing regions:', set(cross['neighbours']) print 'Sample features:' count = 5 probability = 1.0 / 1000 for feature in mwm.iter_features(): - if random.random() < probability: - print feature - count -= 1 - if count <= 0: - break + if random.random() < probability: + print json.dumps(feature, ensure_ascii=False) + count -= 1 + if count <= 0: + break diff --git a/tools/python/mwm/mwm.py 
# tools/python/mwm/mwm.py -- method of class MWM. This implementation replaces
# the former stub that returned the raw string (the "Multilang string reading"
# TODO item).
def read_multilang(self):
    """Read a multilingual string and split it into per-language values.

    The raw string comes from self.read_string(). It is treated as a
    sequence of records: one marker byte whose two top bits are ``10`` and
    whose low six bits are a language index, followed by the text for that
    language. Text bytes are skipped using UTF-8-style lead-byte lengths so
    that continuation bytes (which also start with ``10``) are not mistaken
    for the next marker.

    Returns a dict mapping ``self.languages[index]`` to the text slice of
    the raw string (same type as read_string() returns; not decoded).
    Records whose index is outside ``self.languages`` are dropped; if a
    language occurs more than once, the last record wins.
    """
    def find_multilang_next(octets, pos):
        # Return the index of the marker byte following the record that
        # starts at pos, or an index >= len(octets) when there is none.
        pos += 1
        size = len(octets)
        while pos < size:
            c = octets[pos]
            if c & 0xC0 == 0x80:
                # Bare 10xxxxxx byte outside a multi-byte sequence: marker.
                break
            # Advance by the full sequence length implied by the lead byte.
            if c & 0x80 == 0:
                pos += 1
            elif c & 0xFE == 0xFE:
                pos += 7
            elif c & 0xFC == 0xFC:
                pos += 6
            elif c & 0xF8 == 0xF8:
                pos += 5
            elif c & 0xF0 == 0xF0:
                pos += 4
            elif c & 0xE0 == 0xE0:
                pos += 3
            else:
                # Only 110xxxxx remains here: a two-byte sequence.
                pos += 2
        return pos

    s = self.read_string()
    # bytearray yields integer byte values on both Python 2 (str) and
    # Python 3 (bytes), so no per-byte struct.unpack is needed.
    octets = bytearray(s)
    total = len(octets)
    known = len(self.languages)

    langs = {}
    i = 0
    while i < total:
        n = find_multilang_next(octets, i)
        lng = octets[i] & 0x3F
        if lng < known:
            # Slice the original string so the value type is unchanged.
            langs[self.languages[lng]] = s[i + 1:n]
        i = n
    return langs