From d88064bc84341db8a12a729351015c88af3922d9 Mon Sep 17 00:00:00 2001 From: Alex Zolotarev Date: Sun, 29 May 2011 16:52:57 +0200 Subject: [PATCH] Fixed unique_char_counter for linux build --- tools/osm_unique_char_counter/main.cpp | 43 ++++++++++++++++++- .../osm_unique_char_counter.pro | 10 ++--- 2 files changed, 46 insertions(+), 7 deletions(-) diff --git a/tools/osm_unique_char_counter/main.cpp b/tools/osm_unique_char_counter/main.cpp index 91d1a0a7e4..aab530eec1 100644 --- a/tools/osm_unique_char_counter/main.cpp +++ b/tools/osm_unique_char_counter/main.cpp @@ -8,6 +8,7 @@ #include "../../std/vector.hpp" #include +#include using namespace std; @@ -57,6 +58,7 @@ public: typedef unordered_map CountContT; typedef pair ElemT; +typedef unordered_map UniMapT; bool SortFunc(ElemT const & e1, ElemT const & e2) { @@ -66,6 +68,9 @@ bool SortFunc(ElemT const & e1, ElemT const & e2) struct Counter { CountContT m_counter; + UniMapT & m_uni; + + Counter(UniMapT & uni) : m_uni(uni) {} void operator()(string const & utf8s) { @@ -87,6 +92,7 @@ struct Counter SortVecT v(m_counter.begin(), m_counter.end()); sort(v.begin(), v.end(), SortFunc); + locale loc("en_US.UTF-8"); cout.imbue(loc); @@ -96,7 +102,11 @@ struct Counter { c.clear(); utf8::unchecked::append(v[i].first, back_inserter(c)); - cout << v[i].second << " " << hex << v[i].first << " " << c << endl; + UniMapT::iterator found = m_uni.find(v[i].first); + if (found == m_uni.end()) + cout << dec << v[i].second << " " << hex << v[i].first << " " << c << endl; + else + cout << dec << v[i].second << " " << c << " " << found->second << endl; } } }; @@ -111,7 +121,36 @@ struct StdinReader int main(int argc, char *argv[]) { - Counter c; + if (argc < 2) + { + cerr << "Usage: " << argv[0] << " PathToUnicodeFile" << endl; + return -1; + } + + // load unicodedata.txt file + ifstream f(argv[1]); + if (!f.good()) + { + cerr << "Can't open unicodedata.txt file " << argv[1] << endl; + return -1; + } + + UniMapT m; + + string line; + while (f.good()) + { + getline(f, line); + size_t const semic = line.find(';'); + if (semic == string::npos) + continue; + istringstream stream(line.substr(0, semic)); + strings::UniChar c; + stream >> hex >> c; + m[c] = line; + } + + Counter c(m); XMLDispatcher dispatcher(c); StdinReader reader; ParseXML(reader, dispatcher); diff --git a/tools/osm_unique_char_counter/osm_unique_char_counter.pro b/tools/osm_unique_char_counter/osm_unique_char_counter.pro index 15c00e3207..d37bedbc0d 100644 --- a/tools/osm_unique_char_counter/osm_unique_char_counter.pro +++ b/tools/osm_unique_char_counter/osm_unique_char_counter.pro @@ -1,17 +1,17 @@ # ----------------------------------------------------- # Project created by Alex Zolotarev 2010-01-21T13:23:29 # ----------------------------------------------------- +ROOT_DIR = ../.. + +include($$ROOT_DIR/common.pri) + QT -= gui core TARGET = osm_unique_char_counter CONFIG += console CONFIG -= app_bundle TEMPLATE = app -ROOT_DIR = ../.. -DEPENDENCIES = coding base expat - -include($$ROOT_DIR/common.pri) - +LIBS += -L/media/ssd/alexz/omim-build-release/out/release -lcoding -lbase -lexpat # Additional include directories INCLUDEPATH *= ../../3party/expat/lib \