Add language metainfo generation routine. Use Unicode supplementalData.xml as the source.

This commit is contained in:
vng 2011-11-21 19:30:10 +03:00 committed by Alex Zolotarev
parent c455888eec
commit ee92fc600f
6 changed files with 171 additions and 10 deletions

View file

@ -142,11 +142,11 @@ namespace
}
};
class DoStoreNames
class DoStoreFile2Name
{
map<string, string> & m_id2name;
map<string, string> & m_file2name;
public:
DoStoreNames(map<string, string> & id2name) : m_id2name(id2name) {}
DoStoreFile2Name(map<string, string> & file2name) : m_file2name(file2name) {}
void operator() (string name, string const & file, string const &,
uint32_t size, int64_t, int)
@ -159,10 +159,23 @@ namespace
name = file.substr(0, i) + '_' + name;
if (name != file)
m_id2name[file] = name;
m_file2name[file] = name;
}
}
};
/// Functor that records one (flag, file) pair in a caller-owned multimap
/// each time it is invoked. The multimap is held by reference, so it must
/// outlive this object.
class DoStoreCode2File
{
  typedef multimap<string, string> ContainerT;

  ContainerT & m_code2file;

public:
  DoStoreCode2File(multimap<string, string> & code2file)
    : m_code2file(code2file)
  {
  }

  /// Only the second (file) and third (flag) arguments are used;
  /// the remaining arguments are ignored.
  void operator() (string const &, string const & file, string const & flag,
                   uint32_t, int64_t, int)
  {
    // Keyed by flag: several files may share the same flag value.
    m_code2file.insert(ContainerT::value_type(flag, file));
  }
};
}
int64_t LoadCountries(string const & jsonBuffer, CountriesContainerT & countries)
@ -172,10 +185,17 @@ int64_t LoadCountries(string const & jsonBuffer, CountriesContainerT & countries
return LoadCountriesImpl(jsonBuffer, doStore);
}
void LoadCountryNames(string const & jsonBuffer, map<string, string> & id2name)
void LoadCountryFile2Name(string const & jsonBuffer, map<string, string> & id2name)
{
ASSERT ( id2name.empty(), () );
DoStoreNames doStore(id2name);
DoStoreFile2Name doStore(id2name);
LoadCountriesImpl(jsonBuffer, doStore);
}
/// Fills code2file with (flag, file) pairs for the entries found in jsonBuffer.
/// The output container must be empty on entry; this is checked with ASSERT.
/// The value returned by LoadCountriesImpl is not propagated to the caller.
void LoadCountryCode2File(string const & jsonBuffer, multimap<string, string> & code2file)
{
  ASSERT(code2file.empty(), ());

  DoStoreCode2File collector(code2file);
  LoadCountriesImpl(jsonBuffer, collector);
}

View file

@ -70,7 +70,8 @@ namespace storage
/// @return version of country file or -1 if error was encountered
int64_t LoadCountries(string const & jsonBuffer, CountriesContainerT & countries);
void LoadCountryNames(string const & jsonBuffer, map<string, string> & id2name);
void LoadCountryFile2Name(string const & jsonBuffer, map<string, string> & id2name);
void LoadCountryCode2File(string const & jsonBuffer, multimap<string, string> & code2file);
bool SaveCountries(int64_t version, CountriesContainerT const & countries, string & jsonBuffer);
}

View file

@ -20,7 +20,7 @@ namespace storage
string buffer;
countryR.ReadAsString(buffer);
LoadCountryNames(buffer, m_id2name);
LoadCountryFile2Name(buffer, m_id2name);
}
template <class ToDo>

View file

@ -27,7 +27,7 @@ UNIT_TEST(CountryInfo_ValidName_Smoke)
ReaderPtr<Reader>(GetPlatform().GetReader(COUNTRIES_FILE)).ReadAsString(buffer);
map<string, string> id2name;
storage::LoadCountryNames(buffer, id2name);
storage::LoadCountryFile2Name(buffer, id2name);
TEST(id2name.count("Germany_Baden-Wurttemberg") == 1, ());
TEST(id2name.count("France_Paris & Ile-de-France") == 1, ());

View file

@ -0,0 +1,139 @@
#include "../../testing/testing.hpp"
#include "../country.hpp"
#include "../../platform/platform.hpp"
#include "../../coding/file_reader.hpp"
#include "../../coding/parse_xml.hpp"
#include "../../base/string_utils.hpp"
#include "../../std/fstream.hpp"
/*
namespace
{
// XML element-event handler (Push / AddAttr / Pop / CharData callbacks).
// It follows the nesting territoryInfo > territory > languagePopulation and,
// when a territory element closes, writes the selected language codes joined
// with '|' into "<path><file>.meta" for every file that m_code2file maps to
// the lower-cased territory code.
class LangXMLGetter
{
// Prefix prepended to every output file name.
string m_path;
// Lookup from territory code to file names; owned by the caller.
multimap<string, string> & m_code2file;
// Nesting state: 0 - outside territoryInfo, 1 - inside territoryInfo,
// 2 - inside territory, 3 - inside languagePopulation.
int m_state;
// "type" attribute of the current territory / languagePopulation element.
string m_country, m_lang;
// "populationPercent" of the current languagePopulation element.
double m_percent;
// True when "officialStatus" is "official" or "de_facto_official".
bool m_official;
// Accumulated '|'-separated languages for the current territory.
string m_res;
public:
// m_percent and m_official are not initialized here; Push() sets them
// before state 3 is entered, which is the only state where they are read.
LangXMLGetter(string const & path, multimap<string, string> & code2file)
: m_path(path), m_code2file(code2file), m_state(0)
{
}
// Called on an opening element. Advances the state only when the element
// name matches the next expected nesting level; always returns true.
bool Push(string const & name)
{
if (m_state == 0 && name == "territoryInfo")
{
m_state = 1;
}
else if (m_state == 1 && name == "territory")
{
// Start a new territory: forget the previous code and language list.
m_country.clear();
m_res.clear();
m_state = 2;
}
else if (m_state == 2 && name == "languagePopulation")
{
// Start a new language record with default values.
m_lang.clear();
m_percent = 0.0;
m_official = false;
m_state = 3;
}
return true;
}
// Called for each attribute of the element opened by the last Push().
// Attributes are interpreted according to the current state.
void AddAttr(string const & name, string const & value)
{
switch (m_state)
{
case 2:
// territory: remember its code.
if (name == "type")
m_country = value;
break;
case 3:
// languagePopulation: language code, population share, official flag.
if (name == "type")
m_lang = value;
else if (name == "populationPercent")
// The result of the conversion call is not checked.
strings::to_double(value, m_percent);
else
{
if (name == "officialStatus" &&
(value == "official" || value == "de_facto_official"))
{
m_official = true;
}
}
}
}
// Called on a closing element; the element name is ignored and the action
// is chosen from the current state only.
void Pop(string const &)
{
switch (m_state)
{
case 3:
// Closing languagePopulation: keep the language when it is non-empty and
// either has at least 10 percent of the population or is official.
if (!m_lang.empty() && (m_percent >= 10.0 || m_official))
{
if (m_res.empty()) m_res = m_lang;
else m_res = m_res + "|" + m_lang;
}
break;
case 2:
// Closing territory: write the collected languages to one ".meta" file
// per file name registered for this (lower-cased) territory code.
if (!m_country.empty() && !m_res.empty())
{
typedef multimap<string, string>::const_iterator iter_t;
strings::MakeLowerCase(m_country);
pair<iter_t, iter_t> r = m_code2file.equal_range(m_country);
while (r.first != r.second)
{
// The stream state is not checked after opening or writing.
ofstream file((m_path + r.first->second + ".meta").c_str());
file << m_res;
++r.first;
}
}
break;
}
// Every closing element while m_state > 0 moves one level up, including
// elements for which Push() did not advance the state.
// NOTE(review): presumably relies on the input having no other elements
// nested under territoryInfo - confirm against the data file.
if (m_state > 0) --m_state;
}
// Character data is ignored.
void CharData(string const &) {}
};
}
// Generator disguised as a unit test: reads the countries file, builds the
// code -> file multimap, then parses supplementalData.xml with LangXMLGetter,
// which writes the ".meta" files under the same directory.
UNIT_TEST(GenerateLanguages)
{
  // NOTE(review): absolute, machine-specific directory; it holds both the
  // input XML and the generated output files.
  string const dir = "/Users/alena/omim/omim/data/metainfo_test/";

  // Step 1: load the countries description and collect code -> file pairs.
  string countriesJson;
  ReaderPtr<Reader>(GetPlatform().GetReader(COUNTRIES_FILE)).ReadAsString(countriesJson);

  multimap<string, string> codeToFile;
  storage::LoadCountryCode2File(countriesJson, codeToFile);

  // Step 2: run the XML parser over the supplemental data file.
  FileReader xmlReader(dir + "supplementalData.xml");
  ReaderSource<FileReader> xmlSource(xmlReader);

  LangXMLGetter langGetter(dir, codeToFile);
  ParseXML(xmlSource, langGetter);
}
*/

View file

@ -6,7 +6,7 @@ CONFIG -= app_bundle
TEMPLATE = app
ROOT_DIR = ../..
DEPENDENCIES = storage indexer platform coding base jansson tomcrypt
DEPENDENCIES = storage indexer platform coding base jansson tomcrypt expat
include($$ROOT_DIR/common.pri)
@ -22,3 +22,4 @@ SOURCES += \
country_test.cpp \
simple_tree_test.cpp \
country_info_test.cpp \
generate_langs.cpp \