forked from organicmaps/organicmaps
[storage] Add an optional country_name_synonyms parameter to countries.txt so that the corresponding CountryId can be found by country name without modifying the countries.txt keys.
This commit is contained in:
parent
3cd6936c9e
commit
e08a9d988e
15 changed files with 94 additions and 22 deletions
|
@ -43,7 +43,8 @@ void GetPathToRoot(storage::CountryId const & id, storage::CountryTree const & c
|
|||
void RegionInfoGetter::LoadCountriesTree()
|
||||
{
|
||||
storage::Affiliations affiliations;
|
||||
storage::LoadCountriesFromFile(COUNTRIES_FILE, m_countries, affiliations);
|
||||
storage::CountryNameSynonyms countryNameSynonyms;
|
||||
storage::LoadCountriesFromFile(COUNTRIES_FILE, m_countries, affiliations, countryNameSynonyms);
|
||||
}
|
||||
|
||||
void RegionInfoGetter::SetLocale(string const & locale)
|
||||
|
|
|
@ -108,7 +108,8 @@ int main(int argc, char * argv[])
|
|||
InitDataSource(dataSource, "" /* mwmListPath */);
|
||||
|
||||
storage::Affiliations affiliations;
|
||||
InitAffiliations(affiliations);
|
||||
storage::CountryNameSynonyms countryNameSynonyms;
|
||||
InitStorageData(affiliations, countryNameSynonyms);
|
||||
|
||||
auto engine = InitSearchEngine(dataSource, affiliations, "en" /* locale */, 1 /* numThreads */);
|
||||
|
||||
|
|
|
@ -175,12 +175,14 @@ void InitDataSource(FrozenDataSource & dataSource, string const & mwmListPath)
|
|||
LOG(LINFO, ());
|
||||
}
|
||||
|
||||
void InitAffiliations(storage::Affiliations & affiliations)
|
||||
void InitStorageData(storage::Affiliations & affiliations,
|
||||
storage::CountryNameSynonyms & countryNameSynonyms)
|
||||
{
|
||||
auto const countriesFile = base::JoinPath(GetPlatform().ResourcesDir(), COUNTRIES_FILE);
|
||||
|
||||
storage::CountryTree countries;
|
||||
auto const rv = storage::LoadCountriesFromFile(countriesFile, countries, affiliations);
|
||||
auto const rv =
|
||||
storage::LoadCountriesFromFile(countriesFile, countries, affiliations, countryNameSynonyms);
|
||||
CHECK(rv != -1, ("Can't load countries from:", countriesFile));
|
||||
}
|
||||
|
||||
|
|
|
@ -34,7 +34,8 @@ void InitViewport(std::string viewportName, m2::RectD & viewport);
|
|||
|
||||
void InitDataSource(FrozenDataSource & dataSource, std::string const & mwmListPath);
|
||||
|
||||
void InitAffiliations(storage::Affiliations & affiliations);
|
||||
void InitStorageData(storage::Affiliations & affiliations,
|
||||
storage::CountryNameSynonyms & countryNameSynonyms);
|
||||
|
||||
std::unique_ptr<search::tests_support::TestSearchEngine> InitSearchEngine(
|
||||
DataSource & dataSource, storage::Affiliations const & affiliations, std::string const & locale,
|
||||
|
|
|
@ -380,7 +380,8 @@ int main(int argc, char * argv[])
|
|||
InitDataSource(dataSource, FLAGS_mwm_list_path);
|
||||
|
||||
storage::Affiliations affiliations;
|
||||
InitAffiliations(affiliations);
|
||||
storage::CountryNameSynonyms countryNameSynonyms;
|
||||
InitStorageData(affiliations, countryNameSynonyms);
|
||||
|
||||
auto engine = InitSearchEngine(dataSource, affiliations, FLAGS_locale, FLAGS_num_threads);
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@ public:
|
|||
CountryId const & parent) = 0;
|
||||
virtual void InsertOldMwmMapping(CountryId const & newId, CountryId const & oldId) = 0;
|
||||
virtual void InsertAffiliation(CountryId const & countryId, string const & affilation) = 0;
|
||||
virtual void InsertCountryNameSynonym(CountryId const & countryId, string const & synonym) = 0;
|
||||
virtual OldMwmMapping GetMapping() const = 0;
|
||||
};
|
||||
|
||||
|
@ -40,11 +41,15 @@ class StoreCountriesSingleMwms : public StoreSingleMwmInterface
|
|||
{
|
||||
CountryTree & m_countries;
|
||||
Affiliations & m_affiliations;
|
||||
CountryNameSynonyms & m_countryNameSynonyms;
|
||||
OldMwmMapping m_idsMapping;
|
||||
|
||||
public:
|
||||
StoreCountriesSingleMwms(CountryTree & countries, Affiliations & affiliations)
|
||||
: m_countries(countries), m_affiliations(affiliations)
|
||||
StoreCountriesSingleMwms(CountryTree & countries, Affiliations & affiliations,
|
||||
CountryNameSynonyms & countryNameSynonyms)
|
||||
: m_countries(countries)
|
||||
, m_affiliations(affiliations)
|
||||
, m_countryNameSynonyms(countryNameSynonyms)
|
||||
{
|
||||
}
|
||||
~StoreCountriesSingleMwms()
|
||||
|
@ -81,6 +86,17 @@ public:
|
|||
m_affiliations[affilation].push_back(countryId);
|
||||
}
|
||||
|
||||
/// Records |synonym| as an alternative country name that resolves to |countryId|.
/// Both arguments are expected to be non-empty, and each synonym is expected to be
/// registered at most once; all three expectations are checked with ASSERT.
void InsertCountryNameSynonym(CountryId const & countryId, string const & synonym) override
{
  ASSERT(!synonym.empty(), ());
  ASSERT(!countryId.empty(), ());
  // A synonym that is already present would make the name -> CountryId lookup ambiguous.
  ASSERT(m_countryNameSynonyms.count(synonym) == 0,
         ("Synonym must identify CountryTree node where the country is located. Country cannot be "
          "located at multiple nodes."));

  m_countryNameSynonyms[synonym] = countryId;
}
|
||||
|
||||
OldMwmMapping GetMapping() const override { return m_idsMapping; }
|
||||
};
|
||||
|
||||
|
@ -104,10 +120,17 @@ public:
|
|||
}
|
||||
|
||||
void InsertOldMwmMapping(CountryId const & /* newId */, CountryId const & /* oldId */) override {}
|
||||
|
||||
void InsertAffiliation(CountryId const & /* countryId */,
|
||||
string const & /* affilation */) override
|
||||
{
|
||||
}
|
||||
|
||||
void InsertCountryNameSynonym(CountryId const & /* countryId */,
|
||||
string const & /* synonym */) override
|
||||
{
|
||||
}
|
||||
|
||||
OldMwmMapping GetMapping() const override
|
||||
{
|
||||
ASSERT(false, ());
|
||||
|
@ -122,6 +145,11 @@ TMwmSubtreeAttrs LoadGroupSingleMwmsImpl(size_t depth, json_t * node, CountryId
|
|||
CountryId id;
|
||||
FromJSONObject(node, "id", id);
|
||||
|
||||
vector<string> countryNameSynonyms;
|
||||
FromJSONObjectOptionalField(node, "country_name_synonyms", countryNameSynonyms);
|
||||
for (auto const & synonym : countryNameSynonyms)
|
||||
store.InsertCountryNameSynonym(id, synonym);
|
||||
|
||||
// Mapping two component (big) mwms to one component (small) ones.
|
||||
vector<string> oldIds;
|
||||
FromJSONObjectOptionalField(node, "old", oldIds);
|
||||
|
@ -199,7 +227,8 @@ class StoreCountriesTwoComponentMwms : public StoreTwoComponentMwmInterface
|
|||
CountryTree & m_countries;
|
||||
|
||||
public:
|
||||
StoreCountriesTwoComponentMwms(CountryTree & countries, Affiliations & /* affiliations */)
|
||||
StoreCountriesTwoComponentMwms(CountryTree & countries, Affiliations & /* affiliations */,
|
||||
CountryNameSynonyms & /* countryNameSynonyms */)
|
||||
: m_countries(countries)
|
||||
{
|
||||
}
|
||||
|
@ -308,6 +337,7 @@ bool LoadCountriesTwoComponentMwmsImpl(string const & jsonBuffer,
|
|||
|
||||
int64_t LoadCountriesFromBuffer(string const & jsonBuffer, CountryTree & countries,
|
||||
Affiliations & affiliations,
|
||||
CountryNameSynonyms & countryNameSynonyms,
|
||||
OldMwmMapping * mapping /* = nullptr */)
|
||||
{
|
||||
countries.Clear();
|
||||
|
@ -321,7 +351,7 @@ int64_t LoadCountriesFromBuffer(string const & jsonBuffer, CountryTree & countri
|
|||
|
||||
if (version::IsSingleMwm(version))
|
||||
{
|
||||
StoreCountriesSingleMwms store(countries, affiliations);
|
||||
StoreCountriesSingleMwms store(countries, affiliations, countryNameSynonyms);
|
||||
if (!LoadCountriesSingleMwmsImpl(jsonBuffer, store))
|
||||
return -1;
|
||||
if (mapping)
|
||||
|
@ -329,7 +359,7 @@ int64_t LoadCountriesFromBuffer(string const & jsonBuffer, CountryTree & countri
|
|||
}
|
||||
else
|
||||
{
|
||||
StoreCountriesTwoComponentMwms store(countries, affiliations);
|
||||
StoreCountriesTwoComponentMwms store(countries, affiliations, countryNameSynonyms);
|
||||
if (!LoadCountriesTwoComponentMwmsImpl(jsonBuffer, store))
|
||||
return -1;
|
||||
}
|
||||
|
@ -342,11 +372,12 @@ int64_t LoadCountriesFromBuffer(string const & jsonBuffer, CountryTree & countri
|
|||
}
|
||||
|
||||
int64_t LoadCountriesFromFile(string const & path, CountryTree & countries,
|
||||
Affiliations & affiliations, OldMwmMapping * mapping)
|
||||
Affiliations & affiliations,
|
||||
CountryNameSynonyms & countryNameSynonyms, OldMwmMapping * mapping)
|
||||
{
|
||||
string json;
|
||||
ReaderPtr<Reader>(GetPlatform().GetReader(path)).ReadAsString(json);
|
||||
return LoadCountriesFromBuffer(json, countries, affiliations, mapping);
|
||||
return LoadCountriesFromBuffer(json, countries, affiliations, countryNameSynonyms, mapping);
|
||||
}
|
||||
|
||||
void LoadCountryFile2CountryInfo(string const & jsonBuffer, map<string, CountryInfo> & id2info,
|
||||
|
|
|
@ -254,9 +254,13 @@ private:
|
|||
|
||||
/// @return version of country file or -1 if error was encountered
|
||||
int64_t LoadCountriesFromBuffer(std::string const & buffer, CountryTree & countries,
|
||||
Affiliations & affiliations, OldMwmMapping * mapping = nullptr);
|
||||
Affiliations & affiliations,
|
||||
CountryNameSynonyms & countryNameSynonyms,
|
||||
OldMwmMapping * mapping = nullptr);
|
||||
int64_t LoadCountriesFromFile(std::string const & path, CountryTree & countries,
|
||||
Affiliations & affiliations, OldMwmMapping * mapping = nullptr);
|
||||
Affiliations & affiliations,
|
||||
CountryNameSynonyms & countryNameSynonyms,
|
||||
OldMwmMapping * mapping = nullptr);
|
||||
|
||||
void LoadCountryFile2CountryInfo(std::string const & jsonBuffer,
|
||||
std::map<std::string, CountryInfo> & id2info, bool & isSingleMwm);
|
||||
|
|
|
@ -141,8 +141,8 @@ Storage::Storage(string const & referenceCountriesTxtJsonForTesting,
|
|||
, m_downloadMapOnTheMap(nullptr)
|
||||
, m_maxMwmSizeBytes(0)
|
||||
{
|
||||
m_currentVersion =
|
||||
LoadCountriesFromBuffer(referenceCountriesTxtJsonForTesting, m_countries, m_affiliations);
|
||||
m_currentVersion = LoadCountriesFromBuffer(referenceCountriesTxtJsonForTesting, m_countries,
|
||||
m_affiliations, m_countryNameSynonyms);
|
||||
CHECK_LESS_OR_EQUAL(0, m_currentVersion, ("Can't load test countries file"));
|
||||
CalcMaxMwmSizeBytes();
|
||||
}
|
||||
|
@ -762,8 +762,8 @@ void Storage::LoadCountriesFile(string const & pathToCountriesFile, string const
|
|||
|
||||
if (m_countries.IsEmpty())
|
||||
{
|
||||
m_currentVersion =
|
||||
LoadCountriesFromFile(pathToCountriesFile, m_countries, m_affiliations, mapping);
|
||||
m_currentVersion = LoadCountriesFromFile(pathToCountriesFile, m_countries, m_affiliations,
|
||||
m_countryNameSynonyms, mapping);
|
||||
LOG_SHORT(LINFO, ("Loaded countries list for version:", m_currentVersion));
|
||||
if (m_currentVersion < 0)
|
||||
LOG(LERROR, ("Can't load countries file", pathToCountriesFile));
|
||||
|
|
|
@ -263,6 +263,7 @@ private:
|
|||
// Once filled |m_affiliations| is not changed.
|
||||
// Note. |m_affiliations| is empty in case of countries_obsolete.txt.
|
||||
Affiliations m_affiliations;
|
||||
CountryNameSynonyms m_countryNameSynonyms;
|
||||
|
||||
MwmSize m_maxMwmSizeBytes;
|
||||
|
||||
|
@ -469,6 +470,8 @@ public:
|
|||
|
||||
Affiliations const & GetAffiliations() const { return m_affiliations; }
|
||||
|
||||
CountryNameSynonyms const & GetCountryNameSynonyms() const { return m_countryNameSynonyms; }
|
||||
|
||||
/// \brief Calls |toDo| for each node for subtree with |root|.
|
||||
/// For example ForEachInSubtree(GetRootId()) calls |toDo| for every node including
|
||||
/// the result of GetRootId() call.
|
||||
|
|
|
@ -19,8 +19,10 @@ using CountriesSet = std::set<CountryId>;
|
|||
using CountriesVec = std::vector<CountryId>;
|
||||
using LocalFilePtr = std::shared_ptr<platform::LocalCountryFile>;
|
||||
using OldMwmMapping = std::map<CountryId, CountriesSet>;
|
||||
/// Map from key affiliation words into MWM IDs (file names).
|
||||
/// Map from key affiliation words into CountryIds.
|
||||
using Affiliations = std::unordered_map<std::string, std::vector<CountryId>>;
|
||||
/// Map from country name synonyms and old names into CountryId.
|
||||
using CountryNameSynonyms = std::unordered_map<std::string, CountryId>;
|
||||
|
||||
extern const storage::CountryId kInvalidCountryId;
|
||||
|
||||
|
|
|
@ -208,6 +208,10 @@ class Env:
|
|||
def borders_to_osm_path(self):
|
||||
return os.path.join(self.user_resource_path, "borders_vs_osm.csv")
|
||||
|
||||
@property
|
||||
def countries_synonyms_path(self):
|
||||
return os.path.join(self.user_resource_path, "countries_synonyms.csv")
|
||||
|
||||
@property
|
||||
def counties_txt_path(self):
|
||||
return os.path.join(self.mwm_path, "countries.txt")
|
||||
|
|
|
@ -223,6 +223,7 @@ def stage_descriptions(env):
|
|||
def stage_countries_txt(env):
|
||||
# Build the countries.txt tree from the hierarchy plus the auxiliary csv tables.
# The synonyms table is exposed by Env as |countries_synonyms_path| (plural
# "countries"); the singular spelling is not an Env attribute.
countries = hierarchy_to_countries(env.old_to_new_path,
                                   env.borders_to_osm_path,
                                   env.countries_synonyms_path,
                                   env.hierarchy_path, env.mwm_path,
                                   env.mwm_version)
|
||||
with open(env.counties_txt_path, "w") as f:
|
||||
|
|
|
@ -66,12 +66,15 @@ The post_generation commands are:
|
|||
help="old_vs_new.csv file")
|
||||
parser.add_argument("--osm", required=True,
|
||||
help="borders_vs_osm.csv file")
|
||||
parser.add_argument("--countries_synonyms", required=True,
|
||||
help="countries_synonyms.csv file")
|
||||
parser.add_argument("--mwm_version", type=int, required=True,
|
||||
help="Mwm version")
|
||||
parser.add_argument("-o", "--output", required=True,
|
||||
help="Output countries.txt file (default is stdout)")
|
||||
args = parser.parse_args(sys.argv[2:])
|
||||
countries_json = hierarchy_to_countries_(args.old, args.osm,
|
||||
args.countries_synonyms,
|
||||
args.hierarchy,
|
||||
args.target,
|
||||
args.mwm_version)
|
||||
|
|
|
@ -109,9 +109,24 @@ def parse_borders_vs_osm(borders_vs_osm_csv_path):
|
|||
vsosm[m.group(1)] = [m.group(3)]
|
||||
return vsosm
|
||||
|
||||
def parse_countries_synonyms(countries_synonyms_csv_path):
    """Reads the country name synonyms table.

    Every non-blank line of the file must consist of two non-empty fields
    separated by a TAB: the country id followed by one synonym. A country id
    may appear on several lines; its synonyms are collected in file order.

    :param countries_synonyms_csv_path: path to the csv file; a falsy value
        (None or "") means that there is no synonyms file.
    :return: dict mapping a country id to the list of its synonyms; empty
        when no path is given.
    :raises AssertionError: if a non-blank line does not match the
        "<id>TAB<synonym>" format.
    """
    countries_synonyms = {}
    if not countries_synonyms_csv_path:
        return countries_synonyms

    with open(countries_synonyms_csv_path) as f:
        for line_number, line in enumerate(f, 1):
            record = line.strip()
            # Blank lines (e.g. a trailing newline at the end of the file)
            # carry no data and must not abort the whole generation.
            if not record:
                continue

            m = re.match(r"(.+)\t(.+)", record)
            assert m, "{}:{}: expected '<country id>\\t<synonym>', got {!r}".format(
                countries_synonyms_csv_path, line_number, line)
            countries_synonyms.setdefault(m.group(1), []).append(m.group(2))
    return countries_synonyms
|
||||
|
||||
def hierarchy_to_countries(old_vs_new_csv_path, borders_vs_osm_csv_path,
|
||||
hierarchy_path, target_path, version):
|
||||
countries_synonyms_csv_path, hierarchy_path,
|
||||
target_path, version):
|
||||
|
||||
def fill_last(last, stack):
|
||||
name = last["id"]
|
||||
|
@ -124,6 +139,7 @@ def hierarchy_to_countries(old_vs_new_csv_path, borders_vs_osm_csv_path,
|
|||
|
||||
oldvs = parse_old_vs_new(old_vs_new_csv_path)
|
||||
vsosm = parse_borders_vs_osm(borders_vs_osm_csv_path)
|
||||
countries_synonyms = parse_countries_synonyms(countries_synonyms_csv_path)
|
||||
stack = [CountryDict(v=version, nameattr="Countries", g=[])]
|
||||
last = None
|
||||
with open(hierarchy_path) as f:
|
||||
|
@ -151,6 +167,8 @@ def hierarchy_to_countries(old_vs_new_csv_path, borders_vs_osm_csv_path,
|
|||
last["old"] = oldvs[items[0]]
|
||||
if items[0] in vsosm:
|
||||
last["affiliations"] = vsosm[items[0]]
|
||||
if items[0] in countries_synonyms:
|
||||
last["country_name_synonyms"] = countries_synonyms[items[0]]
|
||||
|
||||
# the last line is always a file
|
||||
del last["d"]
|
||||
|
|
|
@ -636,7 +636,7 @@ if [ "$MODE" == "resources" ]; then
|
|||
putmode "Step 8: Updating resource lists"
|
||||
# Update countries list
|
||||
$PYTHON36 -m $POST_GENERATION_MODULE hierarchy_to_countries --target "$TARGET" --hierarchy "$DATA_PATH/hierarchy.txt" --mwm_version "$COUNTRIES_VERSION" \
|
||||
--old "$DATA_PATH/old_vs_new.csv" --osm "$DATA_PATH/borders_vs_osm.csv" --output "$TARGET/countries.txt" >> "$PLANET_LOG" 2>&1
|
||||
--old "$DATA_PATH/old_vs_new.csv" --osm "$DATA_PATH/borders_vs_osm.csv" --countries_synonyms "$DATA_PATH/countries_synonyms.csv" --output "$TARGET/countries.txt" >> "$PLANET_LOG" 2>&1
|
||||
|
||||
# A quick fix: chmodding to a+rw all generated files
|
||||
for file in "$TARGET"/*.mwm*; do
|
||||
|
|
Loading…
Add table
Reference in a new issue