[storage] Add an optional country_name_synonyms parameter to countries.txt so that the corresponding CountryId can be found by country name without modifying the countries.txt keys.

This commit is contained in:
tatiana-yan 2019-05-24 15:19:15 +03:00 committed by mpimenov
parent 3cd6936c9e
commit e08a9d988e
15 changed files with 94 additions and 22 deletions

View file

@ -43,7 +43,8 @@ void GetPathToRoot(storage::CountryId const & id, storage::CountryTree const & c
void RegionInfoGetter::LoadCountriesTree()
{
storage::Affiliations affiliations;
storage::LoadCountriesFromFile(COUNTRIES_FILE, m_countries, affiliations);
storage::CountryNameSynonyms countryNameSynonyms;
storage::LoadCountriesFromFile(COUNTRIES_FILE, m_countries, affiliations, countryNameSynonyms);
}
void RegionInfoGetter::SetLocale(string const & locale)

View file

@ -108,7 +108,8 @@ int main(int argc, char * argv[])
InitDataSource(dataSource, "" /* mwmListPath */);
storage::Affiliations affiliations;
InitAffiliations(affiliations);
storage::CountryNameSynonyms countryNameSynonyms;
InitStorageData(affiliations, countryNameSynonyms);
auto engine = InitSearchEngine(dataSource, affiliations, "en" /* locale */, 1 /* numThreads */);

View file

@ -175,12 +175,14 @@ void InitDataSource(FrozenDataSource & dataSource, string const & mwmListPath)
LOG(LINFO, ());
}
void InitAffiliations(storage::Affiliations & affiliations)
void InitStorageData(storage::Affiliations & affiliations,
storage::CountryNameSynonyms & countryNameSynonyms)
{
auto const countriesFile = base::JoinPath(GetPlatform().ResourcesDir(), COUNTRIES_FILE);
storage::CountryTree countries;
auto const rv = storage::LoadCountriesFromFile(countriesFile, countries, affiliations);
auto const rv =
storage::LoadCountriesFromFile(countriesFile, countries, affiliations, countryNameSynonyms);
CHECK(rv != -1, ("Can't load countries from:", countriesFile));
}

View file

@ -34,7 +34,8 @@ void InitViewport(std::string viewportName, m2::RectD & viewport);
void InitDataSource(FrozenDataSource & dataSource, std::string const & mwmListPath);
void InitAffiliations(storage::Affiliations & affiliations);
void InitStorageData(storage::Affiliations & affiliations,
storage::CountryNameSynonyms & countryNameSynonyms);
std::unique_ptr<search::tests_support::TestSearchEngine> InitSearchEngine(
DataSource & dataSource, storage::Affiliations const & affiliations, std::string const & locale,

View file

@ -380,7 +380,8 @@ int main(int argc, char * argv[])
InitDataSource(dataSource, FLAGS_mwm_list_path);
storage::Affiliations affiliations;
InitAffiliations(affiliations);
storage::CountryNameSynonyms countryNameSynonyms;
InitStorageData(affiliations, countryNameSynonyms);
auto engine = InitSearchEngine(dataSource, affiliations, FLAGS_locale, FLAGS_num_threads);

View file

@ -33,6 +33,7 @@ public:
CountryId const & parent) = 0;
virtual void InsertOldMwmMapping(CountryId const & newId, CountryId const & oldId) = 0;
virtual void InsertAffiliation(CountryId const & countryId, string const & affilation) = 0;
virtual void InsertCountryNameSynonym(CountryId const & countryId, string const & synonym) = 0;
virtual OldMwmMapping GetMapping() const = 0;
};
@ -40,11 +41,15 @@ class StoreCountriesSingleMwms : public StoreSingleMwmInterface
{
CountryTree & m_countries;
Affiliations & m_affiliations;
CountryNameSynonyms & m_countryNameSynonyms;
OldMwmMapping m_idsMapping;
public:
StoreCountriesSingleMwms(CountryTree & countries, Affiliations & affiliations)
: m_countries(countries), m_affiliations(affiliations)
StoreCountriesSingleMwms(CountryTree & countries, Affiliations & affiliations,
CountryNameSynonyms & countryNameSynonyms)
: m_countries(countries)
, m_affiliations(affiliations)
, m_countryNameSynonyms(countryNameSynonyms)
{
}
~StoreCountriesSingleMwms()
@ -81,6 +86,17 @@ public:
m_affiliations[affilation].push_back(countryId);
}
/// Registers |synonym| as an alternative name that resolves to |countryId|.
/// Both arguments must be non-empty, and each synonym may be registered only once.
void InsertCountryNameSynonym(CountryId const & countryId, string const & synonym) override
{
  ASSERT(!synonym.empty(), ());
  ASSERT(!countryId.empty(), ());

  // A synonym has to resolve to a single CountryId, so a repeated registration is a data error.
  ASSERT(m_countryNameSynonyms.count(synonym) == 0,
         ("Synonym must identify CountryTree node where the country is located. Country cannot be "
          "located at multiple nodes."));

  m_countryNameSynonyms[synonym] = countryId;
}
/// Returns a copy of |m_idsMapping|.
OldMwmMapping GetMapping() const override
{
  return m_idsMapping;
}
};
@ -104,10 +120,17 @@ public:
}
void InsertOldMwmMapping(CountryId const & /* newId */, CountryId const & /* oldId */) override {}
// Intentionally a no-op: this store does not record affiliations.
void InsertAffiliation(CountryId const & /* countryId */,
string const & /* affilation */) override
{
}
// Intentionally a no-op: this store does not record country name synonyms.
void InsertCountryNameSynonym(CountryId const & /* countryId */,
string const & /* synonym */) override
{
}
OldMwmMapping GetMapping() const override
{
ASSERT(false, ());
@ -122,6 +145,11 @@ TMwmSubtreeAttrs LoadGroupSingleMwmsImpl(size_t depth, json_t * node, CountryId
CountryId id;
FromJSONObject(node, "id", id);
vector<string> countryNameSynonyms;
FromJSONObjectOptionalField(node, "country_name_synonyms", countryNameSynonyms);
for (auto const & synonym : countryNameSynonyms)
store.InsertCountryNameSynonym(id, synonym);
// Mapping two-component (big) mwms to one-component (small) ones.
vector<string> oldIds;
FromJSONObjectOptionalField(node, "old", oldIds);
@ -199,7 +227,8 @@ class StoreCountriesTwoComponentMwms : public StoreTwoComponentMwmInterface
CountryTree & m_countries;
public:
StoreCountriesTwoComponentMwms(CountryTree & countries, Affiliations & /* affiliations */)
StoreCountriesTwoComponentMwms(CountryTree & countries, Affiliations & /* affiliations */,
CountryNameSynonyms & /* countryNameSynonyms */)
: m_countries(countries)
{
}
@ -308,6 +337,7 @@ bool LoadCountriesTwoComponentMwmsImpl(string const & jsonBuffer,
int64_t LoadCountriesFromBuffer(string const & jsonBuffer, CountryTree & countries,
Affiliations & affiliations,
CountryNameSynonyms & countryNameSynonyms,
OldMwmMapping * mapping /* = nullptr */)
{
countries.Clear();
@ -321,7 +351,7 @@ int64_t LoadCountriesFromBuffer(string const & jsonBuffer, CountryTree & countri
if (version::IsSingleMwm(version))
{
StoreCountriesSingleMwms store(countries, affiliations);
StoreCountriesSingleMwms store(countries, affiliations, countryNameSynonyms);
if (!LoadCountriesSingleMwmsImpl(jsonBuffer, store))
return -1;
if (mapping)
@ -329,7 +359,7 @@ int64_t LoadCountriesFromBuffer(string const & jsonBuffer, CountryTree & countri
}
else
{
StoreCountriesTwoComponentMwms store(countries, affiliations);
StoreCountriesTwoComponentMwms store(countries, affiliations, countryNameSynonyms);
if (!LoadCountriesTwoComponentMwmsImpl(jsonBuffer, store))
return -1;
}
@ -342,11 +372,12 @@ int64_t LoadCountriesFromBuffer(string const & jsonBuffer, CountryTree & countri
}
int64_t LoadCountriesFromFile(string const & path, CountryTree & countries,
Affiliations & affiliations, OldMwmMapping * mapping)
Affiliations & affiliations,
CountryNameSynonyms & countryNameSynonyms, OldMwmMapping * mapping)
{
string json;
ReaderPtr<Reader>(GetPlatform().GetReader(path)).ReadAsString(json);
return LoadCountriesFromBuffer(json, countries, affiliations, mapping);
return LoadCountriesFromBuffer(json, countries, affiliations, countryNameSynonyms, mapping);
}
void LoadCountryFile2CountryInfo(string const & jsonBuffer, map<string, CountryInfo> & id2info,

View file

@ -254,9 +254,13 @@ private:
/// @return version of country file or -1 if error was encountered
int64_t LoadCountriesFromBuffer(std::string const & buffer, CountryTree & countries,
Affiliations & affiliations, OldMwmMapping * mapping = nullptr);
Affiliations & affiliations,
CountryNameSynonyms & countryNameSynonyms,
OldMwmMapping * mapping = nullptr);
int64_t LoadCountriesFromFile(std::string const & path, CountryTree & countries,
Affiliations & affiliations, OldMwmMapping * mapping = nullptr);
Affiliations & affiliations,
CountryNameSynonyms & countryNameSynonyms,
OldMwmMapping * mapping = nullptr);
void LoadCountryFile2CountryInfo(std::string const & jsonBuffer,
std::map<std::string, CountryInfo> & id2info, bool & isSingleMwm);

View file

@ -141,8 +141,8 @@ Storage::Storage(string const & referenceCountriesTxtJsonForTesting,
, m_downloadMapOnTheMap(nullptr)
, m_maxMwmSizeBytes(0)
{
m_currentVersion =
LoadCountriesFromBuffer(referenceCountriesTxtJsonForTesting, m_countries, m_affiliations);
m_currentVersion = LoadCountriesFromBuffer(referenceCountriesTxtJsonForTesting, m_countries,
m_affiliations, m_countryNameSynonyms);
CHECK_LESS_OR_EQUAL(0, m_currentVersion, ("Can't load test countries file"));
CalcMaxMwmSizeBytes();
}
@ -762,8 +762,8 @@ void Storage::LoadCountriesFile(string const & pathToCountriesFile, string const
if (m_countries.IsEmpty())
{
m_currentVersion =
LoadCountriesFromFile(pathToCountriesFile, m_countries, m_affiliations, mapping);
m_currentVersion = LoadCountriesFromFile(pathToCountriesFile, m_countries, m_affiliations,
m_countryNameSynonyms, mapping);
LOG_SHORT(LINFO, ("Loaded countries list for version:", m_currentVersion));
if (m_currentVersion < 0)
LOG(LERROR, ("Can't load countries file", pathToCountriesFile));

View file

@ -263,6 +263,7 @@ private:
// Once filled |m_affiliations| is not changed.
// Note. |m_affiliations| is empty in case of countries_obsolete.txt.
Affiliations m_affiliations;
CountryNameSynonyms m_countryNameSynonyms;
MwmSize m_maxMwmSizeBytes;
@ -469,6 +470,8 @@ public:
/// Read-only access to |m_affiliations|.
Affiliations const & GetAffiliations() const
{
  return m_affiliations;
}
/// Read-only access to |m_countryNameSynonyms|.
CountryNameSynonyms const & GetCountryNameSynonyms() const
{
  return m_countryNameSynonyms;
}
/// \brief Calls |toDo| for each node for subtree with |root|.
/// For example ForEachInSubtree(GetRootId()) calls |toDo| for every node including
/// the result of GetRootId() call.

View file

@ -19,8 +19,10 @@ using CountriesSet = std::set<CountryId>;
using CountriesVec = std::vector<CountryId>;
using LocalFilePtr = std::shared_ptr<platform::LocalCountryFile>;
using OldMwmMapping = std::map<CountryId, CountriesSet>;
/// Map from key affiliation words into MWM IDs (file names).
/// Map from key affiliation words into CountryIds.
using Affiliations = std::unordered_map<std::string, std::vector<CountryId>>;
/// Map from country name synonyms and old names into CountryId.
using CountryNameSynonyms = std::unordered_map<std::string, CountryId>;
extern const storage::CountryId kInvalidCountryId;

View file

@ -208,6 +208,10 @@ class Env:
def borders_to_osm_path(self):
return os.path.join(self.user_resource_path, "borders_vs_osm.csv")
@property
def countries_synonyms_path(self):
    """Path to countries_synonyms.csv inside the user resource directory."""
    resource_dir = self.user_resource_path
    return os.path.join(resource_dir, "countries_synonyms.csv")
@property
def counties_txt_path(self):
    """Path to the countries.txt file inside the mwm directory."""
    mwm_dir = self.mwm_path
    return os.path.join(mwm_dir, "countries.txt")

View file

@ -223,6 +223,7 @@ def stage_descriptions(env):
def stage_countries_txt(env):
# Env exposes the synonyms file as |countries_synonyms_path| (plural "countries");
# the attribute name used here must match that property exactly.
countries = hierarchy_to_countries(env.old_to_new_path,
                                   env.borders_to_osm_path,
                                   env.countries_synonyms_path,
                                   env.hierarchy_path, env.mwm_path,
                                   env.mwm_version)
with open(env.counties_txt_path, "w") as f:

View file

@ -66,12 +66,15 @@ The post_generation commands are:
help="old_vs_new.csv file")
parser.add_argument("--osm", required=True,
help="borders_vs_osm.csv file")
parser.add_argument("--countries_synonyms", required=True,
help="countries_synonyms.csv file")
parser.add_argument("--mwm_version", type=int, required=True,
help="Mwm version")
parser.add_argument("-o", "--output", required=True,
help="Output countries.txt file (default is stdout)")
args = parser.parse_args(sys.argv[2:])
countries_json = hierarchy_to_countries_(args.old, args.osm,
args.countries_synonyms,
args.hierarchy,
args.target,
args.mwm_version)

View file

@ -109,9 +109,24 @@ def parse_borders_vs_osm(borders_vs_osm_csv_path):
vsosm[m.group(1)] = [m.group(3)]
return vsosm
def parse_countries_synonyms(countries_synonyms_csv_path):
    """Parse a tab-separated synonyms file into a dict of lists.

    Each non-blank line must match ``<key>\\t<synonym>``. Values that share
    the same key are collected, in file order, into one list. The result is
    looked up by hierarchy item name when countries.txt is assembled.

    :param countries_synonyms_csv_path: path to the file; a falsy value
        (``None`` or ``""``) means "no synonyms" and yields an empty dict.
    :return: dict mapping key -> list of synonyms.
    :raises AssertionError: if a non-blank line does not match the format.
    """
    countries_synonyms = {}
    if not countries_synonyms_csv_path:
        return countries_synonyms

    with open(countries_synonyms_csv_path) as f:
        for line in f:
            line = line.strip()
            # Tolerate blank lines (e.g. trailing newlines at the end of the file)
            # instead of failing the whole generation on them.
            if not line:
                continue
            m = re.match(r"(.+)\t(.+)", line)
            assert m, "Malformed countries synonyms line: {!r}".format(line)
            countries_synonyms.setdefault(m.group(1), []).append(m.group(2))
    return countries_synonyms
def hierarchy_to_countries(old_vs_new_csv_path, borders_vs_osm_csv_path,
hierarchy_path, target_path, version):
countries_synonyms_csv_path, hierarchy_path,
target_path, version):
def fill_last(last, stack):
name = last["id"]
@ -124,6 +139,7 @@ def hierarchy_to_countries(old_vs_new_csv_path, borders_vs_osm_csv_path,
oldvs = parse_old_vs_new(old_vs_new_csv_path)
vsosm = parse_borders_vs_osm(borders_vs_osm_csv_path)
countries_synonyms = parse_countries_synonyms(countries_synonyms_csv_path)
stack = [CountryDict(v=version, nameattr="Countries", g=[])]
last = None
with open(hierarchy_path) as f:
@ -151,6 +167,8 @@ def hierarchy_to_countries(old_vs_new_csv_path, borders_vs_osm_csv_path,
last["old"] = oldvs[items[0]]
if items[0] in vsosm:
last["affiliations"] = vsosm[items[0]]
if items[0] in countries_synonyms:
last["country_name_synonyms"] = countries_synonyms[items[0]]
# the last line is always a file
del last["d"]

View file

@ -636,7 +636,7 @@ if [ "$MODE" == "resources" ]; then
putmode "Step 8: Updating resource lists"
# Update countries list
$PYTHON36 -m $POST_GENERATION_MODULE hierarchy_to_countries --target "$TARGET" --hierarchy "$DATA_PATH/hierarchy.txt" --mwm_version "$COUNTRIES_VERSION" \
--old "$DATA_PATH/old_vs_new.csv" --osm "$DATA_PATH/borders_vs_osm.csv" --output "$TARGET/countries.txt" >> "$PLANET_LOG" 2>&1
--old "$DATA_PATH/old_vs_new.csv" --osm "$DATA_PATH/borders_vs_osm.csv" --countries_synonyms "$DATA_PATH/countries_synonyms.csv" --output "$TARGET/countries.txt" >> "$PLANET_LOG" 2>&1
# A quick fix: chmodding to a+rw all generated files
for file in "$TARGET"/*.mwm*; do