diff --git a/generator/generator_tests_support/test_mwm_builder.cpp b/generator/generator_tests_support/test_mwm_builder.cpp index c398bfd126..b00994902c 100644 --- a/generator/generator_tests_support/test_mwm_builder.cpp +++ b/generator/generator_tests_support/test_mwm_builder.cpp @@ -115,10 +115,10 @@ bool TestMwmBuilder::Add(FeatureBuilder & fb) return true; } -void TestMwmBuilder::SetPostcodesData( +void TestMwmBuilder::SetUKPostcodesData( string const & postcodesPath, shared_ptr const & countryInfoGetter) { - m_postcodesPath = postcodesPath; + m_ukPostcodesPath = postcodesPath; m_postcodesCountryInfoGetter = countryInfoGetter; } @@ -159,11 +159,11 @@ void TestMwmBuilder::Finish() true /* forceRebuild */, 1 /* threadsCount */), ("Can't build search index.")); - if (!m_postcodesPath.empty() && m_postcodesCountryInfoGetter) + if (!m_ukPostcodesPath.empty() && m_postcodesCountryInfoGetter) { - CHECK(indexer::BuildPostcodePointsWithInfoGetter(m_file.GetDirectory(), m_file.GetCountryName(), - m_postcodesPath, true /* forceRebuild */, - *m_postcodesCountryInfoGetter), + CHECK(indexer::BuildPostcodePointsWithInfoGetter( + m_file.GetDirectory(), m_file.GetCountryName(), m_ukPostcodesPath, + "" /* usPostcodesDataset */, true /* forceRebuild */, *m_postcodesCountryInfoGetter), ("Can't build postcodes section.")); } diff --git a/generator/generator_tests_support/test_mwm_builder.hpp b/generator/generator_tests_support/test_mwm_builder.hpp index c76f44d8b5..0117012fcc 100644 --- a/generator/generator_tests_support/test_mwm_builder.hpp +++ b/generator/generator_tests_support/test_mwm_builder.hpp @@ -39,8 +39,8 @@ public: void Add(TestFeature const & feature); bool Add(feature::FeatureBuilder & fb); - void SetPostcodesData(std::string const & postcodesPath, - std::shared_ptr const & countryInfoGetter); + void SetUKPostcodesData(std::string const & postcodesPath, + std::shared_ptr const & countryInfoGetter); void SetMwmLanguages(std::vector const & languages); void Finish();
@@ -52,7 +52,7 @@ private: std::unique_ptr m_collector; TestIdToBoundariesTable m_boundariesTable; std::shared_ptr m_postcodesCountryInfoGetter; - std::string m_postcodesPath; + std::string m_ukPostcodesPath; uint32_t m_version = 0; }; } // namespace tests_support diff --git a/generator/generator_tool/generator_tool.cpp b/generator/generator_tool/generator_tool.cpp index ed3b398ff4..891b0fd6da 100644 --- a/generator/generator_tool/generator_tool.cpp +++ b/generator/generator_tool/generator_tool.cpp @@ -166,7 +166,8 @@ DEFINE_string(popular_places_data, "", DEFINE_string(brands_data, "", "Path to json with OSM objects to brand ID map."); DEFINE_string(brands_translations_data, "", "Path to json with brands translations and synonyms."); -DEFINE_string(postcodes_dataset, "", "Path to dataset with postcodes data"); +DEFINE_string(uk_postcodes_dataset, "", "Path to dataset with UK postcodes."); +DEFINE_string(us_postcodes_dataset, "", "Path to dataset with US postcodes."); // Printing stuff.
DEFINE_bool(calc_statistics, false, "Calculate feature statistics for specified mwm bucket files."); @@ -379,10 +380,13 @@ MAIN_WITH_ERROR_HANDLING([](int argc, char ** argv) LOG(LCRITICAL, ("Error generating search index.")); } - if (!FLAGS_postcodes_dataset.empty()) + if (!FLAGS_uk_postcodes_dataset.empty() || !FLAGS_us_postcodes_dataset.empty()) { - if (!indexer::BuildPostcodePoints(path, country, FLAGS_postcodes_dataset, true /*forceRebuild*/)) + if (!indexer::BuildPostcodePoints(path, country, FLAGS_uk_postcodes_dataset, + FLAGS_us_postcodes_dataset, true /*forceRebuild*/)) + { LOG(LCRITICAL, ("Error generating postcodes section.")); + } } LOG(LINFO, ("Generating rank table for", datFile)); diff --git a/generator/postcode_points_builder.cpp b/generator/postcode_points_builder.cpp index 7bf893e9e1..222c0af2fb 100644 --- a/generator/postcode_points_builder.cpp +++ b/generator/postcode_points_builder.cpp @@ -96,9 +96,58 @@ void GetUKPostcodes(string const & filename, storage::CountryId const & countryI } } +// Reads ';'-separated US postcodes from |filename|. For rows that GetRegionsCountryId() maps to +// |countryId|, appends the mercator point to |valueMapping| and the zip key to |keyValuePairs|. +template +void GetUSPostcodes(string const & filename, storage::CountryId const & countryId, + storage::CountryInfoGetter const & infoGetter, vector & valueMapping, + vector> & keyValuePairs) +{ + // Zip;City;State;Latitude;Longitude;Timezone;Daylight savings time flag;geopoint + size_t constexpr kPostcodeIndex = 0; + size_t constexpr kLatIndex = 3; + size_t constexpr kLongIndex = 4; + size_t constexpr kDatasetCount = 8; + + ifstream data; + data.exceptions(fstream::failbit | fstream::badbit); + data.open(filename); + data.exceptions(fstream::badbit); + + string line; + size_t index = 0; + while (getline(data, line)) + { + vector fields; + strings::ParseCSVRow(line, ';', fields); + CHECK_EQUAL(fields.size(), kDatasetCount, (line)); + + double lat; + CHECK(strings::to_double(fields[kLatIndex], lat), ()); + + double lon; + CHECK(strings::to_double(fields[kLongIndex], lon), ()); + + auto const p = mercator::FromLatLon(lat, lon); + + vector countries;
infoGetter.GetRegionsCountryId(p, countries, 200.0 /* lookupRadiusM */); + if (find(countries.begin(), countries.end(), countryId) == countries.end()) + continue; + + auto const postcode = fields[kPostcodeIndex]; + + CHECK_EQUAL(valueMapping.size(), index, ()); + valueMapping.push_back(p); + keyValuePairs.emplace_back(search::NormalizeAndSimplifyString(postcode), Value(index)); + ++index; + } +} + bool BuildPostcodePointsImpl(FilesContainerR & container, storage::CountryId const & country, - string const & dataset, string const & tmpName, - storage::CountryInfoGetter const & infoGetter, Writer & writer) + string const & ukDatasetPath, string const & usDatasetPath, + string const & tmpName, storage::CountryInfoGetter const & infoGetter, + Writer & writer) { using Key = strings::UniString; using Value = Uint64IndexValue; @@ -114,19 +163,39 @@ bool BuildPostcodePointsImpl(FilesContainerR & container, storage::CountryId con header.m_trieOffset = base::asserted_cast(writer.Pos()); vector> ukPostcodesKeyValuePairs; - vector valueMapping; - GetUKPostcodes(dataset, country, infoGetter, valueMapping, ukPostcodesKeyValuePairs); + vector ukPostcodesValueMapping; + if (!ukDatasetPath.empty()) + GetUKPostcodes(ukDatasetPath, country, infoGetter, ukPostcodesValueMapping, + ukPostcodesKeyValuePairs); - if (ukPostcodesKeyValuePairs.empty()) + vector> usPostcodesKeyValuePairs; + vector usPostcodesValueMapping; + if (!usDatasetPath.empty()) + GetUSPostcodes(usDatasetPath, country, infoGetter, usPostcodesValueMapping, + usPostcodesKeyValuePairs); + + if (ukPostcodesKeyValuePairs.empty() && usPostcodesKeyValuePairs.empty()) return false; - sort(ukPostcodesKeyValuePairs.begin(), ukPostcodesKeyValuePairs.end()); + if (!ukPostcodesKeyValuePairs.empty() && !usPostcodesKeyValuePairs.empty()) + { + LOG(LWARNING, + ("Have both US and UK postcodes for", country, "Cannot mix postcodes due to license.")); + return false; + } + + auto & postcodesKeyValuePairs =
!ukPostcodesKeyValuePairs.empty() ? ukPostcodesKeyValuePairs : usPostcodesKeyValuePairs; + auto & valueMapping = + !ukPostcodesKeyValuePairs.empty() ? ukPostcodesValueMapping : usPostcodesValueMapping; + + sort(postcodesKeyValuePairs.begin(), postcodesKeyValuePairs.end()); { FileWriter tmpWriter(tmpName); SingleValueSerializer serializer; trie::Build>( - tmpWriter, serializer, ukPostcodesKeyValuePairs); + tmpWriter, serializer, postcodesKeyValuePairs); } rw_ops::Reverse(FileReader(tmpName), writer); @@ -160,7 +229,8 @@ bool BuildPostcodePointsImpl(FilesContainerR & container, storage::CountryId con namespace indexer { bool BuildPostcodePointsWithInfoGetter(string const & path, string const & country, - string const & datasetPath, bool forceRebuild, + string const & ukDatasetPath, string const & usDatasetPath, + bool forceRebuild, storage::CountryInfoGetter const & infoGetter) { auto const filename = base::JoinPath(path, country + DATA_FILE_EXTENSION); @@ -182,8 +252,8 @@ bool BuildPostcodePointsWithInfoGetter(string const & path, string const & count try { FileWriter writer(postcodesFilePath); - if (!BuildPostcodePointsImpl(readContainer, storage::CountryId(country), datasetPath, - trieTmpFilePath, infoGetter, writer)) + if (!BuildPostcodePointsImpl(readContainer, storage::CountryId(country), ukDatasetPath, + usDatasetPath, trieTmpFilePath, infoGetter, writer)) { // No postcodes for country.
return true; @@ -207,13 +277,14 @@ bool BuildPostcodePointsWithInfoGetter(string const & path, string const & count return true; } -bool BuildPostcodePoints(string const & path, string const & country, string const & datasetPath, - bool forceRebuild) +bool BuildPostcodePoints(string const & path, string const & country, string const & ukDatasetPath, + string const & usDatasetPath, bool forceRebuild) { auto const & platform = GetPlatform(); auto const infoGetter = storage::CountryInfoReader::CreateCountryInfoReader(platform); CHECK(infoGetter, ()); - return BuildPostcodePointsWithInfoGetter(path, country, datasetPath, forceRebuild, *infoGetter); + return BuildPostcodePointsWithInfoGetter(path, country, ukDatasetPath, usDatasetPath, + forceRebuild, *infoGetter); } } // namespace indexer diff --git a/generator/postcode_points_builder.hpp b/generator/postcode_points_builder.hpp index 76182f02a6..30c71ea8fa 100644 --- a/generator/postcode_points_builder.hpp +++ b/generator/postcode_points_builder.hpp @@ -11,9 +11,11 @@ namespace indexer { // Builds postcodes section with external postcodes data and writes it to the mwm file. bool BuildPostcodePoints(std::string const & path, std::string const & country, - std::string const & datasetPath, bool forceRebuild); + std::string const & ukDatasetPath, std::string const & usDatasetPath, + bool forceRebuild); // Exposed for testing.
bool BuildPostcodePointsWithInfoGetter(std::string const & path, std::string const & country, - std::string const & datasetPath, bool forceRebuild, + std::string const & ukDatasetPath, + std::string const & usDatasetPath, bool forceRebuild, storage::CountryInfoGetter const & infoGetter); } // namespace indexer diff --git a/search/search_integration_tests/postcode_points_tests.cpp b/search/search_integration_tests/postcode_points_tests.cpp index 17c221db64..a2c85a04aa 100644 --- a/search/search_integration_tests/postcode_points_tests.cpp +++ b/search/search_integration_tests/postcode_points_tests.cpp @@ -61,7 +61,7 @@ UNIT_CLASS_TEST(PostcodePointsTest, Smoke) m2::RectD(mercator::FromLatLon(0.99, 0.99), mercator::FromLatLon(3.01, 3.01)))); auto const id = BuildCountry(countryName, [&](TestMwmBuilder & builder) { - builder.SetPostcodesData(postcodesRelativePath, infoGetter); + builder.SetUKPostcodesData(postcodesRelativePath, infoGetter); }); auto handle = m_dataSource.GetMwmHandleById(id); @@ -125,7 +125,7 @@ UNIT_CLASS_TEST(PostcodePointsTest, SearchPostcode) m2::RectD(mercator::FromLatLon(3.0, 3.0), mercator::FromLatLon(7.0, 7.0)))); auto const id = BuildCountry(countryName, [&](TestMwmBuilder & builder) { - builder.SetPostcodesData(postcodesRelativePath, infoGetter); + builder.SetUKPostcodesData(postcodesRelativePath, infoGetter); }); auto test = [&](string const & query, m2::PointD const & expected) { @@ -208,7 +208,7 @@ UNIT_CLASS_TEST(PostcodePointsTest, SearchStreetWithPostcode) "Main street", "en"); auto const id = BuildCountry(countryName, [&](TestMwmBuilder & builder) { - builder.SetPostcodesData(postcodesRelativePath, infoGetter); + builder.SetUKPostcodesData(postcodesRelativePath, infoGetter); builder.Add(streetA); builder.Add(houseA); builder.Add(streetB);