From 905268c6edea0742a768ac8640c5698cc30fedad Mon Sep 17 00:00:00 2001 From: LaGrunge Date: Wed, 7 Aug 2019 11:24:20 +0300 Subject: [PATCH] [generator] use nullbuilding addresses for pois --- .../generator_tests/geo_objects_tests.cpp | 114 +++++++++---- .../geo_objects/geo_object_info_getter.hpp | 10 ++ generator/geo_objects/geo_objects.cpp | 157 ++++++++++-------- generator/geo_objects/geo_objects.hpp | 2 + 4 files changed, 187 insertions(+), 96 deletions(-) diff --git a/generator/generator_tests/geo_objects_tests.cpp b/generator/generator_tests/geo_objects_tests.cpp index 0e95fe726c..d221bea9f6 100644 --- a/generator/generator_tests/geo_objects_tests.cpp +++ b/generator/generator_tests/geo_objects_tests.cpp @@ -7,6 +7,7 @@ #include "generator/generator_tests/common.hpp" #include "generator/geo_objects/geo_object_info_getter.hpp" #include "generator/geo_objects/geo_objects.hpp" +#include "generator/geo_objects/geo_objects_filter.hpp" #include "indexer/classificator_loader.hpp" @@ -29,7 +30,31 @@ bool CheckWeGotExpectedIdsByPoint(m2::PointD const & point, return test == reference; } -void TestMe(std::vector const & osmElements) +std::vector CollectFeatures( + std::vector const & osmElements, ScopedFile const & geoObjectsFeatures, + std::function && accepter) +{ + std::vector expectedIds; + { + FeaturesCollector collector(geoObjectsFeatures.GetFullPath()); + for (OsmElementData const & elementData : osmElements) + { + FeatureBuilder fb = FeatureBuilderFromOmsElementData(elementData); + if (accepter(fb)) + expectedIds.emplace_back(fb.GetMostGenericOsmId()); + + TEST(fb.PreSerialize(), ()); + collector.Collect(fb); + } + } + + return expectedIds; +} + +GeoObjectsGenerator TearUp(std::vector const & osmElements, + ScopedFile const & geoObjectsFeatures, + ScopedFile const & idsWithoutAddresses, + ScopedFile const & geoObjectsKeyValue) { // Absolutely random region. std::shared_ptr value = std::make_shared(LoadFromString( @@ -72,39 +97,33 @@ void TestMe(std::vector const & osmElements) } })")); - auto regionGetter = [&value](auto && point) { return KeyValue{1, value}; }; - classificator::Load(); + auto regionGetter = [value](auto && point) { return KeyValue{1, value}; }; + return GeoObjectsGenerator{regionGetter, + geoObjectsFeatures.GetFullPath(), + idsWithoutAddresses.GetFullPath(), + geoObjectsKeyValue.GetFullPath(), + false /* verbose */, + 1 /* threadsCount */}; +} + +void TestFindReverse(std::vector const & osmElements) +{ + classificator::Load(); ScopedFile const geoObjectsFeatures{"geo_objects_features.mwm", ScopedFile::Mode::DoNotCreate}; ScopedFile const idsWithoutAddresses{"ids_without_addresses.txt", ScopedFile::Mode::DoNotCreate}; ScopedFile const geoObjectsKeyValue{"geo_objects.jsonl", ScopedFile::Mode::DoNotCreate}; - std::vector expectedIds; - - { - FeaturesCollector collector(geoObjectsFeatures.GetFullPath()); - for (OsmElementData const & elementData : osmElements) - { - FeatureBuilder fb = FeatureBuilderFromOmsElementData(elementData); - if (fb.IsPoint()) - expectedIds.emplace_back(fb.GetMostGenericOsmId()); - - TEST(fb.PreSerialize(), ()); - collector.Collect(fb); - } - } - - GeoObjectsGenerator geoObjectsGenerator{regionGetter, - geoObjectsFeatures.GetFullPath(), - idsWithoutAddresses.GetFullPath(), - geoObjectsKeyValue.GetFullPath(), - false /* verbose */ , - 1 /* threadsCount */}; + auto const & expectedIds = CollectFeatures( + osmElements, geoObjectsFeatures, [](FeatureBuilder const & fb) { return fb.IsPoint(); }); + GeoObjectsGenerator geoObjectsGenerator = + TearUp(osmElements, geoObjectsFeatures, idsWithoutAddresses, geoObjectsKeyValue); TEST(geoObjectsGenerator.GenerateGeoObjects(), ("Generate Geo Objects failed")); auto const geoObjectsIndex = MakeTempGeoObjectsIndex(geoObjectsFeatures.GetFullPath()); + TEST(geoObjectsIndex.has_value(), ("Temporary index build failed")); TEST(CheckWeGotExpectedIdsByPoint({1.5, 1.5}, expectedIds, *geoObjectsIndex), ()); @@ -112,9 +131,6 @@ void TestMe(std::vector const & osmElements) TEST(CheckWeGotExpectedIdsByPoint({4, 4}, expectedIds, *geoObjectsIndex), ()); } - - - UNIT_TEST(GenerateGeoObjects_AddNullBuildingGeometryForPointsWithAddressesInside) { std::vector const osmElements{ @@ -134,7 +150,7 @@ UNIT_TEST(GenerateGeoObjects_AddNullBuildingGeometryForPointsWithAddressesInside {{1.6, 1.6}}, {}} }; - TestMe(osmElements); + TestFindReverse(osmElements); } UNIT_TEST(GenerateGeoObjects_AddNullBuildingGeometryForPointsWithAddressesInside2) @@ -158,6 +174,46 @@ UNIT_TEST(GenerateGeoObjects_AddNullBuildingGeometryForPointsWithAddressesInside {}}, }; - TestMe(osmElements); + TestFindReverse(osmElements); } +void TestPoiHasAddress(std::vector const & osmElements) +{ + classificator::Load(); + ScopedFile const geoObjectsFeatures{"geo_objects_features.mwm", ScopedFile::Mode::DoNotCreate}; + ScopedFile const idsWithoutAddresses{"ids_without_addresses.txt", ScopedFile::Mode::DoNotCreate}; + ScopedFile const geoObjectsKeyValue{"geo_objects.jsonl", ScopedFile::Mode::DoNotCreate}; + + auto const & expectedIds = + CollectFeatures(osmElements, geoObjectsFeatures, + [](FeatureBuilder const & fb) { return GeoObjectsFilter::IsPoi(fb); }); + + GeoObjectsGenerator geoObjectsGenerator = + TearUp(osmElements, geoObjectsFeatures, idsWithoutAddresses, geoObjectsKeyValue); + + TEST(geoObjectsGenerator.GenerateGeoObjects(), ("Generate Geo Objects failed")); + for (GeoObjectId id : expectedIds) + { + std::shared_ptr value = + geoObjectsGenerator.GetKeyValueStorage().Find(id.GetEncodedId()); + TEST(value, ("Id", id, "is not stored in key value")); + TEST(JsonHasBuilding(*value), ("No address for", id)); + } +} + +UNIT_TEST(GenerateGeoObjects_CheckPoiEnricedWithAddress) +{ + std::vector const osmElements{ + {1, {{"addr:housenumber", "111"}, {"addr:street", "Healing street"}}, {{1.6, 1.6}}, {}}, + {2, + {{"building", "commercial"}, {"type", "multipolygon"}, {"name", "superbuilding"}}, + {{1, 1}, {4, 4}}, + {}}, + {3, + {{"shop", "supermarket"}, {"population", "1"}, {"name", "ForgetMeNot"}}, + {{1.5, 1.5}}, + {}}, + }; + + TestPoiHasAddress(osmElements); +} diff --git a/generator/geo_objects/geo_object_info_getter.hpp b/generator/geo_objects/geo_object_info_getter.hpp index a9ed693610..8663160490 100644 --- a/generator/geo_objects/geo_object_info_getter.hpp +++ b/generator/geo_objects/geo_object_info_getter.hpp @@ -37,9 +37,19 @@ public: boost::optional Search( m2::PointD const & point, std::function && pred) const; + std::vector SearchObjectsInIndex(m2::PointD const & point) const + { + return SearchObjectsInIndex(m_index, point); + } + static std::vector SearchObjectsInIndex( indexer::GeoObjectsIndex const & index, m2::PointD const & point); + + KeyValueStorage const & GetKeyValueStorage() const + { + return m_storage; + } private: indexer::GeoObjectsIndex m_index; KeyValueStorage const & m_storage; diff --git a/generator/geo_objects/geo_objects.cpp b/generator/geo_objects/geo_objects.cpp index eeba7658da..9057bb600d 100644 --- a/generator/geo_objects/geo_objects.cpp +++ b/generator/geo_objects/geo_objects.cpp @@ -39,15 +39,6 @@ namespace geo_objects { namespace { -bool HouseHasAddress(JsonValue const & json) -{ - auto && address = - base::GetJSONObligatoryFieldByPath(json, "properties", "locales", "default", "address"); - - auto && building = base::GetJSONOptionalField(address, "building"); - return building && !base::JSONIsNull(building); -} - void UpdateCoordinates(m2::PointD const & point, base::JSONPtr & json) { auto geometry = json_object_get(json.get(), "geometry"); @@ -92,24 +83,6 @@ base::JSONPtr AddAddress(FeatureBuilder const & fb, KeyValue const & regionKeyVa return result; } -std::shared_ptr FindHousePoi(FeatureBuilder const & fb, - GeoObjectInfoGetter const & geoObjectInfoGetter) -{ - return geoObjectInfoGetter.Find(fb.GetKeyPoint(), HouseHasAddress); -} - -base::JSONPtr MakeGeoObjectValueWithoutAddress(FeatureBuilder const & fb, JsonValue const & json) -{ - auto jsonWithAddress = json.MakeDeepCopyJson(); - - auto properties = json_object_get(jsonWithAddress.get(), "properties"); - Localizator localizator(*properties); - localizator.SetLocale("name", Localizator::EasyObjectWithTranslation(fb.GetMultilangName())); - - UpdateCoordinates(fb.GetKeyPoint(), jsonWithAddress); - return jsonWithAddress; -} - void AddBuildingsAndThingsWithHousesThenEnrichAllWithRegionAddresses( KeyValueStorage & geoObjectsKv, GeoObjectsGenerator::RegionInfoGetterProxy const & regionInfoGetter, @@ -135,42 +108,13 @@ void AddBuildingsAndThingsWithHousesThenEnrichAllWithRegionAddresses( LOG(LINFO, ("Added", geoObjectsKv.Size(), "geo objects with addresses.")); } -void AddPoisEnrichedWithHouseAddresses(KeyValueStorage & geoObjectsKv, - GeoObjectInfoGetter const & geoObjectInfoGetter, - std::string const & pathInGeoObjectsTmpMwm, - std::ostream & streamIdsWithoutAddress, bool verbose, - size_t threadsCount) -{ - auto const addressObjectsCount = geoObjectsKv.Size(); - - std::mutex updateMutex; - auto const concurrentTransformer = [&](FeatureBuilder & fb, uint64_t /* currPos */) { - if (!GeoObjectsFilter::IsPoi(fb)) - return; - if (GeoObjectsFilter::IsBuilding(fb) || GeoObjectsFilter::HasHouse(fb)) - return; - - auto const house = FindHousePoi(fb, geoObjectInfoGetter); - if (!house) - return; - - auto const id = fb.GetMostGenericOsmId().GetEncodedId(); - auto jsonValue = MakeGeoObjectValueWithoutAddress(fb, *house); - - std::lock_guard lock(updateMutex); - geoObjectsKv.Insert(id, JsonValue{std::move(jsonValue)}); - streamIdsWithoutAddress << id << "\n"; - }; - - ForEachParallelFromDatRawFormat(threadsCount, pathInGeoObjectsTmpMwm, concurrentTransformer); - LOG(LINFO, - ("Added ", geoObjectsKv.Size() - addressObjectsCount, "geo objects without addresses.")); -} - struct NullBuildingsInfo { std::unordered_map m_addressPoints2Buildings; - std::set m_buildingsIds; + // Quite possible to have many points for one building. We want to use + // their addresses for POIs according to buildings and have no idea how to distinguish between + // them, so take one random + std::unordered_map m_Buildings2AddressPoint; }; NullBuildingsInfo GetHelpfulNullBuildings(GeoObjectInfoGetter const & geoObjectInfoGetter, @@ -185,7 +129,7 @@ NullBuildingsInfo GetHelpfulNullBuildings(GeoObjectInfoGetter const & geoObjectI return; auto const buildingId = geoObjectInfoGetter.Search( - fb.GetKeyPoint(), [](JsonValue const & json) { return !HouseHasAddress(json); }); + fb.GetKeyPoint(), [](JsonValue const & json) { return !JsonHasBuilding(json); }); if (!buildingId) return; @@ -193,7 +137,7 @@ NullBuildingsInfo GetHelpfulNullBuildings(GeoObjectInfoGetter const & geoObjectI std::lock_guard lock(updateMutex); result.m_addressPoints2Buildings[id] = *buildingId; - result.m_buildingsIds.insert(*buildingId); + result.m_Buildings2AddressPoint[*buildingId] = id; }; ForEachParallelFromDatRawFormat(threadsCount, pathInGeoObjectsTmpMwm, saveIdFold); @@ -212,7 +156,8 @@ BuildingsGeometries GetBuildingsGeometry(std::string const & pathInGeoObjectsTmp auto const saveIdFold = [&](FeatureBuilder & fb, uint64_t /* currPos */) { auto const id = fb.GetMostGenericOsmId(); - if (buildingsInfo.m_buildingsIds.find(id) == buildingsInfo.m_buildingsIds.end() || + if (buildingsInfo.m_Buildings2AddressPoint.find(id) == + buildingsInfo.m_Buildings2AddressPoint.end() || fb.GetParams().GetGeomType() != feature::GeomType::Area) return; @@ -277,7 +222,8 @@ NullBuildingsInfo EnrichPointsWithOuterBuildingGeometry( LOG(LINFO, ("Found", buildingInfo.m_addressPoints2Buildings.size(), "address points with outer building geometry")); - LOG(LINFO, ("Found", buildingInfo.m_buildingsIds.size(), "helpful addressless buildings")); + LOG(LINFO, + ("Found", buildingInfo.m_Buildings2AddressPoint.size(), "helpful addressless buildings")); auto const buildingGeometries = GetBuildingsGeometry(pathInGeoObjectsTmpMwm, buildingInfo, threadsCount); LOG(LINFO, ("Saved", buildingGeometries.size(), "buildings geometries")); @@ -300,6 +246,15 @@ auto Measure(std::string activity, Activist && activist) } } // namespace +bool JsonHasBuilding(JsonValue const & json) +{ + auto && address = + base::GetJSONObligatoryFieldByPath(json, "properties", "locales", "default", "address"); + + auto && building = base::GetJSONOptionalField(address, "building"); + return building && !base::JSONIsNull(building); +} + boost::optional> MakeTempGeoObjectsIndex( std::string const & pathToGeoObjectsTmpMwm) { @@ -322,6 +277,72 @@ boost::optional> MakeTempGeoObjectsIndex( return indexer::ReadIndex, MmapReader>(indexFile); } +std::shared_ptr FindHousePoi(FeatureBuilder const & fb, + GeoObjectInfoGetter const & geoObjectInfoGetter, + NullBuildingsInfo const & buildingsInfo) +{ + std::shared_ptr house = geoObjectInfoGetter.Find(fb.GetKeyPoint(), JsonHasBuilding); + if (house) + return house; + + std::vector potentialIds = + geoObjectInfoGetter.SearchObjectsInIndex(fb.GetKeyPoint()); + + for (base::GeoObjectId id : potentialIds) + { + auto const it = buildingsInfo.m_Buildings2AddressPoint.find(id); + if (it != buildingsInfo.m_Buildings2AddressPoint.end()) + return geoObjectInfoGetter.GetKeyValueStorage().Find(it->second.GetEncodedId()); + } + + return {}; +} + +base::JSONPtr MakeJsonValueWithNameFromFeature(FeatureBuilder const & fb, JsonValue const & json) +{ + auto jsonWithAddress = json.MakeDeepCopyJson(); + + auto properties = json_object_get(jsonWithAddress.get(), "properties"); + Localizator localizator(*properties); + localizator.SetLocale("name", Localizator::EasyObjectWithTranslation(fb.GetMultilangName())); + + UpdateCoordinates(fb.GetKeyPoint(), jsonWithAddress); + return jsonWithAddress; +} + +void AddPoisEnrichedWithHouseAddresses(KeyValueStorage & geoObjectsKv, + GeoObjectInfoGetter const & geoObjectInfoGetter, + NullBuildingsInfo const & buildingsInfo, + std::string const & pathInGeoObjectsTmpMwm, + std::ostream & streamIdsWithoutAddress, bool verbose, + size_t threadsCount) +{ + auto const addressObjectsCount = geoObjectsKv.Size(); + + std::mutex updateMutex; + auto const concurrentTransformer = [&](FeatureBuilder & fb, uint64_t /* currPos */) { + if (!GeoObjectsFilter::IsPoi(fb)) + return; + if (GeoObjectsFilter::IsBuilding(fb) || GeoObjectsFilter::HasHouse(fb)) + return; + + auto const house = FindHousePoi(fb, geoObjectInfoGetter, buildingsInfo); + if (!house) + return; + + auto const id = fb.GetMostGenericOsmId().GetEncodedId(); + auto jsonValue = MakeJsonValueWithNameFromFeature(fb, *house); + + std::lock_guard lock(updateMutex); + geoObjectsKv.Insert(id, JsonValue{std::move(jsonValue)}); + streamIdsWithoutAddress << id << "\n"; + }; + + ForEachParallelFromDatRawFormat(threadsCount, pathInGeoObjectsTmpMwm, concurrentTransformer); + LOG(LINFO, + ("Added ", geoObjectsKv.Size() - addressObjectsCount, "geo objects without addresses.")); +} + void FilterAddresslessThanGaveTheirGeometryToInnerPoints(std::string const & pathInGeoObjectsTmpMwm, NullBuildingsInfo const & buildingsInfo, size_t threadsCount) @@ -331,7 +352,8 @@ void FilterAddresslessThanGaveTheirGeometryToInnerPoints(std::string const & pat std::mutex collectorMutex; auto concurrentCollect = [&](FeatureBuilder const & fb, uint64_t /* currPos */) { auto const id = fb.GetMostGenericOsmId(); - if (buildingsInfo.m_buildingsIds.find(id) != buildingsInfo.m_buildingsIds.end()) + if (buildingsInfo.m_Buildings2AddressPoint.find(id) != + buildingsInfo.m_Buildings2AddressPoint.end()) return; std::lock_guard lock(collectorMutex); @@ -405,8 +427,9 @@ bool GeoObjectsGenerator::GenerateGeoObjectsPrivate() std::ofstream streamIdsWithoutAddress(m_pathOutIdsWithoutAddress); - AddPoisEnrichedWithHouseAddresses(m_geoObjectsKv, geoObjectInfoGetter, m_pathInGeoObjectsTmpMwm, - streamIdsWithoutAddress, m_verbose, m_threadsCount); + AddPoisEnrichedWithHouseAddresses(m_geoObjectsKv, geoObjectInfoGetter, buildingInfo, + m_pathInGeoObjectsTmpMwm, streamIdsWithoutAddress, m_verbose, + m_threadsCount); FilterAddresslessThanGaveTheirGeometryToInnerPoints(m_pathInGeoObjectsTmpMwm, buildingInfo, m_threadsCount); diff --git a/generator/geo_objects/geo_objects.hpp b/generator/geo_objects/geo_objects.hpp index 4664b42708..f27576a664 100644 --- a/generator/geo_objects/geo_objects.hpp +++ b/generator/geo_objects/geo_objects.hpp @@ -23,6 +23,8 @@ using IndexReader = ReaderPtr; boost::optional> MakeTempGeoObjectsIndex( std::string const & pathToGeoObjectsTmpMwm); +bool JsonHasBuilding(JsonValue const & json); + class GeoObjectsGenerator { public: