[generator] filter out of geoindex used for inner points nullbuildings and do not filter others

This commit is contained in:
LaGrunge 2019-08-06 18:08:27 +03:00 committed by cc-engineering
parent ff7225469e
commit d71a008674
4 changed files with 111 additions and 99 deletions

View file

@ -29,7 +29,7 @@ bool CheckWeGotExpectedIdsByPoint(m2::PointD const & point,
return test == reference;
}
UNIT_TEST(GenerateGeoObjects_AddNullBuildingGeometryForPointsWithAddressesInside)
void TestMe(std::vector<OsmElementData> const & osmElements)
{
// Absolutely random region.
std::shared_ptr<JsonValue> value = std::make_shared<JsonValue>(LoadFromString(
@ -79,7 +79,44 @@ UNIT_TEST(GenerateGeoObjects_AddNullBuildingGeometryForPointsWithAddressesInside
ScopedFile const idsWithoutAddresses{"ids_without_addresses.txt", ScopedFile::Mode::DoNotCreate};
ScopedFile const geoObjectsKeyValue{"geo_objects.jsonl", ScopedFile::Mode::DoNotCreate};
std::vector<base::GeoObjectId> allIds;
std::vector<base::GeoObjectId> expectedIds;
{
FeaturesCollector collector(geoObjectsFeatures.GetFullPath());
for (OsmElementData const & elementData : osmElements)
{
FeatureBuilder fb = FeatureBuilderFromOmsElementData(elementData);
if (fb.IsPoint())
expectedIds.emplace_back(fb.GetMostGenericOsmId());
TEST(fb.PreSerialize(), ());
collector.Collect(fb);
}
}
GeoObjectsGenerator geoObjectsGenerator{regionGetter,
geoObjectsFeatures.GetFullPath(),
idsWithoutAddresses.GetFullPath(),
geoObjectsKeyValue.GetFullPath(),
false /* verbose */ ,
1 /* threadsCount */};
TEST(geoObjectsGenerator.GenerateGeoObjects(), ("Generate Geo Objects failed"));
auto const geoObjectsIndex = MakeTempGeoObjectsIndex(geoObjectsFeatures.GetFullPath());
TEST(geoObjectsIndex.has_value(), ("Temporary index build failed"));
TEST(CheckWeGotExpectedIdsByPoint({1.5, 1.5}, expectedIds, *geoObjectsIndex), ());
TEST(CheckWeGotExpectedIdsByPoint({2, 2}, expectedIds, *geoObjectsIndex), ());
TEST(CheckWeGotExpectedIdsByPoint({4, 4}, expectedIds, *geoObjectsIndex), ());
}
UNIT_TEST(GenerateGeoObjects_AddNullBuildingGeometryForPointsWithAddressesInside)
{
std::vector<OsmElementData> const osmElements{
{1,
{{"addr:housenumber", "39 с79"},
@ -93,36 +130,34 @@ UNIT_TEST(GenerateGeoObjects_AddNullBuildingGeometryForPointsWithAddressesInside
{}},
{3,
{{"addr:housenumber", "39 с80"},
{"addr:street", "Ленинградский проспект"}},
{{1.6, 1.6}},
{}}
};
TestMe(osmElements);
}
UNIT_TEST(GenerateGeoObjects_AddNullBuildingGeometryForPointsWithAddressesInside2)
{
std::vector<OsmElementData> const osmElements{
{1,
{{"addr:housenumber", "39 с80"},
{"addr:street", "Ленинградский проспект"}},
{{1.6, 1.6}},
{}
},
{2,
{{"addr:housenumber", "39 с79"},
{"addr:street", "Ленинградский проспект"},
{"building", "yes"}},
{{1.6, 1.6}},
{{1.5, 1.5}},
{}},
{3,
{{"building", "commercial"}, {"type", "multipolygon"}, {"name", "superbuilding"}},
{{1, 1}, {4, 4}},
{}},
};
{
FeaturesCollector collector(geoObjectsFeatures.GetFullPath());
for (OsmElementData const & elementData : osmElements)
{
FeatureBuilder fb = FeatureBuilderFromOmsElementData(elementData);
allIds.emplace_back(fb.GetMostGenericOsmId());
TEST(fb.PreSerialize(), ());
collector.Collect(fb);
}
}
GeoObjectsGenerator geoObjectsGenerator{regionGetter,
geoObjectsFeatures.GetFullPath(),
idsWithoutAddresses.GetFullPath(),
geoObjectsKeyValue.GetFullPath(),
"*", /* allowAddresslessForCountries */
false /* verbose */ ,
1 /* threadsCount */};
TEST(geoObjectsGenerator.GenerateGeoObjects(), ("Generate Geo Objects failed"));
auto const geoObjectsIndex = MakeTempGeoObjectsIndex(geoObjectsFeatures.GetFullPath());
TEST(geoObjectsIndex.has_value(), ("Temporary index build failed"));
TEST(CheckWeGotExpectedIdsByPoint({1.5, 1.5}, allIds, *geoObjectsIndex), ());
TEST(CheckWeGotExpectedIdsByPoint({2, 2}, allIds, *geoObjectsIndex), ());
TEST(CheckWeGotExpectedIdsByPoint({4, 4}, allIds, *geoObjectsIndex), ());
TestMe(osmElements);
}

View file

@ -434,7 +434,6 @@ int GeneratorToolMain(int argc, char ** argv)
FLAGS_geo_objects_features,
FLAGS_ids_without_addresses,
FLAGS_geo_objects_key_value,
FLAGS_allow_addressless_for_countries,
FLAGS_verbose,
threadsCount};

View file

@ -110,45 +110,7 @@ base::JSONPtr MakeGeoObjectValueWithoutAddress(FeatureBuilder const & fb, JsonVa
return jsonWithAddress;
}
void FilterAddresslessByCountryAndRepackMwm(
std::string const & pathInGeoObjectsTmpMwm, std::string const & includeCountries,
GeoObjectsGenerator::RegionInfoGetterProxy const & regionInfoGetter, size_t threadsCount)
{
auto const path = GetPlatform().TmpPathForFile();
FeaturesCollector collector(path);
std::mutex collectorMutex;
auto concurrentCollect = [&](FeatureBuilder const & fb) {
std::lock_guard<std::mutex> lock(collectorMutex);
collector.Collect(fb);
};
auto const filteringCollector = [&](FeatureBuilder const & fb, uint64_t /* currPos */) {
if (GeoObjectsFilter::HasHouse(fb) || GeoObjectsFilter::IsPoi(fb))
{
concurrentCollect(fb);
return;
}
static_assert(
std::is_base_of<regions::ConcurrentGetProcessability, regions::RegionInfoGetter>::value,
"");
auto regionKeyValue = regionInfoGetter.FindDeepest(fb.GetKeyPoint());
if (!regionKeyValue)
return;
auto && country = base::GetJSONObligatoryFieldByPath(
*regionKeyValue->second, "properties", "locales", "default", "address", "country");
auto countryName = FromJSON<std::string>(country);
auto pos = includeCountries.find(countryName);
if (pos != std::string::npos)
concurrentCollect(fb);
};
ForEachParallelFromDatRawFormat(threadsCount, pathInGeoObjectsTmpMwm, filteringCollector);
CHECK(base::RenameFileX(path, pathInGeoObjectsTmpMwm), ());
}
void AddThingsWithHousesAndBuildingsAndEnrichWithRegionAddresses(
void AddBuildingsAndThingsWithHousesThenEnrichAllWithRegionAddresses(
KeyValueStorage & geoObjectsKv,
GeoObjectsGenerator::RegionInfoGetterProxy const & regionInfoGetter,
std::string const & pathInGeoObjectsTmpMwm, bool verbose, size_t threadsCount)
@ -306,9 +268,9 @@ size_t AddBuildingGeometriesToAddressPoints(std::string const & pathInGeoObjects
return pointsEnriched;
}
void EnrichPointsWithOuterBuildingGeometry(GeoObjectInfoGetter const & geoObjectInfoGetter,
std::string const & pathInGeoObjectsTmpMwm,
size_t threadsCount)
NullBuildingsInfo EnrichPointsWithOuterBuildingGeometry(
GeoObjectInfoGetter const & geoObjectInfoGetter, std::string const & pathInGeoObjectsTmpMwm,
size_t threadsCount)
{
auto const buildingInfo =
GetHelpfulNullBuildings(geoObjectInfoGetter, pathInGeoObjectsTmpMwm, threadsCount);
@ -324,6 +286,7 @@ void EnrichPointsWithOuterBuildingGeometry(GeoObjectInfoGetter const & geoObject
pathInGeoObjectsTmpMwm, buildingInfo, buildingGeometries, threadsCount);
LOG(LINFO, (pointsCount, "address points were enriched with outer building geomery"));
return buildingInfo;
}
template <class Activist>
@ -359,15 +322,35 @@ boost::optional<indexer::GeoObjectsIndex<IndexReader>> MakeTempGeoObjectsIndex(
return indexer::ReadIndex<indexer::GeoObjectsIndexBox<IndexReader>, MmapReader>(indexFile);
}
GeoObjectsGenerator::GeoObjectsGenerator(
std::string pathInRegionsIndex, std::string pathInRegionsKv, std::string pathInGeoObjectsTmpMwm,
std::string pathOutIdsWithoutAddress, std::string pathOutGeoObjectsKv,
std::string allowAddresslessForCountries, bool verbose, size_t threadsCount)
void FilterAddresslessThanGaveTheirGeometryToInnerPoints(std::string const & pathInGeoObjectsTmpMwm,
NullBuildingsInfo const & buildingsInfo,
size_t threadsCount)
{
auto const path = GetPlatform().TmpPathForFile();
FeaturesCollector collector(path);
std::mutex collectorMutex;
auto concurrentCollect = [&](FeatureBuilder const & fb, uint64_t /* currPos */) {
auto const id = fb.GetMostGenericOsmId();
if (buildingsInfo.m_buildingsIds.find(id) != buildingsInfo.m_buildingsIds.end())
return;
std::lock_guard<std::mutex> lock(collectorMutex);
collector.Collect(fb);
};
ForEachParallelFromDatRawFormat(threadsCount, pathInGeoObjectsTmpMwm, concurrentCollect);
CHECK(base::RenameFileX(path, pathInGeoObjectsTmpMwm), ());
}
GeoObjectsGenerator::GeoObjectsGenerator(std::string pathInRegionsIndex,
std::string pathInRegionsKv,
std::string pathInGeoObjectsTmpMwm,
std::string pathOutIdsWithoutAddress,
std::string pathOutGeoObjectsKv, bool verbose,
size_t threadsCount)
: m_pathInGeoObjectsTmpMwm(std::move(pathInGeoObjectsTmpMwm))
, m_pathOutIdsWithoutAddress(std::move(pathOutIdsWithoutAddress))
, m_pathOutGeoObjectsKv(std::move(pathOutGeoObjectsKv))
, m_allowAddresslessForCountries(std::move(allowAddresslessForCountries))
, m_verbose(verbose)
, m_threadsCount(threadsCount)
, m_geoObjectsKv(InitGeoObjectsKv(m_pathOutGeoObjectsKv))
@ -379,13 +362,11 @@ GeoObjectsGenerator::GeoObjectsGenerator(
GeoObjectsGenerator::GeoObjectsGenerator(RegionInfoGetter && regionInfoGetter,
std::string pathInGeoObjectsTmpMwm,
std::string pathOutIdsWithoutAddress,
std::string pathOutGeoObjectsKv,
std::string allowAddresslessForCountries, bool verbose,
std::string pathOutGeoObjectsKv, bool verbose,
size_t threadsCount)
: m_pathInGeoObjectsTmpMwm(std::move(pathInGeoObjectsTmpMwm))
, m_pathOutIdsWithoutAddress(std::move(pathOutIdsWithoutAddress))
, m_pathOutGeoObjectsKv(std::move(pathOutGeoObjectsKv))
, m_allowAddresslessForCountries(std::move(allowAddresslessForCountries))
, m_verbose(verbose)
, m_threadsCount(threadsCount)
, m_geoObjectsKv(InitGeoObjectsKv(m_pathOutGeoObjectsKv))
@ -403,7 +384,7 @@ bool GeoObjectsGenerator::GenerateGeoObjectsPrivate()
auto geoObjectIndexFuture =
std::async(std::launch::async, MakeTempGeoObjectsIndex, m_pathInGeoObjectsTmpMwm);
AddThingsWithHousesAndBuildingsAndEnrichWithRegionAddresses(
AddBuildingsAndThingsWithHousesThenEnrichAllWithRegionAddresses(
m_geoObjectsKv, m_regionInfoGetter, m_pathInGeoObjectsTmpMwm, m_verbose, m_threadsCount);
LOG(LINFO, ("Geo objects with addresses were built."));
@ -411,6 +392,7 @@ bool GeoObjectsGenerator::GenerateGeoObjectsPrivate()
auto geoObjectIndex = geoObjectIndexFuture.get();
LOG(LINFO, ("Index was built."));
if (!geoObjectIndex)
return false;
@ -418,22 +400,18 @@ bool GeoObjectsGenerator::GenerateGeoObjectsPrivate()
LOG(LINFO, ("Enrich address points with outer null building geometry."));
EnrichPointsWithOuterBuildingGeometry(geoObjectInfoGetter, m_pathInGeoObjectsTmpMwm,
m_threadsCount);
NullBuildingsInfo const & buildingInfo = EnrichPointsWithOuterBuildingGeometry(
geoObjectInfoGetter, m_pathInGeoObjectsTmpMwm, m_threadsCount);
std::ofstream streamIdsWithoutAddress(m_pathOutIdsWithoutAddress);
AddPoisEnrichedWithHouseAddresses(m_geoObjectsKv, geoObjectInfoGetter, m_pathInGeoObjectsTmpMwm,
streamIdsWithoutAddress, m_verbose, m_threadsCount);
if (m_allowAddresslessForCountries != "*")
{
FilterAddresslessByCountryAndRepackMwm(m_pathInGeoObjectsTmpMwm, m_allowAddresslessForCountries,
m_regionInfoGetter, m_threadsCount);
FilterAddresslessThanGaveTheirGeometryToInnerPoints(m_pathInGeoObjectsTmpMwm, buildingInfo,
m_threadsCount);
LOG(LINFO, ("Addressless buildings are filtered except countries",
m_allowAddresslessForCountries, "."));
}
LOG(LINFO, ("Addressless buildings with geometry we used for inner points were filtered"));
LOG(LINFO, ("Geo objects without addresses were built."));
LOG(LINFO, ("Geo objects key-value storage saved to", m_pathOutGeoObjectsKv));

View file

@ -49,23 +49,18 @@ public:
return m_regionInfoGetter ? m_regionInfoGetter->FindDeepest(point)
: m_externalInfoGetter->operator()(point);
}
private:
boost::optional<regions::RegionInfoGetter> m_regionInfoGetter;
boost::optional<RegionInfoGetter> m_externalInfoGetter;
};
// |allowAddresslessForCountries| specifies countries for which addressless buldings are
// constructed in index and key-value files. Countries are specified by osm's default local name
// (or part of name) separated by commas. Default value is '*' (for all countries).
GeoObjectsGenerator(std::string pathInRegionsIndex, std::string pathInRegionsKv,
std::string pathInGeoObjectsTmpMwm, std::string pathOutIdsWithoutAddress,
std::string pathOutGeoObjectsKv, std::string allowAddresslessForCountries,
bool verbose, size_t threadsCount);
std::string pathOutGeoObjectsKv, bool verbose, size_t threadsCount);
GeoObjectsGenerator(RegionInfoGetter && regionInfoGetter, std::string pathInGeoObjectsTmpMwm,
std::string pathOutIdsWithoutAddress, std::string pathOutGeoObjectsKv,
std::string allowAddresslessForCountries, bool verbose, size_t threadsCount);
bool verbose, size_t threadsCount);
// This function generates key-value pairs for geo objects.
// First, we try to generate key-value pairs only for houses, since we cannot say anything about
@ -74,6 +69,11 @@ public:
// index for houses.
bool GenerateGeoObjects();
KeyValueStorage const & GetKeyValueStorage() const
{
return m_geoObjectsKv;
}
private:
bool GenerateGeoObjectsPrivate();
static KeyValueStorage InitGeoObjectsKv(std::string const & pathOutGeoObjectsKv)
@ -85,7 +85,7 @@ private:
std::string m_pathInGeoObjectsTmpMwm;
std::string m_pathOutIdsWithoutAddress;
std::string m_pathOutGeoObjectsKv;
std::string m_allowAddresslessForCountries;
bool m_verbose = false;
size_t m_threadsCount = 1;