forked from organicmaps/organicmaps
[generator] filter out of geoindex used for inner points nullbuildings and do not filter others
This commit is contained in:
parent
ff7225469e
commit
d71a008674
4 changed files with 111 additions and 99 deletions
|
@ -29,7 +29,7 @@ bool CheckWeGotExpectedIdsByPoint(m2::PointD const & point,
|
|||
return test == reference;
|
||||
}
|
||||
|
||||
UNIT_TEST(GenerateGeoObjects_AddNullBuildingGeometryForPointsWithAddressesInside)
|
||||
void TestMe(std::vector<OsmElementData> const & osmElements)
|
||||
{
|
||||
// Absolutely random region.
|
||||
std::shared_ptr<JsonValue> value = std::make_shared<JsonValue>(LoadFromString(
|
||||
|
@ -79,7 +79,44 @@ UNIT_TEST(GenerateGeoObjects_AddNullBuildingGeometryForPointsWithAddressesInside
|
|||
ScopedFile const idsWithoutAddresses{"ids_without_addresses.txt", ScopedFile::Mode::DoNotCreate};
|
||||
ScopedFile const geoObjectsKeyValue{"geo_objects.jsonl", ScopedFile::Mode::DoNotCreate};
|
||||
|
||||
std::vector<base::GeoObjectId> allIds;
|
||||
std::vector<base::GeoObjectId> expectedIds;
|
||||
|
||||
{
|
||||
FeaturesCollector collector(geoObjectsFeatures.GetFullPath());
|
||||
for (OsmElementData const & elementData : osmElements)
|
||||
{
|
||||
FeatureBuilder fb = FeatureBuilderFromOmsElementData(elementData);
|
||||
if (fb.IsPoint())
|
||||
expectedIds.emplace_back(fb.GetMostGenericOsmId());
|
||||
|
||||
TEST(fb.PreSerialize(), ());
|
||||
collector.Collect(fb);
|
||||
}
|
||||
}
|
||||
|
||||
GeoObjectsGenerator geoObjectsGenerator{regionGetter,
|
||||
geoObjectsFeatures.GetFullPath(),
|
||||
idsWithoutAddresses.GetFullPath(),
|
||||
geoObjectsKeyValue.GetFullPath(),
|
||||
false /* verbose */ ,
|
||||
1 /* threadsCount */};
|
||||
|
||||
|
||||
TEST(geoObjectsGenerator.GenerateGeoObjects(), ("Generate Geo Objects failed"));
|
||||
|
||||
auto const geoObjectsIndex = MakeTempGeoObjectsIndex(geoObjectsFeatures.GetFullPath());
|
||||
TEST(geoObjectsIndex.has_value(), ("Temporary index build failed"));
|
||||
|
||||
TEST(CheckWeGotExpectedIdsByPoint({1.5, 1.5}, expectedIds, *geoObjectsIndex), ());
|
||||
TEST(CheckWeGotExpectedIdsByPoint({2, 2}, expectedIds, *geoObjectsIndex), ());
|
||||
TEST(CheckWeGotExpectedIdsByPoint({4, 4}, expectedIds, *geoObjectsIndex), ());
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
UNIT_TEST(GenerateGeoObjects_AddNullBuildingGeometryForPointsWithAddressesInside)
|
||||
{
|
||||
std::vector<OsmElementData> const osmElements{
|
||||
{1,
|
||||
{{"addr:housenumber", "39 с79"},
|
||||
|
@ -93,36 +130,34 @@ UNIT_TEST(GenerateGeoObjects_AddNullBuildingGeometryForPointsWithAddressesInside
|
|||
{}},
|
||||
{3,
|
||||
{{"addr:housenumber", "39 с80"},
|
||||
{"addr:street", "Ленинградский проспект"}},
|
||||
{{1.6, 1.6}},
|
||||
{}}
|
||||
};
|
||||
TestMe(osmElements);
|
||||
}
|
||||
|
||||
UNIT_TEST(GenerateGeoObjects_AddNullBuildingGeometryForPointsWithAddressesInside2)
|
||||
{
|
||||
std::vector<OsmElementData> const osmElements{
|
||||
{1,
|
||||
{{"addr:housenumber", "39 с80"},
|
||||
{"addr:street", "Ленинградский проспект"}},
|
||||
{{1.6, 1.6}},
|
||||
{}
|
||||
},
|
||||
{2,
|
||||
{{"addr:housenumber", "39 с79"},
|
||||
{"addr:street", "Ленинградский проспект"},
|
||||
{"building", "yes"}},
|
||||
{{1.6, 1.6}},
|
||||
{{1.5, 1.5}},
|
||||
{}},
|
||||
{3,
|
||||
{{"building", "commercial"}, {"type", "multipolygon"}, {"name", "superbuilding"}},
|
||||
{{1, 1}, {4, 4}},
|
||||
{}},
|
||||
|
||||
};
|
||||
|
||||
{
|
||||
FeaturesCollector collector(geoObjectsFeatures.GetFullPath());
|
||||
for (OsmElementData const & elementData : osmElements)
|
||||
{
|
||||
FeatureBuilder fb = FeatureBuilderFromOmsElementData(elementData);
|
||||
allIds.emplace_back(fb.GetMostGenericOsmId());
|
||||
TEST(fb.PreSerialize(), ());
|
||||
collector.Collect(fb);
|
||||
}
|
||||
}
|
||||
|
||||
GeoObjectsGenerator geoObjectsGenerator{regionGetter,
|
||||
geoObjectsFeatures.GetFullPath(),
|
||||
idsWithoutAddresses.GetFullPath(),
|
||||
geoObjectsKeyValue.GetFullPath(),
|
||||
"*", /* allowAddresslessForCountries */
|
||||
false /* verbose */ ,
|
||||
1 /* threadsCount */};
|
||||
TEST(geoObjectsGenerator.GenerateGeoObjects(), ("Generate Geo Objects failed"));
|
||||
|
||||
auto const geoObjectsIndex = MakeTempGeoObjectsIndex(geoObjectsFeatures.GetFullPath());
|
||||
TEST(geoObjectsIndex.has_value(), ("Temporary index build failed"));
|
||||
|
||||
TEST(CheckWeGotExpectedIdsByPoint({1.5, 1.5}, allIds, *geoObjectsIndex), ());
|
||||
TEST(CheckWeGotExpectedIdsByPoint({2, 2}, allIds, *geoObjectsIndex), ());
|
||||
TEST(CheckWeGotExpectedIdsByPoint({4, 4}, allIds, *geoObjectsIndex), ());
|
||||
TestMe(osmElements);
|
||||
}
|
||||
|
||||
|
|
|
@ -434,7 +434,6 @@ int GeneratorToolMain(int argc, char ** argv)
|
|||
FLAGS_geo_objects_features,
|
||||
FLAGS_ids_without_addresses,
|
||||
FLAGS_geo_objects_key_value,
|
||||
FLAGS_allow_addressless_for_countries,
|
||||
FLAGS_verbose,
|
||||
threadsCount};
|
||||
|
||||
|
|
|
@ -110,45 +110,7 @@ base::JSONPtr MakeGeoObjectValueWithoutAddress(FeatureBuilder const & fb, JsonVa
|
|||
return jsonWithAddress;
|
||||
}
|
||||
|
||||
void FilterAddresslessByCountryAndRepackMwm(
|
||||
std::string const & pathInGeoObjectsTmpMwm, std::string const & includeCountries,
|
||||
GeoObjectsGenerator::RegionInfoGetterProxy const & regionInfoGetter, size_t threadsCount)
|
||||
{
|
||||
auto const path = GetPlatform().TmpPathForFile();
|
||||
FeaturesCollector collector(path);
|
||||
std::mutex collectorMutex;
|
||||
auto concurrentCollect = [&](FeatureBuilder const & fb) {
|
||||
std::lock_guard<std::mutex> lock(collectorMutex);
|
||||
collector.Collect(fb);
|
||||
};
|
||||
|
||||
auto const filteringCollector = [&](FeatureBuilder const & fb, uint64_t /* currPos */) {
|
||||
if (GeoObjectsFilter::HasHouse(fb) || GeoObjectsFilter::IsPoi(fb))
|
||||
{
|
||||
concurrentCollect(fb);
|
||||
return;
|
||||
}
|
||||
|
||||
static_assert(
|
||||
std::is_base_of<regions::ConcurrentGetProcessability, regions::RegionInfoGetter>::value,
|
||||
"");
|
||||
auto regionKeyValue = regionInfoGetter.FindDeepest(fb.GetKeyPoint());
|
||||
if (!regionKeyValue)
|
||||
return;
|
||||
|
||||
auto && country = base::GetJSONObligatoryFieldByPath(
|
||||
*regionKeyValue->second, "properties", "locales", "default", "address", "country");
|
||||
auto countryName = FromJSON<std::string>(country);
|
||||
auto pos = includeCountries.find(countryName);
|
||||
if (pos != std::string::npos)
|
||||
concurrentCollect(fb);
|
||||
};
|
||||
|
||||
ForEachParallelFromDatRawFormat(threadsCount, pathInGeoObjectsTmpMwm, filteringCollector);
|
||||
CHECK(base::RenameFileX(path, pathInGeoObjectsTmpMwm), ());
|
||||
}
|
||||
|
||||
void AddThingsWithHousesAndBuildingsAndEnrichWithRegionAddresses(
|
||||
void AddBuildingsAndThingsWithHousesThenEnrichAllWithRegionAddresses(
|
||||
KeyValueStorage & geoObjectsKv,
|
||||
GeoObjectsGenerator::RegionInfoGetterProxy const & regionInfoGetter,
|
||||
std::string const & pathInGeoObjectsTmpMwm, bool verbose, size_t threadsCount)
|
||||
|
@ -306,9 +268,9 @@ size_t AddBuildingGeometriesToAddressPoints(std::string const & pathInGeoObjects
|
|||
return pointsEnriched;
|
||||
}
|
||||
|
||||
void EnrichPointsWithOuterBuildingGeometry(GeoObjectInfoGetter const & geoObjectInfoGetter,
|
||||
std::string const & pathInGeoObjectsTmpMwm,
|
||||
size_t threadsCount)
|
||||
NullBuildingsInfo EnrichPointsWithOuterBuildingGeometry(
|
||||
GeoObjectInfoGetter const & geoObjectInfoGetter, std::string const & pathInGeoObjectsTmpMwm,
|
||||
size_t threadsCount)
|
||||
{
|
||||
auto const buildingInfo =
|
||||
GetHelpfulNullBuildings(geoObjectInfoGetter, pathInGeoObjectsTmpMwm, threadsCount);
|
||||
|
@ -324,6 +286,7 @@ void EnrichPointsWithOuterBuildingGeometry(GeoObjectInfoGetter const & geoObject
|
|||
pathInGeoObjectsTmpMwm, buildingInfo, buildingGeometries, threadsCount);
|
||||
|
||||
LOG(LINFO, (pointsCount, "address points were enriched with outer building geomery"));
|
||||
return buildingInfo;
|
||||
}
|
||||
|
||||
template <class Activist>
|
||||
|
@ -359,15 +322,35 @@ boost::optional<indexer::GeoObjectsIndex<IndexReader>> MakeTempGeoObjectsIndex(
|
|||
return indexer::ReadIndex<indexer::GeoObjectsIndexBox<IndexReader>, MmapReader>(indexFile);
|
||||
}
|
||||
|
||||
GeoObjectsGenerator::GeoObjectsGenerator(
|
||||
std::string pathInRegionsIndex, std::string pathInRegionsKv, std::string pathInGeoObjectsTmpMwm,
|
||||
std::string pathOutIdsWithoutAddress, std::string pathOutGeoObjectsKv,
|
||||
std::string allowAddresslessForCountries, bool verbose, size_t threadsCount)
|
||||
void FilterAddresslessThanGaveTheirGeometryToInnerPoints(std::string const & pathInGeoObjectsTmpMwm,
|
||||
NullBuildingsInfo const & buildingsInfo,
|
||||
size_t threadsCount)
|
||||
{
|
||||
auto const path = GetPlatform().TmpPathForFile();
|
||||
FeaturesCollector collector(path);
|
||||
std::mutex collectorMutex;
|
||||
auto concurrentCollect = [&](FeatureBuilder const & fb, uint64_t /* currPos */) {
|
||||
auto const id = fb.GetMostGenericOsmId();
|
||||
if (buildingsInfo.m_buildingsIds.find(id) != buildingsInfo.m_buildingsIds.end())
|
||||
return;
|
||||
|
||||
std::lock_guard<std::mutex> lock(collectorMutex);
|
||||
collector.Collect(fb);
|
||||
};
|
||||
|
||||
ForEachParallelFromDatRawFormat(threadsCount, pathInGeoObjectsTmpMwm, concurrentCollect);
|
||||
CHECK(base::RenameFileX(path, pathInGeoObjectsTmpMwm), ());
|
||||
}
|
||||
|
||||
GeoObjectsGenerator::GeoObjectsGenerator(std::string pathInRegionsIndex,
|
||||
std::string pathInRegionsKv,
|
||||
std::string pathInGeoObjectsTmpMwm,
|
||||
std::string pathOutIdsWithoutAddress,
|
||||
std::string pathOutGeoObjectsKv, bool verbose,
|
||||
size_t threadsCount)
|
||||
: m_pathInGeoObjectsTmpMwm(std::move(pathInGeoObjectsTmpMwm))
|
||||
, m_pathOutIdsWithoutAddress(std::move(pathOutIdsWithoutAddress))
|
||||
, m_pathOutGeoObjectsKv(std::move(pathOutGeoObjectsKv))
|
||||
, m_allowAddresslessForCountries(std::move(allowAddresslessForCountries))
|
||||
, m_verbose(verbose)
|
||||
, m_threadsCount(threadsCount)
|
||||
, m_geoObjectsKv(InitGeoObjectsKv(m_pathOutGeoObjectsKv))
|
||||
|
@ -379,13 +362,11 @@ GeoObjectsGenerator::GeoObjectsGenerator(
|
|||
GeoObjectsGenerator::GeoObjectsGenerator(RegionInfoGetter && regionInfoGetter,
|
||||
std::string pathInGeoObjectsTmpMwm,
|
||||
std::string pathOutIdsWithoutAddress,
|
||||
std::string pathOutGeoObjectsKv,
|
||||
std::string allowAddresslessForCountries, bool verbose,
|
||||
std::string pathOutGeoObjectsKv, bool verbose,
|
||||
size_t threadsCount)
|
||||
: m_pathInGeoObjectsTmpMwm(std::move(pathInGeoObjectsTmpMwm))
|
||||
, m_pathOutIdsWithoutAddress(std::move(pathOutIdsWithoutAddress))
|
||||
, m_pathOutGeoObjectsKv(std::move(pathOutGeoObjectsKv))
|
||||
, m_allowAddresslessForCountries(std::move(allowAddresslessForCountries))
|
||||
, m_verbose(verbose)
|
||||
, m_threadsCount(threadsCount)
|
||||
, m_geoObjectsKv(InitGeoObjectsKv(m_pathOutGeoObjectsKv))
|
||||
|
@ -403,7 +384,7 @@ bool GeoObjectsGenerator::GenerateGeoObjectsPrivate()
|
|||
auto geoObjectIndexFuture =
|
||||
std::async(std::launch::async, MakeTempGeoObjectsIndex, m_pathInGeoObjectsTmpMwm);
|
||||
|
||||
AddThingsWithHousesAndBuildingsAndEnrichWithRegionAddresses(
|
||||
AddBuildingsAndThingsWithHousesThenEnrichAllWithRegionAddresses(
|
||||
m_geoObjectsKv, m_regionInfoGetter, m_pathInGeoObjectsTmpMwm, m_verbose, m_threadsCount);
|
||||
|
||||
LOG(LINFO, ("Geo objects with addresses were built."));
|
||||
|
@ -411,6 +392,7 @@ bool GeoObjectsGenerator::GenerateGeoObjectsPrivate()
|
|||
auto geoObjectIndex = geoObjectIndexFuture.get();
|
||||
|
||||
LOG(LINFO, ("Index was built."));
|
||||
|
||||
if (!geoObjectIndex)
|
||||
return false;
|
||||
|
||||
|
@ -418,22 +400,18 @@ bool GeoObjectsGenerator::GenerateGeoObjectsPrivate()
|
|||
|
||||
LOG(LINFO, ("Enrich address points with outer null building geometry."));
|
||||
|
||||
EnrichPointsWithOuterBuildingGeometry(geoObjectInfoGetter, m_pathInGeoObjectsTmpMwm,
|
||||
m_threadsCount);
|
||||
NullBuildingsInfo const & buildingInfo = EnrichPointsWithOuterBuildingGeometry(
|
||||
geoObjectInfoGetter, m_pathInGeoObjectsTmpMwm, m_threadsCount);
|
||||
|
||||
std::ofstream streamIdsWithoutAddress(m_pathOutIdsWithoutAddress);
|
||||
|
||||
AddPoisEnrichedWithHouseAddresses(m_geoObjectsKv, geoObjectInfoGetter, m_pathInGeoObjectsTmpMwm,
|
||||
streamIdsWithoutAddress, m_verbose, m_threadsCount);
|
||||
|
||||
if (m_allowAddresslessForCountries != "*")
|
||||
{
|
||||
FilterAddresslessByCountryAndRepackMwm(m_pathInGeoObjectsTmpMwm, m_allowAddresslessForCountries,
|
||||
m_regionInfoGetter, m_threadsCount);
|
||||
FilterAddresslessThanGaveTheirGeometryToInnerPoints(m_pathInGeoObjectsTmpMwm, buildingInfo,
|
||||
m_threadsCount);
|
||||
|
||||
LOG(LINFO, ("Addressless buildings are filtered except countries",
|
||||
m_allowAddresslessForCountries, "."));
|
||||
}
|
||||
LOG(LINFO, ("Addressless buildings with geometry we used for inner points were filtered"));
|
||||
|
||||
LOG(LINFO, ("Geo objects without addresses were built."));
|
||||
LOG(LINFO, ("Geo objects key-value storage saved to", m_pathOutGeoObjectsKv));
|
||||
|
|
|
@ -49,23 +49,18 @@ public:
|
|||
return m_regionInfoGetter ? m_regionInfoGetter->FindDeepest(point)
|
||||
: m_externalInfoGetter->operator()(point);
|
||||
}
|
||||
|
||||
private:
|
||||
boost::optional<regions::RegionInfoGetter> m_regionInfoGetter;
|
||||
boost::optional<RegionInfoGetter> m_externalInfoGetter;
|
||||
};
|
||||
|
||||
// |allowAddresslessForCountries| specifies countries for which addressless buldings are
|
||||
// constructed in index and key-value files. Countries are specified by osm's default local name
|
||||
// (or part of name) separated by commas. Default value is '*' (for all countries).
|
||||
GeoObjectsGenerator(std::string pathInRegionsIndex, std::string pathInRegionsKv,
|
||||
std::string pathInGeoObjectsTmpMwm, std::string pathOutIdsWithoutAddress,
|
||||
std::string pathOutGeoObjectsKv, std::string allowAddresslessForCountries,
|
||||
bool verbose, size_t threadsCount);
|
||||
std::string pathOutGeoObjectsKv, bool verbose, size_t threadsCount);
|
||||
|
||||
GeoObjectsGenerator(RegionInfoGetter && regionInfoGetter, std::string pathInGeoObjectsTmpMwm,
|
||||
std::string pathOutIdsWithoutAddress, std::string pathOutGeoObjectsKv,
|
||||
std::string allowAddresslessForCountries, bool verbose, size_t threadsCount);
|
||||
bool verbose, size_t threadsCount);
|
||||
|
||||
// This function generates key-value pairs for geo objects.
|
||||
// First, we try to generate key-value pairs only for houses, since we cannot say anything about
|
||||
|
@ -74,6 +69,11 @@ public:
|
|||
// index for houses.
|
||||
bool GenerateGeoObjects();
|
||||
|
||||
KeyValueStorage const & GetKeyValueStorage() const
|
||||
{
|
||||
return m_geoObjectsKv;
|
||||
}
|
||||
|
||||
private:
|
||||
bool GenerateGeoObjectsPrivate();
|
||||
static KeyValueStorage InitGeoObjectsKv(std::string const & pathOutGeoObjectsKv)
|
||||
|
@ -85,7 +85,7 @@ private:
|
|||
std::string m_pathInGeoObjectsTmpMwm;
|
||||
std::string m_pathOutIdsWithoutAddress;
|
||||
std::string m_pathOutGeoObjectsKv;
|
||||
std::string m_allowAddresslessForCountries;
|
||||
|
||||
bool m_verbose = false;
|
||||
size_t m_threadsCount = 1;
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue