[generator] Refactor generator_tool options: generic output option -> specific file options

This commit is contained in:
Anatoly Serdtcev 2019-10-18 17:14:46 +03:00 committed by LaGrunge
parent 44790df52c
commit 32a9c3207f
11 changed files with 118 additions and 123 deletions

View file

@ -35,16 +35,10 @@ struct GenerateInfo
// Directory for all files.
std::string m_dataPath;
// Current generated file name if --output option is defined.
std::string m_fileName;
NodeStorageType m_nodeStorageType;
OsmSourceType m_osmFileType;
std::string m_osmFileName;
std::vector<std::string> m_bucketNames;
bool m_genAddresses = false;
bool m_preloadCache = false;
bool m_verbose = false;
@ -89,11 +83,5 @@ struct GenerateInfo
{
return base::JoinPath(m_dataPath, fileName + ext);
}
// Returns the result of GetTargetFileName(m_fileName, ADDR_FILE_EXTENSION)
// when m_genAddresses is set and m_fileName is non-empty; otherwise returns
// an empty string.
std::string GetAddressesFileName() const
{
return m_genAddresses && !m_fileName.empty() ?
GetTargetFileName(m_fileName, ADDR_FILE_EXTENSION) : "";
}
};
} // namespace feature

View file

@ -57,7 +57,6 @@ struct CliCommandOptions
std::string m_osm_file_name;
std::string m_node_storage;
std::string m_user_resource_path;
std::string m_output;
std::string m_data_path;
std::string m_nodes_list_path;
std::string m_regions_features;
@ -69,6 +68,8 @@ struct CliCommandOptions
std::string m_streets_key_value;
std::string m_streets_features;
std::string m_geo_objects_features;
std::string m_geo_objects_index;
std::string m_geocoder_token_index;
std::string m_key_value;
bool m_preprocess = false;
bool m_generate_region_features = false;
@ -102,9 +103,6 @@ CliCommandOptions DefineOptions(int argc, char * argv[])
("user_resource_path",
po::value(&o.m_user_resource_path)->default_value(""),
"User defined resource path for classificator.txt and etc.")
("output",
po::value(&o.m_output)->default_value(""),
"File name for process (without 'mwm' ext).")
("node_storage",
po::value(&o.m_node_storage)->default_value("map"),
"Type of storage for intermediate points representation. Available: raw, map, mem.")
@ -137,34 +135,40 @@ CliCommandOptions DefineOptions(int argc, char * argv[])
"Path to file containing list of node ids we need to add to locality index. May be empty.")
("regions_index",
po::value(&o.m_regions_index)->default_value(""),
"Input regions index file.")
"Input/Output regions index file.")
("regions_key_value",
po::value(&o.m_regions_key_value)->default_value(""),
"Input regions key-value file.")
"Input/Output regions key-value file.")
("streets_features",
po::value(&o.m_streets_features)->default_value(""),
"Input tmp.mwm file with streets.")
"Input/Output tmp.mwm file with streets.")
("streets_key_value",
po::value(&o.m_streets_key_value)->default_value(""),
"Output streets key-value file.")
("geo_objects_features",
po::value(&o.m_geo_objects_features)->default_value(""),
"Input tmp.mwm file with geo objects.")
"Input/Output tmp.mwm file with geo objects.")
("ids_without_addresses",
po::value(&o.m_ids_without_addresses)->default_value(""),
"Output file with objects ids without addresses.")
("geo_objects_index",
po::value(&o.m_geo_objects_index)->default_value(""),
"Input/Output geo objects index file.")
("geo_objects_key_value",
po::value(&o.m_geo_objects_key_value)->default_value(""),
"Output geo objects key-value file.")
"Input/Output geo objects key-value file.")
("allow_addressless_for_countries",
po::value(&o.m_allow_addressless_for_countries)->default_value("*"),
"Allow addressless buildings for only specified countries separated by commas.")
("regions_features",
po::value(&o.m_regions_features)->default_value(""),
"Input tmp.mwm file with regions.")
"Input/Output tmp.mwm file with regions.")
("generate_geocoder_token_index",
po::value(&o.m_generate_geocoder_token_index)->default_value(false),
"Generate geocoder token index.")
("geocoder_token_index",
po::value(&o.m_geocoder_token_index)->default_value(""),
"Geocoder token index file.")
("key_value",
po::value(&o.m_key_value)->default_value(""),
"Input key-value file (.jsonl or .jsonl.gz).")
@ -227,7 +231,8 @@ int GeneratorToolMain(int argc, char ** argv)
feature::GenerateInfo genInfo;
genInfo.m_verbose = options.m_verbose;
genInfo.m_dataPath = path;
genInfo.m_targetDir = genInfo.m_tmpDir = path;
genInfo.m_targetDir = path;
genInfo.m_tmpDir = path;
/// @todo It would probably be better to add a separate option for .mwm.tmp files.
if (!options.m_data_path.empty())
@ -242,7 +247,9 @@ int GeneratorToolMain(int argc, char ** argv)
genInfo.SetOsmFileType(options.m_osm_file_type);
genInfo.m_osmFileName = options.m_osm_file_name;
genInfo.m_fileName = options.m_output;
auto const regionsInfoPath =
genInfo.GetTmpFileName("region", regions::CollectorRegionInfo::kDefaultExt);
// Use merged style.
GetStyleReader().SetCurrentStyle(MapStyleMerged);
@ -265,21 +272,16 @@ int GeneratorToolMain(int argc, char ** argv)
{
RawGenerator rawGenerator(genInfo, threadsCount);
if (options.m_generate_region_features)
rawGenerator.GenerateRegionFeatures(options.m_output);
rawGenerator.GenerateRegionFeatures(options.m_regions_features, regionsInfoPath);
if (options.m_generate_streets_features)
rawGenerator.GenerateStreetsFeatures(options.m_output);
rawGenerator.GenerateStreetsFeatures(options.m_streets_features);
if (options.m_generate_geo_objects_features)
rawGenerator.GenerateGeoObjectsFeatures(options.m_output);
rawGenerator.GenerateGeoObjectsFeatures(options.m_geo_objects_features);
if (!rawGenerator.Execute())
return EXIT_FAILURE;
genInfo.m_bucketNames = rawGenerator.GetNames();
}
if (genInfo.m_bucketNames.empty() && !options.m_output.empty())
genInfo.m_bucketNames.push_back(options.m_output);
if (!options.m_streets_key_value.empty())
{
streets::GenerateStreets(options.m_regions_index, options.m_regions_key_value,
@ -296,71 +298,72 @@ int GeneratorToolMain(int argc, char ** argv)
return EXIT_FAILURE;
}
if (options.m_generate_geo_objects_index || options.m_generate_regions)
if (options.m_generate_geo_objects_index)
{
if (options.m_output.empty())
if (options.m_geo_objects_index.empty())
{
LOG(LCRITICAL, ("Bad output or intermediate_data_path. Output:", options.m_output));
LOG(LCRITICAL, ("Unspecified geo_objects_index file."));
return EXIT_FAILURE;
}
auto const locDataFile = base::JoinPath(path, options.m_output + LOC_DATA_FILE_EXTENSION);
auto const outFile = base::JoinPath(path, options.m_output + LOC_IDX_FILE_EXTENSION);
if (options.m_generate_geo_objects_index)
auto const locDataFile =
base::FilenameWithoutExt(options.m_geo_objects_index) + LOC_DATA_FILE_EXTENSION;
if (!feature::GenerateGeoObjectsData(options.m_geo_objects_features,
options.m_streets_features,
options.m_nodes_list_path, locDataFile))
{
if (!feature::GenerateGeoObjectsData(options.m_geo_objects_features,
options.m_streets_features,
options.m_nodes_list_path, locDataFile))
{
LOG(LCRITICAL, ("Error generating geo objects data."));
return EXIT_FAILURE;
}
LOG(LINFO, ("Saving geo objects index to", outFile));
if (!indexer::BuildGeoObjectsIndexFromDataFile(
locDataFile, outFile, DataVersion::LoadFromPath(path).GetVersionJson(),
DataVersion::kFileTag))
{
LOG(LCRITICAL, ("Error generating geo objects index."));
return EXIT_FAILURE;
}
LOG(LCRITICAL, ("Error generating geo objects data."));
return EXIT_FAILURE;
}
if (options.m_generate_regions)
LOG(LINFO, ("Saving geo objects index to", options.m_geo_objects_index));
if (!indexer::BuildGeoObjectsIndexFromDataFile(
locDataFile, options.m_geo_objects_index,
DataVersion::LoadFromPath(path).GetVersionJson(),
DataVersion::kFileTag))
{
if (!feature::GenerateRegionsData(options.m_regions_features, locDataFile))
{
LOG(LCRITICAL, ("Error generating regions data."));
return EXIT_FAILURE;
}
LOG(LCRITICAL, ("Error generating geo objects index."));
return EXIT_FAILURE;
}
}
LOG(LINFO, ("Saving regions index to", outFile));
if (options.m_generate_regions)
{
if (options.m_regions_index.empty())
{
LOG(LCRITICAL, ("Unspecified regions_index file."));
return EXIT_FAILURE;
}
if (!indexer::BuildRegionsIndexFromDataFile(locDataFile, outFile,
DataVersion::LoadFromPath(path).GetVersionJson(),
DataVersion::kFileTag))
{
LOG(LCRITICAL, ("Error generating regions index."));
return EXIT_FAILURE;
}
if (!feature::GenerateBorders(options.m_regions_features, outFile))
{
LOG(LCRITICAL, ("Error generating regions borders."));
return EXIT_FAILURE;
}
auto const locDataFile =
base::FilenameWithoutExt(options.m_regions_index) + LOC_DATA_FILE_EXTENSION;
if (!feature::GenerateRegionsData(options.m_regions_features, locDataFile))
{
LOG(LCRITICAL, ("Error generating regions data."));
return EXIT_FAILURE;
}
LOG(LINFO, ("Saving regions index to", options.m_regions_index));
if (!indexer::BuildRegionsIndexFromDataFile(locDataFile, options.m_regions_index,
DataVersion::LoadFromPath(path).GetVersionJson(),
DataVersion::kFileTag))
{
LOG(LCRITICAL, ("Error generating regions index."));
return EXIT_FAILURE;
}
if (!feature::GenerateBorders(options.m_regions_features, options.m_regions_index))
{
LOG(LCRITICAL, ("Error generating regions borders."));
return EXIT_FAILURE;
}
}
if (options.m_generate_regions_kv)
{
auto const pathInRegionsCollector =
genInfo.GetTmpFileName(genInfo.m_fileName, regions::CollectorRegionInfo::kDefaultExt);
auto const pathInRegionsTmpMwm = genInfo.GetTmpFileName(genInfo.m_fileName);
auto const pathOutRepackedRegionsTmpMwm =
genInfo.GetTmpFileName(genInfo.m_fileName + "_repacked");
auto const pathOutRegionsKv = genInfo.GetIntermediateFileName(genInfo.m_fileName, ".jsonl");
regions::GenerateRegions(pathInRegionsTmpMwm, pathInRegionsCollector, pathOutRegionsKv,
pathOutRepackedRegionsTmpMwm, options.m_verbose, threadsCount);
regions::GenerateRegions(options.m_regions_features, regionsInfoPath,
options.m_regions_key_value, options.m_verbose, threadsCount);
}
if (options.m_generate_geocoder_token_index)
@ -370,12 +373,15 @@ int GeneratorToolMain(int argc, char ** argv)
LOG(LCRITICAL, ("Unspecified key-value file."));
return EXIT_FAILURE;
}
if (options.m_geocoder_token_index.empty())
{
LOG(LCRITICAL, ("Unspecified geocoder_token_index file."));
return EXIT_FAILURE;
}
geocoder::Geocoder geocoder;
geocoder.LoadFromJsonl(options.m_key_value, threadsCount);
auto const tokenIndexFile = base::JoinPath(path, options.m_output);
geocoder.SaveToBinaryIndex(tokenIndexFile);
geocoder.SaveToBinaryIndex(options.m_geocoder_token_index);
}
return 0;

View file

@ -13,8 +13,10 @@
#include "geometry/convex_hull.hpp"
#include "platform/platform.hpp"
#include "base/logging.hpp"
#include "base/scope_guard.hpp"
#include "base/string_utils.hpp"
#include "base/timer.hpp"

View file

@ -36,10 +36,12 @@ std::shared_ptr<FeatureProcessorQueue> RawGenerator::GetQueue()
return m_queue;
}
void RawGenerator::GenerateRegionFeatures(string const & filename)
void RawGenerator::GenerateRegionFeatures(
string const & regionsFeaturesPath, std::string const & regionsInfoPath)
{
auto processor = CreateProcessor(ProcessorType::Simple, m_queue, filename);
m_translators->Append(CreateTranslator(TranslatorType::Regions, processor, m_cache, m_genInfo));
auto processor = CreateProcessor(ProcessorType::Simple, m_queue, regionsFeaturesPath);
m_translators->Append(
CreateTranslator(TranslatorType::Regions, processor, m_cache, regionsInfoPath));
}
void RawGenerator::GenerateStreetsFeatures(string const & filename)
@ -112,7 +114,7 @@ bool RawGenerator::GenerateFilteredFeatures()
CHECK(sourceProcessor, ());
TranslatorsPool translators(m_translators, m_threadsCount);
RawGeneratorWriter rawGeneratorWriter(m_queue, m_genInfo.m_tmpDir);
RawGeneratorWriter rawGeneratorWriter(m_queue);
rawGeneratorWriter.Run();
size_t element_pos = 0;

View file

@ -20,7 +20,8 @@ public:
explicit RawGenerator(feature::GenerateInfo & genInfo, size_t threadsCount = 1,
size_t chunkSize = 1024);
void GenerateRegionFeatures(std::string const & filename);
void GenerateRegionFeatures(
std::string const & regionsFeaturesPath, std::string const & regionsInfoPath);
void GenerateStreetsFeatures(std::string const & filename);
void GenerateGeoObjectsFeatures(std::string const & filename);
void GenerateCustom(std::shared_ptr<TranslatorInterface> const & translator);

View file

@ -8,9 +8,9 @@
namespace generator
{
// Two-argument constructor: stores the shared feature-processor queue in
// m_queue and the given path in m_path. No other work is done here.
RawGeneratorWriter::RawGeneratorWriter(std::shared_ptr<FeatureProcessorQueue> const & queue,
std::string const & path)
: m_queue(queue), m_path(path) {}
// Single-argument constructor: stores the shared feature-processor queue in
// m_queue. The body is intentionally empty.
RawGeneratorWriter::RawGeneratorWriter(std::shared_ptr<FeatureProcessorQueue> const & queue)
: m_queue(queue)
{ }
RawGeneratorWriter::~RawGeneratorWriter()
@ -59,8 +59,7 @@ void RawGeneratorWriter::Write(std::vector<ProcessedData> const & vecChunks)
auto writerIt = m_writers.find(affiliation);
if (writerIt == std::cend(m_writers))
{
auto path = base::JoinPath(m_path, affiliation + DATA_FILE_EXTENSION_TMP);
auto writer = std::make_unique<FileWriter>(std::move(path));
auto writer = std::make_unique<FileWriter>(std::move(affiliation));
writerIt = m_writers.emplace(affiliation, std::move(writer)).first;
}

View file

@ -14,8 +14,7 @@ namespace generator
class RawGeneratorWriter
{
public:
RawGeneratorWriter(std::shared_ptr<FeatureProcessorQueue> const & queue,
std::string const & path);
RawGeneratorWriter(std::shared_ptr<FeatureProcessorQueue> const & queue);
~RawGeneratorWriter();
void Run();
@ -29,7 +28,6 @@ private:
std::thread m_thread;
std::shared_ptr<FeatureProcessorQueue> m_queue;
std::string m_path;
std::unordered_map<std::string, std::unique_ptr<FileWriter>> m_writers;
};
} // namespace generator

View file

@ -9,9 +9,11 @@
#include "geometry/mercator.hpp"
#include "platform/platform.hpp"
#include "base/assert.hpp"
#include "base/logging.hpp"
#include "base/scope_guard.hpp"
#include "base/timer.hpp"
#include <algorithm>
@ -35,24 +37,23 @@ namespace
class RegionsGenerator
{
public:
RegionsGenerator(std::string const & pathInRegionsTmpMwm,
std::string const & pathInRegionsCollector, std::string const & pathOutRegionsKv,
std::string const & pathOutRepackedRegionsTmpMwm, bool verbose,
size_t threadsCount)
: m_pathInRegionsTmpMwm{pathInRegionsTmpMwm}
RegionsGenerator(std::string const & pathRegionsTmpMwm,
std::string const & pathInRegionsCollector,
std::string const & pathOutRegionsKv,
bool verbose, size_t threadsCount)
: m_pathRegionsTmpMwm{pathRegionsTmpMwm}
, m_pathOutRegionsKv{pathOutRegionsKv}
, m_pathOutRepackedRegionsTmpMwm{pathOutRepackedRegionsTmpMwm}
, m_verbose{verbose}
, m_regionsInfoCollector{pathInRegionsCollector}
, m_regionsKv{pathOutRegionsKv, std::ofstream::out}
{
LOG(LINFO, ("Start generating regions from", m_pathInRegionsTmpMwm));
LOG(LINFO, ("Start generating regions from", m_pathRegionsTmpMwm));
auto timer = base::Timer{};
RegionsBuilder::Regions regions;
PlacePointsMap placePointsMap;
std::tie(regions, placePointsMap) =
ReadDatasetFromTmpMwm(m_pathInRegionsTmpMwm, m_regionsInfoCollector);
ReadDatasetFromTmpMwm(m_pathRegionsTmpMwm, m_regionsInfoCollector);
RegionsBuilder builder{std::move(regions), std::move(placePointsMap), threadsCount};
GenerateRegions(builder);
@ -252,7 +253,9 @@ private:
void RepackTmpMwm()
{
feature::FeaturesCollector featuresCollector{m_pathOutRepackedRegionsTmpMwm};
auto const repackedTmpMwm = GetPlatform().TmpPathForFile();
SCOPE_GUARD(removeRepackedTmpMwm, std::bind(Platform::RemoveFileIfExists, repackedTmpMwm));
feature::FeaturesCollector featuresCollector{repackedTmpMwm};
std::set<base::GeoObjectId> processedObjects;
auto const toDo = [&](FeatureBuilder & fb, uint64_t /* currPos */) {
auto const id = fb.GetMostGenericOsmId();
@ -272,9 +275,10 @@ private:
}
};
LOG(LINFO, ("Start regions repacking from", m_pathInRegionsTmpMwm));
feature::ForEachFromDatRawFormat(m_pathInRegionsTmpMwm, toDo);
LOG(LINFO, ("Repacked regions temporary mwm saved to", m_pathOutRepackedRegionsTmpMwm));
LOG(LINFO, ("Start regions repacking for", m_pathRegionsTmpMwm));
feature::ForEachFromDatRawFormat(m_pathRegionsTmpMwm, toDo);
CHECK(base::RenameFileX(repackedTmpMwm, m_pathRegionsTmpMwm), ());
LOG(LINFO, ("Repacked regions temporary mwm saved to", m_pathRegionsTmpMwm));
}
void ResetGeometry(FeatureBuilder & fb, Region const & region)
@ -304,9 +308,8 @@ private:
return seq;
}
std::string m_pathInRegionsTmpMwm;
std::string m_pathRegionsTmpMwm;
std::string m_pathOutRegionsKv;
std::string m_pathOutRepackedRegionsTmpMwm;
bool m_verbose{false};
@ -319,14 +322,13 @@ private:
};
} // namespace
void GenerateRegions(std::string const & pathInRegionsTmpMwm,
void GenerateRegions(std::string const & pathRegionsTmpMwm,
std::string const & pathInRegionsCollector,
std::string const & pathOutRegionsKv,
std::string const & pathOutRepackedRegionsTmpMwm, bool verbose,
size_t threadsCount)
bool verbose, size_t threadsCount)
{
RegionsGenerator(pathInRegionsTmpMwm, pathInRegionsCollector, pathOutRegionsKv,
pathOutRepackedRegionsTmpMwm, verbose, threadsCount);
RegionsGenerator(pathRegionsTmpMwm, pathInRegionsCollector, pathOutRegionsKv,
verbose, threadsCount);
}
} // namespace regions
} // namespace generator

View file

@ -6,10 +6,9 @@ namespace generator
{
namespace regions
{
void GenerateRegions(std::string const & pathInRegionsTmpMwm,
void GenerateRegions(std::string const & pathRegionsTmpMwm,
std::string const & pathInRegionsCollector,
std::string const & pathOutRegionsKv,
std::string const & pathOutRepackedRegionsTmpMwm,
bool verbose,
size_t threadsCount = 1);
} // namespace regions

View file

@ -53,14 +53,12 @@ public:
TranslatorRegion::TranslatorRegion(std::shared_ptr<FeatureProcessorInterface> const & processor,
std::shared_ptr<cache::IntermediateData> const & cache,
feature::GenerateInfo const & info)
std::string const & regionsInfoPath)
: Translator(processor, cache, std::make_shared<FeatureMakerSimple>(cache))
{
SetFilter(std::make_shared<FilterRegions>());
auto filename = info.GetTmpFileName(info.m_fileName, regions::CollectorRegionInfo::kDefaultExt);
SetCollector(std::make_shared<regions::CollectorRegionInfo>(filename));
SetCollector(std::make_shared<regions::CollectorRegionInfo>(regionsInfoPath));
}
std::shared_ptr<TranslatorInterface>

View file

@ -23,7 +23,7 @@ class TranslatorRegion : public Translator
public:
explicit TranslatorRegion(std::shared_ptr<FeatureProcessorInterface> const & processor,
std::shared_ptr<cache::IntermediateData> const & cache,
feature::GenerateInfo const & info);
std::string const & regionsInfoPath);
// TranslatorInterface overrides:
std::shared_ptr<TranslatorInterface> Clone() const override;