[generator:regions] Optimize regions KV-file write

This commit is contained in:
Anatoly Serdtcev 2019-04-02 19:59:49 +03:00
parent 671e84fbe3
commit 61672cbf8f
9 changed files with 59 additions and 69 deletions

View file

@ -146,13 +146,13 @@ RegionsBuilder::Regions MakeTestDataSet1(RegionInfo & collector)
return regions;
}
class Helper : public ToStringPolicyInterface
class StringJoinPolicy : public ToStringPolicyInterface
{
public:
std::string ToString(Node::PtrList const & nodePtrList) const override
std::string ToString(NodePath const & nodePath) const override
{
std::stringstream stream;
for (auto const & n : nodePtrList)
for (auto const & n : nodePath)
stream << n->GetData().GetName();
return stream.str();
@ -196,20 +196,22 @@ UNIT_TEST(RegionsBuilderTest_GetCountryTrees)
auto const filename = MakeCollectorData();
RegionInfo collector(filename);
std::vector<std::string> bankOfNames;
RegionsBuilder builder(MakeTestDataSet1(collector), std::make_unique<Helper>());
RegionsBuilder builder(MakeTestDataSet1(collector));
builder.ForEachNormalizedCountry([&](std::string const & name, Node::Ptr const & tree) {
auto const idStringList = builder.ToIdStringList(tree);
for (auto const & idString : idStringList)
bankOfNames.push_back(idString.second);
Visit(tree, [&](Node::Ptr const & node) {
auto path = MakeNodePath(node);
StringJoinPolicy stringifier;
bankOfNames.push_back(stringifier.ToString(path));
});
});
TEST(ExistsName(bankOfNames, "Country_2"), ());
TEST(ExistsName(bankOfNames, "Country_2_Region_8Country_2"), ());
TEST(ExistsName(bankOfNames, "Country_2Country_2_Region_8"), ());
TEST(ExistsName(bankOfNames, "Country_1"), ());
TEST(ExistsName(bankOfNames, "Country_1_Region_3Country_1"), ());
TEST(ExistsName(bankOfNames, "Country_1_Region_4Country_1"), ());
TEST(ExistsName(bankOfNames, "Country_1_Region_5Country_1"), ());
TEST(ExistsName(bankOfNames, "Country_1_Region_5_Subregion_6Country_1_Region_5Country_1"), ());
TEST(ExistsName(bankOfNames, "Country_1_Region_5_Subregion_7Country_1_Region_5Country_1"), ());
TEST(ExistsName(bankOfNames, "Country_1Country_1_Region_3"), ());
TEST(ExistsName(bankOfNames, "Country_1Country_1_Region_4"), ());
TEST(ExistsName(bankOfNames, "Country_1Country_1_Region_5"), ());
TEST(ExistsName(bankOfNames, "Country_1Country_1_Region_5Country_1_Region_5_Subregion_6"), ());
TEST(ExistsName(bankOfNames, "Country_1Country_1_Region_5Country_1_Region_5_Subregion_7"), ());
}

View file

@ -43,4 +43,12 @@ private:
PtrList m_children;
WeakPtr m_parent;
};
// Calls |visitor| for |tree| itself and then, recursively, for every node of each subtree
// returned by GetChildren(): a node is always visited before its children.
// |tree| must not be null - it is dereferenced unconditionally.
//
// |visitor| is accepted by forwarding reference so that any callable can be passed, but it is
// intentionally used as an lvalue everywhere. The same object is invoked once per node, so
// std::forward-ing it on every call could move from it (or pick an rvalue-qualified
// operator()) on the first node and then reuse it in a moved-from state.
template <typename Data, typename Visitor>
void Visit(std::shared_ptr<PlaceNode<Data>> const & tree, Visitor && visitor)
{
  visitor(tree);
  for (auto const & subtree : tree->GetChildren())
    Visit(subtree, visitor);
}
} // namespace generator

View file

@ -96,6 +96,21 @@ size_t MaxDepth(Node::Ptr node)
return depth;
}
// Builds the chain of nodes leading to |node|: follows GetParent() links upward starting at
// |node|, then reverses the collected sequence so that the topmost ancestor comes first and
// |node| itself comes last. Returns an empty path when |node| is null.
NodePath MakeNodePath(Node::Ptr const & node)
{
  NodePath chain;
  for (auto it = node; it; it = it->GetParent())
    chain.push_back(it);

  std::reverse(chain.begin(), chain.end());
  return chain;
}
void PrintTree(Node::Ptr node, std::ostream & stream = std::cout, std::string prefix = "",
bool isTail = true)
{

View file

@ -10,11 +10,14 @@ namespace generator
namespace regions
{
// Tree node whose payload is a Region.
using Node = PlaceNode<Region>;
// Sequence of nodes; MakeNodePath() fills it ordered from the topmost ancestor down to the
// node the path was built for.
using NodePath = std::vector<Node::Ptr>;
size_t TreeSize(Node::Ptr node);
size_t MaxDepth(Node::Ptr node);
// Returns the path to |node|: its topmost ancestor first, |node| itself last.
// The path is empty when |node| is null.
NodePath MakeNodePath(Node::Ptr const & node);
// Prints |tree| to |stream|; the output goes to std::cout when no stream is given.
void DebugPrintTree(Node::Ptr const & tree, std::ostream & stream = std::cout);
// This function merges two trees if the roots have the same ids.

View file

@ -57,8 +57,7 @@ public:
Transliteration::Instance().Init(GetPlatform().ResourcesDir());
RegionsBuilder::Regions regions = ReadAndFixData();
auto jsonPolicy = std::make_unique<JsonPolicy>(m_verbose);
RegionsBuilder builder{std::move(regions), std::move(jsonPolicy), threadsCount};
RegionsBuilder builder{std::move(regions), threadsCount};
GenerateRegions(builder);
LOG(LINFO, ("Finish generating regions.", timer.ElapsedSeconds(), "seconds."));
@ -78,14 +77,16 @@ private:
DebugPrintTree(tree);
LOG(LINFO, ("Processing country", name));
auto const idStringList = builder.ToIdStringList(tree);
for (auto const & s : idStringList)
{
regionsKv << static_cast<int64_t>(s.first.GetEncodedId()) << " " << s.second << "\n";
auto jsonPolicy = JsonPolicy{m_verbose};
Visit(tree, [&](auto && node) {
auto id = node->GetData().GetId();
auto path = MakeNodePath(node);
regionsKv << static_cast<int64_t>(id.GetEncodedId()) << " " << jsonPolicy.ToString(path) << "\n";
++countIds;
if (!setIds.insert(s.first).second)
LOG(LWARNING, ("Id alredy exists:", s.first));
}
if (!setIds.insert(id).second)
LOG(LWARNING, ("Id alredy exists:", id));
});
});
LOG(LINFO, ("Regions objects key-value for", builder.GetCountryNames().size(),

View file

@ -30,14 +30,10 @@ Node::Ptr ShrinkToFit(Node::Ptr p)
}
} // namespace
RegionsBuilder::RegionsBuilder(Regions && regions,
std::unique_ptr<ToStringPolicyInterface> toStringPolicy,
size_t threadsCount)
: m_toStringPolicy(std::move(toStringPolicy))
, m_regions(std::move(regions))
RegionsBuilder::RegionsBuilder(Regions && regions, size_t threadsCount)
: m_regions(std::move(regions))
, m_threadsCount(threadsCount)
{
ASSERT(m_toStringPolicy, ());
ASSERT(m_threadsCount != 0, ());
auto const isCountry = [](Region const & r) { return r.IsCountry(); };
@ -47,9 +43,6 @@ RegionsBuilder::RegionsBuilder(Regions && regions,
std::sort(std::begin(m_countries), std::end(m_countries), cmp);
}
RegionsBuilder::RegionsBuilder(Regions && regions, size_t threadsCount)
: RegionsBuilder(std::move(regions), std::make_unique<JsonPolicy>(), threadsCount) {}
RegionsBuilder::Regions const & RegionsBuilder::GetCountries() const
{
return m_countries;
@ -69,33 +62,6 @@ RegionsBuilder::StringsList RegionsBuilder::GetCountryNames() const
return result;
}
// Traverses |tree| in breadth-first order and, for every node, emits a pair made of the node's
// id and the string that m_toStringPolicy produces for the node's ancestor chain. The chain
// passed to the policy starts with the node itself and ends with the root.
RegionsBuilder::IdStringList RegionsBuilder::ToIdStringList(Node::Ptr const & tree) const
{
  IdStringList idStrings;
  std::queue<Node::Ptr> pending;
  pending.push(tree);

  while (!pending.empty())
  {
    auto const node = pending.front();
    pending.pop();

    // The node followed by all of its ancestors up to the root.
    Node::PtrList chain;
    for (auto ancestor = node; ancestor; ancestor = ancestor->GetParent())
      chain.push_back(ancestor);

    auto text = m_toStringPolicy->ToString(chain);
    auto const id = chain.front()->GetData().GetId();
    idStrings.emplace_back(std::make_pair(id, std::move(text)));

    for (auto const & child : node->GetChildren())
      pending.push(child);
  }

  return idStrings;
}
Node::PtrList RegionsBuilder::MakeSelectedRegionsByCountry(Region const & country,
Regions const & allRegions)
{

View file

@ -25,22 +25,17 @@ public:
using CountryTrees = std::multimap<std::string, Node::Ptr>;
using NormalizedCountryFn = std::function<void(std::string const &, Node::Ptr const &)>;
explicit RegionsBuilder(Regions && regions,
std::unique_ptr<ToStringPolicyInterface> toStringPolicy,
size_t threadsCount = 1);
explicit RegionsBuilder(Regions && regions, size_t threadsCount = 1);
Regions const & GetCountries() const;
StringsList GetCountryNames() const;
void ForEachNormalizedCountry(NormalizedCountryFn fn);
IdStringList ToIdStringList(Node::Ptr const & tree) const;
private:
static Node::PtrList MakeSelectedRegionsByCountry(Region const & country,
Regions const & allRegions);
static Node::Ptr BuildCountryRegionTree(Region const & country, Regions const & allRegions);
std::vector<Node::Ptr> BuildCountryRegionTrees(RegionsBuilder::Regions const & countries);
std::unique_ptr<ToStringPolicyInterface> m_toStringPolicy;
Regions m_countries;
Regions m_regions;
size_t m_threadsCount;

View file

@ -14,10 +14,10 @@ namespace generator
{
namespace regions
{
std::string JsonPolicy::ToString(Node::PtrList const & nodePtrList) const
std::string JsonPolicy::ToString(NodePath const & path) const
{
auto const & main = nodePtrList.front()->GetData();
auto const & country = nodePtrList.back()->GetData();
auto const & country = path.front()->GetData();
auto const & main = path.back()->GetData();
auto geometry = base::NewJSONObject();
ToJSONObject(*geometry, "type", "Point");
@ -32,7 +32,7 @@ std::string JsonPolicy::ToString(Node::PtrList const & nodePtrList) const
auto address = base::NewJSONObject();
auto const mainLabel = main.GetLabel();
boost::optional<int64_t> pid;
for (auto const & p : boost::adaptors::reverse(nodePtrList))
for (auto const & p : path)
{
auto const & region = p->GetData();

View file

@ -13,7 +13,7 @@ class ToStringPolicyInterface
public:
virtual ~ToStringPolicyInterface() = default;
virtual std::string ToString(Node::PtrList const & nodePtrList) const = 0;
virtual std::string ToString(NodePath const & path) const = 0;
};
class JsonPolicy : public ToStringPolicyInterface
@ -21,7 +21,7 @@ class JsonPolicy : public ToStringPolicyInterface
public:
JsonPolicy(bool extendedOutput = false) : m_extendedOutput(extendedOutput) {}
std::string ToString(Node::PtrList const & nodePtrList) const override;
std::string ToString(NodePath const & path) const override;
private:
bool m_extendedOutput;