From e98b6dc0d7f2327270b4f7fb00432f1ddd83f859 Mon Sep 17 00:00:00 2001 From: vng Date: Sun, 9 Jan 2011 03:47:48 +0200 Subject: [PATCH] Feature statistics calculation. --- indexer/classificator.hpp | 1 + indexer/feature.cpp | 25 ++++- indexer/feature.hpp | 40 +++++++- indexer/feature_visibility.cpp | 22 ++++- indexer/indexer_tool/indexer_tool.cpp | 14 ++- indexer/indexer_tool/indexer_tool.pro | 2 + indexer/indexer_tool/statistics.cpp | 133 ++++++++++++++++++++++++++ indexer/indexer_tool/statistics.hpp | 69 +++++++++++++ tools/win/calc_statistics.bat | 3 + 9 files changed, 301 insertions(+), 8 deletions(-) create mode 100644 indexer/indexer_tool/statistics.cpp create mode 100644 indexer/indexer_tool/statistics.hpp create mode 100644 tools/win/calc_statistics.bat diff --git a/indexer/classificator.hpp b/indexer/classificator.hpp index 8f1dc59c2b..6e43ce5be0 100644 --- a/indexer/classificator.hpp +++ b/indexer/classificator.hpp @@ -237,6 +237,7 @@ public: ProcessObjects(uint32_t type, ToDo & toDo) const; ClassifObject const * GetObject(uint32_t type) const; + string GetFullObjectName(uint32_t type) const; //@} }; diff --git a/indexer/feature.cpp b/indexer/feature.cpp index 36fe7ef4e1..8d0ebc94a5 100644 --- a/indexer/feature.cpp +++ b/indexer/feature.cpp @@ -377,7 +377,7 @@ void FeatureBase::ParseTypes() const { ASSERT(!m_bTypesParsed, ()); - ArrayByteSource source(DataPtr() + 1); + ArrayByteSource source(DataPtr() + m_TypesOffset); for (size_t i = 0; i < GetTypesCount(); ++i) m_Types[i] = ReadVarUint(source); @@ -557,11 +557,12 @@ m2::RectD FeatureType::GetLimitRect(int scale) const return m_LimitRect; } -void FeatureType::ParseGeometry(int scale) const +uint32_t FeatureType::ParseGeometry(int scale) const { if (!m_bOffsetsParsed) ParseOffsets(); + uint32_t sz = 0; if (Header() & HEADER_IS_LINE) { uint32_t const offset = GetOffset(scale, m_lineOffsets); @@ -573,17 +574,20 @@ void FeatureType::ParseGeometry(int scale) const feature::LoadPoints(m_Geometry, src); 
CalcRect(m_Geometry, m_LimitRect); + sz = static_cast(src.Pos() - offset); } } m_bGeometryParsed = true; + return sz; } -void FeatureType::ParseTriangles(int scale) const +uint32_t FeatureType::ParseTriangles(int scale) const { if (!m_bOffsetsParsed) ParseOffsets(); + uint32_t sz = 0; if (Header() & HEADER_IS_AREA) { uint32_t const offset = GetOffset(scale, m_trgOffsets); @@ -595,10 +599,12 @@ void FeatureType::ParseTriangles(int scale) const feature::LoadTriangles(m_Triangles, src); CalcRect(m_Triangles, m_LimitRect); + sz = static_cast(src.Pos() - offset); } } m_bTrianglesParsed = true; + return sz; } void FeatureType::ReadOffsetsImpl(ArrayByteSource & src, offsets_t & offsets) @@ -633,6 +639,7 @@ void FeatureType::ParseOffsets() const ReadOffsetsImpl(src, m_trgOffsets); m_bOffsetsParsed = true; + m_Size = CalcOffset(src); } void FeatureType::ParseAll(int scale) const @@ -643,3 +650,15 @@ void FeatureType::ParseAll(int scale) const if (!m_bTrianglesParsed) ParseTriangles(scale); } + +FeatureType::geom_stat_t FeatureType::GetGeometrySize(int scale) const +{ + uint32_t sz = ParseGeometry(scale); + return geom_stat_t(sz, m_Geometry.size()); +} + +FeatureType::geom_stat_t FeatureType::GetTrianglesSize(int scale) const +{ + uint32_t sz = ParseTriangles(scale); + return geom_stat_t(sz, m_Triangles.size()); +} diff --git a/indexer/feature.hpp b/indexer/feature.hpp index 2204a247d1..630f9e0da2 100644 --- a/indexer/feature.hpp +++ b/indexer/feature.hpp @@ -254,6 +254,12 @@ public: void InitFeatureBuilder(FeatureBuilder1 & fb) const; + /// @name Statistic functions. 
+ //@{ + uint32_t GetNameSize() const { return m_CenterOffset - m_NameOffset; } + uint32_t GetTypesSize() const { return m_LayerOffset - m_TypesOffset; } + //@} + protected: void Deserialize(buffer_t & data, uint32_t offset = 0); string DebugString() const; @@ -278,6 +284,7 @@ protected: mutable m2::RectD m_LimitRect; + static uint32_t const m_TypesOffset = 1; mutable uint32_t m_LayerOffset; mutable uint32_t m_NameOffset; mutable uint32_t m_CenterOffset; @@ -380,10 +387,37 @@ public: /// For test cases only. string DebugString(int scale) const; + /// @name Statistic functions. + //@{ + void ParseBeforeStatistic() const + { + if (!m_bOffsetsParsed) + ParseOffsets(); + } + + uint32_t GetOffsetSize() const { return m_Size - m_GeometryOffset; } + uint32_t GetAllSize() const { return m_Size; } + + struct geom_stat_t + { + uint32_t m_size, m_count; + + geom_stat_t(uint32_t sz, size_t count) + : m_size(sz), m_count(static_cast(count)) + { + } + + geom_stat_t() : m_count(0), m_size(0) {} + }; + + geom_stat_t GetGeometrySize(int scale) const; + geom_stat_t GetTrianglesSize(int scale) const; + //@} + private: void ParseOffsets() const; - void ParseGeometry(int scale) const; - void ParseTriangles(int scale) const; + uint32_t ParseGeometry(int scale) const; + uint32_t ParseTriangles(int scale) const; void ParseAll(int scale) const; @@ -394,6 +428,8 @@ private: mutable bool m_bOffsetsParsed; + mutable uint32_t m_Size; + typedef array offsets_t; // should be synhronized with ARRAY_SIZE(g_arrScales) static void ReadOffsetsImpl(ArrayByteSource & src, offsets_t & offsets); diff --git a/indexer/feature_visibility.cpp b/indexer/feature_visibility.cpp index 42a69629c1..eb82ddd846 100644 --- a/indexer/feature_visibility.cpp +++ b/indexer/feature_visibility.cpp @@ -75,6 +75,24 @@ ClassifObject const * Classificator::GetObject(uint32_t type) const return p; } +string Classificator::GetFullObjectName(uint32_t type) const +{ + ClassifObject const * p = &m_root; + uint8_t i = 0; + string s; 
+ + // get the final ClassifObject + uint8_t v; + while (ftype::GetValue(type, i, v)) + { + ++i; + p = p->GetObject(v); + s = s + p->GetName() + '-'; + } + + return s; +} + namespace feature { @@ -123,7 +141,7 @@ int GetDrawRule(FeatureBase const & f, int level, vector & keys, str Classificator const & c = classif(); get_draw_rule doRules(level, static_cast(geoType), keys, names); - for (size_t i = 0; i < types.m_size; ++i) + for (int i = 0; i < types.m_size; ++i) (void)c.ProcessObjects(types.m_types[i], doRules); return geoType; @@ -203,7 +221,7 @@ bool IsDrawableForIndex(FeatureBase const & f, int level) Classificator const & c = classif(); check_is_drawable doCheck(level); - for (size_t i = 0; i < types.m_size; ++i) + for (int i = 0; i < types.m_size; ++i) if (c.ProcessObjects(types.m_types[i], doCheck)) return true; diff --git a/indexer/indexer_tool/indexer_tool.cpp b/indexer/indexer_tool/indexer_tool.cpp index 1720eae2b0..49d7375c6f 100644 --- a/indexer/indexer_tool/indexer_tool.cpp +++ b/indexer/indexer_tool/indexer_tool.cpp @@ -4,6 +4,7 @@ #include "update_generator.hpp" #include "feature_bucketer.hpp" #include "grid_generator.hpp" +#include "statistics.hpp" #include "../classif_routine.hpp" #include "../features_vector.hpp" @@ -37,6 +38,7 @@ DEFINE_bool(generate_features, false, "2nd pass - generate intermediate features DEFINE_bool(generate_geometry, false, "3rd pass - split and simplify geometry and triangles for features"); DEFINE_bool(generate_index, false, "4rd pass - generate index"); DEFINE_bool(generate_grid, false, "Generate grid for given bucketing_level"); +DEFINE_bool(calc_statistics, false, "Calculate feature statistics for specified mwm bucket files"); DEFINE_bool(use_light_nodes, false, "If true, use temporary vector of nodes, instead of huge temp file"); DEFINE_string(data_path, "", "Working directory, 'path_to_exe/../../data' if empty."); @@ -100,7 +102,8 @@ int main(int argc, char ** argv) genInfo.dir = FLAGS_intermediate_data_path; // 
load classificator only if necessary - if (FLAGS_generate_features || FLAGS_generate_geometry || FLAGS_generate_index) + if (FLAGS_generate_features || FLAGS_generate_geometry || + FLAGS_generate_index || FLAGS_calc_statistics) { classificator::Read(path + "drawing_rules.bin", path + "classificator.txt", @@ -159,6 +162,15 @@ int main(int argc, char ** argv) LOG(LCRITICAL, ("Error generating index.")); } } + + if (FLAGS_calc_statistics) + { + LOG(LINFO, ("Calculating statistics for ", datFile)); + + stats::MapInfo info; + stats::CalcStatistic(datFile, info); + stats::PrintStatistic(info); + } } // Create http update list for countries and corresponding files diff --git a/indexer/indexer_tool/indexer_tool.pro b/indexer/indexer_tool/indexer_tool.pro index 4161af3020..82ddbc5ff1 100644 --- a/indexer/indexer_tool/indexer_tool.pro +++ b/indexer/indexer_tool/indexer_tool.pro @@ -24,6 +24,7 @@ SOURCES += \ tesselator.cpp \ update_generator.cpp \ grid_generator.cpp \ + statistics.cpp \ HEADERS += \ osm_element.hpp \ @@ -35,3 +36,4 @@ HEADERS += \ update_generator.hpp \ feature_bucketer.hpp \ grid_generator.hpp \ + statistics.hpp \ diff --git a/indexer/indexer_tool/statistics.cpp b/indexer/indexer_tool/statistics.cpp new file mode 100644 index 0000000000..79166d67b4 --- /dev/null +++ b/indexer/indexer_tool/statistics.cpp @@ -0,0 +1,133 @@ +#include "../../base/SRC_FIRST.hpp" + +#include "statistics.hpp" + +#include "../feature_processor.hpp" +#include "../classificator.hpp" + +#include "../../base/string_utils.hpp" + +#include "../../std/iostream.hpp" + +#include "../../base/start_mem_debug.hpp" + + +namespace stats +{ + class AccumulateStatistic + { + MapInfo & m_info; + + class ProcessType + { + MapInfo & m_info; + uint32_t m_size; + + public: + ProcessType(MapInfo & info, uint32_t sz) : m_info(info), m_size(sz) {} + void operator() (uint32_t type) + { + m_info.AddToSet(TypeTag(type), m_size, m_info.m_byClassifType); + } + }; + + public: + AccumulateStatistic(MapInfo & 
info) : m_info(info) {} + + void operator() (FeatureType const & f, uint32_t) + { + f.ParseBeforeStatistic(); + + uint32_t const sz = f.GetAllSize(); + m_info.m_all.Add(sz); + m_info.m_names.Add(f.GetNameSize()); + m_info.m_types.Add(f.GetTypesSize()); + + int const level = 17; + + FeatureType::geom_stat_t geom = f.GetGeometrySize(level); + m_info.AddToSet(geom.m_count, geom.m_size, m_info.m_byPointsCount); + m_info.AddToSet(f.GetFeatureType(), sz, m_info.m_byGeomType); + + ProcessType doProcess(m_info, sz); + f.ForEachTypeRef(doProcess); + } + }; + + void CalcStatistic(string const & fName, MapInfo & info) + { + AccumulateStatistic doProcess(info); + feature::ForEachFromDat(fName, doProcess); + } + + void PrintInfo(char const * prefix, GeneralInfo const & info) + { + cout << prefix << ": size = " << info.m_size << "; count = " << info.m_count << endl; + } + + string GetKey(FeatureBase::FeatureType type) + { + switch (type) + { + case FeatureBase::FEATURE_TYPE_LINE: return "Line"; + case FeatureBase::FEATURE_TYPE_AREA: return "Area"; + default: return "Point"; + } + } + + string GetKey(uint32_t i) + { + return utils::to_string(i); + } + + string GetKey(TypeTag t) + { + return classif().GetFullObjectName(t.m_val); + } + + template + void PrintTop(char const * prefix, TSet const & theSet) + { + cout << prefix << endl; + + vector vec(theSet.begin(), theSet.end()); + + sort(vec.begin(), vec.end(), TSortCr()); + + size_t const count = min(static_cast(10), vec.size()); + for (size_t i = 0; i < count; ++i) + { + cout << i << ". 
"; + PrintInfo(GetKey(vec[i].m_key).c_str(), vec[i].m_info); + } + } + + struct greater_size + { + template + bool operator() (TInfo const & r1, TInfo const & r2) const + { + return r1.m_info.m_size > r2.m_info.m_size; + } + }; + + struct greater_count + { + template + bool operator() (TInfo const & r1, TInfo const & r2) const + { + return r1.m_info.m_count > r2.m_info.m_count; + } + }; + + void PrintStatistic(MapInfo & info) + { + PrintInfo("ALL", info.m_all); + PrintInfo("NAMES", info.m_names); + PrintInfo("TYPES", info.m_types); + + PrintTop("Top SIZE by Geometry Type", info.m_byGeomType); + PrintTop("Top SIZE by Classificator Type", info.m_byClassifType); + PrintTop("Top SIZE by Points Count", info.m_byPointsCount); + } +} diff --git a/indexer/indexer_tool/statistics.hpp b/indexer/indexer_tool/statistics.hpp new file mode 100644 index 0000000000..e9d2ea2ae3 --- /dev/null +++ b/indexer/indexer_tool/statistics.hpp @@ -0,0 +1,69 @@ +#pragma once + +#include "../feature.hpp" + +#include "../../std/map.hpp" + + +namespace stats +{ + struct GeneralInfo + { + uint64_t m_count, m_size; + + GeneralInfo() : m_count(0), m_size(0) {} + + void Add(uint64_t sz) + { + if (sz > 0) + { + ++m_count; + m_size += sz; + } + } + }; + + template + struct GeneralInfoKey + { + TKey m_key; + GeneralInfo m_info; + + GeneralInfoKey(TKey key) : m_key(key) {} + + bool operator< (GeneralInfoKey const & rhs) const + { + return m_key < rhs.m_key; + } + }; + + struct TypeTag + { + uint32_t m_val; + + TypeTag(uint32_t v) : m_val(v) {} + + bool operator< (TypeTag const & rhs) const + { + return m_val < rhs.m_val; + } + }; + + struct MapInfo + { + set > m_byGeomType; + set > m_byClassifType; + set > m_byPointsCount; + + GeneralInfo m_all, m_names, m_types; + + template + void AddToSet(TKey key, uint32_t sz, TSet & theSet) + { + theSet.insert(GeneralInfoKey(key)).first->m_info.Add(sz); + } + }; + + void CalcStatistic(string const & fName, MapInfo & info); + void PrintStatistic(MapInfo & info); +} 
diff --git a/tools/win/calc_statistics.bat b/tools/win/calc_statistics.bat
new file mode 100644
index 0000000000..1a2ecc6d42
--- /dev/null
+++ b/tools/win/calc_statistics.bat
@@ -0,0 +1,3 @@
+call set_vars.bat %1 %2
+
+%INDEXER_TOOL% --calc_statistics=true --output=%2 --bucketing_level=0