Feature statistics calculation.

This commit is contained in:
vng 2011-01-09 03:47:48 +02:00 committed by Alex Zolotarev
parent 3ef586653d
commit e98b6dc0d7
9 changed files with 301 additions and 8 deletions

View file

@ -237,6 +237,7 @@ public:
ProcessObjects(uint32_t type, ToDo & toDo) const;
ClassifObject const * GetObject(uint32_t type) const;
string GetFullObjectName(uint32_t type) const;
//@}
};

View file

@ -377,7 +377,7 @@ void FeatureBase::ParseTypes() const
{
ASSERT(!m_bTypesParsed, ());
ArrayByteSource source(DataPtr() + 1);
ArrayByteSource source(DataPtr() + m_TypesOffset);
for (size_t i = 0; i < GetTypesCount(); ++i)
m_Types[i] = ReadVarUint<uint32_t>(source);
@ -557,11 +557,12 @@ m2::RectD FeatureType::GetLimitRect(int scale) const
return m_LimitRect;
}
void FeatureType::ParseGeometry(int scale) const
uint32_t FeatureType::ParseGeometry(int scale) const
{
if (!m_bOffsetsParsed)
ParseOffsets();
uint32_t sz = 0;
if (Header() & HEADER_IS_LINE)
{
uint32_t const offset = GetOffset(scale, m_lineOffsets);
@ -573,17 +574,20 @@ void FeatureType::ParseGeometry(int scale) const
feature::LoadPoints(m_Geometry, src);
CalcRect(m_Geometry, m_LimitRect);
sz = static_cast<uint32_t>(src.Pos() - offset);
}
}
m_bGeometryParsed = true;
return sz;
}
void FeatureType::ParseTriangles(int scale) const
uint32_t FeatureType::ParseTriangles(int scale) const
{
if (!m_bOffsetsParsed)
ParseOffsets();
uint32_t sz = 0;
if (Header() & HEADER_IS_AREA)
{
uint32_t const offset = GetOffset(scale, m_trgOffsets);
@ -595,10 +599,12 @@ void FeatureType::ParseTriangles(int scale) const
feature::LoadTriangles(m_Triangles, src);
CalcRect(m_Triangles, m_LimitRect);
sz = static_cast<uint32_t>(src.Pos() - offset);
}
}
m_bTrianglesParsed = true;
return sz;
}
void FeatureType::ReadOffsetsImpl(ArrayByteSource & src, offsets_t & offsets)
@ -633,6 +639,7 @@ void FeatureType::ParseOffsets() const
ReadOffsetsImpl(src, m_trgOffsets);
m_bOffsetsParsed = true;
m_Size = CalcOffset(src);
}
void FeatureType::ParseAll(int scale) const
@ -643,3 +650,15 @@ void FeatureType::ParseAll(int scale) const
if (!m_bTrianglesParsed)
ParseTriangles(scale);
}
/// Statistic helper: parses the line geometry for @p scale and reports
/// the size value returned by ParseGeometry together with the number of
/// elements currently stored in m_Geometry.
FeatureType::geom_stat_t FeatureType::GetGeometrySize(int scale) const
{
  // ParseGeometry has to run before m_Geometry.size() is read, so the two
  // values are obtained in separate statements.
  uint32_t const parsedSize = ParseGeometry(scale);
  size_t const pointsCount = m_Geometry.size();
  return geom_stat_t(parsedSize, pointsCount);
}
/// Statistic helper: parses the triangles for @p scale and reports the
/// size value returned by ParseTriangles together with the number of
/// elements currently stored in m_Triangles.
FeatureType::geom_stat_t FeatureType::GetTrianglesSize(int scale) const
{
  // ParseTriangles has to run before m_Triangles.size() is read, so the two
  // values are obtained in separate statements.
  uint32_t const parsedSize = ParseTriangles(scale);
  size_t const trianglesCount = m_Triangles.size();
  return geom_stat_t(parsedSize, trianglesCount);
}

View file

@ -254,6 +254,12 @@ public:
void InitFeatureBuilder(FeatureBuilder1 & fb) const;
/// @name Statistic functions.
//@{
/// @return Distance between m_NameOffset and m_CenterOffset,
/// reported as the size of the feature's name data.
uint32_t GetNameSize() const
{
  return m_CenterOffset - m_NameOffset;
}
/// @return Distance between m_TypesOffset and m_LayerOffset,
/// reported as the size of the feature's types data.
uint32_t GetTypesSize() const
{
  return m_LayerOffset - m_TypesOffset;
}
//@}
protected:
void Deserialize(buffer_t & data, uint32_t offset = 0);
string DebugString() const;
@ -278,6 +284,7 @@ protected:
mutable m2::RectD m_LimitRect;
static uint32_t const m_TypesOffset = 1;
mutable uint32_t m_LayerOffset;
mutable uint32_t m_NameOffset;
mutable uint32_t m_CenterOffset;
@ -380,10 +387,37 @@ public:
/// For test cases only.
string DebugString(int scale) const;
/// @name Statistic functions.
//@{
/// Makes sure ParseOffsets() has run, so that values it fills in
/// (such as m_Size) are available to the statistic getters.
void ParseBeforeStatistic() const
{
  if (m_bOffsetsParsed)
    return;

  ParseOffsets();
}
/// @return m_Size minus m_GeometryOffset.
/// Meaningful only after the offsets have been parsed (m_Size is set there).
uint32_t GetOffsetSize() const
{
  return m_Size - m_GeometryOffset;
}
/// @return The value of m_Size, which is assigned while parsing the offsets.
uint32_t GetAllSize() const
{
  return m_Size;
}
/// Result of a geometry statistic query: a size value and an element count.
struct geom_stat_t
{
  uint32_t m_size, m_count;

  /// Creates an empty statistic (both fields are zero).
  /// Initializers are listed in declaration order (m_size, then m_count),
  /// which is the order the members are actually initialized in.
  geom_stat_t() : m_size(0), m_count(0) {}

  /// @param sz     Size value to store.
  /// @param count  Number of elements; narrowed to uint32_t.
  geom_stat_t(uint32_t sz, size_t count)
    : m_size(sz), m_count(static_cast<uint32_t>(count))
  {
  }
};
geom_stat_t GetGeometrySize(int scale) const;
geom_stat_t GetTrianglesSize(int scale) const;
//@}
private:
void ParseOffsets() const;
void ParseGeometry(int scale) const;
void ParseTriangles(int scale) const;
uint32_t ParseGeometry(int scale) const;
uint32_t ParseTriangles(int scale) const;
void ParseAll(int scale) const;
@ -394,6 +428,8 @@ private:
mutable bool m_bOffsetsParsed;
mutable uint32_t m_Size;
typedef array<uint32_t, 4> offsets_t; // should be synchronized with ARRAY_SIZE(g_arrScales)
static void ReadOffsetsImpl(ArrayByteSource & src, offsets_t & offsets);

View file

@ -75,6 +75,24 @@ ClassifObject const * Classificator::GetObject(uint32_t type) const
return p;
}
/// Builds a readable name for @p type by walking the classificator tree
/// from the root: for every value that ftype::GetValue extracts from the
/// type, the matching child object is taken and its name is appended,
/// followed by '-'. The result therefore ends with a trailing '-'
/// (or is empty if no value could be extracted).
string Classificator::GetFullObjectName(uint32_t type) const
{
  string fullName;
  ClassifObject const * node = &m_root;

  uint8_t value;
  for (uint8_t level = 0; ftype::GetValue(type, level, value); ++level)
  {
    // NOTE(review): assumes GetObject() never returns null for a value
    // obtained from a valid type - confirm.
    node = node->GetObject(value);

    fullName += node->GetName();
    fullName += '-';
  }

  return fullName;
}
namespace feature
{
@ -123,7 +141,7 @@ int GetDrawRule(FeatureBase const & f, int level, vector<drule::Key> & keys, str
Classificator const & c = classif();
get_draw_rule doRules(level, static_cast<feature_geo_t>(geoType), keys, names);
for (size_t i = 0; i < types.m_size; ++i)
for (int i = 0; i < types.m_size; ++i)
(void)c.ProcessObjects(types.m_types[i], doRules);
return geoType;
@ -203,7 +221,7 @@ bool IsDrawableForIndex(FeatureBase const & f, int level)
Classificator const & c = classif();
check_is_drawable doCheck(level);
for (size_t i = 0; i < types.m_size; ++i)
for (int i = 0; i < types.m_size; ++i)
if (c.ProcessObjects(types.m_types[i], doCheck))
return true;

View file

@ -4,6 +4,7 @@
#include "update_generator.hpp"
#include "feature_bucketer.hpp"
#include "grid_generator.hpp"
#include "statistics.hpp"
#include "../classif_routine.hpp"
#include "../features_vector.hpp"
@ -37,6 +38,7 @@ DEFINE_bool(generate_features, false, "2nd pass - generate intermediate features
DEFINE_bool(generate_geometry, false, "3rd pass - split and simplify geometry and triangles for features");
DEFINE_bool(generate_index, false, "4rd pass - generate index");
DEFINE_bool(generate_grid, false, "Generate grid for given bucketing_level");
DEFINE_bool(calc_statistics, false, "Calculate feature statistics for specified mwm bucket files");
DEFINE_bool(use_light_nodes, false,
"If true, use temporary vector of nodes, instead of huge temp file");
DEFINE_string(data_path, "", "Working directory, 'path_to_exe/../../data' if empty.");
@ -100,7 +102,8 @@ int main(int argc, char ** argv)
genInfo.dir = FLAGS_intermediate_data_path;
// load classificator only if necessary
if (FLAGS_generate_features || FLAGS_generate_geometry || FLAGS_generate_index)
if (FLAGS_generate_features || FLAGS_generate_geometry ||
FLAGS_generate_index || FLAGS_calc_statistics)
{
classificator::Read(path + "drawing_rules.bin",
path + "classificator.txt",
@ -159,6 +162,15 @@ int main(int argc, char ** argv)
LOG(LCRITICAL, ("Error generating index."));
}
}
if (FLAGS_calc_statistics)
{
LOG(LINFO, ("Calculating statistics for ", datFile));
stats::MapInfo info;
stats::CalcStatistic(datFile, info);
stats::PrintStatistic(info);
}
}
// Create http update list for countries and corresponding files

View file

@ -24,6 +24,7 @@ SOURCES += \
tesselator.cpp \
update_generator.cpp \
grid_generator.cpp \
statistics.cpp \
HEADERS += \
osm_element.hpp \
@ -35,3 +36,4 @@ HEADERS += \
update_generator.hpp \
feature_bucketer.hpp \
grid_generator.hpp \
statistics.hpp \

View file

@ -0,0 +1,133 @@
#include "../../base/SRC_FIRST.hpp"
#include "statistics.hpp"
#include "../feature_processor.hpp"
#include "../classificator.hpp"
#include "../../base/string_utils.hpp"
#include "../../std/iostream.hpp"
#include "../../base/start_mem_debug.hpp"
namespace stats
{
class AccumulateStatistic
{
MapInfo & m_info;
class ProcessType
{
MapInfo & m_info;
uint32_t m_size;
public:
ProcessType(MapInfo & info, uint32_t sz) : m_info(info), m_size(sz) {}
void operator() (uint32_t type)
{
m_info.AddToSet(TypeTag(type), m_size, m_info.m_byClassifType);
}
};
public:
AccumulateStatistic(MapInfo & info) : m_info(info) {}
void operator() (FeatureType const & f, uint32_t)
{
f.ParseBeforeStatistic();
uint32_t const sz = f.GetAllSize();
m_info.m_all.Add(sz);
m_info.m_names.Add(f.GetNameSize());
m_info.m_types.Add(f.GetTypesSize());
int const level = 17;
FeatureType::geom_stat_t geom = f.GetGeometrySize(level);
m_info.AddToSet(geom.m_count, geom.m_size, m_info.m_byPointsCount);
m_info.AddToSet(f.GetFeatureType(), sz, m_info.m_byGeomType);
ProcessType doProcess(m_info, sz);
f.ForEachTypeRef(doProcess);
}
};
/// Runs AccumulateStatistic over every feature that
/// feature::ForEachFromDat yields for @p fName, collecting the results
/// into @p info.
void CalcStatistic(string const & fName, MapInfo & info)
{
  AccumulateStatistic accumulator(info);
  feature::ForEachFromDat(fName, accumulator);
}
/// Writes one line "<prefix>: size = S; count = C" to cout.
void PrintInfo(char const * prefix, GeneralInfo const & info)
{
  cout << prefix;
  cout << ": size = " << info.m_size;
  cout << "; count = " << info.m_count;
  cout << endl;
}
/// Printable key for a geometry type: "Line", "Area", or "Point" for
/// every other value.
string GetKey(FeatureBase::FeatureType type)
{
  if (type == FeatureBase::FEATURE_TYPE_LINE)
    return "Line";

  if (type == FeatureBase::FEATURE_TYPE_AREA)
    return "Area";

  return "Point";
}
/// Printable key for a numeric bucket: the number converted with
/// utils::to_string.
string GetKey(uint32_t i)
{
  string const key = utils::to_string(i);
  return key;
}
/// Printable key for a classificator type: the full object name that the
/// classificator returns for the wrapped type value.
string GetKey(TypeTag t)
{
  uint32_t const type = t.m_val;
  return classif().GetFullObjectName(type);
}
template <class TSortCr, class TSet>
void PrintTop(char const * prefix, TSet const & theSet)
{
cout << prefix << endl;
vector<typename TSet::value_type> vec(theSet.begin(), theSet.end());
sort(vec.begin(), vec.end(), TSortCr());
size_t const count = min(static_cast<size_t>(10), vec.size());
for (size_t i = 0; i < count; ++i)
{
cout << i << ". ";
PrintInfo(GetKey(vec[i].m_key).c_str(), vec[i].m_info);
}
}
/// Ordering predicate: true when the first record has a strictly larger
/// accumulated size (m_info.m_size) than the second one.
struct greater_size
{
  template <class TInfo>
  bool operator() (TInfo const & r1, TInfo const & r2) const
  {
    return r2.m_info.m_size < r1.m_info.m_size;
  }
};
/// Ordering predicate: true when the first record has a strictly larger
/// accumulated count (m_info.m_count) than the second one.
struct greater_count
{
  template <class TInfo>
  bool operator() (TInfo const & r1, TInfo const & r2) const
  {
    return r2.m_info.m_count < r1.m_info.m_count;
  }
};
/// Prints the collected statistics to cout: the three totals first,
/// then the top tables ordered by accumulated size.
void PrintStatistic(MapInfo & info)
{
  // Totals.
  PrintInfo("ALL", info.m_all);
  PrintInfo("NAMES", info.m_names);
  PrintInfo("TYPES", info.m_types);

  // Top tables; every table uses the same "largest size first" order.
  typedef greater_size by_size_t;

  PrintTop<by_size_t>("Top SIZE by Geometry Type", info.m_byGeomType);
  PrintTop<by_size_t>("Top SIZE by Classificator Type", info.m_byClassifType);
  PrintTop<by_size_t>("Top SIZE by Points Count", info.m_byPointsCount);
}
}

View file

@ -0,0 +1,69 @@
#pragma once
#include "../feature.hpp"
#include "../../std/map.hpp"
namespace stats
{
/// Accumulator of a total size and the number of non-empty additions.
struct GeneralInfo
{
  uint64_t m_count, m_size;

  GeneralInfo() : m_count(0), m_size(0) {}

  /// Adds @p sz to the total and bumps the counter.
  /// Zero sizes are ignored and do not change the counter.
  void Add(uint64_t sz)
  {
    if (sz > 0)
    {
      ++m_count;
      m_size += sz;
    }
  }
};

/// GeneralInfo tagged with a key; ordered by the key only, so it can be
/// stored in a set with one accumulator per distinct key.
template <class TKey>
struct GeneralInfoKey
{
  TKey m_key;

  /// Declared mutable because set elements are accessible only as const,
  /// while the accumulator has to be updated in place. This is safe:
  /// m_info does not take part in operator<, so changing it cannot break
  /// the ordering of the container.
  mutable GeneralInfo m_info;

  GeneralInfoKey(TKey key) : m_key(key) {}

  bool operator< (GeneralInfoKey const & rhs) const
  {
    return m_key < rhs.m_key;
  }
};
/// Thin wrapper around a uint32_t type value. It gives the value a
/// distinct C++ type (used to select the matching GetKey overload and
/// set element type) and orders instances by the wrapped value.
struct TypeTag
{
  uint32_t m_val;

  TypeTag(uint32_t v) : m_val(v) {}

  bool operator< (TypeTag const & rhs) const
  {
    return rhs.m_val > m_val;
  }
};
/// All statistics collected for one data file.
struct MapInfo
{
  /// Accumulators keyed by geometry type, classificator type and
  /// number of points respectively.
  set<GeneralInfoKey<FeatureBase::FeatureType> > m_byGeomType;
  set<GeneralInfoKey<TypeTag> > m_byClassifType;
  set<GeneralInfoKey<uint32_t> > m_byPointsCount;

  /// Totals: whole features, names and types.
  GeneralInfo m_all, m_names, m_types;

  /// Adds @p sz to the accumulator stored under @p key in @p theSet,
  /// creating the entry first if the key is not present yet.
  template <class TKey, class TSet>
  void AddToSet(TKey key, uint32_t sz, TSet & theSet)
  {
    // insert() returns the existing element when the key is already there.
    typename TSet::iterator const pos = theSet.insert(GeneralInfoKey<TKey>(key)).first;
    pos->m_info.Add(sz);
  }
};
void CalcStatistic(string const & fName, MapInfo & info);
void PrintStatistic(MapInfo & info);
}

View file

@ -0,0 +1,3 @@
rem Both script arguments are forwarded to set_vars.bat.
rem NOTE(review): INDEXER_TOOL is presumably defined by set_vars.bat - confirm.
call set_vars.bat %1 %2
rem Run the indexer tool with only the calc_statistics pass enabled;
rem the second script argument is passed as --output.
%INDEXER_TOOL% --calc_statistics=true --output=%2 --bucketing_level=0