diff --git a/coding/dd_vector.hpp b/coding/dd_vector.hpp
index 342ae43165..5e0195b599 100644
--- a/coding/dd_vector.hpp
+++ b/coding/dd_vector.hpp
@@ -7,12 +7,9 @@
 #include "std/type_traits.hpp"
 #include "std/iterator_facade.hpp"
 
-
-template <
-    typename T,
-    class TReader,
-    typename TSize = uint32_t
-    > class DDVector
+// Disk-driven vector.
+template <typename T, class TReader, typename TSize = uint32_t>
+class DDVector
 {
 public:
   typedef T value_type;
@@ -166,7 +163,7 @@ private:
   {
     uint64_t const sz = m_reader.Size();
     if ((sz % sizeof(T)) != 0)
-      MYTHROW(OpenException, (sz, sizeof(T)));
+      MYTHROW(OpenException, ("Element size", sizeof(T), "does not divide total size", sz));
     m_Size = static_cast<TSize>(sz / sizeof(T));
   }
 
diff --git a/indexer/feature_visibility.cpp b/indexer/feature_visibility.cpp
index 7d0c89e2ec..7531bb9c87 100644
--- a/indexer/feature_visibility.cpp
+++ b/indexer/feature_visibility.cpp
@@ -335,6 +335,7 @@ int GetMinDrawableScale(FeatureBase const & f)
     if (IsDrawableForIndex(f, level))
       return level;
 
+  ASSERT(false, ("Feature is never visible."));
   return -1;
 }
 
diff --git a/indexer/index_builder.cpp b/indexer/index_builder.cpp
index 8ce7b19e6f..5ae2fb326e 100644
--- a/indexer/index_builder.cpp
+++ b/indexer/index_builder.cpp
@@ -22,7 +22,7 @@ namespace indexer
     FeaturesVector featuresVector(readCont, header);
 
     FileWriter writer(idxFileName);
-    BuildIndex(header.GetLastScale() + 1, header.GetLastScale(), featuresVector, writer, tmpFile);
+    BuildIndex(header, featuresVector, writer, tmpFile);
   }
 
   FilesContainerW(datFile, FileWriter::OP_WRITE_EXISTING).Write(idxFileName, INDEX_FILE_TAG);
diff --git a/indexer/index_builder.hpp b/indexer/index_builder.hpp
index 264576a9a8..1a55fd37ff 100644
--- a/indexer/index_builder.hpp
+++ b/indexer/index_builder.hpp
@@ -1,12 +1,13 @@
 #pragma once
+#include "indexer/data_header.hpp"
 #include "indexer/scale_index_builder.hpp"
+
 namespace indexer
 {
   template <class FeaturesVectorT, class WriterT>
-  void BuildIndex(uint32_t bucketsCount,
-                  int codingScale,
+  void BuildIndex(feature::DataHeader const & header,
                   FeaturesVectorT const & featuresVector,
                   WriterT & writer,
                   string const & tmpFilePrefix)
@@ -15,7 +16,7 @@ namespace indexer
     uint64_t indexSize;
     {
       SubWriter<WriterT> subWriter(writer);
-      IndexScales(bucketsCount, codingScale, featuresVector, subWriter, tmpFilePrefix);
+      covering::IndexScales(header, featuresVector, subWriter, tmpFilePrefix);
       indexSize = subWriter.Size();
     }
     LOG(LINFO, ("Built scale index. Size =", indexSize));
Size =", indexSize)); diff --git a/indexer/indexer_tests/index_builder_test.cpp b/indexer/indexer_tests/index_builder_test.cpp index 4bfb683476..262fd03496 100644 --- a/indexer/indexer_tests/index_builder_test.cpp +++ b/indexer/indexer_tests/index_builder_test.cpp @@ -32,8 +32,7 @@ UNIT_TEST(BuildIndexTest) FeaturesVector featuresVector(originalContainer, header); MemWriter > serialWriter(serialIndex); - indexer::BuildIndex(ScaleIndexBase::GetBucketsCount(), - scales::GetUpperScale(), + indexer::BuildIndex(header, featuresVector, serialWriter, "build_index_test"); } diff --git a/indexer/scale_index_builder.hpp b/indexer/scale_index_builder.hpp index fe04d8f069..17c37c0734 100644 --- a/indexer/scale_index_builder.hpp +++ b/indexer/scale_index_builder.hpp @@ -1,10 +1,10 @@ #pragma once -#include "indexer/scale_index.hpp" +#include "indexer/cell_id.hpp" +#include "indexer/data_header.hpp" +#include "indexer/feature.hpp" #include "indexer/feature_covering.hpp" #include "indexer/feature_visibility.hpp" -#include "indexer/feature.hpp" #include "indexer/interval_index_builder.hpp" -#include "indexer/cell_id.hpp" #include "defines.hpp" @@ -16,13 +16,16 @@ #include "base/base.hpp" #include "base/logging.hpp" #include "base/macros.hpp" +#include "base/scope_guard.hpp" #include "std/string.hpp" -#include "std/vector.hpp" #include "std/utility.hpp" -#include "std/unordered_set.hpp" +#include "std/vector.hpp" +namespace covering +{ + class CellFeaturePair { public: @@ -49,68 +52,131 @@ private: }; STATIC_ASSERT(sizeof(CellFeaturePair) == 12); +class CellFeatureBucketTuple +{ +public: + CellFeatureBucketTuple() {} + CellFeatureBucketTuple(CellFeaturePair p, uint32_t bucket) : m_pair(p), m_bucket(bucket) {} + + bool operator<(CellFeatureBucketTuple const & rhs) const + { + if (m_bucket != rhs.m_bucket) + return m_bucket < rhs.m_bucket; + return m_pair < rhs.m_pair; + } + + CellFeaturePair GetCellFeaturePair() const { return m_pair; } + uint32_t GetBucket() const { return m_bucket; } + +private: + CellFeaturePair m_pair; + uint32_t m_bucket; +}; +STATIC_ASSERT(sizeof(CellFeatureBucketTuple) == 16); + template class FeatureCoverer { - unordered_set & m_skipped; - public: - FeatureCoverer(unordered_set & skipped, - uint32_t bucket, - int codingScale, - SorterT & sorter, - uint32_t & numFeatures) - : m_skipped(skipped), m_Sorter(sorter), - m_codingDepth(covering::GetCodingDepth(codingScale)), - m_ScaleRange(ScaleIndexBase::ScaleRangeForBucket(bucket)), - m_NumFeatures(numFeatures) + FeatureCoverer(feature::DataHeader const & header, SorterT & sorter, vector & featuresInBucket, vector & cellsInBucket) + : m_header(header), + m_bucketsCount(header.GetLastScale() + 1), + m_Sorter(sorter), + m_codingDepth(covering::GetCodingDepth(header.GetLastScale())), + m_featuresInBucket(featuresInBucket), + m_cellsInBucket(cellsInBucket) { - ASSERT_LESS(m_ScaleRange.first, m_ScaleRange.second, ()); + m_featuresInBucket.resize(m_bucketsCount); + m_cellsInBucket.resize(m_bucketsCount); } - + template void operator() (TFeature const & f, uint32_t offset) const { - if (FeatureShouldBeIndexed(f, offset)) + uint32_t minScale = 0; + bool skip = false; + m_scalesIdx = 0; + for (uint32_t bucket = 0; bucket < m_bucketsCount; ++bucket) { - vector const cells = covering::CoverFeature(f, m_codingDepth, 250); - for (int64_t cell : cells) - m_Sorter.Add(CellFeaturePair(cell, offset)); + // There is a one-to-one correspondence between buckets and scales. 
+      // This is not immediately obvious, and in fact there was an idea to map
+      // a bucket to a contiguous range of scales.
+      // todo(@pimenov): We probably should remove scale_index.hpp altogether.
+      if (FeatureShouldBeIndexed(f, offset, bucket, skip, minScale))
+      {
+        vector<int64_t> const cells = covering::CoverFeature(f, m_codingDepth, 250);
+        for (int64_t cell : cells)
+          m_Sorter.Add(CellFeatureBucketTuple(CellFeaturePair(cell, offset), bucket));
 
-      ++m_NumFeatures;
+        m_featuresInBucket[bucket] += 1;
+        m_cellsInBucket[bucket] += cells.size();
+      }
     }
   }
 
 private:
   template <class TFeature>
-  bool FeatureShouldBeIndexed(TFeature const & f, uint32_t offset) const
+  bool FeatureShouldBeIndexed(TFeature const & f, uint32_t offset, uint32_t scale, bool & skip,
+                              uint32_t & minScale) const
   {
     // Do index features for the first visible interval only once.
     // If the feature was skipped as empty for the suitable interval,
     // it should be indexed in the next interval where geometry is not empty.
-    // This function invokes geometry reading for the needed scale.
-    if (f.IsEmptyGeometry(m_ScaleRange.second - 1))
+    bool needReset = (scale == 0);
+    while (m_scalesIdx < m_header.GetScalesCount() && m_header.GetScale(m_scalesIdx) < scale)
     {
-      m_skipped.insert(offset);
+      ++m_scalesIdx;
+      needReset = true;
+    }
+
+    if (needReset)
+      f.ResetGeometry();
+
+    // This function invokes geometry reading for the needed scale.
+    if (f.IsEmptyGeometry(scale))
+    {
+      skip = true;
       return false;
     }
 
-    // This function assumes that geometry rect for the needed scale is already initialized.
-    uint32_t const minScale = feature::GetMinDrawableScale(f);
-    if (m_ScaleRange.first <= minScale && minScale < m_ScaleRange.second)
+    if (needReset)
     {
-      (void) m_skipped.erase(offset);
+      // This function assumes that geometry rect for the needed scale is already initialized.
+      // Note: it works with FeatureBase so in fact it does not use the information about
+      // the feature's geometry except for the type and the LimitRect.
+      minScale = feature::GetMinDrawableScale(f);
+    }
+
+    if (minScale == scale)
+    {
+      skip = false;
       return true;
    }
-
-    return (minScale < m_ScaleRange.first && m_skipped.erase(offset) == 1);
+
+    if (minScale < scale && skip)
+    {
+      skip = false;
+      return true;
+    }
+    return false;
   }
 
+  // We do not need to parse a feature's geometry for every bucket.
+  // The scales at which geometry changes are encoded in the mwm header.
+  // We cannot know them beforehand because they are different for
+  // the common case of a country file and the special case of the world file.
+  // m_scalesIdx is a position in the scales array that should be reset for every feature
+  // and then only move forward. Its purpose is to detect the moments when we
+  // need to reread the feature's geometry.
+  feature::DataHeader const & m_header;
+  mutable size_t m_scalesIdx;
+
+  uint32_t m_bucketsCount;
   SorterT & m_Sorter;
   int m_codingDepth;
-  pair<uint32_t, uint32_t> m_ScaleRange;
-  uint32_t & m_NumFeatures;
+  vector<uint32_t> & m_featuresInBucket;
+  vector<uint32_t> & m_cellsInBucket;
 };
 
 template
@@ -131,57 +197,75 @@ private:
 };
 
 template <class FeaturesVectorT, class WriterT>
-inline void IndexScales(uint32_t bucketsCount,
-                        int codingScale,
+inline void IndexScales(feature::DataHeader const & header,
                         FeaturesVectorT const & featuresVector,
                         WriterT & writer,
                         string const & tmpFilePrefix)
 {
   // TODO: Make scale bucketing dynamic.
-  STATIC_ASSERT(sizeof(CellFeaturePair) == 12);
+  int bucketsCount = header.GetLastScale() + 1;
 
-  unordered_set<uint32_t> skipped;
+  string const cells2featureAllBucketsFile =
+      tmpFilePrefix + CELL2FEATURE_SORTED_EXT + ".allbuckets";
+  MY_SCOPE_GUARD(cells2featureAllBucketsFileGuard,
+                 bind(&FileWriter::DeleteFileX, cells2featureAllBucketsFile));
+  {
+    FileWriter cellsToFeaturesAllBucketsWriter(cells2featureAllBucketsFile);
+
+    typedef FileSorter<CellFeatureBucketTuple, WriterFunctor<FileWriter> > SorterType;
+    WriterFunctor<FileWriter> out(cellsToFeaturesAllBucketsWriter);
+    SorterType sorter(1024 * 1024 /* bufferBytes */, tmpFilePrefix + CELL2FEATURE_TMP_EXT, out);
+    vector<uint32_t> featuresInBucket(bucketsCount);
+    vector<uint32_t> cellsInBucket(bucketsCount);
+    featuresVector.ForEachOffset(FeatureCoverer<SorterType>(header, sorter, featuresInBucket, cellsInBucket));
+    sorter.SortAndFinish();
+
+    for (uint32_t bucket = 0; bucket < bucketsCount; ++bucket)
+    {
+      uint32_t numCells = cellsInBucket[bucket];
+      uint32_t numFeatures = featuresInBucket[bucket];
+      LOG(LINFO, ("Building scale index for bucket:", bucket));
+      double const cellsPerFeature = numFeatures == 0 ? 0.0 : static_cast<double>(numCells) / static_cast<double>(numFeatures);
+      LOG(LINFO, ("Features:", numFeatures, "cells:", numCells, "cells per feature:", cellsPerFeature));
+    }
+  }
+
+  FileReader reader(cells2featureAllBucketsFile);
+  DDVector<CellFeatureBucketTuple, FileReader, uint64_t> cellsToFeaturesAllBuckets(reader);
 
   VarSerialVectorWriter<WriterT> recordWriter(writer, bucketsCount);
+  auto it = cellsToFeaturesAllBuckets.begin();
+
   for (uint32_t bucket = 0; bucket < bucketsCount; ++bucket)
   {
-    LOG(LINFO, ("Building scale index for bucket:", bucket));
-
-    uint32_t numFeatures = 0;
     string const cells2featureFile = tmpFilePrefix + CELL2FEATURE_SORTED_EXT;
+    MY_SCOPE_GUARD(cells2featureFileGuard, bind(&FileWriter::DeleteFileX, cells2featureFile));
     {
       FileWriter cellsToFeaturesWriter(cells2featureFile);
-
-      typedef FileSorter<CellFeaturePair, WriterFunctor<FileWriter> > SorterType;
       WriterFunctor<FileWriter> out(cellsToFeaturesWriter);
-      SorterType sorter(1024*1024, tmpFilePrefix + CELL2FEATURE_TMP_EXT, out);
-      featuresVector.ForEachOffset(
-          FeatureCoverer<SorterType>(skipped, bucket, codingScale, sorter, numFeatures));
-      // LOG(LINFO, ("Sorting..."));
-      sorter.SortAndFinish();
+      while (it < cellsToFeaturesAllBuckets.end() && it->GetBucket() == bucket)
+      {
+        out(it->GetCellFeaturePair());
+        ++it;
+      }
     }
 
-    // LOG(LINFO, ("Indexing..."));
     {
       FileReader reader(cells2featureFile);
-      uint64_t const numCells = reader.Size() / sizeof(CellFeaturePair);
       DDVector<CellFeaturePair, FileReader, uint64_t> cellsToFeatures(reader);
-      LOG(LINFO, ("Being indexed", "features:", numFeatures, "cells:", numCells,
-                  "cells per feature:", (numCells + 1.0) / (numFeatures + 1.0)));
       SubWriter<WriterT> subWriter(writer);
-      BuildIntervalIndex(cellsToFeatures.begin(), cellsToFeatures.end(), subWriter,
-                         RectId::DEPTH_LEVELS * 2 + 1);
+      LOG(LINFO, ("Building interval index for bucket:", bucket));
+      BuildIntervalIndex(cellsToFeatures.begin(), cellsToFeatures.end(), subWriter, RectId::DEPTH_LEVELS * 2 + 1);
     }
-
-    FileWriter::DeleteFileX(cells2featureFile);
-    // LOG(LINFO, ("Indexing done."));
     recordWriter.FinishRecord();
   }
 
-  /// @todo Now we can't check this condition here.
-  /// We have stored features that are invisible even on the last zoom for now (coastlines).
-  //CHECK(skipped.empty(), ());
+  // todo(@pimenov). There was an old todo here saying that some features (coastlines)
+  // were indexed despite being invisible even at the last scale.
+  // This should be impossible, but it is better to re-check it.
 
   LOG(LINFO, ("All scale indexes done."));
 }
+
+} // namespace covering
\ No newline at end of file
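
For illustration only, here is a minimal standalone sketch of the single-sort bucketing idea that CellFeatureBucketTuple and the rewritten IndexScales rely on: tag every (cell, feature) pair with its bucket, sort everything once with the bucket as the primary key, and then consume the sorted sequence bucket by bucket, mirroring the `while (it < ... && it->GetBucket() == bucket)` loop in the patch. The types and values below are simplified stand-ins, not code from the repository; the real implementation streams the tuples through FileSorter and DDVector over a temporary file instead of a std::vector.

// Simplified sketch of bucket-tagged sorting followed by a per-bucket pass.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <tuple>
#include <vector>

struct CellFeature
{
  uint64_t cell;
  uint32_t feature;
};

struct BucketTuple
{
  CellFeature pair;
  uint32_t bucket;

  // Order by bucket first so one sorted pass yields contiguous per-bucket runs,
  // then by (cell, feature) as a per-bucket index builder would expect.
  bool operator<(BucketTuple const & rhs) const
  {
    return std::tie(bucket, pair.cell, pair.feature) <
           std::tie(rhs.bucket, rhs.pair.cell, rhs.pair.feature);
  }
};

int main()
{
  // Hypothetical tuples produced by covering features at several buckets.
  std::vector<BucketTuple> all = {
      {{12, 1}, 2}, {{7, 0}, 0}, {{12, 3}, 2}, {{9, 2}, 1}, {{3, 0}, 1}};
  std::sort(all.begin(), all.end());

  uint32_t const bucketsCount = 3;
  auto it = all.begin();
  for (uint32_t bucket = 0; bucket < bucketsCount; ++bucket)
  {
    // Analogue of the per-bucket loop in IndexScales: every tuple of the
    // current bucket is handed, in sorted order, to that bucket's index.
    std::cout << "bucket " << bucket << ":";
    while (it != all.end() && it->bucket == bucket)
    {
      std::cout << " (" << it->pair.cell << ", " << it->pair.feature << ")";
      ++it;
    }
    std::cout << "\n";
  }
  return 0;
}

The single sort replaces the old scheme of re-covering and re-sorting the features once per bucket, which is why the patch can also drop the per-bucket "skipped" bookkeeping.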