From 986dd8d6a85012e84933e5735ddce9194e3821f1 Mon Sep 17 00:00:00 2001 From: tatiana-yan Date: Thu, 20 Dec 2018 13:40:53 +0300 Subject: [PATCH] [search][generator] Store corresponding street feature id as street identifier in HouseToStreetTable. --- coding/map_uint32_to_val.hpp | 3 ++ generator/search_index_builder.cpp | 48 +++++++++++++------- map/framework.cpp | 1 - platform/mwm_traits.cpp | 10 ++++- platform/mwm_traits.hpp | 3 ++ search/house_to_street_table.cpp | 63 +++++++++++++++++++++++--- search/house_to_street_table.hpp | 20 +++++++-- search/mwm_context.cpp | 10 ----- search/mwm_context.hpp | 3 -- search/reverse_geocoder.cpp | 72 +++++++++++++++++++++--------- search/reverse_geocoder.hpp | 11 +++-- 11 files changed, 179 insertions(+), 65 deletions(-) diff --git a/coding/map_uint32_to_val.hpp b/coding/map_uint32_to_val.hpp index dde0f9095a..dd6606937b 100644 --- a/coding/map_uint32_to_val.hpp +++ b/coding/map_uint32_to_val.hpp @@ -183,6 +183,7 @@ public: // Loads MapUint32ToValue instance. Note that |reader| must be alive // until the destruction of loaded table. Returns nullptr if // MapUint32ToValue can't be loaded. + // It's guaranteed that |readBlockCallback| will not be called for empty block. static std::unique_ptr Load(Reader & reader, ReadBlockCallback const & readBlockCallback) { @@ -278,6 +279,7 @@ public: m_ids.push_back(id); } + // It's guaranteed that |writeBlockCallback| will not be called for empty block. void Freeze(Writer & writer, WriteBlockCallback const & writeBlockCallback) const { typename Map::Header header; @@ -306,6 +308,7 @@ public: offsets.push_back(static_cast(variables.size())); auto const endOffset = min(i + Map::kBlockSize, m_values.size()); + CHECK_GREATER(endOffset, i, ()); writeBlockCallback(writer, m_values.cbegin() + i, m_values.cbegin() + endOffset); } } diff --git a/generator/search_index_builder.cpp b/generator/search_index_builder.cpp index 262df2e0fd..03872c50f6 100644 --- a/generator/search_index_builder.cpp +++ b/generator/search_index_builder.cpp @@ -26,11 +26,12 @@ #include "platform/platform.hpp" #include "coding/file_name_utils.hpp" -#include "coding/fixed_bits_ddvector.hpp" +#include "coding/map_uint32_to_val.hpp" #include "coding/reader_writer_ops.hpp" #include "coding/writer.hpp" #include "base/assert.hpp" +#include "base/checked_cast.hpp" #include "base/logging.hpp" #include "base/scope_guard.hpp" #include "base/stl_helpers.hpp" @@ -343,7 +344,6 @@ void AddFeatureNameIndexPairs(FeaturesVectorTest const & features, bool GetStreetIndex(search::MwmContext & ctx, uint32_t featureID, string const & streetName, uint32_t & result) { - size_t streetIndex = 0; strings::UniString const street = search::GetStreetNameAsKey(streetName); bool const hasStreet = !street.empty(); @@ -356,10 +356,11 @@ bool GetStreetIndex(search::MwmContext & ctx, uint32_t featureID, string const & vector streets; search::ReverseGeocoder::GetNearbyStreets(ctx, feature::GetCenter(ft), streets); - streetIndex = search::ReverseGeocoder::GetMatchedStreetIndex(street, streets); - if (streetIndex < streets.size()) + auto const res = search::ReverseGeocoder::GetMatchedStreetIndex(street, streets); + + if (res) { - result = base::checked_cast(streetIndex); + result = *res; return true; } } @@ -393,7 +394,6 @@ void BuildAddressTable(FilesContainerR & container, Writer & writer, uint32_t th vector> contexts(threadsCount); uint32_t address = 0, missing = 0; - map bounds; uint32_t const kEmptyResult = uint32_t(-1); vector results(featuresCount, kEmptyResult); @@ -417,7 +417,6 @@ void BuildAddressTable(FilesContainerR & container, Writer & writer, uint32_t th if (found) { results[i] = streetIndex; - ++bounds[streetIndex]; ++address; } else if (streetIndex > 0) @@ -443,23 +442,42 @@ void BuildAddressTable(FilesContainerR & container, Writer & writer, uint32_t th // Flush results to disk. { - FixedBitsDDVector<3, FileReader>::Builder building2Street(writer); - for (auto i : results) + // Code corresponds to the HouseToStreetTable decoding. + MapUint32ToValueBuilder builder; + uint32_t houseToStreetCount = 0; + for (size_t i = 0; i < results.size(); ++i) { - if (i == kEmptyResult) - building2Street.PushBackUndefined(); - else - building2Street.PushBack(i); + if (results[i] != kEmptyResult) + { + builder.Put(base::asserted_cast(i), results[i]); + ++houseToStreetCount; + } } - LOG(LINFO, ("Address: Building -> Street (opt, all)", building2Street.GetCount())); + // Each street id is encoded as delta from some prediction. + // First street id in the block encoded as VarUint, all other street ids in the block + // encoded as VarInt delta from previous id. + auto const writeBlockCallback = [&](Writer & w, vector::const_iterator begin, + vector::const_iterator end) { + CHECK(begin != end, ("MapUint32ToValueBuilder should guarantee begin != end.")); + WriteVarUint(w, *begin); + auto prevIt = begin; + for (auto it = begin + 1; it != end; ++it) + { + int32_t const delta = base::asserted_cast(*it) - *prevIt; + WriteVarInt(w, delta); + prevIt = it; + } + }; + builder.Freeze(writer, writeBlockCallback); + + LOG(LINFO, ("Address: BuildingToStreet entries count:", houseToStreetCount)); } double matchedPercent = 100; if (address > 0) matchedPercent = 100.0 * (1.0 - static_cast(missing) / static_cast(address)); LOG(LINFO, ("Address: Matched percent", matchedPercent)); - LOG(LINFO, ("Address: Upper bounds", bounds)); } } // namespace diff --git a/map/framework.cpp b/map/framework.cpp index aef02a47ce..28571979a3 100644 --- a/map/framework.cpp +++ b/map/framework.cpp @@ -2956,7 +2956,6 @@ vector TakeSomeStreetsAndLocalize( void SetStreet(search::ReverseGeocoder const & coder, DataSource const & dataSource, FeatureType & ft, osm::EditableMapObject & emo) { - auto const & editor = osm::Editor::Instance(); // Get exact feature's street address (if any) from mwm, // together with all nearby streets. vector streets; diff --git a/platform/mwm_traits.cpp b/platform/mwm_traits.cpp index 9d982e30fb..80fe41ff02 100644 --- a/platform/mwm_traits.cpp +++ b/platform/mwm_traits.cpp @@ -18,7 +18,13 @@ MwmTraits::HouseToStreetTableFormat MwmTraits::GetHouseToStreetTableFormat() con { if (GetFormat() < version::Format::v7) return HouseToStreetTableFormat::Unknown; - return HouseToStreetTableFormat::Fixed3BitsDDVector; + + // todo: (@t.yan) adjust after production maps generation. + uint32_t constexpr kLastVersionWithFixed3BitsDDVector = 181219; + if (GetVersion() <= kLastVersionWithFixed3BitsDDVector) + return HouseToStreetTableFormat::Fixed3BitsDDVector; + + return HouseToStreetTableFormat::EliasFanoMap; } bool MwmTraits::HasOffsetsTable() const { return GetFormat() >= version::Format::v6; } @@ -55,6 +61,8 @@ string DebugPrint(MwmTraits::HouseToStreetTableFormat format) { case MwmTraits::HouseToStreetTableFormat::Fixed3BitsDDVector: return "Fixed3BitsDDVector"; + case MwmTraits::HouseToStreetTableFormat::EliasFanoMap: + return "EliasFanoMap"; case MwmTraits::HouseToStreetTableFormat::Unknown: return "Unknown"; } diff --git a/platform/mwm_traits.hpp b/platform/mwm_traits.hpp index 65f0a4295f..9d2efaf9e4 100644 --- a/platform/mwm_traits.hpp +++ b/platform/mwm_traits.hpp @@ -34,6 +34,9 @@ public: // details. Fixed3BitsDDVector, + // Elias-Fano based map from feature id to corresponding street feature id. + EliasFanoMap, + // The format of relation is unknown. Most likely, an error has occured. Unknown }; diff --git a/search/house_to_street_table.cpp b/search/house_to_street_table.cpp index ab08b6b7ac..868bbb5fa8 100644 --- a/search/house_to_street_table.cpp +++ b/search/house_to_street_table.cpp @@ -5,9 +5,11 @@ #include "platform/mwm_traits.hpp" #include "coding/fixed_bits_ddvector.hpp" +#include "coding/map_uint32_to_val.hpp" #include "coding/reader.hpp" #include "base/assert.hpp" +#include "base/checked_cast.hpp" #include "defines.hpp" @@ -18,12 +20,12 @@ namespace class Fixed3BitsTable : public HouseToStreetTable { public: - using TVector = FixedBitsDDVector<3, ModelReaderPtr>; + using Vector = FixedBitsDDVector<3, ModelReaderPtr>; - Fixed3BitsTable(MwmValue & value) - : m_vector(TVector::Create(value.m_cont.GetReader(SEARCH_ADDRESS_FILE_TAG))) + explicit Fixed3BitsTable(MwmValue & value) + : m_vector(Vector::Create(value.m_cont.GetReader(SEARCH_ADDRESS_FILE_TAG))) { - ASSERT(m_vector.get(), ("Can't instantiate FixedBitsDDVector.")); + ASSERT(m_vector.get(), ("Can't instantiate Fixed3BitsDDVector.")); } // HouseToStreetTable overrides: @@ -32,8 +34,52 @@ public: return m_vector->Get(houseId, streetIndex); } + StreetIdType GetStreetIdType() const override { return StreetIdType::Index; } + private: - unique_ptr m_vector; + unique_ptr m_vector; +}; + +class EliasFanoMap : public HouseToStreetTable +{ +public: + using Map = MapUint32ToValue; + + explicit EliasFanoMap(MwmValue & value) : m_reader(unique_ptr()) + { + auto const readBlockCallback = [&](NonOwningReaderSource & source, uint32_t blockSize, + vector & values) { + CHECK_GREATER(blockSize, 0, ()); + values.resize(blockSize); + values[0] = ReadVarUint(source); + + for (size_t i = 1; i < blockSize && source.Size() > 0; ++i) + { + // Feature ids for all real features are less than numeric_limits::max() + // so we can use delta coding with int32_t difference type. + auto const delta = ReadVarInt(source); + values[i] = base::asserted_cast(values[i - 1] + delta); + } + }; + + m_reader = value.m_cont.GetReader(SEARCH_ADDRESS_FILE_TAG); + ASSERT(m_reader.GetPtr(), ("Can't get", SEARCH_ADDRESS_FILE_TAG, "section reader.")); + + m_map = Map::Load(*m_reader.GetPtr(), readBlockCallback); + ASSERT(m_map.get(), ("Can't instantiate MapUint32ToValue.")); + } + + // HouseToStreetTable overrides: + bool Get(uint32_t houseId, uint32_t & streetIndex) const override + { + return m_map->Get(houseId, streetIndex); + } + + StreetIdType GetStreetIdType() const override { return StreetIdType::FeatureId; } + +private: + FilesContainerR::TReader m_reader; + unique_ptr m_map; }; class DummyTable : public HouseToStreetTable @@ -41,6 +87,7 @@ class DummyTable : public HouseToStreetTable public: // HouseToStreetTable overrides: bool Get(uint32_t /* houseId */, uint32_t & /* streetIndex */) const override { return false; } + StreetIdType GetStreetIdType() const override { return StreetIdType::None; } }; } // namespace @@ -54,7 +101,9 @@ unique_ptr HouseToStreetTable::Load(MwmValue & value) try { if (format == version::MwmTraits::HouseToStreetTableFormat::Fixed3BitsDDVector) - result.reset(new Fixed3BitsTable(value)); + result = make_unique(value); + if (format == version::MwmTraits::HouseToStreetTableFormat::EliasFanoMap) + result = make_unique(value); } catch (Reader::OpenException const & ex) { @@ -62,7 +111,7 @@ unique_ptr HouseToStreetTable::Load(MwmValue & value) } if (!result) - result.reset(new DummyTable()); + result = make_unique(); return result; } diff --git a/search/house_to_street_table.hpp b/search/house_to_street_table.hpp index f395e635ba..47ad9682cb 100644 --- a/search/house_to_street_table.hpp +++ b/search/house_to_street_table.hpp @@ -10,17 +10,29 @@ namespace search class HouseToStreetTable { public: + enum class StreetIdType + { + // Table stores the index number of the correct street corresponding + // to the house in the list of streets generated by ReverseGeocoder. + Index, + // Table stores feature id of street corresponding to the house. + FeatureId, + // Empty table. + None + }; + virtual ~HouseToStreetTable() = default; /// @todo Actually, value may be nullptr in the very common case. /// It's better to construct a table from MwmHandle. static unique_ptr Load(MwmValue & value); - // Returns true and stores to |streetIndex| the index number of the - // correct street corresponding to the house in the list of streets - // generated by ReverseGeocoder. Returns false if there is no such - // street. + // Returns true and stores street identifier to |streetIndex|. + // Street identifier type depends on data version. See StreetIdType. + // Returns false if there is no such street. virtual bool Get(uint32_t houseId, uint32_t & streetIndex) const = 0; + + virtual StreetIdType GetStreetIdType() const = 0; }; } // namespace search diff --git a/search/mwm_context.cpp b/search/mwm_context.cpp index 97a356551a..f32c153dd0 100644 --- a/search/mwm_context.cpp +++ b/search/mwm_context.cpp @@ -40,14 +40,4 @@ bool MwmContext::GetFeature(uint32_t index, FeatureType & ft) const } UNREACHABLE(); } - -bool MwmContext::GetStreetIndex(uint32_t houseId, uint32_t & streetId) -{ - if (!m_houseToStreetTable) - { - m_houseToStreetTable = HouseToStreetTable::Load(m_value); - ASSERT(m_houseToStreetTable, ()); - } - return m_houseToStreetTable->Get(houseId, streetId); -} } // namespace search diff --git a/search/mwm_context.hpp b/search/mwm_context.hpp index bb2e8c52e8..6b337da63d 100644 --- a/search/mwm_context.hpp +++ b/search/mwm_context.hpp @@ -73,8 +73,6 @@ public: // Returns false if feature was deleted by user. WARN_UNUSED_RESULT bool GetFeature(uint32_t index, FeatureType & ft) const; - WARN_UNUSED_RESULT bool GetStreetIndex(uint32_t houseId, uint32_t & streetId); - WARN_UNUSED_RESULT inline bool GetCenter(uint32_t index, m2::PointD & center) { return m_centers.Get(index, center); @@ -104,7 +102,6 @@ private: FeaturesVector m_vector; ScaleIndex m_index; - unique_ptr m_houseToStreetTable; LazyCentersTable m_centers; DISALLOW_COPY_AND_MOVE(MwmContext); diff --git a/search/reverse_geocoder.cpp b/search/reverse_geocoder.cpp index e35118d754..edbfe97a4d 100644 --- a/search/reverse_geocoder.cpp +++ b/search/reverse_geocoder.cpp @@ -86,23 +86,22 @@ void ReverseGeocoder::GetNearbyStreets(FeatureType & ft, vector & street } // static -size_t ReverseGeocoder::GetMatchedStreetIndex(strings::UniString const & keyName, - vector const & streets) +boost::optional ReverseGeocoder::GetMatchedStreetIndex(strings::UniString const & keyName, + vector const & streets) { // Find the exact match or the best match in kSimilarityTresholdPercent limit. - size_t const count = streets.size(); - size_t result = count; + uint32_t result; size_t minPercent = kSimilarityThresholdPercent + 1; - for (size_t i = 0; i < count; ++i) + for (auto const & street : streets) { - strings::UniString const actual = GetStreetNameAsKey(streets[i].m_name); + strings::UniString const actual = GetStreetNameAsKey(street.m_name); size_t const editDistance = strings::EditDistance(keyName.begin(), keyName.end(), actual.begin(), actual.end()); if (editDistance == 0) - return i; + return street.m_id.m_index; if (actual.empty()) continue; @@ -110,12 +109,14 @@ size_t ReverseGeocoder::GetMatchedStreetIndex(strings::UniString const & keyName size_t const percent = editDistance * 100 / actual.size(); if (percent < minPercent) { - result = i; + result = street.m_id.m_index; minPercent = percent; } } - return result; + if (minPercent <= kSimilarityThresholdPercent) + return result; + return {}; } string ReverseGeocoder::GetFeatureStreetName(FeatureType & ft) const @@ -195,23 +196,49 @@ bool ReverseGeocoder::GetNearbyAddress(HouseTable & table, Building const & bld, return true; } - uint32_t ind; - if (!table.Get(bld.m_id, ind)) + uint32_t streetId; + HouseToStreetTable::StreetIdType type; + if (!table.Get(bld.m_id, type, streetId)) return false; - vector streets; - GetNearbyStreets(bld.m_id.m_mwmId, bld.m_center, streets); - if (ind < streets.size()) + switch (type) { - addr.m_building = bld; - addr.m_street = streets[ind]; - return true; - } - else + case HouseToStreetTable::StreetIdType::Index: { - LOG(LWARNING, ("Out of bound street index", ind, "for", bld.m_id)); + vector streets; + GetNearbyStreets(bld.m_id.m_mwmId, bld.m_center, streets); + if (streetId < streets.size()) + { + addr.m_building = bld; + addr.m_street = streets[streetId]; + return true; + } + LOG(LWARNING, ("Out of bound street index", streetId, "for", bld.m_id)); return false; } + case HouseToStreetTable::StreetIdType::FeatureId: + { + FeatureID streetFeature(bld.m_id.m_mwmId, streetId); + string streetName; + double distance; + m_dataSource.ReadFeature( + [&](FeatureType & ft) { + ft.GetName(StringUtf8Multilang::kDefaultCode, streetName); + distance = feature::GetMinDistanceMeters(ft, bld.m_center); + }, + streetFeature); + CHECK(!streetName.empty(), ()); + addr.m_building = bld; + addr.m_street = Street(streetFeature, distance, streetName); + return true; + } + case HouseToStreetTable::StreetIdType::None: + { + // Prior call of table.Get() is expected to fail. + UNREACHABLE(); + } + } + UNREACHABLE(); } void ReverseGeocoder::GetNearbyBuildings(m2::PointD const & center, double radius, @@ -235,7 +262,9 @@ ReverseGeocoder::Building ReverseGeocoder::FromFeature(FeatureType & ft, double return { ft.GetID(), distMeters, ft.GetHouseNumber(), feature::GetCenter(ft) }; } -bool ReverseGeocoder::HouseTable::Get(FeatureID const & fid, uint32_t & streetIndex) +bool ReverseGeocoder::HouseTable::Get(FeatureID const & fid, + HouseToStreetTable::StreetIdType & type, + uint32_t & streetIndex) { if (feature::FakeFeatureIds::IsEditorCreatedFeature(fid.m_index)) return false; @@ -251,6 +280,7 @@ bool ReverseGeocoder::HouseTable::Get(FeatureID const & fid, uint32_t & streetIn m_table = search::HouseToStreetTable::Load(*m_handle.GetValue()); } + type = m_table->GetStreetIdType(); return m_table->Get(fid.m_index, streetIndex); } diff --git a/search/reverse_geocoder.hpp b/search/reverse_geocoder.hpp index d9e6132aa1..31ab18f289 100644 --- a/search/reverse_geocoder.hpp +++ b/search/reverse_geocoder.hpp @@ -13,6 +13,8 @@ #include #include +#include + class FeatureType; class DataSource; @@ -64,8 +66,10 @@ public: } }; - static size_t GetMatchedStreetIndex(strings::UniString const & keyName, - std::vector const & streets); + /// Returns a feature id of street from |streets| whose name best matches |keyName| + /// or empty value if the match was not found. + static boost::optional GetMatchedStreetIndex(strings::UniString const & keyName, + std::vector const & streets); struct Address { @@ -112,7 +116,8 @@ private: { public: explicit HouseTable(DataSource const & dataSource) : m_dataSource(dataSource) {} - bool Get(FeatureID const & fid, uint32_t & streetIndex); + bool Get(FeatureID const & fid, HouseToStreetTable::StreetIdType & type, + uint32_t & streetIndex); private: DataSource const & m_dataSource;