Merge pull request #3243 from mgsergio/fields-validation

[editor] Implement fields validation in editor.
This commit is contained in:
ygorshenin 2016-05-25 14:27:20 +03:00
commit 6da827a933
20 changed files with 411 additions and 182 deletions

View file

@ -1,11 +1,13 @@
#include "indexer/classificator.hpp"
#include "indexer/cuisines.hpp"
#include "indexer/editable_map_object.hpp"
#include "indexer/postcodes_matcher.hpp"
#include "base/macros.hpp"
#include "base/string_utils.hpp"
#include "std/cctype.hpp"
#include "std/cmath.hpp"
namespace osm
{
@ -95,29 +97,6 @@ void EditableMapObject::SetNearbyStreets(vector<LocalizedStreet> && streets)
m_nearbyStreets = move(streets);
}
// static
// Returns true when |houseNumber| is empty, or when it is at most
// kMaxHouseNumberLength UniString characters long and contains at least one
// digit after digit normalization.
bool EditableMapObject::ValidateHouseNumber(string const & houseNumber)
{
  // An absent house number is acceptable.
  if (houseNumber.empty())
    return true;

  // TODO: Improve this basic limit - it was chosen by @Zverik.
  auto constexpr kMaxHouseNumberLength = 15;

  // The length limit is applied to the UniString form, not to the raw bytes.
  strings::UniString normalized = strings::MakeUniString(houseNumber);
  if (normalized.size() > kMaxHouseNumberLength)
    return false;

  // TODO: Should we allow arabic numbers like U+0661 ١ Arabic-Indic Digit One?
  strings::NormalizeDigits(normalized);

  // Valid house numbers contain at least one digit.
  bool hasDigit = false;
  for (strings::UniChar const c : normalized)
  {
    if ('0' <= c && c <= '9')
    {
      hasDigit = true;
      break;
    }
  }
  return hasDigit;
}
void EditableMapObject::SetHouseNumber(string const & houseNumber)
{
m_houseNumber = houseNumber;
@ -143,8 +122,14 @@ void EditableMapObject::SetEmail(string const & email)
m_metadata.Set(feature::Metadata::FMD_EMAIL, email);
}
void EditableMapObject::SetWebsite(string const & website)
// Stores |website| in the metadata and drops the FMD_URL entry.
// A non-empty value that starts with neither "http://" nor "https://" gets
// "http://" prepended before it is stored.
void EditableMapObject::SetWebsite(string website)
{
  bool const needsScheme =
      !website.empty() &&
      !(strings::StartsWith(website, "http://") || strings::StartsWith(website, "https://"));
  if (needsScheme)
    website = "http://" + website;

  m_metadata.Set(feature::Metadata::FMD_WEBSITE, website);
  m_metadata.Drop(feature::Metadata::FMD_URL);
}
@ -188,13 +173,6 @@ void EditableMapObject::SetFlats(string const & flats)
m_metadata.Set(feature::Metadata::FMD_FLATS, flats);
}
// static
// Returns true when |buildingLevels| parses as an unsigned integer that does
// not exceed kMaximumLevelsEditableByUsers.
bool EditableMapObject::ValidateBuildingLevels(string const & buildingLevels)
{
  uint64_t levels;
  if (!strings::to_uint64(buildingLevels, levels))
    return false;
  return levels <= kMaximumLevelsEditableByUsers;
}
void EditableMapObject::SetBuildingLevels(string const & buildingLevels)
{
m_metadata.Set(feature::Metadata::FMD_BUILDING_LEVELS, buildingLevels);
@ -213,4 +191,135 @@ void EditableMapObject::SetOpeningHours(string const & openingHours)
}
// Marks this object's geometry type as a point.
void EditableMapObject::SetPointType()
{
  m_geomType = feature::EGeomType::GEOM_POINT;
}
// static
// Returns true when |buildingLevels| is empty (the field is optional), or when
// it parses as an unsigned integer not exceeding kMaximumLevelsEditableByUsers.
bool EditableMapObject::ValidateBuildingLevels(string const & buildingLevels)
{
  // Accept an empty value explicitly, like the other validators do, instead of
  // relying on how strings::to_uint64 treats an empty string.
  if (buildingLevels.empty())
    return true;

  // Any number of up to 18 decimal digits fits into uint64_t (its maximum has
  // 20 digits), so longer inputs are rejected before parsing - presumably to
  // keep strings::to_uint64 away from out-of-range input.
  auto constexpr kMaxInputLength = 18;
  if (buildingLevels.size() > kMaxInputLength)
    return false;

  uint64_t levels;
  return strings::to_uint64(buildingLevels, levels) && levels <= kMaximumLevelsEditableByUsers;
}
// static
// Returns true when |houseNumber| is empty, or when it is at most
// kMaxHouseNumberLength UniString characters long and contains at least one
// ASCII digit after digit normalization.
bool EditableMapObject::ValidateHouseNumber(string const & houseNumber)
{
  // TODO(mgsergio): Make a better validation, use real samples for example.

  // An absent house number is acceptable.
  if (houseNumber.empty())
    return true;

  // TODO: Improve this basic limit - it was chosen by @Zverik.
  auto constexpr kMaxHouseNumberLength = 15;

  // The length limit is applied to the UniString form, not to the raw bytes.
  strings::UniString normalized = strings::MakeUniString(houseNumber);
  if (normalized.size() > kMaxHouseNumberLength)
    return false;

  // TODO: Should we allow arabic numbers like U+0661 ١ Arabic-Indic Digit One?
  strings::NormalizeDigits(normalized);

  // Valid house numbers contain at least one digit.
  bool hasDigit = false;
  for (auto const c : normalized)
  {
    if (strings::IsASCIIDigit(c))
    {
      hasDigit = true;
      break;
    }
  }
  return hasDigit;
}
// static
// Validates a list of flats: ';'-separated tokens, each token (after trimming)
// being one or two '-'-separated borders that consist of alphanumeric
// characters only, e.g. "123-456; 43-45" or "a-e".
// An empty string is valid.
bool EditableMapObject::ValidateFlats(string const & flats)
{
  // isalnum() has undefined behaviour for negative char values (for example,
  // bytes of non-ASCII UTF-8 input), so the argument is converted to
  // unsigned char first. The lambda also avoids passing a possibly overloaded
  // function name to all_of.
  auto const isAlnum = [](char c) { return isalnum(static_cast<unsigned char>(c)) != 0; };

  for (auto it = strings::SimpleTokenizer(flats, ";"); it != strings::SimpleTokenizer(); ++it)
  {
    auto token = *it;
    strings::Trim(token);

    // A token is either a single flat or a "from-to" range.
    vector<string> range(strings::SimpleTokenizer(token, "-"), strings::SimpleTokenizer());
    if (range.empty() || range.size() > 2)
      return false;

    for (auto const & rangeBorder : range)
    {
      if (!all_of(begin(rangeBorder), end(rangeBorder), isAlnum))
        return false;
    }
  }
  return true;
}
// static
// Returns true when |postCode| is empty (the field is optional) or when it
// matches a known postcode pattern as a whole, not merely as a prefix.
bool EditableMapObject::ValidatePostCode(string const & postCode)
{
  // Accept an empty value explicitly, like the other validators do, instead of
  // relying on how the matcher treats an empty token list.
  if (postCode.empty())
    return true;

  return search::LooksLikePostcode(postCode, false /* IsPrefix */);
}
// static
// Returns true when |phone| is empty, or when it consists of an optional
// leading '+' followed only by digits, '(', ')', ' ' and '-', with the number
// of digits in [kMinNumberLen, kMaxNumberLen].
bool EditableMapObject::ValidatePhone(string const & phone)
{
  if (phone.empty())
    return true;

  auto constexpr kMinNumberLen = 5;
  auto constexpr kMaxNumberLen = 15;

  auto curr = begin(phone);
  auto const last = end(phone);

  // A single '+' is allowed only as the very first character.
  if (*curr == '+')
    ++curr;

  auto digitsCount = 0;
  for (; curr != last; ++curr)
  {
    // isdigit() has undefined behaviour for negative char values (for example,
    // bytes of non-ASCII UTF-8 input), hence the cast to unsigned char.
    bool const isDigit = isdigit(static_cast<unsigned char>(*curr)) != 0;
    bool const isSeparator = *curr == '(' || *curr == ')' || *curr == ' ' || *curr == '-';
    if (!isDigit && !isSeparator)
      return false;

    if (isDigit)
      ++digitsCount;
  }

  return kMinNumberLen <= digitsCount && digitsCount <= kMaxNumberLen;
}
// static
// Returns true when |site| is empty, or when it contains at least one dot and
// neither starts nor ends with a dot.
bool EditableMapObject::ValidateWebsite(string const & site)
{
  if (site.empty())
    return true;

  // Site should contain at least one dot, but not at the beginning or the end.
  bool const hasDot = find(begin(site), end(site), '.') != end(site);
  return hasDot && site.front() != '.' && site.back() != '.';
}
// static
// Returns true when |email| is empty, or when it contains exactly one '@',
// has at least one '.' somewhere after the '@', and does not end with '.'.
bool EditableMapObject::ValidateEmail(string const & email)
{
  if (email.empty())
    return true;

  auto const finish = end(email);
  auto const atPos = find(begin(email), finish, '@');
  if (atPos == finish)
    return false;

  // Everything after the '@' sign.
  auto const afterAt = next(atPos);
  bool const hasSecondAt = find(afterAt, finish, '@') != finish;
  bool const hasDotAfterAt = find(afterAt, finish, '.') != finish;

  return !hasSecondAt && hasDotAfterAt && email.back() != '.';
}
} // namespace osm

View file

@ -78,24 +78,26 @@ public:
void SetMercator(m2::PointD const & center);
void SetType(uint32_t featureType);
void SetID(FeatureID const & fid);
// void SetTypes(feature::TypesHolder const & types);
void SetStreet(LocalizedStreet const & st);
void SetNearbyStreets(vector<LocalizedStreet> && streets);
/// @returns false if house number fails validation.
static bool ValidateHouseNumber(string const & houseNumber);
void SetHouseNumber(string const & houseNumber);
void SetPostcode(string const & postcode);
void SetPhone(string const & phone);
void SetFax(string const & fax);
void SetEmail(string const & email);
void SetWebsite(string const & website);
void SetWebsite(string website);
void SetWikipedia(string const & wikipedia);
void SetInternet(Internet internet);
void SetStars(int stars);
void SetOperator(string const & op);
void SetElevation(double ele);
void SetWikipedia(string const & wikipedia);
void SetFlats(string const & flats);
static bool ValidateBuildingLevels(string const & buildingLevels);
void SetBuildingLevels(string const & buildingLevels);
/// @param[in] cuisine is a vector of osm cuisine ids.
void SetCuisines(vector<string> const & cuisine);
@ -104,6 +106,14 @@ public:
/// Special mark that it's a point feature, not area or line.
void SetPointType();
static bool ValidateBuildingLevels(string const & buildingLevels);
static bool ValidateHouseNumber(string const & houseNumber);
static bool ValidateFlats(string const & flats);
static bool ValidatePostCode(string const & postCode);
static bool ValidatePhone(string const & phone);
static bool ValidateWebsite(string const & site);
static bool ValidateEmail(string const & email);
private:
string m_houseNumber;
LocalizedStreet m_street;

View file

@ -51,6 +51,7 @@ SOURCES += \
old/feature_loader_101.cpp \
osm_editor.cpp \
point_to_int64.cpp \
postcodes_matcher.cpp \ # it's in indexer because the editor, which is in indexer, depends on postcodes_matcher.
rank_table.cpp \
scales.cpp \
search_delimiters.cpp \ # it's in indexer because of CategoriesHolder dependency.
@ -106,12 +107,14 @@ HEADERS += \
old/interval_index_101.hpp \
osm_editor.hpp \
point_to_int64.hpp \
postcodes_matcher.hpp \ # it's in indexer because the editor, which is in indexer, depends on postcodes_matcher.
rank_table.hpp \
scale_index.hpp \
scale_index_builder.hpp \
scales.hpp \
search_delimiters.hpp \ # it's in indexer because of CategoriesHolder dependency.
search_string_utils.hpp \ # it's in indexer because of CategoriesHolder dependency.
string_slice.hpp \
succinct_trie_builder.hpp \
succinct_trie_reader.hpp \
tesselator_decl.hpp \

View file

@ -0,0 +1,111 @@
#include "testing/testing.hpp"
#include "indexer/editable_map_object.hpp"
namespace
{
using osm::EditableMapObject;
// Checks that SetWebsite keeps values that already start with http:// or
// https://, prepends http:// to a value without a scheme, and stores an empty
// value as is.
UNIT_TEST(EditableMapObject_SetWebsite)
{
EditableMapObject emo;
// Values with an explicit scheme are stored unchanged.
emo.SetWebsite("https://some.thing.org");
TEST_EQUAL(emo.GetWebsite(), "https://some.thing.org", ());
emo.SetWebsite("http://some.thing.org");
TEST_EQUAL(emo.GetWebsite(), "http://some.thing.org", ());
// A value without a scheme gets http:// prepended.
emo.SetWebsite("some.thing.org");
TEST_EQUAL(emo.GetWebsite(), "http://some.thing.org", ());
// An empty value stays empty.
emo.SetWebsite("");
TEST_EQUAL(emo.GetWebsite(), "", ());
}
// Checks ValidateBuildingLevels: an empty value and numbers up to 25 are
// expected to pass; 26, non-numeric text and an overly long digit string are
// expected to fail.
UNIT_TEST(EditableMapObject_ValidateBuildingLevels)
{
TEST(EditableMapObject::ValidateBuildingLevels(""), ());
TEST(EditableMapObject::ValidateBuildingLevels("7"), ());
TEST(EditableMapObject::ValidateBuildingLevels("17"), ());
TEST(EditableMapObject::ValidateBuildingLevels("25"), ());
TEST(!EditableMapObject::ValidateBuildingLevels("26"), ());
TEST(!EditableMapObject::ValidateBuildingLevels("ab"), ());
// A digit string far too long to be a sensible number of levels.
TEST(!EditableMapObject::ValidateBuildingLevels(
"2345534564564453645534545345534564564453645"), ());
}
// Checks ValidateHouseNumber: empty values and short values containing a digit
// pass; values without any digit or longer than the length limit fail.
UNIT_TEST(EditableMapObject_ValidateHouseNumber)
{
TEST(EditableMapObject::ValidateHouseNumber(""), ());
TEST(EditableMapObject::ValidateHouseNumber("qwer7ty"), ());
TEST(EditableMapObject::ValidateHouseNumber("12345678"), ());
// House number must contain at least one digit.
TEST(!EditableMapObject::ValidateHouseNumber("qwerty"), ());
// House number is too long (16 characters).
TEST(!EditableMapObject::ValidateHouseNumber("1234567890123456"), ());
}
// Checks ValidateFlats: single alphanumeric flats and "from-to" ranges
// separated by ';' pass; other separators, ranges with more than two borders,
// non-alphanumeric characters and empty ranges fail.
UNIT_TEST(EditableMapObject_ValidateFlats)
{
TEST(EditableMapObject::ValidateFlats(""), ());
TEST(EditableMapObject::ValidateFlats("123"), ());
TEST(EditableMapObject::ValidateFlats("123a"), ());
TEST(EditableMapObject::ValidateFlats("a"), ());
TEST(EditableMapObject::ValidateFlats("123-456;a-e"), ());
TEST(EditableMapObject::ValidateFlats("123-456"), ());
// Whitespace around a ';'-separated token is allowed.
TEST(EditableMapObject::ValidateFlats("123-456; 43-45"), ());
// ',' and ' ' are not accepted as separators.
TEST(!EditableMapObject::ValidateFlats("123-456, 43-45"), ());
TEST(!EditableMapObject::ValidateFlats("234-234 124"), ());
// A range has at most two borders.
TEST(!EditableMapObject::ValidateFlats("123-345-567"), ());
TEST(!EditableMapObject::ValidateFlats("234-234;234("), ());
// A range without borders is invalid.
TEST(!EditableMapObject::ValidateFlats("-;"), ());
}
// See search_tests/postcodes_matcher_test.cpp
// UNIT_TEST(EditableMapObject_ValidatePostCode)
// {
// }
// Checks ValidatePhone: empty values and numbers made of digits, spaces,
// parentheses, dashes and an optional leading '+' pass when the digit count is
// within the limits; too many or too few digits and letters fail.
UNIT_TEST(EditableMapObject_ValidatePhone)
{
TEST(EditableMapObject::ValidatePhone(""), ());
TEST(EditableMapObject::ValidatePhone("+7 000 000 00 00"), ());
TEST(EditableMapObject::ValidatePhone("+7 (000) 000 00 00"), ());
TEST(EditableMapObject::ValidatePhone("+7 0000000000"), ());
TEST(EditableMapObject::ValidatePhone("+7 0000 000 000"), ());
TEST(EditableMapObject::ValidatePhone("8 0000-000-000"), ());
TEST(EditableMapObject::ValidatePhone("000 00 00"), ());
TEST(EditableMapObject::ValidatePhone("000 000 00"), ());
TEST(EditableMapObject::ValidatePhone("+00 0000 000 000"), ());
// Too many digits (16).
TEST(!EditableMapObject::ValidatePhone("+00 0000 000 0000 000"), ());
// Too few digits (4).
TEST(!EditableMapObject::ValidatePhone("00 00"), ());
// Letters are not allowed.
TEST(!EditableMapObject::ValidatePhone("acb"), ());
TEST(!EditableMapObject::ValidatePhone("000 000 00b"), ());
}
// Checks ValidateWebsite: an empty value and a value with an inner dot pass;
// values without a dot, or with a leading or trailing dot, fail.
UNIT_TEST(EditableMapObject_ValidateWebsite)
{
TEST(EditableMapObject::ValidateWebsite(""), ());
TEST(EditableMapObject::ValidateWebsite("qwe.rty"), ());
TEST(!EditableMapObject::ValidateWebsite("qwerty"), ());
TEST(!EditableMapObject::ValidateWebsite(".qwerty"), ());
TEST(!EditableMapObject::ValidateWebsite("qwerty."), ());
TEST(!EditableMapObject::ValidateWebsite(".qwerty."), ());
}
// Checks ValidateEmail: an empty value and addresses with a single '@'
// followed by a dotted part pass; a missing '@', more than one '@', or no dot
// after the '@' fail.
UNIT_TEST(EditableMapObject_ValidateEmail)
{
TEST(EditableMapObject::ValidateEmail(""), ());
TEST(EditableMapObject::ValidateEmail("e@ma.il"), ());
TEST(EditableMapObject::ValidateEmail("e@ma.i.l"), ());
// No '@' at all.
TEST(!EditableMapObject::ValidateEmail("e.ma.il"), ());
// More than one '@'.
TEST(!EditableMapObject::ValidateEmail("e@ma@il"), ());
TEST(!EditableMapObject::ValidateEmail("e@ma@i.l"), ());
// No '.' after the '@'.
TEST(!EditableMapObject::ValidateEmail("e@mail"), ());
}
} // namespace

View file

@ -22,6 +22,7 @@ SOURCES += \
cell_id_test.cpp \
checker_test.cpp \
drules_selector_parser_test.cpp \
editable_map_object_test.cpp \
feature_metadata_test.cpp \
feature_xml_test.cpp \
features_offsets_table_test.cpp \
@ -33,6 +34,7 @@ SOURCES += \
interval_index_test.cpp \
mwm_set_test.cpp \
point_to_int64_test.cpp \
postcodes_matcher_tests.cpp \
rank_table_test.cpp \
scales_test.cpp \
search_string_utils_test.cpp \

View file

@ -0,0 +1,45 @@
#include "../../testing/testing.hpp"
#include "indexer/postcodes_matcher.hpp"
#include "indexer/search_delimiters.hpp"
#include "indexer/search_string_utils.hpp"
#include "base/stl_add.hpp"
#include "base/string_utils.hpp"
#include "std/string.hpp"
#include "std/vector.hpp"
using namespace strings;
namespace search
{
namespace v2
{
namespace
{
// Smoke test for LooksLikePostcode(string, bool): a set of postcode-like
// strings (digits only, letter/digit groups separated by a space or a dash,
// a leading postal mark) is expected to match, while ordinary query text is
// expected not to match.
UNIT_TEST(PostcodesMatcher_Smoke)
{
// The only case here that requires a full match rather than a prefix match.
TEST(LooksLikePostcode("141701", false /* handleAsPrefix */), ());
TEST(LooksLikePostcode("141", true /* handleAsPrefix */), ());
TEST(LooksLikePostcode("BA6 8JP", true /* handleAsPrefix */), ());
TEST(LooksLikePostcode("BA6-8JP", true /* handleAsPrefix */), ());
TEST(LooksLikePostcode("BA22 9HR", true /* handleAsPrefix */), ());
TEST(LooksLikePostcode("BA22", true /* handleAsPrefix */), ());
TEST(LooksLikePostcode("DE56 4FW", true /* handleAsPrefix */), ());
TEST(LooksLikePostcode("NY 1000", true /* handleAsPrefix */), ());
TEST(LooksLikePostcode("AZ 85203", true /* handleAsPrefix */), ());
TEST(LooksLikePostcode("AZ", true /* handleAsPrefix */), ());
TEST(LooksLikePostcode("803 0271", true /* handleAsPrefix */), ());
TEST(LooksLikePostcode("803-0271", true /* handleAsPrefix */), ());
TEST(LooksLikePostcode("〒803-0271", true /* handleAsPrefix */), ());
// Ordinary (non-postcode) queries must not match.
TEST(!LooksLikePostcode("1 мая", true /* handleAsPrefix */), ());
TEST(!LooksLikePostcode("1 мая улица", true /* handleAsPrefix */), ());
TEST(!LooksLikePostcode("москва", true /* handleAsPrefix */), ());
TEST(!LooksLikePostcode("39 с 79", true /* handleAsPrefix */), ());
}
} // namespace
} // namespace v2
} // namespace search

View file

@ -1,7 +1,4 @@
#include "search/v2/postcodes_matcher.hpp"
#include "search/v2/token_slice.hpp"
#include "indexer/postcodes_matcher.hpp"
#include "indexer/search_delimiters.hpp"
#include "indexer/search_string_utils.hpp"
@ -19,8 +16,6 @@ using namespace strings;
namespace search
{
namespace v2
{
namespace
{
// Top patterns for postcodes. See
@ -107,7 +102,7 @@ public:
// patterns.
//
// Complexity: O(total length of tokens in |slice|).
bool HasString(TokenSlice const & slice) const
bool HasString(StringSliceBase const & slice, bool isPrefix) const
{
if (slice.Size() == 0)
return m_root.m_isLeaf;
@ -115,7 +110,7 @@ public:
Node const * cur = &m_root;
for (size_t i = 0; i < slice.Size() && cur; ++i)
{
auto const & s = slice.Get(i).front();
auto const & s = slice.Get(i);
cur = cur->Move(make_transform_iterator(s.begin(), &SimplifyChar),
make_transform_iterator(s.end(), &SimplifyChar));
if (cur && i + 1 < slice.Size())
@ -125,11 +120,7 @@ public:
if (!cur)
return false;
if (slice.IsPrefix(slice.Size() - 1))
return true;
// Last token may be not a prefix, but just a part of a multi-token postcode.
if (slice.IsLast(slice.Size() - 1) && cur->Move(' ') != nullptr)
if (isPrefix)
return true;
return cur->m_isLeaf;
@ -168,8 +159,19 @@ PostcodesMatcher const & GetPostcodesMatcher()
}
} // namespace
bool LooksLikePostcode(TokenSlice const & slice) { return GetPostcodesMatcher().HasString(slice); }
// Checks the tokens of |slice| against the shared postcodes matcher.
// |isPrefix| is forwarded to PostcodesMatcher::HasString unchanged.
bool LooksLikePostcode(StringSliceBase const & slice, bool isPrefix)
{
  auto const & matcher = GetPostcodesMatcher();
  return matcher.HasString(slice, isPrefix);
}
// Tokenizes |s| with the search delimiters and checks the tokens against the
// postcodes matcher. The input is handled as a prefix only when the caller
// asks for it and the tokenizer reports that the last token is a prefix.
bool LooksLikePostcode(string const & s, bool isPrefix)
{
  vector<UniString> tokens;
  bool const lastTokenIsPrefix =
      TokenizeStringAndCheckIfLastTokenIsPrefix(s, tokens, search::Delimiters());

  bool const handleAsPrefix = isPrefix && lastTokenIsPrefix;
  // The slice only refers to |tokens|, which outlives the call below.
  NoPrefixStringSlice const slice(tokens);
  return LooksLikePostcode(slice, handleAsPrefix);
}
// Forwards to PostcodesMatcher::GetMaxNumTokensInPostcode of the shared matcher.
size_t GetMaxNumTokensInPostcode()
{
  auto const & matcher = GetPostcodesMatcher();
  return matcher.GetMaxNumTokensInPostcode();
}
} // namespace v2
} // namespace search

View file

@ -0,0 +1,17 @@
#pragma once
#include "indexer/string_slice.hpp"
#include "std/cstdint.hpp"
#include "std/string.hpp"
namespace search
{
/// If isPrefix is true returns true if some postcode starts with s.
/// If isPrefix is false returns true if s equals to some postcode.
bool LooksLikePostcode(StringSliceBase const & slice, bool isPrefix);
/// Splits s into tokens and calls LooksLikePostcode(StringSliceBase) on the result.
bool LooksLikePostcode(string const & s, bool isPrefix);
size_t GetMaxNumTokensInPostcode();
} // namespace search

36
indexer/string_slice.hpp Normal file
View file

@ -0,0 +1,36 @@
#pragma once
#include "base/string_utils.hpp"
#include "std/cstdint.hpp"
#include "std/string.hpp"
#include "std/vector.hpp"
namespace search
{
// Abstract read-only interface to an indexed sequence of strings.
class StringSliceBase
{
public:
// String type returned by Get().
using TString = strings::UniString;
virtual ~StringSliceBase() = default;
// Returns the string at position |i|.
virtual TString const & Get(size_t i) const = 0;
// Returns the number of strings in the slice.
virtual size_t Size() const = 0;
};
class NoPrefixStringSlice : public StringSliceBase
{
public:
NoPrefixStringSlice(vector<TString> const & strings)
: m_strings(strings)
{
}
virtual TString const & Get(size_t i) const override { return m_strings[i]; }
virtual size_t Size() const override { return m_strings.size(); }
private:
vector<TString> const & m_strings;
};
} // namespace search

View file

@ -402,7 +402,7 @@ int Processor::GetCategoryLocales(int8_t(&arr)[3]) const
}
template <class ToDo>
void Processor::ForEachCategoryTypes(v2::QuerySlice const & slice, ToDo toDo) const
void Processor::ForEachCategoryTypes(StringSliceBase const & slice, ToDo toDo) const
{
int8_t arrLocales[3];
int const localesCount = GetCategoryLocales(arrLocales);
@ -611,7 +611,7 @@ class PreResult2Maker
feature::TypesHolder holder(ft);
vector<pair<size_t, size_t>> matched(slice.Size());
m_processor.ForEachCategoryTypes(v2::QuerySliceOnTokens(slice), [&](size_t i, uint32_t t)
m_processor.ForEachCategoryTypes(v2::QuerySlice(slice), [&](size_t i, uint32_t t)
{
++matched[i].second;
if (holder.Has(t))

View file

@ -12,6 +12,7 @@
#include "indexer/ftypes_matcher.hpp"
#include "indexer/index.hpp"
#include "indexer/rank_table.hpp"
#include "indexer/string_slice.hpp"
#include "geometry/rect2d.hpp"
@ -141,7 +142,7 @@ protected:
int GetCategoryLocales(int8_t(&arr)[3]) const;
template <class ToDo>
void ForEachCategoryTypes(v2::QuerySlice const & slice, ToDo toDo) const;
void ForEachCategoryTypes(StringSliceBase const & slice, ToDo toDo) const;
template <class ToDo>
void ProcessEmojiIfNeeded(strings::UniString const & token, size_t ind, ToDo & toDo) const;

View file

@ -55,7 +55,6 @@ HEADERS += \
v2/locality_scorer.hpp \
v2/mwm_context.hpp \
v2/nested_rects_cache.hpp \
v2/postcodes_matcher.hpp \
v2/pre_ranking_info.hpp \
v2/processor_v2.hpp \
v2/rank_table_cache.hpp \
@ -103,7 +102,6 @@ SOURCES += \
v2/locality_scorer.cpp \
v2/mwm_context.cpp \
v2/nested_rects_cache.cpp \
v2/postcodes_matcher.cpp \
v2/pre_ranking_info.cpp \
v2/processor_v2.cpp \
v2/rank_table_cache.cpp \

View file

@ -1,72 +0,0 @@
#include "../../testing/testing.hpp"
#include "search/query_params.hpp"
#include "search/v2/postcodes_matcher.hpp"
#include "search/v2/token_slice.hpp"
#include "indexer/search_delimiters.hpp"
#include "indexer/search_string_utils.hpp"
#include "base/stl_add.hpp"
#include "base/string_utils.hpp"
#include "std/string.hpp"
#include "std/vector.hpp"
using namespace strings;
namespace search
{
namespace v2
{
namespace
{
// Test helper: tokenizes |s|, packs the tokens into QueryParams and runs
// LooksLikePostcode on a TokenSlice covering all of them. When |checkPrefix|
// is set and the tokenizer reports the last token as a prefix, that token is
// stored as the prefix token instead of a regular one.
bool LooksLikePostcode(string const & s, bool checkPrefix)
{
  vector<UniString> tokens;
  bool const lastTokenIsPrefix =
      TokenizeStringAndCheckIfLastTokenIsPrefix(s, tokens, search::Delimiters());

  // Total token count, taken before the prefix token (if any) is moved out.
  size_t const numTokens = tokens.size();

  QueryParams params;
  bool const lastIsPrefixToken = checkPrefix && lastTokenIsPrefix;
  if (lastIsPrefixToken)
  {
    params.m_prefixTokens.push_back(tokens.back());
    tokens.pop_back();
  }

  // Each remaining token becomes a single-element entry in m_tokens.
  for (auto const & token : tokens)
  {
    params.m_tokens.emplace_back();
    params.m_tokens.back().push_back(token);
  }

  TokenSlice const slice(params, 0, numTokens);
  return LooksLikePostcode(slice);
}
// Smoke test for the LooksLikePostcode(string, bool) helper defined in this
// file: postcode-like strings are expected to match, ordinary query text is
// expected not to match.
UNIT_TEST(PostcodesMatcher_Smoke)
{
// The only case here that is checked without prefix handling.
TEST(LooksLikePostcode("141701", false /* checkPrefix */), ());
TEST(LooksLikePostcode("141", true /* checkPrefix */), ());
TEST(LooksLikePostcode("BA6 8JP", true /* checkPrefix */), ());
TEST(LooksLikePostcode("BA6 8JP", true /* checkPrefix */), ());
TEST(LooksLikePostcode("BA22 9HR", true /* checkPrefix */), ());
TEST(LooksLikePostcode("BA22", true /* checkPrefix */), ());
TEST(LooksLikePostcode("DE56 4FW", true /* checkPrefix */), ());
TEST(LooksLikePostcode("NY 1000", true /* checkPrefix */), ());
TEST(LooksLikePostcode("AZ 85203", true /* checkPrefix */), ());
TEST(LooksLikePostcode("AZ", true /* checkPrefix */), ());
TEST(LooksLikePostcode("803 0271", true /* checkPrefix */), ());
TEST(LooksLikePostcode("803-0271", true /* checkPrefix */), ());
TEST(LooksLikePostcode("〒803-0271", true /* checkPrefix */), ());
// Ordinary (non-postcode) queries must not match.
TEST(!LooksLikePostcode("1 мая", true /* checkPrefix */), ());
TEST(!LooksLikePostcode("1 мая улица", true /* checkPrefix */), ());
TEST(!LooksLikePostcode("москва", true /* checkPrefix */), ());
TEST(!LooksLikePostcode("39 с 79", true /* checkPrefix */), ());
}
} // namespace
} // namespace v2
} // namespace search

View file

@ -27,7 +27,6 @@ SOURCES += \
latlon_match_test.cpp \
locality_finder_test.cpp \
locality_scorer_test.cpp \
postcodes_matcher_tests.cpp \
query_saver_tests.cpp \
ranking_tests.cpp \
string_intersection_test.cpp \

View file

@ -7,7 +7,6 @@
#include "search/v2/features_filter.hpp"
#include "search/v2/features_layer_matcher.hpp"
#include "search/v2/locality_scorer.hpp"
#include "search/v2/postcodes_matcher.hpp"
#include "search/v2/token_slice.hpp"
#include "indexer/classificator.hpp"
@ -15,6 +14,7 @@
#include "indexer/feature_impl.hpp"
#include "indexer/ftypes_matcher.hpp"
#include "indexer/index.hpp"
#include "indexer/postcodes_matcher.hpp"
#include "indexer/rank_table.hpp"
#include "indexer/search_delimiters.hpp"
#include "indexer/search_string_utils.hpp"
@ -1036,7 +1036,8 @@ void Geocoder::WithPostcodes(TFn && fn)
break;
TokenSlice slice(m_params, startToken, startToken + n);
if (LooksLikePostcode(slice))
auto const isPrefix = startToken + n == m_numTokens;
if (LooksLikePostcode(QuerySlice(slice), isPrefix))
endToken = startToken + n;
}
if (startToken == endToken)

View file

@ -1,15 +0,0 @@
#pragma once
#include "std/cstdint.hpp"
namespace search
{
namespace v2
{
class TokenSlice;
bool LooksLikePostcode(TokenSlice const & slice);
size_t GetMaxNumTokensInPostcode();
} // namespace v2
} // namespace search

View file

@ -2,6 +2,8 @@
#include "search/query_params.hpp"
#include "indexer/string_slice.hpp"
#include "base/assert.hpp"
#include "std/cstdint.hpp"
@ -67,36 +69,21 @@ private:
vector<size_t> m_indexes;
};
class QuerySlice
class QuerySlice : public StringSliceBase
{
public:
using TString = QueryParams::TString;
virtual ~QuerySlice() = default;
virtual TString const & Get(size_t i) const = 0;
virtual size_t Size() const = 0;
virtual bool IsPrefix(size_t i) const = 0;
bool Empty() const { return Size() == 0; }
};
class QuerySliceOnTokens : public QuerySlice
{
public:
QuerySliceOnTokens(TokenSlice const & slice) : m_slice(slice) {}
QuerySlice(TokenSlice const & slice) : m_slice(slice) {}
// QuerySlice overrides:
QueryParams::TString const & Get(size_t i) const override { return m_slice.Get(i).front(); }
size_t Size() const override { return m_slice.Size(); }
bool IsPrefix(size_t i) const override { return m_slice.IsPrefix(i); }
private:
TokenSlice const m_slice;
};
template <typename TCont>
class QuerySliceOnRawStrings : public QuerySlice
class QuerySliceOnRawStrings : public StringSliceBase
{
public:
QuerySliceOnRawStrings(TCont const & tokens, TString const & prefix)
@ -113,12 +100,6 @@ public:
size_t Size() const override { return m_tokens.size() + (m_prefix.empty() ? 0 : 1); }
bool IsPrefix(size_t i) const override
{
ASSERT_LESS(i, Size(), ());
return i == m_tokens.size();
}
private:
TCont const & m_tokens;
TString const & m_prefix;

View file

@ -13,6 +13,7 @@
using std::abs;
using std::isfinite;
using std::log10;
namespace math
{

View file

@ -152,6 +152,9 @@
6758AED31BB4413000C26E27 /* drules_selector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6758AECF1BB4413000C26E27 /* drules_selector.cpp */; };
6758AED41BB4413000C26E27 /* drules_selector.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 6758AED01BB4413000C26E27 /* drules_selector.hpp */; };
67F183731BD4FCF500AB1840 /* map_style.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 67F183721BD4FCF500AB1840 /* map_style.cpp */; };
E906DE381CF44917004C4F5E /* postcodes_matcher_tests.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E906DE371CF44917004C4F5E /* postcodes_matcher_tests.cpp */; };
E906DE3B1CF44934004C4F5E /* postcodes_matcher.cpp in Sources */ = {isa = PBXBuildFile; fileRef = E906DE391CF44934004C4F5E /* postcodes_matcher.cpp */; };
E906DE3C1CF44934004C4F5E /* postcodes_matcher.hpp in Headers */ = {isa = PBXBuildFile; fileRef = E906DE3A1CF44934004C4F5E /* postcodes_matcher.hpp */; };
F6381BF91CD26C9C004CA943 /* new_feature_categories.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F6381BF71CD26C9C004CA943 /* new_feature_categories.cpp */; };
F6381BFA1CD26C9C004CA943 /* new_feature_categories.hpp in Headers */ = {isa = PBXBuildFile; fileRef = F6381BF81CD26C9C004CA943 /* new_feature_categories.hpp */; };
F6DF5F2D1CD0FC9D00A87154 /* categories_index.cpp in Sources */ = {isa = PBXBuildFile; fileRef = F6DF5F2C1CD0FC9D00A87154 /* categories_index.cpp */; };
@ -322,6 +325,9 @@
6758AECF1BB4413000C26E27 /* drules_selector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = drules_selector.cpp; sourceTree = "<group>"; };
6758AED01BB4413000C26E27 /* drules_selector.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = drules_selector.hpp; sourceTree = "<group>"; };
67F183721BD4FCF500AB1840 /* map_style.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = map_style.cpp; sourceTree = "<group>"; };
E906DE371CF44917004C4F5E /* postcodes_matcher_tests.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = postcodes_matcher_tests.cpp; sourceTree = "<group>"; };
E906DE391CF44934004C4F5E /* postcodes_matcher.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = postcodes_matcher.cpp; sourceTree = "<group>"; };
E906DE3A1CF44934004C4F5E /* postcodes_matcher.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = postcodes_matcher.hpp; sourceTree = "<group>"; };
F6381BF71CD26C9C004CA943 /* new_feature_categories.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = new_feature_categories.cpp; sourceTree = "<group>"; };
F6381BF81CD26C9C004CA943 /* new_feature_categories.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = new_feature_categories.hpp; sourceTree = "<group>"; };
F6DF5F2C1CD0FC9D00A87154 /* categories_index.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = categories_index.cpp; sourceTree = "<group>"; };
@ -360,6 +366,7 @@
670C60F81AB0657700C38A8C /* indexer_tests */ = {
isa = PBXGroup;
children = (
E906DE371CF44917004C4F5E /* postcodes_matcher_tests.cpp */,
56C74C261C749E8100B71B9F /* categories_test.cpp */,
56C74C271C749E8100B71B9F /* drules_selector_parser_test.cpp */,
56C74C281C749E8100B71B9F /* feature_metadata_test.cpp */,
@ -445,6 +452,8 @@
6753409C1A3F53CB00A0A8C3 /* indexer */ = {
isa = PBXGroup;
children = (
E906DE391CF44934004C4F5E /* postcodes_matcher.cpp */,
E906DE3A1CF44934004C4F5E /* postcodes_matcher.hpp */,
F6381BF71CD26C9C004CA943 /* new_feature_categories.cpp */,
F6381BF81CD26C9C004CA943 /* new_feature_categories.hpp */,
F6DF5F301CD0FD9A00A87154 /* categories_index.hpp */,
@ -567,6 +576,7 @@
isa = PBXHeadersBuildPhase;
buildActionMask = 2147483647;
files = (
E906DE3C1CF44934004C4F5E /* postcodes_matcher.hpp in Headers */,
6753414E1A3F540F00A0A8C3 /* types_mapping.hpp in Headers */,
6753411F1A3F540F00A0A8C3 /* feature_loader.hpp in Headers */,
675341151A3F540F00A0A8C3 /* feature_covering.hpp in Headers */,
@ -782,6 +792,7 @@
674125131B4C02F100A3E828 /* map_style_reader.cpp in Sources */,
675341141A3F540F00A0A8C3 /* feature_covering.cpp in Sources */,
56C74C1D1C749E4700B71B9F /* categories_holder.cpp in Sources */,
E906DE381CF44917004C4F5E /* postcodes_matcher_tests.cpp in Sources */,
675341371A3F540F00A0A8C3 /* mwm_set.cpp in Sources */,
675341181A3F540F00A0A8C3 /* feature_decl.cpp in Sources */,
6753410B1A3F540F00A0A8C3 /* drawing_rule_def.cpp in Sources */,
@ -791,6 +802,7 @@
675341281A3F540F00A0A8C3 /* ftypes_matcher.cpp in Sources */,
675341101A3F540F00A0A8C3 /* drules_struct.pb.cc in Sources */,
6758AED11BB4413000C26E27 /* drules_selector_parser.cpp in Sources */,
E906DE3B1CF44934004C4F5E /* postcodes_matcher.cpp in Sources */,
6753413B1A3F540F00A0A8C3 /* point_to_int64.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;

View file

@ -9,8 +9,6 @@
/* Begin PBXBuildFile section */
3400A67D1CA27774003DA0EC /* geometry_cache.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 3400A67B1CA27774003DA0EC /* geometry_cache.cpp */; };
3400A67E1CA27774003DA0EC /* geometry_cache.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 3400A67C1CA27774003DA0EC /* geometry_cache.hpp */; };
341F6D891CCE13C3003F4DB2 /* postcodes_matcher.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 341F6D851CCE13C2003F4DB2 /* postcodes_matcher.cpp */; };
341F6D8A1CCE13C3003F4DB2 /* postcodes_matcher.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 341F6D861CCE13C2003F4DB2 /* postcodes_matcher.hpp */; };
341F6D8B1CCE13C3003F4DB2 /* token_slice.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 341F6D871CCE13C2003F4DB2 /* token_slice.cpp */; };
341F6D8C1CCE13C3003F4DB2 /* token_slice.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 341F6D881CCE13C2003F4DB2 /* token_slice.hpp */; };
34527C581C89D6240015050E /* pre_ranking_info.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 34527C541C89D6240015050E /* pre_ranking_info.hpp */; };
@ -103,7 +101,6 @@
675346E11A40560D00A0A8C3 /* geometry_utils.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 675346C31A40560D00A0A8C3 /* geometry_utils.hpp */; };
675346E21A40560D00A0A8C3 /* house_detector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 675346C41A40560D00A0A8C3 /* house_detector.cpp */; };
675346E31A40560D00A0A8C3 /* house_detector.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 675346C51A40560D00A0A8C3 /* house_detector.hpp */; };
675346E41A40560D00A0A8C3 /* indexed_value.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 675346C61A40560D00A0A8C3 /* indexed_value.hpp */; };
675346E51A40560D00A0A8C3 /* intermediate_result.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 675346C71A40560D00A0A8C3 /* intermediate_result.cpp */; };
675346E61A40560D00A0A8C3 /* intermediate_result.hpp in Headers */ = {isa = PBXBuildFile; fileRef = 675346C81A40560D00A0A8C3 /* intermediate_result.hpp */; };
675346E71A40560D00A0A8C3 /* keyword_lang_matcher.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 675346C91A40560D00A0A8C3 /* keyword_lang_matcher.cpp */; };
@ -146,8 +143,6 @@
/* Begin PBXFileReference section */
3400A67B1CA27774003DA0EC /* geometry_cache.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = geometry_cache.cpp; sourceTree = "<group>"; };
3400A67C1CA27774003DA0EC /* geometry_cache.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = geometry_cache.hpp; sourceTree = "<group>"; };
341F6D851CCE13C2003F4DB2 /* postcodes_matcher.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = postcodes_matcher.cpp; sourceTree = "<group>"; };
341F6D861CCE13C2003F4DB2 /* postcodes_matcher.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = postcodes_matcher.hpp; sourceTree = "<group>"; };
341F6D871CCE13C2003F4DB2 /* token_slice.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = token_slice.cpp; sourceTree = "<group>"; };
341F6D881CCE13C2003F4DB2 /* token_slice.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = token_slice.hpp; sourceTree = "<group>"; };
34527C541C89D6240015050E /* pre_ranking_info.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = pre_ranking_info.hpp; sourceTree = "<group>"; };
@ -243,7 +238,6 @@
675346C31A40560D00A0A8C3 /* geometry_utils.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = geometry_utils.hpp; sourceTree = "<group>"; };
675346C41A40560D00A0A8C3 /* house_detector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = house_detector.cpp; sourceTree = "<group>"; };
675346C51A40560D00A0A8C3 /* house_detector.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = house_detector.hpp; sourceTree = "<group>"; };
675346C61A40560D00A0A8C3 /* indexed_value.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = indexed_value.hpp; sourceTree = "<group>"; };
675346C71A40560D00A0A8C3 /* intermediate_result.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = intermediate_result.cpp; sourceTree = "<group>"; };
675346C81A40560D00A0A8C3 /* intermediate_result.hpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = intermediate_result.hpp; sourceTree = "<group>"; };
675346C91A40560D00A0A8C3 /* keyword_lang_matcher.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = keyword_lang_matcher.cpp; sourceTree = "<group>"; };
@ -303,8 +297,6 @@
347F333D1C45413C009758CC /* v2 */ = {
isa = PBXGroup;
children = (
341F6D851CCE13C2003F4DB2 /* postcodes_matcher.cpp */,
341F6D861CCE13C2003F4DB2 /* postcodes_matcher.hpp */,
341F6D871CCE13C2003F4DB2 /* token_slice.cpp */,
341F6D881CCE13C2003F4DB2 /* token_slice.hpp */,
3400A67B1CA27774003DA0EC /* geometry_cache.cpp */,
@ -444,7 +436,6 @@
675346C31A40560D00A0A8C3 /* geometry_utils.hpp */,
675346C41A40560D00A0A8C3 /* house_detector.cpp */,
675346C51A40560D00A0A8C3 /* house_detector.hpp */,
675346C61A40560D00A0A8C3 /* indexed_value.hpp */,
675346C71A40560D00A0A8C3 /* intermediate_result.cpp */,
675346C81A40560D00A0A8C3 /* intermediate_result.hpp */,
675346C91A40560D00A0A8C3 /* keyword_lang_matcher.cpp */,
@ -524,10 +515,8 @@
347F331A1C4540A8009758CC /* dummy_rank_table.hpp in Headers */,
347F335F1C45413C009758CC /* geocoder.hpp in Headers */,
34E44E711C4922E40057A5C4 /* intersection_result.hpp in Headers */,
341F6D8A1CCE13C3003F4DB2 /* postcodes_matcher.hpp in Headers */,
347F33571C45413C009758CC /* features_filter.hpp in Headers */,
345FBED01C8045EB00831ED6 /* ranking_utils.hpp in Headers */,
675346E41A40560D00A0A8C3 /* indexed_value.hpp in Headers */,
341F6D8C1CCE13C3003F4DB2 /* token_slice.hpp in Headers */,
347F33651C45413C009758CC /* mwm_context.hpp in Headers */,
675346DC1A40560D00A0A8C3 /* algos.hpp in Headers */,
@ -649,7 +638,6 @@
341F6D8B1CCE13C3003F4DB2 /* token_slice.cpp in Sources */,
34E44E741C4923100057A5C4 /* cbv_ptr.cpp in Sources */,
347F336C1C45413C009758CC /* street_vicinity_loader.cpp in Sources */,
341F6D891CCE13C3003F4DB2 /* postcodes_matcher.cpp in Sources */,
347F33201C4540A8009758CC /* projection_on_street.cpp in Sources */,
347F33601C45413C009758CC /* house_numbers_matcher.cpp in Sources */,
347F33191C4540A8009758CC /* dummy_rank_table.cpp in Sources */,