forked from organicmaps/organicmaps
Review fixes.
This commit is contained in:
parent
04e5c0eb18
commit
ddbedfde55
9 changed files with 134 additions and 84 deletions
|
@ -73,10 +73,4 @@ impl::Comparer<false, T, C> CompareBy(T (C::*p)() const)
|
|||
{
|
||||
return impl::Comparer<false, T, C>(p);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct Id
|
||||
{
|
||||
T const & operator()(T const & t) const { return t; }
|
||||
};
|
||||
} // namespace my
|
||||
|
|
|
@ -33,7 +33,7 @@ namespace
|
|||
class SearchQueryV2Test : public SearchTest
|
||||
{
|
||||
public:
|
||||
unique_ptr<TestSearchRequest> DoRequest(string const & query)
|
||||
unique_ptr<TestSearchRequest> MakeRequest(string const & query)
|
||||
{
|
||||
SearchParams params;
|
||||
params.m_query = query;
|
||||
|
@ -332,7 +332,7 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestRankingInfo)
|
|||
|
||||
SetViewport(m2::RectD(m2::PointD(-0.5, -0.5), m2::PointD(0.5, 0.5)));
|
||||
{
|
||||
auto request = DoRequest("golden gate bridge ");
|
||||
auto request = MakeRequest("golden gate bridge ");
|
||||
|
||||
TRules rules = {ExactMatch(wonderlandId, goldenGateBridge),
|
||||
ExactMatch(wonderlandId, goldenGateStreet)};
|
||||
|
@ -342,15 +342,14 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestRankingInfo)
|
|||
{
|
||||
auto const & info = result.GetRankingInfo();
|
||||
TEST_EQUAL(NAME_SCORE_FULL_MATCH, info.m_nameScore, (result));
|
||||
TEST(!info.m_matchByTrueCats, (result));
|
||||
TEST(!info.m_matchByFalseCats, (result));
|
||||
TEST(my::AlmostEqualAbs(1.0, info.m_nameCoverage, 1e-6), (info.m_nameCoverage));
|
||||
TEST(!info.m_pureCats, (result));
|
||||
TEST(!info.m_falseCats, (result));
|
||||
}
|
||||
}
|
||||
|
||||
// This test is quite important and must always pass.
|
||||
{
|
||||
auto request = DoRequest("cafe лермонтов");
|
||||
auto request = MakeRequest("cafe лермонтов");
|
||||
auto const & results = request->Results();
|
||||
|
||||
TRules rules{ExactMatch(wonderlandId, cafe1), ExactMatch(wonderlandId, cafe2),
|
||||
|
@ -475,6 +474,9 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestCategories)
|
|||
TestPOI named(m2::PointD(0.0001, 0.0001), "ATM", "en");
|
||||
named.SetTypes({{"amenity", "atm"}});
|
||||
|
||||
TestPOI busStop(m2::PointD(0.00005, 0.0005), "ATM Bus Stop", "en");
|
||||
busStop.SetTypes({{"highway", "bus_stop"}});
|
||||
|
||||
BuildWorld([&](TestMwmBuilder & builder)
|
||||
{
|
||||
builder.Add(sanFrancisco);
|
||||
|
@ -483,24 +485,42 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestCategories)
|
|||
{
|
||||
builder.Add(named);
|
||||
builder.Add(noname);
|
||||
builder.Add(busStop);
|
||||
});
|
||||
|
||||
SetViewport(m2::RectD(m2::PointD(-0.5, -0.5), m2::PointD(0.5, 0.5)));
|
||||
TRules const rules = {ExactMatch(wonderlandId, noname), ExactMatch(wonderlandId, named)};
|
||||
|
||||
{
|
||||
auto request = DoRequest("atm");
|
||||
TRules const rules = {ExactMatch(wonderlandId, noname), ExactMatch(wonderlandId, named),
|
||||
ExactMatch(wonderlandId, busStop)};
|
||||
|
||||
auto request = MakeRequest("atm");
|
||||
TEST(MatchResults(rules, request->Results()), ());
|
||||
for (auto const & result : request->Results())
|
||||
{
|
||||
Index::FeaturesLoaderGuard loader(m_engine, wonderlandId);
|
||||
FeatureType ft;
|
||||
loader.GetFeatureByIndex(result.GetFeatureID().m_index, ft);
|
||||
|
||||
auto const & info = result.GetRankingInfo();
|
||||
TEST(info.m_matchByTrueCats, (result));
|
||||
TEST(!info.m_matchByFalseCats, (result));
|
||||
|
||||
if (busStop.Matches(ft))
|
||||
{
|
||||
TEST(!info.m_pureCats, (result));
|
||||
TEST(info.m_falseCats, (result));
|
||||
}
|
||||
else
|
||||
{
|
||||
TEST(info.m_pureCats, (result));
|
||||
TEST(!info.m_falseCats, (result));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
auto request = DoRequest("#atm");
|
||||
TRules const rules = {ExactMatch(wonderlandId, noname), ExactMatch(wonderlandId, named)};
|
||||
|
||||
auto request = MakeRequest("#atm");
|
||||
|
||||
TEST(MatchResults(rules, request->Results()), ());
|
||||
for (auto const & result : request->Results())
|
||||
|
@ -510,9 +530,6 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestCategories)
|
|||
// Token with a hashtag should not participate in name-score
|
||||
// calculations.
|
||||
TEST_EQUAL(NAME_SCORE_ZERO, info.m_nameScore, (result));
|
||||
|
||||
// TODO (@y): fix this. Name coverage calculations are flawed.
|
||||
// TEST(my::AlmostEqualAbs(0.0, info.m_nameCoverage, 1e-6), (info.m_nameCoverage));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -3,25 +3,77 @@
|
|||
# Downloads all maps necessary for learning to rank to the current
|
||||
# directory.
|
||||
|
||||
case $# in
|
||||
1) VERSION="$1"
|
||||
;;
|
||||
*) echo "Usage: $0 version" 2>&1
|
||||
exit -1
|
||||
;;
|
||||
esac
|
||||
ALL=
|
||||
VERSION=
|
||||
BASE="http://direct.mapswithme.com/direct"
|
||||
|
||||
display_usage() {
|
||||
echo "Usage: $0 -v [version] -a -h"
|
||||
echo " -v version of maps to download"
|
||||
echo " -a download all maps of the specified version"
|
||||
echo " -h display this message"
|
||||
}
|
||||
|
||||
while getopts ":av:h" opt
|
||||
do
|
||||
case "$opt" in
|
||||
a) ALL=1
|
||||
;;
|
||||
v) VERSION="$OPTARG"
|
||||
;;
|
||||
h) display_usage
|
||||
exit -1
|
||||
;;
|
||||
\?) echo "Invalid option: -$OPTARG" 1>&2
|
||||
;;
|
||||
:) echo "Option -$OPTARG requires an argument" 1>&2
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [ -z "$VERSION" ]
|
||||
then
|
||||
echo "Version of maps is not specified." 1>&2
|
||||
exit -1
|
||||
fi
|
||||
|
||||
if ! curl "$BASE/" 2>/dev/null |
|
||||
sed -n 's/^.*href="\(.*\)\/".*$/\1/p' |
|
||||
grep -v "^../$" | grep -q "$VERSION"
|
||||
then
|
||||
echo "Invalid version: $VERSION" 1>&2
|
||||
exit -1
|
||||
fi
|
||||
|
||||
BASE="http://direct.mapswithme.com/direct/$VERSION/"
|
||||
NAMES=("Australia_Brisbane.mwm"
|
||||
"Belarus_Minsk*.mwm"
|
||||
"Germany_*.mwm"
|
||||
"Russia_*.mwm"
|
||||
"UK_England_*.mwm"
|
||||
"US_California_*.mwm" "US_Maryland_*.mwm")
|
||||
"US_California_*.mwm"
|
||||
"US_Maryland_*.mwm")
|
||||
|
||||
set -e
|
||||
set -x
|
||||
for name in ${NAMES[@]}
|
||||
do
|
||||
wget -r -np -nd -A "$name" "$BASE"
|
||||
done
|
||||
DIR="$BASE/$VERSION"
|
||||
|
||||
if [ "$ALL" ]
|
||||
then
|
||||
echo "Downloading all maps..."
|
||||
|
||||
files=$(curl "$DIR/" 2>/dev/null | sed -n 's/^.*href="\(.*\.mwm\)".*$/\1/p')
|
||||
|
||||
set -e
|
||||
set -x
|
||||
for file in $files
|
||||
do
|
||||
wget -np -nd "$DIR/$file"
|
||||
done
|
||||
else
|
||||
echo "Downloading maps..."
|
||||
|
||||
set -e
|
||||
set -x
|
||||
for name in ${NAMES[@]}
|
||||
do
|
||||
wget -r -np -nd -A "$name" "$DIR/"
|
||||
done
|
||||
fi
|
||||
|
|
|
@ -143,14 +143,15 @@ void DisplayStats(ostream & os, vector<Sample> const & samples, vector<Stats> co
|
|||
ASSERT_EQUAL(stats.size(), n, ());
|
||||
|
||||
size_t numWarnings = 0;
|
||||
for (auto const & stat : stats) {
|
||||
for (auto const & stat : stats)
|
||||
{
|
||||
if (!stat.m_notFound.empty())
|
||||
++numWarnings;
|
||||
}
|
||||
|
||||
if (numWarnings == 0)
|
||||
{
|
||||
os << "All " << stats.size() << " queries OK." << endl;
|
||||
os << "All " << stats.size() << " queries are OK." << endl;
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@ def normalize_data(data):
|
|||
data['Rank'] = data['Rank'].apply(lambda v: v / MAX_RANK)
|
||||
data['Relevance'] = data['Relevance'].apply(lambda v: RELEVANCES[v])
|
||||
|
||||
cats = data['MatchByTrueCats'].combine(data['MatchByFalseCats'], max)
|
||||
cats = data['PureCats'].combine(data['FalseCats'], max)
|
||||
|
||||
# Full prefix match is unified with a full match as these features
|
||||
# are collinear. But we need both of them as they're also used in
|
||||
|
@ -49,8 +49,6 @@ def normalize_data(data):
|
|||
# the features too.
|
||||
data['NameScore'] = data['NameScore'].combine(cats, transform_name_score)
|
||||
|
||||
data['NameCoverage'] = data['NameCoverage'].combine(cats, lambda v, c: v if c == 0 else 0.0)
|
||||
|
||||
# Adds dummy variables to data for NAME_SCORES.
|
||||
for ns in NAME_SCORES:
|
||||
data[ns] = data['NameScore'].apply(lambda v: int(ns == v))
|
||||
|
|
|
@ -190,20 +190,11 @@ void UpdateNameScore(string const & name, TSlice const & slice, v2::NameScore &
|
|||
|
||||
template <typename TSlice>
|
||||
void UpdateNameScore(vector<strings::UniString> const & tokens, TSlice const & slice,
|
||||
v2::NameScore & bestScore, double & bestCoverage)
|
||||
v2::NameScore & bestScore)
|
||||
{
|
||||
auto const score = v2::GetNameScore(tokens, slice);
|
||||
auto const coverage =
|
||||
tokens.empty() ? 0 : static_cast<double>(slice.Size()) / static_cast<double>(tokens.size());
|
||||
if (score > bestScore)
|
||||
{
|
||||
bestScore = score;
|
||||
bestCoverage = coverage;
|
||||
}
|
||||
else if (score == bestScore && coverage > bestCoverage)
|
||||
{
|
||||
bestCoverage = coverage;
|
||||
}
|
||||
}
|
||||
|
||||
inline bool IsHashtagged(strings::UniString const & s) { return !s.empty() && s[0] == '#'; }
|
||||
|
@ -663,8 +654,8 @@ class PreResult2Maker
|
|||
vector<strings::UniString> tokens;
|
||||
SplitUniString(NormalizeAndSimplifyString(name), MakeBackInsertFunctor(tokens), Delimiters());
|
||||
|
||||
UpdateNameScore(tokens, slice, info.m_nameScore, info.m_nameCoverage);
|
||||
UpdateNameScore(tokens, sliceNoCategories, info.m_nameScore, info.m_nameCoverage);
|
||||
UpdateNameScore(tokens, slice, info.m_nameScore);
|
||||
UpdateNameScore(tokens, sliceNoCategories, info.m_nameScore);
|
||||
}
|
||||
|
||||
if (info.m_searchType == v2::SearchModel::SEARCH_TYPE_BUILDING)
|
||||
|
@ -679,16 +670,14 @@ class PreResult2Maker
|
|||
++matched[i].first;
|
||||
});
|
||||
|
||||
info.m_matchByTrueCats =
|
||||
all_of(matched.begin(), matched.end(), [](pair<size_t, size_t> const & m)
|
||||
{
|
||||
return m.first != 0;
|
||||
});
|
||||
info.m_matchByFalseCats =
|
||||
all_of(matched.begin(), matched.end(), [](pair<size_t, size_t> const & m)
|
||||
{
|
||||
return m.first == 0 && m.second != 0;
|
||||
});
|
||||
info.m_pureCats = all_of(matched.begin(), matched.end(), [](pair<size_t, size_t> const & m)
|
||||
{
|
||||
return m.first != 0;
|
||||
});
|
||||
info.m_falseCats = all_of(matched.begin(), matched.end(), [](pair<size_t, size_t> const & m)
|
||||
{
|
||||
return m.first == 0 && m.second != 0;
|
||||
});
|
||||
}
|
||||
|
||||
uint8_t NormalizeRank(uint8_t rank, v2::SearchModel::SearchType type, m2::PointD const & center,
|
||||
|
|
|
@ -1557,12 +1557,12 @@ SearchModel::SearchType Geocoder::GetSearchTypeInGeocoding(uint32_t featureId)
|
|||
|
||||
bool Geocoder::AllTokensUsed() const
|
||||
{
|
||||
return all_of(m_usedTokens.begin(), m_usedTokens.end(), my::Id<bool>());
|
||||
return all_of(m_usedTokens.begin(), m_usedTokens.end(), IdFunctor());
|
||||
}
|
||||
|
||||
bool Geocoder::HasUsedTokensInRange(size_t from, size_t to) const
|
||||
{
|
||||
return any_of(m_usedTokens.begin() + from, m_usedTokens.begin() + to, my::Id<bool>());
|
||||
return any_of(m_usedTokens.begin() + from, m_usedTokens.begin() + to, IdFunctor());
|
||||
}
|
||||
|
||||
size_t Geocoder::NumUnusedTokensGroups() const
|
||||
|
|
|
@ -47,10 +47,9 @@ void RankingInfo::PrintCSVHeader(ostream & os)
|
|||
os << "DistanceToPivot"
|
||||
<< ",Rank"
|
||||
<< ",NameScore"
|
||||
<< ",NameCoverage"
|
||||
<< ",SearchType"
|
||||
<< ",MatchByTrueCats"
|
||||
<< ",MatchByFalseCats";
|
||||
<< ",PureCats"
|
||||
<< ",FalseCats";
|
||||
}
|
||||
|
||||
string DebugPrint(RankingInfo const & info)
|
||||
|
@ -60,10 +59,9 @@ string DebugPrint(RankingInfo const & info)
|
|||
os << "m_distanceToPivot:" << info.m_distanceToPivot << ",";
|
||||
os << "m_rank:" << static_cast<int>(info.m_rank) << ",";
|
||||
os << "m_nameScore:" << DebugPrint(info.m_nameScore) << ",";
|
||||
os << "m_nameCoverage:" << info.m_nameCoverage << ",";
|
||||
os << "m_searchType:" << DebugPrint(info.m_searchType) << ",";
|
||||
os << "m_matchByTrueCats:" << info.m_matchByTrueCats << ",";
|
||||
os << "m_matchByFalseCats:" << info.m_matchByFalseCats;
|
||||
os << "m_pureCats:" << info.m_pureCats << ",";
|
||||
os << "m_falseCats:" << info.m_falseCats;
|
||||
os << "]";
|
||||
return os.str();
|
||||
}
|
||||
|
@ -72,8 +70,7 @@ void RankingInfo::ToCSV(ostream & os) const
|
|||
{
|
||||
os << fixed;
|
||||
os << m_distanceToPivot << "," << static_cast<int>(m_rank) << "," << DebugPrint(m_nameScore)
|
||||
<< "," << m_nameCoverage << "," << DebugPrint(m_searchType) << "," << m_matchByTrueCats << ","
|
||||
<< m_matchByFalseCats;
|
||||
<< "," << DebugPrint(m_searchType) << "," << m_pureCats << "," << m_falseCats;
|
||||
}
|
||||
|
||||
double RankingInfo::GetLinearModelRank() const
|
||||
|
@ -86,11 +83,15 @@ double RankingInfo::GetLinearModelRank() const
|
|||
double const rank = static_cast<double>(m_rank) / numeric_limits<uint8_t>::max();
|
||||
|
||||
auto nameScore = m_nameScore;
|
||||
auto nameCoverage = m_nameCoverage;
|
||||
if (m_matchByTrueCats || m_matchByFalseCats)
|
||||
if (m_pureCats || m_falseCats)
|
||||
{
|
||||
// If the feature was matched only by categorial tokens, it's
|
||||
// better for ranking to set name score to zero. For example,
|
||||
// when we're looking for a "cafe", cafes "Cafe Pushkin" and
|
||||
// "Lermontov" both match to the request, but must be ranked in
|
||||
// accordance to their distances to the user position or viewport,
|
||||
// in spite of "Cafe Pushkin" has a non-zero name rank.
|
||||
nameScore = NAME_SCORE_ZERO;
|
||||
nameCoverage = 0.0;
|
||||
}
|
||||
|
||||
return kDistanceToPivot * distanceToPivot + kRank * rank + kNameScore[nameScore] +
|
||||
|
|
|
@ -24,19 +24,17 @@ struct RankingInfo
|
|||
// Score for the feature's name.
|
||||
NameScore m_nameScore = NAME_SCORE_ZERO;
|
||||
|
||||
// Fraction of tokens from the query matched to a feature name.
|
||||
double m_nameCoverage = 0;
|
||||
|
||||
// Search type for the feature.
|
||||
SearchModel::SearchType m_searchType = SearchModel::SEARCH_TYPE_COUNT;
|
||||
|
||||
// True if the feature was matched only by tokens corresponding to
|
||||
// it's categories.
|
||||
bool m_matchByTrueCats = false;
|
||||
// True if all of the tokens that the feature was matched by
|
||||
// correspond to this feature's categories.
|
||||
bool m_pureCats = false;
|
||||
|
||||
// True if the feature was matched only by tokens don't
|
||||
// corresponding to it's categories.
|
||||
bool m_matchByFalseCats = false;
|
||||
// True if none of the tokens that the feature was matched by
|
||||
// corresponds to this feature's categories although all of the
|
||||
// tokens are categorial ones.
|
||||
bool m_falseCats = false;
|
||||
|
||||
static void PrintCSVHeader(ostream & os);
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue