Review fixes.

This commit is contained in:
Yuri Gorshenin 2016-05-19 14:26:44 +03:00
parent 04e5c0eb18
commit ddbedfde55
9 changed files with 134 additions and 84 deletions

View file

@ -73,10 +73,4 @@ impl::Comparer<false, T, C> CompareBy(T (C::*p)() const)
{
return impl::Comparer<false, T, C>(p);
}
// Identity functor: calling it hands back a const reference to the
// very object that was passed in, without copying or modifying it.
template <typename T>
struct Id
{
  T const & operator()(T const & value) const
  {
    return value;
  }
};
} // namespace my

View file

@ -33,7 +33,7 @@ namespace
class SearchQueryV2Test : public SearchTest
{
public:
unique_ptr<TestSearchRequest> DoRequest(string const & query)
unique_ptr<TestSearchRequest> MakeRequest(string const & query)
{
SearchParams params;
params.m_query = query;
@ -332,7 +332,7 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestRankingInfo)
SetViewport(m2::RectD(m2::PointD(-0.5, -0.5), m2::PointD(0.5, 0.5)));
{
auto request = DoRequest("golden gate bridge ");
auto request = MakeRequest("golden gate bridge ");
TRules rules = {ExactMatch(wonderlandId, goldenGateBridge),
ExactMatch(wonderlandId, goldenGateStreet)};
@ -342,15 +342,14 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestRankingInfo)
{
auto const & info = result.GetRankingInfo();
TEST_EQUAL(NAME_SCORE_FULL_MATCH, info.m_nameScore, (result));
TEST(!info.m_matchByTrueCats, (result));
TEST(!info.m_matchByFalseCats, (result));
TEST(my::AlmostEqualAbs(1.0, info.m_nameCoverage, 1e-6), (info.m_nameCoverage));
TEST(!info.m_pureCats, (result));
TEST(!info.m_falseCats, (result));
}
}
// This test is quite important and must always pass.
{
auto request = DoRequest("cafe лермонтов");
auto request = MakeRequest("cafe лермонтов");
auto const & results = request->Results();
TRules rules{ExactMatch(wonderlandId, cafe1), ExactMatch(wonderlandId, cafe2),
@ -475,6 +474,9 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestCategories)
TestPOI named(m2::PointD(0.0001, 0.0001), "ATM", "en");
named.SetTypes({{"amenity", "atm"}});
TestPOI busStop(m2::PointD(0.00005, 0.0005), "ATM Bus Stop", "en");
busStop.SetTypes({{"highway", "bus_stop"}});
BuildWorld([&](TestMwmBuilder & builder)
{
builder.Add(sanFrancisco);
@ -483,24 +485,42 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestCategories)
{
builder.Add(named);
builder.Add(noname);
builder.Add(busStop);
});
SetViewport(m2::RectD(m2::PointD(-0.5, -0.5), m2::PointD(0.5, 0.5)));
TRules const rules = {ExactMatch(wonderlandId, noname), ExactMatch(wonderlandId, named)};
{
auto request = DoRequest("atm");
TRules const rules = {ExactMatch(wonderlandId, noname), ExactMatch(wonderlandId, named),
ExactMatch(wonderlandId, busStop)};
auto request = MakeRequest("atm");
TEST(MatchResults(rules, request->Results()), ());
for (auto const & result : request->Results())
{
Index::FeaturesLoaderGuard loader(m_engine, wonderlandId);
FeatureType ft;
loader.GetFeatureByIndex(result.GetFeatureID().m_index, ft);
auto const & info = result.GetRankingInfo();
TEST(info.m_matchByTrueCats, (result));
TEST(!info.m_matchByFalseCats, (result));
if (busStop.Matches(ft))
{
TEST(!info.m_pureCats, (result));
TEST(info.m_falseCats, (result));
}
else
{
TEST(info.m_pureCats, (result));
TEST(!info.m_falseCats, (result));
}
}
}
{
auto request = DoRequest("#atm");
TRules const rules = {ExactMatch(wonderlandId, noname), ExactMatch(wonderlandId, named)};
auto request = MakeRequest("#atm");
TEST(MatchResults(rules, request->Results()), ());
for (auto const & result : request->Results())
@ -510,9 +530,6 @@ UNIT_CLASS_TEST(SearchQueryV2Test, TestCategories)
// Token with a hashtag should not participate in name-score
// calculations.
TEST_EQUAL(NAME_SCORE_ZERO, info.m_nameScore, (result));
// TODO (@y): fix this. Name coverage calculations are flawed.
// TEST(my::AlmostEqualAbs(0.0, info.m_nameCoverage, 1e-6), (info.m_nameCoverage));
}
}

View file

@ -3,25 +3,77 @@
# Downloads all maps necessary for learning to rank to the current
# directory.
# Exactly one positional argument is expected: the version of maps to
# download. Anything else prints the usage line and aborts.
case $# in
  1) VERSION="$1"
     ;;
  # Diagnostics belong on stderr (1>&2); exit statuses must be in 0..255,
  # so use a plain non-zero code rather than -1.
  *) echo "Usage: $0 version" 1>&2
     exit 1
     ;;
esac
ALL=
VERSION=
BASE="http://direct.mapswithme.com/direct"
# Writes a four-line summary of the supported command-line flags to
# stdout. The first line includes the name the script was invoked as ($0).
display_usage() {
  printf '%s\n' \
    "Usage: $0 -v [version] -a -h" \
    " -v version of maps to download" \
    " -a download all maps of the specified version" \
    " -h display this message"
}
# Parse command-line flags:
#   -a          sets ALL=1
#   -v VERSION  stores the argument in VERSION
#   -h          prints the usage text and exits successfully
#
# The leading ':' in the optstring selects getopts' silent mode: an
# unknown flag is reported as '?' and a flag with a missing argument as
# ':', with the offending flag letter available in OPTARG.
while getopts ":av:h" opt
do
  case "$opt" in
    a) ALL=1
       ;;
    v) VERSION="$OPTARG"
       ;;
    # Asking for help is not an error, so exit with status 0.
    h) display_usage
       exit 0
       ;;
    # Do not continue with a malformed command line: report the problem,
    # show the usage text on stderr and stop with a non-zero status.
    \?) echo "Invalid option: -$OPTARG" 1>&2
        display_usage 1>&2
        exit 1
        ;;
    :) echo "Option -$OPTARG requires an argument" 1>&2
       display_usage 1>&2
       exit 1
       ;;
  esac
done
# The version is mandatory: stop early when VERSION is empty.
if [ -z "$VERSION" ]
then
  echo "Version of maps is not specified." 1>&2
  # Exit statuses are limited to 0..255; use a plain non-zero code
  # instead of -1.
  exit 1
fi
# Check that the requested version is available under $BASE/: download
# the page (presumably an HTML directory index -- confirm against the
# server), pull out every href target that ends in '/', and look for
# the requested version among them.
#
# The comparison uses a fixed-string, whole-line match (-F -x) so that a
# partial version (e.g. "1") or regex metacharacters in $VERSION cannot
# accidentally match some other entry. The parent-directory entry is
# dropped as ".." because sed has already stripped the trailing slash.
if ! curl "$BASE/" 2>/dev/null |
     sed -n 's/^.*href="\(.*\)\/".*$/\1/p' |
     grep -Fxv -- ".." | grep -Fxq -- "$VERSION"
then
  echo "Invalid version: $VERSION" 1>&2
  # Exit statuses are limited to 0..255; use a plain non-zero code
  # instead of -1.
  exit 1
fi
BASE="http://direct.mapswithme.com/direct/$VERSION/"
NAMES=("Australia_Brisbane.mwm"
"Belarus_Minsk*.mwm"
"Germany_*.mwm"
"Russia_*.mwm"
"UK_England_*.mwm"
"US_California_*.mwm" "US_Maryland_*.mwm")
"US_California_*.mwm"
"US_Maryland_*.mwm")
# From here on abort at the first failing command (-e) and trace every
# command that is executed (-x).
set -e
set -x

# Run one recursive wget per accept pattern. The array expansion is
# quoted so that each wildcard pattern reaches wget's -A option verbatim
# instead of being glob-expanded by the shell against files that already
# exist in the current directory (e.g. from a previous run).
for name in "${NAMES[@]}"
do
  wget -r -np -nd -A "$name" "$BASE"
done
# URL of the directory that holds the maps of the requested version.
DIR="$BASE/$VERSION"

if [ "$ALL" ]
then
  echo "Downloading all maps..."

  # Collect every href target ending in ".mwm" from the page at $DIR/.
  files=$(curl "$DIR/" 2>/dev/null | sed -n 's/^.*href="\(.*\.mwm\)".*$/\1/p')

  # Abort at the first failing command and trace executed commands.
  set -e
  set -x
  # $files is intentionally unquoted: word splitting yields one file
  # name per iteration.
  for file in $files
  do
    wget -np -nd "$DIR/$file"
  done
else
  echo "Downloading maps..."

  # Abort at the first failing command and trace executed commands.
  set -e
  set -x
  # The array expansion is quoted so that each wildcard pattern reaches
  # wget's -A option verbatim instead of being glob-expanded by the shell
  # against files that already exist in the current directory.
  for name in "${NAMES[@]}"
  do
    wget -r -np -nd -A "$name" "$DIR/"
  done
fi

View file

@ -143,14 +143,15 @@ void DisplayStats(ostream & os, vector<Sample> const & samples, vector<Stats> co
ASSERT_EQUAL(stats.size(), n, ());
size_t numWarnings = 0;
for (auto const & stat : stats) {
for (auto const & stat : stats)
{
if (!stat.m_notFound.empty())
++numWarnings;
}
if (numWarnings == 0)
{
os << "All " << stats.size() << " queries OK." << endl;
os << "All " << stats.size() << " queries are OK." << endl;
return;
}

View file

@ -38,7 +38,7 @@ def normalize_data(data):
data['Rank'] = data['Rank'].apply(lambda v: v / MAX_RANK)
data['Relevance'] = data['Relevance'].apply(lambda v: RELEVANCES[v])
cats = data['MatchByTrueCats'].combine(data['MatchByFalseCats'], max)
cats = data['PureCats'].combine(data['FalseCats'], max)
# Full prefix match is unified with a full match as these features
# are collinear. But we need both of them as they're also used in
@ -49,8 +49,6 @@ def normalize_data(data):
# the features too.
data['NameScore'] = data['NameScore'].combine(cats, transform_name_score)
data['NameCoverage'] = data['NameCoverage'].combine(cats, lambda v, c: v if c == 0 else 0.0)
# Adds dummy variables to data for NAME_SCORES.
for ns in NAME_SCORES:
data[ns] = data['NameScore'].apply(lambda v: int(ns == v))

View file

@ -190,20 +190,11 @@ void UpdateNameScore(string const & name, TSlice const & slice, v2::NameScore &
template <typename TSlice>
void UpdateNameScore(vector<strings::UniString> const & tokens, TSlice const & slice,
v2::NameScore & bestScore, double & bestCoverage)
v2::NameScore & bestScore)
{
auto const score = v2::GetNameScore(tokens, slice);
auto const coverage =
tokens.empty() ? 0 : static_cast<double>(slice.Size()) / static_cast<double>(tokens.size());
if (score > bestScore)
{
bestScore = score;
bestCoverage = coverage;
}
else if (score == bestScore && coverage > bestCoverage)
{
bestCoverage = coverage;
}
}
// Returns true when |s| is non-empty and its first character is '#'.
inline bool IsHashtagged(strings::UniString const & s)
{
  if (s.empty())
    return false;
  return s[0] == '#';
}
@ -663,8 +654,8 @@ class PreResult2Maker
vector<strings::UniString> tokens;
SplitUniString(NormalizeAndSimplifyString(name), MakeBackInsertFunctor(tokens), Delimiters());
UpdateNameScore(tokens, slice, info.m_nameScore, info.m_nameCoverage);
UpdateNameScore(tokens, sliceNoCategories, info.m_nameScore, info.m_nameCoverage);
UpdateNameScore(tokens, slice, info.m_nameScore);
UpdateNameScore(tokens, sliceNoCategories, info.m_nameScore);
}
if (info.m_searchType == v2::SearchModel::SEARCH_TYPE_BUILDING)
@ -679,16 +670,14 @@ class PreResult2Maker
++matched[i].first;
});
info.m_matchByTrueCats =
all_of(matched.begin(), matched.end(), [](pair<size_t, size_t> const & m)
{
return m.first != 0;
});
info.m_matchByFalseCats =
all_of(matched.begin(), matched.end(), [](pair<size_t, size_t> const & m)
{
return m.first == 0 && m.second != 0;
});
info.m_pureCats = all_of(matched.begin(), matched.end(), [](pair<size_t, size_t> const & m)
{
return m.first != 0;
});
info.m_falseCats = all_of(matched.begin(), matched.end(), [](pair<size_t, size_t> const & m)
{
return m.first == 0 && m.second != 0;
});
}
uint8_t NormalizeRank(uint8_t rank, v2::SearchModel::SearchType type, m2::PointD const & center,

View file

@ -1557,12 +1557,12 @@ SearchModel::SearchType Geocoder::GetSearchTypeInGeocoding(uint32_t featureId)
bool Geocoder::AllTokensUsed() const
{
return all_of(m_usedTokens.begin(), m_usedTokens.end(), my::Id<bool>());
return all_of(m_usedTokens.begin(), m_usedTokens.end(), IdFunctor());
}
bool Geocoder::HasUsedTokensInRange(size_t from, size_t to) const
{
return any_of(m_usedTokens.begin() + from, m_usedTokens.begin() + to, my::Id<bool>());
return any_of(m_usedTokens.begin() + from, m_usedTokens.begin() + to, IdFunctor());
}
size_t Geocoder::NumUnusedTokensGroups() const

View file

@ -47,10 +47,9 @@ void RankingInfo::PrintCSVHeader(ostream & os)
os << "DistanceToPivot"
<< ",Rank"
<< ",NameScore"
<< ",NameCoverage"
<< ",SearchType"
<< ",MatchByTrueCats"
<< ",MatchByFalseCats";
<< ",PureCats"
<< ",FalseCats";
}
string DebugPrint(RankingInfo const & info)
@ -60,10 +59,9 @@ string DebugPrint(RankingInfo const & info)
os << "m_distanceToPivot:" << info.m_distanceToPivot << ",";
os << "m_rank:" << static_cast<int>(info.m_rank) << ",";
os << "m_nameScore:" << DebugPrint(info.m_nameScore) << ",";
os << "m_nameCoverage:" << info.m_nameCoverage << ",";
os << "m_searchType:" << DebugPrint(info.m_searchType) << ",";
os << "m_matchByTrueCats:" << info.m_matchByTrueCats << ",";
os << "m_matchByFalseCats:" << info.m_matchByFalseCats;
os << "m_pureCats:" << info.m_pureCats << ",";
os << "m_falseCats:" << info.m_falseCats;
os << "]";
return os.str();
}
@ -72,8 +70,7 @@ void RankingInfo::ToCSV(ostream & os) const
{
os << fixed;
os << m_distanceToPivot << "," << static_cast<int>(m_rank) << "," << DebugPrint(m_nameScore)
<< "," << m_nameCoverage << "," << DebugPrint(m_searchType) << "," << m_matchByTrueCats << ","
<< m_matchByFalseCats;
<< "," << DebugPrint(m_searchType) << "," << m_pureCats << "," << m_falseCats;
}
double RankingInfo::GetLinearModelRank() const
@ -86,11 +83,15 @@ double RankingInfo::GetLinearModelRank() const
double const rank = static_cast<double>(m_rank) / numeric_limits<uint8_t>::max();
auto nameScore = m_nameScore;
auto nameCoverage = m_nameCoverage;
if (m_matchByTrueCats || m_matchByFalseCats)
if (m_pureCats || m_falseCats)
{
// If the feature was matched only by categorial tokens, it's
// better for ranking to set name score to zero. For example,
// when we're looking for a "cafe", cafes "Cafe Pushkin" and
// "Lermontov" both match the request, but must be ranked
// according to their distances to the user position or viewport,
// even though "Cafe Pushkin" has a non-zero name score.
nameScore = NAME_SCORE_ZERO;
nameCoverage = 0.0;
}
return kDistanceToPivot * distanceToPivot + kRank * rank + kNameScore[nameScore] +

View file

@ -24,19 +24,17 @@ struct RankingInfo
// Score for the feature's name.
NameScore m_nameScore = NAME_SCORE_ZERO;
// Fraction of tokens from the query matched to a feature name.
double m_nameCoverage = 0;
// Search type for the feature.
SearchModel::SearchType m_searchType = SearchModel::SEARCH_TYPE_COUNT;
// True if the feature was matched only by tokens corresponding to
// its categories.
bool m_matchByTrueCats = false;
// True if all of the tokens that the feature was matched by
// correspond to this feature's categories.
bool m_pureCats = false;
// True if the feature was matched only by tokens not
// corresponding to its categories.
bool m_matchByFalseCats = false;
// True if none of the tokens that the feature was matched by
// corresponds to this feature's categories although all of the
// tokens are categorial ones.
bool m_falseCats = false;
static void PrintCSVHeader(ostream & os);