From 083eeca32328b117fbb970d214a39bf24b3f9f78 Mon Sep 17 00:00:00 2001 From: Yuri Gorshenin Date: Mon, 28 Mar 2016 18:26:15 +0300 Subject: [PATCH] [search][quality] Fixed IR SVM implementation. --- search/search_quality/scoring_model.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/search/search_quality/scoring_model.py b/search/search_quality/scoring_model.py index 7b22a9bfda..9c5e8d394c 100755 --- a/search/search_quality/scoring_model.py +++ b/search/search_quality/scoring_model.py @@ -9,7 +9,7 @@ import numpy as np import pandas as pd import sys -FEATURES = ['DistanceToPivot', 'Rank', 'NameScore', 'SearchType'] +FEATURES = ['DistanceToPivot', 'Rank', 'NameScore', 'NameCoverage', 'SearchType'] DISTANCE_WINDOW = 1e9 MAX_RANK = 255 @@ -122,11 +122,18 @@ def transform_data(data): n, total = len(group), 0 for _, (i, j) in enumerate(itertools.combinations(range(n), 2)): - y = np.sign(relevances.iloc[j] - relevances.iloc[i]) + dr = relevances.iloc[j] - relevances.iloc[i] + y = np.sign(dr) if y == 0: continue x = np.array(features.iloc[j]) - np.array(features.iloc[i]) + + # Need to multiply x by average drop in NDCG when i-th and + # j-th are exchanged. + x *= abs(dr * (1 / log(j + 2, 2) - 1 / log(i + 2, 2))) + + # This is needed to prevent disbalance in classes sizes. if y != k: x = np.negative(x) y = -y