forked from organicmaps/organicmaps
[search] Fixed search quality scripts.
This commit is contained in:
parent
bea4069067
commit
4212a0e4a1
3 changed files with 43 additions and 13 deletions
|
@@ -41,10 +41,17 @@ exec /usr/local/bin/sbcl --noinform --quit --load $0 --end-toplevel-options "$@"
|
|||
(maxx :initarg :maxx)
|
||||
(maxy :initarg :maxy)))
|
||||
|
||||
(defun position-x-y (x y)
|
||||
(assert (and (>= x *minx*) (<= x *maxx*)))
|
||||
(assert (and (>= y *miny*) (<= y *maxy*)))
|
||||
(make-instance 'pos :x x :y y))
|
||||
|
||||
(defun position-lat-lon (lat lon)
|
||||
(make-instance 'pos :x (lon-to-x lon) :y (lat-to-y lat)))
|
||||
(position-x-y (lon-to-x lon) (lat-to-y lat)))
|
||||
|
||||
(defun viewport (&key minx miny maxx maxy)
|
||||
(assert (<= minx maxx))
|
||||
(assert (<= miny maxy))
|
||||
(make-instance 'viewport :minx minx :maxx maxx :miny miny :maxy maxy))
|
||||
|
||||
(defclass result ()
|
||||
|
|
|
@@ -38,23 +38,23 @@ def normalize_data(data):
|
|||
data['Relevance'] = data['Relevance'].apply(lambda r: RELEVANCES[r])
|
||||
|
||||
|
||||
def compute_ndcg(scores):
|
||||
def compute_ndcg(relevances):
|
||||
"""
|
||||
Computes NDCG (Normalized Discounted Cumulative Gain) for a given
|
||||
array of scores.
|
||||
"""
|
||||
|
||||
scores_summary = collections.defaultdict(int)
|
||||
relevances_summary = collections.defaultdict(int)
|
||||
|
||||
dcg = 0
|
||||
for i, score in enumerate(scores):
|
||||
dcg += score / log(2 + i, 2)
|
||||
scores_summary[score] += 1
|
||||
for i, relevance in enumerate(relevances):
|
||||
dcg += relevance / log(2 + i, 2)
|
||||
relevances_summary[relevance] += 1
|
||||
|
||||
dcg_norm, i = 0, 0
|
||||
for score in sorted(scores_summary.keys(), reverse=True):
|
||||
for _ in range(scores_summary[score]):
|
||||
dcg_norm += score / log(2 + i, 2)
|
||||
for relevance in sorted(relevances_summary.keys(), reverse=True):
|
||||
for _ in range(relevances_summary[relevance]):
|
||||
dcg_norm += relevance / log(2 + i, 2)
|
||||
i += 1
|
||||
|
||||
if dcg_norm == 0:
|
||||
|
@@ -62,6 +62,24 @@ def compute_ndcg(scores):
|
|||
return dcg / dcg_norm
|
||||
|
||||
|
||||
def compute_ndcg_without_w(data):
|
||||
"""
|
||||
Computes NDCG (Normalized Discounted Cumulative Gain) for a given
|
||||
data. Returns an array of ndcg scores in the shape [num groups of
|
||||
features].
|
||||
"""
|
||||
|
||||
grouped = data.groupby(data['SampleId'], sort=False).groups
|
||||
|
||||
ndcgs = []
|
||||
for id in grouped:
|
||||
indices = grouped[id]
|
||||
relevances = np.array(data.ix[indices]['Relevance'])
|
||||
ndcgs.append(compute_ndcg(relevances))
|
||||
|
||||
return np.array(ndcgs)
|
||||
|
||||
|
||||
def compute_ndcg_for_w(data, w):
|
||||
"""
|
||||
Computes NDCG (Normalized Discounted Cumulative Gain) for a given
|
||||
|
@@ -120,6 +138,11 @@ def transform_data(data):
|
|||
def main(args):
|
||||
data = pd.read_csv(sys.stdin)
|
||||
normalize_data(data)
|
||||
|
||||
ndcg = compute_ndcg_without_w(data);
|
||||
print('Current NDCG: {}, std: {}'.format(np.mean(ndcg), np.std(ndcg)))
|
||||
print()
|
||||
|
||||
x, y = transform_data(data)
|
||||
|
||||
clf = svm.LinearSVC(random_state=args.seed)
|
||||
|
|
|
@@ -11,11 +11,11 @@ namespace
|
|||
{
|
||||
// See search/search_quality/scoring_model.py for details. In short,
|
||||
// these coeffs correspond to coeffs in a linear model.
|
||||
double const kDistanceToPivot = 13.531;
|
||||
double const kRank = 16.295;
|
||||
double const kNameScore = 1.0;
|
||||
double const kDistanceToPivot = 24.443;
|
||||
double const kRank = 11.010;
|
||||
double const kNameScore = 1.0;
|
||||
double const kNameCoverage = 0.0;
|
||||
double const kSearchType = 10.692;
|
||||
double const kSearchType = 22.378;
|
||||
|
||||
double TransformDistance(double distance)
|
||||
{
|
||||
|
|
Loading…
Add table
Reference in a new issue