Review fixes

2016-05-31 20:27:25 +03:00 · 2016-05-31 20:27:25 +03:00 · 669fcc90b3
commit 669fcc90b3
parent 3e212a01d1
3 changed files with 21 additions and 5 deletions
--- a/generator/booking_dataset.cpp
+++ b/generator/booking_dataset.cpp
@ -271,7 +271,7 @@ bool BookingDataset::MatchWithBooking(OsmElement const & e) const
    auto const & hotel = GetHotel(j);
    double const distanceMeters = ms::DistanceOnEarth(e.lat, e.lon, hotel.lat, hotel.lon);
    double score = ScoreByLinearNormDistance(distanceMeters);
-    matched = score > kOptimalThreshold;
+    matched = matched || score > kOptimalThreshold;
  }

  return matched;
--- a/generator/booking_quality_check/booking_quality_check.cpp
+++ b/generator/booking_quality_check/booking_quality_check.cpp
@ -57,6 +57,8 @@ int main(int argc, char * argv[])
  vector<size_t> elementIndexes(elements.size());
  iota(elementIndexes.begin(), elementIndexes.end(), 0);

+  // In the first implementation, we used random_shuffle for the reference dataset.
+  // Next time we are going to replace random_shuffle with shuffle using a defined seed.
  random_shuffle(elementIndexes.begin(), elementIndexes.end());
  if (FLAGS_selection_size < elementIndexes.size())
    elementIndexes.resize(FLAGS_selection_size);
--- a/tools/python/booking_hotels_quality.py
+++ b/tools/python/booking_hotels_quality.py
@ -9,6 +9,7 @@ import argparse
 import base64
 import json
 import logging
+import matplotlib.pyplot as plt
 import os
 import pickle
 import time
@ -17,7 +18,11 @@ import urllib2
 # init logging
 logging.basicConfig(level=logging.DEBUG, format='[%(asctime)s] %(levelname)s: %(message)s')

+
 def load_binary_list(path):
+    """
+    Loads binary classifier output
+    """
    bits = []
    with open(path, 'r') as fd:
        for line in fd:
@ -26,7 +31,11 @@ def load_binary_list(path):
            bits.append(1 if line[0] == 'y' else 0)
    return bits

+
 def load_score_list(path):
+    """
+    Loads list of scores
+    """
    scores = []
    with open(path, 'r') as fd:
        for line in fd:
@ -35,15 +44,17 @@ def load_score_list(path):
            scores.append(float(line[line.rfind(':')+2:]))
    return scores

+
 def process_options():
-    parser = argparse.ArgumentParser(description='Download and process booking hotels.')
+    parser = argparse.ArgumentParser(description="Download and process booking hotels.")
    parser.add_argument("-v", "--verbose", action="store_true", dest="verbose")
    parser.add_argument("-q", "--quiet", action="store_false", dest="verbose")

    parser.add_argument("--reference_list", dest="reference_list", help="Path to data files")
    parser.add_argument("--sample_list", dest="sample_list", help="Name and destination for output file")

-    parser.add_argument("--show", dest="show", default=False, action="store_true", help="Show graph for precision and recall")
+    parser.add_argument("--show", dest="show", default=False, action="store_true",
+                        help="Show graph for precision and recall")

    options = parser.parse_args()

@ -53,6 +64,7 @@ def process_options():

    return options

+
 def main():
    options = process_options()
    reference = load_binary_list(options.reference_list)
@ -60,12 +72,14 @@ def main():

    precision, recall, threshold = metrics.precision_recall_curve(reference, sample)
    aa = zip(precision, recall, threshold)
-    print("Optimal thrashold: {2} for precision: {0} and recall: {1}".format(*max(aa, key=lambda (p, r, t): p*r/(p+r))))
+    print("Optimal threshold: {2} for precision: {0} and recall: {1}".format(*max(aa, key=lambda (p, r, t): p*r/(p+r))))
    print("AUC: {0}".format(metrics.roc_auc_score(reference, sample)))

    if options.show:
-        import matplotlib.pyplot as plt
        plt.plot(recall, precision)
+        plt.title("Precision/Recall")
+        plt.ylabel("Precision")
+        plt.xlabel("Recall")
        plt.show()