From ae4e1495284585c71f50811d8827bb24ee01110f Mon Sep 17 00:00:00 2001 From: Sergey Yershov Date: Wed, 25 May 2016 12:49:02 +0300 Subject: [PATCH 01/11] [generator] Rename functions --- generator/generator_tests/source_to_element_test.cpp | 8 ++++---- generator/osm_source.cpp | 8 ++++---- generator/osm_source.hpp | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/generator/generator_tests/source_to_element_test.cpp b/generator/generator_tests/source_to_element_test.cpp index 12a3f5c399..a2c80f8e4f 100644 --- a/generator/generator_tests/source_to_element_test.cpp +++ b/generator/generator_tests/source_to_element_test.cpp @@ -12,7 +12,7 @@ UNIT_TEST(Source_To_Element_create_from_xml_test) SourceReader reader(ss); vector elements; - BuildFeaturesFromXML(reader, [&elements](OsmElement * e) + ProcessOsmElementsFromXML(reader, [&elements](OsmElement * e) { elements.push_back(*e); }); @@ -27,7 +27,7 @@ UNIT_TEST(Source_To_Element_create_from_o5m_test) SourceReader reader(ss); vector elements; - BuildFeaturesFromO5M(reader, [&elements](OsmElement * e) + ProcessOsmElementsFromO5M(reader, [&elements](OsmElement * e) { elements.push_back(*e); }); @@ -42,7 +42,7 @@ UNIT_TEST(Source_To_Element_check_equivalence) SourceReader readerXML(ss1); vector elementsXML; - BuildFeaturesFromXML(readerXML, [&elementsXML](OsmElement * e) + ProcessOsmElementsFromXML(readerXML, [&elementsXML](OsmElement * e) { elementsXML.push_back(*e); }); @@ -52,7 +52,7 @@ UNIT_TEST(Source_To_Element_check_equivalence) SourceReader readerO5M(ss2); vector elementsO5M; - BuildFeaturesFromO5M(readerO5M, [&elementsO5M](OsmElement * e) + ProcessOsmElementsFromO5M(readerO5M, [&elementsO5M](OsmElement * e) { elementsO5M.push_back(*e); }); diff --git a/generator/osm_source.cpp b/generator/osm_source.cpp index 8280a7d8b1..19d83c3229 100644 --- a/generator/osm_source.cpp +++ b/generator/osm_source.cpp @@ -408,7 +408,7 @@ void BuildIntermediateDataFromXML(SourceReader & stream, TCache & cache, 
TownsDu ParseXMLSequence(stream, parser); } -void BuildFeaturesFromXML(SourceReader & stream, function processor) +void ProcessOsmElementsFromXML(SourceReader & stream, function processor) { XMLSource parser([&](OsmElement * e) { processor(e); }); ParseXMLSequence(stream, parser); @@ -431,7 +431,7 @@ void BuildIntermediateDataFromO5M(SourceReader & stream, TCache & cache, TownsDu } } -void BuildFeaturesFromO5M(SourceReader & stream, function processor) +void ProcessOsmElementsFromO5M(SourceReader & stream, function processor) { using TType = osm::O5MSource::EntityType; @@ -532,10 +532,10 @@ bool GenerateFeaturesImpl(feature::GenerateInfo & info) switch (info.m_osmFileType) { case feature::GenerateInfo::OsmSourceType::XML: - BuildFeaturesFromXML(reader, fn); + ProcessOsmElementsFromXML(reader, fn); break; case feature::GenerateInfo::OsmSourceType::O5M: - BuildFeaturesFromO5M(reader, fn); + ProcessOsmElementsFromO5M(reader, fn); break; } diff --git a/generator/osm_source.hpp b/generator/osm_source.hpp index 135d8e2829..08f85790ce 100644 --- a/generator/osm_source.hpp +++ b/generator/osm_source.hpp @@ -35,6 +35,6 @@ public: bool GenerateFeatures(feature::GenerateInfo & info); bool GenerateIntermediateData(feature::GenerateInfo & info); -void BuildFeaturesFromO5M(SourceReader & stream, function processor); -void BuildFeaturesFromXML(SourceReader & stream, function processor); +void ProcessOsmElementsFromO5M(SourceReader & stream, function processor); +void ProcessOsmElementsFromXML(SourceReader & stream, function processor); From dbdf721ac905bf3b8ddfc7e7ba0513a01d8971d7 Mon Sep 17 00:00:00 2001 From: Sergey Yershov Date: Wed, 25 May 2016 12:50:37 +0300 Subject: [PATCH 02/11] [booking] Add quality check for booking --- .../booking_quality_check.cpp | 62 +++++++++ .../generator_tool.xcodeproj/project.pbxproj | 126 +++++++++++++++++- 2 files changed, 186 insertions(+), 2 deletions(-) create mode 100644 generator/booking_quality_check/booking_quality_check.cpp diff --git 
a/generator/booking_quality_check/booking_quality_check.cpp b/generator/booking_quality_check/booking_quality_check.cpp new file mode 100644 index 0000000000..bec9448bdf --- /dev/null +++ b/generator/booking_quality_check/booking_quality_check.cpp @@ -0,0 +1,62 @@ +#include "std/iostream.hpp" + +#include "generator/booking_dataset.hpp" +#include "generator/osm_source.hpp" + +#include "3party/gflags/src/gflags/gflags.h" + +DEFINE_bool(generate_classif, false, "Generate classificator."); + +DEFINE_bool(preprocess, false, "1st pass - count features"); +DEFINE_string(osm_file_name, "", "Input .o5m file"); +DEFINE_string(booking_data, "", "Path to booking data in .tsv format"); +DEFINE_uint64(selection_size, 1000, "Selection size"); + +int main(int argc, char * argv[]) +{ + google::SetUsageMessage("Takes OSM XML data from stdin and creates" + " data and index files in several passes."); + google::ParseCommandLineFlags(&argc, &argv, true); + + LOG_SHORT(LINFO, ("Booking data:",FLAGS_booking_data)); + + BookingDataset bookingDataset(FLAGS_booking_data); + + // Here we can add new tags to element!!! + auto const filterAction = [&](OsmElement * e) + { + if (bookingDataset.BookingFilter(*e)) + return; + + }; + + vector elements; + auto const counterAction = [&](OsmElement * e) + { + if (bookingDataset.TourismFilter(*e)) + elements.emplace_back(*e); + }; + + LOG_SHORT(LINFO, ("OSM data:", FLAGS_osm_file_name)); + { + SourceReader reader = FLAGS_osm_file_name.empty() ? 
SourceReader() : SourceReader(FLAGS_osm_file_name); + ProcessOsmElementsFromO5M(reader, counterAction); + } + LOG_SHORT(LINFO, ("Tourism elements:", elements.size())); + + vector elementIndexes(elements.size()); + size_t counter = 0; + for (auto & e : elementIndexes) + e = counter++; + + random_shuffle(elementIndexes.begin(), elementIndexes.end()); + elementIndexes.resize(FLAGS_selection_size); + + vector selectedElements; + for (size_t i : elementIndexes) + selectedElements.emplace_back(elements[i]); + + + + return 0; +} \ No newline at end of file diff --git a/xcode/generator_tool/generator_tool.xcodeproj/project.pbxproj b/xcode/generator_tool/generator_tool.xcodeproj/project.pbxproj index 058fd0bd77..d8445bd6f5 100644 --- a/xcode/generator_tool/generator_tool.xcodeproj/project.pbxproj +++ b/xcode/generator_tool/generator_tool.xcodeproj/project.pbxproj @@ -17,6 +17,23 @@ 6726C2231A4C2BBD005EEA39 /* IOKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 675343E01A3F600D00A0A8C3 /* IOKit.framework */; }; 6726C2261A4C2BBD005EEA39 /* Cocoa.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 675343E21A3F607600A0A8C3 /* Cocoa.framework */; }; 6726C2411A4C2D9F005EEA39 /* testingmain.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 6726C2401A4C2D9F005EEA39 /* testingmain.cpp */; }; + 673746671CF4641B005E6D1F /* booking_quality_check.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 673746661CF4641B005E6D1F /* booking_quality_check.cpp */; }; + 673746681CF47A7B005E6D1F /* libgflags.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 674A28CC1B1C7FED001A525C /* libgflags.a */; }; + 673746691CF47BD9005E6D1F /* libgenerator.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 674A28CA1B1C7FED001A525C /* libgenerator.a */; }; + 6737466A1CF47D82005E6D1F /* libbase.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 674A28D71B1C800D001A525C /* libbase.a */; }; + 6737466B1CF47D82005E6D1F /* libplatform.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 
674A28D31B1C8001001A525C /* libplatform.a */; }; + 6737466C1CF47D82005E6D1F /* libcoding.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 674A28C91B1C7FED001A525C /* libcoding.a */; }; + 6737466D1CF47D82005E6D1F /* libgeometry.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 674A28CB1B1C7FED001A525C /* libgeometry.a */; }; + 6737466E1CF47D82005E6D1F /* libindexer.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 674A28CD1B1C7FED001A525C /* libindexer.a */; }; + 6737466F1CF47D82005E6D1F /* libexpat.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 674A28C71B1C7FC9001A525C /* libexpat.a */; }; + 673746701CF47E14005E6D1F /* libeditor.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 675340891C523054002CF0D9 /* libeditor.a */; }; + 673746711CF47E14005E6D1F /* librouting.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 674A28D41B1C8001001A525C /* librouting.a */; }; + 673746721CF47E3D005E6D1F /* libpugixml.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 6753408A1C523054002CF0D9 /* libpugixml.a */; }; + 673746731CF47E54005E6D1F /* libjansson.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 674A28ED1B1C80FB001A525C /* libjansson.a */; }; + 673746741CF47E54005E6D1F /* libtomcrypt.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 674A28DD1B1C8027001A525C /* libtomcrypt.a */; }; + 673746751CF47E7A005E6D1F /* CoreFoundation.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 675343DE1A3F5FE500A0A8C3 /* CoreFoundation.framework */; }; + 673746761CF47E7F005E6D1F /* IOKit.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 675343E01A3F600D00A0A8C3 /* IOKit.framework */; }; + 673746771CF47E83005E6D1F /* Cocoa.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 675343E21A3F607600A0A8C3 /* Cocoa.framework */; }; 674A28C81B1C7FC9001A525C /* libexpat.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 674A28C71B1C7FC9001A525C /* libexpat.a */; }; 674A28CE1B1C7FED001A525C /* libcoding.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 
674A28C91B1C7FED001A525C /* libcoding.a */; }; 674A28CF1B1C7FED001A525C /* libgenerator.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 674A28CA1B1C7FED001A525C /* libgenerator.a */; }; @@ -82,6 +99,15 @@ ); runOnlyForDeploymentPostprocessing = 1; }; + 6737465D1CF4639F005E6D1F /* CopyFiles */ = { + isa = PBXCopyFilesBuildPhase; + buildActionMask = 2147483647; + dstPath = /usr/share/man/man1/; + dstSubfolderSpec = 0; + files = ( + ); + runOnlyForDeploymentPostprocessing = 1; + }; 675341561A3F54D800A0A8C3 /* CopyFiles */ = { isa = PBXCopyFilesBuildPhase; buildActionMask = 2147483647; @@ -162,6 +188,8 @@ 6726C1EC1A4C28D5005EEA39 /* triangles_tree_coding_test.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = triangles_tree_coding_test.cpp; sourceTree = ""; }; 6726C2351A4C2BBD005EEA39 /* generator_tests */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = generator_tests; sourceTree = BUILT_PRODUCTS_DIR; }; 6726C2401A4C2D9F005EEA39 /* testingmain.cpp */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.cpp.cpp; name = testingmain.cpp; path = ../../testing/testingmain.cpp; sourceTree = ""; }; + 6737465F1CF4639F005E6D1F /* booking_quality_check */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = booking_quality_check; sourceTree = BUILT_PRODUCTS_DIR; }; + 673746661CF4641B005E6D1F /* booking_quality_check.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = booking_quality_check.cpp; sourceTree = ""; }; 674A28C71B1C7FC9001A525C /* libexpat.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libexpat.a; path = "../../../omim-xcode-build/Debug/libexpat.a"; sourceTree = ""; }; 674A28C91B1C7FED001A525C /* libcoding.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libcoding.a; path = 
"../../../omim-xcode-build/Debug/libcoding.a"; sourceTree = ""; }; 674A28CA1B1C7FED001A525C /* libgenerator.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libgenerator.a; path = "../../../omim-xcode-build/Debug/libgenerator.a"; sourceTree = ""; }; @@ -198,7 +226,6 @@ 67AB92C61B73D03500AB5194 /* libmap.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libmap.a; path = "../../../omim-xcode-build/Debug/libmap.a"; sourceTree = ""; }; 67AB92C81B73D10200AB5194 /* libosrm.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libosrm.a; path = "../../../omim-xcode-build/Debug/libosrm.a"; sourceTree = ""; }; 67AB92CA1B73D10B00AB5194 /* libsuccinct.a */ = {isa = PBXFileReference; lastKnownFileType = archive.ar; name = libsuccinct.a; path = "../../../omim-xcode-build/Debug/libsuccinct.a"; sourceTree = ""; }; - 67AB92CC1B73D15700AB5194 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = System/Library/Frameworks/Foundation.framework; sourceTree = SDKROOT; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ @@ -230,6 +257,29 @@ ); runOnlyForDeploymentPostprocessing = 0; }; + 6737465C1CF4639F005E6D1F /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 673746771CF47E83005E6D1F /* Cocoa.framework in Frameworks */, + 673746701CF47E14005E6D1F /* libeditor.a in Frameworks */, + 6737466B1CF47D82005E6D1F /* libplatform.a in Frameworks */, + 673746741CF47E54005E6D1F /* libtomcrypt.a in Frameworks */, + 673746711CF47E14005E6D1F /* librouting.a in Frameworks */, + 6737466D1CF47D82005E6D1F /* libgeometry.a in Frameworks */, + 6737466F1CF47D82005E6D1F /* libexpat.a in Frameworks */, + 673746681CF47A7B005E6D1F /* libgflags.a in Frameworks */, + 673746731CF47E54005E6D1F /* libjansson.a in Frameworks */, + 673746761CF47E7F005E6D1F /* IOKit.framework in Frameworks */, + 
673746691CF47BD9005E6D1F /* libgenerator.a in Frameworks */, + 6737466E1CF47D82005E6D1F /* libindexer.a in Frameworks */, + 673746751CF47E7A005E6D1F /* CoreFoundation.framework in Frameworks */, + 673746721CF47E3D005E6D1F /* libpugixml.a in Frameworks */, + 6737466C1CF47D82005E6D1F /* libcoding.a in Frameworks */, + 6737466A1CF47D82005E6D1F /* libbase.a in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; 675341551A3F54D800A0A8C3 /* Frameworks */ = { isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; @@ -352,10 +402,20 @@ path = ../../generator/generator_tests; sourceTree = ""; }; + 6737465A1CF46324005E6D1F /* booking_quality_check */ = { + isa = PBXGroup; + children = ( + 673746661CF4641B005E6D1F /* booking_quality_check.cpp */, + ); + name = booking_quality_check; + path = ../../generator/booking_quality_check; + sourceTree = ""; + }; 6753414F1A3F54D800A0A8C3 = { isa = PBXGroup; children = ( 670D05AD1B0E08260013A7AC /* defaults.xcconfig */, + 6737465A1CF46324005E6D1F /* booking_quality_check */, 670B84C41A9F73AB00CE4492 /* std */, 6753453F1A3F6FA600A0A8C3 /* libs */, 6726C1D71A4C27A5005EEA39 /* generator_tests */, @@ -369,6 +429,7 @@ children = ( 675341581A3F54D800A0A8C3 /* generator_tool */, 6726C2351A4C2BBD005EEA39 /* generator_tests */, + 6737465F1CF4639F005E6D1F /* booking_quality_check */, ); name = Products; sourceTree = ""; @@ -411,7 +472,6 @@ 674A28CC1B1C7FED001A525C /* libgflags.a */, 674A28CD1B1C7FED001A525C /* libindexer.a */, 674A28C71B1C7FC9001A525C /* libexpat.a */, - 67AB92CC1B73D15700AB5194 /* Foundation.framework */, 675343E21A3F607600A0A8C3 /* Cocoa.framework */, 675343E01A3F600D00A0A8C3 /* IOKit.framework */, 675343DE1A3F5FE500A0A8C3 /* CoreFoundation.framework */, @@ -439,6 +499,23 @@ productReference = 6726C2351A4C2BBD005EEA39 /* generator_tests */; productType = "com.apple.product-type.tool"; }; + 6737465E1CF4639F005E6D1F /* booking_quality_check */ = { + isa = PBXNativeTarget; + buildConfigurationList = 
673746631CF4639F005E6D1F /* Build configuration list for PBXNativeTarget "booking_quality_check" */; + buildPhases = ( + 6737465B1CF4639F005E6D1F /* Sources */, + 6737465C1CF4639F005E6D1F /* Frameworks */, + 6737465D1CF4639F005E6D1F /* CopyFiles */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = booking_quality_check; + productName = booking_quality_check; + productReference = 6737465F1CF4639F005E6D1F /* booking_quality_check */; + productType = "com.apple.product-type.tool"; + }; 675341571A3F54D800A0A8C3 /* generator_tool */ = { isa = PBXNativeTarget; buildConfigurationList = 6753415F1A3F54D800A0A8C3 /* Build configuration list for PBXNativeTarget "generator_tool" */; @@ -465,6 +542,9 @@ LastUpgradeCheck = 0700; ORGANIZATIONNAME = maps.me; TargetAttributes = { + 6737465E1CF4639F005E6D1F = { + CreatedOnToolsVersion = 7.3.1; + }; 675341571A3F54D800A0A8C3 = { CreatedOnToolsVersion = 6.1; }; @@ -484,6 +564,7 @@ targets = ( 675341571A3F54D800A0A8C3 /* generator_tool */, 6726C21C1A4C2BBD005EEA39 /* generator_tests */, + 6737465E1CF4639F005E6D1F /* booking_quality_check */, ); }; /* End PBXProject section */ @@ -510,6 +591,14 @@ ); runOnlyForDeploymentPostprocessing = 0; }; + 6737465B1CF4639F005E6D1F /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 673746671CF4641B005E6D1F /* booking_quality_check.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; 675341541A3F54D800A0A8C3 /* Sources */ = { isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; @@ -554,6 +643,30 @@ }; name = Release; }; + 673746641CF4639F005E6D1F /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + CLANG_ANALYZER_NONNULL = YES; + CODE_SIGN_IDENTITY = "-"; + DEBUG_INFORMATION_FORMAT = dwarf; + GCC_NO_COMMON_BLOCKS = YES; + MACOSX_DEPLOYMENT_TARGET = 10.11; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Debug; + }; + 673746651CF4639F005E6D1F /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + 
CLANG_ANALYZER_NONNULL = YES; + CODE_SIGN_IDENTITY = "-"; + COPY_PHASE_STRIP = NO; + GCC_NO_COMMON_BLOCKS = YES; + MACOSX_DEPLOYMENT_TARGET = 10.11; + PRODUCT_NAME = "$(TARGET_NAME)"; + }; + name = Release; + }; 6753415D1A3F54D800A0A8C3 /* Debug */ = { isa = XCBuildConfiguration; baseConfigurationReference = 670D05AD1B0E08260013A7AC /* defaults.xcconfig */; @@ -689,6 +802,15 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; }; + 673746631CF4639F005E6D1F /* Build configuration list for PBXNativeTarget "booking_quality_check" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 673746641CF4639F005E6D1F /* Debug */, + 673746651CF4639F005E6D1F /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; 675341531A3F54D800A0A8C3 /* Build configuration list for PBXProject "generator_tool" */ = { isa = XCConfigurationList; buildConfigurations = ( From c95607c60a3339052b830a1aedbf6c4da04c0315 Mon Sep 17 00:00:00 2001 From: Sergey Yershov Date: Thu, 26 May 2016 18:19:31 +0300 Subject: [PATCH 03/11] [booking] Output for manually marking --- .../booking_quality_check.cpp | 32 +++++++++++++++++-- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/generator/booking_quality_check/booking_quality_check.cpp b/generator/booking_quality_check/booking_quality_check.cpp index bec9448bdf..1ec773b9c1 100644 --- a/generator/booking_quality_check/booking_quality_check.cpp +++ b/generator/booking_quality_check/booking_quality_check.cpp @@ -3,6 +3,8 @@ #include "generator/booking_dataset.hpp" #include "generator/osm_source.hpp" +#include "geometry/distance_on_sphere.hpp" + #include "3party/gflags/src/gflags/gflags.h" DEFINE_bool(generate_classif, false, "Generate classificator."); @@ -12,6 +14,18 @@ DEFINE_string(osm_file_name, "", "Input .o5m file"); DEFINE_string(booking_data, "", "Path to booking data in .tsv format"); DEFINE_uint64(selection_size, 1000, "Selection size"); +using namespace generator; + + 
+ostream & operator << (ostream & s, OsmElement const & e) +{ + for (auto const & tag : e.Tags()) + { + s << tag.key << "=" << tag.value << "\t"; + } + return s; +} + int main(int argc, char * argv[]) { google::SetUsageMessage("Takes OSM XML data from stdin and creates" @@ -52,10 +66,22 @@ int main(int argc, char * argv[]) random_shuffle(elementIndexes.begin(), elementIndexes.end()); elementIndexes.resize(FLAGS_selection_size); - vector selectedElements; for (size_t i : elementIndexes) - selectedElements.emplace_back(elements[i]); - + { + OsmElement const & e = elements[i]; + auto const bookingIndexes = bookingDataset.GetNearestHotels(e.lat, e.lon, 3, 150); + for (size_t const j : bookingIndexes) + { + auto const & hotel = bookingDataset.GetHotel(j); + double const dist = ms::DistanceOnEarth(e.lat, e.lon, hotel.lat, hotel.lon); + cout << "# ------------------------------------------" << fixed << setprecision(6) << endl; + cout << "y \t" << i << "\t " << j << " dist: " << dist << endl; + cout << "# " << e << endl; + cout << "# " << hotel << endl; + cout << "# URL: https://www.openstreetmap.org/?mlat=" << hotel.lat << "&mlon=" << hotel.lon << "#map=18/"<< hotel.lat << "/" << hotel.lon << endl; + } + cout << endl << endl; + } return 0; From a55c1a47d6dad848a82e32772b976b4e1529f094 Mon Sep 17 00:00:00 2001 From: Sergey Yershov Date: Tue, 31 May 2016 15:16:46 +0300 Subject: [PATCH 04/11] [booking] Save list of skipped objects --- generator/osm_source.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/generator/osm_source.cpp b/generator/osm_source.cpp index 19d83c3229..4f4128a666 100644 --- a/generator/osm_source.cpp +++ b/generator/osm_source.cpp @@ -516,6 +516,8 @@ bool GenerateFeaturesImpl(feature::GenerateInfo & info) // If info.m_bookingDatafileName is empty then no data will be loaded. generator::BookingDataset bookingDataset(info.m_bookingDatafileName); + stringstream skippedElements; + // Here we can add new tags to element!!! 
auto const fn = [&](OsmElement * e) { @@ -523,7 +525,10 @@ bool GenerateFeaturesImpl(feature::GenerateInfo & info) tagAdmixer(e); if (bookingDataset.BookingFilter(*e)) + { + skippedElements << e->id << endl; return; + } parser.EmitElement(e); }; @@ -545,6 +550,15 @@ bool GenerateFeaturesImpl(feature::GenerateInfo & info) { bookingDataset.BuildFeatures([&](OsmElement * e) { parser.EmitElement(e); }); LOG(LINFO, ("Processing booking data from", info.m_bookingDatafileName, "done.")); + string skippedElementsPath = info.GetIntermediateFileName("skipped_elements", ".lst"); + ofstream file(skippedElementsPath); + if (file.is_open()) + { + file << skippedElements.str(); + LOG(LINFO, ("Saving skipped elements if into", skippedElementsPath, "done.")); + } + else + LOG(LERROR, ("Can't output into", skippedElementsPath)); } parser.Finish(); From bef32bcb75bed78e9ff9d6d565a92dd960e523e7 Mon Sep 17 00:00:00 2001 From: Sergey Yershov Date: Tue, 31 May 2016 15:19:53 +0300 Subject: [PATCH 05/11] [booking] Fix the matching function --- generator/booking_dataset.cpp | 15 ++++++++++++--- generator/booking_dataset.hpp | 3 +++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/generator/booking_dataset.cpp b/generator/booking_dataset.cpp index d2ac8ca57e..9b3caeaac1 100644 --- a/generator/booking_dataset.cpp +++ b/generator/booking_dataset.cpp @@ -257,11 +257,20 @@ bool BookingDataset::MatchWithBooking(OsmElement const & e) const return false; // Find 3 nearest values to a point. 
- auto const indexes = GetNearestHotels(e.lat, e.lon, 3, 150 /* max distance in meters */); - if (indexes.empty()) + auto const bookingIndexes = GetNearestHotels(e.lat, e.lon, 3, kDistanceLimitInMeters); + if (bookingIndexes.empty()) return false; - bool matched = MatchByName(name, indexes); + bool matched = false; + + for (size_t const j : bookingIndexes) + { + auto const & hotel = GetHotel(j); + double const dist = ms::DistanceOnEarth(e.lat, e.lon, hotel.lat, hotel.lon); + double score = (kDistanceLimitInMeters - dist) / kDistanceLimitInMeters; + matched = score > kOptimalThreshold; + } + return matched; } diff --git a/generator/booking_dataset.hpp b/generator/booking_dataset.hpp index 6ce74c7118..1a91beede7 100644 --- a/generator/booking_dataset.hpp +++ b/generator/booking_dataset.hpp @@ -15,6 +15,9 @@ namespace generator class BookingDataset { public: + double static constexpr kDistanceLimitInMeters = 150; + double static constexpr kOptimalThreshold = 0.709283; + struct Hotel { enum class Fields From 4b89d45831e4c69ccd7b15c56d2e31d7174871ca Mon Sep 17 00:00:00 2001 From: Sergey Yershov Date: Tue, 31 May 2016 15:20:54 +0300 Subject: [PATCH 06/11] [booking] Quality check tools --- .../booking_quality_check.cpp | 67 ++++++++++++------- 1 file changed, 44 insertions(+), 23 deletions(-) diff --git a/generator/booking_quality_check/booking_quality_check.cpp b/generator/booking_quality_check/booking_quality_check.cpp index 1ec773b9c1..e9fc43b824 100644 --- a/generator/booking_quality_check/booking_quality_check.cpp +++ b/generator/booking_quality_check/booking_quality_check.cpp @@ -7,43 +7,40 @@ #include "3party/gflags/src/gflags/gflags.h" +#include "std/fstream.hpp" + DEFINE_bool(generate_classif, false, "Generate classificator."); -DEFINE_bool(preprocess, false, "1st pass - count features"); DEFINE_string(osm_file_name, "", "Input .o5m file"); DEFINE_string(booking_data, "", "Path to booking data in .tsv format"); +DEFINE_string(sample_data, "", "Sample output 
path"); DEFINE_uint64(selection_size, 1000, "Selection size"); using namespace generator; - -ostream & operator << (ostream & s, OsmElement const & e) +ostream & operator<<(ostream & s, OsmElement const & e) { for (auto const & tag : e.Tags()) { - s << tag.key << "=" << tag.value << "\t"; + auto t = tag; + replace(t.key.begin(), t.key.end(), '\n', ' '); + replace(t.value.begin(), t.value.end(), '\n', ' '); + s << t.key << "=" << t.value << "\t"; } return s; } int main(int argc, char * argv[]) { - google::SetUsageMessage("Takes OSM XML data from stdin and creates" - " data and index files in several passes."); + google::SetUsageMessage( + "Takes OSM XML data from stdin and creates" + " data and index files in several passes."); google::ParseCommandLineFlags(&argc, &argv, true); - LOG_SHORT(LINFO, ("Booking data:",FLAGS_booking_data)); + LOG_SHORT(LINFO, ("Booking data:", FLAGS_booking_data)); BookingDataset bookingDataset(FLAGS_booking_data); - // Here we can add new tags to element!!! - auto const filterAction = [&](OsmElement * e) - { - if (bookingDataset.BookingFilter(*e)) - return; - - }; - vector elements; auto const counterAction = [&](OsmElement * e) { @@ -53,7 +50,8 @@ int main(int argc, char * argv[]) LOG_SHORT(LINFO, ("OSM data:", FLAGS_osm_file_name)); { - SourceReader reader = FLAGS_osm_file_name.empty() ? SourceReader() : SourceReader(FLAGS_osm_file_name); + SourceReader reader = + FLAGS_osm_file_name.empty() ? 
SourceReader() : SourceReader(FLAGS_osm_file_name); ProcessOsmElementsFromO5M(reader, counterAction); } LOG_SHORT(LINFO, ("Tourism elements:", elements.size())); @@ -66,23 +64,46 @@ int main(int argc, char * argv[]) random_shuffle(elementIndexes.begin(), elementIndexes.end()); elementIndexes.resize(FLAGS_selection_size); + stringstream outStream; + for (size_t i : elementIndexes) { OsmElement const & e = elements[i]; - auto const bookingIndexes = bookingDataset.GetNearestHotels(e.lat, e.lon, 3, 150); + auto const bookingIndexes = + bookingDataset.GetNearestHotels(e.lat, e.lon, 3, BookingDataset::kDistanceLimitInMeters); for (size_t const j : bookingIndexes) { auto const & hotel = bookingDataset.GetHotel(j); double const dist = ms::DistanceOnEarth(e.lat, e.lon, hotel.lat, hotel.lon); - cout << "# ------------------------------------------" << fixed << setprecision(6) << endl; - cout << "y \t" << i << "\t " << j << " dist: " << dist << endl; - cout << "# " << e << endl; - cout << "# " << hotel << endl; - cout << "# URL: https://www.openstreetmap.org/?mlat=" << hotel.lat << "&mlon=" << hotel.lon << "#map=18/"<< hotel.lat << "/" << hotel.lon << endl; + double score = + (BookingDataset::kDistanceLimitInMeters - dist) / BookingDataset::kDistanceLimitInMeters; + bool matched = score > BookingDataset::kOptimalThreshold; + + outStream << "# ------------------------------------------" << fixed << setprecision(6) + << endl; + outStream << (matched ? 
'y' : 'n') << " \t" << i << "\t " << j << " distance: " << dist + << " score: " << score << endl; + outStream << "# " << e << endl; + outStream << "# " << hotel << endl; + outStream << "# URL: https://www.openstreetmap.org/?mlat=" << hotel.lat + << "&mlon=" << hotel.lon << "#map=18/" << hotel.lat << "/" << hotel.lon << endl; } - cout << endl << endl; + if (!bookingIndexes.empty()) + outStream << endl << endl; } + if (FLAGS_sample_data.empty()) + { + cout << outStream.str(); + } + else + { + ofstream file(FLAGS_sample_data); + if (file.is_open()) + file << outStream.str(); + else + LOG(LERROR, ("Can't output into", FLAGS_sample_data)); + } return 0; } \ No newline at end of file From 24765c5607ea234e4d9c363e6ac394756b67aaaa Mon Sep 17 00:00:00 2001 From: Sergey Yershov Date: Tue, 31 May 2016 17:20:29 +0300 Subject: [PATCH 07/11] [booking] Review fixes --- generator/booking_dataset.cpp | 13 ++-- generator/booking_dataset.hpp | 5 ++ .../booking_quality_check.cpp | 45 ++++++------ tools/python/booking_hotels_quality.py | 73 +++++++++++++++++++ 4 files changed, 107 insertions(+), 29 deletions(-) create mode 100755 tools/python/booking_hotels_quality.py diff --git a/generator/booking_dataset.cpp b/generator/booking_dataset.cpp index 9b3caeaac1..22c254053e 100644 --- a/generator/booking_dataset.cpp +++ b/generator/booking_dataset.cpp @@ -223,6 +223,11 @@ void BookingDataset::BuildFeatures(function const & fn) cons } } +double BookingDataset::ScoreByLinearNormDistance(double distance) const +{ + return (kDistanceLimitInMeters - distance) / kDistanceLimitInMeters; +} + void BookingDataset::LoadHotels(string const & path) { m_hotels.clear(); @@ -257,17 +262,15 @@ bool BookingDataset::MatchWithBooking(OsmElement const & e) const return false; // Find 3 nearest values to a point. 
- auto const bookingIndexes = GetNearestHotels(e.lat, e.lon, 3, kDistanceLimitInMeters); - if (bookingIndexes.empty()) - return false; + auto const bookingIndexes = GetNearestHotels(e.lat, e.lon, kMaxSelectedElements, kDistanceLimitInMeters); bool matched = false; for (size_t const j : bookingIndexes) { auto const & hotel = GetHotel(j); - double const dist = ms::DistanceOnEarth(e.lat, e.lon, hotel.lat, hotel.lon); - double score = (kDistanceLimitInMeters - dist) / kDistanceLimitInMeters; + double const distanceMeters = ms::DistanceOnEarth(e.lat, e.lon, hotel.lat, hotel.lon); + double score = ScoreByLinearNormDistance(distanceMeters); matched = score > kOptimalThreshold; } diff --git a/generator/booking_dataset.hpp b/generator/booking_dataset.hpp index 1a91beede7..1fe780d647 100644 --- a/generator/booking_dataset.hpp +++ b/generator/booking_dataset.hpp @@ -16,6 +16,9 @@ class BookingDataset { public: double static constexpr kDistanceLimitInMeters = 150; + size_t static constexpr kMaxSelectedElements = 3; + + // Calculated with tools/python/booking_hotels_quality.py double static constexpr kOptimalThreshold = 0.709283; struct Hotel @@ -66,6 +69,8 @@ public: void BuildFeatures(function const & fn) const; + double ScoreByLinearNormDistance(double distance) const; + protected: vector m_hotels; diff --git a/generator/booking_quality_check/booking_quality_check.cpp b/generator/booking_quality_check/booking_quality_check.cpp index e9fc43b824..57a46dad9c 100644 --- a/generator/booking_quality_check/booking_quality_check.cpp +++ b/generator/booking_quality_check/booking_quality_check.cpp @@ -1,13 +1,12 @@ -#include "std/iostream.hpp" - #include "generator/booking_dataset.hpp" #include "generator/osm_source.hpp" #include "geometry/distance_on_sphere.hpp" -#include "3party/gflags/src/gflags/gflags.h" - #include "std/fstream.hpp" +#include "std/iostream.hpp" + +#include "3party/gflags/src/gflags/gflags.h" DEFINE_bool(generate_classif, false, "Generate classificator."); @@ 
-42,47 +41,45 @@ int main(int argc, char * argv[]) BookingDataset bookingDataset(FLAGS_booking_data); vector elements; - auto const counterAction = [&](OsmElement * e) - { - if (bookingDataset.TourismFilter(*e)) - elements.emplace_back(*e); - }; - LOG_SHORT(LINFO, ("OSM data:", FLAGS_osm_file_name)); { SourceReader reader = FLAGS_osm_file_name.empty() ? SourceReader() : SourceReader(FLAGS_osm_file_name); - ProcessOsmElementsFromO5M(reader, counterAction); + ProcessOsmElementsFromO5M(reader, [&](OsmElement * e) + { + if (bookingDataset.TourismFilter(*e)) + elements.emplace_back(*e); + }); } - LOG_SHORT(LINFO, ("Tourism elements:", elements.size())); + LOG_SHORT(LINFO, ("Num of tourism elements:", elements.size())); vector elementIndexes(elements.size()); - size_t counter = 0; - for (auto & e : elementIndexes) - e = counter++; + for (size_t i = 0; i < elementIndexes.size(); ++i) + elementIndexes[i] = i; random_shuffle(elementIndexes.begin(), elementIndexes.end()); - elementIndexes.resize(FLAGS_selection_size); + if (FLAGS_selection_size < elementIndexes.size()) + elementIndexes.resize(FLAGS_selection_size); stringstream outStream; for (size_t i : elementIndexes) { OsmElement const & e = elements[i]; - auto const bookingIndexes = - bookingDataset.GetNearestHotels(e.lat, e.lon, 3, BookingDataset::kDistanceLimitInMeters); + auto const bookingIndexes = bookingDataset.GetNearestHotels( + e.lat, e.lon, BookingDataset::kMaxSelectedElements, BookingDataset::kDistanceLimitInMeters); for (size_t const j : bookingIndexes) { auto const & hotel = bookingDataset.GetHotel(j); - double const dist = ms::DistanceOnEarth(e.lat, e.lon, hotel.lat, hotel.lon); - double score = - (BookingDataset::kDistanceLimitInMeters - dist) / BookingDataset::kDistanceLimitInMeters; + double const distanceMeters = ms::DistanceOnEarth(e.lat, e.lon, hotel.lat, hotel.lon); + double score = bookingDataset.ScoreByLinearNormDistance(distanceMeters); + bool matched = score > BookingDataset::kOptimalThreshold; 
outStream << "# ------------------------------------------" << fixed << setprecision(6) << endl; - outStream << (matched ? 'y' : 'n') << " \t" << i << "\t " << j << " distance: " << dist - << " score: " << score << endl; + outStream << (matched ? 'y' : 'n') << " \t" << i << "\t " << j + << " distance: " << distanceMeters << " score: " << score << endl; outStream << "# " << e << endl; outStream << "# " << hotel << endl; outStream << "# URL: https://www.openstreetmap.org/?mlat=" << hotel.lat @@ -106,4 +103,4 @@ int main(int argc, char * argv[]) } return 0; -} \ No newline at end of file +} diff --git a/tools/python/booking_hotels_quality.py b/tools/python/booking_hotels_quality.py new file mode 100755 index 0000000000..00d9b29138 --- /dev/null +++ b/tools/python/booking_hotels_quality.py @@ -0,0 +1,73 @@ +#!/usr/bin/python +# coding: utf8 +from __future__ import print_function + +from collections import namedtuple, defaultdict +from datetime import datetime +from sklearn import metrics +import argparse +import base64 +import json +import logging +import os +import pickle +import time +import urllib2 + +# init logging +logging.basicConfig(level=logging.DEBUG, format='[%(asctime)s] %(levelname)s: %(message)s') + +def load_binary_list(path): + bits = [] + with open(path, 'r') as fd: + for line in fd: + if (not line.strip()) or line[0] == '#': + continue + bits.append(1 if line[0] == 'y' else 0) + return bits + +def load_score_list(path): + scores = [] + with open(path, 'r') as fd: + for line in fd: + if (not line.strip()) or line[0] == '#': + continue + scores.append(float(line[line.rfind(':')+2:])) + return scores + +def process_options(): + parser = argparse.ArgumentParser(description='Download and process booking hotels.') + parser.add_argument("-v", "--verbose", action="store_true", dest="verbose") + parser.add_argument("-q", "--quiet", action="store_false", dest="verbose") + + parser.add_argument("--reference_list", dest="reference_list", help="Path to data files") 
+ parser.add_argument("--sample_list", dest="sample_list", help="Name and destination for output file") + + parser.add_argument("--show", dest="show", default=False, action="store_true", help="Show graph for precision and recall") + + options = parser.parse_args() + + if not options.reference_list or not options.sample_list: + parser.print_help() + exit() + + return options + +def main(): + options = process_options() + reference = load_binary_list(options.reference_list) + sample = load_score_list(options.sample_list) + + precision, recall, threshold = metrics.precision_recall_curve(reference, sample) + aa = zip(precision, recall, threshold) + print("Optimal thrashold: {2} for precision: {0} and recall: {1}".format(*max(aa, key=lambda (p, r, t): p*r/(p+r)))) + print("AUC: {0}".format(metrics.roc_auc_score(reference, sample))) + + if options.show: + import matplotlib.pyplot as plt + plt.plot(recall, precision) + plt.show() + + +if __name__ == "__main__": + main() From 3e212a01d1ffe635670ea746b3b4bd725a63fd9c Mon Sep 17 00:00:00 2001 From: Sergey Yershov Date: Tue, 31 May 2016 17:39:22 +0300 Subject: [PATCH 08/11] Add iota to std --- generator/booking_quality_check/booking_quality_check.cpp | 4 ++-- std/numeric.hpp | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/generator/booking_quality_check/booking_quality_check.cpp b/generator/booking_quality_check/booking_quality_check.cpp index 57a46dad9c..bf0f71216e 100644 --- a/generator/booking_quality_check/booking_quality_check.cpp +++ b/generator/booking_quality_check/booking_quality_check.cpp @@ -5,6 +5,7 @@ #include "std/fstream.hpp" #include "std/iostream.hpp" +#include "std/numeric.hpp" #include "3party/gflags/src/gflags/gflags.h" @@ -54,8 +55,7 @@ int main(int argc, char * argv[]) LOG_SHORT(LINFO, ("Num of tourism elements:", elements.size())); vector elementIndexes(elements.size()); - for (size_t i = 0; i < elementIndexes.size(); ++i) - elementIndexes[i] = i; + iota(elementIndexes.begin(), 
elementIndexes.end(), 0); random_shuffle(elementIndexes.begin(), elementIndexes.end()); if (FLAGS_selection_size < elementIndexes.size()) diff --git a/std/numeric.hpp b/std/numeric.hpp index 8ef5ca86b6..9fed8f6d37 100644 --- a/std/numeric.hpp +++ b/std/numeric.hpp @@ -6,6 +6,7 @@ #include using std::accumulate; +using std::iota; #ifdef DEBUG_NEW #define new DEBUG_NEW From 669fcc90b3041141ccd8ab9f3fe76f960c33d6c8 Mon Sep 17 00:00:00 2001 From: Sergey Yershov Date: Tue, 31 May 2016 20:27:25 +0300 Subject: [PATCH 09/11] Review fixes --- generator/booking_dataset.cpp | 2 +- .../booking_quality_check.cpp | 2 ++ tools/python/booking_hotels_quality.py | 22 +++++++++++++++---- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/generator/booking_dataset.cpp b/generator/booking_dataset.cpp index 22c254053e..cc1107dbd5 100644 --- a/generator/booking_dataset.cpp +++ b/generator/booking_dataset.cpp @@ -271,7 +271,7 @@ bool BookingDataset::MatchWithBooking(OsmElement const & e) const auto const & hotel = GetHotel(j); double const distanceMeters = ms::DistanceOnEarth(e.lat, e.lon, hotel.lat, hotel.lon); double score = ScoreByLinearNormDistance(distanceMeters); - matched = score > kOptimalThreshold; + matched = matched || score > kOptimalThreshold; } return matched; diff --git a/generator/booking_quality_check/booking_quality_check.cpp b/generator/booking_quality_check/booking_quality_check.cpp index bf0f71216e..f2ea149721 100644 --- a/generator/booking_quality_check/booking_quality_check.cpp +++ b/generator/booking_quality_check/booking_quality_check.cpp @@ -57,6 +57,8 @@ int main(int argc, char * argv[]) vector elementIndexes(elements.size()); iota(elementIndexes.begin(), elementIndexes.end(), 0); + // In the first implementation, we used random_shuffle for the reference dataset. + // Next time we are going to replace random_shuffle with shuffle using a defined seed.
random_shuffle(elementIndexes.begin(), elementIndexes.end()); if (FLAGS_selection_size < elementIndexes.size()) elementIndexes.resize(FLAGS_selection_size); diff --git a/tools/python/booking_hotels_quality.py b/tools/python/booking_hotels_quality.py index 00d9b29138..d1a58f8a7d 100755 --- a/tools/python/booking_hotels_quality.py +++ b/tools/python/booking_hotels_quality.py @@ -9,6 +9,7 @@ import argparse import base64 import json import logging +import matplotlib.pyplot as plt import os import pickle import time @@ -17,7 +18,11 @@ import urllib2 # init logging logging.basicConfig(level=logging.DEBUG, format='[%(asctime)s] %(levelname)s: %(message)s') + def load_binary_list(path): + """ + Loads binary classifier output + """ bits = [] with open(path, 'r') as fd: for line in fd: @@ -26,7 +31,11 @@ def load_binary_list(path): bits.append(1 if line[0] == 'y' else 0) return bits + def load_score_list(path): + """ + Loads list of scores + """ scores = [] with open(path, 'r') as fd: for line in fd: @@ -35,15 +44,17 @@ def load_score_list(path): scores.append(float(line[line.rfind(':')+2:])) return scores + def process_options(): - parser = argparse.ArgumentParser(description='Download and process booking hotels.') + parser = argparse.ArgumentParser(description="Download and process booking hotels.") parser.add_argument("-v", "--verbose", action="store_true", dest="verbose") parser.add_argument("-q", "--quiet", action="store_false", dest="verbose") parser.add_argument("--reference_list", dest="reference_list", help="Path to data files") parser.add_argument("--sample_list", dest="sample_list", help="Name and destination for output file") - parser.add_argument("--show", dest="show", default=False, action="store_true", help="Show graph for precision and recall") + parser.add_argument("--show", dest="show", default=False, action="store_true", + help="Show graph for precision and recall") options = parser.parse_args() @@ -53,6 +64,7 @@ def process_options(): return options + 
def main(): options = process_options() reference = load_binary_list(options.reference_list) @@ -60,12 +72,14 @@ def main(): precision, recall, threshold = metrics.precision_recall_curve(reference, sample) aa = zip(precision, recall, threshold) - print("Optimal thrashold: {2} for precision: {0} and recall: {1}".format(*max(aa, key=lambda (p, r, t): p*r/(p+r)))) + print("Optimal threshold: {2} for precision: {0} and recall: {1}".format(*max(aa, key=lambda (p, r, t): p*r/(p+r)))) print("AUC: {0}".format(metrics.roc_auc_score(reference, sample))) if options.show: - import matplotlib.pyplot as plt plt.plot(recall, precision) + plt.title("Precision/Recall") + plt.ylabel("Precision") + plt.xlabel("Recall") plt.show() From a2a378784b7f07a09b3a0c8eaf970f85c5583305 Mon Sep 17 00:00:00 2001 From: Sergey Yershov Date: Wed, 1 Jun 2016 12:39:15 +0300 Subject: [PATCH 10/11] Review fixes C++ --- generator/booking_dataset.cpp | 11 +++++++---- generator/booking_dataset.hpp | 2 +- .../booking_quality_check/booking_quality_check.cpp | 2 +- generator/osm_source.cpp | 4 +++- 4 files changed, 12 insertions(+), 7 deletions(-) diff --git a/generator/booking_dataset.cpp b/generator/booking_dataset.cpp index cc1107dbd5..124c708897 100644 --- a/generator/booking_dataset.cpp +++ b/generator/booking_dataset.cpp @@ -223,9 +223,10 @@ void BookingDataset::BuildFeatures(function const & fn) cons } } -double BookingDataset::ScoreByLinearNormDistance(double distance) const +static double BookingDataset::ScoreByLinearNormDistance(double distance) const { - return (kDistanceLimitInMeters - distance) / kDistanceLimitInMeters; + distance = my::clamp(distance, 0, kDistanceLimitInMeters); + return 1.0 - distance / kDistanceLimitInMeters; } void BookingDataset::LoadHotels(string const & path) @@ -261,7 +262,7 @@ bool BookingDataset::MatchWithBooking(OsmElement const & e) const if (name.empty()) return false; - // Find 3 nearest values to a point. + // Find |kMaxSelectedElements| nearest values to a point. 
auto const bookingIndexes = GetNearestHotels(e.lat, e.lon, kMaxSelectedElements, kDistanceLimitInMeters); bool matched = false; @@ -271,7 +272,9 @@ bool BookingDataset::MatchWithBooking(OsmElement const & e) const auto const & hotel = GetHotel(j); double const distanceMeters = ms::DistanceOnEarth(e.lat, e.lon, hotel.lat, hotel.lon); double score = ScoreByLinearNormDistance(distanceMeters); - matched = matched || score > kOptimalThreshold; + matched = score > kOptimalThreshold; + if (matched) + break; } return matched; diff --git a/generator/booking_dataset.hpp b/generator/booking_dataset.hpp index 1fe780d647..b6367081a6 100644 --- a/generator/booking_dataset.hpp +++ b/generator/booking_dataset.hpp @@ -69,7 +69,7 @@ public: void BuildFeatures(function const & fn) const; - double ScoreByLinearNormDistance(double distance) const; + static double ScoreByLinearNormDistance(double distance) const; protected: vector m_hotels; diff --git a/generator/booking_quality_check/booking_quality_check.cpp b/generator/booking_quality_check/booking_quality_check.cpp index f2ea149721..1696a69d6c 100644 --- a/generator/booking_quality_check/booking_quality_check.cpp +++ b/generator/booking_quality_check/booking_quality_check.cpp @@ -74,7 +74,7 @@ int main(int argc, char * argv[]) { auto const & hotel = bookingDataset.GetHotel(j); double const distanceMeters = ms::DistanceOnEarth(e.lat, e.lon, hotel.lat, hotel.lon); - double score = bookingDataset.ScoreByLinearNormDistance(distanceMeters); + double score = BookingDataset::ScoreByLinearNormDistance(distanceMeters); bool matched = score > BookingDataset::kOptimalThreshold; diff --git a/generator/osm_source.cpp b/generator/osm_source.cpp index 4f4128a666..65937d335f 100644 --- a/generator/osm_source.cpp +++ b/generator/osm_source.cpp @@ -555,10 +555,12 @@ bool GenerateFeaturesImpl(feature::GenerateInfo & info) if (file.is_open()) { file << skippedElements.str(); - LOG(LINFO, ("Saving skipped elements if into", skippedElementsPath, 
"done.")); + LOG(LINFO, ("Saving skipped elements to", skippedElementsPath, "done.")); } else + { LOG(LERROR, ("Can't output into", skippedElementsPath)); + } } parser.Finish(); From 500ef15db952dd2bd4c2e479f7560a2ccad32e7f Mon Sep 17 00:00:00 2001 From: Sergey Yershov Date: Wed, 1 Jun 2016 12:51:11 +0300 Subject: [PATCH 11/11] Review fixes Python --- tools/python/booking_hotels_quality.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tools/python/booking_hotels_quality.py b/tools/python/booking_hotels_quality.py index d1a58f8a7d..9c81dad52a 100755 --- a/tools/python/booking_hotels_quality.py +++ b/tools/python/booking_hotels_quality.py @@ -21,7 +21,7 @@ logging.basicConfig(level=logging.DEBUG, format='[%(asctime)s] %(levelname)s: %( def load_binary_list(path): """ - Loads binary classifier output + Loads binary classifier output. """ bits = [] with open(path, 'r') as fd: @@ -34,7 +34,7 @@ def load_binary_list(path): def load_score_list(path): """ - Loads list of scores + Loads list of matching scores. """ scores = [] with open(path, 'r') as fd: @@ -72,7 +72,8 @@ def main(): precision, recall, threshold = metrics.precision_recall_curve(reference, sample) aa = zip(precision, recall, threshold) - print("Optimal threshold: {2} for precision: {0} and recall: {1}".format(*max(aa, key=lambda (p, r, t): p*r/(p+r)))) + max_by_hmean = max(aa, key=lambda (p, r, t): p*r/(p+r)) + print("Optimal threshold: {2} for precision: {0} and recall: {1}".format(*max_by_hmean)) print("AUC: {0}".format(metrics.roc_auc_score(reference, sample))) if options.show: