From 7eec83e7762cc39653202f8076bffcf4e617546a Mon Sep 17 00:00:00 2001 From: vng Date: Thu, 8 Aug 2013 00:15:42 +0300 Subject: [PATCH] Added geocoding for ArticleInfo indexer. --- builder/genindex/main.cpp | 7 +++-- storage/article_info.cpp | 16 ++++++++++- storage/article_info.hpp | 1 + storage/storage_builder.cpp | 56 ++++++++++++++++++++++++++++++++++++- storage/storage_builder.hpp | 3 +- 5 files changed, 77 insertions(+), 6 deletions(-) diff --git a/builder/genindex/main.cpp b/builder/genindex/main.cpp index 150b18a..dc36698 100644 --- a/builder/genindex/main.cpp +++ b/builder/genindex/main.cpp @@ -5,15 +5,16 @@ int main(int argc, char const * argv[]) { - if (argc == 4) + if (argc == 5) { StorageBuilder builder; builder.ParseEntries(argv[1]); builder.ParseRedirects(argv[2]); - builder.Save(argv[3]); + builder.ParseGeocodes(argv[3]); + builder.Save(argv[4]); } else - cout << "Usage: " << endl; + cout << "Usage: " << endl; return 0; } diff --git a/storage/article_info.cpp b/storage/article_info.cpp index 6196d68..4522a3a 100644 --- a/storage/article_info.cpp +++ b/storage/article_info.cpp @@ -4,10 +4,13 @@ #include "../env/strings.hpp" #include "../env/writer.hpp" #include "../env/reader.hpp" +#include "../env/latlon.hpp" +#include "../env/assert.hpp" #include "../std/iterator.hpp" #include "../std/algorithm.hpp" #include "../std/cmath.hpp" +#include "../std/static_assert.hpp" void ArticleInfo::GenerateKey() @@ -15,6 +18,17 @@ void ArticleInfo::GenerateKey() m_key = str::MakeNormalizeAndLowerUtf8(m_title); } +bool ArticleInfo::IsValidCoordinates() const +{ + STATIC_ASSERT(EMPTY_COORD > 200); + if (m_lat < 200.0 && m_lon < 200.0) + { + ASSERT(ll::ValidLat(m_lat) && ll::ValidLon(m_lon), ()); + return true; + } + return false; +} + namespace { @@ -64,7 +78,7 @@ void ArticleInfo::Read(rd::Reader & r) double ArticleInfo::Score(double currLat, double currLon) const { - if (m_lat != EMPTY_COORD && m_lon != EMPTY_COORD) + if (IsValidCoordinates()) return earth::Distance(m_lat, m_lon, currLat, currLon); else return 0.0; diff --git a/storage/article_info.hpp b/storage/article_info.hpp index eaca2fa..419a7e9 100644 --- a/storage/article_info.hpp +++ b/storage/article_info.hpp @@ -46,6 +46,7 @@ public: int32_t m_parentIndex; // NO_PARENT is the root article double m_lat, m_lon; + bool IsValidCoordinates() const; bool m_redirect; diff --git a/storage/storage_builder.cpp b/storage/storage_builder.cpp index bb77d8a..d914b4f 100644 --- a/storage/storage_builder.cpp +++ b/storage/storage_builder.cpp @@ -3,6 +3,7 @@ #include "../env/writer.hpp" #include "../env/assert.hpp" #include "../env/logging.hpp" +#include "../env/latlon.hpp" #include "../std/fstream.hpp" #include "../std/iterator.hpp" @@ -26,7 +27,7 @@ void ProcessEntriesFile(string const & path, ToDo & toDo) continue; entries.clear(); - str::Tokenize(str, "\t", back_inserter(entries)); + str::Tokenize(str, "\t ", back_inserter(entries)); toDo(entries); } @@ -78,6 +79,42 @@ public: } }; +class DoAddGeocodes +{ + StorageBuilder & m_storage; + + static double ToDouble(string const & s) + { + char * stop; + double const d = strtod(s.c_str(), &stop); + CHECK(stop && *stop == 0, (s)); + return d; + } + +public: + DoAddGeocodes(StorageBuilder & storage) : m_storage(storage) {} + + void operator() (vector const & entries) + { + CHECK(entries.size() == 3, (entries)); + + ArticleInfoBuilder * p = m_storage.GetArticle(entries[0]); + if (p) + { + double const lat = ToDouble(entries[1]); + double const lon = ToDouble(entries[2]); + + if (ll::ValidLat(lat) && ll::ValidLon(lon)) + { + p->m_lat = lat; + p->m_lon = lon; + } + else + LOG(WARNING, ("Bad Lat, Lon:", entries[1], entries[2])); + } + } +}; + } void StorageBuilder::ParseEntries(string const & path) @@ -92,6 +129,23 @@ void StorageBuilder::ParseRedirects(string const & path) ProcessEntriesFile(path, doAdd); } +void StorageBuilder::ParseGeocodes(string const & path) +{ + DoAddGeocodes doAdd(*this); + ProcessEntriesFile(path, doAdd); + + for (size_t i = 0; i < m_info.size(); ++i) + { + if (m_info[i].m_redirect) + { + ArticleInfoBuilder const * p = GetArticle(m_info[i].m_url); + CHECK(p, ()); + m_info[i].m_lat = p->m_lat; + m_info[i].m_lon = p->m_lon; + } + } +} + void StorageBuilder::Add(ArticleInfoBuilder const & info) { m_info.push_back(info); diff --git a/storage/storage_builder.hpp b/storage/storage_builder.hpp index b7df4eb..82fd190 100644 --- a/storage/storage_builder.hpp +++ b/storage/storage_builder.hpp @@ -40,6 +40,7 @@ class StorageBuilder public: void ParseEntries(string const & path); void ParseRedirects(string const & path); + void ParseGeocodes(string const & path); void Add(ArticleInfoBuilder const & info); @@ -49,7 +50,7 @@ public: bool operator == (Storage const & s) const; - ArticleInfoBuilder const * GetArticle(string const & url) const + ArticleInfoBuilder * GetArticle(string const & url) { map::const_iterator i = m_url2info.find(url); return (i == m_url2info.end() ? 0 : &m_info[i->second]);