Added geocoding for ArticleInfo indexer.
This commit is contained in:
parent
1531131fc7
commit
7eec83e776
5 changed files with 77 additions and 6 deletions
|
@ -5,15 +5,16 @@
|
|||
|
||||
int main(int argc, char const * argv[])
|
||||
{
|
||||
if (argc == 4)
|
||||
if (argc == 5)
|
||||
{
|
||||
StorageBuilder builder;
|
||||
builder.ParseEntries(argv[1]);
|
||||
builder.ParseRedirects(argv[2]);
|
||||
builder.Save(argv[3]);
|
||||
builder.ParseGeocodes(argv[3]);
|
||||
builder.Save(argv[4]);
|
||||
}
|
||||
else
|
||||
cout << "Usage: <info file> <redirect file> <result file>" << endl;
|
||||
cout << "Usage: <info file> <redirect file> <geocodes file> <result file>" << endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -4,10 +4,13 @@
|
|||
#include "../env/strings.hpp"
|
||||
#include "../env/writer.hpp"
|
||||
#include "../env/reader.hpp"
|
||||
#include "../env/latlon.hpp"
|
||||
#include "../env/assert.hpp"
|
||||
|
||||
#include "../std/iterator.hpp"
|
||||
#include "../std/algorithm.hpp"
|
||||
#include "../std/cmath.hpp"
|
||||
#include "../std/static_assert.hpp"
|
||||
|
||||
|
||||
void ArticleInfo::GenerateKey()
|
||||
|
@ -15,6 +18,17 @@ void ArticleInfo::GenerateKey()
|
|||
m_key = str::MakeNormalizeAndLowerUtf8(m_title);
|
||||
}
|
||||
|
||||
bool ArticleInfo::IsValidCoordinates() const
|
||||
{
|
||||
STATIC_ASSERT(EMPTY_COORD > 200);
|
||||
if (m_lat < 200.0 && m_lon < 200.0)
|
||||
{
|
||||
ASSERT(ll::ValidLat(m_lat) && ll::ValidLon(m_lon), ());
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
|
@ -64,7 +78,7 @@ void ArticleInfo::Read(rd::Reader & r)
|
|||
|
||||
double ArticleInfo::Score(double currLat, double currLon) const
|
||||
{
|
||||
if (m_lat != EMPTY_COORD && m_lon != EMPTY_COORD)
|
||||
if (IsValidCoordinates())
|
||||
return earth::Distance(m_lat, m_lon, currLat, currLon);
|
||||
else
|
||||
return 0.0;
|
||||
|
|
|
@ -46,6 +46,7 @@ public:
|
|||
int32_t m_parentIndex; // NO_PARENT is the root article
|
||||
|
||||
double m_lat, m_lon;
|
||||
bool IsValidCoordinates() const;
|
||||
|
||||
bool m_redirect;
|
||||
|
||||
|
|
|
@ -3,6 +3,7 @@
|
|||
#include "../env/writer.hpp"
|
||||
#include "../env/assert.hpp"
|
||||
#include "../env/logging.hpp"
|
||||
#include "../env/latlon.hpp"
|
||||
|
||||
#include "../std/fstream.hpp"
|
||||
#include "../std/iterator.hpp"
|
||||
|
@ -26,7 +27,7 @@ void ProcessEntriesFile(string const & path, ToDo & toDo)
|
|||
continue;
|
||||
|
||||
entries.clear();
|
||||
str::Tokenize(str, "\t", back_inserter(entries));
|
||||
str::Tokenize(str, "\t ", back_inserter(entries));
|
||||
|
||||
toDo(entries);
|
||||
}
|
||||
|
@ -78,6 +79,42 @@ public:
|
|||
}
|
||||
};
|
||||
|
||||
class DoAddGeocodes
|
||||
{
|
||||
StorageBuilder & m_storage;
|
||||
|
||||
static double ToDouble(string const & s)
|
||||
{
|
||||
char * stop;
|
||||
double const d = strtod(s.c_str(), &stop);
|
||||
CHECK(stop && *stop == 0, (s));
|
||||
return d;
|
||||
}
|
||||
|
||||
public:
|
||||
DoAddGeocodes(StorageBuilder & storage) : m_storage(storage) {}
|
||||
|
||||
void operator() (vector<string> const & entries)
|
||||
{
|
||||
CHECK(entries.size() == 3, (entries));
|
||||
|
||||
ArticleInfoBuilder * p = m_storage.GetArticle(entries[0]);
|
||||
if (p)
|
||||
{
|
||||
double const lat = ToDouble(entries[1]);
|
||||
double const lon = ToDouble(entries[2]);
|
||||
|
||||
if (ll::ValidLat(lat) && ll::ValidLon(lon))
|
||||
{
|
||||
p->m_lat = lat;
|
||||
p->m_lon = lon;
|
||||
}
|
||||
else
|
||||
LOG(WARNING, ("Bad Lat, Lon:", entries[1], entries[2]));
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
void StorageBuilder::ParseEntries(string const & path)
|
||||
|
@ -92,6 +129,23 @@ void StorageBuilder::ParseRedirects(string const & path)
|
|||
ProcessEntriesFile(path, doAdd);
|
||||
}
|
||||
|
||||
/// Reads the geocodes file at @p path and assigns coordinates to articles,
/// then copies coordinates onto every entry flagged as a redirect.
void StorageBuilder::ParseGeocodes(string const & path)
{
  // Pass 1: apply each line of the file to the matching article.
  DoAddGeocodes geocodeSetter(*this);
  ProcessEntriesFile(path, geocodeSetter);

  // Pass 2: each redirect takes the coordinates of the article that
  // GetArticle returns for the redirect's m_url
  // (presumably the redirect target - verify against ParseRedirects).
  for (size_t idx = 0; idx < m_info.size(); ++idx)
  {
    ArticleInfoBuilder & entry = m_info[idx];
    if (!entry.m_redirect)
      continue;

    ArticleInfoBuilder const * source = GetArticle(entry.m_url);
    CHECK(source, ());
    entry.m_lat = source->m_lat;
    entry.m_lon = source->m_lon;
  }
}
|
||||
|
||||
void StorageBuilder::Add(ArticleInfoBuilder const & info)
|
||||
{
|
||||
m_info.push_back(info);
|
||||
|
|
|
@ -40,6 +40,7 @@ class StorageBuilder
|
|||
public:
|
||||
void ParseEntries(string const & path);
|
||||
void ParseRedirects(string const & path);
|
||||
void ParseGeocodes(string const & path);
|
||||
|
||||
void Add(ArticleInfoBuilder const & info);
|
||||
|
||||
|
@ -49,7 +50,7 @@ public:
|
|||
|
||||
bool operator == (Storage const & s) const;
|
||||
|
||||
ArticleInfoBuilder const * GetArticle(string const & url) const
|
||||
ArticleInfoBuilder * GetArticle(string const & url)
|
||||
{
|
||||
map<string, size_t>::const_iterator i = m_url2info.find(url);
|
||||
return (i == m_url2info.end() ? 0 : &m_info[i->second]);
|
||||
|
|
Reference in a new issue