Added geocoding for ArticleInfo indexer.

This commit is contained in:
vng 2013-08-08 00:15:42 +03:00
parent 1531131fc7
commit 7eec83e776
5 changed files with 77 additions and 6 deletions

View file

@ -5,15 +5,16 @@
// Command-line entry point of the storage builder tool.
// NOTE(review): this listing interleaves the lines removed and added by the
// commit (argc == 4 vs argc == 5, Save(argv[3]) vs Save(argv[4]), and two usage
// strings) - confirm against the checked-out file which of each pair is live.
int main(int argc, char const * argv[])
{
if (argc == 4)
if (argc == 5)
{
StorageBuilder builder;
builder.ParseEntries(argv[1]);     // <info file>
builder.ParseRedirects(argv[2]);   // <redirect file>
builder.Save(argv[3]);
builder.ParseGeocodes(argv[3]);    // <geocodes file>
builder.Save(argv[4]);             // <result file>
}
else
cout << "Usage: <info file> <redirect file> <result file>" << endl;
cout << "Usage: <info file> <redirect file> <geocodes file> <result file>" << endl;
// The exit status is 0 on every path, including the usage message.
return 0;
}

View file

@ -4,10 +4,13 @@
#include "../env/strings.hpp"
#include "../env/writer.hpp"
#include "../env/reader.hpp"
#include "../env/latlon.hpp"
#include "../env/assert.hpp"
#include "../std/iterator.hpp"
#include "../std/algorithm.hpp"
#include "../std/cmath.hpp"
#include "../std/static_assert.hpp"
void ArticleInfo::GenerateKey()
@ -15,6 +18,17 @@ void ArticleInfo::GenerateKey()
m_key = str::MakeNormalizeAndLowerUtf8(m_title);
}
/// @return true when both m_lat and m_lon are below 200, false otherwise.
bool ArticleInfo::IsValidCoordinates() const
{
  // The compile-time check keeps EMPTY_COORD above the 200 threshold used below,
  // so a coordinate equal to EMPTY_COORD never passes the range test.
  STATIC_ASSERT(EMPTY_COORD > 200);

  bool const belowThreshold = (m_lat < 200.0 && m_lon < 200.0);
  if (!belowThreshold)
    return false;

  // Anything below the threshold is expected to be a real latitude/longitude pair.
  ASSERT(ll::ValidLat(m_lat) && ll::ValidLon(m_lon), ());
  return true;
}
namespace
{
@ -64,7 +78,7 @@ void ArticleInfo::Read(rd::Reader & r)
double ArticleInfo::Score(double currLat, double currLon) const
{
if (m_lat != EMPTY_COORD && m_lon != EMPTY_COORD)
if (IsValidCoordinates())
return earth::Distance(m_lat, m_lon, currLat, currLon);
else
return 0.0;

View file

@ -46,6 +46,7 @@ public:
int32_t m_parentIndex; // NO_PARENT is the root article
double m_lat, m_lon;
bool IsValidCoordinates() const;
bool m_redirect;

View file

@ -3,6 +3,7 @@
#include "../env/writer.hpp"
#include "../env/assert.hpp"
#include "../env/logging.hpp"
#include "../env/latlon.hpp"
#include "../std/fstream.hpp"
#include "../std/iterator.hpp"
@ -26,7 +27,7 @@ void ProcessEntriesFile(string const & path, ToDo & toDo)
continue;
entries.clear();
str::Tokenize(str, "\t", back_inserter(entries));
str::Tokenize(str, "\t ", back_inserter(entries));
toDo(entries);
}
@ -78,6 +79,42 @@ public:
}
};
/// Functor passed to ProcessEntriesFile for the geocodes file.
/// Each call receives the tokens of one line: <article url> <lat> <lon>.
/// If the storage has an article for the url and the coordinates are valid,
/// they are written into that article; unknown urls are ignored and invalid
/// coordinates are reported with a warning.
class DoAddGeocodes
{
  StorageBuilder & m_storage;

  /// Parses the whole string @p s as a double.
  /// CHECK-fails when nothing could be converted (e.g. an empty string) or when
  /// characters remain after the number.
  static double ToDouble(string const & s)
  {
    char const * const start = s.c_str();
    char * stop = 0;
    double const d = strtod(start, &stop);
    // strtod leaves stop == start when no conversion was performed; testing only
    // *stop == 0 would silently accept an empty string as 0.0.
    CHECK(stop != start && *stop == 0, (s));
    return d;
  }

public:
  explicit DoAddGeocodes(StorageBuilder & storage) : m_storage(storage) {}

  /// @param entries Tokens of one input line; exactly three are required.
  void operator() (vector<string> const & entries)
  {
    CHECK(entries.size() == 3, (entries));

    ArticleInfoBuilder * p = m_storage.GetArticle(entries[0]);
    if (p)
    {
      double const lat = ToDouble(entries[1]);
      double const lon = ToDouble(entries[2]);

      if (ll::ValidLat(lat) && ll::ValidLon(lon))
      {
        p->m_lat = lat;
        p->m_lon = lon;
      }
      else
        LOG(WARNING, ("Bad Lat, Lon:", entries[1], entries[2]));
    }
  }
};
}
void StorageBuilder::ParseEntries(string const & path)
@ -92,6 +129,23 @@ void StorageBuilder::ParseRedirects(string const & path)
ProcessEntriesFile(path, doAdd);
}
/// Reads the geocodes file at @p path and assigns coordinates to the articles,
/// then copies coordinates into every redirect entry from the article that
/// GetArticle returns for the redirect's m_url.
void StorageBuilder::ParseGeocodes(string const & path)
{
  DoAddGeocodes geocodeAdder(*this);
  ProcessEntriesFile(path, geocodeAdder);

  for (size_t idx = 0; idx < m_info.size(); ++idx)
  {
    ArticleInfoBuilder & entry = m_info[idx];
    if (!entry.m_redirect)
      continue;

    // A redirect whose m_url cannot be resolved is treated as a fatal error.
    ArticleInfoBuilder const * resolved = GetArticle(entry.m_url);
    CHECK(resolved, ());

    entry.m_lat = resolved->m_lat;
    entry.m_lon = resolved->m_lon;
  }
}
void StorageBuilder::Add(ArticleInfoBuilder const & info)
{
m_info.push_back(info);

View file

@ -40,6 +40,7 @@ class StorageBuilder
public:
void ParseEntries(string const & path);
void ParseRedirects(string const & path);
void ParseGeocodes(string const & path);
void Add(ArticleInfoBuilder const & info);
@ -49,7 +50,7 @@ public:
bool operator == (Storage const & s) const;
ArticleInfoBuilder const * GetArticle(string const & url) const
ArticleInfoBuilder * GetArticle(string const & url)
{
map<string, size_t>::const_iterator i = m_url2info.find(url);
return (i == m_url2info.end() ? 0 : &m_info[i->second]);