forked from organicmaps/organicmaps
Add a language getter utility that uses Wikipedia as its source.
This commit is contained in:
parent
53322f4a86
commit
ad54bdeecc
12 changed files with 629 additions and 0 deletions
BIN
lang_getter/debug/lang_getter
Executable file
BIN
lang_getter/debug/lang_getter
Executable file
Binary file not shown.
28
lang_getter/lang_getter.pro
Normal file
28
lang_getter/lang_getter.pro
Normal file
|
@ -0,0 +1,28 @@
|
|||
#-------------------------------------------------
|
||||
#
|
||||
# Project created by QtCreator 2011-11-18T08:50:14
|
||||
#
|
||||
#-------------------------------------------------
|
||||
|
||||
QT += core network xml
|
||||
|
||||
QT -= gui
|
||||
|
||||
TARGET = lang_getter
|
||||
CONFIG += console
|
||||
CONFIG -= app_bundle
|
||||
|
||||
TEMPLATE = app
|
||||
|
||||
|
||||
SOURCES += main.cpp \
|
||||
pagedownloader.cpp \
|
||||
logging.cpp \
|
||||
mainmanager.cpp \
|
||||
stringparser.cpp
|
||||
|
||||
HEADERS += \
|
||||
pagedownloader.h \
|
||||
logging.h \
|
||||
mainmanager.h \
|
||||
stringparser.h
|
30
lang_getter/logging.cpp
Normal file
30
lang_getter/logging.cpp
Normal file
|
@ -0,0 +1,30 @@
|
|||
#include "logging.h"
|
||||
|
||||
#include <iostream>
|
||||
|
||||
|
||||
using namespace std;
|
||||
|
||||
/// Constructs a logger. There is no state to set up.
Logging::Logging() {}
|
||||
|
||||
/// Writes one line to standard output in the form "<STATUS> <message>".
/// @param s    Severity of the message; rendered through StatusToString().
/// @param msg  Text to print after the severity label.
void Logging::Print(STATUS s, QString const & msg)
{
  std::string const label = StatusToString(s).toStdString();
  std::string const text = msg.toStdString();

  // std::endl is kept on purpose: it terminates the line and flushes the stream.
  std::cout << label << " " << text << std::endl;
}
|
||||
|
||||
/// Progress hook. Currently a no-op: both values are accepted and ignored.
/// @param curr   Amount processed so far.
/// @param total  Total amount expected.
void Logging::Percent(qint64 curr, qint64 total)
{
  Q_UNUSED(curr);
  Q_UNUSED(total);
}
|
||||
|
||||
/// Maps a STATUS value to its printable label.
/// @param s  Status to convert.
/// @return "INFO", "WARNING" or "ERROR" for the known values, "NONE" for anything else.
QString Logging::StatusToString(STATUS s) const
{
  char const * label = "NONE";

  if (s == INFO)
    label = "INFO";
  else if (s == WARNING)
    label = "WARNING";
  else if (s == ERROR)
    label = "ERROR";

  return QString(label);
}
|
16
lang_getter/logging.h
Normal file
16
lang_getter/logging.h
Normal file
|
@ -0,0 +1,16 @@
|
|||
#pragma once
|
||||
|
||||
#include <QString>
|
||||
|
||||
class Logging
|
||||
{
|
||||
public:
|
||||
Logging();
|
||||
|
||||
enum STATUS { INFO, WARNING, ERROR };
|
||||
|
||||
void Print(STATUS s, QString const & msg);
|
||||
void Percent(qint64 curr, qint64 total);
|
||||
|
||||
QString StatusToString(STATUS s) const;
|
||||
};
|
13
lang_getter/main.cpp
Normal file
13
lang_getter/main.cpp
Normal file
|
@ -0,0 +1,13 @@
|
|||
#include <QtCore/QCoreApplication>
|
||||
|
||||
#include "mainmanager.h"
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
QCoreApplication a(argc, argv);
|
||||
|
||||
MainManager manager("/Users/alena/omim/omim/data/metainfo/");
|
||||
manager.ProcessCountryList("/Users/alena/omim/omim/data/polygons.lst");
|
||||
|
||||
return a.exec();
|
||||
}
|
304
lang_getter/mainmanager.cpp
Normal file
304
lang_getter/mainmanager.cpp
Normal file
|
@ -0,0 +1,304 @@
|
|||
#include "mainmanager.h"
|
||||
|
||||
#include <QFile>
|
||||
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
|
||||
|
||||
using namespace std;
|
||||
|
||||
/// Remembers a language code for this country, ignoring duplicates.
/// @param code  Language code to add.
void MainManager::Country::AddCode(QString const & code)
{
  bool const alreadyKnown =
      std::find(m_codes.begin(), m_codes.end(), code) != m_codes.end();
  if (alreadyKnown)
    return;

  m_codes.push_back(code);
}
|
||||
|
||||
void MainManager::Country::AddUrl(size_t url)
|
||||
{
|
||||
if (m_langUrls.end() == find(m_langUrls.begin(), m_langUrls.end(), url))
|
||||
m_langUrls.push_back(url);
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
void append(QString & res, QString const & s)
|
||||
{
|
||||
if (res.isEmpty()) res = s;
|
||||
else res = res + "|" + s;
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the '|'-separated list of language codes for this country.
///
/// Directly collected codes come first, followed by the entries of the
/// manager's language list that this country references by index. Empty
/// entries in that list are skipped.
///
/// @param res  Receives the result; cleared first.
/// @param m    Manager whose language list is consulted.
/// @return true when at least one code was written to @p res.
bool MainManager::Country::GetResult(QString & res, MainManager const & m) const
{
  res.clear();

  typedef std::vector<QString>::const_iterator CodeIter;
  for (CodeIter it = m_codes.begin(); it != m_codes.end(); ++it)
    append(res, *it);

  typedef std::vector<size_t>::const_iterator IndexIter;
  for (IndexIter it = m_langUrls.begin(); it != m_langUrls.end(); ++it)
  {
    QString const resolved = m.m_langUrls[*it];
    if (resolved.isEmpty())
      continue;

    append(res, resolved);
  }

  return !res.isEmpty();
}
|
||||
|
||||
|
||||
/// Creates a manager that writes its result files into @p outDir.
///
/// The downloader and the parser both log through m_log. m_index is
/// zero-initialized so that it never holds an indeterminate value, even if a
/// public method that indexes with it is called before ProcessCountryList().
///
/// @param outDir  Output directory prefix; file names are appended to it as-is.
MainManager::MainManager(QString const & outDir)
  : m_downloader(m_log), m_parser(m_log), m_outDir(outDir), m_index(0)
{
}
|
||||
|
||||
/// Translates a language name into a two-letter code for the few languages
/// that are recognised directly.
///
/// Matching is a case-insensitive substring test, checked in table order.
///
/// @param name  Text that may contain a language name.
/// @return The code for the first matching table entry, or 0 when none matches.
char const * MainManager::LangNameToCode(QString const & name)
{
  struct Entry
  {
    char const * m_name;
    char const * m_code;
  };

  static Entry const known[] =
  {
    { "English", "en" },
    { "Spanish", "es" },
    { "French", "fr" },
    { "Mandarin", "zh" }
  };

  for (size_t i = 0; i < sizeof(known) / sizeof(known[0]); ++i)
  {
    if (name.contains(QString(known[i].m_name), Qt::CaseInsensitive))
      return known[i].m_code;
  }

  return 0;
}
|
||||
|
||||
/// Loads the country list and starts downloading the country pages.
///
/// The file is read line by line; every non-empty line becomes one country.
/// Lines are read into a std::string, so there is no length limit (a fixed
/// buffer would put the stream into a failed state on an over-long line and
/// silently drop the rest of the list). A trailing carriage return left by
/// CRLF line endings is removed so it does not end up in urls or file names.
///
/// @param file  Path of the country list file.
void MainManager::ProcessCountryList(QString const & file)
{
  std::ifstream s(file.toStdString().c_str());
  if (!s.is_open() || !s.good())
  {
    m_log.Print(Logging::ERROR, QString("Can't open file: ") + file);
    return;
  }

  std::string line;
  while (std::getline(s, line))
  {
    if (!line.empty() && line[line.size() - 1] == '\r')
      line.erase(line.size() - 1);

    if (!line.empty())
      m_countries.push_back(Country(line.c_str()));
  }

  // Country pages come first; route finished downloads to countryDownloaded.
  m_downloader.ConnectFinished(this, SLOT(countryDownloaded(QString const &)));

  m_index = 0;
  ProcessNextCountry();
}
|
||||
|
||||
namespace
|
||||
{
|
||||
void get_country_url(QString & name)
|
||||
{
|
||||
int const i = name.indexOf('_');
|
||||
if (i != -1)
|
||||
name = name.mid(0, i); // for regions return country name
|
||||
|
||||
name.replace(' ', '_'); // make correct wiki url
|
||||
}
|
||||
}
|
||||
|
||||
void MainManager::ProcessNextCountry()
|
||||
{
|
||||
if (m_index >= m_countries.size())
|
||||
{
|
||||
m_downloader.ConnectFinished(this, SLOT(languageDownloaded(QString const &)));
|
||||
|
||||
m_index = 0;
|
||||
ProcessNextLanguage();
|
||||
return;
|
||||
}
|
||||
|
||||
QString url = m_countries[m_index].m_name;
|
||||
get_country_url(url);
|
||||
|
||||
m_downloader.Download(QString("http://en.wikipedia.org/wiki/") + url);
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
class append_result
|
||||
{
|
||||
MainManager & m_manager;
|
||||
public:
|
||||
append_result(MainManager & m) : m_manager(m) {}
|
||||
void operator() (QString const & s)
|
||||
{
|
||||
char const * code = m_manager.LangNameToCode(s);
|
||||
if (code)
|
||||
m_manager.AppendResult(code);
|
||||
}
|
||||
};
|
||||
|
||||
class nodes_iterator
|
||||
{
|
||||
QDomElement m_node;
|
||||
bool m_isList;
|
||||
|
||||
public:
|
||||
nodes_iterator(QDomElement const & root) : m_isList(false)
|
||||
{
|
||||
// process single elements ...
|
||||
m_node = root.firstChildElement("a");
|
||||
if (m_node.isNull())
|
||||
{
|
||||
// ... or compound list
|
||||
m_node = root.firstChildElement("ul");
|
||||
if (!m_node.isNull())
|
||||
{
|
||||
m_node = m_node.firstChildElement("li");
|
||||
m_isList = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool valid() const { return !m_node.isNull(); }
|
||||
|
||||
QDomElement get() const
|
||||
{
|
||||
return (m_isList ? m_node.firstChildElement("a") : m_node);
|
||||
}
|
||||
|
||||
void next()
|
||||
{
|
||||
m_node = m_node.nextSiblingElement(m_isList ? "li" : "a");
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// Extracts the languages listed in the table cell that follows @p entry on a
/// country page and records them for the current country.
///
/// Each linked language is either recognised by name (its code is stored) or
/// its link target is stored for later resolution. When the cell has no links,
/// its plain text is split on commas and spaces and every token is tried as a
/// language name.
///
/// @param xml    Page content.
/// @param entry  Text that marks the table row of interest.
void MainManager::ProcessLangEntry(QString const & xml, QString const & entry)
{
  if (!m_parser.InitSubDOM(xml, entry, "td"))
    return;

  nodes_iterator it(m_parser.Root());

  if (!it.valid())
  {
    // No child links: try to get the languages from the root node text.
    TokenizeString(m_parser.Root().text(), ", ", append_result(*this));
    return;
  }

  for (; it.valid(); it.next())
  {
    QDomElement e = it.get();

    if (char const * code = LangNameToCode(e.text()))
    {
      AppendResult(code);
      continue;
    }

    // Unknown name: remember the link so the code can be looked up on its page.
    QString const url = e.attribute("href");
    if (url.isEmpty())
      m_log.Print(Logging::WARNING, QString("Undefined language without url: ") + e.text());
    else
      AppendLangUrl(url);
  }
}
|
||||
|
||||
/// Slot invoked with the content of a downloaded country page: harvests the
/// language rows and moves on to the next country.
/// @param s  Page content.
void MainManager::countryDownloaded(QString const & s)
{
  static char const * const rows[] =
  {
    "Official language(s)",
    "National language"
  };

  for (size_t i = 0; i < sizeof(rows) / sizeof(rows[0]); ++i)
    ProcessLangEntry(s, rows[i]);

  ++m_index;
  ProcessNextCountry();
}
|
||||
|
||||
/// Records a language code for the country currently being processed (m_index).
/// @param code  Language code to add.
void MainManager::AppendResult(QString const & code)
{
  Country & current = m_countries[m_index];
  current.AddCode(code);
}
|
||||
|
||||
/// Registers a language page for the current country.
///
/// Only the part of @p url after the last '/' is kept. The page name is added
/// to m_langUrls unless already present, and its index in that list is stored
/// with the country at m_index.
///
/// @param url  Link target taken from the country page.
void MainManager::AppendLangUrl(QString url)
{
  int const slash = url.lastIndexOf("/");
  if (slash >= 0)
    url = url.mid(slash + 1);

  std::vector<QString>::iterator const pos =
      std::find(m_langUrls.begin(), m_langUrls.end(), url);

  // For a new url this equals the old size, i.e. the index it gets once appended.
  size_t const index = std::distance(m_langUrls.begin(), pos);
  if (pos == m_langUrls.end())
    m_langUrls.push_back(url);

  m_countries[m_index].AddUrl(index);
}
|
||||
|
||||
void MainManager::ProcessNextLanguage()
|
||||
{
|
||||
if (m_index >= m_langUrls.size())
|
||||
{
|
||||
CreateResultFiles();
|
||||
m_log.Print(Logging::INFO, "Done!");
|
||||
|
||||
exit(0);
|
||||
return;
|
||||
}
|
||||
|
||||
m_downloader.Download(QString("http://en.wikipedia.org/wiki/") + m_langUrls[m_index]);
|
||||
}
|
||||
|
||||
/// Looks for a language code in the table cell that follows @p entry on a
/// language page. On success the url stored at m_langUrls[m_index] is
/// replaced by the code.
///
/// @param xml    Page content.
/// @param entry  Text that marks the table row of interest.
/// @return true when a non-empty code was found and stored.
bool MainManager::ProcessCodeEntry(QString const & xml, QString const & entry)
{
  if (!m_parser.InitSubDOM(xml, entry, "td"))
    return false;

  // The code is expected inside a <tt> child of the cell.
  QDomElement e = m_parser.Root().firstChildElement("tt");
  if (e.isNull())
    return false;

  QString const name = e.text();
  if (name.isEmpty())
    return false;

  m_langUrls[m_index] = name;
  return true;
}
|
||||
|
||||
/// Slot invoked with the content of a downloaded language page: resolves the
/// language code and moves on to the next language.
///
/// The ISO 639-1, 639-2 and 639-3 rows are tried in that order and the first
/// hit wins. If none is found, the entry is blanked.
///
/// @param s  Page content.
void MainManager::languageDownloaded(QString const & s)
{
  bool const resolved = ProcessCodeEntry(s, "ISO 639-1") ||
                        ProcessCodeEntry(s, "ISO 639-2") ||
                        ProcessCodeEntry(s, "ISO 639-3");

  if (!resolved)
  {
    m_log.Print(Logging::WARNING, QString("Can't find code for url: ") + m_langUrls[m_index]);
    m_langUrls[m_index] = QString();
  }

  ++m_index;
  ProcessNextLanguage();
}
|
||||
|
||||
void MainManager::CreateResultFiles()
|
||||
{
|
||||
m_log.Print(Logging::INFO, "Results:");
|
||||
|
||||
for (size_t i = 0; i < m_countries.size(); ++i)
|
||||
{
|
||||
QString s;
|
||||
if (m_countries[i].GetResult(s, *this))
|
||||
{
|
||||
QFile f(m_outDir + m_countries[i].m_name + QString(".meta"));
|
||||
f.open(QFile::WriteOnly);
|
||||
f.write(s.toStdString().c_str());
|
||||
}
|
||||
else
|
||||
m_log.Print(Logging::WARNING, QString("No languages for country: ") + m_countries[i].m_name);
|
||||
}
|
||||
}
|
64
lang_getter/mainmanager.h
Normal file
64
lang_getter/mainmanager.h
Normal file
|
@ -0,0 +1,64 @@
|
|||
#pragma once
|
||||
#include "logging.h"
|
||||
#include "pagedownloader.h"
|
||||
#include "stringparser.h"
|
||||
|
||||
#include <QObject>
|
||||
#include <vector>
|
||||
|
||||
|
||||
class MainManager : public QObject
|
||||
{
|
||||
Q_OBJECT
|
||||
|
||||
Logging m_log;
|
||||
PageDownloader m_downloader;
|
||||
ContentParser m_parser;
|
||||
|
||||
QString m_outDir;
|
||||
|
||||
class Country
|
||||
{
|
||||
std::vector<QString> m_codes;
|
||||
std::vector<size_t> m_langUrls;
|
||||
|
||||
public:
|
||||
QString m_name;
|
||||
|
||||
Country(char const * name) : m_name(name) {}
|
||||
|
||||
void AddCode(QString const & code);
|
||||
void AddUrl(size_t url);
|
||||
|
||||
bool GetResult(QString & res, MainManager const & m) const;
|
||||
};
|
||||
|
||||
std::vector<Country> m_countries;
|
||||
std::vector<QString> m_langUrls;
|
||||
|
||||
size_t m_index;
|
||||
|
||||
public:
|
||||
MainManager(QString const & outDir);
|
||||
|
||||
void ProcessCountryList(QString const & file);
|
||||
|
||||
protected:
|
||||
void ProcessNextCountry();
|
||||
void ProcessLangEntry(QString const & xml, QString const & entry);
|
||||
|
||||
public: // need for functor
|
||||
char const * LangNameToCode(QString const & name);
|
||||
void AppendResult(QString const & code);
|
||||
protected:
|
||||
void AppendLangUrl(QString url);
|
||||
|
||||
void ProcessNextLanguage();
|
||||
bool ProcessCodeEntry(QString const & xml, QString const & entry);
|
||||
|
||||
void CreateResultFiles();
|
||||
|
||||
private slots:
|
||||
void countryDownloaded(QString const & s);
|
||||
void languageDownloaded(QString const & s);
|
||||
};
|
62
lang_getter/pagedownloader.cpp
Normal file
62
lang_getter/pagedownloader.cpp
Normal file
|
@ -0,0 +1,62 @@
|
|||
#include "pagedownloader.h"
|
||||
#include "logging.h"
|
||||
|
||||
#include <QUrl>
|
||||
#include <QNetworkReply>
|
||||
|
||||
|
||||
/// Makes @p slot of @p obj the only receiver of the finished(QString) signal.
/// Every existing connection of that signal is dropped first.
/// @param obj   Receiver object.
/// @param slot  Receiver slot, as produced by the SLOT() macro.
void PageDownloader::ConnectFinished(QObject * obj, char const * slot)
{
  QObject::disconnect(this, SIGNAL(finished(QString const &)), 0, 0);
  QObject::connect(this, SIGNAL(finished(QString const &)), obj, slot);
}
|
||||
|
||||
/// Starts an asynchronous GET request for @p url.
/// The receive buffer is reset and the reply's signals are wired to this
/// object's private slots.
/// @param url  Address to fetch.
void PageDownloader::Download(QUrl const & url)
{
  m_res.clear();

  QNetworkRequest const request(url);
  m_reply = m_manager.get(request);

  connect(m_reply, SIGNAL(finished()),
          this, SLOT(httpFinished()));
  connect(m_reply, SIGNAL(readyRead()),
          this, SLOT(httpReadyRead()));
  connect(m_reply, SIGNAL(downloadProgress(qint64,qint64)),
          this, SLOT(updateDataReadProgress(qint64,qint64)));
}
|
||||
|
||||
/// Convenience overload: converts the string to a QUrl and starts the download.
/// @param url  Address to fetch.
void PageDownloader::Download(QString const & url)
{
  QUrl const target(url);
  Download(target);
}
|
||||
|
||||
void PageDownloader::httpFinished()
|
||||
{
|
||||
QString const s = QString::fromUtf8(m_res.constData());
|
||||
QString const url = m_reply->url().toString();
|
||||
|
||||
if (s.isEmpty())
|
||||
{
|
||||
m_log.Print(Logging::WARNING, QString("Downloading of ") +
|
||||
url +
|
||||
QString(" failed."));
|
||||
}
|
||||
else
|
||||
{
|
||||
m_log.Print(Logging::INFO, QString("Downloading of ") +
|
||||
url +
|
||||
QString(" finished successfully."));
|
||||
}
|
||||
|
||||
m_reply->deleteLater();
|
||||
m_reply = 0;
|
||||
|
||||
emit finished(s);
|
||||
}
|
||||
|
||||
void PageDownloader::httpReadyRead()
|
||||
{
|
||||
m_res += m_reply->readAll();
|
||||
}
|
||||
|
||||
/// Slot for the reply's downloadProgress() signal: forwards the numbers to the logger.
/// @param read   Bytes received so far.
/// @param total  Total bytes expected.
void PageDownloader::updateDataReadProgress(qint64 read, qint64 total)
{
  m_log.Percent(read, total);
}
|
37
lang_getter/pagedownloader.h
Normal file
37
lang_getter/pagedownloader.h
Normal file
|
@ -0,0 +1,37 @@
|
|||
#pragma once
|
||||
|
||||
#include <QObject>
|
||||
#include <QNetworkAccessManager>
|
||||
|
||||
|
||||
class Logging;
|
||||
class QUrl;
|
||||
class QNetworkReply;
|
||||
|
||||
class PageDownloader : public QObject
|
||||
{
|
||||
Q_OBJECT
|
||||
|
||||
QNetworkAccessManager m_manager;
|
||||
QNetworkReply * m_reply;
|
||||
|
||||
Logging & m_log;
|
||||
|
||||
QByteArray m_res;
|
||||
|
||||
public:
|
||||
PageDownloader(Logging & log) : m_log(log) {}
|
||||
|
||||
void ConnectFinished(QObject * obj, char const * slot);
|
||||
|
||||
void Download(QUrl const & url);
|
||||
void Download(QString const & url);
|
||||
|
||||
signals:
|
||||
void finished(QString const &);
|
||||
|
||||
private slots:
|
||||
void httpFinished();
|
||||
void httpReadyRead();
|
||||
void updateDataReadProgress(qint64 read, qint64 total);
|
||||
};
|
BIN
lang_getter/release/lang_getter
Executable file
BIN
lang_getter/release/lang_getter
Executable file
Binary file not shown.
36
lang_getter/stringparser.cpp
Normal file
36
lang_getter/stringparser.cpp
Normal file
|
@ -0,0 +1,36 @@
|
|||
#include "stringparser.h"
|
||||
#include "logging.h"
|
||||
|
||||
|
||||
/// Locates the first <tag ...> ... /tag> fragment that follows @p entry in
/// @p xml and parses it into the internal DOM document.
///
/// The fragment runs from the first "<tag" after the entry text to the first
/// "/tag>" after that. A missing closing tag is reported and handled as a
/// regular failure; an assertion alone would vanish in release builds and let
/// a negative position flow into the substring length.
///
/// @param xml    Text to search.
/// @param entry  Marker text that precedes the element of interest.
/// @param tag    Element name, without angle brackets.
/// @return true when the fragment was found and parsed; Root() is then valid.
bool ContentParser::InitSubDOM(QString const & xml, QString const & entry, QString const & tag)
{
  int const i = xml.indexOf(entry);
  if (i == -1)
  {
    m_log.Print(Logging::INFO, QString("Can't find entry: ") + entry);
    return false;
  }

  // Searching from i can only yield -1 or a position at or after i.
  int const beg = xml.indexOf(QString("<") + tag, i);
  if (beg == -1)
  {
    m_log.Print(Logging::INFO, QString("Can't find tag: ") + tag);
    return false;
  }

  QString const last = QString("/") + tag + QString(">");
  int const end = xml.indexOf(last, beg);
  if (end == -1)
  {
    m_log.Print(Logging::WARNING, QString("Can't find closing tag: ") + tag);
    return false;
  }

  // Include the closing token itself in the fragment.
  if (!m_doc.setContent(xml.mid(beg, end - beg + last.length())))
  {
    m_log.Print(Logging::ERROR, QString("QDomDocument::setContent error"));
    return false;
  }

  m_node = m_doc.documentElement();
  Q_ASSERT ( !m_node.isNull() );
  Q_ASSERT ( m_node.tagName() == tag );

  return true;
}
|
39
lang_getter/stringparser.h
Normal file
39
lang_getter/stringparser.h
Normal file
|
@ -0,0 +1,39 @@
|
|||
#pragma once
|
||||
|
||||
#include <QDomDocument>
|
||||
|
||||
|
||||
class Logging;
|
||||
|
||||
class ContentParser
|
||||
{
|
||||
Logging & m_log;
|
||||
|
||||
QDomDocument m_doc;
|
||||
QDomElement m_node;
|
||||
|
||||
public:
|
||||
ContentParser(Logging & log) : m_log(log) {}
|
||||
|
||||
bool InitSubDOM(QString const & xml, QString const & entry, QString const & tag);
|
||||
QDomElement Root() const { return m_node; }
|
||||
};
|
||||
|
||||
template <class ToDo>
|
||||
void TokenizeString(QString const & s, QString const & delim, ToDo toDo)
|
||||
{
|
||||
int beg = 0;
|
||||
int i = 0;
|
||||
for (; i < s.length(); ++i)
|
||||
{
|
||||
if (delim.indexOf(s[i]) != -1)
|
||||
{
|
||||
if (i > beg)
|
||||
toDo(s.mid(beg, i-beg));
|
||||
beg = i+1;
|
||||
}
|
||||
}
|
||||
|
||||
if (i > beg)
|
||||
toDo(s.mid(beg, i-beg));
|
||||
}
|
Loading…
Add table
Reference in a new issue