forked from organicmaps/organicmaps
[generator] Improved descriptions using wikidata.
parent 2d9e4a90ee
commit 58021db34c
17 changed files with 489 additions and 54 deletions
@@ -35,6 +35,8 @@ set(SRC
  coastlines_generator.cpp
  coastlines_generator.hpp
  collector_interface.hpp
  collector_tag.cpp
  collector_tag.hpp
  descriptions_section_builder.cpp
  descriptions_section_builder.hpp
  dumper.cpp
@@ -1,7 +1,5 @@
#pragma once

#include "base/geo_object_id.hpp"

#include <string>

struct OsmElement;
42 generator/collector_tag.cpp Normal file
@@ -0,0 +1,42 @@
#include "generator/collector_tag.hpp"

#include "generator/osm_element.hpp"

#include "base/geo_object_id.hpp"
#include "base/logging.hpp"

namespace generator
{
CollectorTag::CollectorTag(std::string const & filename, std::string const & tagKey,
                           Validator const & validator, bool ignoreIfNotOpen)
  : m_tagKey(tagKey), m_validator(validator), m_needCollect(true)
{
  m_stream.exceptions(std::fstream::failbit | std::fstream::badbit);
  try
  {
    m_stream.open(filename);
  }
  catch (std::ios::failure const & e)
  {
    if (ignoreIfNotOpen)
    {
      m_needCollect = false;
      LOG(LINFO, ("Could not open file", filename, ". This was ignored."));
    }
    else
    {
      throw e;
    }
  }
}

void CollectorTag::Collect(base::GeoObjectId const & osmId, OsmElement const & el)
{
  if (!m_needCollect)
    return;

  auto const tag = el.GetTag(m_tagKey);
  if (!tag.empty() && m_validator(tag))
    m_stream << osmId.GetEncodedId() << "\t" << tag << "\n";
}
} // namespace generator
37 generator/collector_tag.hpp Normal file
@@ -0,0 +1,37 @@
#pragma once

#include "generator/collector_interface.hpp"

#include <fstream>
#include <functional>
#include <string>

struct OsmElement;
namespace base
{
class GeoObjectId;
} // namespace base

namespace generator
{
// CollectorTag collects validated values of a tag and saves them to a file in the following
// format: osmId<tab>tagValue.
class CollectorTag : public CollectorInterface
{
public:
  using Validator = std::function<bool(std::string const & tagValue)>;

  explicit CollectorTag(std::string const & filename, std::string const & tagKey,
                        Validator const & validator, bool ignoreIfNotOpen = false);

  // CollectorInterface overrides:
  void Collect(base::GeoObjectId const & osmId, OsmElement const & el) override;
  void Save() override {}

private:
  std::ofstream m_stream;
  std::string m_tagKey;
  Validator m_validator;
  bool m_needCollect;
};
} // namespace generator
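
To make the new collector concrete, here is a minimal usage sketch (not part of the commit; the output file name, the validator, and the wrapper function are hypothetical — the commit itself wires the collector up in TranslatorPlanet with WikiDataValidator):

#include "generator/collector_tag.hpp"
#include "generator/osm_element.hpp"

#include <algorithm>
#include <cctype>
#include <string>

namespace
{
// Accepts only wikidata item ids such as "Q42": a 'Q' followed by digits.
bool IsWikidataItemId(std::string const & value)
{
  return value.size() >= 2 && value[0] == 'Q' &&
         std::all_of(value.begin() + 1, value.end(),
                     [](unsigned char c) { return std::isdigit(c) != 0; });
}
} // namespace

void CollectWikidata(OsmElement const & element)
{
  // Appends "osmId<tab>tagValue" lines to id2wikidata.csv (hypothetical path).
  // With ignoreIfNotOpen == true an unopenable file is logged and skipped
  // instead of aborting generation.
  static generator::CollectorTag collector("id2wikidata.csv", "wikidata",
                                           IsWikidataItemId, true /* ignoreIfNotOpen */);
  collector.Collect(GetGeoObjectId(element), element);
}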
@@ -1,9 +1,13 @@
#include "generator/descriptions_section_builder.hpp"

#include "generator/routing_helpers.hpp"

#include "platform/platform.hpp"

#include "base/string_utils.hpp"

#include "defines.hpp"

#include <fstream>
#include <iterator>
#include <limits>
@@ -26,6 +30,39 @@ std::string GetFileName(std::string path)

namespace generator
{
WikidataHelper::WikidataHelper(std::string const & mwmPath, std::string const & id2wikidataPath)
  : m_mwmPath(mwmPath)
  , m_id2wikidataPath(id2wikidataPath)
{
  std::string const osmIdsToFeatureIdsPath = m_mwmPath + OSM2FEATURE_FILE_EXTENSION;
  if (!routing::ParseFeatureIdToOsmIdMapping(osmIdsToFeatureIdsPath, m_featureIdToOsmId))
    LOG(LCRITICAL, ("Error parsing OsmIdToFeatureId mapping."));

  std::ifstream stream;
  stream.exceptions(std::fstream::failbit | std::fstream::badbit);
  stream.open(m_id2wikidataPath);
  stream.exceptions(std::fstream::badbit);
  uint64_t id;
  std::string wikidataId;
  while (stream)
  {
    stream >> id >> wikidataId;
    strings::Trim(wikidataId);
    m_osmIdToFeatureId.emplace(base::GeoObjectId(id), wikidataId);
  }
}

boost::optional<std::string> WikidataHelper::GetWikidataId(uint32_t featureId) const
{
  auto const itFeatureIdToOsmId = m_featureIdToOsmId.find(featureId);
  if (itFeatureIdToOsmId == std::end(m_featureIdToOsmId))
    return {};

  auto const itOsmIdToFeatureId = m_osmIdToFeatureId.find(itFeatureIdToOsmId->second);
  return itOsmIdToFeatureId == std::end(m_osmIdToFeatureId) ?
      boost::optional<std::string>() : itOsmIdToFeatureId->second;
}
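
For reference, each line of the id2wikidata file parsed by the constructor above pairs an encoded OSM id with a wikidata item id, separated by a tab — e.g. (hypothetical values):

1237942526232936423	Q649
4611686018427387906	Q656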

std::string DescriptionsCollectionBuilderStat::LangStatisticsToString() const
{
  std::stringstream stream;
@@ -45,12 +82,18 @@ std::string DescriptionsCollectionBuilderStat::LangStatisticsToString() const
  return stream.str();
}

DescriptionsCollectionBuilder::DescriptionsCollectionBuilder(std::string const & wikipediaDir,
                                                             std::string const & mwmFile,
                                                             std::string const & id2wikidataPath)
  : m_wikidataHelper(mwmFile, id2wikidataPath), m_wikipediaDir(wikipediaDir), m_mwmFile(mwmFile) {}

DescriptionsCollectionBuilder::DescriptionsCollectionBuilder(std::string const & wikipediaDir,
                                                             std::string const & mwmFile)
  : m_wikipediaDir(wikipediaDir), m_mwmFile(mwmFile) {}

// static
std::string DescriptionsCollectionBuilder::MakePath(std::string const & wikipediaDir, std::string wikipediaUrl)
std::string DescriptionsCollectionBuilder::MakePathForWikipedia(std::string const & wikipediaDir,
                                                                std::string wikipediaUrl)
{
  strings::Trim(wikipediaUrl);
  strings::ReplaceFirst(wikipediaUrl, "http://", "");
@@ -61,6 +104,13 @@ std::string DescriptionsCollectionBuilder::MakePath(std::string const & wikipedi
  return base::JoinPath(wikipediaDir, wikipediaUrl);
}

// static
std::string DescriptionsCollectionBuilder::MakePathForWikidata(std::string const & wikipediaDir,
                                                               std::string wikidataId)
{
  return base::JoinPath(wikipediaDir, "wikidata", wikidataId);
}

// static
size_t DescriptionsCollectionBuilder::FillStringFromFile(std::string const & fullPath, int8_t code,
                                                         StringUtf8Multilang & str)
@@ -76,10 +126,9 @@ size_t DescriptionsCollectionBuilder::FillStringFromFile(std::string const & ful
  return contentSize;
}

boost::optional<size_t> DescriptionsCollectionBuilder::FindPageAndFill(std::string wikipediaUrl,
boost::optional<size_t> DescriptionsCollectionBuilder::FindPageAndFill(std::string path,
                                                                       StringUtf8Multilang & str)
{
  auto const path = MakePath(m_wikipediaDir, wikipediaUrl);
  if (!IsValidDir(path))
  {
    LOG(LWARNING, ("Directory", path, "not found."));
@@ -108,14 +157,14 @@ boost::optional<size_t> DescriptionsCollectionBuilder::FindPageAndFill(std::stri
  return size;
}

size_t DescriptionsCollectionBuilder::GetFeatureDescription(std::string const & wikiUrl, uint32_t featureId,
size_t DescriptionsCollectionBuilder::GetFeatureDescription(std::string const & path, uint32_t featureId,
                                                            descriptions::FeatureDescription & description)
{
  if (wikiUrl.empty())
  if (path.empty())
    return 0;

  StringUtf8Multilang string;
  auto const ret = FindPageAndFill(wikiUrl, string);
  auto const ret = FindPageAndFill(path, string);
  if (!ret || *ret == 0)
    return 0;

@@ -123,6 +172,12 @@ size_t DescriptionsCollectionBuilder::GetFeatureDescription(std::string const &
  return *ret;
}

void BuildDescriptionsSection(std::string const & wikipediaDir, std::string const & mwmFile,
                              std::string const & id2wikidataPath)
{
  DescriptionsSectionBuilder<FeatureType>::Build(wikipediaDir, mwmFile, id2wikidataPath);
}

void BuildDescriptionsSection(std::string const & wikipediaDir, std::string const & mwmFile)
{
  DescriptionsSectionBuilder<FeatureType>::Build(wikipediaDir, mwmFile);
@@ -12,6 +12,7 @@
#include "coding/string_utf8_multilang.hpp"

#include "base/assert.hpp"
#include "base/geo_object_id.hpp"
#include "base/logging.hpp"

#include "defines.hpp"
@@ -19,6 +20,8 @@
#include <array>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <map>
#include <string>
#include <utility>

@@ -31,6 +34,21 @@ class TestDescriptionSectionBuilder;

namespace generator
{
class WikidataHelper
{
public:
  WikidataHelper() = default;
  explicit WikidataHelper(std::string const & mwmPath, std::string const & id2wikidataPath);

  boost::optional<std::string> GetWikidataId(uint32_t featureId) const;

private:
  std::string m_mwmPath;
  std::string m_id2wikidataPath;
  std::map<uint32_t, base::GeoObjectId> m_featureIdToOsmId;
  std::map<base::GeoObjectId, std::string> m_osmIdToFeatureId;
};

template <class T>
struct ForEachFromDatAdapt
{
@@ -61,13 +79,19 @@ public:
  }
  void AddSize(size_t size) { m_size += size; }
  void IncPage() { ++m_pages; }
  void IncNumberWikipediaUrls() { ++m_numberWikipediaUrls; }
  void IncNumberWikidataIds() { ++m_numberWikidataIds; }
  size_t GetSize() const { return m_size; }
  size_t GetPages() const { return m_pages; }
  size_t GetNumberWikipediaUrls() const { return m_numberWikipediaUrls; }
  size_t GetNumberWikidataIds() const { return m_numberWikidataIds; }
  LangStatistics const & GetLangStatistics() const { return m_langsStat; }

private:
  size_t m_size = 0;
  size_t m_pages = 0;
  size_t m_numberWikipediaUrls = 0;
  size_t m_numberWikidataIds = 0;
  LangStatistics m_langsStat = {};
};

@@ -76,6 +100,8 @@ class DescriptionsCollectionBuilder
public:
  friend class generator_tests::TestDescriptionSectionBuilder;

  DescriptionsCollectionBuilder(std::string const & wikipediaDir, std::string const & mwmFile,
                                std::string const & id2wikidataPath);
  DescriptionsCollectionBuilder(std::string const & wikipediaDir, std::string const & mwmFile);

  template <typename Ft, template <typename> class ForEachFromDatAdapter>
@@ -87,13 +113,35 @@ public:
    if (!wikiChecker.NeedFeature(f))
      return;

    std::function<void()> incSource = []() {};
    descriptions::FeatureDescription description;
    std::string path;
    // First, try to get the wikipedia url.
    auto const wikiUrl = f.GetMetadata().GetWikiURL();
    auto const ret = GetFeatureDescription(wikiUrl, featureId, description);
    CHECK_GREATER_OR_EQUAL(ret, 0, ());
    if (!wikiUrl.empty())
    {
      path = MakePathForWikipedia(m_wikipediaDir, wikiUrl);
      incSource = std::bind(&DescriptionsCollectionBuilderStat::IncNumberWikipediaUrls, std::ref(m_stat));
    }
    else
    {
      // Then try to get the wikidata id.
      auto const wikidataId = m_wikidataHelper.GetWikidataId(featureId);
      if (wikidataId)
      {
        path = MakePathForWikidata(m_wikipediaDir, *wikidataId);
        incSource = std::bind(&DescriptionsCollectionBuilderStat::IncNumberWikidataIds, std::ref(m_stat));
      }
    }

    if (path.empty())
      return;

    auto const ret = GetFeatureDescription(path, featureId, description);
    if (ret == 0)
      return;

    incSource();
    m_stat.AddSize(ret);
    m_stat.IncPage();
    descriptionList.emplace_back(std::move(description));
@@ -104,7 +152,8 @@ public:
  }

  DescriptionsCollectionBuilderStat const & GetStat() const { return m_stat; }
  static std::string MakePath(std::string const & wikipediaDir, std::string wikipediaUrl);
  static std::string MakePathForWikipedia(std::string const & wikipediaDir, std::string wikipediaUrl);
  static std::string MakePathForWikidata(std::string const & wikipediaDir, std::string wikidataId);

private:
  static size_t FillStringFromFile(std::string const & fullPath, int8_t code,
@@ -114,6 +163,7 @@ private:
                                   descriptions::FeatureDescription & description);

  DescriptionsCollectionBuilderStat m_stat;
  WikidataHelper m_wikidataHelper;
  std::string m_wikipediaDir;
  std::string m_mwmFile;
};
@@ -121,14 +171,28 @@ private:
template <typename Ft, template <typename> class ForEachFromDatAdapter = ForEachFromDatAdapt>
struct DescriptionsSectionBuilder
{
  static void Build(std::string const & wikipediaDir, std::string const & mwmFile,
                    std::string const & id2wikidataPath)
  {
    DescriptionsCollectionBuilder descriptionsCollectionBuilder(wikipediaDir, mwmFile, id2wikidataPath);
    Build(mwmFile, descriptionsCollectionBuilder);
  }

  static void Build(std::string const & wikipediaDir, std::string const & mwmFile)
  {
    DescriptionsCollectionBuilder descriptionsCollectionBuilder(wikipediaDir, mwmFile);
    auto descriptionList = descriptionsCollectionBuilder.MakeDescriptions<Ft, ForEachFromDatAdapter>();
    Build(mwmFile, descriptionsCollectionBuilder);
  }

  auto const & stat = descriptionsCollectionBuilder.GetStat();
private:
  static void Build(std::string const & mwmFile, DescriptionsCollectionBuilder & builder)
  {
    auto descriptionList = builder.MakeDescriptions<Ft, ForEachFromDatAdapter>();
    auto const & stat = builder.GetStat();
    auto const size = stat.GetSize();
    LOG(LINFO, ("Found", stat.GetPages(), "pages for", mwmFile));
    LOG(LINFO, ("Added", stat.GetNumberWikipediaUrls(), "pages from wikipedia urls for", mwmFile));
    LOG(LINFO, ("Added", stat.GetNumberWikidataIds(), "pages from wikidata ids for", mwmFile));
    LOG(LINFO, ("Added", stat.GetPages(), "pages for", mwmFile));
    LOG(LINFO, ("Total size of added pages (before writing to section):", size));
    CHECK_GREATER_OR_EQUAL(size, 0, ());
    if (size == 0)
@@ -147,5 +211,8 @@ struct DescriptionsSectionBuilder
  }
};

void BuildDescriptionsSection(std::string const & wikipediaDir, std::string const & mwmFile,
                              std::string const & id2wikidataPath);

void BuildDescriptionsSection(std::string const & wikipediaDir, std::string const & mwmFile);
} // namespace generator
@@ -53,6 +53,8 @@ struct GenerateInfo

  std::string m_popularPlacesFilename;

  std::string m_id2wikidataFilename;

  std::shared_ptr<generator::OsmIdToBoundariesTable> m_boundariesTable;

  uint32_t m_versionDate = 0;
@@ -77,7 +77,7 @@ public:
  {
    for (auto const & m : kWikiData)
    {
      auto const dir = DescriptionsCollectionBuilder::MakePath(m_wikiDir, m.m_url);
      auto const dir = DescriptionsCollectionBuilder::MakePathForWikipedia(m_wikiDir, m.m_url);
      CHECK(Platform::MkDirRecursively(dir), ());
      for (auto const & d : m.m_pages)
      {
@@ -112,13 +112,13 @@ public:
    {
      std::string const wikiDir = "/wikiDir/";
      std::string const wikiUrl = "http://en.wikipedia.org/wiki/Helsinki_Olympic_Stadium/";
      auto const answer = DescriptionsCollectionBuilder::MakePath(wikiDir, wikiUrl);
      auto const answer = DescriptionsCollectionBuilder::MakePathForWikipedia(wikiDir, wikiUrl);
      TEST_EQUAL(trueAnswer, answer, ());
    }
    {
      std::string const wikiDir = "/wikiDir";
      std::string const wikiUrl = "https://en.wikipedia.org/wiki/Helsinki_Olympic_Stadium";
      auto const answer = DescriptionsCollectionBuilder::MakePath(wikiDir, wikiUrl);
      auto const answer = DescriptionsCollectionBuilder::MakePathForWikipedia(wikiDir, wikiUrl);
      TEST_EQUAL(trueAnswer, answer, ());
    }
  }
@@ -130,7 +130,8 @@ public:
    CHECK(!kWikiData.empty(), ());
    auto const & first = kWikiData.front();
    StringUtf8Multilang str;
    auto const size = b.FindPageAndFill(first.m_url, str);
    auto const path = DescriptionsCollectionBuilder::MakePathForWikipedia(m_wikiDir, first.m_url);
    auto const size = b.FindPageAndFill(path, str);
    TEST(size, ());
    TEST_EQUAL(*size, GetPageSize(first.m_pages), ());
    TEST(CheckLangs(str, first.m_pages), ());
@@ -139,7 +140,8 @@ public:
    DescriptionsCollectionBuilder b(m_wikiDir, kMwmFile);
    StringUtf8Multilang str;
    std::string const badUrl = "https://en.wikipedia.org/wiki/Not_exists";
    auto const size = b.FindPageAndFill(badUrl, str);
    auto const path = DescriptionsCollectionBuilder::MakePathForWikipedia(m_wikiDir, badUrl);
    auto const size = b.FindPageAndFill(path, str);
    TEST(!size, ());
  }
@@ -151,7 +153,7 @@ public:
    auto const & first = kWikiData.front();
    std::string const lang = "en";
    auto const langIndex = StringUtf8Multilang::GetLangIndex(lang);
    auto const path = DescriptionsCollectionBuilder::MakePath(m_wikiDir, first.m_url);
    auto const path = DescriptionsCollectionBuilder::MakePathForWikipedia(m_wikiDir, first.m_url);
    auto const fullPath = base::JoinPath(path, (lang + ".html"));
    StringUtf8Multilang str;
    // This is a private function, so it must be given the right path (fullPath).
@@ -172,7 +174,8 @@ public:
    auto ft = MakeFeature(first.m_url);
    descriptions::FeatureDescription description;
    auto const wikiUrl = ft.GetMetadata().GetWikiURL();
    auto const size = b.GetFeatureDescription(wikiUrl, featureId, description);
    auto const path = DescriptionsCollectionBuilder::MakePathForWikipedia(m_wikiDir, wikiUrl);
    auto const size = b.GetFeatureDescription(path, featureId, description);

    TEST_EQUAL(size, GetPageSize(first.m_pages), ());
    CHECK_NOT_EQUAL(size, 0, ());
@@ -153,6 +153,7 @@ DEFINE_string(opentable_data, "", "Path to opentable data in .tsv format.");
DEFINE_string(ugc_data, "", "Input UGC source database file name.");

DEFINE_string(wikipedia_pages, "", "Input dir with wikipedia pages.");
DEFINE_string(id2wikidata, "", "Path to file with id to wikidata mapping.");
DEFINE_string(dump_wikipedia_urls, "", "Output file with wikipedia urls.");

DEFINE_bool(generate_popular_places, false, "Generate popular places section.");
@@ -306,6 +307,7 @@ int GeneratorToolMain(int argc, char ** argv)
  genInfo.m_emitCoasts = FLAGS_emit_coasts;
  genInfo.m_fileName = FLAGS_output;
  genInfo.m_genAddresses = FLAGS_generate_addresses_file;
  genInfo.m_id2wikidataFilename = FLAGS_id2wikidata;

  auto emitter = CreateEmitter(EmitterType::Planet, genInfo);
  if (!GenerateFeatures(genInfo, emitter))
@@ -433,8 +435,15 @@ int GeneratorToolMain(int argc, char ** argv)
  {
    auto const tmpPath = base::JoinPath(genInfo.m_intermediateDir, "tmp");
    auto const datFiles = platform_helpers::GetFullDataTmpFilePaths(tmpPath);

    WikiUrlDumper wikiUrlDumper(FLAGS_dump_wikipedia_urls, datFiles);
    wikiUrlDumper.Dump(threadsCount);

    if (!FLAGS_id2wikidata.empty())
    {
      WikiDataFilter wikiDataFilter(FLAGS_id2wikidata, datFiles);
      wikiDataFilter.Filter(threadsCount);
    }
  }

  // Enumerate over all dat files that were created.
@@ -601,7 +610,12 @@ int GeneratorToolMain(int argc, char ** argv)
  }

  if (!FLAGS_wikipedia_pages.empty())
    BuildDescriptionsSection(FLAGS_wikipedia_pages, datFile);
  {
    if (!FLAGS_id2wikidata.empty())
      BuildDescriptionsSection(FLAGS_wikipedia_pages, datFile, FLAGS_id2wikidata);
    else
      BuildDescriptionsSection(FLAGS_wikipedia_pages, datFile);
  }

  if (FLAGS_generate_popular_places)
  {
@@ -674,7 +688,7 @@ int GeneratorToolMain(int argc, char ** argv)


int main(int argc, char ** argv)
{
{
  try
  {
    return GeneratorToolMain(argc, argv);
@@ -153,3 +153,17 @@ std::string DebugPrint(OsmElement::Tag const & tag)
  ss << tag.key << '=' << tag.value;
  return ss.str();
}

base::GeoObjectId GetGeoObjectId(OsmElement const & element)
{
  switch (element.type)
  {
  case OsmElement::EntityType::Node:
    return base::MakeOsmNode(element.id);
  case OsmElement::EntityType::Way:
    return base::MakeOsmWay(element.id);
  case OsmElement::EntityType::Relation:
    return base::MakeOsmRelation(element.id);
  }
  UNREACHABLE();
}
@@ -1,6 +1,7 @@
#pragma once

#include "base/assert.hpp"
#include "base/geo_object_id.hpp"
#include "base/math.hpp"
#include "base/string_utils.hpp"

@@ -162,6 +163,8 @@ struct OsmElement
  std::string GetTag(std::string const & key) const;
};

base::GeoObjectId GetGeoObjectId(OsmElement const & element);

std::string DebugPrint(OsmElement const & e);
std::string DebugPrint(OsmElement::EntityType e);
std::string DebugPrint(OsmElement::Tag const & tag);
@@ -15,12 +15,37 @@
#include "geometry/point2d.hpp"

#include "base/assert.hpp"
#include "base/string_utils.hpp"

#include <cctype>
#include <string>
#include <vector>

namespace generator
{
namespace
{
// https://www.wikidata.org/wiki/Wikidata:Identifiers
bool WikiDataValidator(std::string const & tagValue)
{
  if (tagValue.size() < 2)
    return false;

  size_t pos = 0;
  // Only items are needed.
  if (tagValue[pos++] != 'Q')
    return false;

  while (pos != tagValue.size())
  {
    if (!std::isdigit(tagValue[pos++]))
      return false;
  }

  return true;
}
} // namespace
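
A few cases this validator is intended to accept and reject (a sketch, not part of the commit; CHECK comes from base/assert.hpp):

// Item ids ('Q' followed by digits) pass.
CHECK(WikiDataValidator("Q42"), ());
// Property ids, bare prefixes, and malformed values are rejected.
CHECK(!WikiDataValidator("P31"), ());
CHECK(!WikiDataValidator("Q"), ());
CHECK(!WikiDataValidator("Q42x"), ());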

TranslatorPlanet::TranslatorPlanet(std::shared_ptr<EmitterInterface> emitter,
                                   cache::IntermediateDataReader & holder,
                                   feature::GenerateInfo const & info)

@@ -31,6 +56,7 @@ TranslatorPlanet::TranslatorPlanet(std::shared_ptr<EmitterInterface> emitter,
  , m_nodeRelations(m_routingTagsProcessor)
  , m_wayRelations(m_routingTagsProcessor)
  , m_metalinesBuilder(info.GetIntermediateFileName(METALINES_FILENAME))
  , m_wikiDataCollector(info.m_id2wikidataFilename, "wikidata", WikiDataValidator, true /* ignoreIfNotOpen */)
{
  auto const addrFilePath = info.GetAddressesFileName();
  if (!addrFilePath.empty())
@@ -182,7 +208,7 @@ bool TranslatorPlanet::ParseType(OsmElement * p, FeatureParams & params)

  m_routingTagsProcessor.m_cameraNodeWriter.Process(*p, params, m_cache);
  m_routingTagsProcessor.m_roadAccessWriter.Process(*p);

  m_wikiDataCollector.Collect(GetGeoObjectId(*p), *p);
  return true;
}

@@ -1,6 +1,7 @@
#pragma once

#include "generator/camera_info_collector.hpp"
#include "generator/collector_tag.hpp"
#include "generator/metalines_builder.hpp"
#include "generator/relation_tags.hpp"
#include "generator/routing_helpers.hpp"
@@ -58,5 +59,6 @@ private:
  RelationTagsNode m_nodeRelations;
  RelationTagsWay m_wayRelations;
  feature::MetalinesBuilder m_metalinesBuilder;
  CollectorTag m_wikiDataCollector;
};
} // namespace generator
@@ -65,4 +65,67 @@ void WikiUrlDumper::DumpOne(std::string const & path, std::ostream & stream)
    stream << path << "\t" << feature.GetMostGenericOsmId() << "\t" << wikiUrl << "\n";
  });
}

WikiDataFilter::WikiDataFilter(std::string const & path, std::vector<std::string> const & datFiles)
  : m_path(path), m_dataFiles(datFiles)
{
  std::ifstream stream;
  stream.exceptions(std::fstream::failbit | std::fstream::badbit);
  stream.open(m_path);
  stream.exceptions(std::fstream::badbit);
  uint64_t id;
  std::string wikidata;
  while (stream)
  {
    stream >> id >> wikidata;
    m_id2wikiData.emplace(base::GeoObjectId(id), wikidata);
  }
}

// static
void WikiDataFilter::FilterOne(std::string const & path, std::map<base::GeoObjectId, std::string> const & id2wikiData,
                               std::ostream & stream)
{
  auto const & needWikiUrl = ftypes::WikiChecker::Instance();
  feature::ForEachFromDatRawFormat(path, [&](FeatureBuilder1 const & feature, uint64_t /* pos */) {
    if (!needWikiUrl(feature.GetTypesHolder()))
      return;

    auto const it = id2wikiData.find(feature.GetMostGenericOsmId());
    if (it == std::end(id2wikiData))
      return;

    stream << it->first.GetEncodedId() << "\t" << it->second << "\n";
  });
}

void WikiDataFilter::Filter(size_t cpuCount)
{
  CHECK_GREATER(cpuCount, 0, ());

  base::thread_pool::computational::ThreadPool threadPool(cpuCount);
  std::vector<std::future<std::string>> futures;
  futures.reserve(m_dataFiles.size());

  auto const fn = [&](std::string const & filename) {
    std::stringstream stringStream;
    FilterOne(filename, m_id2wikiData, stringStream);
    return stringStream.str();
  };

  for (auto const & path : m_dataFiles)
  {
    auto result = threadPool.Submit(fn, path);
    futures.emplace_back(std::move(result));
  }

  std::ofstream stream;
  stream.exceptions(std::fstream::failbit | std::fstream::badbit);
  stream.open(m_path);
  for (auto & f : futures)
  {
    auto lines = f.get();
    stream << lines;
  }
}
} // namespace generator
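
WikiDataFilter::Filter above is a straightforward fan-out/fan-in: one task per dat file, results concatenated in submission order so the output stays deterministic. A minimal standard-library-only sketch of the same pattern (std::async in place of the project's thread pool; ProcessOne is a hypothetical per-file worker):

#include <future>
#include <sstream>
#include <string>
#include <vector>

// Hypothetical per-file worker standing in for WikiDataFilter::FilterOne.
std::string ProcessOne(std::string const & filename) { return filename + "\n"; }

std::string ProcessAll(std::vector<std::string> const & files)
{
  // Fan out: one asynchronous task per input file.
  std::vector<std::future<std::string>> futures;
  for (auto const & f : files)
    futures.emplace_back(std::async(std::launch::async, ProcessOne, f));

  // Fan in: collect in submission order, so the result is deterministic.
  std::ostringstream out;
  for (auto & fut : futures)
    out << fut.get();
  return out.str();
}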
@@ -1,6 +1,9 @@
#pragma once

#include "base/geo_object_id.hpp"

#include <iosfwd>
#include <map>
#include <string>
#include <vector>

@@ -9,7 +12,7 @@ namespace generator
class WikiUrlDumper
{
public:
  WikiUrlDumper(std::string const & path, std::vector<std::string> const & datFiles);
  explicit WikiUrlDumper(std::string const & path, std::vector<std::string> const & datFiles);

  static void DumpOne(std::string const & path, std::ostream & stream);

@@ -19,4 +22,20 @@ private:
  std::string m_path;
  std::vector<std::string> m_dataFiles;
};

class WikiDataFilter
{
public:
  explicit WikiDataFilter(std::string const & path, std::vector<std::string> const & datFiles);

  static void FilterOne(std::string const & path, std::map<base::GeoObjectId, std::string> const & id2wikiData,
                        std::ostream & stream);

  void Filter(size_t cpuCount);

private:
  std::string m_path;
  std::map<base::GeoObjectId, std::string> m_id2wikiData;
  std::vector<std::string> m_dataFiles;
};
} // namespace generator
@@ -6,6 +6,8 @@ import logging
import os
import random
import time
import types
import urllib.error
import urllib.parse
from multiprocessing.pool import ThreadPool

@@ -13,6 +15,7 @@ import htmlmin
import requests
import wikipediaapi
from bs4 import BeautifulSoup
from wikidata.client import Client

"""
This script downloads Wikipedia pages for different languages.
@@ -20,7 +23,7 @@ This script downloads Wikipedia pages for different languages.
log = logging.getLogger(__name__)

WORKERS = 80
CHUNK_SIZE = 128
CHUNK_SIZE = 16
REQUEST_ATTEMPTS = 32
ATTEMPTS_PAUSE_MS = 4000

@@ -48,16 +51,21 @@ class GettingError(MyException):
    pass


def try_get(obj, prop):
def try_get(obj, prop, *args, **kwargs):
    attempts = REQUEST_ATTEMPTS
    while attempts != 0:
        try:
            return getattr(obj, prop)
            attr = getattr(obj, prop)
            is_method = isinstance(attr, types.MethodType)
            return attr(*args, **kwargs) if is_method else attr
        except (requests.exceptions.ConnectionError,
                requests.exceptions.ReadTimeout,
                json.decoder.JSONDecodeError):
            time.sleep(random.uniform(0.0, 1.0 / 1000.0 * ATTEMPTS_PAUSE_MS))
            attempts -= 1
        except urllib.error.HTTPError as e:
            if e.code == 404:
                raise GettingError(f"Page not found {e.msg}")
        except KeyError:
            raise GettingError(f"Getting {prop} field failed. {prop} not found.")

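
Note the effect of this change: try_get can now retry method calls as well as plain attribute reads, which the wikidata path later in this diff relies on via try_get(client, "get", wikidata_id, load=True).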
@@ -80,7 +88,7 @@ def read_popularity(path):
    return ids


def should_download_wikipage(popularity_set):
def should_download_page(popularity_set):
    @functools.wraps(popularity_set)
    def wrapped(ident):
        return popularity_set is None or ident in popularity_set

@@ -184,7 +192,7 @@ def get_wiki_langs(url):
    return curr_lang


def download_all(path, url, langs):
def download_all_from_wikipedia(path, url, langs):
    try:
        available_langs = get_wiki_langs(url)
    except ParseError:
@@ -195,8 +203,8 @@ def download_all(path, url, langs):
        download(path, lang[1])


def worker(output_dir, checker, langs):
    @functools.wraps(worker)
def wikipedia_worker(output_dir, checker, langs):
    @functools.wraps(wikipedia_worker)
    def wrapped(line):
        if not line.strip():
            return
@@ -211,20 +219,94 @@ def worker(output_dir, checker, langs):
            return
        parsed = urllib.parse.urlparse(url)
        path = os.path.join(output_dir, parsed.netloc, parsed.path[1:])
        download_all(path, url, langs)
        download_all_from_wikipedia(path, url, langs)
    return wrapped


def download_from_wikipedia_tags(input_file, output_dir, langs, checker):
    with open(input_file) as file:
        _ = file.readline()
        pool = ThreadPool(processes=WORKERS)
        pool.map(wikipedia_worker(output_dir, checker, langs), file, CHUNK_SIZE)
        pool.close()
        pool.join()


def get_wikidata_urls(entity, langs):
    try:
        keys = entity.data["sitelinks"].keys()
    except (KeyError, AttributeError):
        log.exception(f"Sitelinks not found for {entity.id}.")
        return None
    return [
        entity.data["sitelinks"][k]["url"] for k in keys
        if any([k.startswith(lang) for lang in langs])
    ]


def wikidata_worker(output_dir, checker, langs):
    @functools.wraps(wikidata_worker)
    def wrapped(line):
        if not line.strip():
            return
        try:
            ident, wikidata_id = line.split("\t")
            ident = int(ident)
            wikidata_id = wikidata_id.strip()
            if not checker(ident):
                return
        except (AttributeError, IndexError):
            log.exception(f"{line} is incorrect.")
            return
        client = Client()
        try:
            entity = try_get(client, "get", wikidata_id, load=True)
        except GettingError:
            log.exception(f"Error: page is not downloaded {wikidata_id}.")
            return
        urls = get_wikidata_urls(entity, langs)
        if not urls:
            return
        path = os.path.join(output_dir, wikidata_id)
        for url in urls:
            download(path, url)
    return wrapped


def download_from_wikidata_tags(input_file, output_dir, langs, checker):
    wikidata_output_dir = os.path.join(output_dir, "wikidata")
    os.makedirs(wikidata_output_dir, exist_ok=True)
    with open(input_file) as file:
        pool = ThreadPool(processes=WORKERS)
        pool.map(wikidata_worker(wikidata_output_dir, checker, langs), file, CHUNK_SIZE)
        pool.close()
        pool.join()


def check_and_get_checker(popularity_file):
    popularity_set = None
    if popularity_file is None:
        log.warning(f"Popularity file not set.")
    elif os.path.exists(popularity_file):
        popularity_set = read_popularity(popularity_file)
        log.info(f"Popularity set size: {len(popularity_set)}.")
    else:
        log.error(f"Popularity file ({popularity_file}) not found.")
    return should_download_page(popularity_set)


def parse_args():
    parser = argparse.ArgumentParser(description="Download wiki pages.")
    parser.add_argument("--o", metavar="PATH", type=str,
    parser.add_argument("--output_dir", metavar="PATH", type=str,
                        help="Output dir for saving pages")
    parser.add_argument("--p", metavar="PATH", type=str,
    parser.add_argument("--popularity", metavar="PATH", type=str,
                        help="File with popular object ids for which we "
                             "download wikipedia data. If not given, download "
                             "for all objects.")
    parser.add_argument('--i', metavar="PATH", type=str, required=True,
    parser.add_argument('--wikipedia', metavar="PATH", type=str, required=True,
                        help="Input file with wikipedia url.")
    parser.add_argument('--wikidata', metavar="PATH", type=str,
                        help="Input file with wikidata ids.")
    parser.add_argument('--langs', metavar="LANGS", type=str, nargs='+',
                        action='append',
                        help="Languages for pages. If left blank, pages in all "
@@ -236,22 +318,20 @@ def main():
    log.setLevel(logging.WARNING)
    wikipediaapi.log.setLevel(logging.WARNING)
    args = parse_args()
    input_file = args.i
    output_dir = args.o
    popularity_file = args.p
    wikipedia_file = args.wikipedia
    wikidata_file = args.wikidata
    output_dir = args.output_dir
    popularity_file = args.popularity
    langs = list(itertools.chain.from_iterable(args.langs))
    os.makedirs(output_dir, exist_ok=True)
    popularity_set = read_popularity(popularity_file) if popularity_file else None
    if popularity_set:
        log.info(f"Popularity set size: {len(popularity_set)}.")
    checker = should_download_wikipage(popularity_set)
    with open(input_file) as file:
        _ = file.readline()
        pool = ThreadPool(processes=WORKERS)
        pool.map(worker(output_dir, checker, langs), file, CHUNK_SIZE)
        pool.close()
        pool.join()

    checker = check_and_get_checker(popularity_file)
    download_from_wikipedia_tags(wikipedia_file, output_dir, langs, checker)
    if wikidata_file is None:
        log.warning(f"Wikidata file not set.")
    elif os.path.exists(wikidata_file):
        download_from_wikidata_tags(wikidata_file, output_dir, langs, checker)
    else:
        log.warning(f"Wikidata file ({wikidata_file}) not found.")


if __name__ == "__main__":
    main()
@@ -185,6 +185,7 @@ DESCRIPTIONS_DOWNLOADER="$PYTHON_SCRIPTS_PATH/descriptions_downloader.py"
LOCALADS_SCRIPT="$PYTHON_SCRIPTS_PATH/local_ads/mwm_to_csv_4localads.py"
UGC_FILE="${UGC_FILE:-$INTDIR/ugc_db.sqlite3}"
POPULAR_PLACES_FILE="${POPULAR_PLACES_FILE:-$INTDIR/popular_places.csv}"
WIKIDATA_FILE="${WIKIDATA_FILE:-$INTDIR/id2wikidata.csv}"
BOOKING_SCRIPT="$PYTHON_SCRIPTS_PATH/booking_hotels.py"
BOOKING_FILE="${BOOKING_FILE:-$INTDIR/hotels.csv}"
OPENTABLE_SCRIPT="$PYTHON_SCRIPTS_PATH/opentable_restaurants.py"
@@ -453,6 +454,9 @@ if [ "$MODE" == "features" ]; then
  [ -f "$BOOKING_FILE" ] && PARAMS_SPLIT="$PARAMS_SPLIT --booking_data=$BOOKING_FILE"
  [ -f "$OPENTABLE_FILE" ] && PARAMS_SPLIT="$PARAMS_SPLIT --opentable_data=$OPENTABLE_FILE"
  [ -f "$POPULAR_PLACES_FILE" ] && PARAMS_SPLIT="$PARAMS_SPLIT --popular_places_data=$POPULAR_PLACES_FILE"
  [ -n "$OPT_DESCRIPTIONS" ] && PARAMS_SPLIT="$PARAMS_SPLIT --id2wikidata=$WIKIDATA_FILE"

  "$GENERATOR_TOOL" --intermediate_data_path="$INTDIR/" \
    --node_storage=$NODE_STORAGE \
    --osm_file_type=o5m \
@@ -555,14 +559,18 @@ if [ "$MODE" == "descriptions" ]; then
  LOG="$LOG_PATH/descriptions.log"
  LANGS="en ru es"

  "$GENERATOR_TOOL" --intermediate_data_path="$INTDIR/" --user_resource_path="$DATA_PATH/" --dump_wikipedia_urls="$URLS_PATH" 2>> $LOG
  $PYTHON36 $DESCRIPTIONS_DOWNLOADER --i "$URLS_PATH" --o "$WIKI_PAGES_PATH" --langs $LANGS 2>> $LOG
  "$GENERATOR_TOOL" --intermediate_data_path="$INTDIR/" --user_resource_path="$DATA_PATH/" \
    --dump_wikipedia_urls="$URLS_PATH" --id2wikidata="$WIKIDATA_FILE" 2>> $LOG

  PARAMS="--wikipedia $URLS_PATH --wikidata $WIKIDATA_FILE --output_dir $WIKI_PAGES_PATH"
  [ -f "$POPULAR_PLACES_FILE" ] && PARAMS="$PARAMS --popularity=$POPULAR_PLACES_FILE"
  $PYTHON36 $DESCRIPTIONS_DOWNLOADER $PARAMS --langs $LANGS 2>> $LOG

  for file in "$TARGET"/*.mwm; do
    if [[ "$file" != *minsk-pass* && "$file" != *World* ]]; then
      BASENAME="$(basename "$file" .mwm)"
      "$GENERATOR_TOOL" --wikipedia_pages="$WIKI_PAGES_PATH/" --data_path="$TARGET" --user_resource_path="$DATA_PATH/" \
        --output="$BASENAME" 2>> "$LOG_PATH/$BASENAME.log" &
      "$GENERATOR_TOOL" --wikipedia_pages="$WIKI_PAGES_PATH/" --id2wikidata="$WIKIDATA_FILE" \
        --data_path="$TARGET" --user_resource_path="$DATA_PATH/" --output="$BASENAME" 2>> "$LOG_PATH/$BASENAME.log" &
      forky
    fi
  done