forked from organicmaps/organicmaps
[search] Add synonyms for search index generation.
This commit is contained in:
parent
6bef08ff59
commit
25670bf4af
2 changed files with 155 additions and 5 deletions
66
data/synonims.txt
Normal file
66
data/synonims.txt
Normal file
|
@ -0,0 +1,66 @@
|
|||
United States of America: US, USA
|
||||
Alabama: AL
|
||||
Alaska: AK
|
||||
Arizona: AZ
|
||||
Arkansas: AR
|
||||
California: CA
|
||||
Colorado: CO
|
||||
Connecticut: CT
|
||||
Delaware: DE
|
||||
Florida: FL
|
||||
Georgia: GA
|
||||
Hawaii: HI
|
||||
Idaho: ID
|
||||
Illinois: IL
|
||||
Indiana: IN
|
||||
Iowa: IA
|
||||
Kansas: KS
|
||||
Kentucky: KY
|
||||
Louisiana: LA
|
||||
Maine: ME
|
||||
Maryland: MD
|
||||
Massachusetts: MA
|
||||
Michigan: MI
|
||||
Minnesota: MN
|
||||
Mississippi: MS
|
||||
Missouri: MO
|
||||
Montana: MT
|
||||
Nebraska: NE
|
||||
Nevada: NV
|
||||
New Hampshire: NH
|
||||
New Jersey: NJ
|
||||
New Mexico: NM
|
||||
New York: NY
|
||||
North Carolina: NC
|
||||
North Dakota: ND
|
||||
Ohio: OH
|
||||
Oklahoma: OK
|
||||
Oregon: OR
|
||||
Pennsylvania: PA
|
||||
Rhode Island: RI
|
||||
South Carolina: SC
|
||||
South Dakota: SD
|
||||
Tennessee: TN
|
||||
Texas: TX
|
||||
Utah: UT
|
||||
Vermont: VT
|
||||
Virginia: VA
|
||||
Washington: WA
|
||||
West Virginia: WV
|
||||
Wisconsin: WI
|
||||
Wyoming: WY
|
||||
|
||||
|
||||
Alberta: AB
|
||||
British Columbia: BC
|
||||
Manitoba: MB
|
||||
New Brunswick: NB
|
||||
Newfoundland and Labrador: NL
|
||||
Northwest Territories: NT
|
||||
Nova Scotia: NS
|
||||
Nunavut: NU
|
||||
Ontario: ON
|
||||
Prince Edward Island: PE
|
||||
Quebec: QC
|
||||
Saskatchewan: SK
|
||||
Yukon: YT
|
|
@ -22,33 +22,110 @@
|
|||
|
||||
#include "../base/string_utils.hpp"
|
||||
#include "../base/logging.hpp"
|
||||
#include "../base/stl_add.hpp"
|
||||
|
||||
#include "../std/algorithm.hpp"
|
||||
#include "../std/vector.hpp"
|
||||
#include "../std/unordered_map.hpp"
|
||||
#include "../std/fstream.hpp"
|
||||
|
||||
#define SYNONIMS_FILE "synonims.txt"
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
class SynonimsHolder
|
||||
{
|
||||
unordered_multimap<string, string> m_map;
|
||||
|
||||
public:
|
||||
SynonimsHolder(string const & fPath)
|
||||
{
|
||||
ifstream stream(fPath.c_str());
|
||||
|
||||
string line;
|
||||
vector<string> tokens;
|
||||
|
||||
while (stream.good())
|
||||
{
|
||||
std::getline(stream, line);
|
||||
if (line.empty())
|
||||
continue;
|
||||
|
||||
tokens.clear();
|
||||
strings::Tokenize(line, ":,", MakeBackInsertFunctor(tokens));
|
||||
|
||||
if (tokens.size() > 1)
|
||||
{
|
||||
strings::Trim(tokens[0]);
|
||||
for (size_t i = 1; i < tokens.size(); ++i)
|
||||
{
|
||||
strings::Trim(tokens[i]);
|
||||
// synonim should not has any spaces
|
||||
ASSERT ( tokens[i].find_first_of(" \t") == string::npos, () );
|
||||
m_map.insert(make_pair(tokens[0], tokens[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class ToDo> void ForEach(string const & key, ToDo toDo) const
|
||||
{
|
||||
typedef unordered_multimap<string, string>::const_iterator IterT;
|
||||
|
||||
pair<IterT, IterT> range = m_map.equal_range(key);
|
||||
while (range.first != range.second)
|
||||
{
|
||||
toDo(range.first->second);
|
||||
++range.first;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct FeatureNameInserter
|
||||
{
|
||||
SynonimsHolder * m_synonims;
|
||||
StringsFile & m_names;
|
||||
StringsFile::ValueT m_val;
|
||||
|
||||
/// Creates an inserter that does not use synonyms.
/// m_synonims is set to NULL explicitly: operator() tests this pointer before
/// using it, so it must never be left uninitialized.
FeatureNameInserter(StringsFile & names) : m_synonims(NULL), m_names(names) {}

/// Creates an inserter that also adds synonyms of the feature name.
/// @param[in] synonims Synonyms storage; may be NULL, then no synonyms are added.
/// @param[in] names    Destination for the generated strings.
FeatureNameInserter(SynonimsHolder * synonims, StringsFile & names)
  : m_synonims(synonims), m_names(names)
{
}
|
||||
|
||||
void AddToken(signed char lang, strings::UniString const & s) const
|
||||
{
|
||||
m_names.AddString(StringsFile::StringT(s, lang, m_val));
|
||||
}
|
||||
|
||||
private:
|
||||
typedef buffer_vector<strings::UniString, 32> TokensArrayT;
|
||||
|
||||
class PushSynonims
|
||||
{
|
||||
TokensArrayT & m_tokens;
|
||||
public:
|
||||
PushSynonims(TokensArrayT & tokens) : m_tokens(tokens) {}
|
||||
void operator() (string const & utf8str) const
|
||||
{
|
||||
m_tokens.push_back(search::NormalizeAndSimplifyString(utf8str));
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
bool operator()(signed char lang, string const & name) const
|
||||
{
|
||||
strings::UniString const uniName = search::NormalizeAndSimplifyString(name);
|
||||
|
||||
// split input string on tokens
|
||||
buffer_vector<strings::UniString, 32> tokens;
|
||||
SplitUniString(uniName, MakeBackInsertFunctor(tokens), search::Delimiters());
|
||||
|
||||
// add synonims for input native string
|
||||
if (m_synonims)
|
||||
m_synonims->ForEach(name, PushSynonims(tokens));
|
||||
|
||||
int const maxTokensCount = search::MAX_TOKENS - 1;
|
||||
if (tokens.size() > maxTokensCount)
|
||||
{
|
||||
|
@ -65,6 +142,7 @@ struct FeatureNameInserter
|
|||
|
||||
class FeatureInserter
|
||||
{
|
||||
SynonimsHolder * m_synonims;
|
||||
StringsFile & m_names;
|
||||
|
||||
CategoriesHolder const & m_categories;
|
||||
|
@ -294,11 +372,12 @@ class FeatureInserter
|
|||
};
|
||||
|
||||
public:
|
||||
FeatureInserter(StringsFile & names,
|
||||
FeatureInserter(SynonimsHolder * synonims, StringsFile & names,
|
||||
CategoriesHolder const & catHolder,
|
||||
serial::CodingParams const & cp,
|
||||
pair<int, int> const & scales)
|
||||
: m_names(names), m_categories(catHolder), m_valueSaver(cp), m_scales(scales)
|
||||
: m_synonims(synonims), m_names(names),
|
||||
m_categories(catHolder), m_valueSaver(cp), m_scales(scales)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -314,7 +393,7 @@ public:
|
|||
}
|
||||
|
||||
// init inserter with serialized value
|
||||
FeatureNameInserter inserter(m_names);
|
||||
FeatureNameInserter inserter(m_synonims, m_names);
|
||||
MakeValue(f, types, pos, inserter.m_val);
|
||||
|
||||
// add names of the feature
|
||||
|
@ -366,9 +445,14 @@ void BuildSearchIndex(FilesContainerR const & cont, CategoriesHolder const & cat
|
|||
|
||||
serial::CodingParams cp(search::GetCPForTrie(header.GetDefCodingParams()));
|
||||
|
||||
scoped_ptr<SynonimsHolder> synonims;
|
||||
if (header.GetType() == feature::DataHeader::world)
|
||||
synonims.reset(new SynonimsHolder(GetPlatform().WritablePathForFile(SYNONIMS_FILE)));
|
||||
|
||||
StringsFile names(tmpFilePath);
|
||||
|
||||
featuresV.ForEachOffset(FeatureInserter(names, catHolder, cp, header.GetScaleRange()));
|
||||
featuresV.ForEachOffset(FeatureInserter(synonims.get(), names,
|
||||
catHolder, cp, header.GetScaleRange()));
|
||||
|
||||
names.EndAdding();
|
||||
names.OpenForRead();
|
||||
|
|
Loading…
Add table
Reference in a new issue