forked from organicmaps/organicmaps
[booking] Replace Split with ParseCSVRow
This commit is contained in:
parent
3fac043578
commit
291427d5de
4 changed files with 63 additions and 16 deletions
|
@ -733,13 +733,16 @@ UNIT_TEST(NormalizeDigits_UniString)
|
|||
// Unit test for row splitting. This rendered diff interleaves the removed strings::Split
// calls with the strings::ParseCSVRow assertions that replace them.
UNIT_TEST(Split)
|
||||
{
|
||||
vector<string> target;
|
||||
strings::Split(";Test\\;проверка;0;", ';', target);
|
||||
// Default delimiter (','): empty columns at both ends are expected to be kept,
// and the backslash is expected to stay in the column as an ordinary character.
TEST(strings::ParseCSVRow(",Test\\,проверка,0,", target), ());
|
||||
vector<string> expected({"", "Test\\", "проверка", "0", ""});
|
||||
TEST_EQUAL(target, expected, ());
|
||||
strings::Split("and there was none", ' ', target);
|
||||
vector<string> expected2({"and", "there", "", "was", "none"});
|
||||
// Custom delimiter (' '): the quoted part is expected to come back as one column
// with its quotes removed and the delimiter inside it preserved.
TEST(strings::ParseCSVRow("and there \"was none\"", target, ' '), ());
|
||||
vector<string> expected2({"and", "there", "", "was none"});
|
||||
TEST_EQUAL(target, expected2, ());
|
||||
strings::Split("", '!', target);
|
||||
// An empty row is expected to return false and leave |target| empty.
TEST(!strings::ParseCSVRow("", target), ());
|
||||
vector<string> expected3;
|
||||
TEST_EQUAL(target, expected3, ());
|
||||
// Two columns are requested but the row has one, so the call is expected to return false
// while |target| still holds the single parsed column.
TEST(!strings::ParseCSVRow("\"this, a line.\"", target, ',', 2), (target));
|
||||
vector<string> expected4({"this, a line."});
|
||||
TEST_EQUAL(target, expected4, ());
|
||||
}
|
||||
|
|
|
@ -328,17 +328,61 @@ bool AlmostEqual(string const & str1, string const & str2, size_t mismatchedCoun
|
|||
return false;
|
||||
}
|
||||
|
||||
void Split(string const & s, char delimiter, vector<string> & target)
|
||||
// Parses one delimiter-separated row |s| into |target| (which is cleared first).
//
// A column that starts with a double quote may span several delimiter-separated tokens:
// the delimiters between them are kept as part of the value and the surrounding quotes
// are removed. Unquoted columns and quoted columns that fit in a single token are passed
// through strings::Trim. Newlines inside columns and escaped quotes are not supported.
//
// Returns false if the row is empty, or if |columns| is non-zero and the number of parsed
// columns differs from |columns| (in that case |target| still holds the parsed columns).
bool ParseCSVRow(string const & s, vector<string> & target, char const delimiter, size_t const columns)
{
  target.clear();

  using It = TokenizeIterator<SimpleDelimiter, string::const_iterator, true>;

  bool insideQuotes = false;
  // Collects the pieces of a quoted column that contains the delimiter.
  // A plain string is used so that clear() really empties the buffer between columns
  // (ostringstream::clear() only resets the stream error flags).
  string quoted;

  for (It it(s, SimpleDelimiter(delimiter)); it; ++it)
  {
    string column = *it;
    if (insideQuotes)
    {
      // The tokenizer consumed the delimiter, but inside quotes it belongs to the value.
      quoted += delimiter;
      if (!column.empty() && column.back() == '"')
      {
        // Found the tail quote: remove it and add |quoted| to the vector.
        column.pop_back();
        quoted += column;
        target.push_back(quoted);
        quoted.clear();
        insideQuotes = false;
      }
      else
      {
        quoted += column;
      }
    }
    else if (!column.empty() && column.front() == '"')
    {
      // Found the front quote: if there is the last one also, remove both and append column,
      // otherwise start collecting the column in the |quoted| buffer.
      column.erase(0, 1);
      // |column| is empty here when the token was a lone quote, so check before back().
      if (!column.empty() && column.back() == '"')
      {
        column.pop_back();
        strings::Trim(column);
        target.push_back(column);
      }
      else
      {
        quoted = column;
        insideQuotes = true;
      }
    }
    else
    {
      strings::Trim(column);
      target.push_back(column);
    }
  }

  // Unterminated quote: keep the collected text as the last column instead of dropping it.
  if (insideQuotes)
    target.push_back(quoted);

  // Special case: if the string is empty, return an empty array instead of {""}.
  if (target.size() == 1 && target[0].empty())
  {
    target.clear();
    return false;
  }

  // |columns| == 0 means "do not check the number of columns".
  return columns == 0 || target.size() == columns;
}
|
||||
|
||||
} // namespace strings
|
||||
|
|
|
@ -307,7 +307,9 @@ void Tokenize(string const & str, char const * delims, TFunctor && f)
|
|||
}
|
||||
|
||||
/// Splits a string by the delimiter, keeps empty parts, on an empty string returns an empty vector.
|
||||
void Split(string const & s, char delimiter, vector<string> & target);
|
||||
/// Parses a single CSV row into |target|. Supports quoted columns; does not support newlines inside columns or escaped quotes.
|
||||
/// @return false if the line is empty or, when |columns| is non-zero, the number of parsed columns differs from |columns|.
|
||||
bool ParseCSVRow(string const & s, vector<string> & target, char const delimiter = ',', size_t const columns = 0);
|
||||
|
||||
/// @return code of last symbol in string or 0 if s is empty
|
||||
UniChar LastUniChar(string const & s);
|
||||
|
|
|
@ -30,10 +30,8 @@ bool CheckForValues(string const & value)
|
|||
|
||||
BookingDataset::Hotel::Hotel(string const & src)
|
||||
{
|
||||
vector<string> rec(FieldsCount());
|
||||
strings::SimpleTokenizer token(src, "\t");
|
||||
for (size_t i = 0; token && i < rec.size(); ++i, ++token)
|
||||
rec[i] = *token;
|
||||
vector<string> rec;
|
||||
CHECK(strings::ParseCSVRow(src, rec, '\t', FieldsCount()), ("Error parsing hotels.tsv line:", src));
|
||||
|
||||
strings::to_uint(rec[Index(Fields::Id)], id);
|
||||
strings::to_double(rec[Index(Fields::Latitude)], lat);
|
||||
|
@ -176,7 +174,7 @@ void BookingDataset::BuildFeatures(function<void(OsmElement *)> const & fn) cons
|
|||
if (!hotel.translations.empty())
|
||||
{
|
||||
vector<string> parts;
|
||||
strings::Split(hotel.translations, '|', parts);
|
||||
strings::ParseCSVRow(hotel.translations, parts, '|');
|
||||
for (auto i = 0; i < parts.size(); i += 3)
|
||||
{
|
||||
e.AddTag("name:" + parts[i], parts[i + 1]);
|
||||
|
|
Loading…
Add table
Reference in a new issue