[booking] Replace Split with ParseCSVRow

This commit is contained in:
Ilya Zverev 2016-06-27 15:07:57 +03:00
parent 3fac043578
commit 291427d5de
4 changed files with 63 additions and 16 deletions

View file

@ -733,13 +733,16 @@ UNIT_TEST(NormalizeDigits_UniString)
// Exercises strings::ParseCSVRow: plain columns, a quoted column that contains the
// delimiter, an empty line, and a mismatch with the expected number of columns.
UNIT_TEST(Split)
{
  vector<string> target;

  // Empty columns at both ends are kept; a backslash does not escape the delimiter.
  TEST(strings::ParseCSVRow(",Test\\,проверка,0,", target), ());
  vector<string> expected({"", "Test\\", "проверка", "0", ""});
  TEST_EQUAL(target, expected, ());

  // Two consecutive delimiters produce an empty column; the quoted column keeps
  // the delimiter (a space here) that it contains.
  TEST(strings::ParseCSVRow("and there  \"was none\"", target, ' '), ());
  vector<string> expected2({"and", "there", "", "was none"});
  TEST_EQUAL(target, expected2, ());

  // An empty line is reported as a failure and leaves |target| empty.
  TEST(!strings::ParseCSVRow("", target), ());
  vector<string> expected3;
  TEST_EQUAL(target, expected3, ());

  // The row holds a single (quoted) column while two are requested: the call fails,
  // but the parsed column is still available in |target|.
  TEST(!strings::ParseCSVRow("\"this, a line.\"", target, ',', 2), (target));
  vector<string> expected4({"this, a line."});
  TEST_EQUAL(target, expected4, ());
}

View file

@ -328,17 +328,61 @@ bool AlmostEqual(string const & str1, string const & str2, size_t mismatchedCoun
return false;
}
void Split(string const & s, char delimiter, vector<string> & target)
bool ParseCSVRow(string const & s, vector<string> & target, char const delimiter, size_t const columns)
{
target.clear();
using It = TokenizeIterator<SimpleDelimiter, string::const_iterator, true>;
bool insideQuotes = false;
ostringstream quoted;
for (It it(s, SimpleDelimiter(delimiter)); it; ++it)
{
string column = *it;
if (insideQuotes)
{
if (!column.empty() && column.back() == '"')
{
// Found the tail quote: remove it and add |quoted| to the vector.
insideQuotes = false;
column.pop_back();
quoted << delimiter << column;
target.push_back(quoted.str());
quoted.clear();
}
else
quoted << delimiter << column;
}
else if (!column.empty() && column.front() == '"')
{
// Found the front quote: if there is the last one also, remove both and append column,
// otherwise push the column into a |quoted| buffer.
column.erase(0, 1);
if (column.back() == '"')
{
column.pop_back();
strings::Trim(column);
target.push_back(column);
}
else
{
quoted << column;
insideQuotes = true;
}
}
else
{
strings::Trim(column);
target.push_back(column);
}
}
// Special case: if the string is empty, return an empty array instead of {""}.
if (s.empty())
return;
if (target.size() == 1 && target[0].empty())
{
target.clear();
return false;
}
using It = TokenizeIterator<SimpleDelimiter, string::const_iterator, true>;
for (It it(s, SimpleDelimiter(delimiter)); it; ++it)
target.push_back(*it);
return columns <= 0 || target.size() == columns;
}
} // namespace strings

View file

@ -307,7 +307,9 @@ void Tokenize(string const & str, char const * delims, TFunctor && f)
}
/// Splits |s| by |delimiter| and stores the parts in |target|.
/// Empty parts are kept; an empty |s| produces an empty |target|.
void Split(string const & s, char delimiter, vector<string> & target);
/// Parses one row of delimiter-separated values from |s| into |target|.
/// Supports quoted columns (a quoted column may contain |delimiter|); does not support
/// newlines in columns or escaped quotes.
/// @param s          the row to parse.
/// @param target     receives the parsed columns; its previous content is discarded.
/// @param delimiter  column separator, ',' by default.
/// @param columns    expected number of columns; 0 (the default) disables the check.
/// @return false if the line is empty or the number of columns differs from |columns|.
bool ParseCSVRow(string const & s, vector<string> & target, char const delimiter = ',', size_t const columns = 0);
/// @return code of the last symbol in |s|, or 0 if |s| is empty.
UniChar LastUniChar(string const & s);

View file

@ -30,10 +30,8 @@ bool CheckForValues(string const & value)
BookingDataset::Hotel::Hotel(string const & src)
{
vector<string> rec(FieldsCount());
strings::SimpleTokenizer token(src, "\t");
for (size_t i = 0; token && i < rec.size(); ++i, ++token)
rec[i] = *token;
vector<string> rec;
CHECK(strings::ParseCSVRow(src, rec, '\t', FieldsCount()), ("Error parsing hotels.tsv line:", src));
strings::to_uint(rec[Index(Fields::Id)], id);
strings::to_double(rec[Index(Fields::Latitude)], lat);
@ -176,7 +174,7 @@ void BookingDataset::BuildFeatures(function<void(OsmElement *)> const & fn) cons
if (!hotel.translations.empty())
{
vector<string> parts;
strings::Split(hotel.translations, '|', parts);
strings::ParseCSVRow(hotel.translations, parts, '|');
for (auto i = 0; i < parts.size(); i += 3)
{
e.AddTag("name:" + parts[i], parts[i + 1]);