From fa59f1907c139cfa67ff0b982dca3165103de6d7 Mon Sep 17 00:00:00 2001 From: Maksim Andrianov Date: Thu, 3 Oct 2019 23:50:10 +0300 Subject: [PATCH] [coding] Rewritten CSVReader. --- coding/coding_tests/csv_reader_test.cpp | 173 +++++++++++++++---- coding/csv_reader.cpp | 152 +++++++++++++--- coding/csv_reader.hpp | 131 +++++++++++--- generator/popular_places_section_builder.cpp | 6 +- indexer/ftraits.hpp | 6 +- 5 files changed, 381 insertions(+), 87 deletions(-) diff --git a/coding/coding_tests/csv_reader_test.cpp b/coding/coding_tests/csv_reader_test.cpp index 60d3f77085..1bf9707616 100644 --- a/coding/coding_tests/csv_reader_test.cpp +++ b/coding/coding_tests/csv_reader_test.cpp @@ -8,41 +8,59 @@ #include #include -using coding::CSVReader; using platform::tests_support::ScopedFile; -using Row = std::vector; -using File = std::vector; +using Row = coding::CSVReader::Row; +using Rows = coding::CSVReader::Rows; namespace { std::string const kCSV1 = "a,b,c,d\ne,f,g h"; std::string const kCSV2 = "a,b,cd a b, c"; std::string const kCSV3 = ""; +std::string const kCSV4 = "1,2\n3,4\n5,6"; +std::string const kCSV5 = "1,2\n3,4\n\n5,6\n"; } // namespace UNIT_TEST(CSVReaderSmoke) { auto const fileName = "test.csv"; ScopedFile sf(fileName, kCSV1); - FileReader fileReader(sf.GetFullPath()); - CSVReader reader; - reader.Read(fileReader, [](File const & file) { - TEST_EQUAL(file.size(), 1, ()); - TEST_EQUAL(file[0].size(), 3, ()); - Row const firstRow = {"e", "f", "g h"}; + { + FileReader fileReader(sf.GetFullPath()); + coding::CSVReader reader(fileReader, false /* hasHeader */); + auto const file = reader.ReadAll(); + TEST_EQUAL(file.size(), 2, ()); + Row const firstRow = {"a", "b", "c", "d"}; TEST_EQUAL(file[0], firstRow, ()); - }); + Row const secondRow = {"e", "f", "g h"}; + TEST_EQUAL(file[1], secondRow, ()); + } + { + FileReader fileReader(sf.GetFullPath()); + coding::CSVReader reader(fileReader, true /* hasHeader */); + auto const file = reader.ReadAll(); + TEST_EQUAL(file.size(), 1, ()); + Row const headerRow = {"a", "b", "c", "d"}; + TEST_EQUAL(reader.GetHeader(), headerRow, ()); + } +} - CSVReader::Params p; - p.m_readHeader = true; - reader.Read(fileReader, - [](File const & file) { - TEST_EQUAL(file.size(), 2, ()); - Row const headerRow = {"a", "b", "c", "d"}; - TEST_EQUAL(file[0], headerRow, ()); - }, - p); +UNIT_TEST(CSVReaderReadLine) +{ + auto const fileName = "test.csv"; + ScopedFile sf(fileName, kCSV4); + Rows const answer = {{"1", "2"}, {"3", "4"}, {"5", "6"}}; + coding::CSVReader reader(sf.GetFullPath()); + size_t index = 0; + while (auto const optionalRow = reader.ReadRow()) + { + TEST_EQUAL(*optionalRow, answer[index], ()); + ++index; + } + TEST_EQUAL(index, answer.size(), ()); + TEST(!reader.ReadRow(), ()); + TEST(!reader.ReadRow(), ()); } UNIT_TEST(CSVReaderCustomDelimiter) @@ -50,25 +68,114 @@ UNIT_TEST(CSVReaderCustomDelimiter) auto const fileName = "test.csv"; ScopedFile sf(fileName, kCSV2); FileReader fileReader(sf.GetFullPath()); - CSVReader reader; - CSVReader::Params p; - p.m_readHeader = true; - p.m_delimiter = ' '; - - reader.Read(fileReader, - [](Row const & row) { - Row const firstRow = {"a,b,cd", "a", "b,", "c"}; - TEST_EQUAL(row, firstRow, ()); - }, - p); + coding::CSVReader reader(fileReader, false /* hasHeader */, ' '); + auto const file = reader.ReadAll(); + TEST_EQUAL(file.size(), 1, ()); + Row const firstRow = {"a,b,cd", "a", "b,", "c"}; + TEST_EQUAL(file[0], firstRow, ()); } UNIT_TEST(CSVReaderEmptyFile) { auto const fileName = "test.csv"; - ScopedFile sf(fileName, kCSV2); + ScopedFile sf(fileName, kCSV3); FileReader fileReader(sf.GetFullPath()); - CSVReader reader; - reader.Read(fileReader, [](File const & file) { TEST_EQUAL(file.size(), 0, ()); }); + coding::CSVReader reader(fileReader); + auto const file = reader.ReadAll(); + TEST_EQUAL(file.size(), 0, ()); +} + +UNIT_TEST(CSVReaderDifferentReaders) +{ + auto const fileName = "test.csv"; + ScopedFile sf(fileName, kCSV4); + Rows const answer = {{"1", "2"}, {"3", "4"}, {"5", "6"}}; + { + FileReader fileReader(sf.GetFullPath()); + coding::CSVReader reader(fileReader); + auto const file = reader.ReadAll(); + TEST_EQUAL(file, answer, ()); + } + { + coding::CSVReader reader(sf.GetFullPath()); + auto const file = reader.ReadAll(); + TEST_EQUAL(file, answer, ()); + } + { + std::ifstream stream(sf.GetFullPath()); + coding::CSVReader reader(stream); + auto const file = reader.ReadAll(); + TEST_EQUAL(file, answer, ()); + } +} + +UNIT_TEST(CSVReaderEmptyLines) +{ + auto const fileName = "test.csv"; + ScopedFile sf(fileName, kCSV5); + Rows const answer = {{"1", "2"}, {"3", "4"}, {}, {"5", "6"}}; + { + FileReader fileReader(sf.GetFullPath()); + coding::CSVReader reader(fileReader); + auto const file = reader.ReadAll(); + TEST_EQUAL(file, answer, ()); + } + { + coding::CSVReader reader(sf.GetFullPath()); + auto const file = reader.ReadAll(); + TEST_EQUAL(file, answer, ()); + } + { + std::ifstream stream(sf.GetFullPath()); + coding::CSVReader reader(stream); + auto const file = reader.ReadAll(); + TEST_EQUAL(file, answer, ()); + } +} + +UNIT_TEST(CSVReaderForEachRow) +{ + auto const fileName = "test.csv"; + ScopedFile sf(fileName, kCSV4); + Rows const answer = {{"1", "2"}, {"3", "4"}, {"5", "6"}}; + FileReader fileReader(sf.GetFullPath()); + auto reader = coding::CSVReader(fileReader); + size_t index = 0; + reader.ForEachRow([&](auto const & row) { + TEST_EQUAL(row, answer[index], ()); + ++index; + }); + TEST_EQUAL(answer.size(), index, ()); +} + +UNIT_TEST(CSVReaderIterator) +{ + auto const fileName = "test.csv"; + ScopedFile sf(fileName, kCSV4); + Rows const answer = {{"1", "2"}, {"3", "4"}, {"5", "6"}}; + { + FileReader fileReader(sf.GetFullPath()); + coding::CSVRunner runner((coding::CSVReader(fileReader))); + + auto it = runner.begin(); + TEST_EQUAL(*it, answer[0], ()); + ++it; + TEST_EQUAL(*it, answer[1], ()); + auto it2 = it++; + TEST(it2 == it, ()); + TEST_EQUAL(*it2, answer[1], ()); + TEST_EQUAL(*it, answer[2], ()); + ++it; + TEST(it == runner.end(), ()); + } + { + size_t index = 0; + for (auto const & row : coding::CSVRunner(coding::CSVReader(sf.GetFullPath()))) + { + TEST_EQUAL(row, answer[index], ()); + ++index; + } + TEST_EQUAL(index, answer.size(), ()); + } } diff --git a/coding/csv_reader.cpp b/coding/csv_reader.cpp index 7cc135b506..e49c0299ef 100644 --- a/coding/csv_reader.cpp +++ b/coding/csv_reader.cpp @@ -5,35 +5,141 @@ namespace coding { -using namespace std; - -void CSVReader::Read(istringstream & stream, RowByRowCallback const & fn, - Params const & params) const +CSVReader::CSVReader(std::unique_ptr reader, bool hasHeader, char delimiter) + : m_reader(std::move(reader)), m_hasHeader(hasHeader), m_delimiter(delimiter) { - bool readFirstRow = params.m_readHeader; + if (!HasHeader()) + return; - for (string line; getline(stream, line);) + auto const row = ReadRow(); + if (row) + m_header = *row; +} + +CSVReader::CSVReader(std::string const & filename, bool hasHeader, char delimiter) + : CSVReader(std::make_unique(filename), hasHeader, delimiter) +{ +} + +CSVReader::CSVReader(std::istream & stream, bool hasHeader, char delimiter) + : CSVReader(std::make_unique(stream), hasHeader, delimiter) +{ +} + +CSVReader::CSVReader(Reader const & reader, bool hasHeader, char delimiter) + : CSVReader(std::make_unique(reader), hasHeader, delimiter) +{ +} + +bool CSVReader::HasHeader() const { return m_hasHeader; } + +char CSVReader::GetDelimiter() const { return m_delimiter; } + +CSVReader::Row const & CSVReader::GetHeader() const { return m_header; } + +CSVReader::Rows CSVReader::ReadAll() +{ + Rows file; + ForEachRow([&](auto const & row) { + file.emplace_back(row); + }); + return file; +} + +boost::optional CSVReader::ReadRow() +{ + auto const line = m_reader->ReadLine(); + if (!line) + return {}; + + Row row; + strings::ParseCSVRow(*line, m_delimiter, row); + ++m_currentLine; + return row; +} + +size_t CSVReader::GetCurrentLineNumber() const { return m_currentLine; } + +CSVReader::IstreamWrapper::IstreamWrapper(std::istream & stream) : m_stream(stream) {} + +boost::optional CSVReader::IstreamWrapper::ReadLine() +{ + std::string line; + return std::getline(m_stream, line) ? line : boost::optional(); +} + +CSVReader::ReaderWrapper::ReaderWrapper(Reader const & reader) : m_reader(reader) {} + +boost::optional CSVReader::ReaderWrapper::ReadLine() +{ + std::vector line; + char ch = '\0'; + while (m_pos < m_reader.Size() && ch != '\n') { - Row row; - strings::ParseCSVRow(line, params.m_delimiter, row); - if (!readFirstRow) - { - readFirstRow = true; - continue; - } - fn(move(row)); + m_reader.Read(m_pos, &ch, sizeof(ch)); + line.emplace_back(ch); + ++m_pos; } + + if (line.empty()) + return {}; + + auto end = std::end(line); + if (line.back() == '\n') + --end; + + return std::string(std::begin(line), end); } -void CSVReader::Read(istringstream & stream, FullFileCallback const & fn, - Params const & params) const +CSVReader::DefaultReader::DefaultReader(std::string const & filename) { - File file; - Read(stream, [&file](Row && row) - { - file.emplace_back(move(row)); - }, params); - - fn(move(file)); + m_stream.exceptions(std::ios::failbit | std::ios::badbit); + m_stream.open(filename); + m_stream.exceptions(std::ios::badbit); } + +boost::optional CSVReader::DefaultReader::ReadLine() +{ + return IstreamWrapper(m_stream).ReadLine(); +} + +CSVRunner::Iterator::Iterator(CSVReader & reader, bool isEnd) : m_reader(reader) +{ + if (!isEnd) + m_current = m_reader.ReadRow(); +} + +CSVRunner::Iterator::Iterator(Iterator const & other) + : m_reader(other.m_reader), m_current(other.m_current) +{ +} + +CSVRunner::Iterator & CSVRunner::Iterator::operator++() +{ + m_current = m_reader.ReadRow(); + return *this; +} + +CSVRunner::Iterator CSVRunner::Iterator::operator++(int) +{ + Iterator tmp(*this); + operator++(); + return tmp; +} + +bool CSVRunner::Iterator::operator==(Iterator const & other) const +{ + return &m_reader == &other.m_reader && + static_cast(m_current) == static_cast(other.m_current); +} + +bool CSVRunner::Iterator::operator!=(Iterator const & other) const { return !(*this == other); } + +CSVReader::Row & CSVRunner::Iterator::operator*() { return *m_current; } + +CSVRunner::CSVRunner(CSVReader && reader) : m_reader(std::move(reader)) {} + +CSVRunner::Iterator CSVRunner::begin() { return Iterator(m_reader); } + +CSVRunner::Iterator CSVRunner::end() { return Iterator(m_reader, true /* isEnd */); } } // namespace coding diff --git a/coding/csv_reader.hpp b/coding/csv_reader.hpp index cbabf1d420..62ae68a1a1 100644 --- a/coding/csv_reader.hpp +++ b/coding/csv_reader.hpp @@ -2,43 +2,126 @@ #include "coding/reader.hpp" +#include #include #include #include #include +#include + namespace coding { class CSVReader { public: - struct Params + using Row = std::vector; + using Rows = std::vector; + + explicit CSVReader(std::string const & filename, bool hasHeader = false, char delimiter = ','); + explicit CSVReader(std::istream & stream, bool hasHeader = false, char delimiter = ','); + explicit CSVReader(Reader const & reader, bool hasHeader = false, char delimiter = ','); + + bool HasHeader() const; + char GetDelimiter() const; + + Row const & GetHeader() const; + boost::optional ReadRow(); + Rows ReadAll(); + + template + void ForEachRow(Fn && fn) { - Params() {} - bool m_readHeader = false; - char m_delimiter = ','; + while (auto const optRow = ReadRow()) + fn(*optRow); + } + + // The total number of lines read including the header. Count starts at 0. + size_t GetCurrentLineNumber() const; + +private: + class ReaderInterface + { + public: + virtual ~ReaderInterface() = default; + + virtual boost::optional ReadLine() = 0; }; - CSVReader() = default; - - using Row = std::vector; - using File = std::vector; - using RowByRowCallback = std::function; - using FullFileCallback = std::function; - - void Read(std::istringstream & stream, RowByRowCallback const & fn, - Params const & params = {}) const; - - void Read(std::istringstream & stream, FullFileCallback const & fn, - Params const & params = {}) const; - - template - void Read(Reader const & reader, Callback const & fn, Params const & params = {}) const + class IstreamWrapper : public ReaderInterface { - std::string str(static_cast(reader.Size()), '\0'); - reader.Read(0, &str[0], str.size()); - std::istringstream stream(str); - Read(stream, fn, params); - } + public: + explicit IstreamWrapper(std::istream & stream); + + // ReaderInterface overrides: + boost::optional ReadLine() override; + + private: + std::istream & m_stream; + }; + + class ReaderWrapper : public ReaderInterface + { + public: + explicit ReaderWrapper(Reader const & reader); + + // ReaderInterface overrides: + boost::optional ReadLine() override; + + private: + size_t m_pos = 0; + Reader const & m_reader; + }; + + class DefaultReader : public ReaderInterface + { + public: + explicit DefaultReader(std::string const & filename); + + // ReaderInterface overrides: + boost::optional ReadLine() override; + + private: + std::ifstream m_stream; + }; + + explicit CSVReader(std::unique_ptr reader, bool hasHeader, char delimiter); + + std::unique_ptr m_reader; + size_t m_currentLine = 0; + bool m_hasHeader = false; + char m_delimiter = ','; + Row m_header; +}; + +class CSVRunner +{ +public: + explicit CSVRunner(CSVReader && reader); + + class Iterator : public std::iterator + { + public: + Iterator(CSVReader & reader, bool isEnd = false); + Iterator(Iterator const & other); + Iterator & operator++(); + Iterator operator++(int); + // Checks whether both this and other are equal. Two CSVReader iterators are equal if both of + // them are end-of-file iterators or not and both of them refer to the same CSVReader. + bool operator==(Iterator const & other) const; + bool operator!=(Iterator const & other) const; + CSVReader::Row & operator*(); + + private: + CSVReader & m_reader; + boost::optional m_current; + }; + + // Warning: It reads first line. + Iterator begin(); + Iterator end(); + +private: + CSVReader m_reader; }; } // namespace coding diff --git a/generator/popular_places_section_builder.cpp b/generator/popular_places_section_builder.cpp index b32bb3478c..3328118e18 100644 --- a/generator/popular_places_section_builder.cpp +++ b/generator/popular_places_section_builder.cpp @@ -24,9 +24,7 @@ namespace generator { void LoadPopularPlaces(std::string const & srcFilename, PopularPlaces & places) { - coding::CSVReader reader; - auto const fileReader = FileReader(srcFilename); - reader.Read(fileReader, [&places, &srcFilename](coding::CSVReader::Row const & row) + for (auto const & row : coding::CSVRunner(coding::CSVReader(srcFilename))) { size_t constexpr kOsmIdPos = 0; size_t constexpr kPopularityIndexPos = 1; @@ -62,7 +60,7 @@ void LoadPopularPlaces(std::string const & srcFilename, PopularPlaces & places) LOG(LERROR, ("Popular place duplication in file:", srcFilename, "parsed row:", row)); return; } - }); + } } bool BuildPopularPlacesMwmSection(std::string const & srcFilename, std::string const & mwmFile, diff --git a/indexer/ftraits.hpp b/indexer/ftraits.hpp index 8b0acfd26d..c5897b65c6 100644 --- a/indexer/ftraits.hpp +++ b/indexer/ftraits.hpp @@ -101,9 +101,9 @@ class UGC : public TraitsBase UGC() { - coding::CSVReader reader; auto const fileReader = GetPlatform().GetReader("ugc_types.csv"); - reader.Read(*fileReader, [this](coding::CSVReader::Row const & row) { + for (auto const & row : coding::CSVRunner(coding::CSVReader(*fileReader, true /* hasHeader */))) + { size_t constexpr kTypePos = 0; size_t constexpr kCategoriesPos = 4; @@ -116,7 +116,7 @@ class UGC : public TraitsBase m_matcher.AppendType(std::move(typePath), std::move(item)); else m_excluded.AppendType(std::move(typePath)); - }); + } } UGCTypeMask ReadMasks(coding::CSVReader::Row const & row)