Speed up o5m reading

This commit is contained in:
Sergey Yershov 2019-11-05 16:51:03 +03:00 committed by Gleb
parent 9a9585f212
commit bd9b33759e
4 changed files with 143 additions and 19 deletions

View file

@ -12,6 +12,119 @@
using namespace std;
// Smoke test: a value-initialized Entity accepts SkipRemainder() and is streamed
// as "O5M_CMD_RESET ID: 0".
UNIT_TEST(OSM_O5M_Source_Entity_smoke_test)
{
  osm::O5MSource::Entity entity{};
  entity.SkipRemainder();

  std::stringstream out;
  out << entity;

  EXPECT_STREQ("O5M_CMD_RESET ID: 0", out.str().c_str());
}
// Patches byte 17 of the fixture to 0x35 and expects the process to die while the
// members of the parsed entities are being iterated.
UNIT_TEST(OSM_O5M_Source_Member_type_fail_test)
{
  string corrupted(begin(for_fail_o5m_data), end(for_fail_o5m_data));
  // Set unreal member type
  corrupted[17] = 0x35;

  stringstream input(corrupted);
  // Feeds the parser from the in-memory stream; returns the number of bytes actually read.
  auto const reader = [&input](uint8_t * buffer, size_t size)
  {
    return input.read(reinterpret_cast<char *>(buffer), size).gcount();
  };
  osm::O5MSource dataset(reader, 10 /* buffer size */);

  EXPECT_DEATH({
    for (auto const & entity : dataset)
    {
      for (auto const & member : entity.Members())
      {
        (void)member;
      }
    }
  }, "");
}
// Patches byte 17 of the fixture to 0x32 and checks that every member produced by the
// parser is reported with EntityType::Relation.
UNIT_TEST(OSM_O5M_Source_Member_type_relation_test)
{
  string patched(begin(for_fail_o5m_data), end(for_fail_o5m_data));
  // Set relation member type
  patched[17] = 0x32;

  stringstream input(patched);
  // Feeds the parser from the in-memory stream; returns the number of bytes actually read.
  auto const reader = [&input](uint8_t * buffer, size_t size)
  {
    return input.read(reinterpret_cast<char *>(buffer), size).gcount();
  };
  osm::O5MSource dataset(reader, 10 /* buffer size */);

  for (auto const & em : dataset)
  {
    for (auto const & member : em.Members())
    {
      TEST_EQUAL(member.type, osm::O5MSource::EntityType::Relation, ());
    }
  }
}
// Patches the size bytes at offsets 9 and 12 of the fixture and checks that iterating the
// resulting entities yields no members at all.
UNIT_TEST(OSM_O5M_Source_Member_empty_test)
{
  string patched(begin(for_fail_o5m_data), end(for_fail_o5m_data));
  // Set incorrect entity size
  patched[9] = 0x06;
  patched[12] = 0x03;

  stringstream input(patched);
  // Feeds the parser from the in-memory stream; returns the number of bytes actually read.
  auto const reader = [&input](uint8_t * buffer, size_t size)
  {
    return input.read(reinterpret_cast<char *>(buffer), size).gcount();
  };
  osm::O5MSource dataset(reader, 10 /* buffer size */);

  size_t counter = 0;
  for (auto const & em : dataset)
  {
    for (auto const & member : em.Members())
    {
      (void)member;
      ++counter;
    }
  }
  TEST_EQUAL(counter, 0, ());
}
// Feeds plain text to the O5MSource constructor and expects std::runtime_error with the
// message "Incorrect o5m start".
UNIT_TEST(OSM_O5M_Source_Not_o5m_test)
{
  string const text{"Not o5m"};
  stringstream input(text);
  // Feeds the parser from the in-memory stream; returns the number of bytes actually read.
  auto const reader = [&input](uint8_t * buffer, size_t size)
  {
    return input.read(reinterpret_cast<char *>(buffer), size).gcount();
  };

  EXPECT_THROW({
    try
    {
      osm::O5MSource dataset(reader, 10 /* buffer size */);
    }
    catch (std::runtime_error const & e)
    {
      // Check the message here, then rethrow so EXPECT_THROW still observes the exception.
      EXPECT_STREQ("Incorrect o5m start", e.what());
      throw;
    }
  }, std::runtime_error);
}
// Overwrites byte 6 of the fixture with '8' and expects the O5MSource constructor to throw
// std::runtime_error with the message "Incorrect o5m header".
UNIT_TEST(OSM_O5M_Source_Incorrect_header_test)
{
  string corrupted(begin(for_fail_o5m_data), end(for_fail_o5m_data));
  // Corrupt header
  corrupted[6] = '8';

  stringstream input(corrupted);
  // Feeds the parser from the in-memory stream; returns the number of bytes actually read.
  auto const reader = [&input](uint8_t * buffer, size_t size)
  {
    return input.read(reinterpret_cast<char *>(buffer), size).gcount();
  };

  EXPECT_THROW({
    try
    {
      osm::O5MSource dataset(reader, 10 /* buffer size */);
    }
    catch (std::runtime_error const & e)
    {
      // Check the message here, then rethrow so EXPECT_THROW still observes the exception.
      EXPECT_STREQ("Incorrect o5m header", e.what());
      throw;
    }
  }, std::runtime_error);
}
UNIT_TEST(OSM_O5M_Source_Node_read_test)
{
string data(begin(node2_o5m_data), end(node2_o5m_data));

View file

@ -141,3 +141,20 @@ unsigned char const relation_o5m_data[/* 224 */] = {
0x00, 0x70, 0x6C, 0x61, 0x63, 0x65, 0x00, 0x74, 0x6F, 0x77, 0x6E, 0x00, 0x00, 0x74, 0x79, 0x70,
0x65, 0x00, 0x6D, 0x75, 0x6C, 0x74, 0x69, 0x70, 0x6F, 0x6C, 0x79, 0x67, 0x6F, 0x6E, 0x00, 0xFE};
static_assert(sizeof(relation_o5m_data) == 224, "Size check failed");
// XML fixture: an <osm> document holding one relation (id 1) with a single member,
// way 273163 with role "outer".
// NOTE(review): presumably the textual counterpart of for_fail_o5m_data - confirm.
char const for_fail_xml_data[] = R"#(<?xml version='1.0' encoding='UTF-8'?>
<osm>
<relation id='1'>
<member type='way' ref='273163' role='outer' />
</relation>
</osm>
)#";
// 25-byte o5m fixture that the O5MSource tests copy and patch at fixed offsets.
// Bytes 3..6 are ASCII "o5m2" and bytes 18..22 are ASCII "outer".
// NOTE(review): presumably the binary counterpart of for_fail_xml_data - confirm against
// the o5m format description before changing any byte.
unsigned char const for_fail_o5m_data[/* 25 */] = {
    0xFF, 0xE0, 0x04, 0x6F, 0x35, 0x6D, 0x32, 0xFF, 0x12, 0x0E, 0x02, 0x00, 0x0B, 0x96, 0xAC, 0x21,
    0x00, 0x31, 0x6F, 0x75, 0x74, 0x65, 0x72, 0x00, 0xFE};
static_assert(sizeof(for_fail_o5m_data) == 25, "Size check failed");

View file

@ -8,3 +8,5 @@ extern char const way_xml_data[];
extern unsigned char const way_o5m_data[175];
extern char const relation_xml_data[];
extern unsigned char const relation_o5m_data[224];
// "for_fail" fixture pair; the o5m array is declared with its exact size (25 bytes), which
// must stay in sync with the definition.
extern char const for_fail_xml_data[];
extern unsigned char const for_fail_o5m_data[25];

View file

@ -175,9 +175,14 @@ protected:
char const * role = nullptr;
};
// NOTE(review): this view lists both std::vector-based and std::array-based declarations of
// m_stringTable / m_stringBuffer / m_stringCurrentIndex; only one set can exist in the
// class - confirm against the actual header.
std::vector<StringTableRecord> m_stringTable;
std::vector<char> m_stringBuffer;
size_t m_stringCurrentIndex;
// When reading an .o5m coded file, we use a reference table which has 15,000 lines,
// 250+2 characters each (for performance reasons: 256 characters).
// Every string pair we encounter is copied into the table, with one exception: string pairs
// which are longer than 250 characters are interpreted but not copied into the table.
// NOTE(review): std::array stores all 15000 records inline in the owning object; confirm
// that sizeof(StringTableRecord) * 15000 is acceptable wherever the object is created as a
// local variable, and that <array> is included.
std::array<StringTableRecord, 15000> m_stringTable;
std::array<char, 1024> m_stringBuffer;
// Starts at 0.
size_t m_stringCurrentIndex = 0;
StreamBuffer m_buffer;
size_t m_remainder;
int64_t m_currentNodeRef = 0;
@ -528,18 +533,6 @@ public:
return this;
}
// Resets the string-table index to 0 and sizes the string buffer (1024 chars) and the
// reference table (15000 records).
void InitStringTable()
{
// When reading an .o5m coded file, we use a reference table which has 15,000 lines,
// 250+2 characters each (for performance reasons: 256 characters).
// Every string pair we encounter is copied into the table, with one exception: string pairs
// which are longer than 250 characters are interpreted but not copied into the table.
m_stringCurrentIndex = 0;
m_stringBuffer.resize(1024);
m_stringTable.resize(15000);
}
void Reset()
{
m_currentNodeRef = 0;
@ -583,11 +576,10 @@ public:
// Builds a source on top of |reader|, buffering reads through m_buffer with
// |readBufferSizeInBytes| (60000 by default).
// Throws std::runtime_error("Incorrect o5m start") when the first byte taken from the buffer
// is not EntityType::Reset, and std::runtime_error("Incorrect o5m header") when
// CheckHeader() returns false.
// NOTE(review): this view shows both an unchecked "CheckHeader(); InitStringTable();"
// sequence and the checked "if (!CheckHeader())" form; presumably only one of them belongs
// in the real constructor - confirm against the actual header.
O5MSource(TReadFunc reader, size_t readBufferSizeInBytes = 60000) : m_buffer(reader, readBufferSizeInBytes)
{
if (EntityType::Reset != EntityType(m_buffer.Get()))
{
throw std::runtime_error("Incorrect o5m start");
}
CheckHeader();
InitStringTable();
if (!CheckHeader())
throw std::runtime_error("Incorrect o5m header");
}
bool CheckHeader()