// kev/Drawer/Module/GeoSigmaDraw/StreamingTsvParser.cpp
#include "StreamingTsvParser.h"
#include <algorithm>
#include <cstring>
namespace
{
/// Extracts the first complete line from the front of buf.
///
/// A line ends at "\n", "\r\n" or a lone "\r". The terminator is not copied
/// into line but is included in the returned byte count.
///
/// @param buf   Buffered input, starting at the beginning of a line.
/// @param line  Receives the line content without its terminator. It is
///              cleared when no complete line is available.
/// @return Number of bytes of buf that make up the line plus its terminator,
///         or 0 when buf does not yet contain a complete line.
size_t TakeLine(const std::string& buf, std::string& line)
{
    line.clear();

    const size_t eol = buf.find_first_of("\r\n");
    if (eol == std::string::npos)
    {
        // No terminator yet: the line is still incomplete, so nothing may be
        // consumed. Reporting the partial text as a line would cut a row in
        // two whenever it straddles two input chunks.
        return 0;
    }

    size_t consumed = eol + 1;
    if (buf[eol] == '\r')
    {
        if (consumed == buf.size())
        {
            // '\r' is the last buffered byte: a '\n' may still follow in the
            // next chunk. Wait, otherwise that '\n' would later be read as an
            // extra empty line. A caller flushing the remainder at end of
            // input has to deal with this pending '\r' itself.
            return 0;
        }
        if (buf[consumed] == '\n')
        {
            ++consumed;
        }
    }

    line.assign(buf, 0, eol);
    return consumed;
}
/// Splits line into tab-separated fields.
///
/// out is cleared first and then receives one entry per field, in order.
/// Empty fields are kept, so an empty line yields a single empty field and
/// N tabs always yield N + 1 fields.
///
/// @param line  Text of one line, without its line terminator.
/// @param out   Receives the fields; previous contents are discarded.
void SplitByTab(const std::string& line, std::vector<std::string>& out)
{
    out.clear();

    size_t fieldBegin = 0;
    for (;;)
    {
        const size_t tab = line.find('\t', fieldBegin);
        if (tab == std::string::npos)
        {
            // Last field: everything after the final tab (possibly empty).
            out.push_back(line.substr(fieldBegin));
            return;
        }
        out.push_back(line.substr(fieldBegin, tab - fieldBegin));
        fieldBegin = tab + 1;
    }
}
}
/// Appends a chunk of raw input to the internal buffer and runs
/// ProcessBuffer() on the result.
///
/// A null pointer or a zero size is ignored.
///
/// @param data  Pointer to the chunk bytes; read as an array of char.
/// @param size  Number of bytes available at data.
void StreamingTsvParser::Feed(const void* data, size_t size)
{
    const bool hasPayload = (data != nullptr) && (size != 0);
    if (hasPayload)
    {
        m_buffer.append(static_cast<const char*>(data), size);
        ProcessBuffer();
    }
}
void StreamingTsvParser::End()
{
// 将剩余内容当作最后一行(可能无换行符)
if (!m_buffer.empty())
{
std::vector<std::string> fields;
SplitByTab(m_buffer, fields);
if (!fields.empty() && m_callback)
{
m_callback(m_lineIndex, fields);
}
m_buffer.clear();
}
}
/// Drains lines from the front of the internal buffer and reports each one
/// to the callback.
///
/// Does nothing when no callback is installed; the buffer is left untouched
/// in that case. Otherwise TakeLine() is called repeatedly until it reports
/// zero consumed bytes. Each line is split on tabs and passed to the
/// callback together with the running line index, then removed from the
/// buffer.
void StreamingTsvParser::ProcessBuffer()
{
    if (m_callback)
    {
        std::string current;
        for (size_t used = TakeLine(m_buffer, current); used != 0;
             used = TakeLine(m_buffer, current))
        {
            std::vector<std::string> columns;
            SplitByTab(current, columns);
            if (!columns.empty())
            {
                m_callback(m_lineIndex++, columns);
            }
            // Drop the line only after the callback has returned.
            m_buffer.erase(0, used);
        }
    }
}