You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
kev/Drawer/Module/GeoSigmaDraw/StreamingTsvParser.cpp

88 lines
1.6 KiB
C++

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

#include "StreamingTsvParser.h"
#include <algorithm>
#include <cstring>
namespace
{
/// Extracts the first complete line of buf into line.
///
/// A line ends at "\n", "\r\n" or a lone "\r". The terminator is consumed but
/// never stored in line.
///
/// @param buf   Pending bytes that have not been parsed yet.
/// @param line  Receives the line content without its terminator; cleared
///              when no complete line is available.
/// @return Number of leading bytes of buf occupied by the line and its
///         terminator, or 0 when buf does not yet contain a complete line.
size_t TakeLine(const std::string& buf, std::string& line)
{
    const size_t eol = buf.find_first_of("\r\n");
    if (eol == std::string::npos)
    {
        // No terminator yet: the tail may be continued by a later chunk, so
        // it must not be reported as a finished line.
        line.clear();
        return 0;
    }

    if (buf[eol] == '\r')
    {
        if (eol + 1 == buf.size())
        {
            // The CR is the last byte available. Whether it is a lone CR or
            // the first half of CRLF is unknown until the next byte arrives;
            // consuming it now would turn a later '\n' into a bogus empty line.
            line.clear();
            return 0;
        }
        line.assign(buf, 0, eol);
        return (buf[eol + 1] == '\n') ? eol + 2 : eol + 1;
    }

    line.assign(buf, 0, eol);
    return eol + 1;
}

/// Splits line on tab characters into out.
///
/// out is cleared first and always receives at least one field (an empty
/// line yields a single empty field). Empty fields between consecutive tabs
/// and after a trailing tab are preserved. One trailing '\r' is treated as a
/// dangling line terminator and ignored, so it never ends up in the last field.
///
/// @param line  Line content to split.
/// @param out   Receives the fields, in order.
void SplitByTab(const std::string& line, std::vector<std::string>& out)
{
    out.clear();

    size_t end = line.size();
    if (end > 0 && line[end - 1] == '\r')
    {
        --end;
    }

    size_t start = 0;
    for (;;)
    {
        const size_t tab = line.find('\t', start);
        if (tab == std::string::npos || tab >= end)
        {
            // Last field: everything up to the (possibly trimmed) end.
            out.emplace_back(line, start, end - start);
            break;
        }
        out.emplace_back(line, start, tab - start);
        start = tab + 1;
    }
}
}
/// Appends a chunk of raw input to the internal buffer and parses it.
///
/// @param data  Pointer to the chunk; a null pointer makes the call a no-op.
/// @param size  Number of bytes at data; zero makes the call a no-op.
void StreamingTsvParser::Feed(const void* data, size_t size)
{
    const bool hasInput = (data != nullptr) && (size != 0);
    if (hasInput)
    {
        m_buffer.append(static_cast<const char*>(data), size);
        ProcessBuffer();
    }
}
void StreamingTsvParser::End()
{
// 将剩余内容当作最后一行(可能无换行符)
if (!m_buffer.empty())
{
std::vector<std::string> fields;
SplitByTab(m_buffer, fields);
if (!fields.empty() && m_callback)
{
m_callback(m_lineIndex, fields);
}
m_buffer.clear();
}
}
/// Parses the buffered data line by line and reports each row.
///
/// Repeatedly asks TakeLine() for the next line at the front of the buffer,
/// splits it on tabs, passes the fields with the current line index to the
/// callback (advancing the index), and removes the consumed bytes from the
/// buffer. Stops as soon as TakeLine() reports that nothing was consumed.
/// Without a callback the buffer is left untouched.
void StreamingTsvParser::ProcessBuffer()
{
    if (m_callback)
    {
        std::string current;
        for (size_t used = TakeLine(m_buffer, current); used > 0;
             used = TakeLine(m_buffer, current))
        {
            std::vector<std::string> columns;
            SplitByTab(current, columns);
            if (!columns.empty())
            {
                m_callback(m_lineIndex++, columns);
            }
            // Drop the bytes of the line just handled before looking for the next one.
            m_buffer.erase(0, used);
        }
    }
}