#include "StreamingTsvParser.h"

#include <cstddef>
#include <string>
#include <vector>

namespace {

/// Extracts the next *complete* line from `buf`, starting at byte offset `pos`.
///
/// A line is terminated by "\n", "\r\n" or a lone "\r". The terminator is not
/// copied into `line`.
///
/// @param buf   Accumulated input.
/// @param pos   Offset of the first unconsumed byte in `buf`.
/// @param line  Receives the line content when a complete line is found;
///              left untouched otherwise.
/// @return Number of bytes consumed (content plus terminator, always >= 1),
///         or 0 when no complete line is available yet. 0 is returned both
///         when there is no terminator at all and when the only terminator is
///         a '\r' in the very last position: in that case the matching '\n'
///         may still arrive with the next chunk, and consuming the '\r' now
///         would turn that '\n' into a spurious empty line.
size_t TakeLine(const std::string& buf, size_t pos, std::string& line)
{
    const size_t eol = buf.find_first_of("\r\n", pos);
    if (eol == std::string::npos) {
        return 0;
    }

    size_t next = eol + 1;
    if (buf[eol] == '\r') {
        if (next == buf.size()) {
            return 0;  // cannot tell yet whether this is "\r" or "\r\n"
        }
        if (buf[next] == '\n') {
            ++next;
        }
    }

    line.assign(buf, pos, eol - pos);
    return next - pos;
}

/// Splits `line` on tab characters into `out` (previous contents are discarded).
///
/// Empty fields are preserved, so `out` always holds (number of tabs + 1)
/// entries; in particular an empty line yields exactly one empty field.
void SplitByTab(const std::string& line, std::vector<std::string>& out)
{
    out.clear();
    size_t start = 0;
    for (size_t tab = line.find('\t'); tab != std::string::npos; tab = line.find('\t', start)) {
        out.emplace_back(line, start, tab - start);
        start = tab + 1;
    }
    out.emplace_back(line, start, std::string::npos);
}

}  // namespace

/// Appends `size` bytes from `data` to the internal buffer and delivers every
/// line that is now complete to the callback. A null pointer or a zero size
/// is ignored.
void StreamingTsvParser::Feed(const void* data, size_t size)
{
    if (data == nullptr || size == 0) {
        return;
    }
    const char* p = static_cast<const char*>(data);
    m_buffer.append(p, size);
    ProcessBuffer();
}

/// Flushes the parser at end of input.
///
/// Any complete lines still buffered are delivered first; whatever remains is
/// treated as the final line (it may lack a line terminator). The buffer is
/// empty afterwards. Without a callback the buffered data is discarded.
void StreamingTsvParser::End()
{
    ProcessBuffer();
    if (m_buffer.empty()) {
        return;
    }

    if (m_callback) {
        // After ProcessBuffer() the remainder holds no terminator except,
        // possibly, a single deferred '\r' in the last position.
        if (m_buffer.back() == '\r') {
            m_buffer.pop_back();
        }
        std::vector<std::string> fields;
        SplitByTab(m_buffer, fields);
        // Advance the index here as well, so it stays consistent with the
        // lines delivered from ProcessBuffer().
        m_callback(m_lineIndex++, fields);
    }
    m_buffer.clear();
}

/// Delivers every complete line currently in the buffer to the callback as
/// (line index, fields) and removes those lines from the buffer. An incomplete
/// trailing line stays buffered until more data arrives or End() is called.
/// Does nothing when no callback is set.
void StreamingTsvParser::ProcessBuffer()
{
    if (!m_callback) {
        return;
    }

    // Removes the consumed prefix once on scope exit instead of once per line
    // (which would be quadratic for a chunk with many lines). Running in a
    // destructor keeps the buffer consistent even if the callback throws.
    struct PrefixEraser {
        std::string& buffer;
        const size_t& consumed;
        ~PrefixEraser() { buffer.erase(0, consumed); }
    };

    size_t offset = 0;
    const PrefixEraser eraser{m_buffer, offset};

    std::string line;
    std::vector<std::string> fields;
    for (size_t used = TakeLine(m_buffer, offset, line); used > 0;
         used = TakeLine(m_buffer, offset, line)) {
        SplitByTab(line, fields);
        m_callback(m_lineIndex++, fields);
        offset += used;
    }
}