标题:[原创]CSV Parser
取消只看楼主
myajax95
Rank: 16Rank: 16Rank: 16Rank: 16
等 级:版主
威 望:30
帖 子:2978
专家分:0
注 册:2006-3-5
 问题点数:0 回复次数:2 
[原创]CSV Parser

在以前公司的时候用MFC container写过一个处理CSV文件的简单的Parser。换到新公司之后公司有条要求,就是能用STL Container的就不用MFC container。于是刚刚用STL重写了一遍,如果大家觉得有用不妨用用。欢迎帮我查查错。

CSV格式指的是用逗号分隔的字符串(Comma-Separated Values)。在读取简单文本数据时应用最为广泛。(数据复杂时一般存储为XML格式。)任何只有一层结构的数据,或者Class、Object,都可以很方便地用CSV读取。

在读写CSV时要注意逗号和引号。如果一列数据中已经有逗号,就不能再简单地用逗号作分隔符,这一列数据会被自动加上""。同样,如果一列数据中用了引号也会造成混淆,CSV会在每个引号后附加一个引号。

我写的Class CCSVLineParser主要用以下几个函数:GetAt(), SetAt(), GetFullString(), SetFullString(), size()。分别是读、写某一列,读、写整行和得到总列数。我没有用operator overloading重载符号“[]”而用了GetAt()和SetAt(),是因为我扩展了一点SetAt()的功能。GetAt(int intIndex)返回所指定的列的字符串,如果intIndex越界会throw exception。而SetAt()只有在index小于零时throw exception,如果所指定的列数index过大,CSV会自动扩展至这一列。同时表示整行字符的变量也会更新。
把下面的字符串存到.csv文件里然后用Excel打开,你会发现它是5列,和程序的结果一样。

下面是测试程序:

程序代码:

#include \"stdafx.h\"
#include \"csvlineparser.h\"

// basic_string_erase.cpp
// compile with: /EHsc
#include <string>
#include <iostream>

int _tmain(int argc, _TCHAR* argv[])
{
using namespace std;
string str1 ( \"a,\\"b\\"\\"sassaa\\",\\"aw\\"\\",\\"\\"w\\"\\"jj\\",asd,\\"axsxs\\"\" );
CCSVLineParser csv(str1);

try
{
for (int i = 0; i < (int)csv.size()+1; i++)
{
cout << csv.GetAt(i) << endl;
}
}
catch(CCSVLineParser::Range ex)
{
cout << ex.sText << endl;
}
catch(...)
{
}

cout << str1 << endl;
cout << csv.GetFullString(false) << endl;
cout << csv.GetFullString() << endl;

csv.SetAt(6, string(\"haha\\",\\"haha\"));
cout << csv.GetFullString(false) << endl;
cout << csv.GetFullString() << endl;
}


下面是.h和.cpp
CSVLineParser.h


#pragma once

#include <vector>
#include <string>
using namespace std;

// One CSV column kept in two forms:
//   m_sDisplay - the plain value of the column;
//   m_sActual  - the text of the column as it is written inside a CSV line.
// Both members are private; CCSVLineParser reaches them as a friend.
class CCSVColumn
{
public:
    // Builds a column from its plain value; the CSV text is derived from it
    // by the constructor defined in the .cpp file.
    CCSVColumn(std::string sDisplay);

    // Builds a column from an already known pair of CSV text and plain
    // value. The two strings are stored as given, without any check that
    // they agree with each other.
    CCSVColumn(const std::string& sActual, const std::string& sDisplay)
        : m_sDisplay(sDisplay), m_sActual(sActual) {}   // same order as the declarations below
private:
    std::string m_sDisplay;
    std::string m_sActual;
    friend class CCSVLineParser;
};

// Redeclaration of the column type, so this class declaration does not rely
// on where the column class is defined.
class CCSVColumn;

// Parser and editor for a single CSV line. The line is split into columns
// that can be read, replaced, swapped and joined back into one string.
class CCSVLineParser
{
public:
    // Parses sFullString right away; an empty string gives zero columns.
    CCSVLineParser(std::string sFullString = "");
    ~CCSVLineParser(void);

    // Number of columns currently held.
    size_t size(void);

    // Returns column intIndex: its CSV text when bActual is true, otherwise
    // its plain value. Throws Range when intIndex is negative or not below
    // size().
    std::string GetAt(int intIndex, bool bActual = false);

    // Drops the current columns and parses sLine instead.
    void SetFullString(std::string sLine);

    // Replaces column intIndex with the plain value sColumn. Throws Range
    // when intIndex is negative; an index past the end grows the line with
    // empty columns first.
    void SetAt(int intIndex, std::string & sColumn);

    // Joins all columns with ','. Uses the CSV text of every column when
    // bActual is true, otherwise the plain values.
    std::string GetFullString(bool bActual = true);

    // Exchanges two existing columns. Returns false, changing nothing, when
    // either index is out of range.
    bool swapColumn(int intCol1, int intCol2);

    // Exception type thrown for an invalid column index.
    class Range
    {
    public:
        Range() : sText("Out of range") {}
        std::string sText;   // message for the handler to show
    };
private:
    // Rebuilds m_vecElements from one CSV line.
    void Read(std::string sFullString);

    std::vector<CCSVColumn> m_vecElements;

    // Helper of Read(): moves the leading part of the next column that
    // contains doubled quotes out of sFullString into sColumn and counts the
    // doubled quotes in intQuoteNumber.
    static void PreReadForQuote(std::string &sColumn, std::string &sFullString, std::string sDelimiter, int &intQuoteNumber);

    friend class CCSVColumn;
};




CSVLineParser.cpp
程序代码:

#include \"StdAfx.h\"
#include \".\csvlineparser.h\"

//===============================================================================
#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif
//===============================================================================

// Builds a column from its plain value and derives the CSV text from it:
// every double quote is written twice, and the whole field is enclosed in
// double quotes when it contains a comma, a double quote or a line break
// (the quoting rules of RFC 4180).
CCSVColumn::CCSVColumn(string sDisplay) : m_sDisplay(sDisplay)
{
    const bool bNeedsQuoting = sDisplay.find_first_of(",\"\r\n") != string::npos;

    if (bNeedsQuoting)
        m_sActual += '"';

    // Copy character by character. The previous version passed an end
    // position where the string constructor expects a character count, so
    // quotes were not doubled and text was duplicated after the first quote.
    for (string::size_type nIndex = 0; nIndex < sDisplay.size(); ++nIndex)
    {
        m_sActual += sDisplay[nIndex];
        if (sDisplay[nIndex] == '"')
            m_sActual += '"';   // escape the quote by doubling it
    }

    if (bNeedsQuoting)
        m_sActual += '"';
}

// Creates a parser and immediately splits sFullString into columns.
CCSVLineParser::CCSVLineParser(string sFullString)
    : m_vecElements()
{
    this->Read(sFullString);
}

// Nothing to release by hand: the column vector cleans up after itself.
CCSVLineParser::~CCSVLineParser()
{
}

// Replaces the whole line: the current columns are discarded and sLine is
// parsed in their place. The parameter name matches the header declaration.
void CCSVLineParser::SetFullString(string sLine)
{
    Read(sLine);
}

// Splits one CSV line into columns and stores them in m_vecElements,
// replacing whatever was held before. For each column two strings are kept:
// the plain value (quotes removed and un-doubled) and the exact slice of
// sFullString the column was read from.
//
// A column that starts with a double quote ends at the next quote followed
// by a comma; any other column ends at the next comma. Doubled quotes inside
// a column are consumed by PreReadForQuote() before the delimiter search.
void CCSVLineParser::Read(string sFullString)
{
    int intStartLocation = 0;   // offset in sFullString where the current column starts
    int intQuoteNumber = 0;     // quote characters of the CSV text that are absent from the plain value
    string::size_type nFindLocation;
    string sRemaining = sFullString, sDelimiter, sActual, sDisplay, sColumn;

    // Always start from scratch so an empty line does not keep old columns.
    m_vecElements.clear();

    if (sRemaining.empty())
        return;

    if (sRemaining.at(0) != '"')
        sDelimiter = ",";
    else
    {
        sDelimiter = "\",";
        sRemaining.erase(0, 1);   // drop the opening quote
    }

    PreReadForQuote(sColumn, sRemaining, sDelimiter, intQuoteNumber);
    while ((nFindLocation = sRemaining.find(sDelimiter)) != string::npos)
    {
        // sColumn holds the part already consumed because of doubled quotes.
        sDisplay = sColumn + string(sRemaining, 0, nFindLocation);
        sRemaining.erase(0, nFindLocation + sDelimiter.size());
        if (sDelimiter.size() > 1)
            intQuoteNumber += 2;   // the enclosing pair of quotes
        sActual = string(sFullString, intStartLocation, sDisplay.size() + intQuoteNumber);

        m_vecElements.push_back(CCSVColumn(sActual, sDisplay));
        intStartLocation += (int)sDisplay.size() + intQuoteNumber + 1;   // +1 skips the comma

        // Look for the next delimiter. The line may end right after a comma,
        // so sRemaining must be checked before its first character is read.
        if (sRemaining.empty() || sRemaining[0] != '"')
            sDelimiter = ",";
        else
        {
            sDelimiter = "\",";
            sRemaining.erase(0, 1);
        }
        PreReadForQuote(sColumn, sRemaining, sDelimiter, intQuoteNumber);
    }

    // Grab the last column.
    if (sDelimiter.size() > 1)
    {
        // Strip the closing quote only when it is really there; a malformed
        // line may lack it or may already be fully consumed.
        if (!sRemaining.empty() && sRemaining[sRemaining.size() - 1] == '"')
            sRemaining.erase(sRemaining.size() - 1, 1);
        intQuoteNumber += 2;
    }
    sDisplay = sColumn + sRemaining;
    sActual = string(sFullString, intStartLocation, sDisplay.size() + intQuoteNumber);
    m_vecElements.push_back(CCSVColumn(sActual, sDisplay));
}

// Reports how many columns the parsed line currently has.
size_t CCSVLineParser::size()
{
    const size_t nColumns = m_vecElements.size();
    return nColumns;
}


// Fetches one column by zero-based index.
// bActual picks the stored CSV text; otherwise the plain value is returned.
// Throws Range for a negative index or one at/after the column count.
string CCSVLineParser::GetAt(int intIndex, bool bActual)
{
    const int intColumnCount = (int)m_vecElements.size();
    const bool bInRange = (intIndex >= 0) && (intIndex < intColumnCount);
    if (!bInRange)
        throw Range();

    const CCSVColumn &column = m_vecElements[intIndex];
    if (bActual)
        return column.m_sActual;
    return column.m_sDisplay;
}

// Consumes the doubled quotes that occur before the next delimiter.
//
// sColumn        out: text consumed so far, with every doubled quote reduced
//                to a single one; cleared on entry.
// sFullString    in/out: unparsed rest of the line; the consumed part is
//                erased from its front.
// sDelimiter     delimiter that ends the current column.
// intQuoteNumber out: number of doubled quotes consumed; reset to 0 on entry.
//
// A doubled quote is consumed while it lies before the first delimiter, or
// when no delimiter is left at all.
void CCSVLineParser::PreReadForQuote(string &sColumn, string &sFullString, string sDelimiter, int &intQuoteNumber)
{
    const char szDoubledQuote[] = "\"\"";
    const string::size_type nDoubledLength = sizeof(szDoubledQuote) / sizeof(char) - 1;

    sColumn.clear();
    intQuoteNumber = 0;

    if (sFullString.empty() || sDelimiter.empty())
        return;

    string::size_type nQuoteLocation = sFullString.find(szDoubledQuote);
    string::size_type nDelimiterLocation = sFullString.find(sDelimiter);

    while (nQuoteLocation != string::npos
           && (nDelimiterLocation == string::npos || nQuoteLocation < nDelimiterLocation))
    {
        // Keep everything up to and including the first quote of the pair,
        // then drop the pair from the input. Appending the prefix directly
        // avoids copying the whole remaining line on every pass.
        sColumn.append(sFullString, 0, nQuoteLocation + 1);
        sFullString.erase(0, nQuoteLocation + nDoubledLength);
        ++intQuoteNumber;

        nQuoteLocation = sFullString.find(szDoubledQuote);
        nDelimiterLocation = sFullString.find(sDelimiter);
    }
}

void CCSVLineParser::SetAt(int intIndex, string & sColumn)
{
if (intIndex < 0)
throw Range();

// automatically expand to the column
while ((int)m_vecElements.size() < intIndex+1)
m_vecElements.push_back(CCSVColumn(\"\"));
m_vecElements[intIndex] = CCSVColumn(sColumn);
}

// Rebuilds the line by joining every column with a comma.
// bActual selects the stored CSV text of each column; otherwise the plain
// values are joined.
string CCSVLineParser::GetFullString(bool bActual)
{
    string sJoined;
    const int intTotal = (int)size();
    int intColumn = 0;

    while (intColumn < intTotal)
    {
        // Separator goes before every column except the first.
        if (intColumn != 0)
            sJoined += ',';

        const CCSVColumn &column = m_vecElements[intColumn];
        sJoined += bActual ? column.m_sActual : column.m_sDisplay;
        ++intColumn;
    }
    return sJoined;
}

// Exchanges the columns at intCol1 and intCol2. Both columns have to exist:
// returns false and changes nothing when either index is negative or not
// below size(); returns true otherwise, including when both indices are the
// same.
bool CCSVLineParser::swapColumn(int intCol1, int intCol2)
{
    const int intColumnCount = (int)size();
    if (intCol1 < 0 || intCol2 < 0 || intCol1 >= intColumnCount || intCol2 >= intColumnCount)
        return false;

    if (intCol1 != intCol2)
        swap(m_vecElements[intCol1], m_vecElements[intCol2]);

    return true;
}

[此贴子已经被作者于2006-7-15 3:04:38编辑过]

搜索更多相关主题的帖子: Parser CSV 
2006-04-01 04:19
myajax95
Rank: 16Rank: 16Rank: 16Rank: 16
等 级:版主
威 望:30
帖 子:2978
专家分:0
注 册:2006-3-5
得分:0 
修了一个bug,同时加个两列交换的功能。

http://myajax95./
2006-06-04 07:16
myajax95
Rank: 16Rank: 16Rank: 16Rank: 16
等 级:版主
威 望:30
帖 子:2978
专家分:0
注 册:2006-3-5
得分:0 
bug fix: 连续重复使用这个class的object时,如果一个string是空,前面的text不被清除。
void CCSVLineParser::Read(string sFullString)

http://myajax95./
2006-07-15 03:05



参与讨论请移步原网站贴子:https://bbs.bccn.net/thread-54288-1-1.html




关于我们 | 广告合作 | 编程中国 | 清除Cookies | TOP | 手机版

编程中国 版权所有,并保留所有权利。
Powered by Discuz, Processed in 0.296897 second(s), 8 queries.
Copyright©2004-2025, BCCN.NET, All Rights Reserved