标准CSV解析
很方便的csv解析,可以解析帶有分隔符的字段,可以解析成map方便程序使用。
//
//  CCSVParse.hpp
//  CPPAlgorithm
//
//  Created by xujw on 16/2/26.
//  Copyright (c) 2016 xujw. All rights reserved.
//
//  NOTE(review): this banner names the file CCSVParse.hpp while the
//  implementation below includes "CCSVParse.h" - confirm the real file name.
//

#ifndef CCSVParse_h
#define CCSVParse_h

#include <stdio.h>
#include <vector>
#include <string>
#include <map>
#include <iostream>
#include <sstream>

/*
 * Free helper functions: convert a string to a number.
 * Each one streams `source` through a std::stringstream and returns 0 when
 * nothing could be extracted (e.g. an empty string).
 */
int conToInt(std::string &source);
float conToFloat(std::string &source);
double conToDouble(std::string &source);

// Convert a number to std::string using default stream formatting.
std::string conToString(int s);
std::string conToString(float s);
std::string conToString(double s);

class CCSVParse
{
public:
    CCSVParse();
    ~CCSVParse();

    /*
     * Parse a CSV file into a map of maps.
     * The first row must hold the column names and the first column must hold
     * the row id; ids should be unique (a repeated id overwrites the earlier row).
     * Look a row up by id, then look a value up by column name.
     * Expected layout:
     *      id      name    age
     *      1       小明    20
     *      2       小紅    19
     *      3       小光    18
     * Rows without any field are skipped; columns missing from a short row are
     * stored as empty strings.
     */
    std::map<std::string, std::map<std::string, std::string> > parseCsvFileToMap(const std::string &fileName, const std::string &separator = ",");

    // Parse the file into rows and columns. `separator` must be exactly one
    // character (for example , or #). The result is also kept in the object.
    std::vector< std::vector<std::string> > parseCsvFile(const std::string &fileName, const std::string &separator = ",");

    // Print the parsed data to std::cout (for testing).
    void printParseData() const;

    // Number of rows held from the last parseCsvFile() call.
    inline size_t getRowNum() const { return _gridData.size(); }

    // Switch the simple mode on or off (fields may not contain the separator).
    inline void useSimpleModel(bool flag) { _useSimpleModel = flag; }

    /*
     * Split a string.
     * str:       the text to split
     * separator: the delimiter (may be longer than one character)
     */
    std::vector<std::string> splitString(const std::string &str, const std::string &separator);

    // Read the whole file as bytes; returns "" when it cannot be opened or read.
    std::string loadCsvFile(const std::string &fileName);

private:
    // Parsed rows and columns.
    std::vector< std::vector<std::string> > _gridData;
    // Whether the simple mode is used.
    bool _useSimpleModel;
};

#endif /* CCSVParse_h */

//
//  CCSVParse.cpp
//  CPPAlgorithm
//
//  Created by xujw on 16/2/26.
//  Copyright (c) 2016 xujw. All rights reserved.
//

// The header and the implementation are listed back to back. When they are
// split into two files the include below is taken as usual; when the listing is
// compiled as one file the declarations above are already visible and the
// header's own guard macro makes this a no-op.
#ifndef CCSVParse_h
#include "CCSVParse.h"
#endif
#include <assert.h>
#include <ctime>    // clock(), clock_t, CLOCKS_PER_SEC

namespace
{
    // States of the standard-mode field parser.
    enum StateType
    {
        kNewFieldStart = 0,     // start of a row
        kNonQuotesField,        // inside an unquoted field
        kQuotesField,           // inside a quoted field
        kFieldSeparator,        // just consumed a field separator
        kQuoteInQuotesField,    // saw a quote while inside a quoted field
        kRowSeparator,          // saw a carriage return / line feed outside quotes
        kError                  // syntax error (unused)
    };

    // Parse one '\n'-delimited line in standard mode and append the resulting
    // row(s) to `grid`. A carriage return outside quotes ends the current row;
    // rows without any field (blank lines) are not stored.
    void appendRowsFromLine(const std::string &line, char sep,
                            std::vector< std::vector<std::string> > &grid)
    {
        std::vector<std::string> fields;
        std::string field;
        StateType state = kNewFieldStart;

        for (size_t j = 0; j < line.size(); ++j)
        {
            const char ch = line[j];
            switch (state)
            {
                case kNewFieldStart:
                    if (ch == '"')
                    {
                        state = kQuotesField;
                    }
                    else if (ch == sep)
                    {
                        fields.push_back("");
                        state = kFieldSeparator;
                    }
                    else if (ch == '\r' || ch == '\n')
                    {
                        state = kRowSeparator;
                    }
                    else
                    {
                        field.push_back(ch);
                        state = kNonQuotesField;
                    }
                    break;

                case kNonQuotesField:
                    if (ch == sep)
                    {
                        fields.push_back(field);
                        field.clear();
                        state = kFieldSeparator;
                    }
                    else if (ch == '\r' || ch == '\n')
                    {
                        fields.push_back(field);
                        state = kRowSeparator;
                    }
                    else
                    {
                        field.push_back(ch);
                    }
                    break;

                case kQuotesField:
                    if (ch == '"')
                    {
                        state = kQuoteInQuotesField;
                    }
                    else
                    {
                        // Separators and carriage returns are plain data here.
                        field.push_back(ch);
                    }
                    break;

                case kFieldSeparator:
                    if (ch == sep)
                    {
                        fields.push_back("");
                    }
                    else if (ch == '"')
                    {
                        field.clear();
                        state = kQuotesField;
                    }
                    else if (ch == '\r' || ch == '\n')
                    {
                        fields.push_back("");
                        state = kRowSeparator;
                    }
                    else
                    {
                        field.push_back(ch);
                        state = kNonQuotesField;
                    }
                    break;

                case kQuoteInQuotesField:
                    if (ch == sep)
                    {
                        // The quote closed the field.
                        fields.push_back(field);
                        field.clear();
                        state = kFieldSeparator;
                    }
                    else if (ch == '\r' || ch == '\n')
                    {
                        fields.push_back(field);
                        state = kRowSeparator;
                    }
                    else if (ch == '"')
                    {
                        // A doubled quote is an escaped quote.
                        field.push_back(ch);
                        state = kQuotesField;
                    }
                    else
                    {
                        // A quote inside a quoted field must be doubled, and a
                        // closing quote must be followed by a separator.
                        std::cout<<"語法錯誤: 轉義字符 \" 不能完成轉義 或 引號字段結尾引號沒有緊貼字段分隔符"<<std::endl;
                        assert(false);
                    }
                    break;

                default:
                    break;
            }

            if (state == kRowSeparator)
            {
                // Emit the finished row exactly once and start over, so that
                // characters after a carriage return neither duplicate the row
                // nor get lost.
                if (!fields.empty())
                {
                    grid.push_back(fields);
                }
                fields.clear();
                field.clear();
                state = kNewFieldStart;
            }
        }

        // Flush whatever is pending at the end of the line.
        switch (state)
        {
            case kNonQuotesField:
            case kQuoteInQuotesField:
                fields.push_back(field);
                grid.push_back(fields);
                break;

            case kFieldSeparator:
                // The line ended right after a separator: trailing empty field.
                fields.push_back("");
                grid.push_back(fields);
                break;

            default:
                // kNewFieldStart: nothing pending.
                // kQuotesField: the quoted field was never closed; the row is
                // dropped.
                break;
        }
    }
}

CCSVParse::CCSVParse() : _useSimpleModel(false) {}

CCSVParse::~CCSVParse() {}

// Split `str` on every occurrence of `separator`.
// Returns an empty vector for an empty `str`. A trailing separator yields a
// trailing empty token ("a,b," -> "a", "b", ""). An empty separator cannot
// split anything, so the whole string is returned as the only token.
std::vector<std::string> CCSVParse::splitString(const std::string &str, const std::string &separator)
{
    std::vector<std::string> resVec;
    if (str.empty())
    {
        return resVec;
    }
    if (separator.empty())
    {
        resVec.push_back(str);
        return resVec;
    }

    // Search with a moving start offset instead of re-copying the remaining
    // text for every token, and step over the whole separator.
    size_t start = 0;
    size_t pos = str.find(separator, start);
    while (pos != std::string::npos)
    {
        resVec.push_back(str.substr(start, pos - start));
        start = pos + separator.size();
        pos = str.find(separator, start);
    }
    // The last segment (possibly empty).
    resVec.push_back(str.substr(start));
    return resVec;
}

// Read the whole file in binary mode.
// Returns an empty string when the file cannot be opened, its size cannot be
// determined, or it is empty; a short read returns only the bytes obtained.
std::string CCSVParse::loadCsvFile(const std::string &fileName)
{
    FILE *pFile = fopen(fileName.c_str(), "rb");
    if (nullptr == pFile)
    {
        return "";
    }

    std::string content;
    if (0 == fseek(pFile, 0, SEEK_END))             // move to the end of the file
    {
        const long len = ftell(pFile);              // file size, -1 on failure
        if (len > 0 && 0 == fseek(pFile, 0, SEEK_SET))
        {
            content.resize(static_cast<size_t>(len));
            const size_t got = fread(&content[0], 1, content.size(), pFile);
            content.resize(got);                    // keep only what was read
        }
    }
    fclose(pFile);
    return content;
}

// Parse `fileName` into rows of fields, store them in the object and return a
// copy. Returns an empty grid when the file is missing or empty.
// Simple mode splits on '\n' and then on the separator. Standard mode also
// understands double-quoted fields (which may contain the separator) and ""
// as an escaped quote; blank lines are skipped and the time spent is printed.
std::vector< std::vector<std::string> > CCSVParse::parseCsvFile(const std::string &fileName, const std::string &separator)
{
    const clock_t before = clock();
    _gridData.clear();

    const std::string strAllData = loadCsvFile(fileName);
    if (strAllData.empty())
    {
        return _gridData;
    }

    // The separator must be a single character.
    assert(separator.size() == 1);

    // Simple mode: fields must not contain the separator.
    if (_useSimpleModel)
    {
        std::cout<<"使用簡易模式解析"<<std::endl;
        const std::vector<std::string> lines = splitString(strAllData, "\n");
        for (size_t i = 0; i < lines.size(); ++i)
        {
            _gridData.push_back(splitString(lines[i], separator));
        }
        return _gridData;
    }

    // Standard mode: fields may contain the separator when quoted.
    const char sep = separator.at(0);
    const std::vector<std::string> vecRows = splitString(strAllData, "\n");
    for (size_t i = 0; i < vecRows.size(); ++i)
    {
        if (vecRows[i].empty())
        {
            continue;
        }
        appendRowsFromLine(vecRows[i], sep, _gridData);
    }

    const float used = static_cast<float>(clock() - before) / CLOCKS_PER_SEC;
    std::cout<<"解析此csv花費時間:"<<used<<"S"<<std::endl;
    return _gridData;
}

// Print the row count and every row (tab-separated fields) to std::cout.
void CCSVParse::printParseData() const
{
    // NOTE(review): the literal below appears to carry transliteration residue
    // ("(shù)", "(jù)"); it is runtime text and is kept byte-for-byte - confirm
    // against the original source.
    std::cout<<"以下是解析的csv數(shù)據(jù):"<<std::endl;
    std::cout<<"row counts:"<<_gridData.size()<<std::endl;
    for (size_t row = 0; row < _gridData.size(); ++row)
    {
        const std::vector<std::string> &rowData = _gridData[row];
        for (size_t col = 0; col < rowData.size(); ++col)
        {
            std::cout<<rowData[col]<<"\t";
        }
        std::cout<<"\n"<<std::endl;
    }
}

// Parse the file and index it as id -> (column name -> value).
// The first row supplies the column names and the first field of each row is
// the id. Rows without any field are skipped, and a column that a short row
// does not have is stored as an empty string.
std::map<std::string, std::map<std::string, std::string> > CCSVParse::parseCsvFileToMap(const std::string &fileName, const std::string &separator)
{
    // Get all rows and columns first.
    const std::vector< std::vector<std::string> > allData = parseCsvFile(fileName, separator);

    std::map<std::string, std::map<std::string, std::string> > mapAllData;
    if (allData.empty())
    {
        return mapAllData;
    }

    // The first row holds the column names used as keys.
    const std::vector<std::string> &keyData = allData.front();
    for (size_t row = 1; row < allData.size(); ++row)
    {
        const std::vector<std::string> &rowData = allData[row];
        if (rowData.empty())
        {
            continue;   // no id to index this row by
        }

        std::map<std::string, std::string> mapRow;
        for (size_t col = 0; col < keyData.size(); ++col)
        {
            mapRow[keyData[col]] = (col < rowData.size()) ? rowData[col] : std::string();
        }
        // The first column of every row is the id.
        mapAllData[rowData.front()] = mapRow;
    }
    return mapAllData;
}

#pragma mark - Global type-conversion functions

// String -> int; returns 0 when no number can be extracted.
int conToInt(std::string &source)
{
    std::stringstream ss;
    int res = 0;    // stays 0 when the extraction fails before writing a value
    ss<<source;
    ss>>res;
    return res;
}

// String -> float; returns 0 when no number can be extracted.
float conToFloat(std::string &source)
{
    std::stringstream ss;
    float res = 0.0f;
    ss<<source;
    ss>>res;
    return res;
}

// String -> double; returns 0 when no number can be extracted.
double conToDouble(std::string &source)
{
    std::stringstream ss;
    double res = 0.0;
    ss<<source;
    ss>>res;
    return res;
}

// int -> string.
std::string conToString(int s)
{
    std::stringstream ss;
    ss<<s;
    return ss.str();
}

// float -> string (default stream precision).
std::string conToString(float s)
{
    std::stringstream ss;
    ss<<s;
    return ss.str();
}

// double -> string (default stream precision).
std::string conToString(double s)
{
    std::stringstream ss;
    ss<<s;
    return ss.str();
}

// Test:
test.csv:
fid,name,age
1,小明,20
2,小剛,20
3,小紅,19
解析結果:
1 小明 20
2 小剛 20
3 小紅 19
下載鏈接:百度云盤下載
轉載于:https://www.cnblogs.com/skyxu123/p/9543806.html
總結
- 上一篇: 7、继承
- 下一篇: HBase 1.1.2 优化插入 Reg