如何在C++中解析逗号分隔的字符串,其中部分元素带有引号且自身包含逗号



我有一个逗号分隔的字符串,我想将其中的各个元素存储在字符串向量中。字符串和向量为:

// Sample input: fields are separated by commas, and the last single-quoted
// field contains a comma of its own.
string s = "1, 10, 'abc', 'test, 1'";
// Destination for the individual fields.
vector<string> v;

理想情况下,我希望字符串 "abc" 和 "test, 1" 在存储时不带单引号,如下所示;不过带着单引号存储也可以接受:

// Desired contents of v for the sample input: the single quotes are stripped
// and the comma inside the last quoted field is kept.
v[0] = "1";
v[1] = "10";
v[2] = "abc";
v[3] = "test, 1";
/// Extracts the next comma-separated token from `s`, scanning from offset `start`.
///
/// Leading spaces and tabs are skipped. A token that begins with a single quote
/// runs up to the next single quote and is returned without the quotes, so it may
/// contain commas and blanks. Any other token ends at the first space, tab or comma.
///
/// @param s      Text being tokenized.
/// @param start  In: offset at which to resume scanning. Out: the offset just past
///               the comma that terminated the token, `s.size()` when no further
///               comma exists, or `std::string::npos` when the function returns
///               `false`.
/// @param token  Receives the extracted token; it is cleared on every call.
/// @return `true` if a token was extracted, `false` if only spaces/tabs (or
///         nothing at all) remained.
bool nextToken(const std::string &s, std::string::size_type &start, std::string &token)
{
    token.clear();

    // Blanks are space and TAB; the escape matters, otherwise the letter 't'
    // would be swallowed as if it were whitespace.
    start = s.find_first_not_of(" \t", start);
    if (start == std::string::npos)
        return false;

    std::string::size_type end;
    if (s[start] == '\'')
    {
        // Quoted token: skip the opening quote and read up to the closing one.
        ++start;
        end = s.find('\'', start);
    }
    else
    {
        end = s.find_first_of(" \t,", start);
    }

    if (end == std::string::npos)
    {
        // No terminator: the token is the remainder of the input.
        token = s.substr(start);
        start = s.size();
        return true;
    }

    token = s.substr(start, end - start);

    // The token may have stopped at a blank or a closing quote; in that case
    // move on to the comma that really separates it from the next token.
    if (s[end] != ',')
        end = s.find(',', end + 1);

    start = (end == std::string::npos) ? s.size() : end + 1;
    return true;
}
// Example driver: keeps pulling tokens out of s and collects them in v.
string s = "1, 10, 'abc', 'test, 1'", token;
vector<string> v;

// start is passed by reference, so nextToken can record where the next call
// should resume; the loop ends when nextToken returns false.
string::size_type start = 0;
while (nextToken(s, start, token))
v.push_back(token);

演示

这里你需要做的是自己编写一个解析器,这样就可以按照你想要的方式进行解析:

#include <cctype>
#include <string>
#include <vector>
using namespace std;
/// Splits a comma-separated line into fields, honouring single-quoted fields.
///
/// Rules implemented below:
///  - a single quote toggles "quoted" mode and is never copied into a field;
///  - a comma outside quotes ends the current field, a comma inside quotes is
///    ordinary text;
///  - a closing quote also ends the current field;
///  - whitespace outside quotes is discarded, whitespace inside quotes is kept;
///  - empty fields are never stored in the result.
///
/// @param master  The line to split.
/// @return The non-empty fields in the order they appear.
std::vector<std::string> parse_string(std::string master) {
    std::vector<std::string> result;
    std::string current;      // field collected so far
    bool in_quotes = false;   // true between an opening and a closing quote

    // Stores the pending field (if any) and starts a fresh one. Skipping empty
    // fields here replaces a clean-up pass over the finished vector.
    const auto flush = [&result, &current] {
        if (!current.empty()) {
            result.push_back(current);
            current.clear();
        }
    };

    for (const char ch : master) {
        if (ch == '\'') {
            in_quotes = !in_quotes;
            if (!in_quotes) {
                flush();  // closing quote terminates the quoted field
            }
        } else if (ch == ',' && !in_quotes) {
            flush();      // unquoted comma is the field separator
        } else if (in_quotes || !std::isspace(static_cast<unsigned char>(ch))) {
            // The cast keeps isspace well-defined for negative char values.
            current.push_back(ch);
        }
    }

    // The last field has no trailing comma, so it must be stored explicitly.
    flush();
    return result;
}

这将根据需要解析字符串,并返回一个向量。然后,你可以在你的程序中使用它:

#include <iostream>
/// Demo entry point: parses the sample line and prints every field on its own line.
int main() {
    const std::string line = "1, 10, 'abc', 'test, 1'";
    const std::vector<std::string> fields = parse_string(line);
    for (const std::string& field : fields) {
        std::cout << field << std::endl;
    }
}

它正确地打印出:

1
10
abc
test, 1

一个合适的解决方案需要实现一个解析器。如果你只需要一个快速的权宜之计,只需编写一个读取单元格的函数(演示)。C++14 的 std::quoted 操纵符在这里有很大的帮助。唯一的问题是该操纵符需要一个流;使用 istringstream 可以很容易地解决这个问题——请参阅第二个函数。请注意,字符串的格式为 CELL COMMA CELL COMMA ... CELL

/// Reads one cell from `is` into `s`.
///
/// Leading whitespace is skipped. If the cell starts with a single quote it is
/// read with std::quoted (delimiter `'`, escape `\`), which strips the quotes.
/// Otherwise everything up to the next comma (or the end of the stream) is read;
/// the comma itself is put back so that the caller can consume it as the cell
/// separator.
///
/// @param is  Stream to read from.
/// @param s   Receives the cell text; left untouched when nothing could be read.
/// @return `is`, whose state tells whether a cell was read.
std::istream& get_cell(std::istream& is, std::string& s)
{
    char first = '\0';
    if (!(is >> first))  // skips whitespace; fails once the input is exhausted
        return is;
    is.unget();          // the first character belongs to the cell, read it again below

    if (first == '\'')
        return is >> std::quoted(s, '\'', '\\');

    std::getline(is, s, ',');
    // getline consumed the comma only if it stopped before the end of the stream.
    // Ungetting after hitting EOF would push back the last character of the cell.
    if (is.good())
        is.unget();
    return is;
}

vector<string> get(const string& s)
{
istringstream iss{ s };
string cell;
vector<string> r;
while (get_cell(iss, cell))
{
r.push_back( cell );
char comma;
iss >> comma; // expect a cell separator
if (comma != ',')
break; // cell separator not found; we are at the end of stream/string - break the loop
}
if (char c; iss >> c) // we reached the end of what we understand - probe the end of stream
throw "ill formed";
return r;
}

这就是你使用它的方式:

/// Demo entry point: splits the sample line with get() and prints the error
/// message if get() reports malformed input.
int main()
{
    const std::string line{ "1, 10, 'abc', 'test, 1'" };
    try
    {
        const auto cells = get(line);
    }
    catch (const char* message)
    {
        std::cout << message;
    }
}

最新更新