1 /*
2 * CsvParser.hpp
3 *
4 * Copyright (C) 2021 by RStudio, PBC
5 *
6 * Unless you have received this program directly from RStudio pursuant
7 * to the terms of a commercial license agreement with RStudio, then
8 * this program is licensed to you under the terms of version 3 of the
9 * GNU Affero General Public License. This program is distributed WITHOUT
10 * ANY EXPRESS OR IMPLIED WARRANTY, INCLUDING THOSE OF NON-INFRINGEMENT,
11 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. Please refer to the
12 * AGPL (http://www.gnu.org/licenses/agpl-3.0.txt) for more details.
13 *
14 */
15
16 #ifndef CSV_PARSER_HPP
17 #define CSV_PARSER_HPP
18
19 #include <string>
20 #include <vector>
21 #include <boost/algorithm/string/replace.hpp>
22
23 namespace rstudio {
24 namespace core {
25 namespace text {
26
/*
   Parses up to one line (record) of CSV data. Empty lines are skipped.

   The value returned is a pair consisting of the fields of the line
   that was successfully parsed and an iterator value that indicates
   where parsing should begin next time.

   If less than one line of CSV data is available, an empty vector
   is returned.

   Parameters:
     begin            start of the character range to parse
     end              end of the character range to parse
     allowMissingEOL  when false (the default), a final line that is not
                      terminated by '\n' is treated as incomplete: an
                      empty vector is returned along with an iterator to
                      the start of that line. When true, such a line is
                      returned as a complete line and the returned
                      iterator is `end`; if nothing but empty lines
                      remained, an empty vector and `end` are returned.

   Parsing rules (quoting follows RFC4180):
     - Fields are separated by ','; a line is terminated by '\n'.
       Any '\r' outside of a quoted field is dropped.
     - Inside a quoted field, "" produces a literal quote character,
       and commas and line breaks are ordinary characters.
     - An opening quote discards any unquoted text already collected
       for the current field.
     - A line consisting of a single empty field (nothing at all, or
       just "") is treated as an empty line and skipped.

   Note that if parseCsvLine is called in a loop, the termination
   condition should be that the returned vector is empty, NOT that
   the returned iterator == end. (In the case of malformed or
   incomplete CSV data that do not end with a line break, the
   returned iterator will never move past the beginning of the
   last line unless allowMissingEOL is true.)
*/

template <typename InputIterator>
std::pair<std::vector<std::string>, InputIterator> parseCsvLine(
      InputIterator begin,
      InputIterator end,
      bool allowMissingEOL = false)
{
   typedef std::pair<std::vector<std::string>, InputIterator> Result;

   std::vector<std::string> line;
   std::string element;
   bool inQuote = false;

   InputIterator pos = begin;
   while (pos != end)
   {
      if (inQuote)
      {
         if (*pos != '"')
         {
            // ordinary character inside a quoted field (may be ',' or '\n')
            element.push_back(*pos);
            ++pos;
            continue;
         }

         // a quote inside a quoted field: look at the following character
         // to decide between an escaped quote ("") and the closing quote
         ++pos;
         if (pos != end && *pos == '"')
         {
            element.push_back('"');
            ++pos;
         }
         else
         {
            // closing quote; pos already refers to the character after it,
            // which is handled as unquoted input on the next iteration
            inQuote = false;
         }
         continue;
      }

      if (*pos == '\n')
      {
         // finish the current field unless the whole line is empty
         if (!element.empty() || !line.empty())
         {
            line.push_back(element);
            element.clear();
         }

         // the next line (if any) starts right after the line break
         begin = ++pos;

         // don't return blank lines
         if (!line.empty())
            return Result(line, begin);

         continue;
      }

      if (*pos == '"')
      {
         // starting a quote; drops unquoted text seen so far in this field
         element.clear();
         inQuote = true;
      }
      else if (*pos == ',')
      {
         line.push_back(element);
         element.clear();
      }
      else if (*pos != '\r')
      {
         // '\r' is ignored--a '\n' is expected next
         element.push_back(*pos);
      }

      ++pos;
   }

   // if we got here, we failed to find a (terminating) newline
   if (allowMissingEOL)
   {
      // Treat the unterminated remainder as a line, but apply the same
      // "skip blank lines" rule as for newline-terminated lines. Without
      // this guard an exhausted input would yield a bogus one-field line
      // and a caller looping until the result is empty would never stop.
      if (!element.empty() || !line.empty())
         line.push_back(element);

      return Result(line, end);
   }

   // incomplete line: report nothing and leave it unconsumed
   return Result(std::vector<std::string>(), begin);
}
140
// Out-parameter flavor of parseCsvLine: parses up to one line of CSV data
// from [begin, end), stores the resulting fields in *pParsedCsv (which is
// left empty when no line was parsed) and returns the position at which
// parsing should resume. pParsedCsv is dereferenced unconditionally, so it
// must point to a valid vector.
template <typename InputIterator>
InputIterator parseCsvLine(InputIterator begin,
                           InputIterator end,
                           bool allowMissingEOL,
                           std::vector<std::string>* pParsedCsv)
{
   typedef std::pair<std::vector<std::string>, InputIterator> ParseResult;

   // delegate to the pair-returning overload, then unpack its result
   ParseResult result = parseCsvLine(begin, end, allowMissingEOL);
   *pParsedCsv = result.first;
   return result.second;
}
152
153
// Encodes a vector of string values to a line of RFC4180 CSV.
//
// Every value is wrapped in double quotes, and each double quote inside a
// value is escaped by doubling it. Values are joined with commas; an empty
// vector produces an empty string.
//
// Note that it's the caller's responsibility to add a terminating newline.
inline std::string encodeCsvLine(const std::vector<std::string>& values)
{
   std::string line;
   for (std::vector<std::string>::size_type i = 0; i < values.size(); ++i)
   {
      // separate from the previous value (no trailing comma is emitted)
      if (i > 0)
         line.push_back(',');

      const std::string& val = values[i];

      // append the quoted value directly, doubling embedded quotes as we go
      line.push_back('"');
      for (std::string::size_type j = 0; j < val.size(); ++j)
      {
         if (val[j] == '"')
            line.push_back('"');
         line.push_back(val[j]);
      }
      line.push_back('"');
   }
   return line;
}
173
174 } // namespace text
175 } // namespace core
176 } // namespace rstudio
177
178 #endif // CSV_PARSER_HPP
179