1 /*
2 SPDX-FileCopyrightText: 2009 Tobias Koenig <tokoe@kde.org>
3
4 SPDX-License-Identifier: LGPL-2.0-or-later
5 */
6
7 #include "qcsvreader.h"
8
9 #include <KLocalizedString>
10 #include <QStringList>
11 #include <QTextCodec>
12 #include <QTextStream>
13 #include <QVector>
14
~QCsvBuilderInterface()15 QCsvBuilderInterface::~QCsvBuilderInterface()
16 {
17 }
18
19 class QCsvReaderPrivate
20 {
21 public:
QCsvReaderPrivate(QCsvBuilderInterface * builder)22 explicit QCsvReaderPrivate(QCsvBuilderInterface *builder)
23 : mBuilder(builder)
24 , mCodec(QTextCodec::codecForLocale())
25 {
26 }
27
28 void emitBeginLine(uint row);
29 void emitEndLine(uint row);
30 void emitField(const QString &data, int row, int column);
31
32 QCsvBuilderInterface *const mBuilder;
33 QTextCodec *mCodec = nullptr;
34 QChar mTextQuote = QLatin1Char('"');
35 QChar mDelimiter = QLatin1Char(' ');
36
37 uint mStartRow = 0;
38 bool mNotTerminated = true;
39 };
40
emitBeginLine(uint row)41 void QCsvReaderPrivate::emitBeginLine(uint row)
42 {
43 if ((row - mStartRow) > 0) {
44 mBuilder->beginLine();
45 }
46 }
47
emitEndLine(uint row)48 void QCsvReaderPrivate::emitEndLine(uint row)
49 {
50 if ((row - mStartRow) > 0) {
51 mBuilder->endLine();
52 }
53 }
54
emitField(const QString & data,int row,int column)55 void QCsvReaderPrivate::emitField(const QString &data, int row, int column)
56 {
57 if ((row - mStartRow) > 0) {
58 mBuilder->field(data, row - mStartRow - 1, column - 1);
59 }
60 }
61
QCsvReader(QCsvBuilderInterface * builder)62 QCsvReader::QCsvReader(QCsvBuilderInterface *builder)
63 : d(new QCsvReaderPrivate(builder))
64 {
65 Q_ASSERT(builder);
66 }
67
68 QCsvReader::~QCsvReader() = default;
69
read(QIODevice * device)70 bool QCsvReader::read(QIODevice *device)
71 {
72 enum State {
73 StartLine,
74 QuotedField,
75 QuotedFieldEnd,
76 NormalField,
77 EmptyField,
78 };
79
80 int row;
81 int column;
82
83 QString field;
84 QChar input;
85 State currentState = StartLine;
86
87 row = column = 1;
88
89 d->mBuilder->begin();
90
91 if (!device->isOpen()) {
92 d->emitBeginLine(row);
93 d->mBuilder->error(i18n("Device is not open"));
94 d->emitEndLine(row);
95 d->mBuilder->end();
96 return false;
97 }
98
99 QTextStream inputStream(device);
100 inputStream.setCodec(d->mCodec);
101
102 /**
103 * We use the following state machine to parse CSV:
104 *
105 * digraph {
106 * StartLine -> StartLine [label="\\r\\n"]
107 * StartLine -> QuotedField [label="Quote"]
108 * StartLine -> EmptyField [label="Delimiter"]
109 * StartLine -> NormalField [label="Other Char"]
110 *
111 * QuotedField -> QuotedField [label="\\r\\n"]
112 * QuotedField -> QuotedFieldEnd [label="Quote"]
113 * QuotedField -> QuotedField [label="Delimiter"]
114 * QuotedField -> QuotedField [label="Other Char"]
115 *
116 * QuotedFieldEnd -> StartLine [label="\\r\\n"]
117 * QuotedFieldEnd -> QuotedField [label="Quote"]
118 * QuotedFieldEnd -> EmptyField [label="Delimiter"]
119 * QuotedFieldEnd -> EmptyField [label="Other Char"]
120 *
121 * EmptyField -> StartLine [label="\\r\\n"]
122 * EmptyField -> QuotedField [label="Quote"]
123 * EmptyField -> EmptyField [label="Delimiter"]
124 * EmptyField -> NormalField [label="Other Char"]
125 *
126 * NormalField -> StartLine [label="\\r\\n"]
127 * NormalField -> NormalField [label="Quote"]
128 * NormalField -> EmptyField [label="Delimiter"]
129 * NormalField -> NormalField [label="Other Char"]
130 * }
131 */
132
133 while (!inputStream.atEnd() && d->mNotTerminated) {
134 inputStream >> input;
135
136 switch (currentState) {
137 case StartLine:
138 if (input == QLatin1Char('\r') || input == QLatin1Char('\n')) {
139 currentState = StartLine;
140 } else if (input == d->mTextQuote) {
141 d->emitBeginLine(row);
142 currentState = QuotedField;
143 } else if (input == d->mDelimiter) {
144 d->emitBeginLine(row);
145 d->emitField(field, row, column);
146 column++;
147 currentState = EmptyField;
148 } else {
149 d->emitBeginLine(row);
150 field.append(input);
151 currentState = NormalField;
152 }
153 break;
154 case QuotedField:
155 if (input == QLatin1Char('\r') || input == QLatin1Char('\n')) {
156 field.append(input);
157 currentState = QuotedField;
158 } else if (input == d->mTextQuote) {
159 currentState = QuotedFieldEnd;
160 } else if (input == d->mDelimiter) {
161 field.append(input);
162 currentState = QuotedField;
163 } else {
164 field.append(input);
165 currentState = QuotedField;
166 }
167 break;
168 case QuotedFieldEnd:
169 if (input == QLatin1Char('\r') || input == QLatin1Char('\n')) {
170 d->emitField(field, row, column);
171 field.clear();
172 d->emitEndLine(row);
173 column = 1;
174 row++;
175 currentState = StartLine;
176 } else if (input == d->mTextQuote) {
177 field.append(input);
178 currentState = QuotedField;
179 } else if (input == d->mDelimiter) {
180 d->emitField(field, row, column);
181 field.clear();
182 column++;
183 currentState = EmptyField;
184 } else {
185 d->emitField(field, row, column);
186 field.clear();
187 column++;
188 field.append(input);
189 currentState = EmptyField;
190 }
191 break;
192 case NormalField:
193 if (input == QLatin1Char('\r') || input == QLatin1Char('\n')) {
194 d->emitField(field, row, column);
195 field.clear();
196 d->emitEndLine(row);
197 row++;
198 column = 1;
199 currentState = StartLine;
200 } else if (input == d->mTextQuote) {
201 field.append(input);
202 currentState = NormalField;
203 } else if (input == d->mDelimiter) {
204 d->emitField(field, row, column);
205 field.clear();
206 column++;
207 currentState = EmptyField;
208 } else {
209 field.append(input);
210 currentState = NormalField;
211 }
212 break;
213 case EmptyField:
214 if (input == QLatin1Char('\r') || input == QLatin1Char('\n')) {
215 d->emitField(QString(), row, column);
216 field.clear();
217 d->emitEndLine(row);
218 column = 1;
219 row++;
220 currentState = StartLine;
221 } else if (input == d->mTextQuote) {
222 currentState = QuotedField;
223 } else if (input == d->mDelimiter) {
224 d->emitField(QString(), row, column);
225 column++;
226 currentState = EmptyField;
227 } else {
228 field.append(input);
229 currentState = NormalField;
230 }
231 break;
232 }
233 }
234
235 if (currentState != StartLine) {
236 if (field.length() > 0) {
237 d->emitField(field, row, column);
238 ++row;
239 field.clear();
240 }
241 d->emitEndLine(row);
242 }
243
244 d->mBuilder->end();
245
246 return true;
247 }
248
setTextQuote(QChar textQuote)249 void QCsvReader::setTextQuote(QChar textQuote)
250 {
251 d->mTextQuote = textQuote;
252 }
253
textQuote() const254 QChar QCsvReader::textQuote() const
255 {
256 return d->mTextQuote;
257 }
258
setDelimiter(QChar delimiter)259 void QCsvReader::setDelimiter(QChar delimiter)
260 {
261 d->mDelimiter = delimiter;
262 }
263
delimiter() const264 QChar QCsvReader::delimiter() const
265 {
266 return d->mDelimiter;
267 }
268
setStartRow(uint startRow)269 void QCsvReader::setStartRow(uint startRow)
270 {
271 d->mStartRow = startRow;
272 }
273
startRow() const274 uint QCsvReader::startRow() const
275 {
276 return d->mStartRow;
277 }
278
setTextCodec(QTextCodec * textCodec)279 void QCsvReader::setTextCodec(QTextCodec *textCodec)
280 {
281 d->mCodec = textCodec;
282 }
283
textCodec() const284 QTextCodec *QCsvReader::textCodec() const
285 {
286 return d->mCodec;
287 }
288
terminate()289 void QCsvReader::terminate()
290 {
291 d->mNotTerminated = false;
292 }
293
294 class QCsvStandardBuilderPrivate
295 {
296 public:
QCsvStandardBuilderPrivate()297 QCsvStandardBuilderPrivate()
298 {
299 init();
300 }
301
302 void init();
303
304 QString mLastErrorString;
305 uint mRowCount;
306 uint mColumnCount;
307 QVector<QStringList> mRows;
308 };
309
init()310 void QCsvStandardBuilderPrivate::init()
311 {
312 mRows.clear();
313 mRowCount = 0;
314 mColumnCount = 0;
315 mLastErrorString.clear();
316 }
317
QCsvStandardBuilder()318 QCsvStandardBuilder::QCsvStandardBuilder()
319 : d(new QCsvStandardBuilderPrivate)
320 {
321 }
322
323 QCsvStandardBuilder::~QCsvStandardBuilder() = default;
324
lastErrorString() const325 QString QCsvStandardBuilder::lastErrorString() const
326 {
327 return d->mLastErrorString;
328 }
329
rowCount() const330 uint QCsvStandardBuilder::rowCount() const
331 {
332 return d->mRowCount;
333 }
334
columnCount() const335 uint QCsvStandardBuilder::columnCount() const
336 {
337 return d->mColumnCount;
338 }
339
data(uint row,uint column) const340 QString QCsvStandardBuilder::data(uint row, uint column) const
341 {
342 if (row > d->mRowCount || column > d->mColumnCount || column >= (uint)d->mRows[row].count()) {
343 return QString();
344 }
345
346 return d->mRows[row][column];
347 }
348
begin()349 void QCsvStandardBuilder::begin()
350 {
351 d->init();
352 }
353
beginLine()354 void QCsvStandardBuilder::beginLine()
355 {
356 d->mRows.append(QStringList());
357 d->mRowCount++;
358 }
359
field(const QString & data,uint row,uint column)360 void QCsvStandardBuilder::field(const QString &data, uint row, uint column)
361 {
362 const uint size = d->mRows[row].size();
363 if (column >= size) {
364 for (uint i = column; i < size + 1; ++i) {
365 d->mRows[row].append(QString());
366 }
367 }
368
369 d->mRows[row][column] = data;
370
371 d->mColumnCount = qMax(d->mColumnCount, column + 1);
372 }
373
endLine()374 void QCsvStandardBuilder::endLine()
375 {
376 }
377
end()378 void QCsvStandardBuilder::end()
379 {
380 }
381
error(const QString & errorMsg)382 void QCsvStandardBuilder::error(const QString &errorMsg)
383 {
384 d->mLastErrorString = errorMsg;
385 }
386