1 /*
2   SPDX-FileCopyrightText: 2009 Tobias Koenig <tokoe@kde.org>
3 
4   SPDX-License-Identifier: LGPL-2.0-or-later
5 */
6 
7 #include "qcsvreader.h"
8 
9 #include <KLocalizedString>
10 #include <QStringList>
11 #include <QTextCodec>
12 #include <QTextStream>
13 #include <QVector>
14 
~QCsvBuilderInterface()15 QCsvBuilderInterface::~QCsvBuilderInterface()
16 {
17 }
18 
19 class QCsvReaderPrivate
20 {
21 public:
QCsvReaderPrivate(QCsvBuilderInterface * builder)22     explicit QCsvReaderPrivate(QCsvBuilderInterface *builder)
23         : mBuilder(builder)
24         , mCodec(QTextCodec::codecForLocale())
25     {
26     }
27 
28     void emitBeginLine(uint row);
29     void emitEndLine(uint row);
30     void emitField(const QString &data, int row, int column);
31 
32     QCsvBuilderInterface *const mBuilder;
33     QTextCodec *mCodec = nullptr;
34     QChar mTextQuote = QLatin1Char('"');
35     QChar mDelimiter = QLatin1Char(' ');
36 
37     uint mStartRow = 0;
38     bool mNotTerminated = true;
39 };
40 
emitBeginLine(uint row)41 void QCsvReaderPrivate::emitBeginLine(uint row)
42 {
43     if ((row - mStartRow) > 0) {
44         mBuilder->beginLine();
45     }
46 }
47 
emitEndLine(uint row)48 void QCsvReaderPrivate::emitEndLine(uint row)
49 {
50     if ((row - mStartRow) > 0) {
51         mBuilder->endLine();
52     }
53 }
54 
emitField(const QString & data,int row,int column)55 void QCsvReaderPrivate::emitField(const QString &data, int row, int column)
56 {
57     if ((row - mStartRow) > 0) {
58         mBuilder->field(data, row - mStartRow - 1, column - 1);
59     }
60 }
61 
QCsvReader(QCsvBuilderInterface * builder)62 QCsvReader::QCsvReader(QCsvBuilderInterface *builder)
63     : d(new QCsvReaderPrivate(builder))
64 {
65     Q_ASSERT(builder);
66 }
67 
68 QCsvReader::~QCsvReader() = default;
69 
read(QIODevice * device)70 bool QCsvReader::read(QIODevice *device)
71 {
72     enum State {
73         StartLine,
74         QuotedField,
75         QuotedFieldEnd,
76         NormalField,
77         EmptyField,
78     };
79 
80     int row;
81     int column;
82 
83     QString field;
84     QChar input;
85     State currentState = StartLine;
86 
87     row = column = 1;
88 
89     d->mBuilder->begin();
90 
91     if (!device->isOpen()) {
92         d->emitBeginLine(row);
93         d->mBuilder->error(i18n("Device is not open"));
94         d->emitEndLine(row);
95         d->mBuilder->end();
96         return false;
97     }
98 
99     QTextStream inputStream(device);
100     inputStream.setCodec(d->mCodec);
101 
102     /**
103      * We use the following state machine to parse CSV:
104      *
105      * digraph {
106      *   StartLine -> StartLine [label="\\r\\n"]
107      *   StartLine -> QuotedField [label="Quote"]
108      *   StartLine -> EmptyField [label="Delimiter"]
109      *   StartLine -> NormalField [label="Other Char"]
110      *
111      *   QuotedField -> QuotedField [label="\\r\\n"]
112      *   QuotedField -> QuotedFieldEnd [label="Quote"]
113      *   QuotedField -> QuotedField [label="Delimiter"]
114      *   QuotedField -> QuotedField [label="Other Char"]
115      *
116      *   QuotedFieldEnd -> StartLine [label="\\r\\n"]
117      *   QuotedFieldEnd -> QuotedField [label="Quote"]
118      *   QuotedFieldEnd -> EmptyField [label="Delimiter"]
119      *   QuotedFieldEnd -> EmptyField [label="Other Char"]
120      *
121      *   EmptyField -> StartLine [label="\\r\\n"]
122      *   EmptyField -> QuotedField [label="Quote"]
123      *   EmptyField -> EmptyField [label="Delimiter"]
124      *   EmptyField -> NormalField [label="Other Char"]
125      *
126      *   NormalField -> StartLine [label="\\r\\n"]
127      *   NormalField -> NormalField [label="Quote"]
128      *   NormalField -> EmptyField [label="Delimiter"]
129      *   NormalField -> NormalField [label="Other Char"]
130      * }
131      */
132 
133     while (!inputStream.atEnd() && d->mNotTerminated) {
134         inputStream >> input;
135 
136         switch (currentState) {
137         case StartLine:
138             if (input == QLatin1Char('\r') || input == QLatin1Char('\n')) {
139                 currentState = StartLine;
140             } else if (input == d->mTextQuote) {
141                 d->emitBeginLine(row);
142                 currentState = QuotedField;
143             } else if (input == d->mDelimiter) {
144                 d->emitBeginLine(row);
145                 d->emitField(field, row, column);
146                 column++;
147                 currentState = EmptyField;
148             } else {
149                 d->emitBeginLine(row);
150                 field.append(input);
151                 currentState = NormalField;
152             }
153             break;
154         case QuotedField:
155             if (input == QLatin1Char('\r') || input == QLatin1Char('\n')) {
156                 field.append(input);
157                 currentState = QuotedField;
158             } else if (input == d->mTextQuote) {
159                 currentState = QuotedFieldEnd;
160             } else if (input == d->mDelimiter) {
161                 field.append(input);
162                 currentState = QuotedField;
163             } else {
164                 field.append(input);
165                 currentState = QuotedField;
166             }
167             break;
168         case QuotedFieldEnd:
169             if (input == QLatin1Char('\r') || input == QLatin1Char('\n')) {
170                 d->emitField(field, row, column);
171                 field.clear();
172                 d->emitEndLine(row);
173                 column = 1;
174                 row++;
175                 currentState = StartLine;
176             } else if (input == d->mTextQuote) {
177                 field.append(input);
178                 currentState = QuotedField;
179             } else if (input == d->mDelimiter) {
180                 d->emitField(field, row, column);
181                 field.clear();
182                 column++;
183                 currentState = EmptyField;
184             } else {
185                 d->emitField(field, row, column);
186                 field.clear();
187                 column++;
188                 field.append(input);
189                 currentState = EmptyField;
190             }
191             break;
192         case NormalField:
193             if (input == QLatin1Char('\r') || input == QLatin1Char('\n')) {
194                 d->emitField(field, row, column);
195                 field.clear();
196                 d->emitEndLine(row);
197                 row++;
198                 column = 1;
199                 currentState = StartLine;
200             } else if (input == d->mTextQuote) {
201                 field.append(input);
202                 currentState = NormalField;
203             } else if (input == d->mDelimiter) {
204                 d->emitField(field, row, column);
205                 field.clear();
206                 column++;
207                 currentState = EmptyField;
208             } else {
209                 field.append(input);
210                 currentState = NormalField;
211             }
212             break;
213         case EmptyField:
214             if (input == QLatin1Char('\r') || input == QLatin1Char('\n')) {
215                 d->emitField(QString(), row, column);
216                 field.clear();
217                 d->emitEndLine(row);
218                 column = 1;
219                 row++;
220                 currentState = StartLine;
221             } else if (input == d->mTextQuote) {
222                 currentState = QuotedField;
223             } else if (input == d->mDelimiter) {
224                 d->emitField(QString(), row, column);
225                 column++;
226                 currentState = EmptyField;
227             } else {
228                 field.append(input);
229                 currentState = NormalField;
230             }
231             break;
232         }
233     }
234 
235     if (currentState != StartLine) {
236         if (field.length() > 0) {
237             d->emitField(field, row, column);
238             ++row;
239             field.clear();
240         }
241         d->emitEndLine(row);
242     }
243 
244     d->mBuilder->end();
245 
246     return true;
247 }
248 
setTextQuote(QChar textQuote)249 void QCsvReader::setTextQuote(QChar textQuote)
250 {
251     d->mTextQuote = textQuote;
252 }
253 
textQuote() const254 QChar QCsvReader::textQuote() const
255 {
256     return d->mTextQuote;
257 }
258 
setDelimiter(QChar delimiter)259 void QCsvReader::setDelimiter(QChar delimiter)
260 {
261     d->mDelimiter = delimiter;
262 }
263 
delimiter() const264 QChar QCsvReader::delimiter() const
265 {
266     return d->mDelimiter;
267 }
268 
setStartRow(uint startRow)269 void QCsvReader::setStartRow(uint startRow)
270 {
271     d->mStartRow = startRow;
272 }
273 
startRow() const274 uint QCsvReader::startRow() const
275 {
276     return d->mStartRow;
277 }
278 
setTextCodec(QTextCodec * textCodec)279 void QCsvReader::setTextCodec(QTextCodec *textCodec)
280 {
281     d->mCodec = textCodec;
282 }
283 
textCodec() const284 QTextCodec *QCsvReader::textCodec() const
285 {
286     return d->mCodec;
287 }
288 
terminate()289 void QCsvReader::terminate()
290 {
291     d->mNotTerminated = false;
292 }
293 
294 class QCsvStandardBuilderPrivate
295 {
296 public:
QCsvStandardBuilderPrivate()297     QCsvStandardBuilderPrivate()
298     {
299         init();
300     }
301 
302     void init();
303 
304     QString mLastErrorString;
305     uint mRowCount;
306     uint mColumnCount;
307     QVector<QStringList> mRows;
308 };
309 
init()310 void QCsvStandardBuilderPrivate::init()
311 {
312     mRows.clear();
313     mRowCount = 0;
314     mColumnCount = 0;
315     mLastErrorString.clear();
316 }
317 
QCsvStandardBuilder()318 QCsvStandardBuilder::QCsvStandardBuilder()
319     : d(new QCsvStandardBuilderPrivate)
320 {
321 }
322 
323 QCsvStandardBuilder::~QCsvStandardBuilder() = default;
324 
lastErrorString() const325 QString QCsvStandardBuilder::lastErrorString() const
326 {
327     return d->mLastErrorString;
328 }
329 
rowCount() const330 uint QCsvStandardBuilder::rowCount() const
331 {
332     return d->mRowCount;
333 }
334 
columnCount() const335 uint QCsvStandardBuilder::columnCount() const
336 {
337     return d->mColumnCount;
338 }
339 
data(uint row,uint column) const340 QString QCsvStandardBuilder::data(uint row, uint column) const
341 {
342     if (row > d->mRowCount || column > d->mColumnCount || column >= (uint)d->mRows[row].count()) {
343         return QString();
344     }
345 
346     return d->mRows[row][column];
347 }
348 
begin()349 void QCsvStandardBuilder::begin()
350 {
351     d->init();
352 }
353 
beginLine()354 void QCsvStandardBuilder::beginLine()
355 {
356     d->mRows.append(QStringList());
357     d->mRowCount++;
358 }
359 
field(const QString & data,uint row,uint column)360 void QCsvStandardBuilder::field(const QString &data, uint row, uint column)
361 {
362     const uint size = d->mRows[row].size();
363     if (column >= size) {
364         for (uint i = column; i < size + 1; ++i) {
365             d->mRows[row].append(QString());
366         }
367     }
368 
369     d->mRows[row][column] = data;
370 
371     d->mColumnCount = qMax(d->mColumnCount, column + 1);
372 }
373 
endLine()374 void QCsvStandardBuilder::endLine()
375 {
376 }
377 
end()378 void QCsvStandardBuilder::end()
379 {
380 }
381 
error(const QString & errorMsg)382 void QCsvStandardBuilder::error(const QString &errorMsg)
383 {
384     d->mLastErrorString = errorMsg;
385 }
386