1 /*
2 * Copyright (C) 2016 Enrico Mariotti <enricomariotti@yahoo.it>
3 * Copyright (C) 2017 KeePassXC Team <team@keepassxc.org>
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 2 or (at your option)
8 * version 3 of the License.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19 #include "CsvParser.h"
20
21 #include <QObject>
22 #include <QTextCodec>
23
24 #include "core/Tools.h"
25
CsvParser()26 CsvParser::CsvParser()
27 : m_ch(0)
28 , m_comment('#')
29 , m_currCol(1)
30 , m_currRow(1)
31 , m_isBackslashSyntax(false)
32 , m_isEof(false)
33 , m_isFileLoaded(false)
34 , m_isGood(true)
35 , m_lastPos(-1)
36 , m_maxCols(0)
37 , m_qualifier('"')
38 , m_separator(',')
39 , m_statusMsg("")
40 {
41 m_csv.setBuffer(&m_array);
42 m_ts.setDevice(&m_csv);
43 m_csv.open(QIODevice::ReadOnly);
44 m_ts.setCodec("UTF-8");
45 }
46
~CsvParser()47 CsvParser::~CsvParser()
48 {
49 m_csv.close();
50 }
51
isFileLoaded()52 bool CsvParser::isFileLoaded()
53 {
54 return m_isFileLoaded;
55 }
56
reparse()57 bool CsvParser::reparse()
58 {
59 reset();
60 return parseFile();
61 }
62
parse(QFile * device)63 bool CsvParser::parse(QFile* device)
64 {
65 clear();
66 if (nullptr == device) {
67 appendStatusMsg(QObject::tr("NULL device"), true);
68 return false;
69 }
70 if (!readFile(device)) {
71 return false;
72 }
73 return parseFile();
74 }
75
readFile(QFile * device)76 bool CsvParser::readFile(QFile* device)
77 {
78 if (device->isOpen()) {
79 device->close();
80 }
81
82 device->open(QIODevice::ReadOnly);
83 if (!Tools::readAllFromDevice(device, m_array)) {
84 appendStatusMsg(QObject::tr("error reading from device"), true);
85 m_isFileLoaded = false;
86 } else {
87 device->close();
88
89 m_array.replace("\r\n", "\n");
90 m_array.replace("\r", "\n");
91 if (0 == m_array.size()) {
92 appendStatusMsg(QObject::tr("file empty").append("\n"));
93 }
94 m_isFileLoaded = true;
95 }
96 return m_isFileLoaded;
97 }
98
reset()99 void CsvParser::reset()
100 {
101 m_ch = 0;
102 m_currCol = 1;
103 m_currRow = 1;
104 m_isEof = false;
105 m_isGood = true;
106 m_lastPos = -1;
107 m_maxCols = 0;
108 m_statusMsg = "";
109 m_ts.seek(0);
110 m_table.clear();
111 // the following are users' concern :)
112 // m_comment = '#';
113 // m_backslashSyntax = false;
114 // m_comment = '#';
115 // m_qualifier = '"';
116 // m_separator = ',';
117 }
118
clear()119 void CsvParser::clear()
120 {
121 reset();
122 m_isFileLoaded = false;
123 m_array.clear();
124 }
125
parseFile()126 bool CsvParser::parseFile()
127 {
128 parseRecord();
129 while (!m_isEof) {
130 if (!skipEndline()) {
131 appendStatusMsg(QObject::tr("malformed string"), true);
132 }
133 m_currRow++;
134 m_currCol = 1;
135 parseRecord();
136 }
137 fillColumns();
138 return m_isGood;
139 }
140
parseRecord()141 void CsvParser::parseRecord()
142 {
143 CsvRow row;
144 if (isComment()) {
145 skipLine();
146 return;
147 }
148 do {
149 parseField(row);
150 getChar(m_ch);
151 } while (isSeparator(m_ch) && !m_isEof);
152
153 if (!m_isEof) {
154 ungetChar();
155 }
156 if (isEmptyRow(row)) {
157 row.clear();
158 return;
159 }
160 m_table.push_back(row);
161 if (m_maxCols < row.size()) {
162 m_maxCols = row.size();
163 }
164 m_currCol++;
165 }
166
parseField(CsvRow & row)167 void CsvParser::parseField(CsvRow& row)
168 {
169 QString field;
170 peek(m_ch);
171 if (!isTerminator(m_ch)) {
172 if (isQualifier(m_ch)) {
173 parseQuoted(field);
174 } else {
175 parseSimple(field);
176 }
177 }
178 row.push_back(field);
179 }
180
parseSimple(QString & s)181 void CsvParser::parseSimple(QString& s)
182 {
183 QChar c;
184 getChar(c);
185 while ((isText(c)) && (!m_isEof)) {
186 s.append(c);
187 getChar(c);
188 }
189 if (!m_isEof) {
190 ungetChar();
191 }
192 }
193
parseQuoted(QString & s)194 void CsvParser::parseQuoted(QString& s)
195 {
196 // read and discard initial qualifier (e.g. quote)
197 getChar(m_ch);
198 parseEscaped(s);
199 // getChar(m_ch);
200 if (!isQualifier(m_ch)) {
201 appendStatusMsg(QObject::tr("missing closing quote"), true);
202 }
203 }
204
parseEscaped(QString & s)205 void CsvParser::parseEscaped(QString& s)
206 {
207 parseEscapedText(s);
208 while (processEscapeMark(s, m_ch)) {
209 parseEscapedText(s);
210 }
211 if (!m_isEof) {
212 ungetChar();
213 }
214 }
215
parseEscapedText(QString & s)216 void CsvParser::parseEscapedText(QString& s)
217 {
218 getChar(m_ch);
219 while ((!isQualifier(m_ch)) && !m_isEof) {
220 s.append(m_ch);
221 getChar(m_ch);
222 }
223 }
224
processEscapeMark(QString & s,QChar c)225 bool CsvParser::processEscapeMark(QString& s, QChar c)
226 {
227 QChar buf;
228 peek(buf);
229 QChar c2;
230 if (true == m_isBackslashSyntax) {
231 // escape-character syntax, e.g. \"
232 if (c != '\\') {
233 return false;
234 }
235 // consume (and append) second qualifier
236 getChar(c2);
237 if (m_isEof) {
238 c2 = '\\';
239 s.append('\\');
240 return false;
241 } else {
242 s.append(c2);
243 return true;
244 }
245 } else {
246 // double quote syntax, e.g. ""
247 if (!isQualifier(c)) {
248 return false;
249 }
250 peek(c2);
251 if (!m_isEof) { // not EOF, can read one char
252 if (isQualifier(c2)) {
253 s.append(c2);
254 getChar(c2);
255 return true;
256 }
257 }
258 return false;
259 }
260 }
261
fillColumns()262 void CsvParser::fillColumns()
263 {
264 // fill shorter rows with empty placeholder columns
265 for (int i = 0; i < m_table.size(); ++i) {
266 int gap = m_maxCols - m_table.at(i).size();
267 if (gap > 0) {
268 CsvRow r = m_table.at(i);
269 for (int j = 0; j < gap; ++j) {
270 r.append(QString(""));
271 }
272 m_table.replace(i, r);
273 }
274 }
275 }
276
skipLine()277 void CsvParser::skipLine()
278 {
279 m_ts.readLine();
280 m_ts.seek(m_ts.pos() - 1);
281 }
282
skipEndline()283 bool CsvParser::skipEndline()
284 {
285 getChar(m_ch);
286 return (m_ch == '\n');
287 }
288
getChar(QChar & c)289 void CsvParser::getChar(QChar& c)
290 {
291 m_isEof = m_ts.atEnd();
292 if (!m_isEof) {
293 m_lastPos = m_ts.pos();
294 m_ts >> c;
295 }
296 }
297
ungetChar()298 void CsvParser::ungetChar()
299 {
300 if (!m_ts.seek(m_lastPos)) {
301 qWarning("CSV Parser: unget lower bound exceeded");
302 m_isGood = false;
303 }
304 }
305
peek(QChar & c)306 void CsvParser::peek(QChar& c)
307 {
308 getChar(c);
309 if (!m_isEof) {
310 ungetChar();
311 }
312 }
313
isQualifier(const QChar & c) const314 bool CsvParser::isQualifier(const QChar& c) const
315 {
316 if (true == m_isBackslashSyntax && (c != m_qualifier)) {
317 return (c == '\\');
318 } else {
319 return (c == m_qualifier);
320 }
321 }
322
isComment()323 bool CsvParser::isComment()
324 {
325 bool result = false;
326 QChar c2;
327 qint64 pos = m_ts.pos();
328
329 do {
330 getChar(c2);
331 } while ((isSpace(c2) || isTab(c2)) && (!m_isEof));
332
333 if (c2 == m_comment) {
334 result = true;
335 }
336 m_ts.seek(pos);
337 return result;
338 }
339
isText(QChar c) const340 bool CsvParser::isText(QChar c) const
341 {
342 return !((isCRLF(c)) || (isSeparator(c)));
343 }
344
isEmptyRow(const CsvRow & row) const345 bool CsvParser::isEmptyRow(const CsvRow& row) const
346 {
347 CsvRow::const_iterator it = row.constBegin();
348 for (; it != row.constEnd(); ++it) {
349 if (((*it) != "\n") && ((*it) != "")) {
350 return false;
351 }
352 }
353 return true;
354 }
355
isCRLF(const QChar & c) const356 bool CsvParser::isCRLF(const QChar& c) const
357 {
358 return (c == '\n');
359 }
360
isSpace(const QChar & c) const361 bool CsvParser::isSpace(const QChar& c) const
362 {
363 return (c == ' ');
364 }
365
isTab(const QChar & c) const366 bool CsvParser::isTab(const QChar& c) const
367 {
368 return (c == '\t');
369 }
370
isSeparator(const QChar & c) const371 bool CsvParser::isSeparator(const QChar& c) const
372 {
373 return (c == m_separator);
374 }
375
isTerminator(const QChar & c) const376 bool CsvParser::isTerminator(const QChar& c) const
377 {
378 return (isSeparator(c) || (c == '\n') || (c == '\r'));
379 }
380
setBackslashSyntax(bool set)381 void CsvParser::setBackslashSyntax(bool set)
382 {
383 m_isBackslashSyntax = set;
384 }
385
setComment(const QChar & c)386 void CsvParser::setComment(const QChar& c)
387 {
388 m_comment = c.unicode();
389 }
390
setCodec(const QString & s)391 void CsvParser::setCodec(const QString& s)
392 {
393 m_ts.setCodec(QTextCodec::codecForName(s.toLocal8Bit()));
394 }
395
setFieldSeparator(const QChar & c)396 void CsvParser::setFieldSeparator(const QChar& c)
397 {
398 m_separator = c.unicode();
399 }
400
setTextQualifier(const QChar & c)401 void CsvParser::setTextQualifier(const QChar& c)
402 {
403 m_qualifier = c.unicode();
404 }
405
getFileSize() const406 int CsvParser::getFileSize() const
407 {
408 return m_csv.size();
409 }
410
getCsvTable() const411 const CsvTable CsvParser::getCsvTable() const
412 {
413 return m_table;
414 }
415
getStatus() const416 QString CsvParser::getStatus() const
417 {
418 return m_statusMsg;
419 }
420
getCsvCols() const421 int CsvParser::getCsvCols() const
422 {
423 if (!m_table.isEmpty() && !m_table.at(0).isEmpty()) {
424 return m_table.at(0).size();
425 } else {
426 return 0;
427 }
428 }
429
getCsvRows() const430 int CsvParser::getCsvRows() const
431 {
432 return m_table.size();
433 }
434
appendStatusMsg(const QString & s,bool isCritical)435 void CsvParser::appendStatusMsg(const QString& s, bool isCritical)
436 {
437 m_statusMsg += QObject::tr("%1: (row, col) %2,%3").arg(s, m_currRow, m_currCol).append("\n");
438 m_isGood = !isCritical;
439 }
440