1 /*
2 SPDX-FileCopyrightText: 2012 Rishab Arora <ra.rishab@gmail.com>
3
4 SPDX-License-Identifier: GPL-2.0-or-later
5 */
6
7 #include "ksparser.h"
8
9 #include <QDebug>
10
11 const int KSParser::EBROKEN_INT = 0;
12 const double KSParser::EBROKEN_DOUBLE = 0.0;
13 const float KSParser::EBROKEN_FLOAT = 0.0;
14 const QString KSParser::EBROKEN_QSTRING = "Null";
15 const bool KSParser::parser_debug_mode_ = false;
16
KSParser(const QString & filename,const char comment_char,const QList<QPair<QString,DataTypes>> & sequence,const char delimiter)17 KSParser::KSParser(const QString &filename, const char comment_char, const QList<QPair<QString, DataTypes>> &sequence,
18 const char delimiter)
19 : filename_(filename), comment_char_(comment_char), name_type_sequence_(sequence), delimiter_(delimiter)
20 {
21 if (!file_reader_.openFullPath(filename_))
22 {
23 qWarning() << "Unable to open file: " << filename;
24 readFunctionPtr = &KSParser::DummyRow;
25 }
26 else
27 {
28 readFunctionPtr = &KSParser::ReadCSVRow;
29 qDebug() << "File opened: " << filename;
30 }
31 }
32
KSParser(const QString & filename,const char comment_char,const QList<QPair<QString,DataTypes>> & sequence,const QList<int> & widths)33 KSParser::KSParser(const QString &filename, const char comment_char, const QList<QPair<QString, DataTypes>> &sequence,
34 const QList<int> &widths)
35 : filename_(filename), comment_char_(comment_char), name_type_sequence_(sequence), width_sequence_(widths)
36 {
37 if (!file_reader_.openFullPath(filename_))
38 {
39 qWarning() << "Unable to open file: " << filename;
40 readFunctionPtr = &KSParser::DummyRow;
41 }
42 else
43 {
44 readFunctionPtr = &KSParser::ReadFixedWidthRow;
45 qDebug() << "File opened: " << filename;
46 }
47 }
48
ReadNextRow()49 QHash<QString, QVariant> KSParser::ReadNextRow()
50 {
51 return (this->*readFunctionPtr)();
52 }
53
ReadCSVRow()54 QHash<QString, QVariant> KSParser::ReadCSVRow()
55 {
56 /**
57 * @brief read_success(bool) signifies if a row has been successfully read.
58 * If any problem (eg incomplete row) is encountered. The row is discarded
59 * and the while loop continues till it finds a good row or the file ends.
60 **/
61 bool read_success = false;
62 QString next_line;
63 QStringList separated;
64 QHash<QString, QVariant> newRow;
65
66 while (file_reader_.hasMoreLines() && read_success == false)
67 {
68 next_line = file_reader_.readLine();
69 if (next_line.mid(0, 1)[0] == comment_char_)
70 continue;
71 separated = next_line.split(delimiter_);
72 /*
73 * 1) split along delimiter eg. comma (,)
74 * 2) check first and last characters.
75 * if the first letter is '"',
76 * then combine the nexto ones in it till
77 * till you come across the next word which
78 * has the last character as '"'
79 * (CombineQuoteParts
80 *
81 */
82 if (separated.length() == 1)
83 continue; // Length will be 1 if there
84 // is no delimiter
85
86 separated = CombineQuoteParts(separated); // At this point, the
87 // string has been split
88 // taking the quote marks into account
89
90 // Check if the generated list has correct size
91 // If not, continue to next row. (i.e SKIP INCOMPLETE ROW)
92 if (separated.length() != name_type_sequence_.length())
93 continue;
94
95 for (int i = 0; i < name_type_sequence_.length(); i++)
96 {
97 bool ok;
98 newRow[name_type_sequence_[i].first] = ConvertToQVariant(separated[i], name_type_sequence_[i].second, ok);
99 if (!ok && parser_debug_mode_)
100 {
101 qDebug() << name_type_sequence_[i].second << "Failed at field: " << name_type_sequence_[i].first
102 << " & next_line : " << next_line;
103 }
104 }
105 read_success = true;
106 }
107 /*
108 * This signifies that someone tried to read a row
109 * without checking if HasNextRow is true.
110 * OR
111 * The file was truncated OR the file ends with one or more '\n'
112 */
113 if (file_reader_.hasMoreLines() == false && newRow.size() <= 1)
114 newRow = DummyRow();
115 return newRow;
116 }
117
ReadFixedWidthRow()118 QHash<QString, QVariant> KSParser::ReadFixedWidthRow()
119 {
120 if (name_type_sequence_.length() != (width_sequence_.length() + 1))
121 {
122 // line length is appendeded to width_sequence_ by default.
123 // Hence, the length of width_sequence_ is one less than
124 // name_type_sequence_
125 qWarning() << "Unequal fields and widths! Returning dummy row!";
126 Q_ASSERT(false); // Make sure that in Debug mode, this condition generates an abort.
127 return DummyRow();
128 }
129
130 /**
131 * @brief read_success (bool) signifies if a row has been successfully read.
132 * If any problem (eg incomplete row) is encountered. The row is discarded
133 * and the while loop continues till it finds a good row or the file ends.
134 **/
135 bool read_success = false;
136 QString next_line;
137 QStringList separated;
138 QHash<QString, QVariant> newRow;
139 int total_min_length = 0;
140
141 foreach (const int width_value, width_sequence_)
142 {
143 total_min_length += width_value;
144 }
145 while (file_reader_.hasMoreLines() && read_success == false)
146 {
147 /*
148 * Steps:
149 * 1) Read Line
150 * 2) If it is a comment, loop again
151 * 3) If it is too small, loop again
152 * 4) Else, a) Break it down according to widths
153 * b) Convert each broken down unit to appropriate value
154 * c) set read_success to True denoting we have a valid
155 * conversion
156 */
157 next_line = file_reader_.readLine();
158 if (next_line.mid(0, 1)[0] == comment_char_)
159 continue;
160 if (next_line.length() < total_min_length)
161 continue;
162
163 int curr_width = 0;
164 for (int split : width_sequence_)
165 {
166 // Build separated stringlist. Then assign it afterwards.
167 QString temp_split;
168
169 temp_split = next_line.mid(curr_width, split);
170 // Don't use at(), because it crashes on invalid index
171 curr_width += split;
172 separated.append(temp_split.trimmed());
173 }
174 separated.append(next_line.mid(curr_width).trimmed()); // Append last segment
175
176 // Conversions
177 for (int i = 0; i < name_type_sequence_.length(); ++i)
178 {
179 bool ok;
180 newRow[name_type_sequence_[i].first] = ConvertToQVariant(separated[i], name_type_sequence_[i].second, ok);
181 if (!ok && parser_debug_mode_)
182 {
183 qDebug() << name_type_sequence_[i].second << "Failed at field: " << name_type_sequence_[i].first
184 << " & next_line : " << next_line;
185 }
186 }
187 read_success = true;
188 }
189 /*
190 * This signifies that someone tried to read a row
191 * without checking if HasNextRow is true.
192 * OR
193 * The file was truncated OR the file ends with one or more '\n'
194 */
195 if (file_reader_.hasMoreLines() == false && newRow.size() <= 1)
196 newRow = DummyRow();
197 return newRow;
198 }
199
DummyRow()200 QHash<QString, QVariant> KSParser::DummyRow()
201 {
202 // qWarning() << "File named " << filename_ << " encountered an error while reading";
203 QHash<QString, QVariant> newRow;
204
205 for (auto &item : name_type_sequence_)
206 {
207 switch (item.second)
208 {
209 case D_QSTRING:
210 newRow[item.first] = EBROKEN_QSTRING;
211 break;
212 case D_DOUBLE:
213 newRow[item.first] = EBROKEN_DOUBLE;
214 break;
215 case D_INT:
216 newRow[item.first] = EBROKEN_INT;
217 break;
218 case D_FLOAT:
219 newRow[item.first] = EBROKEN_FLOAT;
220 break;
221 case D_SKIP:
222 default:
223 break;
224 }
225 }
226 return newRow;
227 }
228
HasNextRow()229 bool KSParser::HasNextRow()
230 {
231 return file_reader_.hasMoreLines();
232 }
233
SetProgress(QString msg,int total_lines,int step_size)234 void KSParser::SetProgress(QString msg, int total_lines, int step_size)
235 {
236 file_reader_.setProgress(msg, total_lines, step_size);
237 }
238
ShowProgress()239 void KSParser::ShowProgress()
240 {
241 file_reader_.showProgress();
242 }
243
CombineQuoteParts(QList<QString> & separated)244 QList<QString> KSParser::CombineQuoteParts(QList<QString> &separated)
245 {
246 QString iter_string;
247 QList<QString> quoteCombined;
248 QStringList::const_iterator iter;
249
250 if (separated.length() == 0)
251 {
252 qDebug() << "Cannot Combine empty list";
253 }
254 else
255 {
256 /* Algorithm:
257 * In the following steps, 'word' implies a unit from 'separated'.
258 * i.e. separated[0], separated[1] etc are 'words'
259 *
260 * 1) Read a word
261 * 2) If word does not start with \" add to final expression. Goto 1)
262 * 3) If word starts with \", push to queue
263 * 4) If word ends with \", empty queue and join each with delimiter.
264 * Add this to final expression. Go to 6)
265 * 5) Read next word. Goto 3) until end of list of words is reached
266 * 6) Goto 1) until end of list of words is reached
267 */
268 iter = separated.constBegin();
269
270 while (iter != separated.constEnd())
271 {
272 QList<QString> queue;
273 iter_string = *iter;
274
275 if (iter_string.indexOf("\"") == 0) // if (quote mark is the first character)
276 {
277 iter_string = (iter_string).remove(0, 1); // remove the quote at the start
278 while (iter_string.lastIndexOf('\"') != (iter_string.length() - 1) &&
279 iter != separated.constEnd()) // handle stuff between parent quotes
280 {
281 queue.append((iter_string));
282 ++iter;
283 iter_string = *iter;
284 }
285 iter_string.chop(1); // remove the quote at the end
286 queue.append(iter_string);
287 }
288 else
289 {
290 queue.append(iter_string);
291 }
292
293 QString col_result;
294 foreach (const QString &join, queue)
295 col_result += (join + delimiter_);
296 col_result.chop(1); // remove extra delimiter
297 quoteCombined.append(col_result);
298 ++iter;
299 }
300 }
301 return quoteCombined;
302 }
303
ConvertToQVariant(const QString & input_string,const KSParser::DataTypes & data_type,bool & ok)304 QVariant KSParser::ConvertToQVariant(const QString &input_string, const KSParser::DataTypes &data_type, bool &ok)
305 {
306 ok = true;
307 QVariant converted_object;
308 switch (data_type)
309 {
310 case D_QSTRING:
311 case D_SKIP:
312 converted_object = input_string;
313 break;
314 case D_DOUBLE:
315 converted_object = input_string.trimmed().toDouble(&ok);
316 if (!ok)
317 converted_object = EBROKEN_DOUBLE;
318 break;
319 case D_INT:
320 converted_object = input_string.trimmed().toInt(&ok);
321 if (!ok)
322 converted_object = EBROKEN_INT;
323 break;
324 case D_FLOAT:
325 converted_object = input_string.trimmed().toFloat(&ok);
326 if (!ok)
327 converted_object = EBROKEN_FLOAT;
328 break;
329 }
330 return converted_object;
331 }
332