1 /*
2     SPDX-FileCopyrightText: 2012 Rishab Arora <ra.rishab@gmail.com>
3 
4     SPDX-License-Identifier: GPL-2.0-or-later
5 */
6 
7 #include "ksparser.h"
8 
9 #include <QDebug>
10 
11 const int KSParser::EBROKEN_INT         = 0;
12 const double KSParser::EBROKEN_DOUBLE   = 0.0;
13 const float KSParser::EBROKEN_FLOAT     = 0.0;
14 const QString KSParser::EBROKEN_QSTRING = "Null";
15 const bool KSParser::parser_debug_mode_ = false;
16 
KSParser(const QString & filename,const char comment_char,const QList<QPair<QString,DataTypes>> & sequence,const char delimiter)17 KSParser::KSParser(const QString &filename, const char comment_char, const QList<QPair<QString, DataTypes>> &sequence,
18                    const char delimiter)
19     : filename_(filename), comment_char_(comment_char), name_type_sequence_(sequence), delimiter_(delimiter)
20 {
21     if (!file_reader_.openFullPath(filename_))
22     {
23         qWarning() << "Unable to open file: " << filename;
24         readFunctionPtr = &KSParser::DummyRow;
25     }
26     else
27     {
28         readFunctionPtr = &KSParser::ReadCSVRow;
29         qDebug() << "File opened: " << filename;
30     }
31 }
32 
KSParser(const QString & filename,const char comment_char,const QList<QPair<QString,DataTypes>> & sequence,const QList<int> & widths)33 KSParser::KSParser(const QString &filename, const char comment_char, const QList<QPair<QString, DataTypes>> &sequence,
34                    const QList<int> &widths)
35     : filename_(filename), comment_char_(comment_char), name_type_sequence_(sequence), width_sequence_(widths)
36 {
37     if (!file_reader_.openFullPath(filename_))
38     {
39         qWarning() << "Unable to open file: " << filename;
40         readFunctionPtr = &KSParser::DummyRow;
41     }
42     else
43     {
44         readFunctionPtr = &KSParser::ReadFixedWidthRow;
45         qDebug() << "File opened: " << filename;
46     }
47 }
48 
ReadNextRow()49 QHash<QString, QVariant> KSParser::ReadNextRow()
50 {
51     return (this->*readFunctionPtr)();
52 }
53 
ReadCSVRow()54 QHash<QString, QVariant> KSParser::ReadCSVRow()
55 {
56     /**
57      * @brief read_success(bool) signifies if a row has been successfully read.
58      * If any problem (eg incomplete row) is encountered. The row is discarded
59      * and the while loop continues till it finds a good row or the file ends.
60      **/
61     bool read_success = false;
62     QString next_line;
63     QStringList separated;
64     QHash<QString, QVariant> newRow;
65 
66     while (file_reader_.hasMoreLines() && read_success == false)
67     {
68         next_line = file_reader_.readLine();
69         if (next_line.mid(0, 1)[0] == comment_char_)
70             continue;
71         separated = next_line.split(delimiter_);
72         /*
73             * 1) split along delimiter eg. comma (,)
74             * 2) check first and last characters.
75             *    if the first letter is  '"',
76             *    then combine the nexto ones in it till
77             *    till you come across the next word which
78             *    has the last character as '"'
79             *    (CombineQuoteParts
80             *
81         */
82         if (separated.length() == 1)
83             continue; // Length will be 1 if there
84         // is no delimiter
85 
86         separated = CombineQuoteParts(separated); // At this point, the
87         // string has been split
88         // taking the quote marks into account
89 
90         // Check if the generated list has correct size
91         // If not, continue to next row. (i.e SKIP INCOMPLETE ROW)
92         if (separated.length() != name_type_sequence_.length())
93             continue;
94 
95         for (int i = 0; i < name_type_sequence_.length(); i++)
96         {
97             bool ok;
98             newRow[name_type_sequence_[i].first] = ConvertToQVariant(separated[i], name_type_sequence_[i].second, ok);
99             if (!ok && parser_debug_mode_)
100             {
101                 qDebug() << name_type_sequence_[i].second << "Failed at field: " << name_type_sequence_[i].first
102                          << " & next_line : " << next_line;
103             }
104         }
105         read_success = true;
106     }
107     /*
108      * This signifies that someone tried to read a row
109      * without checking if HasNextRow is true.
110      * OR
111      * The file was truncated OR the file ends with one or more '\n'
112      */
113     if (file_reader_.hasMoreLines() == false && newRow.size() <= 1)
114         newRow = DummyRow();
115     return newRow;
116 }
117 
ReadFixedWidthRow()118 QHash<QString, QVariant> KSParser::ReadFixedWidthRow()
119 {
120     if (name_type_sequence_.length() != (width_sequence_.length() + 1))
121     {
122         // line length is appendeded to width_sequence_ by default.
123         // Hence, the length of width_sequence_ is one less than
124         // name_type_sequence_
125         qWarning() << "Unequal fields and widths! Returning dummy row!";
126         Q_ASSERT(false); // Make sure that in Debug mode, this condition generates an abort.
127         return DummyRow();
128     }
129 
130     /**
131     * @brief read_success (bool) signifies if a row has been successfully read.
132     * If any problem (eg incomplete row) is encountered. The row is discarded
133     * and the while loop continues till it finds a good row or the file ends.
134     **/
135     bool read_success = false;
136     QString next_line;
137     QStringList separated;
138     QHash<QString, QVariant> newRow;
139     int total_min_length = 0;
140 
141     foreach (const int width_value, width_sequence_)
142     {
143         total_min_length += width_value;
144     }
145     while (file_reader_.hasMoreLines() && read_success == false)
146     {
147         /*
148          * Steps:
149          * 1) Read Line
150          * 2) If it is a comment, loop again
151          * 3) If it is too small, loop again
152          * 4) Else, a) Break it down according to widths
153          *          b) Convert each broken down unit to appropriate value
154          *          c) set read_success to True denoting we have a valid
155          *             conversion
156         */
157         next_line = file_reader_.readLine();
158         if (next_line.mid(0, 1)[0] == comment_char_)
159             continue;
160         if (next_line.length() < total_min_length)
161             continue;
162 
163         int curr_width = 0;
164         for (int split : width_sequence_)
165         {
166             // Build separated stringlist. Then assign it afterwards.
167             QString temp_split;
168 
169             temp_split = next_line.mid(curr_width, split);
170             // Don't use at(), because it crashes on invalid index
171             curr_width += split;
172             separated.append(temp_split.trimmed());
173         }
174         separated.append(next_line.mid(curr_width).trimmed()); // Append last segment
175 
176         // Conversions
177         for (int i = 0; i < name_type_sequence_.length(); ++i)
178         {
179             bool ok;
180             newRow[name_type_sequence_[i].first] = ConvertToQVariant(separated[i], name_type_sequence_[i].second, ok);
181             if (!ok && parser_debug_mode_)
182             {
183                 qDebug() << name_type_sequence_[i].second << "Failed at field: " << name_type_sequence_[i].first
184                          << " & next_line : " << next_line;
185             }
186         }
187         read_success = true;
188     }
189     /*
190      * This signifies that someone tried to read a row
191      * without checking if HasNextRow is true.
192      * OR
193      * The file was truncated OR the file ends with one or more '\n'
194      */
195     if (file_reader_.hasMoreLines() == false && newRow.size() <= 1)
196         newRow = DummyRow();
197     return newRow;
198 }
199 
DummyRow()200 QHash<QString, QVariant> KSParser::DummyRow()
201 {
202     // qWarning() << "File named " << filename_ << " encountered an error while reading";
203     QHash<QString, QVariant> newRow;
204 
205     for (auto &item : name_type_sequence_)
206     {
207         switch (item.second)
208         {
209             case D_QSTRING:
210                 newRow[item.first] = EBROKEN_QSTRING;
211                 break;
212             case D_DOUBLE:
213                 newRow[item.first] = EBROKEN_DOUBLE;
214                 break;
215             case D_INT:
216                 newRow[item.first] = EBROKEN_INT;
217                 break;
218             case D_FLOAT:
219                 newRow[item.first] = EBROKEN_FLOAT;
220                 break;
221             case D_SKIP:
222             default:
223                 break;
224         }
225     }
226     return newRow;
227 }
228 
HasNextRow()229 bool KSParser::HasNextRow()
230 {
231     return file_reader_.hasMoreLines();
232 }
233 
SetProgress(QString msg,int total_lines,int step_size)234 void KSParser::SetProgress(QString msg, int total_lines, int step_size)
235 {
236     file_reader_.setProgress(msg, total_lines, step_size);
237 }
238 
ShowProgress()239 void KSParser::ShowProgress()
240 {
241     file_reader_.showProgress();
242 }
243 
CombineQuoteParts(QList<QString> & separated)244 QList<QString> KSParser::CombineQuoteParts(QList<QString> &separated)
245 {
246     QString iter_string;
247     QList<QString> quoteCombined;
248     QStringList::const_iterator iter;
249 
250     if (separated.length() == 0)
251     {
252         qDebug() << "Cannot Combine empty list";
253     }
254     else
255     {
256         /* Algorithm:
257          * In the following steps, 'word' implies a unit from 'separated'.
258          * i.e. separated[0], separated[1] etc are 'words'
259          *
260          * 1) Read a word
261          * 2) If word does not start with \" add to final expression. Goto 1)
262          * 3) If word starts with \", push to queue
263          * 4) If word ends with \", empty queue and join each with delimiter.
264          *    Add this to final expression. Go to 6)
265          * 5) Read next word. Goto 3) until end of list of words is reached
266          * 6) Goto 1) until end of list of words is reached
267         */
268         iter = separated.constBegin();
269 
270         while (iter != separated.constEnd())
271         {
272             QList<QString> queue;
273             iter_string = *iter;
274 
275             if (iter_string.indexOf("\"") == 0) // if (quote mark is the first character)
276             {
277                 iter_string = (iter_string).remove(0, 1); // remove the quote at the start
278                 while (iter_string.lastIndexOf('\"') != (iter_string.length() - 1) &&
279                        iter != separated.constEnd()) // handle stuff between parent quotes
280                 {
281                     queue.append((iter_string));
282                     ++iter;
283                     iter_string = *iter;
284                 }
285                 iter_string.chop(1); // remove the quote at the end
286                 queue.append(iter_string);
287             }
288             else
289             {
290                 queue.append(iter_string);
291             }
292 
293             QString col_result;
294             foreach (const QString &join, queue)
295                 col_result += (join + delimiter_);
296             col_result.chop(1); // remove extra delimiter
297             quoteCombined.append(col_result);
298             ++iter;
299         }
300     }
301     return quoteCombined;
302 }
303 
ConvertToQVariant(const QString & input_string,const KSParser::DataTypes & data_type,bool & ok)304 QVariant KSParser::ConvertToQVariant(const QString &input_string, const KSParser::DataTypes &data_type, bool &ok)
305 {
306     ok = true;
307     QVariant converted_object;
308     switch (data_type)
309     {
310         case D_QSTRING:
311         case D_SKIP:
312             converted_object = input_string;
313             break;
314         case D_DOUBLE:
315             converted_object = input_string.trimmed().toDouble(&ok);
316             if (!ok)
317                 converted_object = EBROKEN_DOUBLE;
318             break;
319         case D_INT:
320             converted_object = input_string.trimmed().toInt(&ok);
321             if (!ok)
322                 converted_object = EBROKEN_INT;
323             break;
324         case D_FLOAT:
325             converted_object = input_string.trimmed().toFloat(&ok);
326             if (!ok)
327                 converted_object = EBROKEN_FLOAT;
328             break;
329     }
330     return converted_object;
331 }
332