1 /* This file is part of the KDE project
2    Copyright (C) 2005-2017 Jarosław Staniek <staniek@kde.org>
3    Copyright (C) 2012 Oleg Kukharchuk <oleg.kuh@gmail.com>
4 
5    This work is based on kspread/dialogs/kspread_dlg_csv.cc.
6 
7    Copyright (C) 2002-2003 Norbert Andres <nandres@web.de>
8    Copyright (C) 2002-2003 Ariya Hidayat <ariya@kde.org>
9    Copyright (C) 2002 Laurent Montel <montel@kde.org>
10    Copyright (C) 1999 David Faure <faure@kde.org>
11 
12    This library is free software; you can redistribute it and/or
13    modify it under the terms of the GNU Library General Public
14    License as published by the Free Software Foundation; either
15    version 2 of the License, or (at your option) any later version.
16 
17    This library is distributed in the hope that it will be useful,
18    but WITHOUT ANY WARRANTY; without even the implied warranty of
19    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20    Library General Public License for more details.
21 
22    You should have received a copy of the GNU Library General Public License
23    along with this library; see the file COPYING.LIB.  If not, write to
24    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25  * Boston, MA 02110-1301, USA.
26 */
27 
28 #ifndef KEXI_CSVIMPORTDIALOG_H
29 #define KEXI_CSVIMPORTDIALOG_H
30 
31 #include <QList>
32 #include <QRegularExpression>
33 #include <QPixmap>
34 #include <QTextStream>
35 #include <QEvent>
36 #include <QModelIndex>
37 #include <QElapsedTimer>
38 
39 #include <KAssistantDialog>
40 
41 #include <KDbTristate>
42 #include <KDbPreparedStatement>
43 
44 #include "kexicsvimportoptionsdlg.h"
45 
46 class QHBoxLayout;
47 class QGridLayout;
48 class QCheckBox;
49 class QLabel;
50 class QTableView;
51 class QTreeView;
52 class QFile;
53 class QStackedWidget;
54 class QProgressDialog;
55 class QProgressBar;
56 class QRadioButton;
57 class QSpinBox;
58 class KComboBox;
59 class KPageWidgetItem;
60 
61 class KDbConnection;
62 class KDbTableSchema;
63 
64 class KexiCSVCommentWidget;
65 class KexiCSVDelimiterWidget;
66 class KexiCSVTextQuoteComboBox;
67 class KexiCSVInfoLabel;
68 class KexiProject;
69 class KexiCSVImportDialogModel;
70 class KexiCSVImportDialogItemDelegate;
71 class KexiFileWidgetInterface;
72 class KexiNameWidget;
73 class KexiProjectNavigator;
74 class KexiFieldListModel;
75 
76 namespace KexiPart {
77 class Item;
78 }
79 
80 /**
81  * @short Kexi CSV import dialog
82  *
83  * This is temporary solution for Kexi CSV import,
84  * based on kspread/dialogs/kspread_dlg_csv.h, cc.
85  *
86  * Provides dialog for managing CSV (comma separated value) data.
87  *
88  * Currently KexiCSVImportDialog is used for converting text into columns,
89  * inserting text file and pasting text from clipboard, where conversion
90  * from CSV (comma separated value) data is is all required.
91  * The different purposed mentioned above is determined
92  * using mode, which can be Column, File, or Clipboard respectively.
93 */
94 class KexiCSVImportDialog : public KAssistantDialog
95 {
96     Q_OBJECT
97 
98 public:
99     enum Mode { Clipboard, File /*, Column*/ };
100     enum Header { TEXT, NUMBER, DATE, CURRENCY };
101 
102     //! @todo what about making it kexidb-independent?
103     explicit KexiCSVImportDialog(Mode mode, QWidget *parent = 0);
104 
105     virtual ~KexiCSVImportDialog();
106 
107     bool canceled() const;
108 
109 protected:
110     virtual bool eventFilter(QObject *watched, QEvent *e);
111     bool openData();
112     virtual void accept();
113     virtual void reject();
114 
115 private:
116     //! Used in emergency by accept()
117     //! @note @a partItemForSavedTable is IN-OUT
118     void dropDestinationTable(KexiProject* project, KexiPart::Item* &partItemForSavedTable);
119 
120     //! Used in emergency by accept()
121     //! @note @a partItemForSavedTable is IN-OUT
122     void raiseErrorInAccept(KexiProject* project, KexiPart::Item* &partItemForSavedTable);
123 
124     QGridLayout* MyDialogLayout;
125     QHBoxLayout* Layout1;
126     KexiCSVImportDialogModel *m_table;
127     KexiCSVImportDialogItemDelegate *m_tableItemDelegate;
128     QTableView *m_tableView;
129     KexiCSVDelimiterWidget* m_delimiterWidget;
130     KexiCSVCommentWidget* m_commentWidget;
131     bool m_detectDelimiter; //!< true if delimiter should be detected
132                             //!< (true by default, set to false if user sets delimiter)
133     QLabel* m_formatLabel;
134     KComboBox* m_formatCombo;
135     QSpinBox *m_startAtLineSpinBox;
136     KexiCSVTextQuoteComboBox* m_comboQuote;
137     QLabel* m_startAtLineLabel;
138     QLabel* TextLabel2;
139     QCheckBox* m_ignoreDuplicates;
140     QCheckBox* m_1stRowForFieldNames;
141     QCheckBox* m_primaryKeyField;
142     KexiFileWidgetInterface *m_fileIface;
143     QWidget *m_optionsWidget;
144     QWidget *m_saveMethodWidget;
145     KPageWidgetItem *m_openFilePage;
146     KPageWidgetItem *m_optionsPage;
147     KPageWidgetItem *m_saveMethodPage;
148     KPageWidgetItem *m_chooseTablePage;
149     QRadioButton *m_newTableOption;
150     QRadioButton *m_existingTableOption;
151 
152     QStackedWidget *m_tableNameWidget;
153     KPageWidgetItem *m_tableNamePage;
154     KexiNameWidget *m_newTableWidget;
155     KexiProjectNavigator *m_tablesList;
156     QTreeView *m_fieldsListView;
157     QLabel *m_tableCaptionLabel;
158     QLabel *m_tableNameLabel;
159     QLabel *m_recordCountLabel;
160     QLabel *m_colCountLabel;
161 
162     QWidget *m_importWidget;
163     KPageWidgetItem *m_importPage;
164     KexiCSVInfoLabel *m_fromLabel;
165     KexiCSVInfoLabel *m_toLabel;
166     QLabel *m_importProgressLabel;
167 
168     void detectTypeAndUniqueness(int row, int col, const QString& text);
169     void setText(int row, int col, const QString& text, bool inGUI);
170 
171     /*! Parses date from \a text and stores into \a date.
172      m_dateRegExp is used for clever detection;
173      if '/' separated is found, it's assumed the format is american mm/dd/yyyy.
174      This function supports omitted zeros, so 1/2/2006 is parsed properly too.
175      \return true on success. */
176     bool parseDate(const QString& text, QDate& date);
177 
178     /*! Parses time from \a text and stores into \a date.
179      m_timeRegExp1 and m_timeRegExp2 are used for clever detection;
180      both hh:mm:ss and hh:mm are supported.
181      This function supports omitted zeros, so 1:2:3 is parsed properly too.
182      \return true on success. */
183     bool parseTime(const QString& text, QTime& time);
184 
185     /*! Called after the first fillTable() when number of rows is unknown. */
186     void adjustRows(int iRows);
187 
188     int  getHeader(int col);
189     QString getText(int row, int col);
190     void updateColumn(int col);
191     bool isPrimaryKeyAllowed(int col);
192     void setPrimaryKeyIcon(int column, bool set);
193     void updateRowCountInfo();
194     tristate loadRows(QString &field, int &row, int &columnm, int &maxColumn, bool inGUI);
195 
196     /*! Detects delimiter by looking at first 4K bytes of the data. Used by loadRows().
197     The used algorithm:
198     1. Look byte by byte and locate special characters that can be delimiters.
199       Special fact is taken into account: if there are '"' quotes used for text values,
200       delimiters that follow directly the closing quote has higher priority than the one
201       that follows other character. We do not assume that every text value is quoted.
202       Summing up, there is following hierarchy (from highest to lowest):
203       quote+tab, quote+semicolon, quote+comma, tab, semicolon, comma.
204       Space characters are skipped. Text inside quotes is skipped, as well as double
205       (escaped) quotes.
206     2. While scanning the data, for every row following number of tabs, semicolons and commas
207       (only these outside of the quotes) are computed. On every line the values are appended
208       to a separate list (QList<int>).
209     3. After scanning, all the values are checked on the QList<int> of tabs.
210       If the list has more one element (so there was more than one row) and all the values
211       (numbers of tabs) are equal, it's very probable the tab is a delimiter.
212       So, this character is returned as a delimiter.
213       3a. The same algorithm as in 3. is performed for semicolon character.
214       3b. The same algorithm as in 3. is performed for comma character.
215     4. If the step 3. did not return a delimiter, a character found in step 1. with
216       the highest priority is retured as delimiter. */
217     QString detectDelimiterByLookingAtFirstBytesOfFile(QTextStream *inputStream);
218 
219     /*! Callback, called whenever row is loaded in loadRows(). When inGUI is true,
220     nothing is performed, else database buffer is written back to the database. */
221     bool saveRow(bool inGUI);
222 
223     //! @return date built out of @a y, @a m, @a d parts,
224     //! taking m_minimumYearFor100YearSlidingWindow into account
225     QDate buildDate(int y, int m, int d) const;
226 
227     //! Updates size of m_columnNames and m_changedColumnNames if needed
228     void updateColumnVectorSize();
229 
230     QPushButton* configureButton() const;
231 
232     bool m_parseComments;
233     bool m_canceled;
234     bool m_adjustRows;
235     int m_startline;
236     QChar m_textquote;
237     QChar m_commentSymbol;
238     QString m_clipboardData;
239     QByteArray m_fileArray;
240     Mode m_mode;
241 
242     QRegularExpression m_dateRegExp, m_timeRegExp1, m_timeRegExp2, m_fpNumberRegExp1, m_fpNumberRegExp2;
243     bool m_columnsAdjusted; //!< to call adjustColumn() only once
244     bool m_1stRowForFieldNamesDetected; //!< used to force rerun fillTable() after 1st row
245     bool m_firstFillTableCall; //!< used to know whether it's 1st fillTable() call
246     bool m_blockUserEvents;
247     int m_primaryKeyColumn; //!< index of column with PK assigned (-1 if none)
248     int m_maximumRowsForPreview;
249     int m_maximumBytesForPreview;
250     /*! The minimum year for the "100 year sliding date window": range of years that defines
251      where any year expressed as two digits falls. Example: for date window from 1930 to 2029,
252      two-digit years between 0 and 29 fall in the 2000s, and two-digit years between 30 and 99 fall in the 1900s.
253      The default is 1930. */
254     int m_minimumYearFor100YearSlidingWindow;
255 
256     QPixmap m_pkIcon;
257     QString m_fname;
258     QFile* m_file;
259     QTextStream *m_inputStream; //!< used in loadData()
260     KexiCSVImportOptions m_options;
261     QProgressDialog *m_loadingProgressDlg;
262     QProgressBar *m_importingProgressBar;
263     bool m_dialogCanceled;
264     KexiCSVInfoLabel *m_infoLbl;
265     KDbConnection *m_conn; //!< (temp) database connection used for importing
266     KexiFieldListModel *m_fieldsListModel;
267     KDbTableSchema *m_destinationTableSchema;  //!< (temp) dest. table schema used for importing
268     KDbPreparedStatement m_importingStatement;
269     QList<QVariant> m_dbRowBuffer; //!< (temp) used for importing
270     bool m_implicitPrimaryKeyAdded; //!< (temp) used for importing
271     bool m_allRowsLoadedInPreview; //!< we need to know whether all rows were loaded or it's just a partial data preview
272     bool m_stoppedAt_MAX_BYTES_TO_PREVIEW; //!< used to compute m_allRowsLoadedInPreview
273     const QString m_stringNo, m_stringI18nNo, m_stringFalse, m_stringI18nFalse; //!< used for importing boolean values
274     int m_prevColumnForSetText; //!< used for non-gui tracking of skipped clolumns,
275                                 //!< so can be saved to the database,
276                                 //!< e.g. first three columns are saved for ,,,"abc" line in the CSV data
277     QElapsedTimer m_elapsedTimer; //!< Used to update progress
278     qint64 m_elapsedMs;
279 
280     void createImportMethodPage();
281     void createOptionsPage();
282     void createFileOpenPage();
283     void createTableNamePage();
284     void createImportPage();
285 
286     KDbPreparedStatementParameters m_valuesToInsert;
287     KexiPart::Item* m_partItemForSavedTable;
288     bool m_importInProgress;
289     bool m_importCanceled;
290     class Private;
291     Private * const d;
292 
293 public Q_SLOTS:
294     virtual void next();
295 
296 private Q_SLOTS:
297     void fillTable();
298     void fillTableLater();
299     void initLater();
300     void formatChanged(int id);
301     void delimiterChanged(const QString& delimiter);
302     void commentSymbolChanged(const QString& commentSymbol);
303     void startlineSelected(int line);
304     void textquoteSelected(int);
305     void currentCellChanged(const QModelIndex &cur, const QModelIndex &prev);
306     void ignoreDuplicatesChanged(int);
307     void slot1stRowForFieldNamesChanged(int state);
308     void optionsButtonClicked();
309     void slotPrimaryKeyFieldToggled(bool on);
310     void slotCurrentPageChanged(KPageWidgetItem *page, KPageWidgetItem *prev);
311     void slotShowSchema(KexiPart::Item *item);
312     void import();
313 };
314 
315 #endif
316