1 /* This file is part of the KDE project
2 Copyright (C) 2004-2009 Adam Pigg <adam@piggz.co.uk>
3 Copyright (C) 2016 Jarosław Staniek <staniek@kde.org>
4 
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public
7 License as published by the Free Software Foundation; either
8 version 2 of the License, or (at your option) any later version.
9 
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 Library General Public License for more details.
14 
15 You should have received a copy of the GNU Library General Public License
16 along with this program; see the file COPYING.  If not, write to
17 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 */
20 
21 #include "TsvMigrate.h"
22 #include <kexi.h>
23 
24 #include <KDbSqlResult>
25 #include <KDbSqlString>
26 
27 #include <KPluginFactory>
28 
29 #include <QDebug>
30 #include <QDir>
31 #include <QTextCodec>
32 
//! Upper limit, in bytes (10 KiB), of the text sample read to detect the file's encoding
const int MAX_SAMPLE_TEXT_SIZE = 10 * 1024;
34 
// Makes names from the KexiMigration namespace usable unqualified in the definitions below.
using namespace KexiMigration;

/* This is the implementation for the TSV file import routines. */
// NOTE(review): presumably registers TsvMigrate as a plugin described by keximigrate_tsv.json;
// the macro is defined outside this file - confirm against its definition.
KEXI_PLUGIN_FACTORY(TsvMigrate, "keximigrate_tsv.json")
39 
40 namespace KexiMigration {
41 struct FileInfo
42 {
43     QFile file;
44     QTextCodec *codec;
45     QVector<QString> fieldNames;
46 };
47 }
48 
TsvMigrate(QObject * parent,const QVariantList & args)49 TsvMigrate::TsvMigrate(QObject *parent, const QVariantList& args)
50         : KexiMigrate(parent, args)
51 {
52 }
53 
54 
~TsvMigrate()55 TsvMigrate::~TsvMigrate()
56 {
57 }
58 
drv_createConnection()59 KDbConnection* TsvMigrate::drv_createConnection()
60 {
61     // nothing to do, just success
62     m_result = KDbResult();
63     return nullptr;
64 }
65 
drv_connect()66 bool TsvMigrate::drv_connect()
67 {
68     return QDir().exists(data()->source->databaseName());
69 }
70 
drv_disconnect()71 bool TsvMigrate::drv_disconnect()
72 {
73     return true;
74 }
75 
drv_tableNames(QStringList * tablenames)76 bool TsvMigrate::drv_tableNames(QStringList *tablenames)
77 {
78   // return base part of filename only so table name will look better
79   tablenames->append(QFileInfo(data()->source->databaseName()).baseName());
80   return true;
81 }
82 
83 //! @return next line read from the file split by tabs, decoded to unicode and with last \n removed
readLine(FileInfo * info,bool * eof)84 static QVector<QByteArray> readLine(FileInfo *info, bool *eof)
85 {
86     QByteArray line = info->file.readLine();
87     int count = line.length();
88     if (line.endsWith('\n')) {
89         --count;
90     }
91     if (line.isEmpty()) {
92         *eof = true;
93         return QVector<QByteArray>();
94     }
95     *eof = false;
96     int i = 0;
97     int start = 0;
98     int fields = 0;
99     QVector<QByteArray> result(info->fieldNames.isEmpty() ? 10 : info->fieldNames.count());
100     for (; i < count; ++i) {
101         if (line[i] == '\t') {
102             if (fields >= result.size()) {
103                 result.resize(result.size() * 2);
104             }
105             result[fields] = line.mid(start, i - start);
106             ++fields;
107             start = i + 1;
108         }
109     }
110     result[fields] = line.mid(start, i - start); // last value
111     result.resize(fields + 1);
112     return result;
113 }
114 
drv_copyTable(const QString & srcTable,KDbConnection * destConn,KDbTableSchema * dstTable,const RecordFilter * recordFilter)115 bool TsvMigrate::drv_copyTable(const QString& srcTable, KDbConnection *destConn,
116                                KDbTableSchema* dstTable,
117                                const RecordFilter *recordFilter)
118 {
119     Q_UNUSED(srcTable)
120     FileInfo info;
121     if (!openFile(&info)) {
122         return false;
123     }
124     Q_FOREVER {
125         bool eof;
126         QVector<QByteArray> line = readLine(&info, &eof);
127         if (eof) {
128             break;
129         }
130         QList<QVariant> vals;
131         for(int i = 0; i < line.count(); ++i) {
132             vals.append(line.at(i));
133         }
134         if (recordFilter && !(*recordFilter)(vals)) {
135             continue;
136         }
137         if (!destConn->insertRecord(dstTable, vals)) {
138             return false;
139         }
140     }
141     return true;
142 }
143 
drv_readTableSchema(const QString & originalName,KDbTableSchema * tableSchema)144 bool TsvMigrate::drv_readTableSchema(const QString& originalName, KDbTableSchema *tableSchema)
145 {
146     Q_UNUSED(originalName)
147     FileInfo info;
148     if (!openFile(&info)) {
149         return false;
150     }
151     for (const QString &name : info.fieldNames) {
152         KDbField *f = new KDbField(name, KDbField::Text);
153         if (!tableSchema->addField(f)) {
154             delete f;
155             tableSchema->clear();
156             return false;
157         }
158     }
159     return true;
160 }
161 
162 class TsvRecord : public KDbSqlRecord
163 {
164 public:
TsvRecord(const QVector<QByteArray> & values,const FileInfo & m_info)165     inline explicit TsvRecord(const QVector<QByteArray> &values, const FileInfo &m_info)
166         : m_values(values), m_info(&m_info)
167     {
168     }
169 
stringValue(int index)170     inline QString stringValue(int index) Q_DECL_OVERRIDE {
171         return m_info->codec->toUnicode(m_values.value(index));
172     }
173 
toByteArray(int index)174     inline QByteArray toByteArray(int index) Q_DECL_OVERRIDE {
175         return m_values.value(index);
176     }
177 
cstringValue(int index)178     inline KDbSqlString cstringValue(int index) Q_DECL_OVERRIDE {
179         return KDbSqlString(m_values[index].constData(), m_values[index].length());
180     }
181 
182 private:
183     const QVector<QByteArray> m_values;
184     const FileInfo *m_info;
185 };
186 
187 class TsvResult : public KDbSqlResult
188 {
189 public:
TsvResult(FileInfo * info)190     inline explicit TsvResult(FileInfo *info) : m_info(info), m_eof(false) {
191         Q_ASSERT(info);
192     }
193 
fieldsCount()194     inline int fieldsCount() Q_DECL_OVERRIDE {
195         return m_info->fieldNames.count();
196     }
197 
198     //! Not needed for ImportTableWizard
field(int index)199     inline KDbSqlField *field(int index) Q_DECL_OVERRIDE {
200         Q_UNUSED(index);
201         return nullptr;
202     }
203 
204     //! Not needed for ImportTableWizard
createField(const QString & tableName,int index)205     inline KDbField* createField(const QString &tableName, int index) Q_DECL_OVERRIDE {
206         Q_UNUSED(tableName);
207         Q_UNUSED(index);
208         return nullptr;
209     }
210 
fetchRecord()211     inline QSharedPointer<KDbSqlRecord> fetchRecord() Q_DECL_OVERRIDE {
212         QSharedPointer<KDbSqlRecord> sqlRecord;
213         QVector<QByteArray> record = readLine(m_info, &m_eof);
214         if (!m_eof) {
215             sqlRecord.reset(new TsvRecord(record, *m_info));
216         }
217         return sqlRecord;
218     }
219 
lastResult()220     inline KDbResult lastResult() Q_DECL_OVERRIDE {
221         return KDbResult();
222     }
223 
~TsvResult()224     inline ~TsvResult() {
225         delete m_info;
226     }
227 
228 private:
229     FileInfo *m_info;
230     bool m_eof;
231 };
232 
drv_readFromTable(const QString & tableName)233 QSharedPointer<KDbSqlResult> TsvMigrate::drv_readFromTable(const QString &tableName)
234 {
235     Q_UNUSED(tableName)
236     QSharedPointer<KDbSqlResult> sqlResult;
237     QScopedPointer<FileInfo> info(new FileInfo);
238     if (openFile(info.data())) {
239         sqlResult.reset(new TsvResult(info.take()));
240     }
241     return sqlResult;
242 }
243 
openFile(FileInfo * info)244 bool TsvMigrate::openFile(FileInfo *info)
245 {
246     info->file.setFileName(data()->source->databaseName());
247     if (!info->file.open(QIODevice::ReadOnly | QIODevice::Text)) {
248         return false;
249     }
250     {
251         const QByteArray sample(info->file.read(MAX_SAMPLE_TEXT_SIZE));
252         info->codec = QTextCodec::codecForUtfText(sample);
253     }
254 
255     if (!info->file.seek(0)) {
256         info->codec = 0;
257         info->file.close();
258         return false;
259     }
260     bool eof;
261     QVector<QByteArray> record = readLine(info, &eof);
262     info->fieldNames.resize(record.count());
263     for (int i = 0; i < record.count(); ++i) {
264         info->fieldNames[i] = info->codec->toUnicode(record[i]);
265     }
266     return !eof;
267 }
268 
269 #include "TsvMigrate.moc"
270