1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #ifndef _U2_IMPORT_ANNOTATIONS_FROM_CSV_TASK_H_
23 #define _U2_IMPORT_ANNOTATIONS_FROM_CSV_TASK_H_
24 
25 #include <QMap>
26 #include <QPointer>
27 
28 #include <U2Core/AnnotationData.h>
29 #include <U2Core/Task.h>
30 
31 #include "CSVColumnConfiguration.h"
32 
33 namespace U2 {
34 
35 // FIXME: implement splitToken as a default value for parsing script (i.e line.split(<separator>))
36 class CSVParsingConfig {
37 public:
CSVParsingConfig()38     CSVParsingConfig()
39         : defaultAnnotationName("misc_feature"), linesToSkip(0), keepEmptyParts(true), removeQuotes(true) {
40     }
41     QString defaultAnnotationName;
42     QString splitToken;
43     int linesToSkip;
44     QString prefixToSkip;
45     bool keepEmptyParts;
46     QList<ColumnConfig> columns;
47     QString parsingScript;
48     bool removeQuotes;
49 
50     static QBitArray QUOTES;
51 };
52 
53 class ImportAnnotationsFromCSVTaskConfig {
54 public:
ImportAnnotationsFromCSVTaskConfig()55     ImportAnnotationsFromCSVTaskConfig()
56         : addToProject(true) {
57     }
58 
59     QString csvFile;
60     QString dstFile;
61     bool addToProject;
62     DocumentFormatId formatId;
63 
64     CSVParsingConfig parsingOptions;
65 };
66 
67 class ReadCSVAsAnnotationsTask;
68 class SaveDocumentTask;
69 class AddDocumentTask;
70 class Annotation;
71 class Document;
72 
73 class ImportAnnotationsFromCSVTask : public Task {
74     Q_OBJECT
75 public:
76     ImportAnnotationsFromCSVTask(ImportAnnotationsFromCSVTaskConfig &config);
77 
78     QList<Task *> onSubTaskFinished(Task *subTask);
79 
80 private:
81     QMap<QString, QList<SharedAnnotationData>> prepareAnnotations() const;
82 
83     Document *prepareNewDocument(const QMap<QString, QList<SharedAnnotationData>> &annotations);
84 
85     ImportAnnotationsFromCSVTaskConfig config;
86     ReadCSVAsAnnotationsTask *readTask;
87     SaveDocumentTask *writeTask;
88     AddDocumentTask *addTask;
89     QPointer<Document> doc;
90 };
91 
92 class ReadCSVAsAnnotationsTask : public Task {
93     Q_OBJECT
94 public:
95     ReadCSVAsAnnotationsTask(const QString &file, const CSVParsingConfig &config);
96 
97     void run();
98 
getResult()99     QMap<QString, QList<SharedAnnotationData>> getResult() const {
100         return result;
101     }
102 
103     static QList<QStringList> parseLinesIntoTokens(const QString &text, const CSVParsingConfig &config, int &maxColumns, TaskStateInfo &ti);
104 
105     static QStringList parseLineIntoTokens(const QString &line, const CSVParsingConfig &config, TaskStateInfo &ti, int lineNum = 1);
106 
107     static QString guessSeparatorString(const QString &text, const CSVParsingConfig &config);
108 
109     // script variable that holds line value
110     static QString LINE_VAR;
111     // script variable that holds parsed line numbers. Lines that skipped to not increment this value
112     static QString LINE_NUM_VAR;
113 
114 private:
115     QString file;
116     CSVParsingConfig config;
117     // Group name <-> annotations
118     QMap<QString, QList<SharedAnnotationData>> result;
119 };
120 
121 }  // namespace U2
122 
123 #endif
124