1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include "ExportAnnotations2CSVTask.h"
23 
24 #include <QScopedPointer>
25 
26 #include <U2Core/AnnotationTableObject.h>
27 #include <U2Core/AppContext.h>
28 #include <U2Core/Counter.h>
29 #include <U2Core/DNATranslation.h>
30 #include <U2Core/IOAdapter.h>
31 #include <U2Core/IOAdapterUtils.h>
32 #include <U2Core/L10n.h>
33 #include <U2Core/TextUtils.h>
34 #include <U2Core/U2SafePoints.h>
35 
36 namespace U2 {
37 
38 const QString ExportAnnotations2CSVTask::SEQUENCE_NAME = "sequence_name";
39 
ExportAnnotations2CSVTask(const QList<Annotation * > & annotations,const QByteArray & sequence,const QString & _seqName,const DNATranslation * complementTranslation,bool exportSequence,bool _exportSeqName,const QString & url,bool apnd,const QString & sep)40 ExportAnnotations2CSVTask::ExportAnnotations2CSVTask(const QList<Annotation *> &annotations, const QByteArray &sequence, const QString &_seqName, const DNATranslation *complementTranslation, bool exportSequence, bool _exportSeqName, const QString &url, bool apnd, const QString &sep)
41     : Task(tr("Export annotations to CSV format"), TaskFlag_None), annotations(annotations), sequence(sequence), seqName(_seqName),
42       complementTranslation(complementTranslation), exportSequence(exportSequence), exportSequenceName(_exportSeqName), url(url),
43       append(apnd), separator(sep) {
44     GCOUNTER(cvar, "ExportAnnotattions2CSVTask");
45 }
46 
writeCSVLine(const QStringList & container,IOAdapter * ioAdapter,const QString & separator,U2OpStatus & os)47 static void writeCSVLine(const QStringList &container, IOAdapter *ioAdapter, const QString &separator, U2OpStatus &os) {
48     bool first = true;
49     foreach (const QString &value, container) {
50         if (!first) {
51             if (0 == ioAdapter->writeBlock(separator.toLatin1())) {
52                 os.setError(L10N::errorWritingFile(ioAdapter->getURL()));
53                 return;
54             }
55         }
56         QString preparedStr = value;
57         preparedStr.replace("\"", "\"\"");
58         preparedStr = "\"" + preparedStr + "\"";
59 
60         if (0 == ioAdapter->writeBlock(preparedStr.toLocal8Bit())) {
61             os.setError(L10N::errorWritingFile(ioAdapter->getURL()));
62             return;
63         }
64         first = false;
65     }
66     if (0 == ioAdapter->writeBlock("\n")) {
67         os.setError(L10N::errorWritingFile(ioAdapter->getURL()));
68         return;
69     }
70 }
71 
run()72 void ExportAnnotations2CSVTask::run() {
73     QScopedPointer<IOAdapter> ioAdapter;
74 
75     IOAdapterId ioAdapterId = IOAdapterUtils::url2io(url);
76     IOAdapterRegistry *ioRegistry = AppContext::getIOAdapterRegistry();
77     CHECK_EXT(nullptr != ioRegistry,
78               stateInfo.setError(tr("Invalid I/O environment!").arg(url)), );
79     IOAdapterFactory *ioAdapterFactory = ioRegistry->getIOAdapterFactoryById(ioAdapterId);
80     CHECK_EXT(nullptr != ioAdapterFactory,
81               stateInfo.setError(tr("No IO adapter found for URL: %1").arg(url)), );
82     ioAdapter.reset(ioAdapterFactory->createIOAdapter());
83 
84     if (!ioAdapter->open(url, append ? IOAdapterMode_Append : IOAdapterMode_Write)) {
85         stateInfo.setError(L10N::errorOpeningFileWrite(url));
86         return;
87     }
88 
89     QHash<QString, int> columnIndices;
90     QStringList columnNames;
91     columnNames << tr("Group") << tr("Name") << tr("Start") << tr("End") << tr("Length") << tr("Complementary");
92     if (exportSequenceName) {
93         columnNames << tr("Sequence name");
94     }
95     if (exportSequence) {
96         columnNames << tr("Sequence");
97     }
98 
99     bool hasSequenceNameQualifier = false;
100     foreach (Annotation *annotation, annotations) {
101         foreach (const U2Qualifier &qualifier, annotation->getQualifiers()) {
102             const QString &qName = qualifier.name;
103             if (qName == SEQUENCE_NAME) {
104                 hasSequenceNameQualifier = true;
105                 continue;
106             }
107             if (!columnIndices.contains(qName)) {
108                 columnIndices.insert(qName, columnNames.size());
109                 columnNames.append(qName);
110             }
111         }
112     }
113     writeCSVLine(columnNames, ioAdapter.data(), separator, stateInfo);
114     CHECK_OP(stateInfo, );
115 
116     bool noComplementarySequence = false;
117     foreach (Annotation *annotation, annotations) {
118         foreach (const U2Region &region, annotation->getRegions()) {
119             QStringList values;
120             values << annotation->getGroup()->getGroupPath();
121             values << annotation->getName();
122             values << QString::number(region.startPos + 1);
123             values << QString::number(region.startPos + region.length);
124             values << QString::number(region.length);
125 
126             const bool isComplementary = annotation->getStrand().isCompementary();
127             values << ((isComplementary) ? tr("yes") : tr("no"));
128 
129             if (exportSequenceName) {
130                 if (!seqName.isEmpty()) {
131                     values << seqName.toLatin1();
132                 } else if (hasSequenceNameQualifier) {
133                     foreach (const U2Qualifier &qf, annotation->getQualifiers()) {
134                         if (qf.name == SEQUENCE_NAME) {
135                             values << qf.value;
136                         }
137                     }
138                 }
139             }
140             if (exportSequence) {
141                 QByteArray sequencePart = sequence.mid(region.startPos, region.length);
142                 if (isComplementary) {
143                     if (complementTranslation != nullptr) {
144                         complementTranslation->translate(sequencePart.data(), sequencePart.size());
145                         TextUtils::reverse(sequencePart.data(), sequencePart.size());
146                     } else {
147                         noComplementarySequence = true;
148                         sequencePart.clear();
149                     }
150                 }
151                 values << sequencePart;
152             }
153 
154             // add empty strings as default qualifier values
155             while (values.size() < columnNames.size()) {
156                 values << QString();
157             }
158 
159             foreach (const U2Qualifier &qualifier, annotation->getQualifiers()) {
160                 if (qualifier.name == SEQUENCE_NAME) {
161                     continue;
162                 }
163 
164                 int qualifiedIndex = columnIndices[qualifier.name];
165                 SAFE_POINT(qualifiedIndex > 0 && qualifiedIndex < values.length(), "Invalid qualifier index", );
166                 values[qualifiedIndex] = qualifier.value;
167             }
168             writeCSVLine(values, ioAdapter.data(), separator, stateInfo);
169             CHECK_OP(stateInfo, );
170         }
171     }
172     if (noComplementarySequence) {
173         taskLog.error(tr("Attaching a sequence to an annotation was ignored. The annotation is on the complementary strand. Can not generate a complementary sequence for a non-nucleic alphabet."));
174     }
175 }
176 
177 }  // namespace U2
178