1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include "ExportTasks.h"
23
24 #include <QFileInfo>
25
26 #include <U2Core/AddDocumentTask.h>
27 #include <U2Core/AppContext.h>
28 #include <U2Core/Counter.h>
29 #include <U2Core/DNAChromatogramObject.h>
30 #include <U2Core/DNASequenceObject.h>
31 #include <U2Core/DNASequenceUtils.h>
32 #include <U2Core/DNATranslation.h>
33 #include <U2Core/DNATranslationImpl.h>
34 #include <U2Core/DocumentModel.h>
35 #include <U2Core/GObjectRelationRoles.h>
36 #include <U2Core/IOAdapter.h>
37 #include <U2Core/IOAdapterUtils.h>
38 #include <U2Core/LoadDocumentTask.h>
39 #include <U2Core/MSAUtils.h>
40 #include <U2Core/MultipleSequenceAlignmentImporter.h>
41 #include <U2Core/MultipleSequenceAlignmentObject.h>
42 #include <U2Core/ProjectModel.h>
43 #include <U2Core/TextUtils.h>
44 #include <U2Core/U2SafePoints.h>
45 #include <U2Core/U2SequenceUtils.h>
46
47 #include <U2Formats/SCFFormat.h>
48
49 namespace U2 {
50
51 //////////////////////////////////////////////////////////////////////////
// ExportAlignmentTask
ExportAlignmentTask(const MultipleSequenceAlignment & _ma,const QString & _url,const DocumentFormatId & _documentFormatId)53 ExportAlignmentTask::ExportAlignmentTask(const MultipleSequenceAlignment &_ma, const QString &_url, const DocumentFormatId &_documentFormatId)
54 : DocumentProviderTask(tr("Export alignment to %1").arg(_url), TaskFlag_None), ma(_ma->getCopy()), url(_url), documentFormatId(_documentFormatId) {
55 GCOUNTER(cvar, "ExportAlignmentTask");
56 documentDescription = QFileInfo(url).fileName();
57 setVerboseLogMode(true);
58 CHECK_EXT(!ma->isEmpty(), setError(tr("Nothing to export: multiple alignment is empty")), );
59 }
60
run()61 void ExportAlignmentTask::run() {
62 DocumentFormat *format = AppContext::getDocumentFormatRegistry()->getFormatById(documentFormatId);
63 SAFE_POINT(format != nullptr, L10N::nullPointerError("sequence document format"), );
64 IOAdapterFactory *iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(IOAdapterUtils::url2io(url));
65 SAFE_POINT(iof != nullptr, L10N::nullPointerError("I/O adapter factory"), );
66 QScopedPointer<Document> exportedDocument(format->createNewLoadedDocument(iof, url, stateInfo));
67 CHECK_OP(stateInfo, );
68
69 MultipleSequenceAlignmentObject *obj = MultipleSequenceAlignmentImporter::createAlignment(exportedDocument->getDbiRef(), ma, stateInfo);
70 CHECK_OP(stateInfo, );
71
72 exportedDocument->addObject(obj);
73 format->storeDocument(exportedDocument.get(), stateInfo);
74 CHECK_OP(stateInfo, );
75 exportedDocument.reset(); // Release resources.
76
77 // Now reload the document.
78 // Reason: document format may have some limits and change the original data: trim sequence names or replace spaces with underscores.
79 resultDocument = format->loadDocument(iof, url, {}, stateInfo);
80 }
81
82 //////////////////////////////////////////////////////////////////////////
83 // export alignment 2 sequence format
84
ExportMSA2SequencesTask(const MultipleSequenceAlignment & _ma,const QString & _url,bool _trimLeadingAndTrailingGaps,const DocumentFormatId & _documentFormatId)85 ExportMSA2SequencesTask::ExportMSA2SequencesTask(const MultipleSequenceAlignment &_ma,
86 const QString &_url,
87 bool _trimLeadingAndTrailingGaps,
88 const DocumentFormatId &_documentFormatId)
89 : DocumentProviderTask(tr("Export alignment as sequence to %1").arg(_url), TaskFlag_None), ma(_ma->getCopy()), url(_url),
90 trimLeadingAndTrailingGaps(_trimLeadingAndTrailingGaps), documentFormatId(_documentFormatId) {
91 documentDescription = QFileInfo(url).fileName();
92 GCOUNTER(cvar, "ExportMSA2SequencesTask");
93 setVerboseLogMode(true);
94 }
95
run()96 void ExportMSA2SequencesTask::run() {
97 DocumentFormat *format = AppContext::getDocumentFormatRegistry()->getFormatById(documentFormatId);
98 SAFE_POINT(format != nullptr, L10N::nullPointerError("sequence document format"), );
99 IOAdapterFactory *iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(IOAdapterUtils::url2io(url));
100 SAFE_POINT(iof != nullptr, L10N::nullPointerError("I/O adapter factory"), );
101 QScopedPointer<Document> exportedDocument(format->createNewLoadedDocument(iof, url, stateInfo));
102 CHECK_OP(stateInfo, );
103 QList<DNASequence> sequenceList = MSAUtils::convertMsaToSequenceList(ma, stateInfo, trimLeadingAndTrailingGaps);
104 CHECK_OP(stateInfo, );
105 QSet<QString> usedNames;
106 for (DNASequence &sequence : sequenceList) {
107 QString name = sequence.getName();
108 if (usedNames.contains(name)) {
109 name = TextUtils::variate(name, " ", usedNames, false, 1);
110 sequence.setName(name);
111 }
112 U2EntityRef seqRef = U2SequenceUtils::import(stateInfo, exportedDocument->getDbiRef(), sequence);
113 CHECK_OP(stateInfo, );
114 exportedDocument->addObject(new U2SequenceObject(name, seqRef));
115 usedNames.insert(name);
116 }
117 format->storeDocument(exportedDocument.get(), stateInfo);
118 CHECK_OP(stateInfo, );
119 exportedDocument.reset(); // Release resources.
120
121 // Now reload the document.
122 // Reason: document format may have some limits and change the original data: trim sequence names or replace spaces with underscores.
123 resultDocument = format->loadDocument(iof, url, {}, stateInfo);
124 }
125
126 //////////////////////////////////////////////////////////////////////////
127 // export nucleic alignment 2 amino alignment
128
ExportMSA2MSATask(const MultipleSequenceAlignment & msa,const QList<qint64> & rowIds,const U2Region & columnRegion,const QString & _url,const DNATranslation * _aminoTranslation,const DocumentFormatId & _documentFormatId,bool _trimGaps,bool _convertUnknownToGap,bool _reverseComplement,int _translationFrame)129 ExportMSA2MSATask::ExportMSA2MSATask(const MultipleSequenceAlignment &msa,
130 const QList<qint64> &rowIds,
131 const U2Region &columnRegion,
132 const QString &_url,
133 const DNATranslation *_aminoTranslation,
134 const DocumentFormatId &_documentFormatId,
135 bool _trimGaps,
136 bool _convertUnknownToGap,
137 bool _reverseComplement,
138 int _translationFrame)
139 : DocumentProviderTask(tr("Export alignment as alignment to %1").arg(_url), TaskFlag_None),
140 url(_url), documentFormatId(_documentFormatId), aminoTranslation(_aminoTranslation), trimLeadingAndTrailingGaps(_trimGaps),
141 convertUnknownToGap(_convertUnknownToGap), reverseComplement(_reverseComplement), translationFrame(_translationFrame) {
142 GCOUNTER(cvar, "ExportMSA2MSATask");
143 documentDescription = QFileInfo(url).fileName();
144
145 CHECK_EXT(!msa->isEmpty(), setError(tr("Nothing to export: multiple alignment is empty")), );
146
147 SAFE_POINT_EXT(translationFrame >= 0 && translationFrame <= 2, setError(tr("Illegal translation frame offset: %1").arg(translationFrame)), );
148 SAFE_POINT_EXT(aminoTranslation == nullptr || aminoTranslation->isThree2One(), setError(tr("Invalid amino translation: %1").arg(aminoTranslation->getTranslationName())), );
149 setVerboseLogMode(true);
150
151 sequenceList = MSAUtils::convertMsaToSequenceList(msa, stateInfo, trimLeadingAndTrailingGaps, rowIds.toSet(), columnRegion);
152 CHECK_OP(stateInfo, )
153 }
154
run()155 void ExportMSA2MSATask::run() {
156 DocumentFormat *format = AppContext::getDocumentFormatRegistry()->getFormatById(documentFormatId);
157 SAFE_POINT(format != nullptr, L10N::nullPointerError("sequence document format"), );
158 IOAdapterFactory *iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(IOAdapterUtils::url2io(url));
159 SAFE_POINT(iof != nullptr, L10N::nullPointerError("I/O adapter factory"), );
160 QScopedPointer<Document> exportedDocument(format->createNewLoadedDocument(iof, url, stateInfo));
161 CHECK_OP(stateInfo, );
162
163 QList<DNASequence> resultSequenceList;
164 for (const DNASequence &originalSequence : sequenceList) {
165 DNASequence sequence = reverseComplement ? DNASequenceUtils::reverseComplement(originalSequence) : originalSequence;
166 sequence.seq = sequence.seq.right(sequence.seq.length() - translationFrame);
167 QString name = sequence.getName();
168 if (aminoTranslation != nullptr) {
169 name += "(translated)";
170
171 const QByteArray &seq = sequence.seq;
172 int aminoSequenceLength = seq.length() / 3;
173 QByteArray resultData(aminoSequenceLength, '\0');
174 CHECK_EXT(resultData.size() == aminoSequenceLength, L10N::outOfMemory(), );
175 aminoTranslation->translate(seq.constData(), seq.length(), resultData.data(), resultData.length());
176
177 if (!trimLeadingAndTrailingGaps && convertUnknownToGap) {
178 resultData.replace("X", "-");
179 }
180 resultData.replace("*", "X");
181 DNASequence resultSequence(name, resultData, aminoTranslation->getDstAlphabet());
182 resultSequenceList << resultSequence;
183 } else {
184 resultSequenceList << sequence;
185 }
186 }
187 MultipleSequenceAlignment aminoMa = MSAUtils::seq2ma(resultSequenceList, stateInfo);
188 CHECK_OP(stateInfo, );
189
190 MultipleSequenceAlignmentObject *obj = MultipleSequenceAlignmentImporter::createAlignment(exportedDocument->getDbiRef(), aminoMa, stateInfo);
191 CHECK_OP(stateInfo, );
192
193 exportedDocument->addObject(obj);
194 format->storeDocument(exportedDocument.get(), stateInfo);
195 CHECK_OP(stateInfo, );
196
197 // Now reload the document.
198 // Reason: document format may have some limits and change the original data: trim sequence names or replace spaces with underscores.
199 resultDocument = format->loadDocument(iof, url, {}, stateInfo);
200 }
201
202 //////////////////////////////////////////////////////////////////////////
203 // export chromatogram to SCF
204
ExportDNAChromatogramTask(DNAChromatogramObject * _obj,const ExportChromatogramTaskSettings & _settings)205 ExportDNAChromatogramTask::ExportDNAChromatogramTask(DNAChromatogramObject *_obj, const ExportChromatogramTaskSettings &_settings)
206 : DocumentProviderTask(tr("Export chromatogram to SCF"), TaskFlags_NR_FOSCOE), chromaObject(_obj), settings(_settings), loadTask(nullptr) {
207 GCOUNTER(cvar, "ExportDNAChromatogramTask");
208 setVerboseLogMode(true);
209 }
210
prepare()211 void ExportDNAChromatogramTask::prepare() {
212 Document *d = chromaObject->getDocument();
213 SAFE_POINT_EXT(d != nullptr, setError(L10N::internalError("Chromatogram object has no associated document")), );
214
215 QList<GObjectRelation> relatedObjs = chromaObject->findRelatedObjectsByRole(ObjectRole_Sequence);
216 SAFE_POINT_EXT(relatedObjs.count() == 1, setError("Sequence related to chromatogram is not found!"), );
217
218 QString seqObjName = relatedObjs.first().ref.objName;
219
220 GObject *resObj = d->findGObjectByName(seqObjName);
221 auto sObj = qobject_cast<U2SequenceObject *>(resObj);
222 SAFE_POINT_EXT(sObj != nullptr, setError(L10N::nullPointerError("sequence object is null")), );
223
224 DNAChromatogram cd = chromaObject->getChromatogram();
225 QByteArray seq = sObj->getWholeSequenceData(stateInfo);
226 CHECK_OP(stateInfo, );
227
228 if (settings.reverse) {
229 TextUtils::reverse(seq.data(), seq.length());
230 reverseVector(cd.A);
231 reverseVector(cd.C);
232 reverseVector(cd.G);
233 reverseVector(cd.T);
234 int offset = 0;
235 if (chromaObject->getDocument()->getDocumentFormatId() == BaseDocumentFormats::ABIF) {
236 int baseNum = cd.baseCalls.count();
237 int seqLen = cd.seqLength;
238 // this is required for base <-> peak correspondence
239 if (baseNum > seqLen) {
240 cd.baseCalls.remove(baseNum - 1);
241 cd.prob_A.remove(baseNum - 1);
242 cd.prob_C.remove(baseNum - 1);
243 cd.prob_G.remove(baseNum - 1);
244 cd.prob_T.remove(baseNum - 1);
245 }
246 } else if (chromaObject->getDocument()->getDocumentFormatId() == BaseDocumentFormats::SCF) {
247 // SCF format particularities
248 offset = -1;
249 }
250
251 for (int i = 0; i < cd.seqLength; ++i) {
252 cd.baseCalls[i] = cd.traceLength - cd.baseCalls[i] + offset;
253 }
254 reverseVector(cd.baseCalls);
255 reverseVector(cd.prob_A);
256 reverseVector(cd.prob_C);
257 reverseVector(cd.prob_G);
258 reverseVector(cd.prob_T);
259 }
260
261 if (settings.complement) {
262 DNATranslation *tr = AppContext::getDNATranslationRegistry()->lookupTranslation(BaseDNATranslationIds::NUCL_DNA_DEFAULT_COMPLEMENT);
263 tr->translate(seq.data(), seq.length());
264 qSwap(cd.A, cd.T);
265 qSwap(cd.C, cd.G);
266 qSwap(cd.prob_A, cd.prob_T);
267 qSwap(cd.prob_C, cd.prob_G);
268 }
269
270 SCFFormat::exportDocumentToSCF(settings.url, cd, seq, stateInfo);
271 CHECK_OP(stateInfo, );
272
273 if (settings.loadDocument) {
274 IOAdapterFactory *iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(BaseIOAdapters::LOCAL_FILE);
275 loadTask = new LoadDocumentTask(BaseDocumentFormats::SCF, settings.url, iof);
276 addSubTask(loadTask);
277 }
278 }
onSubTaskFinished(Task * subTask)279 QList<Task *> ExportDNAChromatogramTask::onSubTaskFinished(Task *subTask) {
280 if (subTask == loadTask) {
281 resultDocument = loadTask->takeDocument();
282 }
283 return {};
284 }
285
286 } // namespace U2
287