1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include "DnaAssemblyUtils.h"
23 
#include <QAction>
#include <QDir>
#include <QMessageBox>
#include <QVariant>
27 
28 #include <U2Algorithm/DnaAssemblyAlgRegistry.h>
29 #include <U2Algorithm/DnaAssemblyMultiTask.h>
30 #include <U2Algorithm/GenomeAssemblyMultiTask.h>
31 #include <U2Algorithm/GenomeAssemblyRegistry.h>
32 
33 #include <U2Core/AddDocumentTask.h>
34 #include <U2Core/AppContext.h>
35 #include <U2Core/AppSettings.h>
36 #include <U2Core/BaseDocumentFormats.h>
37 #include <U2Core/DNASequenceObject.h>
38 #include <U2Core/DocumentModel.h>
39 #include <U2Core/DocumentUtils.h>
40 #include <U2Core/GObjectSelection.h>
41 #include <U2Core/GUrlUtils.h>
42 #include <U2Core/L10n.h>
43 #include <U2Core/MultiTask.h>
44 #include <U2Core/ProjectModel.h>
45 #include <U2Core/QObjectScopedPointer.h>
46 #include <U2Core/U2OpStatusUtils.h>
47 #include <U2Core/U2SafePoints.h>
48 #include <U2Core/UserApplicationsSettings.h>
49 
50 #include <U2Formats/ConvertAssemblyToSamTask.h>
51 #include <U2Formats/ConvertFileTask.h>
52 #include <U2Formats/FastqFormat.h>
53 #include <U2Formats/PairedFastqComparator.h>
54 
55 #include <U2Gui/OpenViewTask.h>
56 #include <U2Gui/ToolsMenu.h>
57 
58 #include "BuildIndexDialog.h"
59 #include "ConvertAssemblyToSamDialog.h"
60 #include "DnaAssemblyDialog.h"
61 #include "GenomeAssemblyDialog.h"
62 
63 namespace U2 {
64 
DnaAssemblySupport()65 DnaAssemblySupport::DnaAssemblySupport() {
66     QAction *convertAssemblyToSamAction = new QAction(tr("Convert UGENE assembly database to SAM..."), this);
67     convertAssemblyToSamAction->setObjectName(ToolsMenu::NGS_CONVERT_SAM);
68     convertAssemblyToSamAction->setIcon(QIcon(":core/images/align.png"));
69     connect(convertAssemblyToSamAction, SIGNAL(triggered()), SLOT(sl_showConvertToSamDialog()));
70     ToolsMenu::addAction(ToolsMenu::NGS_MENU, convertAssemblyToSamAction);
71 
72     QAction *dnaAssemblyAction = new QAction(tr("Map reads to reference..."), this);
73     dnaAssemblyAction->setObjectName(ToolsMenu::NGS_MAP);
74     dnaAssemblyAction->setIcon(QIcon(":core/images/align.png"));
75     connect(dnaAssemblyAction, SIGNAL(triggered()), SLOT(sl_showDnaAssemblyDialog()));
76     ToolsMenu::addAction(ToolsMenu::NGS_MENU, dnaAssemblyAction);
77 
78     QAction *buildIndexAction = new QAction(tr("Build index for reads mapping..."), this);
79     buildIndexAction->setObjectName(ToolsMenu::NGS_INDEX);
80     buildIndexAction->setIcon(QIcon(":core/images/align.png"));
81     connect(buildIndexAction, SIGNAL(triggered()), SLOT(sl_showBuildIndexDialog()));
82     ToolsMenu::addAction(ToolsMenu::NGS_MENU, buildIndexAction);
83 }
84 
sl_showDnaAssemblyDialog()85 void DnaAssemblySupport::sl_showDnaAssemblyDialog() {
86     DnaAssemblyAlgRegistry *registry = AppContext::getDnaAssemblyAlgRegistry();
87     if (registry->getRegisteredAlgorithmIds().isEmpty()) {
88         QMessageBox::information(QApplication::activeWindow(), tr("DNA Assembly"), tr("There are no algorithms for DNA assembly available.\nPlease, check your plugin list."));
89         return;
90     }
91 
92     QObjectScopedPointer<DnaAssemblyDialog> dlg = new DnaAssemblyDialog(QApplication::activeWindow());
93     dlg->exec();
94     CHECK(!dlg.isNull(), );
95 
96     if (QDialog::Accepted == dlg->result()) {
97         DnaAssemblyToRefTaskSettings s;
98         s.samOutput = dlg->isSamOutput();
99         s.refSeqUrl = dlg->getRefSeqUrl();
100         s.algName = dlg->getAlgorithmName();
101         s.resultFileName = dlg->getResultFileName();
102         s.setCustomSettings(dlg->getCustomSettings());
103         s.shortReadSets = dlg->getShortReadSets();
104         s.pairedReads = dlg->isPaired();
105         s.openView = true;
106         s.prebuiltIndex = dlg->isPrebuiltIndex();
107         Task *assemblyTask = new DnaAssemblyTaskWithConversions(s, true);
108         AppContext::getTaskScheduler()->registerTopLevelTask(assemblyTask);
109     }
110 }
111 
sl_showGenomeAssemblyDialog()112 void DnaAssemblySupport::sl_showGenomeAssemblyDialog() {
113     GenomeAssemblyAlgRegistry *registry = AppContext::getGenomeAssemblyAlgRegistry();
114     if (registry->getRegisteredAlgorithmIds().isEmpty()) {
115         QMessageBox::information(QApplication::activeWindow(), tr("Genome Assembly"), tr("There are no algorithms for genome assembly available.\nPlease, check external tools in the settings."));
116         return;
117     }
118 
119     QObjectScopedPointer<GenomeAssemblyDialog> dlg = new GenomeAssemblyDialog(QApplication::activeWindow());
120     dlg->exec();
121     CHECK(!dlg.isNull(), );
122 
123     if (QDialog::Accepted == dlg->result()) {
124         GenomeAssemblyTaskSettings s;
125         s.algName = dlg->getAlgorithmName();
126         s.outDir = dlg->getOutDir();
127         s.setCustomSettings(dlg->getCustomSettings());
128         s.reads = dlg->getReads();
129         s.openView = true;
130         Task *assemblyTask = new GenomeAssemblyMultiTask(s);
131         AppContext::getTaskScheduler()->registerTopLevelTask(assemblyTask);
132     }
133 }
134 
sl_showBuildIndexDialog()135 void DnaAssemblySupport::sl_showBuildIndexDialog() {
136     DnaAssemblyAlgRegistry *registry = AppContext::getDnaAssemblyAlgRegistry();
137     if (registry->getRegisteredAlgorithmIds().isEmpty()) {
138         QMessageBox::information(QApplication::activeWindow(), tr("DNA Assembly"), tr("There are no algorithms for DNA assembly available.\nPlease, check your plugin list."));
139         return;
140     }
141 
142     QObjectScopedPointer<BuildIndexDialog> dlg = new BuildIndexDialog(registry, QApplication::activeWindow());
143     dlg->exec();
144     CHECK(!dlg.isNull(), );
145 
146     if (QDialog::Accepted == dlg->result()) {
147         DnaAssemblyToRefTaskSettings s;
148         s.refSeqUrl = dlg->getRefSeqUrl();
149         s.algName = dlg->getAlgorithmName();
150         s.resultFileName = dlg->getIndexFileName();
151         s.indexFileName = dlg->getIndexFileName();
152         s.setCustomSettings(dlg->getCustomSettings());
153         s.openView = false;
154         s.prebuiltIndex = false;
155         s.pairedReads = false;
156         Task *assemblyTask = new DnaAssemblyTaskWithConversions(s, false, true);
157         AppContext::getTaskScheduler()->registerTopLevelTask(assemblyTask);
158     }
159 }
160 
sl_showConvertToSamDialog()161 void DnaAssemblySupport::sl_showConvertToSamDialog() {
162     QObjectScopedPointer<ConvertAssemblyToSamDialog> dlg = new ConvertAssemblyToSamDialog(QApplication::activeWindow());
163     dlg->exec();
164     CHECK(!dlg.isNull(), );
165 
166     if (QDialog::Accepted == dlg->result()) {
167         Task *convertTask = new ConvertAssemblyToSamTask(dlg->getDbFileUrl(), dlg->getSamFileUrl());
168         AppContext::getTaskScheduler()->registerTopLevelTask(convertTask);
169     }
170 }
171 
172 namespace {
/** Outcome of comparing a file's detected format with a list of accepted formats (see isCorrectFormat). */
enum Result {
    UNKNOWN,  // the format of the file could not be detected
    CORRECT,  // the detected format is one of the accepted formats
    INCORRECT  // a format was detected, but it is not in the accepted list
};
178 
isCorrectFormat(const GUrl & url,const QStringList & targetFormats,QString & detectedFormat)179 static Result isCorrectFormat(const GUrl &url, const QStringList &targetFormats, QString &detectedFormat) {
180     DocumentUtils::Detection r = DocumentUtils::detectFormat(url, detectedFormat);
181     CHECK(DocumentUtils::UNKNOWN != r, UNKNOWN);
182 
183     bool correct = targetFormats.contains(detectedFormat);
184     if (correct) {
185         return CORRECT;
186     }
187     return INCORRECT;
188 }
189 
getConvertTask(const GUrl & url,const QStringList & targetFormats)190 ConvertFileTask *getConvertTask(const GUrl &url, const QStringList &targetFormats) {
191     QString detectedFormat;
192     Result r = isCorrectFormat(url, targetFormats, detectedFormat);
193     if (UNKNOWN == r) {
194         coreLog.info("Unknown file format: " + url.getURLString());
195         return nullptr;
196     }
197 
198     if (INCORRECT == r) {
199         QDir dir = QFileInfo(url.getURLString()).absoluteDir();
200         return new DefaultConvertFileTask(url, detectedFormat, targetFormats.first(), dir.absolutePath());
201     }
202     return nullptr;
203 }
204 }  // namespace
205 
// Classifies one file by format (see isCorrectFormat): a file of unknown format is appended to
// `unknownFormatFiles`, a file whose format is not among `targetFormats` is recorded in `result`
// as "URL string -> detected format"; a file with an accepted format is left out of both.
// Expects `unknownFormatFiles` and `result` to be visible where the macro is expanded.
// Declares the locals `format` and `r`, so it can be expanded only once per scope.
// Note: the `url` argument is evaluated more than once.
#define CHECK_FILE(url, targetFormats) \
    QString format; \
    Result r = isCorrectFormat(url, targetFormats, format); \
    if (UNKNOWN == r) { \
        unknownFormatFiles << url; \
    } else if (INCORRECT == r) { \
        result[url.getURLString()] = format; \
    }
214 
// Schedules a conversion sub-task for `url` unless its URL string is already in `toConvert`.
// When getConvertTask() returns a task, it is added as a sub-task, `conversionTasksCount` is
// incremented and the URL string is remembered in `toConvert`.
// Expects `toConvert`, `conversionTasksCount` and `addSubTask` to be visible where the macro is expanded.
// Note: the `url` argument is evaluated more than once.
#define PREPARE_FILE(url, targetFormats) \
    if (!toConvert.contains(url.getURLString())) { \
        ConvertFileTask *task = getConvertTask(url, targetFormats); \
        if (nullptr != task) { \
            addSubTask(task); \
            conversionTasksCount++; \
            toConvert << url.getURLString(); \
        } \
    }
224 
toConvert(const DnaAssemblyToRefTaskSettings & settings,QList<GUrl> & unknownFormatFiles)225 QMap<QString, QString> DnaAssemblySupport::toConvert(const DnaAssemblyToRefTaskSettings &settings, QList<GUrl> &unknownFormatFiles) {
226     QMap<QString, QString> result;
227     DnaAssemblyAlgorithmEnv *env = AppContext::getDnaAssemblyAlgRegistry()->getAlgorithm(settings.algName);
228     SAFE_POINT(nullptr != env, "Unknown algorithm: " + settings.algName, result);
229 
230     foreach (const GUrl &url, settings.getShortReadUrls()) {
231         CHECK_FILE(url, env->getReadsFormats());
232     }
233 
234     if (!settings.prebuiltIndex) {
235         CHECK_FILE(settings.refSeqUrl, env->getRefrerenceFormats());
236     }
237     return result;
238 }
239 
toConvertText(const QMap<QString,QString> & files)240 QString DnaAssemblySupport::toConvertText(const QMap<QString, QString> &files) {
241     QStringList strings;
242     foreach (const QString &url, files.keys()) {
243         QString format = files[url];
244         strings << url + " [" + format + "]";
245     }
246     return strings.join("\n");
247 }
248 
unknownText(const QList<GUrl> & unknownFormatFiles)249 QString DnaAssemblySupport::unknownText(const QList<GUrl> &unknownFormatFiles) {
250     QStringList strings;
251     foreach (const GUrl &url, unknownFormatFiles) {
252         strings << url.getURLString();
253     }
254     return strings.join("\n");
255 }
256 
257 /************************************************************************/
258 /* FilterUnpairedReads */
259 /************************************************************************/
FilterUnpairedReadsTask(const DnaAssemblyToRefTaskSettings & settings)260 FilterUnpairedReadsTask::FilterUnpairedReadsTask(const DnaAssemblyToRefTaskSettings &settings)
261     : Task(tr("Filter unpaired reads task"), TaskFlags_FOSE_COSC),
262       settings(settings) {
263     tmpDirPath = settings.tmpDirectoryForFilteredFiles.isEmpty() ? AppContext::getAppSettings()->getUserAppsSettings()->getCurrentProcessTemporaryDirPath() : settings.tmpDirectoryForFilteredFiles;
264 }
265 
run()266 void FilterUnpairedReadsTask::run() {
267     SAFE_POINT_EXT(settings.pairedReads,
268                    setError(tr("Filtering unpaired reads is launched on not-paired data")), );
269 
270     QList<ShortReadSet> upstream;
271     QList<ShortReadSet> downstream;
272     foreach (const ShortReadSet &set, settings.shortReadSets) {
273         if (set.order == ShortReadSet::UpstreamMate) {
274             upstream << set;
275         } else {
276             downstream << set;
277         }
278     }
279     SAFE_POINT_EXT(upstream.size() == downstream.size(), setError(tr("The count of upstream files is not equal to the count of downstream files")), );
280 
281     for (int i = 0; i < upstream.size(); i++) {
282         QString tmpFileUpstream = getTmpFilePath(upstream[i].url);
283         CHECK_OP(stateInfo, );
284         QString tmpFileDownstream = getTmpFilePath(downstream[i].url);
285         CHECK_OP(stateInfo, );
286 
287         filteredReads << ShortReadSet(GUrl(tmpFileUpstream), ShortReadSet::PairedEndReads, ShortReadSet::UpstreamMate);
288         filteredReads << ShortReadSet(GUrl(tmpFileDownstream), ShortReadSet::PairedEndReads, ShortReadSet::DownstreamMate);
289 
290         compareFiles(upstream[i].url.getURLString(), downstream[i].url.getURLString(), tmpFileUpstream, tmpFileDownstream);
291         CHECK_OP(stateInfo, );
292     }
293 }
294 
getTmpFilePath(const GUrl & initialFile)295 QString FilterUnpairedReadsTask::getTmpFilePath(const GUrl &initialFile) {
296     QString result = GUrlUtils::prepareTmpFileLocation(tmpDirPath, initialFile.baseFileName(), "fastq", stateInfo);
297     CHECK_OP(stateInfo, QString());
298     return result;
299 }
300 
compareFiles(const GUrl & upstream,const GUrl & downstream,const GUrl & upstreamFiltered,const GUrl & downstreamFiltered)301 void FilterUnpairedReadsTask::compareFiles(const GUrl &upstream, const GUrl &downstream, const GUrl &upstreamFiltered, const GUrl &downstreamFiltered) {
302     PairedFastqComparator comparator(upstream.getURLString(), downstream.getURLString(), upstreamFiltered.getURLString(), downstreamFiltered.getURLString(), stateInfo);
303     CHECK_OP(stateInfo, );
304     comparator.compare(stateInfo);
305     CHECK_OP(stateInfo, );
306 
307     if (comparator.getUnpairedCount() != 0) {
308         stateInfo.addWarning(tr("%1 read pairs were mapped, %2 reads without a pair from files \"%3\" and \"%4\" were skipped.")
309                                  .arg(comparator.getPairsCount())
310                                  .arg(comparator.getUnpairedCount())
311                                  .arg(QFileInfo(upstream.getURLString()).fileName())
312                                  .arg(QFileInfo(downstream.getURLString()).fileName()));
313     }
314 }
315 
316 /************************************************************************/
317 /* DnaAssemblyTaskWithConversions */
318 /************************************************************************/
DnaAssemblyTaskWithConversions(const DnaAssemblyToRefTaskSettings & settings,bool viewResult,bool justBuildIndex)319 DnaAssemblyTaskWithConversions::DnaAssemblyTaskWithConversions(const DnaAssemblyToRefTaskSettings &settings, bool viewResult, bool justBuildIndex)
320     : ExternalToolSupportTask("Dna assembly task", TaskFlags(TaskFlags_NR_FOSCOE | TaskFlag_CollectChildrenWarnings)), settings(settings), viewResult(viewResult),
321       justBuildIndex(justBuildIndex), conversionTasksCount(0), assemblyTask(nullptr) {
322 }
323 
/**
 * Returns the task's settings. onSubTaskFinished() updates them in place: the reference
 * and read URLs are replaced by conversion results and the read sets by the filtered list.
 */
const DnaAssemblyToRefTaskSettings &DnaAssemblyTaskWithConversions::getSettings() const {
    return settings;
}
prepare()327 void DnaAssemblyTaskWithConversions::prepare() {
328     DnaAssemblyAlgorithmEnv *env = AppContext::getDnaAssemblyAlgRegistry()->getAlgorithm(settings.algName);
329     if (env == nullptr) {
330         setError(QString("Algorithm %1 is not found").arg(settings.algName));
331         return;
332     }
333 
334     QSet<QString> toConvert;
335     Q_UNUSED(toConvert);
336     foreach (const GUrl &url, settings.getShortReadUrls()) {
337         PREPARE_FILE(url, env->getReadsFormats());
338     }
339 
340     if (!settings.prebuiltIndex) {
341         PREPARE_FILE(settings.refSeqUrl, env->getRefrerenceFormats());
342     }
343 
344     if (0 == conversionTasksCount) {
345         if (settings.filterUnpaired && settings.pairedReads) {
346             addSubTask(new FilterUnpairedReadsTask(settings));
347             return;
348         }
349         assemblyTask = new DnaAssemblyMultiTask(settings, viewResult, justBuildIndex);
350         assemblyTask->addListeners(getListeners());
351         addSubTask(assemblyTask);
352     }
353 }
354 
onSubTaskFinished(Task * subTask)355 QList<Task *> DnaAssemblyTaskWithConversions::onSubTaskFinished(Task *subTask) {
356     QList<Task *> result;
357     FilterUnpairedReadsTask *filterTask = qobject_cast<FilterUnpairedReadsTask *>(subTask);
358     if (filterTask != nullptr) {
359         settings.shortReadSets = filterTask->getFilteredReadList();
360     }
361     CHECK(!subTask->hasError(), result);
362     CHECK(!hasError(), result);
363 
364     ConvertFileTask *convertTask = qobject_cast<ConvertFileTask *>(subTask);
365     if (nullptr != convertTask) {
366         SAFE_POINT_EXT(conversionTasksCount > 0, setError("Conversions task count error"), result);
367         if (convertTask->getSourceURL() == settings.refSeqUrl) {
368             settings.refSeqUrl = convertTask->getResult();
369         }
370 
371         for (QList<ShortReadSet>::Iterator i = settings.shortReadSets.begin(); i != settings.shortReadSets.end(); i++) {
372             if (convertTask->getSourceURL() == i->url) {
373                 i->url = convertTask->getResult();
374             }
375         }
376         conversionTasksCount--;
377 
378         if (0 == conversionTasksCount) {
379             if (settings.filterUnpaired && settings.pairedReads) {
380                 result << new FilterUnpairedReadsTask(settings);
381                 return result;
382             }
383             assemblyTask = new DnaAssemblyMultiTask(settings, viewResult, justBuildIndex);
384             result << assemblyTask;
385         }
386     }
387     if (settings.filterUnpaired && filterTask != nullptr) {
388         assemblyTask = new DnaAssemblyMultiTask(settings, viewResult, justBuildIndex);
389         result << assemblyTask;
390     }
391 
392     return result;
393 }
394 
report()395 Task::ReportResult DnaAssemblyTaskWithConversions::report() {
396     if (settings.filterUnpaired && settings.pairedReads) {
397         foreach (const ShortReadSet &set, settings.shortReadSets) {
398             if (!QFile::remove(set.url.getURLString())) {
399                 stateInfo.addWarning(tr("Cannot remove temporary file %1").arg(set.url.getURLString()));
400             }
401         }
402     }
403     return ReportResult_Finished;
404 }
405 
406 }  // namespace U2
407