1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include "GenomeAlignerWorker.h"
23
24 #include <U2Algorithm/OpenCLGpuRegistry.h>
25
26 #include <U2Core/AppContext.h>
27 #include <U2Core/FailTask.h>
28 #include <U2Core/GUrlUtils.h>
29 #include <U2Core/Log.h>
30
31 #include <U2Designer/DelegateEditors.h>
32
33 #include <U2Gui/DialogUtils.h>
34
35 #include <U2Lang/ActorPrototypeRegistry.h>
36 #include <U2Lang/BaseActorCategories.h>
37 #include <U2Lang/BaseAttributes.h>
38 #include <U2Lang/BasePorts.h>
39 #include <U2Lang/BaseSlots.h>
40 #include <U2Lang/BaseTypes.h>
41 #include <U2Lang/CoreLibConstants.h>
42 #include <U2Lang/IntegralBusModel.h>
43 #include <U2Lang/WorkflowEnv.h>
44 #include <U2Lang/WorkflowMonitor.h>
45
46 #include "GenomeAlignerPlugin.h"
47
48 namespace U2 {
49 namespace LocalWorkflow {
50
51 static const QString IN_TYPE_ID("Bowtie2-data");
52 static const QString OUT_TYPE_ID("Bowtie2-data-out");
53 static const QString IN_PORT_DESCR("in-data");
54 static const QString OUT_PORT_DESCR("out-data");
55 static const QString INDEX_PORT_ID("in-gen-al-index");
56 static const QString INDEX_OUT_PORT_ID("out-gen-al-index");
57 static const QString OUTPUT_DIR("output-dir");
58 static const QString OUTPUT_NAME = "outname";
59
60 static const QString READS_URL_SLOT_ID("readsurl");
61 static const QString READS_PAIRED_URL_SLOT_ID("readspairedurl");
62
63 static const QString ASSEBLY_OUT_SLOT_ID("assembly-out");
64
65 static const QString BASE_GENOME_ALIGNER_SUBDIR("genome_aligner");
66 static const QString BASE_GENOME_ALIGNER_OUTFILE("out.sam");
67
68 const QString GenomeAlignerWorkerFactory::ACTOR_ID("genome-aligner");
69
70 static const QString ABS_OR_PERC_MISMATCHES_ATTR("if-absolute-mismatches-value");
71 static const QString MISMATCHES_ATTR("absolute-mismatches");
72 static const QString PERCENT_MISMATCHES_ATTR("percentage-mismatches");
73 static const QString REVERSE_ATTR("reverse");
74 static const QString BEST_ATTR("best");
75 static const QString GPU_ATTR("gpu");
76 static const QString QUAL_ATTR("quality-threshold");
77 static const QString REFERENCE_GENOME("reference");
78 static const QString REFERENCE_INPUT_TYPE = "reference-input-type";
79 static const QString INDEX_DIR("index-dir");
80 static const QString INDEX_BASENAME("index-basename");
81
82 /************************************************************************/
83 /* Genome aligner worker */
84 /************************************************************************/
85
GenomeAlignerWorker(Actor * a)86 GenomeAlignerWorker::GenomeAlignerWorker(Actor *a)
87 : BaseWorker(a, false), inChannel(nullptr), output(nullptr) {
88 }
89
init()90 void GenomeAlignerWorker::init() {
91 inChannel = ports.value(IN_PORT_DESCR);
92 output = ports.value(OUT_PORT_DESCR);
93 }
94
tick()95 Task *GenomeAlignerWorker::tick() {
96 if (inChannel->hasMessage()) {
97 U2OpStatus2Log os;
98 if (inChannel->isEnded()) {
99 algoLog.error(GenomeAlignerWorker::tr("Short reads list is empty."));
100 return nullptr;
101 }
102 Message m = getMessageAndSetupScriptValues(inChannel);
103 QVariantMap data = m.getData().toMap();
104
105 // settings.indexFileName = getValue<QString>(REFERENCE_GENOME);
106 DnaAssemblyToRefTaskSettings settings = getSettings(os);
107 if (os.hasError()) {
108 return new FailTask(os.getError());
109 }
110 QString readsUrl = data[READS_URL_SLOT_ID].toString();
111 if (data.contains(READS_PAIRED_URL_SLOT_ID)) {
112 // paired
113 QString readsPairedUrl = data[READS_PAIRED_URL_SLOT_ID].toString();
114 settings.shortReadSets.append(ShortReadSet(readsUrl, ShortReadSet::PairedEndReads, ShortReadSet::UpstreamMate));
115 settings.shortReadSets.append(ShortReadSet(readsPairedUrl, ShortReadSet::PairedEndReads, ShortReadSet::DownstreamMate));
116 settings.pairedReads = true;
117 } else {
118 // single
119 settings.shortReadSets.append(ShortReadSet(readsUrl, ShortReadSet::SingleEndReads, ShortReadSet::UpstreamMate));
120 settings.pairedReads = false;
121 }
122
123 Task *t = new GenomeAlignerTask(settings);
124 connect(t, SIGNAL(si_stateChanged()), SLOT(sl_taskFinished()));
125 return t;
126 } else if (inChannel->isEnded()) {
127 setDone();
128 output->setEnded();
129 }
130 return nullptr;
131 }
132
cleanup()133 void GenomeAlignerWorker::cleanup() {
134 }
135
sl_taskFinished()136 void GenomeAlignerWorker::sl_taskFinished() {
137 GenomeAlignerTask *t = dynamic_cast<GenomeAlignerTask *>(sender());
138 if (!t->isFinished() || t->hasError() || t->isCanceled()) {
139 return;
140 }
141
142 QString url = t->getSettings().resultFileName.getURLString();
143
144 QVariantMap data;
145 data[ASSEBLY_OUT_SLOT_ID] = qVariantFromValue<QString>(url);
146 output->put(Message(output->getBusType(), data));
147
148 context->getMonitor()->addOutputFile(url, getActor()->getId());
149
150 if (inChannel->isEnded() && !inChannel->hasMessage()) {
151 setDone();
152 output->setEnded();
153 }
154 }
155
getSettings(U2OpStatus & os)156 DnaAssemblyToRefTaskSettings GenomeAlignerWorker::getSettings(U2OpStatus &os) {
157 DnaAssemblyToRefTaskSettings settings;
158
159 QString referenceInputType = getValue<QString>(REFERENCE_INPUT_TYPE);
160 if (referenceInputType == DnaAssemblyToRefTaskSettings::INDEX) {
161 settings.prebuiltIndex = true;
162 settings.indexDir = getValue<QString>(INDEX_DIR);
163 settings.indexBasename = getValue<QString>(INDEX_BASENAME);
164
165 QString baseUrl = QDir(settings.indexDir).filePath(settings.indexBasename);
166 settings.refSeqUrl = baseUrl;
167 settings.indexFileName = baseUrl;
168 } else {
169 settings.refSeqUrl = getValue<QString>(REFERENCE_GENOME);
170 settings.prebuiltIndex = false;
171
172 settings.indexDir = "";
173 settings.indexBasename = "";
174 settings.indexFileName = QDir(settings.refSeqUrl.dirPath()).filePath(settings.refSeqUrl.baseFileName());
175 }
176
177 QString outDir = GUrlUtils::createDirectory(
178 getValue<QString>(OUTPUT_DIR) + QDir::separator() + BASE_GENOME_ALIGNER_SUBDIR,
179 "_",
180 os);
181 CHECK_OP(os, settings);
182
183 if (!outDir.endsWith(QDir::separator())) {
184 outDir = outDir + QDir::separator();
185 }
186
187 QString outFileName = getValue<QString>(OUTPUT_NAME);
188 if (outFileName.isEmpty()) {
189 outFileName = BASE_GENOME_ALIGNER_OUTFILE;
190 }
191 settings.resultFileName = outDir + outFileName;
192
193 bool absMismatches = actor->getParameter(ABS_OR_PERC_MISMATCHES_ATTR)->getAttributeValue<bool>(context);
194 settings.setCustomValue(GenomeAlignerTask::OPTION_IF_ABS_MISMATCHES, absMismatches);
195 int nMismatches = actor->getParameter(MISMATCHES_ATTR)->getAttributeValue<int>(context);
196 settings.setCustomValue(GenomeAlignerTask::OPTION_MISMATCHES, nMismatches);
197 int ptMismatches = actor->getParameter(PERCENT_MISMATCHES_ATTR)->getAttributeValue<int>(context);
198 settings.setCustomValue(GenomeAlignerTask::OPTION_PERCENTAGE_MISMATCHES, ptMismatches);
199 bool alignReverse = actor->getParameter(REVERSE_ATTR)->getAttributeValue<bool>(context);
200 settings.setCustomValue(GenomeAlignerTask::OPTION_ALIGN_REVERSED, alignReverse);
201 bool best = actor->getParameter(BEST_ATTR)->getAttributeValue<bool>(context);
202 settings.setCustomValue(GenomeAlignerTask::OPTION_BEST, best);
203 int qual = actor->getParameter(QUAL_ATTR)->getAttributeValue<int>(context);
204 settings.setCustomValue(GenomeAlignerTask::OPTION_QUAL_THRESHOLD, qual);
205 if (GenomeAlignerWorkerFactory::openclEnabled) {
206 bool gpu = actor->getParameter(GPU_ATTR)->getAttributeValue<bool>(context);
207 settings.setCustomValue(GenomeAlignerTask::OPTION_OPENCL, gpu);
208 }
209 return settings;
210 }
211
composeRichDoc()212 QString GenomeAlignerPrompter::composeRichDoc() {
213 QString res = "";
214
215 Actor *readsProducer = qobject_cast<IntegralBusPort *>(target->getPort(IN_PORT_DESCR))->getProducer(READS_URL_SLOT_ID);
216
217 QString unsetStr = "<font color='red'>" + tr("unset") + "</font>";
218 QString readsUrl = readsProducer ? readsProducer->getLabel() : unsetStr;
219
220 res.append(tr("Maps input reads from <u>%1</u> ").arg(readsUrl));
221
222 QVariant inputType = getParameter(REFERENCE_INPUT_TYPE);
223 if (inputType == DnaAssemblyToRefTaskSettings::INDEX) {
224 QString baseName = getHyperlink(INDEX_BASENAME, getURL(INDEX_BASENAME));
225 res.append(tr(" to reference sequence with index <u>%1</u>.").arg(baseName));
226 } else {
227 QString genome = getHyperlink(REFERENCE_GENOME, getURL(REFERENCE_GENOME));
228 res.append(tr(" to reference sequence <u>%1</u>.").arg(genome));
229 }
230
231 return res;
232 }
233
234 /************************************************************************/
235 /* Factory */
236 /************************************************************************/
237 bool GenomeAlignerWorkerFactory::openclEnabled(false);
238
239 class GenomeAlignerInputSlotsValidator : public PortValidator {
240 public:
validate(const IntegralBusPort * port,NotificationsList & notificationList) const241 bool validate(const IntegralBusPort *port, NotificationsList ¬ificationList) const {
242 QVariant busMap = port->getParameter(Workflow::IntegralBusPort::BUS_MAP_ATTR_ID)->getAttributePureValue();
243 bool data = isBinded(busMap.value<StrStrMap>(), READS_URL_SLOT_ID);
244 if (!data) {
245 QString dataName = slotName(port, READS_URL_SLOT_ID);
246 notificationList.append(WorkflowNotification(GenomeAlignerWorker::tr("The slot must be not empty: '%1'").arg(dataName)));
247 return false;
248 }
249
250 QString slot1Val = busMap.value<StrStrMap>().value(READS_URL_SLOT_ID);
251 QString slot2Val = busMap.value<StrStrMap>().value(READS_PAIRED_URL_SLOT_ID);
252 U2OpStatusImpl os;
253 const QList<IntegralBusSlot> &slots1 = IntegralBusSlot::listFromString(slot1Val, os);
254 const QList<IntegralBusSlot> &slots2 = IntegralBusSlot::listFromString(slot2Val, os);
255
256 bool hasCommonElements = false;
257
258 foreach (const IntegralBusSlot &ibsl1, slots1) {
259 if (hasCommonElements) {
260 break;
261 }
262 foreach (const IntegralBusSlot &ibsl2, slots2) {
263 if (ibsl1 == ibsl2) {
264 hasCommonElements = true;
265 break;
266 }
267 }
268 }
269
270 if (hasCommonElements) {
271 notificationList.append(WorkflowNotification(GenomeAlignerWorker::tr("Bowtie2 cannot recognize read pairs from the same file. Please, perform demultiplexing first.")));
272 return false;
273 }
274
275 return true;
276 }
277 };
278
init()279 void GenomeAlignerWorkerFactory::init() {
280 QList<PortDescriptor *> p;
281 // in port
282 QMap<Descriptor, DataTypePtr> inTypeMap;
283 Descriptor readsDesc(READS_URL_SLOT_ID,
284 GenomeAlignerWorker::tr("URL of a file with reads"),
285 GenomeAlignerWorker::tr("Input reads to be aligned."));
286 Descriptor readsPairedDesc(READS_PAIRED_URL_SLOT_ID,
287 GenomeAlignerWorker::tr("URL of a file with mate reads"),
288 GenomeAlignerWorker::tr("Input mate reads to be aligned."));
289
290 inTypeMap[readsDesc] = BaseTypes::STRING_TYPE();
291 inTypeMap[readsPairedDesc] = BaseTypes::STRING_TYPE();
292
293 Descriptor inPortDesc(IN_PORT_DESCR,
294 GenomeAlignerWorker::tr("Genome aligner data"),
295 GenomeAlignerWorker::tr("Input reads to be aligned with Bowtie2."));
296
297 DataTypePtr inTypeSet(new MapDataType(IN_TYPE_ID, inTypeMap));
298 p << new PortDescriptor(inPortDesc, inTypeSet, true);
299 // out port
300 QMap<Descriptor, DataTypePtr> outTypeMap;
301 Descriptor assemblyOutDesc(ASSEBLY_OUT_SLOT_ID,
302 GenomeAlignerWorker::tr("Assembly URL"),
303 GenomeAlignerWorker::tr("Output assembly URL."));
304
305 Descriptor outPortDesc(OUT_PORT_DESCR,
306 GenomeAlignerWorker::tr("Genome aligner output data"),
307 GenomeAlignerWorker::tr("Output assembly files."));
308
309 outTypeMap[assemblyOutDesc] = BaseTypes::STRING_TYPE();
310
311 DataTypePtr outTypeSet(new MapDataType(OUT_TYPE_ID, outTypeMap));
312 p << new PortDescriptor(outPortDesc, outTypeSet, false, true);
313
314 QList<Attribute *> attrs;
315 {
316 Descriptor outDir(OUTPUT_DIR,
317 GenomeAlignerWorker::tr("Output folder"),
318 GenomeAlignerWorker::tr("Folder to save UGENE genome aligner output files."));
319
320 Descriptor outName(OUTPUT_NAME,
321 GenomeAlignerWorker::tr("Output file name"),
322 GenomeAlignerWorker::tr("Base name of the output file. 'out.sam' by default"));
323
324 Descriptor referenceInputType(REFERENCE_INPUT_TYPE,
325 GenomeAlignerWorker::tr("Reference input type"),
326 GenomeAlignerWorker::tr("Select \"Sequence\" to input a reference genome as a sequence file. "
327 "<br/>Note that any sequence file format, supported by UGENE, is allowed (FASTA, GenBank, etc.). "
328 "<br/>The index will be generated automatically in this case. "
329 "<br/>Select \"Index\" to input already generated index files, specific for the tool."));
330
331 Descriptor refGenome(REFERENCE_GENOME,
332 GenomeAlignerWorker::tr("Reference genome"),
333 GenomeAlignerWorker::tr("Path to indexed reference genome."));
334
335 Descriptor indexDir(INDEX_DIR,
336 GenomeAlignerWorker::tr("Genome Aligner index folder"),
337 GenomeAlignerWorker::tr("The folder with the index for the reference sequence."));
338
339 Descriptor indexBasename(INDEX_BASENAME,
340 GenomeAlignerWorker::tr("Genome Aligner index basename"),
341 GenomeAlignerWorker::tr("The basename of the index for the reference sequence."));
342
343 Descriptor absMismatches(ABS_OR_PERC_MISMATCHES_ATTR,
344 GenomeAlignerWorker::tr("Is absolute mismatches values?"),
345 GenomeAlignerWorker::tr("<html><body><p><b>true</b> - absolute mismatches mode is used</p><p><b>false</b> - percentage mismatches mode is used</p>\
346 You can choose absolute or percentage mismatches values mode.</body></html>"));
347 Descriptor mismatches(MISMATCHES_ATTR,
348 GenomeAlignerWorker::tr("Absolute mismatches"),
349 GenomeAlignerWorker::tr("<html><body>Number of mismatches allowed while aligning reads.</body></html>"));
350 Descriptor ptMismatches(PERCENT_MISMATCHES_ATTR,
351 GenomeAlignerWorker::tr("Percentage mismatches"),
352 GenomeAlignerWorker::tr("<html><body>Percentage of mismatches allowed while aligning reads.</body></html>"));
353 Descriptor reverse(REVERSE_ATTR,
354 GenomeAlignerWorker::tr("Align reverse complement reads"),
355 GenomeAlignerWorker::tr("<html><body>Set this option to align both direct and reverse complement reads.</body></html>"));
356 Descriptor best(BEST_ATTR,
357 GenomeAlignerWorker::tr("Use \"best\"-mode"),
358 GenomeAlignerWorker::tr("<html><body>Report only the best alignment for each read (in terms of mismatches).</body></html>"));
359 Descriptor qual(QUAL_ATTR,
360 GenomeAlignerWorker::tr("Omit reads with qualities lower than"),
361 GenomeAlignerWorker::tr("<html><body>Omit reads with qualities lower than the specified value. Reads that have no qualities are not omited.\
362 <p>Set <b>\"0\"</b> to switch off this option.</p></body></html>"));
363
364 #ifdef OPENCL_SUPPORT
365 openclEnabled = AppContext::getOpenCLGpuRegistry()->getEnabledGpu() != nullptr;
366 #endif
367 if (openclEnabled) {
368 Descriptor gpu(GPU_ATTR, GenomeAlignerWorker::tr("Use GPU-optimization"), GenomeAlignerWorker::tr("<html><body>Use GPU-calculatings while aligning reads. This option requires OpenCL-enable GPU-device.</body></html>"));
369
370 attrs << new Attribute(gpu, BaseTypes::BOOL_TYPE(), false /*required*/, false);
371 }
372
373 attrs << new Attribute(referenceInputType, BaseTypes::STRING_TYPE(), true, QVariant(DnaAssemblyToRefTaskSettings::SEQUENCE));
374 Attribute *attrRefGenom = new Attribute(refGenome, BaseTypes::STRING_TYPE(), Attribute::Required | Attribute::NeedValidateEncoding, QVariant(""));
375 attrRefGenom->addRelation(new VisibilityRelation(REFERENCE_INPUT_TYPE, DnaAssemblyToRefTaskSettings::SEQUENCE));
376 attrs << attrRefGenom;
377 Attribute *attrIndexDir = new Attribute(indexDir, BaseTypes::STRING_TYPE(), Attribute::Required | Attribute::NeedValidateEncoding, QVariant(""));
378 attrIndexDir->addRelation(new VisibilityRelation(REFERENCE_INPUT_TYPE, DnaAssemblyToRefTaskSettings::INDEX));
379 attrs << attrIndexDir;
380 Attribute *attrIndexBasename = new Attribute(indexBasename, BaseTypes::STRING_TYPE(), Attribute::Required | Attribute::NeedValidateEncoding, QVariant(""));
381 attrIndexBasename->addRelation(new VisibilityRelation(REFERENCE_INPUT_TYPE, DnaAssemblyToRefTaskSettings::INDEX));
382 attrs << attrIndexBasename;
383
384 attrs << new Attribute(outDir, BaseTypes::STRING_TYPE(), Attribute::Required | Attribute::NeedValidateEncoding, QVariant(""));
385 attrs << new Attribute(outName, BaseTypes::STRING_TYPE(), Attribute::Required | Attribute::NeedValidateEncoding, QVariant(BASE_GENOME_ALIGNER_OUTFILE));
386 attrs << new Attribute(absMismatches, BaseTypes::BOOL_TYPE(), true /*required*/, true);
387 Attribute *mismatchesAttr = new Attribute(mismatches, BaseTypes::NUM_TYPE(), false, 0);
388 mismatchesAttr->addRelation(new VisibilityRelation(ABS_OR_PERC_MISMATCHES_ATTR, QVariant(true)));
389 attrs << mismatchesAttr;
390 Attribute *ptMismatchesAttr = new Attribute(ptMismatches, BaseTypes::NUM_TYPE(), false, 0);
391 ptMismatchesAttr->addRelation(new VisibilityRelation(ABS_OR_PERC_MISMATCHES_ATTR, QVariant(false)));
392 attrs << ptMismatchesAttr;
393 attrs << new Attribute(reverse, BaseTypes::BOOL_TYPE(), false /*required*/, false);
394 attrs << new Attribute(best, BaseTypes::BOOL_TYPE(), false /*required*/, true);
395 attrs << new Attribute(qual, BaseTypes::NUM_TYPE(), false /*required*/, 0);
396 }
397
398 Descriptor desc(ACTOR_ID, GenomeAlignerWorker::tr("Map Reads with UGENE Genome Aligner"), GenomeAlignerWorker::tr("Genome Aligner is a program for mapping short DNA sequence reads"
399 " to a long reference sequence, developed by the UGENE team."
400 "<br/><br/>Provide URL(s) to FASTA or FASTQ file(s) with NGS reads to the input"
401 " port of the element, set up the reference sequence in the parameters."
402 " The result is saved to the specified SAM file, URL to the file is passed"
403 " to the output port."));
404 ActorPrototype *proto = new IntegralBusActorPrototype(desc, p, attrs);
405
406 QMap<QString, PropertyDelegate *> delegates;
407
408 {
409 QVariantMap rip;
410 rip[GenomeAlignerWorker::tr("Sequence")] = DnaAssemblyToRefTaskSettings::SEQUENCE;
411 rip[GenomeAlignerWorker::tr("Index")] = DnaAssemblyToRefTaskSettings::INDEX;
412 delegates[REFERENCE_INPUT_TYPE] = new ComboBoxDelegate(rip);
413
414 delegates[REFERENCE_GENOME] = new URLDelegate("", "", false, false, false);
415 delegates[INDEX_DIR] = new URLDelegate("", "", false, true, false, nullptr, "", true);
416
417 delegates[OUTPUT_DIR] = new URLDelegate("", "", false, true);
418 delegates[ABS_OR_PERC_MISMATCHES_ATTR] = new ComboBoxWithBoolsDelegate();
419
420 QVariantMap m;
421 m["minimum"] = 0;
422 m["maximum"] = 3;
423 delegates[MISMATCHES_ATTR] = new SpinBoxDelegate(m);
424
425 QVariantMap ptM;
426 ptM["minimum"] = 0;
427 ptM["maximum"] = 10;
428 delegates[PERCENT_MISMATCHES_ATTR] = new SpinBoxDelegate(ptM);
429
430 QVariantMap q;
431 q["minimum"] = 0;
432 q["maximum"] = 70;
433 delegates[QUAL_ATTR] = new SpinBoxDelegate(q);
434 }
435
436 proto->setEditor(new DelegateEditor(delegates));
437 proto->setPrompter(new GenomeAlignerPrompter());
438 proto->setPortValidator(IN_PORT_DESCR, new GenomeAlignerInputSlotsValidator());
439 WorkflowEnv::getProtoRegistry()->registerProto(BaseActorCategories::CATEGORY_NGS_MAP_ASSEMBLE_READS(), proto);
440
441 DomainFactory *localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
442 localDomain->registerEntry(new GenomeAlignerWorkerFactory());
443 }
444 } // namespace LocalWorkflow
445 } // namespace U2
446