1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include "BedToolsWorkersLibrary.h"
23
24 #include <QTextStream>
25
26 #include <U2Core/AnnotationTableObject.h>
27 #include <U2Core/AppContext.h>
28 #include <U2Core/Counter.h>
29 #include <U2Core/DataPathRegistry.h>
30 #include <U2Core/FileAndDirectoryUtils.h>
31 #include <U2Core/GUrlUtils.h>
32 #include <U2Core/TaskSignalMapper.h>
33 #include <U2Core/U2OpStatusUtils.h>
34 #include <U2Core/U2SafePoints.h>
35
36 #include <U2Designer/DelegateEditors.h>
37
38 #include <U2Lang/ActorPrototypeRegistry.h>
39 #include <U2Lang/BaseActorCategories.h>
40 #include <U2Lang/BaseAttributes.h>
41 #include <U2Lang/BasePorts.h>
42 #include <U2Lang/BaseSlots.h>
43 #include <U2Lang/BaseTypes.h>
44 #include <U2Lang/DbiDataStorage.h>
45 #include <U2Lang/IntegralBusModel.h>
46 #include <U2Lang/WorkflowEnv.h>
47 #include <U2Lang/WorkflowMonitor.h>
48
49 #include "BedtoolsIntersectTask.h"
50 #include "BedtoolsSupport.h"
51
52 namespace U2 {
53 namespace LocalWorkflow {
54
55 ///////////////////////////////////////////////////////////////
56 //Slopbed
57 const QString SlopbedWorkerFactory::ACTOR_ID("slopbed");
58 static const QString GENOME_ID("genome-id");
59 static const QString B_ID("b-id");
60 static const QString L_ID("l-id");
61 static const QString R_ID("r-id");
62 static const QString S_ID("s-id");
63 static const QString PCT_ID("pct-id");
64 static const QString HEADER_ID("header-id");
65 static const QString FILTER_ID("filter-id");
66
67 /************************************************************************/
68 /* SlopbedPrompter */
69 /************************************************************************/
composeRichDoc()70 QString SlopbedPrompter::composeRichDoc() {
71 IntegralBusPort *input = qobject_cast<IntegralBusPort *>(target->getPort(BaseNGSWorker::INPUT_PORT));
72 const Actor *producer = input->getProducer(BaseSlots::URL_SLOT().getId());
73 QString unsetStr = "<font color='red'>" + tr("unset") + "</font>";
74 QString producerName = tr(" from <u>%1</u>").arg(producer ? producer->getLabel() : unsetStr);
75
76 QString doc = tr("Increases the size of each feature in files from %1 with bedtool slop.").arg(producerName);
77 return doc;
78 }
79
80 /************************************************************************/
81 /* SlopbedWorkerFactory */
82 /************************************************************************/
init()83 void SlopbedWorkerFactory::init() {
84 //init data path
85 U2DataPath *dataPath = nullptr;
86 U2DataPathRegistry *dpr = AppContext::getDataPathRegistry();
87 if (dpr) {
88 U2DataPath *dp = dpr->getDataPathByName(BedtoolsSupport::GENOMES_DATA_NAME);
89 if (dp && dp->isValid()) {
90 dataPath = dp;
91 }
92 }
93 Descriptor desc(ACTOR_ID, SlopbedWorker::tr("slopBed"), SlopbedWorker::tr("Increases the size of each feature in files using bedtools slop."));
94
95 QList<PortDescriptor *> p;
96 {
97 Descriptor inD(BaseNGSWorker::INPUT_PORT, SlopbedWorker::tr("Input File"), SlopbedWorker::tr("Set of files to bedtools slop"));
98 Descriptor outD(BaseNGSWorker::OUTPUT_PORT, SlopbedWorker::tr("Output File"), SlopbedWorker::tr("Output file"));
99
100 QMap<Descriptor, DataTypePtr> inM;
101 inM[BaseSlots::URL_SLOT()] = BaseTypes::STRING_TYPE();
102 p << new PortDescriptor(inD, DataTypePtr(new MapDataType("sb.input-url", inM)), true);
103
104 QMap<Descriptor, DataTypePtr> outM;
105 outM[BaseSlots::URL_SLOT()] = BaseTypes::STRING_TYPE();
106 p << new PortDescriptor(outD, DataTypePtr(new MapDataType("sb.output-url", outM)), false, true);
107 }
108
109 QList<Attribute *> a;
110 {
111 Descriptor outDir(BaseNGSWorker::OUT_MODE_ID, SlopbedWorker::tr("Output folder"), SlopbedWorker::tr("Select an output folder. <b>Custom</b> - specify the output folder in the 'Custom folder' parameter. "
112 "<b>Workflow</b> - internal workflow folder. "
113 "<b>Input file</b> - the folder of the input file."));
114
115 Descriptor customDir(BaseNGSWorker::CUSTOM_DIR_ID, SlopbedWorker::tr("Custom folder"), SlopbedWorker::tr("Select the custom output folder."));
116
117 Descriptor outName(BaseNGSWorker::OUT_NAME_ID, SlopbedWorker::tr("Output file name"), SlopbedWorker::tr("A name of an output file. If default of empty value is provided the output name is the name of the first file with additional extension."));
118
119 Descriptor genomeAttrDesc(GENOME_ID, SlopbedWorker::tr("Genome"), SlopbedWorker::tr("In order to prevent the extension of intervals beyond chromosome boundaries, bedtools slop requires a genome file defining the length of each chromosome or contig. The format of the file is: <chromName><TAB><chromSize> (-g)."));
120
121 Descriptor bAttr(B_ID, SlopbedWorker::tr("Each direction increase"), SlopbedWorker::tr("Increase the BED/GFF/VCF entry by the same number base pairs in each direction. If this parameter is used -l and -l are ignored. Enter 0 to disable. (-b)"));
122
123 Descriptor lAttr(L_ID, SlopbedWorker::tr("Subtract from start"), SlopbedWorker::tr("The number of base pairs to subtract from the start coordinate. Enter 0 to disable. (-l)"));
124
125 Descriptor rAttr(R_ID, SlopbedWorker::tr("Add to end"), SlopbedWorker::tr("The number of base pairs to add to the end coordinate. Enter 0 to disable. (-r)"));
126
127 Descriptor sAttr(S_ID, SlopbedWorker::tr("Strand-based"), SlopbedWorker::tr("Define -l and -r based on strand. For example. if used, -l 500 for a negative-stranded feature, it will add 500 bp to the end coordinate. (-s)"));
128
129 Descriptor pctAttr(PCT_ID, SlopbedWorker::tr("As fraction"), SlopbedWorker::tr("Define -l and -r as a fraction of the feature’s length. E.g. if used on a 1000bp feature, -l 0.50, will add 500 bp “upstream”. (-pct)"));
130
131 Descriptor headerAttr(HEADER_ID, SlopbedWorker::tr("Print header"), SlopbedWorker::tr("Print the header from the input file prior to results. (-header)"));
132
133 Descriptor filterAttr(FILTER_ID, SlopbedWorker::tr("Filter start>end fields"), SlopbedWorker::tr("Remove lines with start position greater than end position"));
134
135 a << new Attribute(outDir, BaseTypes::NUM_TYPE(), false, QVariant(FileAndDirectoryUtils::WORKFLOW_INTERNAL));
136 Attribute *customDirAttr = new Attribute(customDir, BaseTypes::STRING_TYPE(), false, QVariant(""));
137 customDirAttr->addRelation(new VisibilityRelation(BaseNGSWorker::OUT_MODE_ID, FileAndDirectoryUtils::CUSTOM));
138 a << customDirAttr;
139 a << new Attribute(outName, BaseTypes::STRING_TYPE(), false, QVariant(BaseNGSWorker::DEFAULT_NAME));
140
141 Attribute *genomeAttr = nullptr;
142 if (dataPath) {
143 const QList<QString> &dataNames = dataPath->getDataNames();
144 if (!dataNames.isEmpty()) {
145 genomeAttr = new Attribute(genomeAttrDesc, BaseTypes::STRING_TYPE(), true, dataPath->getPathByName(dataNames.first()));
146 } else {
147 genomeAttr = new Attribute(genomeAttrDesc, BaseTypes::STRING_TYPE(), true);
148 }
149 } else {
150 genomeAttr = new Attribute(genomeAttrDesc, BaseTypes::STRING_TYPE(), true);
151 }
152 a << genomeAttr;
153
154 a << new Attribute(bAttr, BaseTypes::NUM_TYPE(), false, QVariant(0));
155 a << new Attribute(lAttr, BaseTypes::NUM_TYPE(), false, QVariant(0));
156 a << new Attribute(rAttr, BaseTypes::NUM_TYPE(), false, QVariant(0));
157 a << new Attribute(sAttr, BaseTypes::BOOL_TYPE(), false, QVariant(false));
158 a << new Attribute(pctAttr, BaseTypes::BOOL_TYPE(), false, QVariant(false));
159 a << new Attribute(headerAttr, BaseTypes::BOOL_TYPE(), false, QVariant(false));
160 a << new Attribute(filterAttr, BaseTypes::BOOL_TYPE(), false, QVariant(false));
161 }
162
163 QMap<QString, PropertyDelegate *> delegates;
164 {
165 QVariantMap directoryMap;
166 QString fileDir = SlopbedWorker::tr("Input file");
167 QString workflowDir = SlopbedWorker::tr("Workflow");
168 QString customD = SlopbedWorker::tr("Custom");
169 directoryMap[fileDir] = FileAndDirectoryUtils::FILE_DIRECTORY;
170 directoryMap[workflowDir] = FileAndDirectoryUtils::WORKFLOW_INTERNAL;
171 directoryMap[customD] = FileAndDirectoryUtils::CUSTOM;
172 delegates[BaseNGSWorker::OUT_MODE_ID] = new ComboBoxDelegate(directoryMap);
173
174 delegates[BaseNGSWorker::CUSTOM_DIR_ID] = new URLDelegate("", "", false, true);
175
176 QVariantMap vm;
177 if (dataPath) {
178 vm = dataPath->getDataItemsVariantMap();
179 }
180 delegates[GENOME_ID] = new ComboBoxWithUrlsDelegate(vm);
181
182 QVariantMap lenMap;
183 lenMap["minimum"] = QVariant(0);
184 lenMap["maximum"] = QVariant(INT_MAX);
185 delegates[B_ID] = new SpinBoxDelegate(lenMap);
186 delegates[L_ID] = new SpinBoxDelegate(lenMap);
187 delegates[R_ID] = new SpinBoxDelegate(lenMap);
188 }
189
190 ActorPrototype *proto = new IntegralBusActorPrototype(desc, p, a);
191 proto->setEditor(new DelegateEditor(delegates));
192 proto->setPrompter(new SlopbedPrompter());
193 proto->addExternalTool(BedtoolsSupport::ET_BEDTOOLS_ID);
194
195 WorkflowEnv::getProtoRegistry()->registerProto(BaseActorCategories::CATEGORY_NGS_BASIC(), proto);
196 DomainFactory *localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
197 localDomain->registerEntry(new SlopbedWorkerFactory());
198 }
199
200 /************************************************************************/
201 /* SlopbedWorker */
202 /************************************************************************/
SlopbedWorker(Actor * a)203 SlopbedWorker::SlopbedWorker(Actor *a)
204 : BaseNGSWorker(a) {
205 }
206
getCustomParameters() const207 QVariantMap SlopbedWorker::getCustomParameters() const {
208 QVariantMap res;
209 const QString genomePath = getValue<QString>(GENOME_ID);
210 if (!genomePath.isEmpty()) {
211 res["-g"] = genomePath;
212 }
213 const int b = getValue<int>(B_ID);
214 if (b != 0) {
215 res["-b"] = b;
216 } else {
217 const int l = getValue<int>(L_ID);
218 const int r = getValue<int>(R_ID);
219 res["-l"] = l;
220 res["-r"] = r;
221 }
222 const bool s = getValue<bool>(S_ID);
223 if (s) {
224 res["-s"] = "";
225 }
226 const int pct = getValue<bool>(PCT_ID);
227 if (pct) {
228 res["-pct"] = "";
229 }
230 const bool header = getValue<bool>(HEADER_ID);
231 if (header) {
232 res["-header"] = "";
233 }
234 const bool filter = getValue<bool>(FILTER_ID);
235 if (filter) {
236 res["-filter"] = "";
237 }
238 return res;
239 }
240
getDefaultFileName() const241 QString SlopbedWorker::getDefaultFileName() const {
242 return ".sb.bed";
243 }
244
getTask(const BaseNGSSetting & settings) const245 Task *SlopbedWorker::getTask(const BaseNGSSetting &settings) const {
246 return new SlopbedTask(settings);
247 }
248
249 //////////////////////////////////////////////////////
250 //SlopbedTask
SlopbedTask(const BaseNGSSetting & settings)251 SlopbedTask::SlopbedTask(const BaseNGSSetting &settings)
252 : BaseNGSTask(settings), filterLines(false) {
253 GCOUNTER(cvar, "NGS:SlopBedTask");
254 }
255
prepareStep()256 void SlopbedTask::prepareStep() {
257 Task *etTask = getExternalToolTask(BedtoolsSupport::ET_BEDTOOLS_ID);
258 CHECK(etTask != nullptr, );
259
260 addSubTask(etTask);
261 }
262
finishStep()263 void SlopbedTask::finishStep() {
264 if (filterLines) {
265 QFile f(getResult());
266 if (f.open(QIODevice::ReadWrite | QIODevice::Text)) {
267 QString s;
268 QTextStream t(&f);
269 while (!t.atEnd()) {
270 QString line = t.readLine();
271 QStringList fields = line.split('\t');
272 if (fields.length() >= 3) {
273 bool parsed = true;
274 qint64 start = fields.at(1).toInt(&parsed);
275 if (parsed) {
276 qint64 end = fields.at(2).toInt(&parsed);
277 if (parsed) {
278 if (start > end) {
279 continue;
280 }
281 }
282 }
283 }
284 s.append(line + "\n");
285 }
286 f.resize(0);
287 t << s;
288 f.close();
289 }
290 }
291 }
292
getParameters(U2OpStatus & os)293 QStringList SlopbedTask::getParameters(U2OpStatus &os) {
294 QStringList res;
295 res << "slop";
296 res << "-i";
297 res << settings.inputUrl;
298
299 if (!settings.customParameters.contains("-g")) {
300 os.setError("No genome file");
301 return res;
302 } else {
303 res << "-g";
304 res << settings.customParameters["-g"].toString();
305 }
306
307 if (!(settings.customParameters.contains("-b") || (settings.customParameters.contains("-r") && settings.customParameters.contains("-l")))) {
308 os.setError("Need -l and -r together or -b alone.");
309 return res;
310 }
311
312 if (settings.customParameters.contains("-b")) {
313 res << "-b";
314 res << settings.customParameters["-b"].toString();
315 } else {
316 if (settings.customParameters.contains("-r")) {
317 res << "-r";
318 res << settings.customParameters["-r"].toString();
319 }
320
321 if (settings.customParameters.contains("-l")) {
322 res << "-l";
323 res << settings.customParameters["-l"].toString();
324 }
325 }
326
327 if (settings.customParameters.contains("-s")) {
328 res << "-s";
329 }
330
331 if (settings.customParameters.contains("-pct")) {
332 res << "-pct";
333 }
334
335 if (settings.customParameters.contains("-header")) {
336 res << "-header";
337 }
338
339 if (settings.customParameters.contains("-filter")) {
340 filterLines = true;
341 }
342
343 return res;
344 }
345
346 ///////////////////////////////////////////////////////////////
347 //Genomecov
348 const QString GenomecovWorkerFactory::ACTOR_ID("genomecov");
349
350 static const QString MODE_ID("mode-id");
351 static const QString SPLIT_ID("split-id");
352 static const QString STRAND_ID("strand-id");
353 static const QString PRIME5_ID("prime5-id");
354 static const QString PRIME3_ID("prime3-id");
355 static const QString MAX_ID("max-id");
356 static const QString SCALE_ID("scale-id");
357 static const QString TRACKLINE_ID("trackline-id");
358 static const QString TRACKOPTS_ID("trackopts-id");
359
360 namespace {
361
// Report modes of the genomecov element. The numeric values are stored in the
// MODE_ID attribute, so they are spelled out explicitly.
enum GenomecovMode {
    Histograms = 0,  // no extra option
    EachPos1Based = 1,  // -d
    EachPos0Based = 2,  // -dz
    BedGraph = 3,  // -bg
    BedGraphIncludeZero = 4  // -bga
};
369
getDescriptionByMode(GenomecovMode mode)370 QString getDescriptionByMode(GenomecovMode mode) {
371 QString res = "";
372 if (mode == Histograms) {
373 res = QString("Compute a histogram of coverage.");
374
375 } else if (mode == EachPos0Based) {
376 res = QString("Compute the depth of feature coverage for each base on each chromosome (0-based).");
377
378 } else if (mode == EachPos1Based) {
379 res = QString("Compute the depth of feature coverage for each base on each chromosome (1-based).");
380
381 } else if (mode == BedGraph) {
382 res = QString("Produces genome-wide coverage output in BEDGRAPH format.");
383
384 } else if (mode == BedGraphIncludeZero) {
385 res = QString("Produces genome-wide coverage output in BEDGRAPH format (including uncovered).");
386 }
387
388 return res;
389 }
390
getNameByMode(GenomecovMode mode)391 QString getNameByMode(GenomecovMode mode) {
392 QString res = "";
393 if (mode == Histograms) {
394 res = QString("Histogram");
395
396 } else if (mode == EachPos0Based) {
397 res = QString("Per-base (0-based)");
398
399 } else if (mode == EachPos1Based) {
400 res = QString("Per-base (1-based)");
401
402 } else if (mode == BedGraph) {
403 res = QString("BEDGRAPH");
404
405 } else if (mode == BedGraphIncludeZero) {
406 res = QString("BEDGRAPH (including uncoveded)");
407 }
408
409 return res;
410 }
411
getParameterByMode(GenomecovMode mode)412 QString getParameterByMode(GenomecovMode mode) {
413 QString res = "";
414 if (mode == Histograms) {
415 res = QString("");
416
417 } else if (mode == EachPos0Based) {
418 res = QString("-dz");
419
420 } else if (mode == EachPos1Based) {
421 res = QString("-d");
422
423 } else if (mode == BedGraph) {
424 res = QString("-bg");
425
426 } else if (mode == BedGraphIncludeZero) {
427 res = QString("-bga");
428 }
429
430 return res;
431 }
432
433 } // namespace
434 /************************************************************************/
435 /* GenomecovPrompter */
436 /************************************************************************/
composeRichDoc()437 QString GenomecovPrompter::composeRichDoc() {
438 IntegralBusPort *input = qobject_cast<IntegralBusPort *>(target->getPort(BaseNGSWorker::INPUT_PORT));
439 const Actor *producer = input->getProducer(BaseSlots::URL_SLOT().getId());
440 QString unsetStr = "<font color='red'>" + tr("unset") + "</font>";
441 QString producerName = tr(" from <u>%1</u>").arg(producer ? producer->getLabel() : unsetStr);
442 QString descr = getDescriptionByMode(GenomecovMode(getParameter(MODE_ID).toInt()));
443
444 QString doc = tr("%1 from %2 with bedtool genomecov.").arg(descr).arg(producerName);
445 return doc;
446 }
447
448 /************************************************************************/
449 /* GenomecovWorkerFactory */
450 /************************************************************************/
init()451 void GenomecovWorkerFactory::init() {
452 //init data path
453 U2DataPath *dataPath = nullptr;
454 U2DataPathRegistry *dpr = AppContext::getDataPathRegistry();
455 if (dpr) {
456 U2DataPath *dp = dpr->getDataPathByName(BedtoolsSupport::GENOMES_DATA_NAME);
457 if (dp && dp->isValid()) {
458 dataPath = dp;
459 }
460 }
461 Descriptor desc(ACTOR_ID, GenomecovWorker::tr("Genome Coverage"), GenomecovWorker::tr("Calculates genome coverage using bedtools genomecov."));
462
463 QList<PortDescriptor *> p;
464 {
465 Descriptor inD(BaseNGSWorker::INPUT_PORT, GenomecovWorker::tr("Input File"), GenomecovWorker::tr("Set of files to NGS slop"));
466 Descriptor outD(BaseNGSWorker::OUTPUT_PORT, GenomecovWorker::tr("Output File"), GenomecovWorker::tr("Output file"));
467
468 QMap<Descriptor, DataTypePtr> inM;
469 inM[BaseSlots::URL_SLOT()] = BaseTypes::STRING_TYPE();
470 p << new PortDescriptor(inD, DataTypePtr(new MapDataType("sb.input-url", inM)), true);
471
472 QMap<Descriptor, DataTypePtr> outM;
473 outM[BaseSlots::URL_SLOT()] = BaseTypes::STRING_TYPE();
474 p << new PortDescriptor(outD, DataTypePtr(new MapDataType("sb.output-url", outM)), false, true);
475 }
476
477 QList<Attribute *> a;
478 {
479 Descriptor outDir(BaseNGSWorker::OUT_MODE_ID, GenomecovWorker::tr("Output folder"), GenomecovWorker::tr("Select an output folder. <b>Custom</b> - specify the output folder in the 'Custom folder' parameter. "
480 "<b>Workflow</b> - internal workflow folder. "
481 "<b>Input file</b> - the folder of the input file."));
482
483 Descriptor customDir(BaseNGSWorker::CUSTOM_DIR_ID, GenomecovWorker::tr("Custom folder"), GenomecovWorker::tr("Select the custom output folder."));
484
485 Descriptor outName(BaseNGSWorker::OUT_NAME_ID, GenomecovWorker::tr("Output file name"), GenomecovWorker::tr("A name of an output file. If default of empty value is provided the output name is the name of the first file with additional extension."));
486
487 Descriptor genomeAttrDesc(GENOME_ID, GenomecovWorker::tr("Genome"), GenomecovWorker::tr("In order to prevent the extension of intervals beyond chromosome boundaries, bedtools slop requires a genome file defining the length of each chromosome or contig. The format of the file is: <chromName><TAB><chromSize>. (-g)"));
488
489 Descriptor mAttr(MODE_ID, GenomecovWorker::tr("Report mode"), GenomecovWorker::tr("<b>%1 (%2)</b> - %3 \n"
490 "<b>%4 (%5)</b> - %6 \n"
491 "<b>%7 (%8)</b> - %9 \n"
492 "<b>%10 (%11)</b> - %12 \n"
493 "<b>%13 (%14)</b> - %15 \n")
494 .arg(getNameByMode(Histograms))
495 .arg(getParameterByMode(Histograms))
496 .arg(getDescriptionByMode(Histograms))
497 .arg(getNameByMode(EachPos0Based))
498 .arg(getParameterByMode(EachPos0Based))
499 .arg(getDescriptionByMode(EachPos0Based))
500 .arg(getNameByMode(EachPos1Based))
501 .arg(getParameterByMode(EachPos1Based))
502 .arg(getDescriptionByMode(EachPos1Based))
503 .arg(getNameByMode(BedGraph))
504 .arg(getParameterByMode(BedGraph))
505 .arg(getDescriptionByMode(BedGraph))
506 .arg(getNameByMode(BedGraphIncludeZero))
507 .arg(getParameterByMode(BedGraphIncludeZero))
508 .arg(getDescriptionByMode(BedGraphIncludeZero)));
509
510 Descriptor splitAttrDesc(SPLIT_ID, GenomecovWorker::tr("Split"), GenomecovWorker::tr("Treat “split” BAM or BED12 entries as distinct BED intervals when computing coverage. For BAM files, this uses the CIGAR “N” and “D” operations to infer the blocks for computing coverage. For BED12 files, this uses the BlockCount, BlockStarts, and BlockEnds fields (i.e., columns 10,11,12). (-split)"));
511
512 Descriptor strandAttrDesc(STRAND_ID, GenomecovWorker::tr("Strand"), GenomecovWorker::tr("Calculate coverage of intervals from a specific strand. With BED files, requires at least 6 columns (strand is column 6). (-strand)"));
513
514 Descriptor prime5AttrDesc(PRIME5_ID, GenomecovWorker::tr("5 prime"), GenomecovWorker::tr("Calculate coverage of 5’ positions (instead of entire interval). (-5)"));
515
516 Descriptor prime3AttrDesc(PRIME3_ID, GenomecovWorker::tr("3 prime"), GenomecovWorker::tr("Calculate coverage of 3’ positions (instead of entire interval). (-3)"));
517
518 Descriptor maxAttrDesc(MAX_ID, GenomecovWorker::tr("Max"), GenomecovWorker::tr("Combine all positions with a depth >= max into a single bin in the histogram. (-max)"));
519
520 Descriptor scaleAttrDesc(SCALE_ID, GenomecovWorker::tr("Scale"), GenomecovWorker::tr("Scale the coverage by a constant factor.Each coverage value is multiplied by this factor before being reported. Useful for normalizing coverage by, e.g., reads per million (RPM). Default is 1.0; i.e., unscaled. (-scale)"));
521
522 Descriptor tracklineAttrDesc(TRACKLINE_ID, GenomecovWorker::tr("Trackline"), GenomecovWorker::tr("Adds a UCSC/Genome-Browser track line definition in the first line of the output. (-trackline)"));
523
524 Descriptor trackoptsAttrDesc(TRACKOPTS_ID, GenomecovWorker::tr("Trackopts"), GenomecovWorker::tr("Writes additional track line definition parameters in the first line. (-trackopts)"));
525
526 a << new Attribute(outDir, BaseTypes::NUM_TYPE(), false, QVariant(FileAndDirectoryUtils::WORKFLOW_INTERNAL));
527 Attribute *customDirAttr = new Attribute(customDir, BaseTypes::STRING_TYPE(), false, QVariant(""));
528 customDirAttr->addRelation(new VisibilityRelation(BaseNGSWorker::OUT_MODE_ID, FileAndDirectoryUtils::CUSTOM));
529 a << customDirAttr;
530 a << new Attribute(outName, BaseTypes::STRING_TYPE(), false, QVariant(BaseNGSWorker::DEFAULT_NAME));
531
532 Attribute *genomeAttr = nullptr;
533 if (dataPath) {
534 const QList<QString> &dataNames = dataPath->getDataNames();
535 if (!dataNames.isEmpty()) {
536 genomeAttr = new Attribute(genomeAttrDesc, BaseTypes::STRING_TYPE(), true, dataPath->getPathByName(dataNames.first()));
537 } else {
538 genomeAttr = new Attribute(genomeAttrDesc, BaseTypes::STRING_TYPE(), true);
539 }
540 } else {
541 genomeAttr = new Attribute(genomeAttrDesc, BaseTypes::STRING_TYPE(), true);
542 }
543 a << genomeAttr;
544
545 a << new Attribute(mAttr, BaseTypes::NUM_TYPE(), false, QVariant(Histograms));
546 a << new Attribute(splitAttrDesc, BaseTypes::BOOL_TYPE(), false, QVariant(false));
547 a << new Attribute(strandAttrDesc, BaseTypes::BOOL_TYPE(), false, QVariant(false));
548 a << new Attribute(prime5AttrDesc, BaseTypes::BOOL_TYPE(), false, QVariant(false));
549 a << new Attribute(prime3AttrDesc, BaseTypes::BOOL_TYPE(), false, QVariant(false));
550 a << new Attribute(maxAttrDesc, BaseTypes::NUM_TYPE(), false, QVariant(INT_MAX));
551 a << new Attribute(scaleAttrDesc, BaseTypes::NUM_TYPE(), false, QVariant(1.0));
552 a << new Attribute(tracklineAttrDesc, BaseTypes::BOOL_TYPE(), false, QVariant(false));
553 a << new Attribute(trackoptsAttrDesc, BaseTypes::STRING_TYPE(), false, QVariant(""));
554 }
555
556 QMap<QString, PropertyDelegate *> delegates;
557 {
558 QVariantMap directoryMap;
559 QString fileDir = GenomecovWorker::tr("Input file");
560 QString workflowDir = GenomecovWorker::tr("Workflow");
561 QString customD = GenomecovWorker::tr("Custom");
562 directoryMap[fileDir] = FileAndDirectoryUtils::FILE_DIRECTORY;
563 directoryMap[workflowDir] = FileAndDirectoryUtils::WORKFLOW_INTERNAL;
564 directoryMap[customD] = FileAndDirectoryUtils::CUSTOM;
565 delegates[BaseNGSWorker::OUT_MODE_ID] = new ComboBoxDelegate(directoryMap);
566
567 delegates[BaseNGSWorker::CUSTOM_DIR_ID] = new URLDelegate("", "", false, true);
568
569 QVariantMap vm;
570 if (dataPath) {
571 vm = dataPath->getDataItemsVariantMap();
572 }
573 delegates[GENOME_ID] = new ComboBoxWithUrlsDelegate(vm);
574
575 QVariantMap modes;
576 modes[getNameByMode(Histograms)] = Histograms;
577 modes[getNameByMode(EachPos0Based)] = EachPos0Based;
578 modes[getNameByMode(EachPos1Based)] = EachPos1Based;
579 modes[getNameByMode(BedGraph)] = BedGraph;
580 modes[getNameByMode(BedGraphIncludeZero)] = BedGraphIncludeZero;
581 delegates[MODE_ID] = new ComboBoxDelegate(modes);
582
583 QVariantMap lenMap;
584 lenMap["minimum"] = QVariant(0);
585 lenMap["maximum"] = QVariant(INT_MAX);
586 delegates[MAX_ID] = new SpinBoxDelegate(lenMap);
587
588 QVariantMap lenFMap;
589 lenFMap["minimum"] = QVariant(0);
590 lenFMap["maximum"] = QVariant(1);
591 delegates[SCALE_ID] = new DoubleSpinBoxDelegate(lenFMap);
592 }
593
594 ActorPrototype *proto = new IntegralBusActorPrototype(desc, p, a);
595 proto->setEditor(new DelegateEditor(delegates));
596 proto->setPrompter(new GenomecovPrompter());
597 proto->addExternalTool(BedtoolsSupport::ET_BEDTOOLS_ID);
598
599 WorkflowEnv::getProtoRegistry()->registerProto(BaseActorCategories::CATEGORY_NGS_BASIC(), proto);
600 DomainFactory *localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
601 localDomain->registerEntry(new GenomecovWorkerFactory());
602 }
603
604 /************************************************************************/
605 /* GenomecovWorker */
606 /************************************************************************/
GenomecovWorker(Actor * a)607 GenomecovWorker::GenomecovWorker(Actor *a)
608 : BaseNGSWorker(a) {
609 }
610
getCustomParameters() const611 QVariantMap GenomecovWorker::getCustomParameters() const {
612 QVariantMap res;
613 const QString genomePath = getValue<QString>(GENOME_ID);
614 if (!genomePath.isEmpty()) {
615 res["-g"] = genomePath;
616 }
617 const QString mode = getParameterByMode(GenomecovMode(getValue<int>(MODE_ID)));
618 if (!mode.isEmpty()) {
619 res[mode] = "";
620 }
621
622 if (getValue<bool>(SPLIT_ID)) {
623 res["-split"] = "";
624 }
625 if (getValue<bool>(STRAND_ID)) {
626 res["-strand"] = "";
627 }
628 if (getValue<bool>(PRIME5_ID)) {
629 res["-5"] = "";
630 }
631 if (getValue<bool>(PRIME3_ID)) {
632 res["-3"] = "";
633 }
634 if (getValue<bool>(TRACKLINE_ID)) {
635 res["-trackline"] = "";
636 const QString trackopts = getValue<QString>(TRACKOPTS_ID);
637 if (!trackopts.isEmpty()) {
638 res["-trackopts"] = trackopts;
639 }
640 }
641
642 const int max = getValue<int>(MAX_ID);
643 if (max != INT_MAX) {
644 res["-max"] = max;
645 }
646 const float scale = getValue<float>(SCALE_ID);
647 if (scale != 1.0) {
648 res["-scale"] = scale;
649 }
650 return res;
651 }
652
getDefaultFileName() const653 QString GenomecovWorker::getDefaultFileName() const {
654 return ".gc";
655 }
656
getTask(const BaseNGSSetting & settings) const657 Task *GenomecovWorker::getTask(const BaseNGSSetting &settings) const {
658 return new GenomecovTask(settings);
659 }
660
661 //////////////////////////////////////////////////////
662 //GenomecovTask
GenomecovTask(const BaseNGSSetting & settings)663 GenomecovTask::GenomecovTask(const BaseNGSSetting &settings)
664 : BaseNGSTask(settings) {
665 GCOUNTER(cvar, "NGS:GenomeCovTask");
666 }
667
prepareStep()668 void GenomecovTask::prepareStep() {
669 Task *etTask = getExternalToolTask(BedtoolsSupport::ET_BEDTOOLS_ID);
670 CHECK(etTask != nullptr, );
671
672 addSubTask(etTask);
673 }
674
getParameters(U2OpStatus & os)675 QStringList GenomecovTask::getParameters(U2OpStatus &os) {
676 QStringList res;
677 res << "genomecov";
678
679 const QString detectedFormat = FileAndDirectoryUtils::detectFormat(settings.inputUrl);
680 if (detectedFormat.isEmpty()) {
681 os.setError(tr("Unknown file format: ") + settings.inputUrl);
682 return res;
683 }
684
685 if (detectedFormat == BaseDocumentFormats::BAM) {
686 res << "-ibam";
687 res << settings.inputUrl;
688 } else {
689 res << "-i";
690 res << settings.inputUrl;
691
692 if (!settings.customParameters.contains("-g")) {
693 os.setError("No genome file");
694 return res;
695 } else {
696 res << "-g";
697 res << settings.customParameters["-g"].toString();
698 }
699 }
700
701 if (settings.customParameters.contains("-d")) {
702 res << "-d";
703 }
704
705 if (settings.customParameters.contains("-dz")) {
706 res << "-dz";
707 }
708
709 if (settings.customParameters.contains("-bg")) {
710 res << "-bg";
711 }
712
713 if (settings.customParameters.contains("-bga")) {
714 res << "-bga";
715 }
716
717 if (settings.customParameters.contains("-split")) {
718 res << "-split";
719 }
720
721 if (settings.customParameters.contains("-strand")) {
722 res << "-strand";
723 }
724
725 if (settings.customParameters.contains("-5")) {
726 res << "-5";
727 }
728
729 if (settings.customParameters.contains("-3")) {
730 res << "-3";
731 }
732
733 if (settings.customParameters.contains("-trackline")) {
734 res << "-trackline";
735 if (settings.customParameters.contains("-trackopts")) {
736 res << "-trackopts";
737 res << settings.customParameters["-trackopts"].toString();
738 }
739 }
740
741 if (settings.customParameters.contains("-max")) {
742 res << "-max";
743 res << settings.customParameters["-max"].toString();
744 }
745
746 if (settings.customParameters.contains("-scale")) {
747 res << "-scale";
748 res << settings.customParameters["-scale"].toString();
749 }
750 return res;
751 }
752
753 /************************************************************************/
754 /* IntersectAnnotationsWorker */
755 /************************************************************************/
756
757 const QString BedtoolsIntersectWorkerFactory::ACTOR_ID("intersect-annotations");
758
759 const static QString IN_PORT_A_ID("input-annotations-a");
760 const static QString IN_PORT_B_ID("input-annotations-b");
761 const static QString OUT_PORT_ID("output-intersect-annotations");
762
763 const static QString MIN_OVERLAP("minimum-overlap");
764 const static QString REPORT("report");
765 const static QString UNIQUE("unique");
766
BedtoolsIntersectWorker(Actor * a)767 BedtoolsIntersectWorker::BedtoolsIntersectWorker(Actor *a)
768 : BaseWorker(a, false),
769 inputA(nullptr),
770 inputB(nullptr),
771 output(nullptr) {
772 }
773
init()774 void BedtoolsIntersectWorker::init() {
775 inputA = ports.value(IN_PORT_A_ID);
776 inputB = ports.value(IN_PORT_B_ID);
777 output = ports.value(OUT_PORT_ID);
778 }
779
tick()780 Task *BedtoolsIntersectWorker::tick() {
781 storeMessages(inputA, storeA);
782 storeMessages(inputB, storeB);
783
784 if (inputA->isEnded() && inputB->isEnded()) {
785 return createTask();
786 }
787
788 return nullptr;
789 }
790
isReady() const791 bool BedtoolsIntersectWorker::isReady() const {
792 if (isDone()) {
793 return false;
794 }
795
796 int hasA = inputA->hasMessage();
797 bool endedA = inputA->isEnded();
798
799 int hasB = inputB->hasMessage();
800 bool endedB = inputB->isEnded();
801
802 return hasA || hasB || (endedA && endedB);
803 }
804
sl_taskFinished(Task * task)805 void BedtoolsIntersectWorker::sl_taskFinished(Task *task) {
806 if (task->isCanceled() || task->hasError()) {
807 return;
808 }
809 BedtoolsIntersectAnnotationsByEntityTask *intersectTask = qobject_cast<BedtoolsIntersectAnnotationsByEntityTask *>(task);
810 if (intersectTask == nullptr) {
811 return;
812 }
813 setDone();
814
815 QList<GObject *> objList = intersectTask->getResult();
816 CHECK_EXT(!objList.isEmpty(), output->setEnded(), );
817
818 foreach (GObject *gObj, objList) {
819 AnnotationTableObject *obj = qobject_cast<AnnotationTableObject *>(gObj);
820 CHECK_EXT(obj != nullptr, output->setEnded(), );
821 const SharedDbiDataHandler tableId = context->getDataStorage()->putAnnotationTable(obj);
822 output->put(Message(BaseTypes::ANNOTATION_TABLE_TYPE(),
823 qVariantFromValue<SharedDbiDataHandler>(tableId)));
824 }
825
826 output->setEnded();
827 }
828
createTask()829 Task *BedtoolsIntersectWorker::createTask() {
830 BedtoolsIntersectByEntityRefSettings settings;
831
832 settings.minOverlap = actor->getParameter(MIN_OVERLAP)->getAttributeValue<double>(context) / 100;
833 settings.report = (BedtoolsIntersectSettings::Report)(actor->getParameter(REPORT)->getAttributeValue<int>(context));
834 settings.unique = actor->getParameter(UNIQUE)->getAttributeValue<bool>(context);
835
836 settings.entitiesA = getAnnotationsEntityRefFromMessages(storeA, IN_PORT_A_ID);
837 settings.entitiesB = getAnnotationsEntityRefFromMessages(storeB, IN_PORT_B_ID);
838
839 BedtoolsIntersectAnnotationsByEntityTask *t = new BedtoolsIntersectAnnotationsByEntityTask(settings);
840 t->addListeners(createLogListeners());
841 connect(new TaskSignalMapper(t), SIGNAL(si_taskFinished(Task *)), SLOT(sl_taskFinished(Task *)));
842 return t;
843 }
844
/**
 * Converts stored messages into entity references of their annotation tables.
 *
 * Processing stops at the first empty message and the references collected up to
 * that point are returned. A message whose reference cannot be resolved is logged
 * as an error and skipped, so the result never contains a default-constructed
 * (invalid) U2EntityRef.
 *
 * @param mList  messages previously read from an input port.
 * @param portId slot identifier under which the annotation table is stored in each message.
 * @return entity references of the successfully resolved messages, in message order.
 */
QList<U2EntityRef> BedtoolsIntersectWorker::getAnnotationsEntityRefFromMessages(const QList<Message> &mList, const QString &portId) {
    QList<U2EntityRef> res;

    for (const Message &m : mList) {
        CHECK(!m.isEmpty(), res);

        // A fresh status per message: an error from one message must not leak into the next one.
        U2OpStatusImpl os;
        const U2EntityRef ref = getAnnotationsEntityRef(m, portId, os);
        if (os.hasError()) {
            coreLog.error(os.getError());
            continue;
        }
        res << ref;
    }
    return res;
}
856
getAnnotationsEntityRef(const Message & m,const QString & portId,U2OpStatus & os)857 U2EntityRef BedtoolsIntersectWorker::getAnnotationsEntityRef(const Message &m, const QString &portId, U2OpStatus &os) {
858 const QVariantMap data = m.getData().toMap();
859 CHECK_EXT(data.contains(portId), os.setError(tr("Data not found by %1 id").arg(portId)), U2EntityRef());
860
861 const SharedDbiDataHandler dbiHandler = data[portId].value<SharedDbiDataHandler>();
862 const AnnotationTableObject *obj = StorageUtils::getAnnotationTableObject(context->getDataStorage(), dbiHandler);
863 CHECK_EXT(obj != nullptr, os.setError(tr("Can not get annotation table object")), U2EntityRef());
864
865 return obj->getEntityRef();
866 }
867
storeMessages(IntegralBus * bus,QList<Message> & store)868 void BedtoolsIntersectWorker::storeMessages(IntegralBus *bus, QList<Message> &store) {
869 while (bus->hasMessage()) {
870 store << getMessageAndSetupScriptValues(bus);
871 }
872 }
873
init()874 void BedtoolsIntersectWorkerFactory::init() {
875 QList<PortDescriptor *> portDescs;
876 {
877 Descriptor inDescA(IN_PORT_A_ID, BedtoolsIntersectWorker::tr("Annotations A"), BedtoolsIntersectWorker::tr("Annotations A"));
878 QMap<Descriptor, DataTypePtr> inM_A;
879 inM_A[inDescA] = BaseTypes::ANNOTATION_TABLE_TYPE();
880 portDescs << new PortDescriptor(inDescA, DataTypePtr(new MapDataType("in.anns.a", inM_A)), /*input*/ true);
881
882 Descriptor inDescB(IN_PORT_B_ID, BedtoolsIntersectWorker::tr("Annotations B"), BedtoolsIntersectWorker::tr("Annotations B"));
883 QMap<Descriptor, DataTypePtr> inM_B;
884 inM_B[inDescB] = BaseTypes::ANNOTATION_TABLE_TYPE();
885 portDescs << new PortDescriptor(inDescB, DataTypePtr(new MapDataType("in.anns.b", inM_B)), /*input*/ true);
886
887 Descriptor outDesc(OUT_PORT_ID, BedtoolsIntersectWorker::tr("Annotations"), BedtoolsIntersectWorker::tr("Result annotations"));
888 QMap<Descriptor, DataTypePtr> outM;
889 outM[outDesc] = BaseTypes::ANNOTATION_TABLE_TYPE();
890 portDescs << new PortDescriptor(outDesc, DataTypePtr(new MapDataType("out.anns", outM)), /*intput*/ false);
891 }
892
893 QList<Attribute *> attribs;
894 {
895 Descriptor minOverlapDesc(MIN_OVERLAP,
896 BedtoolsIntersectWorker::tr("Minimum overlap"),
897 BedtoolsIntersectWorker::tr("Minimum overlap required as a fraction of an annotation from set A."
898 "<br/>By default, even 1 bp overlap between annotations from set A and set B is taken into account."
899 " Yet sometimes you may want to restrict reported overlaps to cases where the annotations in B overlaps"
900 " at least X% (e.g. 50%) of the A annotation. "));
901
902 Descriptor reportDesc(REPORT,
903 BedtoolsIntersectWorker::tr("Result annotations"),
904 BedtoolsIntersectWorker::tr("Select one of the following:"
905 "<ul>"
906 "<li><i>Shared interval</i> to report intervals shared"
907 " between overlapped annotations from set A and set B."
908 "</li>"
909 "<li><i>Overlapped annotations from A</i> to report annotations"
910 " from set A that have an overlap with annotations from set B."
911 "</li>"
912 "<li><i>Non-overlapped annotations from A</i> to report annotations"
913 " from set A that have NO overlap with annotations from set B."
914 "</li></ul>"));
915 Descriptor uniqueDesc(UNIQUE,
916 BedtoolsIntersectWorker::tr("Unique overlaps"),
917 BedtoolsIntersectWorker::tr("If the parameter value is \"True\", write original A entry once if any overlaps found in B."
918 " In other words, just report the fact at least one overlap was found in B.<br/>"
919 "The minimum overlap number is ignored in this case.<br/><br/>"
920 "If the parameter value is \"False\", the A annotation is reported for every overlap found."));
921
922 attribs << new Attribute(reportDesc, BaseTypes::NUM_TYPE(), /*required*/ false, QVariant(BedtoolsIntersectSettings::Report_OverlapedA));
923
924 Attribute *uniqueAttr = new Attribute(uniqueDesc, BaseTypes::BOOL_TYPE(), /*required*/ false, QVariant(true));
925 uniqueAttr->addRelation(new VisibilityRelation(REPORT, QVariantList() << BedtoolsIntersectSettings::Report_OverlapedA));
926 attribs << uniqueAttr;
927
928 Attribute *minOverlapAttr = new Attribute(minOverlapDesc, BaseTypes::NUM_TYPE(), /*required*/ false, QVariant(BedtoolsIntersectSettings::DEFAULT_MIN_OVERLAP * 100));
929 minOverlapAttr->addRelation(new VisibilityRelation(REPORT, QVariantList() << BedtoolsIntersectSettings::Report_Intervals << BedtoolsIntersectSettings::Report_OverlapedA));
930 minOverlapAttr->addRelation(new VisibilityRelation(UNIQUE, QVariantList() << false));
931 attribs << minOverlapAttr;
932 }
933
934 QMap<QString, PropertyDelegate *> delegates;
935 {
936 QVariantMap spinMap;
937 spinMap["minimum"] = QVariant(BedtoolsIntersectSettings::DEFAULT_MIN_OVERLAP * 100);
938 spinMap["maximum"] = QVariant(100);
939 spinMap["suffix"] = QVariant("%");
940 spinMap["decimals"] = 7;
941 delegates[MIN_OVERLAP] = new DoubleSpinBoxDelegate(spinMap);
942
943 QVariantMap comboMap;
944 comboMap["Shared interval"] = BedtoolsIntersectSettings::Report_Intervals;
945 comboMap["Overlapped annotations from A"] = BedtoolsIntersectSettings::Report_OverlapedA;
946 comboMap["Non-overlapped annotations from A"] = BedtoolsIntersectSettings::Report_NonOverlappedA;
947 delegates[REPORT] = new ComboBoxDelegate(comboMap);
948
949 delegates[UNIQUE] = new ComboBoxWithBoolsDelegate();
950 }
951
952 Descriptor desc(BedtoolsIntersectWorkerFactory::ACTOR_ID,
953 BedtoolsIntersectWorker::tr("Intersect Annotations"),
954 BedtoolsIntersectWorker::tr("Intersects two sets of annotations denoted as A and B."));
955 ActorPrototype *proto = new IntegralBusActorPrototype(desc, portDescs, attribs);
956 proto->setPrompter(new BedtoolsIntersectPrompter());
957 proto->setEditor(new DelegateEditor(delegates));
958 proto->addExternalTool(BedtoolsSupport::ET_BEDTOOLS_ID);
959
960 WorkflowEnv::getProtoRegistry()->registerProto(BaseActorCategories::CATEGORY_BASIC(), proto);
961 DomainFactory *localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
962 localDomain->registerEntry(new BedtoolsIntersectWorkerFactory());
963 }
964
composeRichDoc()965 QString BedtoolsIntersectPrompter::composeRichDoc() {
966 QString a = getProducersOrUnset(IN_PORT_A_ID, IN_PORT_A_ID);
967 QString b = getProducersOrUnset(IN_PORT_B_ID, IN_PORT_B_ID);
968
969 QString res = QString(tr("Intersect annotations from <u>%1</u> (<b>set A</b>) with annotations from <u>%2</u> (<b>set B</b>). Report ")
970 .arg(a)
971 .arg(b));
972
973 BedtoolsIntersectSettings::Report r = (BedtoolsIntersectSettings::Report)target->getParameter(REPORT)->getAttributePureValue().toInt();
974 QString reportHyperlinkText;
975 switch (r) {
976 case BedtoolsIntersectSettings::Report_Intervals:
977 reportHyperlinkText = "intervals shared between annotations from <b>set A</b> and <b>set B</b>.";
978 break;
979 case BedtoolsIntersectSettings::Report_OverlapedA:
980 reportHyperlinkText = "overlapped annotations from <b>set A</b>.";
981 break;
982 case BedtoolsIntersectSettings::Report_NonOverlappedA:
983 reportHyperlinkText = "non-overlapped annotations from <b>set A</b>.";
984 }
985
986 if (target->getParameter(UNIQUE)->getAttributePureValue().toBool() && r == BedtoolsIntersectSettings::Report_OverlapedA) {
987 res.append(getHyperlink(UNIQUE, "<u>unique</u> "));
988 }
989
990 res.append(getHyperlink(REPORT, "<u>" + reportHyperlinkText + "</u>"));
991 return res;
992 }
993
994 } // namespace LocalWorkflow
995 } // namespace U2
996