1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include "BowtieWorker.h"
23 
24 #include <U2Algorithm/DnaAssemblyTask.h>
25 
26 #include <U2Designer/DelegateEditors.h>
27 
28 #include <U2Lang/ActorPrototypeRegistry.h>
29 #include <U2Lang/BaseActorCategories.h>
30 #include <U2Lang/BaseTypes.h>
31 #include <U2Lang/WorkflowEnv.h>
32 
33 #include "BowtieSupport.h"
34 #include "BowtieTask.h"
35 
36 namespace U2 {
37 namespace LocalWorkflow {
38 
39 const QString BowtieWorkerFactory::ACTOR_ID("align-reads-with-bowtie");
40 
41 static const QString MISMATCHES_TYPE = "mismatches_type";
42 static const QString MISMATCHES_NUMBER = "mismatches_number";
43 static const QString MAQERR = "maqerr";
44 static const QString SEED_LEN = "seedLen";
45 static const QString NOFW = "nofw";
46 static const QString NORC = "norc";
47 static const QString MAXBTS = "maxbts";
48 static const QString TRYHARD = "tryhard";
49 static const QString CHUNKMBS = "chunkmbs";
50 static const QString NOMAQROUND = "nomaqround";
51 static const QString SEED = "seed";
52 static const QString BEST = "best";
53 static const QString ALL = "all";
54 static const QString COLORSPACE = "colorspace";
55 
56 static const QString BASE_Bowtie_SUBDIR("bowtie");
57 static const QString BASE_Bowtie_OUTFILE("out.sam");
58 
59 /************************************************************************/
60 /* Worker */
61 /************************************************************************/
BowtieWorker(Actor * p)62 BowtieWorker::BowtieWorker(Actor *p)
63     : BaseShortReadsAlignerWorker(p, BowtieTask::taskName) {
64 }
65 
getCustomParameters() const66 QVariantMap BowtieWorker::getCustomParameters() const {
67     QMap<QString, QVariant> customSettings;
68 
69     QString mismatchesType = getValue<QString>(MISMATCHES_TYPE);
70     if (mismatchesType == "-n mode") {
71         customSettings.insert(BowtieTask::OPTION_N_MISMATCHES, getValue<int>(MISMATCHES_NUMBER));
72     } else {
73         customSettings.insert(BowtieTask::OPTION_V_MISMATCHES, getValue<int>(MISMATCHES_NUMBER));
74     }
75 
76     customSettings.insert(BowtieTask::OPTION_MAQERR, getValue<int>(MAQERR));
77     customSettings.insert(BowtieTask::OPTION_MAXBTS, getValue<int>(MAXBTS));
78     customSettings.insert(BowtieTask::OPTION_SEED_LEN, getValue<int>(SEED_LEN));
79     customSettings.insert(BowtieTask::OPTION_CHUNKMBS, getValue<int>(CHUNKMBS));
80     customSettings.insert(BowtieTask::OPTION_SEED, getValue<int>(SEED));
81 
82     customSettings.insert(BowtieTask::OPTION_NOFW, getValue<bool>(NOFW));
83     customSettings.insert(BowtieTask::OPTION_NORC, getValue<bool>(NORC));
84     customSettings.insert(BowtieTask::OPTION_TRYHARD, getValue<bool>(TRYHARD));
85     customSettings.insert(BowtieTask::OPTION_BEST, getValue<bool>(BEST));
86     customSettings.insert(BowtieTask::OPTION_ALL, getValue<bool>(ALL));
87     customSettings.insert(BowtieTask::OPTION_NOMAQROUND, getValue<bool>(NOMAQROUND));
88     customSettings.insert(BowtieTask::OPTION_THREADS, 4);
89 
90     return customSettings;
91 }
92 
getDefaultFileName() const93 QString BowtieWorker::getDefaultFileName() const {
94     return BASE_Bowtie_OUTFILE;
95 }
96 
getBaseSubdir() const97 QString BowtieWorker::getBaseSubdir() const {
98     return BASE_Bowtie_SUBDIR;
99 }
100 
setGenomeIndex(DnaAssemblyToRefTaskSettings & settings)101 void BowtieWorker::setGenomeIndex(DnaAssemblyToRefTaskSettings &settings) {
102     settings.refSeqUrl = getValue<QString>(REFERENCE_GENOME);
103 
104     QStringList suffixes = QStringList() << BowtieTask::indexSuffixes << BowtieTask::largeIndexSuffixes;
105     if (!DnaAssemblyToReferenceTask::isIndexUrl(settings.refSeqUrl.getURLString(), suffixes)) {
106         settings.indexFileName = QDir(settings.refSeqUrl.dirPath()).filePath(settings.refSeqUrl.baseFileName());
107     }
108 }
109 
110 /************************************************************************/
111 /* Factory */
112 /************************************************************************/
init()113 void BowtieWorkerFactory::init() {
114     QList<Attribute *> attrs;
115     QMap<QString, PropertyDelegate *> delegates;
116     addCommonAttributes(attrs, delegates, BowtieWorker::tr("Bowtie index folder"), BowtieWorker::tr("Bowtie index basename"));
117     {
118         static const QString MISMATCHES_TYPE = "mismatches_type";
119         static const QString N_MISMATCHES = "n-mismatches";
120         static const QString V_MISMATCHES = "v-mismatches";
121         static const QString MAQERR = "maqerr";
122         static const QString SEED_LEN = "seedLen";
123         static const QString NOFW = "nofw";
124         static const QString NORC = "norc";
125         static const QString MAXBTS = "maxbts";
126         static const QString TRYHARD = "tryhard";
127         static const QString CHUNKMBS = "chunkmbs";
128         static const QString NOMAQROUND = "nomaqround";
129         static const QString SEED = "seed";
130         static const QString BEST = "best";
131         static const QString ALL = "all";
132         static const QString COLORSPACE = "colorspace";
133         static const QString THREADS = "threads";
134 
135         Descriptor mismatchesType(MISMATCHES_TYPE,
136                                   BowtieWorker::tr("Mode"),
137                                   BowtieWorker::tr("When the -n option is specified (which is the default), bowtie determines which alignments \
138                               are valid according to the following policy, which is similar to Maq's default policy. \
139                               In -v mode, alignments may have no more than V mismatches, where V may be a number from 0 \
140                               through 3 set using the -v option. Quality values are ignored. The -v option is mutually exclusive with the -n option."));
141 
142         Descriptor mismatchesNumber(MISMATCHES_NUMBER,
143                                     BowtieWorker::tr("Mismatches number"),
144                                     BowtieWorker::tr("Mismatches number."));
145 
146         Descriptor maqError(MAQERR,
147                             BowtieWorker::tr("Mismatches number"),
148                             BowtieWorker::tr("Maximum permitted total of quality values at all mismatched read positions throughout the entire alignment, \
149                              not just in the seed. The default is 70. Like Maq, bowtie rounds quality values to the nearest 10 and saturates at 30; \
150                              rounding can be disabled with --nomaqround."));
151 
152         Descriptor seedLen(SEED_LEN,
153                            BowtieWorker::tr("Seed length"),
154                            BowtieWorker::tr("The "
155                                             "seed length"
156                                             "; i.e., the number of bases on the high-quality end of the read to which the \
157                               -n ceiling applies. The lowest permitted setting is 5 and the default is 28. bowtie is faster for larger values of -l."));
158 
159         Descriptor noForward(NOFW,
160                              BowtieWorker::tr("No forward orientation"),
161                              BowtieWorker::tr("If --nofw is specified, bowtie will not attempt to align against the forward reference strand."));
162 
163         Descriptor noReverse(NORC,
164                              BowtieWorker::tr("No reverse-complement orientation"),
165                              BowtieWorker::tr("If --norc is specified, bowtie will not attempt to align against the reverse-complement reference strand."));
166 
167         Descriptor maxBacktracks(MAXBTS,
168                                  BowtieWorker::tr("Maximum of backtracks"),
169                                  BowtieWorker::tr("The maximum insert size for valid paired-end alignments. E.g. if -X 100 is specified and a paired-end alignment \
170                               consists of two 20-bp alignments in the proper orientation with a 60-bp gap between them, that alignment is \
171                               considered valid (as long as -I is also satisfied). A 61-bp gap would not be valid in that case.  \
172                               If trimming options -3 or -5 are also used, the -X constraint is applied with respect to the untrimmed mates, \
173                               not the trimmed mates. Default: 250."));
174 
175         Descriptor tryHard(TRYHARD,
176                            BowtieWorker::tr("Try as hard"),
177                            BowtieWorker::tr("Try as hard as possible to find valid alignments when they exist, including paired-end alignments. \
178                                This is equivalent to specifying very high values for the --maxbts and --pairtries options. This mode is generally much \
179                                slower than the default settings, but can be useful for certain problems. This mode is slower when (a) the reference is \
180                                very repetitive, (b) the reads are low quality, or (c) not many reads have valid alignments."));
181 
182         Descriptor chunkmbs(CHUNKMBS,
183                             BowtieWorker::tr("Best hits"),
184                             BowtieWorker::tr("The number of megabytes of memory a given thread is given to store path descriptors in --best mode. Best-first \
185                                search must keep track of many paths at once to ensure it is always extending the path with the lowest cumulative cost. \
186                                Bowtie tries to minimize the memory impact of the descriptors, but they can still grow very large in some cases. \
187                                If you receive an error message saying that chunk memory has been exhausted in --best mode, \
188                                try adjusting this parameter up to dedicate more memory to the descriptors. Default: 64."));
189 
190         Descriptor noMaqRounding(NOMAQROUND,
191                                  BowtieWorker::tr("No Maq rounding"),
192                                  BowtieWorker::tr("Maq accepts quality values in the Phred quality scale, but internally rounds values to the nearest 10, \
193              with a maximum of 30. By default, bowtie also rounds this way. --nomaqround prevents this rounding in bowtie."));
194 
195         Descriptor seed(SEED,
196                         BowtieWorker::tr("Seed"),
197                         BowtieWorker::tr("Use <int> as the seed for pseudo-random number generator."));
198 
199         Descriptor best(BEST,
200                         BowtieWorker::tr("Best alignments"),
201                         BowtieWorker::tr("Make Bowtie guarantee that reported singleton alignments are "
202                                          "best"
203                                          " in terms of stratum \
204                              (i.e. number of mismatches, or mismatches in the seed in the case of -n mode) and in terms of \
205                              the quality values at the mismatched position(s). bowtie is somewhat slower when --best is specified."));
206 
207         Descriptor allAln(ALL,
208                           BowtieWorker::tr("All alignments"),
209                           BowtieWorker::tr("Report all valid alignments per read or pair."));
210 
211         Descriptor colorspace(COLORSPACE,
212                               BowtieWorker::tr("Colorspace"),
213                               BowtieWorker::tr("When -C is specified, read sequences are treated as colors. Colors may be encoded either as numbers \
214                               (0=blue, 1=green, 2=orange, 3=red) or as characters A/C/G/T (A=blue, C=green, G=orange, T=red)."));
215 
216         attrs << new Attribute(mismatchesType, BaseTypes::STRING_TYPE(), false, QVariant("-n mode"));
217         attrs << new Attribute(mismatchesNumber, BaseTypes::NUM_TYPE(), false, QVariant(2));
218         attrs << new Attribute(maqError, BaseTypes::NUM_TYPE(), false, QVariant(70));
219 
220         attrs << new Attribute(seedLen, BaseTypes::NUM_TYPE(), false, QVariant(28));
221         attrs << new Attribute(maxBacktracks, BaseTypes::NUM_TYPE(), false, QVariant(800));
222         attrs << new Attribute(chunkmbs, BaseTypes::NUM_TYPE(), false, QVariant(64));
223         attrs << new Attribute(seed, BaseTypes::NUM_TYPE(), false, QVariant(0));
224 
225         attrs << new Attribute(colorspace, BaseTypes::BOOL_TYPE(), false, QVariant(false));
226         attrs << new Attribute(noMaqRounding, BaseTypes::BOOL_TYPE(), false, QVariant(false));
227         attrs << new Attribute(noForward, BaseTypes::BOOL_TYPE(), false, QVariant(false));
228         attrs << new Attribute(noReverse, BaseTypes::BOOL_TYPE(), false, QVariant(false));
229         attrs << new Attribute(tryHard, BaseTypes::BOOL_TYPE(), false, QVariant(false));
230         attrs << new Attribute(best, BaseTypes::BOOL_TYPE(), false, QVariant(false));
231         attrs << new Attribute(allAln, BaseTypes::BOOL_TYPE(), false, QVariant(false));
232     }
233 
234     {
235         QVariantMap spinMap;
236         spinMap["minimum"] = QVariant(0);
237         spinMap["maximum"] = QVariant(INT_MAX);
238         delegates[MISMATCHES_NUMBER] = new SpinBoxDelegate(spinMap);
239         delegates[MAQERR] = new SpinBoxDelegate(spinMap);
240         delegates[SEED_LEN] = new SpinBoxDelegate(spinMap);
241         delegates[SEED] = new SpinBoxDelegate(spinMap);
242         delegates[MAXBTS] = new SpinBoxDelegate(spinMap);
243         delegates[CHUNKMBS] = new SpinBoxDelegate(spinMap);
244 
245         QVariantMap vm;
246         vm["-n mode"] = "-n mode";
247         vm["-v mode"] = "-v mode";
248         delegates[MISMATCHES_TYPE] = new ComboBoxDelegate(vm);
249     }
250 
251     Descriptor protoDesc(BowtieWorkerFactory::ACTOR_ID,
252                          BowtieWorker::tr("Map Reads with Bowtie"),
253                          BowtieWorker::tr("Bowtie is a program for mapping short DNA sequence reads to a long reference sequence."
254                                           " It uses Burrows-Wheeler techniques extended with quality-aware backtracking"
255                                           " algorithm that permits mismatches."
256                                           "<br/><br/>Provide URL(s) to FASTA or FASTQ file(s) with NGS reads to the input"
257                                           " port of the element, set up the reference sequence in the parameters."
258                                           " The result is saved to the specified SAM file, URL to the file is passed"
259                                           " to the output port."));
260 
261     ActorPrototype *proto = new IntegralBusActorPrototype(protoDesc, getPortDescriptors(), attrs);
262     proto->setPrompter(new ShortReadsAlignerPrompter());
263     proto->setEditor(new DelegateEditor(delegates));
264     proto->setPortValidator(IN_PORT_DESCR, new ShortReadsAlignerSlotsValidator);
265     proto->addExternalTool(BowtieSupport::ET_BOWTIE_ID);
266     WorkflowEnv::getProtoRegistry()->registerProto(BaseActorCategories::CATEGORY_NGS_MAP_ASSEMBLE_READS(), proto);
267     WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID)->registerEntry(new BowtieWorkerFactory());
268 }
269 
createWorker(Actor * a)270 Worker *BowtieWorkerFactory::createWorker(Actor *a) {
271     return new BowtieWorker(a);
272 }
273 
274 }    // namespace LocalWorkflow
275 }    // namespace U2
276