1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include "BowtieWorker.h"
23
24 #include <U2Algorithm/DnaAssemblyTask.h>
25
26 #include <U2Designer/DelegateEditors.h>
27
28 #include <U2Lang/ActorPrototypeRegistry.h>
29 #include <U2Lang/BaseActorCategories.h>
30 #include <U2Lang/BaseTypes.h>
31 #include <U2Lang/WorkflowEnv.h>
32
33 #include "BowtieSupport.h"
34 #include "BowtieTask.h"
35
36 namespace U2 {
37 namespace LocalWorkflow {
38
39 const QString BowtieWorkerFactory::ACTOR_ID("align-reads-with-bowtie");
40
41 static const QString MISMATCHES_TYPE = "mismatches_type";
42 static const QString MISMATCHES_NUMBER = "mismatches_number";
43 static const QString MAQERR = "maqerr";
44 static const QString SEED_LEN = "seedLen";
45 static const QString NOFW = "nofw";
46 static const QString NORC = "norc";
47 static const QString MAXBTS = "maxbts";
48 static const QString TRYHARD = "tryhard";
49 static const QString CHUNKMBS = "chunkmbs";
50 static const QString NOMAQROUND = "nomaqround";
51 static const QString SEED = "seed";
52 static const QString BEST = "best";
53 static const QString ALL = "all";
54 static const QString COLORSPACE = "colorspace";
55
56 static const QString BASE_Bowtie_SUBDIR("bowtie");
57 static const QString BASE_Bowtie_OUTFILE("out.sam");
58
59 /************************************************************************/
60 /* Worker */
61 /************************************************************************/
BowtieWorker(Actor * p)62 BowtieWorker::BowtieWorker(Actor *p)
63 : BaseShortReadsAlignerWorker(p, BowtieTask::taskName) {
64 }
65
getCustomParameters() const66 QVariantMap BowtieWorker::getCustomParameters() const {
67 QMap<QString, QVariant> customSettings;
68
69 QString mismatchesType = getValue<QString>(MISMATCHES_TYPE);
70 if (mismatchesType == "-n mode") {
71 customSettings.insert(BowtieTask::OPTION_N_MISMATCHES, getValue<int>(MISMATCHES_NUMBER));
72 } else {
73 customSettings.insert(BowtieTask::OPTION_V_MISMATCHES, getValue<int>(MISMATCHES_NUMBER));
74 }
75
76 customSettings.insert(BowtieTask::OPTION_MAQERR, getValue<int>(MAQERR));
77 customSettings.insert(BowtieTask::OPTION_MAXBTS, getValue<int>(MAXBTS));
78 customSettings.insert(BowtieTask::OPTION_SEED_LEN, getValue<int>(SEED_LEN));
79 customSettings.insert(BowtieTask::OPTION_CHUNKMBS, getValue<int>(CHUNKMBS));
80 customSettings.insert(BowtieTask::OPTION_SEED, getValue<int>(SEED));
81
82 customSettings.insert(BowtieTask::OPTION_NOFW, getValue<bool>(NOFW));
83 customSettings.insert(BowtieTask::OPTION_NORC, getValue<bool>(NORC));
84 customSettings.insert(BowtieTask::OPTION_TRYHARD, getValue<bool>(TRYHARD));
85 customSettings.insert(BowtieTask::OPTION_BEST, getValue<bool>(BEST));
86 customSettings.insert(BowtieTask::OPTION_ALL, getValue<bool>(ALL));
87 customSettings.insert(BowtieTask::OPTION_NOMAQROUND, getValue<bool>(NOMAQROUND));
88 customSettings.insert(BowtieTask::OPTION_THREADS, 4);
89
90 return customSettings;
91 }
92
getDefaultFileName() const93 QString BowtieWorker::getDefaultFileName() const {
94 return BASE_Bowtie_OUTFILE;
95 }
96
getBaseSubdir() const97 QString BowtieWorker::getBaseSubdir() const {
98 return BASE_Bowtie_SUBDIR;
99 }
100
setGenomeIndex(DnaAssemblyToRefTaskSettings & settings)101 void BowtieWorker::setGenomeIndex(DnaAssemblyToRefTaskSettings &settings) {
102 settings.refSeqUrl = getValue<QString>(REFERENCE_GENOME);
103
104 QStringList suffixes = QStringList() << BowtieTask::indexSuffixes << BowtieTask::largeIndexSuffixes;
105 if (!DnaAssemblyToReferenceTask::isIndexUrl(settings.refSeqUrl.getURLString(), suffixes)) {
106 settings.indexFileName = QDir(settings.refSeqUrl.dirPath()).filePath(settings.refSeqUrl.baseFileName());
107 }
108 }
109
110 /************************************************************************/
111 /* Factory */
112 /************************************************************************/
init()113 void BowtieWorkerFactory::init() {
114 QList<Attribute *> attrs;
115 QMap<QString, PropertyDelegate *> delegates;
116 addCommonAttributes(attrs, delegates, BowtieWorker::tr("Bowtie index folder"), BowtieWorker::tr("Bowtie index basename"));
117 {
118 static const QString MISMATCHES_TYPE = "mismatches_type";
119 static const QString N_MISMATCHES = "n-mismatches";
120 static const QString V_MISMATCHES = "v-mismatches";
121 static const QString MAQERR = "maqerr";
122 static const QString SEED_LEN = "seedLen";
123 static const QString NOFW = "nofw";
124 static const QString NORC = "norc";
125 static const QString MAXBTS = "maxbts";
126 static const QString TRYHARD = "tryhard";
127 static const QString CHUNKMBS = "chunkmbs";
128 static const QString NOMAQROUND = "nomaqround";
129 static const QString SEED = "seed";
130 static const QString BEST = "best";
131 static const QString ALL = "all";
132 static const QString COLORSPACE = "colorspace";
133 static const QString THREADS = "threads";
134
135 Descriptor mismatchesType(MISMATCHES_TYPE,
136 BowtieWorker::tr("Mode"),
137 BowtieWorker::tr("When the -n option is specified (which is the default), bowtie determines which alignments \
138 are valid according to the following policy, which is similar to Maq's default policy. \
139 In -v mode, alignments may have no more than V mismatches, where V may be a number from 0 \
140 through 3 set using the -v option. Quality values are ignored. The -v option is mutually exclusive with the -n option."));
141
142 Descriptor mismatchesNumber(MISMATCHES_NUMBER,
143 BowtieWorker::tr("Mismatches number"),
144 BowtieWorker::tr("Mismatches number."));
145
146 Descriptor maqError(MAQERR,
147 BowtieWorker::tr("Mismatches number"),
148 BowtieWorker::tr("Maximum permitted total of quality values at all mismatched read positions throughout the entire alignment, \
149 not just in the seed. The default is 70. Like Maq, bowtie rounds quality values to the nearest 10 and saturates at 30; \
150 rounding can be disabled with --nomaqround."));
151
152 Descriptor seedLen(SEED_LEN,
153 BowtieWorker::tr("Seed length"),
154 BowtieWorker::tr("The "
155 "seed length"
156 "; i.e., the number of bases on the high-quality end of the read to which the \
157 -n ceiling applies. The lowest permitted setting is 5 and the default is 28. bowtie is faster for larger values of -l."));
158
159 Descriptor noForward(NOFW,
160 BowtieWorker::tr("No forward orientation"),
161 BowtieWorker::tr("If --nofw is specified, bowtie will not attempt to align against the forward reference strand."));
162
163 Descriptor noReverse(NORC,
164 BowtieWorker::tr("No reverse-complement orientation"),
165 BowtieWorker::tr("If --norc is specified, bowtie will not attempt to align against the reverse-complement reference strand."));
166
167 Descriptor maxBacktracks(MAXBTS,
168 BowtieWorker::tr("Maximum of backtracks"),
169 BowtieWorker::tr("The maximum insert size for valid paired-end alignments. E.g. if -X 100 is specified and a paired-end alignment \
170 consists of two 20-bp alignments in the proper orientation with a 60-bp gap between them, that alignment is \
171 considered valid (as long as -I is also satisfied). A 61-bp gap would not be valid in that case. \
172 If trimming options -3 or -5 are also used, the -X constraint is applied with respect to the untrimmed mates, \
173 not the trimmed mates. Default: 250."));
174
175 Descriptor tryHard(TRYHARD,
176 BowtieWorker::tr("Try as hard"),
177 BowtieWorker::tr("Try as hard as possible to find valid alignments when they exist, including paired-end alignments. \
178 This is equivalent to specifying very high values for the --maxbts and --pairtries options. This mode is generally much \
179 slower than the default settings, but can be useful for certain problems. This mode is slower when (a) the reference is \
180 very repetitive, (b) the reads are low quality, or (c) not many reads have valid alignments."));
181
182 Descriptor chunkmbs(CHUNKMBS,
183 BowtieWorker::tr("Best hits"),
184 BowtieWorker::tr("The number of megabytes of memory a given thread is given to store path descriptors in --best mode. Best-first \
185 search must keep track of many paths at once to ensure it is always extending the path with the lowest cumulative cost. \
186 Bowtie tries to minimize the memory impact of the descriptors, but they can still grow very large in some cases. \
187 If you receive an error message saying that chunk memory has been exhausted in --best mode, \
188 try adjusting this parameter up to dedicate more memory to the descriptors. Default: 64."));
189
190 Descriptor noMaqRounding(NOMAQROUND,
191 BowtieWorker::tr("No Maq rounding"),
192 BowtieWorker::tr("Maq accepts quality values in the Phred quality scale, but internally rounds values to the nearest 10, \
193 with a maximum of 30. By default, bowtie also rounds this way. --nomaqround prevents this rounding in bowtie."));
194
195 Descriptor seed(SEED,
196 BowtieWorker::tr("Seed"),
197 BowtieWorker::tr("Use <int> as the seed for pseudo-random number generator."));
198
199 Descriptor best(BEST,
200 BowtieWorker::tr("Best alignments"),
201 BowtieWorker::tr("Make Bowtie guarantee that reported singleton alignments are "
202 "best"
203 " in terms of stratum \
204 (i.e. number of mismatches, or mismatches in the seed in the case of -n mode) and in terms of \
205 the quality values at the mismatched position(s). bowtie is somewhat slower when --best is specified."));
206
207 Descriptor allAln(ALL,
208 BowtieWorker::tr("All alignments"),
209 BowtieWorker::tr("Report all valid alignments per read or pair."));
210
211 Descriptor colorspace(COLORSPACE,
212 BowtieWorker::tr("Colorspace"),
213 BowtieWorker::tr("When -C is specified, read sequences are treated as colors. Colors may be encoded either as numbers \
214 (0=blue, 1=green, 2=orange, 3=red) or as characters A/C/G/T (A=blue, C=green, G=orange, T=red)."));
215
216 attrs << new Attribute(mismatchesType, BaseTypes::STRING_TYPE(), false, QVariant("-n mode"));
217 attrs << new Attribute(mismatchesNumber, BaseTypes::NUM_TYPE(), false, QVariant(2));
218 attrs << new Attribute(maqError, BaseTypes::NUM_TYPE(), false, QVariant(70));
219
220 attrs << new Attribute(seedLen, BaseTypes::NUM_TYPE(), false, QVariant(28));
221 attrs << new Attribute(maxBacktracks, BaseTypes::NUM_TYPE(), false, QVariant(800));
222 attrs << new Attribute(chunkmbs, BaseTypes::NUM_TYPE(), false, QVariant(64));
223 attrs << new Attribute(seed, BaseTypes::NUM_TYPE(), false, QVariant(0));
224
225 attrs << new Attribute(colorspace, BaseTypes::BOOL_TYPE(), false, QVariant(false));
226 attrs << new Attribute(noMaqRounding, BaseTypes::BOOL_TYPE(), false, QVariant(false));
227 attrs << new Attribute(noForward, BaseTypes::BOOL_TYPE(), false, QVariant(false));
228 attrs << new Attribute(noReverse, BaseTypes::BOOL_TYPE(), false, QVariant(false));
229 attrs << new Attribute(tryHard, BaseTypes::BOOL_TYPE(), false, QVariant(false));
230 attrs << new Attribute(best, BaseTypes::BOOL_TYPE(), false, QVariant(false));
231 attrs << new Attribute(allAln, BaseTypes::BOOL_TYPE(), false, QVariant(false));
232 }
233
234 {
235 QVariantMap spinMap;
236 spinMap["minimum"] = QVariant(0);
237 spinMap["maximum"] = QVariant(INT_MAX);
238 delegates[MISMATCHES_NUMBER] = new SpinBoxDelegate(spinMap);
239 delegates[MAQERR] = new SpinBoxDelegate(spinMap);
240 delegates[SEED_LEN] = new SpinBoxDelegate(spinMap);
241 delegates[SEED] = new SpinBoxDelegate(spinMap);
242 delegates[MAXBTS] = new SpinBoxDelegate(spinMap);
243 delegates[CHUNKMBS] = new SpinBoxDelegate(spinMap);
244
245 QVariantMap vm;
246 vm["-n mode"] = "-n mode";
247 vm["-v mode"] = "-v mode";
248 delegates[MISMATCHES_TYPE] = new ComboBoxDelegate(vm);
249 }
250
251 Descriptor protoDesc(BowtieWorkerFactory::ACTOR_ID,
252 BowtieWorker::tr("Map Reads with Bowtie"),
253 BowtieWorker::tr("Bowtie is a program for mapping short DNA sequence reads to a long reference sequence."
254 " It uses Burrows-Wheeler techniques extended with quality-aware backtracking"
255 " algorithm that permits mismatches."
256 "<br/><br/>Provide URL(s) to FASTA or FASTQ file(s) with NGS reads to the input"
257 " port of the element, set up the reference sequence in the parameters."
258 " The result is saved to the specified SAM file, URL to the file is passed"
259 " to the output port."));
260
261 ActorPrototype *proto = new IntegralBusActorPrototype(protoDesc, getPortDescriptors(), attrs);
262 proto->setPrompter(new ShortReadsAlignerPrompter());
263 proto->setEditor(new DelegateEditor(delegates));
264 proto->setPortValidator(IN_PORT_DESCR, new ShortReadsAlignerSlotsValidator);
265 proto->addExternalTool(BowtieSupport::ET_BOWTIE_ID);
266 WorkflowEnv::getProtoRegistry()->registerProto(BaseActorCategories::CATEGORY_NGS_MAP_ASSEMBLE_READS(), proto);
267 WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID)->registerEntry(new BowtieWorkerFactory());
268 }
269
createWorker(Actor * a)270 Worker *BowtieWorkerFactory::createWorker(Actor *a) {
271 return new BowtieWorker(a);
272 }
273
274 } // namespace LocalWorkflow
275 } // namespace U2
276