1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include "FilterBamWorker.h"
23
24 #include <U2Core/BaseDocumentFormats.h>
25 #include <U2Core/DocumentUtils.h>
26 #include <U2Core/FailTask.h>
27 #include <U2Core/FileAndDirectoryUtils.h>
28 #include <U2Core/GObject.h>
29 #include <U2Core/IOAdapter.h>
30 #include <U2Core/TaskSignalMapper.h>
31 #include <U2Core/U2SafePoints.h>
32
33 #include <U2Designer/DelegateEditors.h>
34
35 #include <U2Formats/BAMUtils.h>
36
37 #include <U2Lang/ActorPrototypeRegistry.h>
38 #include <U2Lang/BaseActorCategories.h>
39 #include <U2Lang/BaseAttributes.h>
40 #include <U2Lang/BaseSlots.h>
41 #include <U2Lang/BaseTypes.h>
42 #include <U2Lang/IntegralBusModel.h>
43 #include <U2Lang/WorkflowEnv.h>
44 #include <U2Lang/WorkflowMonitor.h>
45
46 namespace U2 {
47 namespace LocalWorkflow {
48
49 const QString FilterBamWorkerFactory::ACTOR_ID("filter-bam");
50 static const QString SHORT_NAME("mb");
51 static const QString INPUT_PORT("in-file");
52 static const QString OUTPUT_PORT("out-file");
53 static const QString OUTPUT_SUBDIR("Filtered_BAM/");
54 static const QString OUT_MODE_ID("out-mode");
55 static const QString CUSTOM_DIR_ID("custom-dir");
56 static const QString OUT_NAME_ID("out-name");
57 static const QString OUT_FORMAT_ID("out-format");
58 static const QString REGION_ID("region");
59 static const QString MAPQ_ID("mapq");
60 static const QString ACCEPT_FLAG_ID("accept-flag");
61 static const QString FLAG_ID("flag");
62
63 /************************************************************************/
64 /* FilterBamPrompter */
65 /************************************************************************/
composeRichDoc()66 QString FilterBamPrompter::composeRichDoc() {
67 IntegralBusPort *input = qobject_cast<IntegralBusPort *>(target->getPort(INPUT_PORT));
68 const Actor *producer = input->getProducer(BaseSlots::URL_SLOT().getId());
69 QString unsetStr = "<font color='red'>" + tr("unset") + "</font>";
70 QString producerName = tr("<u>%1</u>").arg(producer ? producer->getLabel() : unsetStr);
71
72 QString doc = tr("Filter BAM/SAM files from %1 with SAMTools view.").arg(producerName);
73 return doc;
74 }
75
76 /************************************************************************/
77 /* FilterBamWorkerFactory */
78 /************************************************************************/
79 namespace {
80 static const QString DEFAULT_NAME("Default");
81
getFilterCodes()82 QMap<QString, QString> getFilterCodes() {
83 QMap<QString, QString> res;
84 res.insert("Read is paired", "0x0001");
85 res.insert("Read is mapped in a proper pair", "0x0002");
86 res.insert("The read is unmapped", "0x0004");
87 res.insert("The mate is unmapped", "0x0008");
88 res.insert("Read strand", "0x0010");
89 res.insert("Mate strand", "0x0020");
90 res.insert("Read is the first in a pair", "0x0040");
91 res.insert("Read is the second in a pair", "0x0080");
92 res.insert("The alignment or this read is not primary", "0x0100");
93 res.insert("The read fails platform/vendor quality checks", "0x0200");
94 res.insert("The read is a PCR or optical duplicate", "0x0400");
95 return res;
96 }
97
getHexValueByFilterString(const QString & filterString,const QMap<QString,QString> & codes)98 QString getHexValueByFilterString(const QString &filterString, const QMap<QString, QString> &codes) {
99 int val = 0;
100 QStringList filterCodes = filterString.split(",");
101 foreach (const QString fCode, filterCodes) {
102 if (codes.contains(fCode)) {
103 bool ok = true;
104 val += codes.value(fCode).toInt(&ok, 16);
105 }
106 }
107 if (val == 0) {
108 return "";
109 }
110 return QString::number(val, 16);
111 }
112 } // namespace
113
init()114 void FilterBamWorkerFactory::init() {
115 Descriptor desc(ACTOR_ID, FilterBamWorker::tr("Filter BAM/SAM files"), FilterBamWorker::tr("Filters BAM/SAM files using SAMTools view."));
116
117 QList<PortDescriptor *> p;
118 {
119 Descriptor inD(INPUT_PORT, FilterBamWorker::tr("BAM/SAM File"), FilterBamWorker::tr("Set of BAM/SAM files to filter"));
120 Descriptor outD(OUTPUT_PORT, FilterBamWorker::tr("Filtered BAM/SAM files"), FilterBamWorker::tr("Filtered BAM/SAM files"));
121
122 QMap<Descriptor, DataTypePtr> inM;
123 inM[BaseSlots::URL_SLOT()] = BaseTypes::STRING_TYPE();
124 p << new PortDescriptor(inD, DataTypePtr(new MapDataType(SHORT_NAME + ".input-url", inM)), true);
125
126 QMap<Descriptor, DataTypePtr> outM;
127 outM[BaseSlots::URL_SLOT()] = BaseTypes::STRING_TYPE();
128 p << new PortDescriptor(outD, DataTypePtr(new MapDataType(SHORT_NAME + ".output-url", outM)), false, true);
129 }
130
131 QList<Attribute *> a;
132 {
133 Descriptor outDir(OUT_MODE_ID, FilterBamWorker::tr("Output folder"), FilterBamWorker::tr("Select an output folder. <b>Custom</b> - specify the output folder in the 'Custom folder' parameter. "
134 "<b>Workflow</b> - internal workflow folder. "
135 "<b>Input file</b> - the folder of the input file."));
136
137 Descriptor customDir(CUSTOM_DIR_ID, FilterBamWorker::tr("Custom folder"), FilterBamWorker::tr("Select the custom output folder."));
138
139 Descriptor outName(OUT_NAME_ID, FilterBamWorker::tr("Output name"), FilterBamWorker::tr("A name of an output BAM/SAM file. If default of empty value is provided the output name is the name of the first BAM/SAM file with .filtered extension."));
140
141 Descriptor outFormat(OUT_FORMAT_ID, FilterBamWorker::tr("Output format"), FilterBamWorker::tr("Format of an output assembly file."));
142
143 Descriptor regionFilter(REGION_ID, FilterBamWorker::tr("Region"), FilterBamWorker::tr("Regions to filter. For BAM output only. chr2 to output the whole chr2. chr2:1000 to output regions of chr 2 starting from 1000. "
144 "chr2:1000-2000 to output regions of chr2 between 1000 and 2000 including the end point. To input multiple regions use the space separator (e.g. chr1 chr2 chr3:1000-2000)."));
145
146 Descriptor mapqFilter(MAPQ_ID, FilterBamWorker::tr("MAPQ threshold"), FilterBamWorker::tr("Minimum MAPQ quality score."));
147
148 Descriptor flagAccept(ACCEPT_FLAG_ID, FilterBamWorker::tr("Accept flag"), FilterBamWorker::tr("Only output alignments with the selected items. Select the items in the combobox to configure bit flag. Do not select the items to avoid filtration by this parameter."));
149
150 Descriptor flagFilter(FLAG_ID, FilterBamWorker::tr("Skip flag"), FilterBamWorker::tr("Skip alignment with the selected items. Select the items in the combobox to configure bit flag. Do not select the items to avoid filtration by this parameter."));
151
152 a << new Attribute(outDir, BaseTypes::NUM_TYPE(), false, QVariant(FileAndDirectoryUtils::WORKFLOW_INTERNAL));
153 Attribute *customDirAttr = new Attribute(customDir, BaseTypes::STRING_TYPE(), false, QVariant(""));
154 customDirAttr->addRelation(new VisibilityRelation(OUT_MODE_ID, FileAndDirectoryUtils::CUSTOM));
155 a << customDirAttr;
156 a << new Attribute(outName, BaseTypes::STRING_TYPE(), false, QVariant(DEFAULT_NAME));
157 a << new Attribute(outFormat, BaseTypes::STRING_TYPE(), false, QVariant(BaseDocumentFormats::BAM));
158 a << new Attribute(regionFilter, BaseTypes::STRING_TYPE(), false, QVariant(""));
159 a << new Attribute(mapqFilter, BaseTypes::NUM_TYPE(), false, QVariant(0));
160 a << new Attribute(flagAccept, BaseTypes::STRING_TYPE(), false, QVariant(""));
161 a << new Attribute(flagFilter, BaseTypes::STRING_TYPE(), false, QVariant(""));
162 }
163
164 QMap<QString, PropertyDelegate *> delegates;
165 {
166 QVariantMap directoryMap;
167 QString fileDir = FilterBamWorker::tr("Input file");
168 QString workflowDir = FilterBamWorker::tr("Workflow");
169 QString customD = FilterBamWorker::tr("Custom");
170 directoryMap[fileDir] = FileAndDirectoryUtils::FILE_DIRECTORY;
171 directoryMap[workflowDir] = FileAndDirectoryUtils::WORKFLOW_INTERNAL;
172 directoryMap[customD] = FileAndDirectoryUtils::CUSTOM;
173 delegates[OUT_MODE_ID] = new ComboBoxDelegate(directoryMap);
174
175 delegates[CUSTOM_DIR_ID] = new URLDelegate("", "", false, true);
176
177 QVariantMap formatMap;
178 formatMap[BaseDocumentFormats::BAM] = BaseDocumentFormats::BAM;
179 formatMap[BaseDocumentFormats::SAM] = BaseDocumentFormats::SAM;
180 auto outputFormatComboBoxDelegate = new ComboBoxDelegate(formatMap);
181 outputFormatComboBoxDelegate->setSortFlag(true);
182 outputFormatComboBoxDelegate->setItemTextFormatter(QSharedPointer<StringFormatter>(new DocumentNameByIdFormatter()));
183 delegates[OUT_FORMAT_ID] = outputFormatComboBoxDelegate;
184 QVariantMap lenMap;
185 lenMap["minimum"] = QVariant(0);
186 lenMap["maximum"] = QVariant(254);
187 delegates[MAPQ_ID] = new SpinBoxDelegate(lenMap);
188
189 QVariantMap flags;
190 QMap<QString, QString> filterCodes = getFilterCodes();
191 foreach (const QString &key, filterCodes.keys()) {
192 flags[key] = false;
193 }
194 delegates[ACCEPT_FLAG_ID] = new ComboBoxWithChecksDelegate(flags);
195 delegates[FLAG_ID] = new ComboBoxWithChecksDelegate(flags);
196 }
197
198 ActorPrototype *proto = new IntegralBusActorPrototype(desc, p, a);
199 proto->setEditor(new DelegateEditor(delegates));
200 proto->setPrompter(new FilterBamPrompter());
201 // no way to include tool support files, so ids passed to functions manually
202 proto->addExternalTool("USUPP_SAMTOOLS"); // SamToolsExtToolSupport::ET_SAMTOOLS_EXT_ID
203
204 WorkflowEnv::getProtoRegistry()->registerProto(BaseActorCategories::CATEGORY_NGS_BASIC(), proto);
205 DomainFactory *localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
206 localDomain->registerEntry(new FilterBamWorkerFactory());
207 }
208
209 /************************************************************************/
210 /* FilterBamWorker */
211 /************************************************************************/
FilterBamWorker(Actor * a)212 FilterBamWorker::FilterBamWorker(Actor *a)
213 : BaseWorker(a), inputUrlPort(nullptr), outputUrlPort(nullptr) {
214 }
215
init()216 void FilterBamWorker::init() {
217 inputUrlPort = ports.value(INPUT_PORT);
218 outputUrlPort = ports.value(OUTPUT_PORT);
219 }
220
tick()221 Task *FilterBamWorker::tick() {
222 if (inputUrlPort->hasMessage()) {
223 const QString url = takeUrl();
224 CHECK(!url.isEmpty(), nullptr);
225
226 const QString detectedFormat = FileAndDirectoryUtils::detectFormat(url);
227 if (detectedFormat.isEmpty()) {
228 coreLog.info(tr("Unknown file format: ") + url);
229 return nullptr;
230 }
231 if (detectedFormat == BaseDocumentFormats::BAM || detectedFormat == BaseDocumentFormats::SAM) {
232 const QString outputDir = FileAndDirectoryUtils::createWorkingDir(url, getValue<int>(OUT_MODE_ID), getValue<QString>(CUSTOM_DIR_ID), context->workingDir());
233
234 BamFilterSetting setting;
235 setting.outDir = outputDir;
236 setting.outName = getTargetName(url, outputDir);
237 setting.inputUrl = url;
238 setting.inputFormat = detectedFormat;
239 setting.outputFormat = getValue<QString>(OUT_FORMAT_ID);
240 setting.mapq = getValue<int>(MAPQ_ID);
241 setting.acceptFilter = getHexValueByFilterString(getValue<QString>(ACCEPT_FLAG_ID), getFilterCodes());
242 setting.skipFilter = getHexValueByFilterString(getValue<QString>(FLAG_ID), getFilterCodes());
243 setting.regionFilter = getValue<QString>(REGION_ID);
244
245 ExternalToolSupportTask *t = new SamtoolsViewFilterTask(setting);
246 t->addListeners(createLogListeners());
247 connect(new TaskSignalMapper(t), SIGNAL(si_taskFinished(Task *)), SLOT(sl_taskFinished(Task *)));
248 return t;
249 }
250 }
251
252 if (inputUrlPort->isEnded()) {
253 setDone();
254 outputUrlPort->setEnded();
255 }
256 return nullptr;
257 }
258
cleanup()259 void FilterBamWorker::cleanup() {
260 outUrls.clear();
261 }
262
263 namespace {
getTargetUrl(Task * task)264 QString getTargetUrl(Task *task) {
265 SamtoolsViewFilterTask *filterTask = dynamic_cast<SamtoolsViewFilterTask *>(task);
266
267 if (nullptr != filterTask) {
268 return filterTask->getResult();
269 }
270 return "";
271 }
272 } // namespace
273
sl_taskFinished(Task * task)274 void FilterBamWorker::sl_taskFinished(Task *task) {
275 CHECK(!task->hasError(), );
276 CHECK(!task->isCanceled(), );
277
278 QString url = getTargetUrl(task);
279 CHECK(!url.isEmpty(), );
280
281 sendResult(url);
282 monitor()->addOutputFile(url, getActorId());
283 }
284
takeUrl()285 QString FilterBamWorker::takeUrl() {
286 const Message inputMessage = getMessageAndSetupScriptValues(inputUrlPort);
287 if (inputMessage.isEmpty()) {
288 outputUrlPort->transit();
289 return "";
290 }
291
292 const QVariantMap data = inputMessage.getData().toMap();
293 return data[BaseSlots::URL_SLOT().getId()].toString();
294 }
295
sendResult(const QString & url)296 void FilterBamWorker::sendResult(const QString &url) {
297 const Message message(BaseTypes::STRING_TYPE(), url);
298 outputUrlPort->put(message);
299 }
300
/**
 * Chooses the name of the output file and remembers the resulting URL in outUrls.
 *
 * @param fileUrl URL of the input file; its file name is the base of the default output name.
 * @param outDir  output folder; concatenated with the name as is to build the remembered URL.
 * @return the "Output name" parameter value, or, when that value is empty or "Default",
 *         the input file name followed by ".filtered.sam" for SAM output and
 *         ".filtered.bam" otherwise. If the resulting URL was already produced by this
 *         worker, "_<number of remembered URLs>" is appended to the name.
 */
QString FilterBamWorker::getTargetName(const QString &fileUrl, const QString &outDir) {
    QString name = getValue<QString>(OUT_NAME_ID);

    if (name.isEmpty() || name == DEFAULT_NAME) {
        // The extension follows the selected output format, so a SAM result is not labelled as BAM.
        const bool isSamOutput = getValue<QString>(OUT_FORMAT_ID) == BaseDocumentFormats::SAM;
        name = QFileInfo(fileUrl).fileName() + (isSamOutput ? ".filtered.sam" : ".filtered.bam");
    }
    if (outUrls.contains(outDir + name)) {
        name.append(QString("_%1").arg(outUrls.size()));
    }
    outUrls.append(outDir + name);
    return name;
}
314
315 ////////////////////////////////////////////////////////
316 // BamFilterSetting
getSamtoolsArguments() const317 QStringList BamFilterSetting::getSamtoolsArguments() const {
318 QStringList result;
319
320 result << "view";
321 if (inputFormat == BaseDocumentFormats::SAM) {
322 result << "-S";
323 }
324 if (outputFormat == BaseDocumentFormats::BAM) {
325 result << "-b";
326 }
327
328 if (!acceptFilter.isEmpty()) {
329 result << "-f" << acceptFilter;
330 }
331
332 if (!skipFilter.isEmpty()) {
333 result << "-F" << skipFilter;
334 }
335 result << "-q" << QString::number(mapq);
336
337 result << "-o" << outDir + outName;
338
339 result << inputUrl;
340
341 if (!regionFilter.isEmpty()) {
342 QStringList regions = regionFilter.split(" ");
343 foreach (const QString ®, regions) {
344 result << reg;
345 }
346 }
347
348 return result;
349 }
350
351 ////////////////////////////////////////////////////////
352 // SamtoolsViewFilterTask
353 const QString SamtoolsViewFilterTask::SAMTOOLS_ID = "USUPP_SAMTOOLS";
354
SamtoolsViewFilterTask(const BamFilterSetting & settings)355 SamtoolsViewFilterTask::SamtoolsViewFilterTask(const BamFilterSetting &settings)
356 : ExternalToolSupportTask(tr("Samtool view (filter) for %1 ").arg(settings.inputUrl), TaskFlags(TaskFlag_None)), settings(settings), resultUrl("") {
357 }
358
prepare()359 void SamtoolsViewFilterTask::prepare() {
360 if (settings.inputUrl.isEmpty()) {
361 setError(tr("No assembly URL to filter"));
362 return;
363 }
364
365 const QDir outDir = QFileInfo(settings.outDir).absoluteDir();
366 if (!outDir.exists()) {
367 setError(tr("Folder does not exist: ") + outDir.absolutePath());
368 return;
369 }
370
371 if (settings.outputFormat == BaseDocumentFormats::BAM && !settings.regionFilter.isEmpty()) {
372 BAMUtils::createBamIndex(settings.inputUrl, stateInfo);
373 }
374 }
375
run()376 void SamtoolsViewFilterTask::run() {
377 CHECK_OP(stateInfo, );
378
379 ProcessRun samtools = ExternalToolSupportUtils::prepareProcess(SAMTOOLS_ID, settings.getSamtoolsArguments(), "", QStringList(), stateInfo, getListener(0));
380 CHECK_OP(stateInfo, );
381 QScopedPointer<QProcess> sp(samtools.process);
382 QScopedPointer<ExternalToolRunTaskHelper> sh(new ExternalToolRunTaskHelper(samtools.process, new ExternalToolLogParser(), stateInfo));
383 setListenerForHelper(sh.data(), 0);
384
385 start(samtools, "SAMtools");
386 CHECK_OP(stateInfo, );
387
388 while (!samtools.process->waitForFinished(1000)) {
389 if (isCanceled()) {
390 CmdlineTaskRunner::killProcessTree(samtools.process);
391 return;
392 }
393 }
394 checkExitCode(samtools.process, "SAMtools");
395
396 if (!hasError()) {
397 resultUrl = settings.outDir + settings.outName;
398 }
399 }
400
start(const ProcessRun & pRun,const QString & toolName)401 void SamtoolsViewFilterTask::start(const ProcessRun &pRun, const QString &toolName) {
402 pRun.process->start(pRun.program, pRun.arguments);
403 bool started = pRun.process->waitForStarted();
404 CHECK_EXT(started, setError(tr("Can not run %1 tool").arg(toolName)), );
405 }
406
checkExitCode(QProcess * process,const QString & toolName)407 void SamtoolsViewFilterTask::checkExitCode(QProcess *process, const QString &toolName) {
408 int exitCode = process->exitCode();
409 if (exitCode != EXIT_SUCCESS && !hasError()) {
410 setError(tr("%1 tool exited with code %2").arg(toolName).arg(exitCode));
411 } else {
412 algoLog.details(tr("Tool %1 finished successfully").arg(toolName));
413 }
414 }
415
416 } // namespace LocalWorkflow
417 } // namespace U2
418