1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include "FilterBamWorker.h"
23 
24 #include <U2Core/BaseDocumentFormats.h>
25 #include <U2Core/DocumentUtils.h>
26 #include <U2Core/FailTask.h>
27 #include <U2Core/FileAndDirectoryUtils.h>
28 #include <U2Core/GObject.h>
29 #include <U2Core/IOAdapter.h>
30 #include <U2Core/TaskSignalMapper.h>
31 #include <U2Core/U2SafePoints.h>
32 
33 #include <U2Designer/DelegateEditors.h>
34 
35 #include <U2Formats/BAMUtils.h>
36 
37 #include <U2Lang/ActorPrototypeRegistry.h>
38 #include <U2Lang/BaseActorCategories.h>
39 #include <U2Lang/BaseAttributes.h>
40 #include <U2Lang/BaseSlots.h>
41 #include <U2Lang/BaseTypes.h>
42 #include <U2Lang/IntegralBusModel.h>
43 #include <U2Lang/WorkflowEnv.h>
44 #include <U2Lang/WorkflowMonitor.h>
45 
46 namespace U2 {
47 namespace LocalWorkflow {
48 
49 const QString FilterBamWorkerFactory::ACTOR_ID("filter-bam");
50 static const QString SHORT_NAME("mb");
51 static const QString INPUT_PORT("in-file");
52 static const QString OUTPUT_PORT("out-file");
53 static const QString OUTPUT_SUBDIR("Filtered_BAM/");
54 static const QString OUT_MODE_ID("out-mode");
55 static const QString CUSTOM_DIR_ID("custom-dir");
56 static const QString OUT_NAME_ID("out-name");
57 static const QString OUT_FORMAT_ID("out-format");
58 static const QString REGION_ID("region");
59 static const QString MAPQ_ID("mapq");
60 static const QString ACCEPT_FLAG_ID("accept-flag");
61 static const QString FLAG_ID("flag");
62 
63 /************************************************************************/
64 /* FilterBamPrompter */
65 /************************************************************************/
composeRichDoc()66 QString FilterBamPrompter::composeRichDoc() {
67     IntegralBusPort *input = qobject_cast<IntegralBusPort *>(target->getPort(INPUT_PORT));
68     const Actor *producer = input->getProducer(BaseSlots::URL_SLOT().getId());
69     QString unsetStr = "<font color='red'>" + tr("unset") + "</font>";
70     QString producerName = tr("<u>%1</u>").arg(producer ? producer->getLabel() : unsetStr);
71 
72     QString doc = tr("Filter BAM/SAM files from %1 with SAMTools view.").arg(producerName);
73     return doc;
74 }
75 
76 /************************************************************************/
77 /* FilterBamWorkerFactory */
78 /************************************************************************/
79 namespace {
80 static const QString DEFAULT_NAME("Default");
81 
getFilterCodes()82 QMap<QString, QString> getFilterCodes() {
83     QMap<QString, QString> res;
84     res.insert("Read is paired", "0x0001");
85     res.insert("Read is mapped in a proper pair", "0x0002");
86     res.insert("The read is unmapped", "0x0004");
87     res.insert("The mate is unmapped", "0x0008");
88     res.insert("Read strand", "0x0010");
89     res.insert("Mate strand", "0x0020");
90     res.insert("Read is the first in a pair", "0x0040");
91     res.insert("Read is the second in a pair", "0x0080");
92     res.insert("The alignment or this read is not primary", "0x0100");
93     res.insert("The read fails platform/vendor quality checks", "0x0200");
94     res.insert("The read is a PCR or optical duplicate", "0x0400");
95     return res;
96 }
97 
getHexValueByFilterString(const QString & filterString,const QMap<QString,QString> & codes)98 QString getHexValueByFilterString(const QString &filterString, const QMap<QString, QString> &codes) {
99     int val = 0;
100     QStringList filterCodes = filterString.split(",");
101     foreach (const QString fCode, filterCodes) {
102         if (codes.contains(fCode)) {
103             bool ok = true;
104             val += codes.value(fCode).toInt(&ok, 16);
105         }
106     }
107     if (val == 0) {
108         return "";
109     }
110     return QString::number(val, 16);
111 }
112 }  // namespace
113 
init()114 void FilterBamWorkerFactory::init() {
115     Descriptor desc(ACTOR_ID, FilterBamWorker::tr("Filter BAM/SAM files"), FilterBamWorker::tr("Filters BAM/SAM files using SAMTools view."));
116 
117     QList<PortDescriptor *> p;
118     {
119         Descriptor inD(INPUT_PORT, FilterBamWorker::tr("BAM/SAM File"), FilterBamWorker::tr("Set of BAM/SAM files to filter"));
120         Descriptor outD(OUTPUT_PORT, FilterBamWorker::tr("Filtered BAM/SAM files"), FilterBamWorker::tr("Filtered BAM/SAM files"));
121 
122         QMap<Descriptor, DataTypePtr> inM;
123         inM[BaseSlots::URL_SLOT()] = BaseTypes::STRING_TYPE();
124         p << new PortDescriptor(inD, DataTypePtr(new MapDataType(SHORT_NAME + ".input-url", inM)), true);
125 
126         QMap<Descriptor, DataTypePtr> outM;
127         outM[BaseSlots::URL_SLOT()] = BaseTypes::STRING_TYPE();
128         p << new PortDescriptor(outD, DataTypePtr(new MapDataType(SHORT_NAME + ".output-url", outM)), false, true);
129     }
130 
131     QList<Attribute *> a;
132     {
133         Descriptor outDir(OUT_MODE_ID, FilterBamWorker::tr("Output folder"), FilterBamWorker::tr("Select an output folder. <b>Custom</b> - specify the output folder in the 'Custom folder' parameter. "
134                                                                                                  "<b>Workflow</b> - internal workflow folder. "
135                                                                                                  "<b>Input file</b> - the folder of the input file."));
136 
137         Descriptor customDir(CUSTOM_DIR_ID, FilterBamWorker::tr("Custom folder"), FilterBamWorker::tr("Select the custom output folder."));
138 
139         Descriptor outName(OUT_NAME_ID, FilterBamWorker::tr("Output name"), FilterBamWorker::tr("A name of an output BAM/SAM file. If default of empty value is provided the output name is the name of the first BAM/SAM file with .filtered extension."));
140 
141         Descriptor outFormat(OUT_FORMAT_ID, FilterBamWorker::tr("Output format"), FilterBamWorker::tr("Format of an output assembly file."));
142 
143         Descriptor regionFilter(REGION_ID, FilterBamWorker::tr("Region"), FilterBamWorker::tr("Regions to filter. For BAM output only. chr2 to output the whole chr2. chr2:1000 to output regions of chr 2 starting from 1000. "
144                                                                                               "chr2:1000-2000 to output regions of chr2 between 1000 and 2000 including the end point. To input multiple regions use the space separator (e.g. chr1 chr2 chr3:1000-2000)."));
145 
146         Descriptor mapqFilter(MAPQ_ID, FilterBamWorker::tr("MAPQ threshold"), FilterBamWorker::tr("Minimum MAPQ quality score."));
147 
148         Descriptor flagAccept(ACCEPT_FLAG_ID, FilterBamWorker::tr("Accept flag"), FilterBamWorker::tr("Only output alignments with the selected items. Select the items in the combobox to configure bit flag. Do not select the items to avoid filtration by this parameter."));
149 
150         Descriptor flagFilter(FLAG_ID, FilterBamWorker::tr("Skip flag"), FilterBamWorker::tr("Skip alignment with the selected items. Select the items in the combobox to configure bit flag. Do not select the items to avoid filtration by this parameter."));
151 
152         a << new Attribute(outDir, BaseTypes::NUM_TYPE(), false, QVariant(FileAndDirectoryUtils::WORKFLOW_INTERNAL));
153         Attribute *customDirAttr = new Attribute(customDir, BaseTypes::STRING_TYPE(), false, QVariant(""));
154         customDirAttr->addRelation(new VisibilityRelation(OUT_MODE_ID, FileAndDirectoryUtils::CUSTOM));
155         a << customDirAttr;
156         a << new Attribute(outName, BaseTypes::STRING_TYPE(), false, QVariant(DEFAULT_NAME));
157         a << new Attribute(outFormat, BaseTypes::STRING_TYPE(), false, QVariant(BaseDocumentFormats::BAM));
158         a << new Attribute(regionFilter, BaseTypes::STRING_TYPE(), false, QVariant(""));
159         a << new Attribute(mapqFilter, BaseTypes::NUM_TYPE(), false, QVariant(0));
160         a << new Attribute(flagAccept, BaseTypes::STRING_TYPE(), false, QVariant(""));
161         a << new Attribute(flagFilter, BaseTypes::STRING_TYPE(), false, QVariant(""));
162     }
163 
164     QMap<QString, PropertyDelegate *> delegates;
165     {
166         QVariantMap directoryMap;
167         QString fileDir = FilterBamWorker::tr("Input file");
168         QString workflowDir = FilterBamWorker::tr("Workflow");
169         QString customD = FilterBamWorker::tr("Custom");
170         directoryMap[fileDir] = FileAndDirectoryUtils::FILE_DIRECTORY;
171         directoryMap[workflowDir] = FileAndDirectoryUtils::WORKFLOW_INTERNAL;
172         directoryMap[customD] = FileAndDirectoryUtils::CUSTOM;
173         delegates[OUT_MODE_ID] = new ComboBoxDelegate(directoryMap);
174 
175         delegates[CUSTOM_DIR_ID] = new URLDelegate("", "", false, true);
176 
177         QVariantMap formatMap;
178         formatMap[BaseDocumentFormats::BAM] = BaseDocumentFormats::BAM;
179         formatMap[BaseDocumentFormats::SAM] = BaseDocumentFormats::SAM;
180         auto outputFormatComboBoxDelegate = new ComboBoxDelegate(formatMap);
181         outputFormatComboBoxDelegate->setSortFlag(true);
182         outputFormatComboBoxDelegate->setItemTextFormatter(QSharedPointer<StringFormatter>(new DocumentNameByIdFormatter()));
183         delegates[OUT_FORMAT_ID] = outputFormatComboBoxDelegate;
184         QVariantMap lenMap;
185         lenMap["minimum"] = QVariant(0);
186         lenMap["maximum"] = QVariant(254);
187         delegates[MAPQ_ID] = new SpinBoxDelegate(lenMap);
188 
189         QVariantMap flags;
190         QMap<QString, QString> filterCodes = getFilterCodes();
191         foreach (const QString &key, filterCodes.keys()) {
192             flags[key] = false;
193         }
194         delegates[ACCEPT_FLAG_ID] = new ComboBoxWithChecksDelegate(flags);
195         delegates[FLAG_ID] = new ComboBoxWithChecksDelegate(flags);
196     }
197 
198     ActorPrototype *proto = new IntegralBusActorPrototype(desc, p, a);
199     proto->setEditor(new DelegateEditor(delegates));
200     proto->setPrompter(new FilterBamPrompter());
201     // no way to include tool support files, so ids passed to functions manually
202     proto->addExternalTool("USUPP_SAMTOOLS");  // SamToolsExtToolSupport::ET_SAMTOOLS_EXT_ID
203 
204     WorkflowEnv::getProtoRegistry()->registerProto(BaseActorCategories::CATEGORY_NGS_BASIC(), proto);
205     DomainFactory *localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
206     localDomain->registerEntry(new FilterBamWorkerFactory());
207 }
208 
209 /************************************************************************/
210 /* FilterBamWorker */
211 /************************************************************************/
FilterBamWorker(Actor * a)212 FilterBamWorker::FilterBamWorker(Actor *a)
213     : BaseWorker(a), inputUrlPort(nullptr), outputUrlPort(nullptr) {
214 }
215 
init()216 void FilterBamWorker::init() {
217     inputUrlPort = ports.value(INPUT_PORT);
218     outputUrlPort = ports.value(OUTPUT_PORT);
219 }
220 
tick()221 Task *FilterBamWorker::tick() {
222     if (inputUrlPort->hasMessage()) {
223         const QString url = takeUrl();
224         CHECK(!url.isEmpty(), nullptr);
225 
226         const QString detectedFormat = FileAndDirectoryUtils::detectFormat(url);
227         if (detectedFormat.isEmpty()) {
228             coreLog.info(tr("Unknown file format: ") + url);
229             return nullptr;
230         }
231         if (detectedFormat == BaseDocumentFormats::BAM || detectedFormat == BaseDocumentFormats::SAM) {
232             const QString outputDir = FileAndDirectoryUtils::createWorkingDir(url, getValue<int>(OUT_MODE_ID), getValue<QString>(CUSTOM_DIR_ID), context->workingDir());
233 
234             BamFilterSetting setting;
235             setting.outDir = outputDir;
236             setting.outName = getTargetName(url, outputDir);
237             setting.inputUrl = url;
238             setting.inputFormat = detectedFormat;
239             setting.outputFormat = getValue<QString>(OUT_FORMAT_ID);
240             setting.mapq = getValue<int>(MAPQ_ID);
241             setting.acceptFilter = getHexValueByFilterString(getValue<QString>(ACCEPT_FLAG_ID), getFilterCodes());
242             setting.skipFilter = getHexValueByFilterString(getValue<QString>(FLAG_ID), getFilterCodes());
243             setting.regionFilter = getValue<QString>(REGION_ID);
244 
245             ExternalToolSupportTask *t = new SamtoolsViewFilterTask(setting);
246             t->addListeners(createLogListeners());
247             connect(new TaskSignalMapper(t), SIGNAL(si_taskFinished(Task *)), SLOT(sl_taskFinished(Task *)));
248             return t;
249         }
250     }
251 
252     if (inputUrlPort->isEnded()) {
253         setDone();
254         outputUrlPort->setEnded();
255     }
256     return nullptr;
257 }
258 
cleanup()259 void FilterBamWorker::cleanup() {
260     outUrls.clear();
261 }
262 
263 namespace {
getTargetUrl(Task * task)264 QString getTargetUrl(Task *task) {
265     SamtoolsViewFilterTask *filterTask = dynamic_cast<SamtoolsViewFilterTask *>(task);
266 
267     if (nullptr != filterTask) {
268         return filterTask->getResult();
269     }
270     return "";
271 }
272 }  // namespace
273 
sl_taskFinished(Task * task)274 void FilterBamWorker::sl_taskFinished(Task *task) {
275     CHECK(!task->hasError(), );
276     CHECK(!task->isCanceled(), );
277 
278     QString url = getTargetUrl(task);
279     CHECK(!url.isEmpty(), );
280 
281     sendResult(url);
282     monitor()->addOutputFile(url, getActorId());
283 }
284 
takeUrl()285 QString FilterBamWorker::takeUrl() {
286     const Message inputMessage = getMessageAndSetupScriptValues(inputUrlPort);
287     if (inputMessage.isEmpty()) {
288         outputUrlPort->transit();
289         return "";
290     }
291 
292     const QVariantMap data = inputMessage.getData().toMap();
293     return data[BaseSlots::URL_SLOT().getId()].toString();
294 }
295 
sendResult(const QString & url)296 void FilterBamWorker::sendResult(const QString &url) {
297     const Message message(BaseTypes::STRING_TYPE(), url);
298     outputUrlPort->put(message);
299 }
300 
/**
 * Chooses the file name of the filtered output and remembers the resulting URL.
 *
 * When the "Output name" attribute is empty or equals the default marker, the name is
 * derived from the input file name plus a ".filtered" suffix and an extension that
 * matches the selected output format (".sam" for SAM output, ".bam" otherwise), so that
 * a SAM result is no longer stored under a ".bam" name.
 * When the resulting URL was already produced by this worker, "_<count of known URLs>"
 * is appended to the name.
 *
 * @param fileUrl URL of the input file.
 * @param outDir  Output folder; it is concatenated with the name as is.
 * @return The file name (without folder) to use for the output.
 */
QString FilterBamWorker::getTargetName(const QString &fileUrl, const QString &outDir) {
    QString name = getValue<QString>(OUT_NAME_ID);

    if (name == DEFAULT_NAME || name.isEmpty()) {
        const bool samOutput = getValue<QString>(OUT_FORMAT_ID) == BaseDocumentFormats::SAM;
        name = QFileInfo(fileUrl).fileName() + (samOutput ? ".filtered.sam" : ".filtered.bam");
    }
    if (outUrls.contains(outDir + name)) {
        // Same target requested again: make the name distinct from the earlier output.
        name.append(QString("_%1").arg(outUrls.size()));
    }
    outUrls.append(outDir + name);
    return name;
}
314 
315 ////////////////////////////////////////////////////////
316 // BamFilterSetting
getSamtoolsArguments() const317 QStringList BamFilterSetting::getSamtoolsArguments() const {
318     QStringList result;
319 
320     result << "view";
321     if (inputFormat == BaseDocumentFormats::SAM) {
322         result << "-S";
323     }
324     if (outputFormat == BaseDocumentFormats::BAM) {
325         result << "-b";
326     }
327 
328     if (!acceptFilter.isEmpty()) {
329         result << "-f" << acceptFilter;
330     }
331 
332     if (!skipFilter.isEmpty()) {
333         result << "-F" << skipFilter;
334     }
335     result << "-q" << QString::number(mapq);
336 
337     result << "-o" << outDir + outName;
338 
339     result << inputUrl;
340 
341     if (!regionFilter.isEmpty()) {
342         QStringList regions = regionFilter.split(" ");
343         foreach (const QString &reg, regions) {
344             result << reg;
345         }
346     }
347 
348     return result;
349 }
350 
351 ////////////////////////////////////////////////////////
352 // SamtoolsViewFilterTask
353 const QString SamtoolsViewFilterTask::SAMTOOLS_ID = "USUPP_SAMTOOLS";
354 
SamtoolsViewFilterTask(const BamFilterSetting & settings)355 SamtoolsViewFilterTask::SamtoolsViewFilterTask(const BamFilterSetting &settings)
356     : ExternalToolSupportTask(tr("Samtool view (filter) for %1 ").arg(settings.inputUrl), TaskFlags(TaskFlag_None)), settings(settings), resultUrl("") {
357 }
358 
prepare()359 void SamtoolsViewFilterTask::prepare() {
360     if (settings.inputUrl.isEmpty()) {
361         setError(tr("No assembly URL to filter"));
362         return;
363     }
364 
365     const QDir outDir = QFileInfo(settings.outDir).absoluteDir();
366     if (!outDir.exists()) {
367         setError(tr("Folder does not exist: ") + outDir.absolutePath());
368         return;
369     }
370 
371     if (settings.outputFormat == BaseDocumentFormats::BAM && !settings.regionFilter.isEmpty()) {
372         BAMUtils::createBamIndex(settings.inputUrl, stateInfo);
373     }
374 }
375 
run()376 void SamtoolsViewFilterTask::run() {
377     CHECK_OP(stateInfo, );
378 
379     ProcessRun samtools = ExternalToolSupportUtils::prepareProcess(SAMTOOLS_ID, settings.getSamtoolsArguments(), "", QStringList(), stateInfo, getListener(0));
380     CHECK_OP(stateInfo, );
381     QScopedPointer<QProcess> sp(samtools.process);
382     QScopedPointer<ExternalToolRunTaskHelper> sh(new ExternalToolRunTaskHelper(samtools.process, new ExternalToolLogParser(), stateInfo));
383     setListenerForHelper(sh.data(), 0);
384 
385     start(samtools, "SAMtools");
386     CHECK_OP(stateInfo, );
387 
388     while (!samtools.process->waitForFinished(1000)) {
389         if (isCanceled()) {
390             CmdlineTaskRunner::killProcessTree(samtools.process);
391             return;
392         }
393     }
394     checkExitCode(samtools.process, "SAMtools");
395 
396     if (!hasError()) {
397         resultUrl = settings.outDir + settings.outName;
398     }
399 }
400 
start(const ProcessRun & pRun,const QString & toolName)401 void SamtoolsViewFilterTask::start(const ProcessRun &pRun, const QString &toolName) {
402     pRun.process->start(pRun.program, pRun.arguments);
403     bool started = pRun.process->waitForStarted();
404     CHECK_EXT(started, setError(tr("Can not run %1 tool").arg(toolName)), );
405 }
406 
checkExitCode(QProcess * process,const QString & toolName)407 void SamtoolsViewFilterTask::checkExitCode(QProcess *process, const QString &toolName) {
408     int exitCode = process->exitCode();
409     if (exitCode != EXIT_SUCCESS && !hasError()) {
410         setError(tr("%1 tool exited with code %2").arg(toolName).arg(exitCode));
411     } else {
412         algoLog.details(tr("Tool %1 finished successfully").arg(toolName));
413     }
414 }
415 
416 }  // namespace LocalWorkflow
417 }  // namespace U2
418