1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include "RmdupBamWorker.h"
23 
24 #include <U2Core/BaseDocumentFormats.h>
25 #include <U2Core/DocumentImport.h>
26 #include <U2Core/DocumentModel.h>
27 #include <U2Core/DocumentUtils.h>
28 #include <U2Core/FailTask.h>
29 #include <U2Core/FileAndDirectoryUtils.h>
30 #include <U2Core/GObject.h>
31 #include <U2Core/GObjectTypes.h>
32 #include <U2Core/GUrlUtils.h>
33 #include <U2Core/IOAdapter.h>
34 #include <U2Core/IOAdapterUtils.h>
35 #include <U2Core/TaskSignalMapper.h>
36 #include <U2Core/U2OpStatusUtils.h>
37 #include <U2Core/U2SafePoints.h>
38 
39 #include <U2Designer/DelegateEditors.h>
40 
41 #include <U2Formats/BAMUtils.h>
42 
43 #include <U2Lang/ActorPrototypeRegistry.h>
44 #include <U2Lang/BaseActorCategories.h>
45 #include <U2Lang/BaseAttributes.h>
46 #include <U2Lang/BaseSlots.h>
47 #include <U2Lang/BaseTypes.h>
48 #include <U2Lang/IntegralBusModel.h>
49 #include <U2Lang/WorkflowEnv.h>
50 #include <U2Lang/WorkflowMonitor.h>
51 
52 namespace U2 {
53 namespace LocalWorkflow {
54 
55 const QString RmdupBamWorkerFactory::ACTOR_ID("rmdup-bam");
56 static const QString SHORT_NAME("mb");
57 static const QString INPUT_PORT("in-file");
58 static const QString OUTPUT_PORT("out-file");
59 static const QString OUT_MODE_ID("out-mode");
60 static const QString CUSTOM_DIR_ID("custom-dir");
61 static const QString OUT_NAME_ID("out-name");
62 static const QString REMOVE_SINGLE_END_ID("remove-single-end");
63 static const QString TREAT_READS_ID("treat_reads");
64 
65 /************************************************************************/
66 /* RmdupBamPrompter */
67 /************************************************************************/
composeRichDoc()68 QString RmdupBamPrompter::composeRichDoc() {
69     IntegralBusPort *input = qobject_cast<IntegralBusPort *>(target->getPort(INPUT_PORT));
70     const Actor *producer = input->getProducer(BaseSlots::URL_SLOT().getId());
71     QString unsetStr = "<font color='red'>" + tr("unset") + "</font>";
72     QString producerName = tr("<u>%1</u>").arg(producer ? producer->getLabel() : unsetStr);
73 
74     QString doc = tr("Remove PCR duplicates of BAM files from %1 with SAMTools rmdup.").arg(producerName);
75     return doc;
76 }
77 
78 /************************************************************************/
79 /* RmdupBamWorkerFactory */
80 /************************************************************************/
81 namespace {
82 
83 static const QString DEFAULT_NAME("Default");
84 }
85 
init()86 void RmdupBamWorkerFactory::init() {
87     Descriptor desc(ACTOR_ID, RmdupBamWorker::tr("Remove Duplicates in BAM Files"), RmdupBamWorker::tr("Remove PCR duplicates of BAM files using SAMTools rmdup."));
88 
89     QList<PortDescriptor *> p;
90     {
91         Descriptor inD(INPUT_PORT, RmdupBamWorker::tr("BAM File"), RmdupBamWorker::tr("Set of BAM files to rmdup"));
92         Descriptor outD(OUTPUT_PORT, RmdupBamWorker::tr("Cleaned BAM File"), RmdupBamWorker::tr("Cleaned BAM file"));
93 
94         QMap<Descriptor, DataTypePtr> inM;
95         inM[BaseSlots::URL_SLOT()] = BaseTypes::STRING_TYPE();
96         p << new PortDescriptor(inD, DataTypePtr(new MapDataType(SHORT_NAME + ".input-url", inM)), true);
97 
98         QMap<Descriptor, DataTypePtr> outM;
99         outM[BaseSlots::URL_SLOT()] = BaseTypes::STRING_TYPE();
100         p << new PortDescriptor(outD, DataTypePtr(new MapDataType(SHORT_NAME + ".output-url", outM)), false, true);
101     }
102 
103     QList<Attribute *> a;
104     {
105         Descriptor outDir(OUT_MODE_ID, RmdupBamWorker::tr("Output folder"), RmdupBamWorker::tr("Select an output folder. <b>Custom</b> - specify the output folder in the 'Custom folder' parameter. "
106                                                                                                "<b>Workflow</b> - internal workflow folder. "
107                                                                                                "<b>Input file</b> - the folder of the input file."));
108 
109         Descriptor customDir(CUSTOM_DIR_ID, RmdupBamWorker::tr("Custom folder"), RmdupBamWorker::tr("Select the custom output folder."));
110 
111         Descriptor outName(OUT_NAME_ID, RmdupBamWorker::tr("Output BAM name"), RmdupBamWorker::tr("A name of an output BAM file. If default of empty value is provided the output name is the name of the first BAM file with .nodup.bam extension."));
112 
113         Descriptor removeSE(REMOVE_SINGLE_END_ID, RmdupBamWorker::tr("Remove for single-end reads"), RmdupBamWorker::tr("Remove duplicate for single-end reads. By default, the command works for paired-end reads only (-s)."));
114 
115         Descriptor treatReads(TREAT_READS_ID, RmdupBamWorker::tr("Treat as single-end"), RmdupBamWorker::tr("Treat paired-end reads and single-end reads (-S)."));
116 
117         a << new Attribute(outDir, BaseTypes::NUM_TYPE(), false, QVariant(FileAndDirectoryUtils::WORKFLOW_INTERNAL));
118         Attribute *customDirAttr = new Attribute(customDir, BaseTypes::STRING_TYPE(), false, QVariant(""));
119         customDirAttr->addRelation(new VisibilityRelation(OUT_MODE_ID, FileAndDirectoryUtils::CUSTOM));
120         a << customDirAttr;
121         a << new Attribute(outName, BaseTypes::STRING_TYPE(), false, QVariant(DEFAULT_NAME));
122         a << new Attribute(removeSE, BaseTypes::BOOL_TYPE(), false, QVariant(false));
123         a << new Attribute(treatReads, BaseTypes::BOOL_TYPE(), false, QVariant(false));
124     }
125 
126     QMap<QString, PropertyDelegate *> delegates;
127     {
128         QVariantMap directoryMap;
129         QString fileDir = RmdupBamWorker::tr("Input file");
130         QString workflowDir = RmdupBamWorker::tr("Workflow");
131         QString customD = RmdupBamWorker::tr("Custom");
132         directoryMap[fileDir] = FileAndDirectoryUtils::FILE_DIRECTORY;
133         directoryMap[workflowDir] = FileAndDirectoryUtils::WORKFLOW_INTERNAL;
134         directoryMap[customD] = FileAndDirectoryUtils::CUSTOM;
135         delegates[OUT_MODE_ID] = new ComboBoxDelegate(directoryMap);
136 
137         delegates[CUSTOM_DIR_ID] = new URLDelegate("", "", false, true);
138     }
139 
140     ActorPrototype *proto = new IntegralBusActorPrototype(desc, p, a);
141     proto->setEditor(new DelegateEditor(delegates));
142     proto->setPrompter(new RmdupBamPrompter());
143 
144     WorkflowEnv::getProtoRegistry()->registerProto(BaseActorCategories::CATEGORY_NGS_BASIC(), proto);
145     DomainFactory *localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
146     localDomain->registerEntry(new RmdupBamWorkerFactory());
147 }
148 
149 /************************************************************************/
150 /* RmdupBamWorker */
151 /************************************************************************/
RmdupBamWorker(Actor * a)152 RmdupBamWorker::RmdupBamWorker(Actor *a)
153     : BaseWorker(a), inputUrlPort(nullptr), outputUrlPort(nullptr), outUrls("") {
154 }
155 
init()156 void RmdupBamWorker::init() {
157     inputUrlPort = ports.value(INPUT_PORT);
158     outputUrlPort = ports.value(OUTPUT_PORT);
159 }
160 
tick()161 Task *RmdupBamWorker::tick() {
162     if (inputUrlPort->hasMessage()) {
163         const QString url = takeUrl();
164         CHECK(!url.isEmpty(), nullptr);
165 
166         const QString detectedFormat = FileAndDirectoryUtils::detectFormat(url);
167         if (detectedFormat.isEmpty()) {
168             coreLog.info(tr("Unknown file format: ") + url);
169             return nullptr;
170         }
171 
172         if (detectedFormat == BaseDocumentFormats::BAM) {
173             const QString outputDir = FileAndDirectoryUtils::createWorkingDir(url, getValue<int>(OUT_MODE_ID), getValue<QString>(CUSTOM_DIR_ID), context->workingDir());
174 
175             BamRmdupSetting setting;
176             setting.outDir = outputDir;
177             setting.outName = getTargetName(url, outputDir);
178             setting.inputUrl = url;
179             setting.removeSingleEnd = getValue<bool>(REMOVE_SINGLE_END_ID);
180             setting.treatReads = getValue<bool>(TREAT_READS_ID);
181 
182             SamtoolsRmdupTask *t = new SamtoolsRmdupTask(setting);
183             t->addListeners(createLogListeners());
184             connect(new TaskSignalMapper(t), SIGNAL(si_taskFinished(Task *)), SLOT(sl_taskFinished(Task *)));
185             return t;
186         }
187     }
188 
189     if (inputUrlPort->isEnded()) {
190         setDone();
191         outputUrlPort->setEnded();
192     }
193     return nullptr;
194 }
195 
cleanup()196 void RmdupBamWorker::cleanup() {
197     outUrls.clear();
198 }
199 
200 namespace {
getTargetUrl(Task * task)201 QString getTargetUrl(Task *task) {
202     SamtoolsRmdupTask *rmdupTask = dynamic_cast<SamtoolsRmdupTask *>(task);
203 
204     if (nullptr != rmdupTask) {
205         return rmdupTask->getResult();
206     }
207     return "";
208 }
209 }  // namespace
210 
sl_taskFinished(Task * task)211 void RmdupBamWorker::sl_taskFinished(Task *task) {
212     CHECK(!task->hasError(), );
213     CHECK(!task->isCanceled(), );
214 
215     QString url = getTargetUrl(task);
216     CHECK(!url.isEmpty(), );
217 
218     sendResult(url);
219     monitor()->addOutputFile(url, getActorId());
220 }
221 
/**
 * Chooses the name of the output file for one input file and remembers the resulting path.
 *
 * The name comes from the "out-name" attribute; when it is empty or equal to the default
 * value, the input file name with ".nodup.bam" appended is used instead. If the resulting
 * path was already produced by this worker, "_<number of remembered paths>" is appended.
 *
 * @param fileUrl URL of the input file.
 * @param outDir  output folder; it is concatenated with the name as is.
 * @return the output file name (without the folder).
 */
QString RmdupBamWorker::getTargetName(const QString &fileUrl, const QString &outDir) {
    QString targetName = getValue<QString>(OUT_NAME_ID);

    const bool useGeneratedName = targetName == DEFAULT_NAME || targetName.isEmpty();
    if (useGeneratedName) {
        targetName = QFileInfo(fileUrl).fileName();
        targetName += ".nodup.bam";
    }

    // NOTE(review): the uniqueness suffix goes after the extension (e.g. "x.bam_2") — confirm this is acceptable.
    const bool alreadyUsed = outUrls.contains(outDir + targetName);
    if (alreadyUsed) {
        targetName += QString("_%1").arg(outUrls.size());
    }

    outUrls.append(outDir + targetName);
    return targetName;
}
235 
takeUrl()236 QString RmdupBamWorker::takeUrl() {
237     const Message inputMessage = getMessageAndSetupScriptValues(inputUrlPort);
238     if (inputMessage.isEmpty()) {
239         outputUrlPort->transit();
240         return "";
241     }
242 
243     const QVariantMap data = inputMessage.getData().toMap();
244     return data[BaseSlots::URL_SLOT().getId()].toString();
245 }
246 
sendResult(const QString & url)247 void RmdupBamWorker::sendResult(const QString &url) {
248     const Message message(BaseTypes::STRING_TYPE(), url);
249     outputUrlPort->put(message);
250 }
251 
252 ////////////////////////////////////////////////////////
253 // BamRmdupSetting
getSamtoolsArguments() const254 QStringList BamRmdupSetting::getSamtoolsArguments() const {
255     QStringList result;
256 
257     result << "rmdup";
258 
259     if (removeSingleEnd) {
260         result << "-s";
261     }
262 
263     if (treatReads) {
264         result << "-S";
265     }
266 
267     result << inputUrl;
268 
269     result << outDir + outName;
270 
271     return result;
272 }
273 
274 ////////////////////////////////////////////////////////
275 // SamtoolsRmdupTask
276 
277 const QString SamtoolsRmdupTask::SAMTOOLS_ID = "USUPP_SAMTOOLS";
278 
SamtoolsRmdupTask(const BamRmdupSetting & settings)279 SamtoolsRmdupTask::SamtoolsRmdupTask(const BamRmdupSetting &settings)
280     : ExternalToolSupportTask(tr("Samtool rmdup for %1 ").arg(settings.inputUrl), TaskFlags(TaskFlag_None)), settings(settings), resultUrl("") {
281 }
282 
prepare()283 void SamtoolsRmdupTask::prepare() {
284     if (settings.inputUrl.isEmpty()) {
285         setError(tr("No assembly URL to filter"));
286         return;
287     }
288 
289     const QDir outDir = QFileInfo(settings.outDir).absoluteDir();
290     if (!outDir.exists()) {
291         setError(tr("Folder does not exist: ") + outDir.absolutePath());
292         return;
293     }
294 }
295 
run()296 void SamtoolsRmdupTask::run() {
297     CHECK_OP(stateInfo, );
298 
299     ProcessRun samtools = ExternalToolSupportUtils::prepareProcess(SAMTOOLS_ID, settings.getSamtoolsArguments(), "", QStringList(), stateInfo, getListener(0));
300     CHECK_OP(stateInfo, );
301     QScopedPointer<QProcess> sp(samtools.process);
302     QScopedPointer<ExternalToolRunTaskHelper> sh(new ExternalToolRunTaskHelper(samtools.process, new ExternalToolLogParser(), stateInfo));
303     setListenerForHelper(sh.data(), 0);
304 
305     start(samtools, "SAMtools");
306     CHECK_OP(stateInfo, );
307 
308     while (!samtools.process->waitForFinished(1000)) {
309         if (isCanceled()) {
310             CmdlineTaskRunner::killProcessTree(samtools.process);
311             return;
312         }
313     }
314     checkExitCode(samtools.process, "SAMtools");
315 
316     if (!hasError()) {
317         resultUrl = settings.outDir + settings.outName;
318     }
319 }
320 
start(const ProcessRun & pRun,const QString & toolName)321 void SamtoolsRmdupTask::start(const ProcessRun &pRun, const QString &toolName) {
322     pRun.process->start(pRun.program, pRun.arguments);
323     bool started = pRun.process->waitForStarted();
324     CHECK_EXT(started, setError(tr("Can not run %1 tool").arg(toolName)), );
325 }
326 
checkExitCode(QProcess * process,const QString & toolName)327 void SamtoolsRmdupTask::checkExitCode(QProcess *process, const QString &toolName) {
328     int exitCode = process->exitCode();
329     if (exitCode != EXIT_SUCCESS && !hasError()) {
330         setError(tr("%1 tool exited with code %2").arg(toolName).arg(exitCode));
331     } else {
332         algoLog.details(tr("Tool %1 finished successfully").arg(toolName));
333     }
334 }
335 
336 }  // namespace LocalWorkflow
337 }  // namespace U2
338