1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include "RmdupBamWorker.h"
23
24 #include <U2Core/BaseDocumentFormats.h>
25 #include <U2Core/DocumentImport.h>
26 #include <U2Core/DocumentModel.h>
27 #include <U2Core/DocumentUtils.h>
28 #include <U2Core/FailTask.h>
29 #include <U2Core/FileAndDirectoryUtils.h>
30 #include <U2Core/GObject.h>
31 #include <U2Core/GObjectTypes.h>
32 #include <U2Core/GUrlUtils.h>
33 #include <U2Core/IOAdapter.h>
34 #include <U2Core/IOAdapterUtils.h>
35 #include <U2Core/TaskSignalMapper.h>
36 #include <U2Core/U2OpStatusUtils.h>
37 #include <U2Core/U2SafePoints.h>
38
39 #include <U2Designer/DelegateEditors.h>
40
41 #include <U2Formats/BAMUtils.h>
42
43 #include <U2Lang/ActorPrototypeRegistry.h>
44 #include <U2Lang/BaseActorCategories.h>
45 #include <U2Lang/BaseAttributes.h>
46 #include <U2Lang/BaseSlots.h>
47 #include <U2Lang/BaseTypes.h>
48 #include <U2Lang/IntegralBusModel.h>
49 #include <U2Lang/WorkflowEnv.h>
50 #include <U2Lang/WorkflowMonitor.h>
51
52 namespace U2 {
53 namespace LocalWorkflow {
54
55 const QString RmdupBamWorkerFactory::ACTOR_ID("rmdup-bam");
56 static const QString SHORT_NAME("mb");
57 static const QString INPUT_PORT("in-file");
58 static const QString OUTPUT_PORT("out-file");
59 static const QString OUT_MODE_ID("out-mode");
60 static const QString CUSTOM_DIR_ID("custom-dir");
61 static const QString OUT_NAME_ID("out-name");
62 static const QString REMOVE_SINGLE_END_ID("remove-single-end");
63 static const QString TREAT_READS_ID("treat_reads");
64
65 /************************************************************************/
66 /* RmdupBamPrompter */
67 /************************************************************************/
composeRichDoc()68 QString RmdupBamPrompter::composeRichDoc() {
69 IntegralBusPort *input = qobject_cast<IntegralBusPort *>(target->getPort(INPUT_PORT));
70 const Actor *producer = input->getProducer(BaseSlots::URL_SLOT().getId());
71 QString unsetStr = "<font color='red'>" + tr("unset") + "</font>";
72 QString producerName = tr("<u>%1</u>").arg(producer ? producer->getLabel() : unsetStr);
73
74 QString doc = tr("Remove PCR duplicates of BAM files from %1 with SAMTools rmdup.").arg(producerName);
75 return doc;
76 }
77
78 /************************************************************************/
79 /* RmdupBamWorkerFactory */
80 /************************************************************************/
81 namespace {
82
83 static const QString DEFAULT_NAME("Default");
84 }
85
init()86 void RmdupBamWorkerFactory::init() {
87 Descriptor desc(ACTOR_ID, RmdupBamWorker::tr("Remove Duplicates in BAM Files"), RmdupBamWorker::tr("Remove PCR duplicates of BAM files using SAMTools rmdup."));
88
89 QList<PortDescriptor *> p;
90 {
91 Descriptor inD(INPUT_PORT, RmdupBamWorker::tr("BAM File"), RmdupBamWorker::tr("Set of BAM files to rmdup"));
92 Descriptor outD(OUTPUT_PORT, RmdupBamWorker::tr("Cleaned BAM File"), RmdupBamWorker::tr("Cleaned BAM file"));
93
94 QMap<Descriptor, DataTypePtr> inM;
95 inM[BaseSlots::URL_SLOT()] = BaseTypes::STRING_TYPE();
96 p << new PortDescriptor(inD, DataTypePtr(new MapDataType(SHORT_NAME + ".input-url", inM)), true);
97
98 QMap<Descriptor, DataTypePtr> outM;
99 outM[BaseSlots::URL_SLOT()] = BaseTypes::STRING_TYPE();
100 p << new PortDescriptor(outD, DataTypePtr(new MapDataType(SHORT_NAME + ".output-url", outM)), false, true);
101 }
102
103 QList<Attribute *> a;
104 {
105 Descriptor outDir(OUT_MODE_ID, RmdupBamWorker::tr("Output folder"), RmdupBamWorker::tr("Select an output folder. <b>Custom</b> - specify the output folder in the 'Custom folder' parameter. "
106 "<b>Workflow</b> - internal workflow folder. "
107 "<b>Input file</b> - the folder of the input file."));
108
109 Descriptor customDir(CUSTOM_DIR_ID, RmdupBamWorker::tr("Custom folder"), RmdupBamWorker::tr("Select the custom output folder."));
110
111 Descriptor outName(OUT_NAME_ID, RmdupBamWorker::tr("Output BAM name"), RmdupBamWorker::tr("A name of an output BAM file. If default of empty value is provided the output name is the name of the first BAM file with .nodup.bam extension."));
112
113 Descriptor removeSE(REMOVE_SINGLE_END_ID, RmdupBamWorker::tr("Remove for single-end reads"), RmdupBamWorker::tr("Remove duplicate for single-end reads. By default, the command works for paired-end reads only (-s)."));
114
115 Descriptor treatReads(TREAT_READS_ID, RmdupBamWorker::tr("Treat as single-end"), RmdupBamWorker::tr("Treat paired-end reads and single-end reads (-S)."));
116
117 a << new Attribute(outDir, BaseTypes::NUM_TYPE(), false, QVariant(FileAndDirectoryUtils::WORKFLOW_INTERNAL));
118 Attribute *customDirAttr = new Attribute(customDir, BaseTypes::STRING_TYPE(), false, QVariant(""));
119 customDirAttr->addRelation(new VisibilityRelation(OUT_MODE_ID, FileAndDirectoryUtils::CUSTOM));
120 a << customDirAttr;
121 a << new Attribute(outName, BaseTypes::STRING_TYPE(), false, QVariant(DEFAULT_NAME));
122 a << new Attribute(removeSE, BaseTypes::BOOL_TYPE(), false, QVariant(false));
123 a << new Attribute(treatReads, BaseTypes::BOOL_TYPE(), false, QVariant(false));
124 }
125
126 QMap<QString, PropertyDelegate *> delegates;
127 {
128 QVariantMap directoryMap;
129 QString fileDir = RmdupBamWorker::tr("Input file");
130 QString workflowDir = RmdupBamWorker::tr("Workflow");
131 QString customD = RmdupBamWorker::tr("Custom");
132 directoryMap[fileDir] = FileAndDirectoryUtils::FILE_DIRECTORY;
133 directoryMap[workflowDir] = FileAndDirectoryUtils::WORKFLOW_INTERNAL;
134 directoryMap[customD] = FileAndDirectoryUtils::CUSTOM;
135 delegates[OUT_MODE_ID] = new ComboBoxDelegate(directoryMap);
136
137 delegates[CUSTOM_DIR_ID] = new URLDelegate("", "", false, true);
138 }
139
140 ActorPrototype *proto = new IntegralBusActorPrototype(desc, p, a);
141 proto->setEditor(new DelegateEditor(delegates));
142 proto->setPrompter(new RmdupBamPrompter());
143
144 WorkflowEnv::getProtoRegistry()->registerProto(BaseActorCategories::CATEGORY_NGS_BASIC(), proto);
145 DomainFactory *localDomain = WorkflowEnv::getDomainRegistry()->getById(LocalDomainFactory::ID);
146 localDomain->registerEntry(new RmdupBamWorkerFactory());
147 }
148
149 /************************************************************************/
150 /* RmdupBamWorker */
151 /************************************************************************/
RmdupBamWorker(Actor * a)152 RmdupBamWorker::RmdupBamWorker(Actor *a)
153 : BaseWorker(a), inputUrlPort(nullptr), outputUrlPort(nullptr), outUrls("") {
154 }
155
init()156 void RmdupBamWorker::init() {
157 inputUrlPort = ports.value(INPUT_PORT);
158 outputUrlPort = ports.value(OUTPUT_PORT);
159 }
160
tick()161 Task *RmdupBamWorker::tick() {
162 if (inputUrlPort->hasMessage()) {
163 const QString url = takeUrl();
164 CHECK(!url.isEmpty(), nullptr);
165
166 const QString detectedFormat = FileAndDirectoryUtils::detectFormat(url);
167 if (detectedFormat.isEmpty()) {
168 coreLog.info(tr("Unknown file format: ") + url);
169 return nullptr;
170 }
171
172 if (detectedFormat == BaseDocumentFormats::BAM) {
173 const QString outputDir = FileAndDirectoryUtils::createWorkingDir(url, getValue<int>(OUT_MODE_ID), getValue<QString>(CUSTOM_DIR_ID), context->workingDir());
174
175 BamRmdupSetting setting;
176 setting.outDir = outputDir;
177 setting.outName = getTargetName(url, outputDir);
178 setting.inputUrl = url;
179 setting.removeSingleEnd = getValue<bool>(REMOVE_SINGLE_END_ID);
180 setting.treatReads = getValue<bool>(TREAT_READS_ID);
181
182 SamtoolsRmdupTask *t = new SamtoolsRmdupTask(setting);
183 t->addListeners(createLogListeners());
184 connect(new TaskSignalMapper(t), SIGNAL(si_taskFinished(Task *)), SLOT(sl_taskFinished(Task *)));
185 return t;
186 }
187 }
188
189 if (inputUrlPort->isEnded()) {
190 setDone();
191 outputUrlPort->setEnded();
192 }
193 return nullptr;
194 }
195
cleanup()196 void RmdupBamWorker::cleanup() {
197 outUrls.clear();
198 }
199
200 namespace {
getTargetUrl(Task * task)201 QString getTargetUrl(Task *task) {
202 SamtoolsRmdupTask *rmdupTask = dynamic_cast<SamtoolsRmdupTask *>(task);
203
204 if (nullptr != rmdupTask) {
205 return rmdupTask->getResult();
206 }
207 return "";
208 }
209 } // namespace
210
sl_taskFinished(Task * task)211 void RmdupBamWorker::sl_taskFinished(Task *task) {
212 CHECK(!task->hasError(), );
213 CHECK(!task->isCanceled(), );
214
215 QString url = getTargetUrl(task);
216 CHECK(!url.isEmpty(), );
217
218 sendResult(url);
219 monitor()->addOutputFile(url, getActorId());
220 }
221
/**
 * Chooses the output file name for the given input file.
 *
 * The name comes from the "out-name" attribute; when that is empty or equal
 * to the default marker, the input file name with ".nodup.bam" appended is
 * used. If outDir + name has already been handed out by this worker, a
 * numeric "_N" suffix is appended and incremented until the resulting path is
 * not yet recorded, so two tasks never get the same output path.
 *
 * @param fileUrl URL of the input file.
 * @param outDir  output folder; it is concatenated with the name as is.
 * @return the file name (without the folder) to use for the output.
 */
QString RmdupBamWorker::getTargetName(const QString &fileUrl, const QString &outDir) {
    QString baseName = getValue<QString>(OUT_NAME_ID);
    if (baseName == DEFAULT_NAME || baseName.isEmpty()) {
        baseName = QFileInfo(fileUrl).fileName() + ".nodup.bam";
    }

    // The first suffix tried equals the number of recorded URLs, which keeps
    // the produced names unchanged whenever a single suffix already makes the
    // path unique; further suffixes are tried only on a remaining collision.
    QString name = baseName;
    for (int suffix = outUrls.size(); outUrls.contains(outDir + name); ++suffix) {
        name = baseName + QString("_%1").arg(suffix);
    }

    outUrls.append(outDir + name);
    return name;
}
235
takeUrl()236 QString RmdupBamWorker::takeUrl() {
237 const Message inputMessage = getMessageAndSetupScriptValues(inputUrlPort);
238 if (inputMessage.isEmpty()) {
239 outputUrlPort->transit();
240 return "";
241 }
242
243 const QVariantMap data = inputMessage.getData().toMap();
244 return data[BaseSlots::URL_SLOT().getId()].toString();
245 }
246
sendResult(const QString & url)247 void RmdupBamWorker::sendResult(const QString &url) {
248 const Message message(BaseTypes::STRING_TYPE(), url);
249 outputUrlPort->put(message);
250 }
251
252 ////////////////////////////////////////////////////////
253 // BamRmdupSetting
getSamtoolsArguments() const254 QStringList BamRmdupSetting::getSamtoolsArguments() const {
255 QStringList result;
256
257 result << "rmdup";
258
259 if (removeSingleEnd) {
260 result << "-s";
261 }
262
263 if (treatReads) {
264 result << "-S";
265 }
266
267 result << inputUrl;
268
269 result << outDir + outName;
270
271 return result;
272 }
273
274 ////////////////////////////////////////////////////////
275 // SamtoolsRmdupTask
276
277 const QString SamtoolsRmdupTask::SAMTOOLS_ID = "USUPP_SAMTOOLS";
278
SamtoolsRmdupTask(const BamRmdupSetting & settings)279 SamtoolsRmdupTask::SamtoolsRmdupTask(const BamRmdupSetting &settings)
280 : ExternalToolSupportTask(tr("Samtool rmdup for %1 ").arg(settings.inputUrl), TaskFlags(TaskFlag_None)), settings(settings), resultUrl("") {
281 }
282
prepare()283 void SamtoolsRmdupTask::prepare() {
284 if (settings.inputUrl.isEmpty()) {
285 setError(tr("No assembly URL to filter"));
286 return;
287 }
288
289 const QDir outDir = QFileInfo(settings.outDir).absoluteDir();
290 if (!outDir.exists()) {
291 setError(tr("Folder does not exist: ") + outDir.absolutePath());
292 return;
293 }
294 }
295
run()296 void SamtoolsRmdupTask::run() {
297 CHECK_OP(stateInfo, );
298
299 ProcessRun samtools = ExternalToolSupportUtils::prepareProcess(SAMTOOLS_ID, settings.getSamtoolsArguments(), "", QStringList(), stateInfo, getListener(0));
300 CHECK_OP(stateInfo, );
301 QScopedPointer<QProcess> sp(samtools.process);
302 QScopedPointer<ExternalToolRunTaskHelper> sh(new ExternalToolRunTaskHelper(samtools.process, new ExternalToolLogParser(), stateInfo));
303 setListenerForHelper(sh.data(), 0);
304
305 start(samtools, "SAMtools");
306 CHECK_OP(stateInfo, );
307
308 while (!samtools.process->waitForFinished(1000)) {
309 if (isCanceled()) {
310 CmdlineTaskRunner::killProcessTree(samtools.process);
311 return;
312 }
313 }
314 checkExitCode(samtools.process, "SAMtools");
315
316 if (!hasError()) {
317 resultUrl = settings.outDir + settings.outName;
318 }
319 }
320
start(const ProcessRun & pRun,const QString & toolName)321 void SamtoolsRmdupTask::start(const ProcessRun &pRun, const QString &toolName) {
322 pRun.process->start(pRun.program, pRun.arguments);
323 bool started = pRun.process->waitForStarted();
324 CHECK_EXT(started, setError(tr("Can not run %1 tool").arg(toolName)), );
325 }
326
checkExitCode(QProcess * process,const QString & toolName)327 void SamtoolsRmdupTask::checkExitCode(QProcess *process, const QString &toolName) {
328 int exitCode = process->exitCode();
329 if (exitCode != EXIT_SUCCESS && !hasError()) {
330 setError(tr("%1 tool exited with code %2").arg(toolName).arg(exitCode));
331 } else {
332 algoLog.details(tr("Tool %1 finished successfully").arg(toolName));
333 }
334 }
335
336 } // namespace LocalWorkflow
337 } // namespace U2
338