1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include "CAP3SupportTask.h"
23 
24 #include <U2Core/AddDocumentTask.h>
25 #include <U2Core/AppContext.h>
26 #include <U2Core/CopyDataTask.h>
27 #include <U2Core/Counter.h>
28 #include <U2Core/DocumentImport.h>
29 #include <U2Core/DocumentModel.h>
30 #include <U2Core/DocumentUtils.h>
31 #include <U2Core/ExternalToolRegistry.h>
32 #include <U2Core/IOAdapter.h>
33 #include <U2Core/IOAdapterUtils.h>
34 #include <U2Core/ProjectModel.h>
35 #include <U2Core/U2OpStatusUtils.h>
36 #include <U2Core/U2SafePoints.h>
37 
38 #include <U2Formats/DNAQualityIOUtils.h>
39 
40 #include <U2Gui/OpenViewTask.h>
41 
42 #include "CAP3Support.h"
43 
44 namespace U2 {
45 
46 //////////////////////////////////////////////////////////////////////////
47 ////CAP3SupportTask
48 
CAP3SupportTask(const CAP3SupportTaskSettings & _settings)49 CAP3SupportTask::CAP3SupportTask(const CAP3SupportTaskSettings &_settings)
50     : ExternalToolSupportTask("CAP3SupportTask", TaskFlags_NR_FOSE_COSC),
51       prepareDataForCAP3Task(nullptr),
52       cap3Task(nullptr),
53       copyResultTask(nullptr),
54       settings(_settings) {
55     GCOUNTER(cvar, "CAP3SupportTask");
56     setMaxParallelSubtasks(1);
57 }
58 
prepare()59 void CAP3SupportTask::prepare() {
60     tmpDirUrl = ExternalToolSupportUtils::createTmpDir(CAP3Support::CAP3_TMP_DIR, stateInfo);
61     CHECK_OP(stateInfo, );
62 
63     prepareDataForCAP3Task = new PrepareInputForCAP3Task(settings.inputFiles, tmpDirUrl);
64     addSubTask(prepareDataForCAP3Task);
65 }
66 
67 #define CAP3_EXT ".cap.ace"
68 
onSubTaskFinished(Task * subTask)69 QList<Task *> CAP3SupportTask::onSubTaskFinished(Task *subTask) {
70     QList<Task *> res;
71 
72     propagateSubtaskError();
73 
74     if (hasError() || isCanceled()) {
75         return res;
76     }
77 
78     if (subTask == prepareDataForCAP3Task) {
79         assert(!prepareDataForCAP3Task->getPreparedPath().isEmpty());
80         GUrl inputUrl = prepareDataForCAP3Task->getPreparedPath();
81         tmpOutputUrl = inputUrl.getURLString() + CAP3_EXT;
82 
83         QStringList arguments = settings.getArgumentsList();
84         arguments.prepend(inputUrl.getURLString());
85         cap3Task = new ExternalToolRunTask(CAP3Support::ET_CAP3_ID, arguments, new CAP3LogParser());
86         setListenerForTask(cap3Task);
87         cap3Task->setSubtaskProgressWeight(95);
88         res.append(cap3Task);
89     } else if (subTask == cap3Task) {
90         if (!QFile::exists(tmpOutputUrl)) {
91             if (AppContext::getExternalToolRegistry()->getById(CAP3Support::ET_CAP3_ID)->isValid()) {
92                 stateInfo.setError(tr("Output file not found"));
93             } else {
94                 stateInfo.setError(tr("Output file not found. May be %1 tool path '%2' not valid?")
95                                        .arg(AppContext::getExternalToolRegistry()->getById(CAP3Support::ET_CAP3_ID)->getName())
96                                        .arg(AppContext::getExternalToolRegistry()->getById(CAP3Support::ET_CAP3_ID)->getPath()));
97             }
98             return res;
99         }
100 
101         IOAdapterFactory *iof = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(BaseIOAdapters::LOCAL_FILE);
102         copyResultTask = new CopyDataTask(iof, tmpOutputUrl, iof, settings.outputFilePath);
103         res.append(copyResultTask);
104 
105     } else if (subTask == copyResultTask) {
106         if (!QFile::exists(settings.outputFilePath)) {
107             stateInfo.setError(tr("Output file not found: copy from tmp dir failed."));
108             return res;
109         }
110         outputFile = settings.outputFilePath;
111     }
112     return res;
113 }
114 
getOutputFile() const115 QString CAP3SupportTask::getOutputFile() const {
116     return outputFile;
117 }
118 
report()119 Task::ReportResult CAP3SupportTask::report() {
120     U2OpStatus2Log os;
121     ExternalToolSupportUtils::removeTmpDir(tmpDirUrl, os);
122     return ReportResult_Finished;
123 }
124 
125 //////////////////////////////////////////
126 ////RunCap3AndOpenResultTask
RunCap3AndOpenResultTask(const CAP3SupportTaskSettings & settings)127 RunCap3AndOpenResultTask::RunCap3AndOpenResultTask(const CAP3SupportTaskSettings &settings)
128     : Task(tr("CAP3 run and open result task"), TaskFlags_NR_FOSE_COSC),
129       cap3Task(new CAP3SupportTask(settings)),
130       openView(settings.openView) {
131     GCOUNTER(cvar, "RunCap3AndOpenResultTask");
132     cap3Task->setSubtaskProgressWeight(95);
133 }
134 
prepare()135 void RunCap3AndOpenResultTask::prepare() {
136     SAFE_POINT_EXT(cap3Task, setError(tr("Invalid CAP3 task")), );
137     addSubTask(cap3Task);
138 }
139 
onSubTaskFinished(Task * subTask)140 QList<Task *> RunCap3AndOpenResultTask::onSubTaskFinished(Task *subTask) {
141     QList<Task *> subTasks;
142 
143     if (subTask->isCanceled() || subTask->hasError()) {
144         return subTasks;
145     }
146 
147     if (subTask == cap3Task) {
148         GUrl url(cap3Task->getOutputFile());
149 
150         ProjectLoader *loader = AppContext::getProjectLoader();
151         SAFE_POINT_EXT(loader, setError(tr("Project loader is NULL")), subTasks);
152         QVariantMap hints;
153         hints[ProjectLoaderHint_LoadWithoutView] = !openView;
154         Task *loadTask = loader->openWithProjectTask(url, hints);
155         if (nullptr != loadTask) {
156             subTasks << loadTask;
157         }
158     }
159 
160     return subTasks;
161 }
162 
163 //////////////////////////////////////////
164 ////CAP3LogParser
165 
CAP3LogParser()166 CAP3LogParser::CAP3LogParser() {
167 }
168 
getProgress()169 int CAP3LogParser::getProgress() {
170     return 0;
171 }
172 
173 //////////////////////////////////////////
174 ////PrepareInput
175 
PrepareInputForCAP3Task(const QStringList & inputFiles,const QString & outputDirPath)176 PrepareInputForCAP3Task::PrepareInputForCAP3Task(const QStringList &inputFiles, const QString &outputDirPath)
177     : Task("PrepareInputForCAP3Task", TaskFlags_FOSCOE), inputUrls(inputFiles), outputDir(outputDirPath), onlyCopyFiles(false) {
178 }
179 
prepare()180 void PrepareInputForCAP3Task::prepare() {
181     if (inputUrls.size() == 1) {
182         const QString &inputFileUrl = inputUrls.first();
183 
184         QList<FormatDetectionResult> results = DocumentUtils::detectFormat(inputFileUrl);
185 
186         if (!results.isEmpty()) {
187             DocumentFormat *format = results.first().format;
188             if (format->getFormatId() == BaseDocumentFormats::FASTA) {
189                 onlyCopyFiles = true;
190             }
191         }
192     }
193 
194     if (onlyCopyFiles) {
195         // Short path: copy single FASTA file along with quality and constraints to target dir
196         QString inputFileUrl = inputUrls.first();
197         filesToCopy.append(inputFileUrl);
198         QString inputFileUrlBase = GUrl(inputFileUrl).baseFileName();
199         QString inputFileDir = GUrl(inputFileUrl).dirPath();
200         QString qualFileUrl = inputFileDir + "/" + inputFileUrlBase + ".qual";
201         if (QFile::exists(qualFileUrl)) {
202             filesToCopy.append(qualFileUrl);
203         }
204         QString constraintsFileUrl = inputFileDir + "/" + inputFileUrlBase + ".con";
205         if (QFile::exists(constraintsFileUrl)) {
206             filesToCopy.append(qualFileUrl);
207         }
208         foreach (const QString &fileName, filesToCopy) {
209             IOAdapterFactory *iof =
210                 AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(BaseIOAdapters::LOCAL_FILE);
211             CopyDataTask *copyTask = new CopyDataTask(iof, fileName, iof, outputDir + "/" + GUrl(fileName).fileName());
212             addSubTask(copyTask);
213         }
214         preparedPath = outputDir + "/" + GUrl(inputFileUrl).fileName();
215     } else {
216         // Long path: load each file, save sequences and qualities to output dir
217         QList<GUrl> inputGUrls;
218         foreach (const QString &url, inputUrls) {
219             inputGUrls.append(url);
220         }
221 
222         if (!seqReader.init(inputGUrls)) {
223             setError(seqReader.getErrorMessage());
224             return;
225         }
226 
227         QString outPath = outputDir + "/" + QString("%1_misc").arg(inputGUrls.first().baseFileName());
228         qualityFilePath = outPath + ".qual";
229 
230         if (!seqWriter.init(outPath + ".fa")) {
231             setError(tr("Failed to initialize sequence writer."));
232             return;
233         }
234     }
235 }
236 
run()237 void PrepareInputForCAP3Task::run() {
238     if (hasError() || onlyCopyFiles) {
239         return;
240     }
241 
242     while (seqReader.hasNext()) {
243         if (isCanceled()) {
244             return;
245         }
246         DNASequence *seq = seqReader.getNextSequenceObject();
247         if (seq == nullptr) {
248             setError(seqReader.getErrorMessage());
249             return;
250         }
251         // avoid names duplication
252         QByteArray seqName = seq->getName().toLatin1();
253         seqName.replace(' ', '_');
254         seq->setName(seqName);
255         bool ok = seqWriter.writeNextSequence(*seq);
256         if (!ok) {
257             setError(tr("Failed to write sequence %1").arg(seq->getName()));
258             return;
259         }
260 
261         if (!seq->quality.isEmpty()) {
262             DNAQualityIOUtils::writeDNAQuality(seqName, seq->quality, qualityFilePath, true /*append*/, true /*decode*/, stateInfo);
263             if (stateInfo.hasError()) {
264                 return;
265             }
266         }
267     }
268     preparedPath = seqWriter.getOutputPath().getURLString();
269     seqWriter.close();
270 }
271 
getArgumentsList()272 QStringList CAP3SupportTaskSettings::getArgumentsList() {
273     QStringList res;
274 
275     res += "-a";
276     res += QString("%1").arg(bandExpansionSize);
277     res += "-b";
278     res += QString("%1").arg(baseQualityDiffCutoff);
279     res += "-c";
280     res += QString("%1").arg(baseQualityClipCutoff);
281     res += "-d";
282     res += QString("%1").arg(maxQScoreSum);
283     res += "-f";
284     res += QString("%1").arg(maxGapLength);
285     res += "-g";
286     res += QString("%1").arg(gapPenaltyFactor);
287     res += "-m";
288     res += QString("%1").arg(matchScoreFactor);
289     res += "-n";
290     res += QString("%1").arg(mismatchScoreFactor);
291     res += "-o";
292     res += QString("%1").arg(overlapLengthCutoff);
293     res += "-p";
294     res += QString("%1").arg(overlapPercentIdentityCutoff);
295     res += "-r";
296     res += QString("%1").arg((int)reverseReads);
297     res += "-s";
298     res += QString("%1").arg(overlapSimilarityScoreCutoff);
299     res += "-t";
300     res += QString("%1").arg(maxNumberOfWordMatches);
301     res += "-y";
302     res += QString("%1").arg(clippingRange);
303 
304     return res;
305 }
306 
307 }    // namespace U2
308