1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include "PrepareToImportTask.h"
23 
24 #include <QFileInfo>
25 
26 #include <U2Core/AppContext.h>
27 #include <U2Core/AppSettings.h>
28 #include <U2Core/BaseDocumentFormats.h>
29 #include <U2Core/DocumentImport.h>
30 #include <U2Core/DocumentUtils.h>
31 #include <U2Core/U2SafePoints.h>
32 #include <U2Core/UserApplicationsSettings.h>
33 
34 #include <U2Formats/BAMUtils.h>
35 
36 #include "LoadBamInfoTask.h"
37 
38 namespace U2 {
39 namespace BAM {
40 
PrepareToImportTask(const GUrl & url,bool sam,const QString & refUrl,const QString & workingDir)41 PrepareToImportTask::PrepareToImportTask(const GUrl &url, bool sam, const QString &refUrl, const QString &workingDir)
42     : Task("Prepare assembly file to import", TaskFlag_None),
43       sourceURL(url), refUrl(refUrl), workingDir(workingDir), samFormat(sam), newURL(false) {
44     tpm = Progress_Manual;
45 }
46 
getBamUrl() const47 QString PrepareToImportTask::getBamUrl() const {
48     if (samFormat) {
49         QString samUrl = sourceURL.getURLString();
50         return workingDir + "/" + QFileInfo(samUrl).fileName() + ".bam";
51     } else {
52         return sourceURL.getURLString();
53     }
54 }
55 
/**
 * Builds "<workingDir>/<file name of bamUrl>_sorted", the value run() hands to
 * BAMUtils::sortBam() as the target for the sorted BAM.
 */
QString PrepareToImportTask::getSortedBamUrl(const QString &bamUrl) const {
    const QString bamFileName = QFileInfo(bamUrl).fileName();
    return workingDir + "/" + bamFileName + "_sorted";
}
59 
/**
 * Builds "<workingDir>/<file name of sortedBamUrl>": the location inside the working
 * directory at which the BAM is indexed by run().
 */
QString PrepareToImportTask::getIndexedBamUrl(const QString &sortedBamUrl) const {
    const QString sortedFileName = QFileInfo(sortedBamUrl).fileName();
    return workingDir + "/" + sortedFileName;
}
63 
getFastaUrl() const64 QString PrepareToImportTask::getFastaUrl() const {
65     return workingDir + "/" + QFileInfo(refUrl).fileName();
66 }
67 
/**
 * Produces the translated error text for a failed file copy.
 *
 * @param url1 file that was being copied (substituted for %1).
 * @param url2 copy destination (substituted for %2).
 */
QString PrepareToImportTask::getCopyError(const QString &url1, const QString &url2) const {
    const QString messageTemplate = LoadInfoTask::tr("Can not copy the '%1' file to '%2'");
    return messageTemplate.arg(url1).arg(url2);
}
71 
72 namespace {
equalUrls(const QString & url1,const QString & url2)73 bool equalUrls(const QString &url1, const QString &url2) {
74     return QFileInfo(url1).absoluteFilePath() == QFileInfo(url2).absoluteFilePath();
75 }
76 }  // namespace
77 
needToCopyBam(const QString & sortedBamUrl) const78 bool PrepareToImportTask::needToCopyBam(const QString &sortedBamUrl) const {
79     const QString indexedBamUrl = getIndexedBamUrl(sortedBamUrl);
80     return !equalUrls(indexedBamUrl, sortedBamUrl);
81 }
82 
needToCopyFasta() const83 bool PrepareToImportTask::needToCopyFasta() const {
84     return !equalUrls(getFastaUrl(), refUrl);
85 }
86 
run()87 void PrepareToImportTask::run() {
88     // SAM to BAM if needed
89     QString bamUrl = getBamUrl();
90     if (samFormat) {
91         newURL = true;
92         stateInfo.setDescription(LoadInfoTask::tr("Converting SAM to BAM"));
93 
94         checkReferenceFile();
95         CHECK_OP(stateInfo, );
96 
97         BAMUtils::ConvertOption options(true /*SAM to BAM*/, refUrl);
98         BAMUtils::convertToSamOrBam(sourceURL, bamUrl, options, stateInfo);
99         CHECK_OP(stateInfo, );
100     }
101     stateInfo.setProgress(33);
102 
103     bool sorted = BAMUtils::isSortedBam(bamUrl, stateInfo);
104     CHECK_OP(stateInfo, );
105 
106     // Sort BAM if needed
107     QString sortedBamUrl;
108     if (sorted) {
109         sortedBamUrl = bamUrl;
110     } else {
111         newURL = true;
112         stateInfo.setDescription(LoadInfoTask::tr("Sorting BAM"));
113 
114         sortedBamUrl = BAMUtils::sortBam(bamUrl, getSortedBamUrl(bamUrl), stateInfo).getURLString();
115         CHECK_OP(stateInfo, );
116     }
117     stateInfo.setProgress(66);
118 
119     bool indexed = BAMUtils::hasValidBamIndex(sortedBamUrl);
120 
121     // Index BAM if needed
122     QString indexedBamUrl;
123     if (indexed) {
124         indexedBamUrl = sortedBamUrl;
125     } else {
126         indexedBamUrl = getIndexedBamUrl(sortedBamUrl);
127         if (needToCopyBam(sortedBamUrl)) {
128             newURL = true;
129             stateInfo.setDescription(LoadInfoTask::tr("Coping sorted BAM"));
130 
131             bool copied = QFile::copy(sortedBamUrl, indexedBamUrl);
132             CHECK_EXT(copied, setError(getCopyError(sortedBamUrl, indexedBamUrl)), );
133         }
134         stateInfo.setDescription(LoadInfoTask::tr("Creating BAM index"));
135 
136         BAMUtils::createBamIndex(indexedBamUrl, stateInfo);
137         CHECK_OP(stateInfo, );
138     }
139     stateInfo.setProgress(100);
140     sourceURL = indexedBamUrl;
141 }
142 
143 namespace {
isUnknownFormat(const QList<FormatDetectionResult> & formats)144 static bool isUnknownFormat(const QList<FormatDetectionResult> &formats) {
145     if (formats.isEmpty()) {
146         return true;
147     }
148     FormatDetectionResult f = formats.first();
149     if (nullptr == f.format && nullptr == f.importer) {
150         return true;
151     }
152     return false;
153 }
154 
detectedFormatId(const FormatDetectionResult & f)155 static QString detectedFormatId(const FormatDetectionResult &f) {
156     if (nullptr == f.format && f.importer == nullptr) {
157         return "";
158     } else if (nullptr == f.format) {
159         return f.importer->getImporterName();
160     }
161     return f.format->getFormatId();
162 }
163 }  // namespace
164 
checkReferenceFile()165 void PrepareToImportTask::checkReferenceFile() {
166     CHECK(!refUrl.isEmpty(), );
167 
168     FormatDetectionConfig cfg;
169     cfg.useImporters = true;
170     QList<FormatDetectionResult> formats = DocumentUtils::detectFormat(refUrl, cfg);
171     if (isUnknownFormat(formats)) {
172         setError(LoadInfoTask::tr("Unknown reference sequence format. Only FASTA is supported"));
173         return;
174     }
175     QString formatId = detectedFormatId(formats.first());
176     if (BaseDocumentFormats::FASTA != formatId) {
177         setError(LoadInfoTask::tr("The detected reference sequence format is '%1'. Only FASTA is supported").arg(formatId));
178         return;
179     }
180 
181     if (!BAMUtils::hasValidFastaIndex(refUrl)) {
182         if (needToCopyFasta()) {
183             bool copied = QFile::copy(refUrl, getFastaUrl());
184             CHECK_EXT(copied, setError(getCopyError(refUrl, getFastaUrl())), );
185 
186             refUrl = getFastaUrl();
187         }
188     }
189 }
190 
getSourceUrl() const191 const GUrl &PrepareToImportTask::getSourceUrl() const {
192     return sourceURL;
193 }
194 
isNewURL()195 bool PrepareToImportTask::isNewURL() {
196     return newURL;
197 }
198 
199 }  // namespace BAM
200 }  // namespace U2
201