1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include "PrepareToImportTask.h"
23
24 #include <QFileInfo>
25
26 #include <U2Core/AppContext.h>
27 #include <U2Core/AppSettings.h>
28 #include <U2Core/BaseDocumentFormats.h>
29 #include <U2Core/DocumentImport.h>
30 #include <U2Core/DocumentUtils.h>
31 #include <U2Core/U2SafePoints.h>
32 #include <U2Core/UserApplicationsSettings.h>
33
34 #include <U2Formats/BAMUtils.h>
35
36 #include "LoadBamInfoTask.h"
37
38 namespace U2 {
39 namespace BAM {
40
PrepareToImportTask(const GUrl & url,bool sam,const QString & refUrl,const QString & workingDir)41 PrepareToImportTask::PrepareToImportTask(const GUrl &url, bool sam, const QString &refUrl, const QString &workingDir)
42 : Task("Prepare assembly file to import", TaskFlag_None),
43 sourceURL(url), refUrl(refUrl), workingDir(workingDir), samFormat(sam), newURL(false) {
44 tpm = Progress_Manual;
45 }
46
getBamUrl() const47 QString PrepareToImportTask::getBamUrl() const {
48 if (samFormat) {
49 QString samUrl = sourceURL.getURLString();
50 return workingDir + "/" + QFileInfo(samUrl).fileName() + ".bam";
51 } else {
52 return sourceURL.getURLString();
53 }
54 }
55
/**
 * Builds "<workingDir>/<file name of bamUrl>_sorted".
 * run() hands this value to BAMUtils::sortBam() as the second argument; no extension is added here.
 */
QString PrepareToImportTask::getSortedBamUrl(const QString &bamUrl) const {
    const QString bamFileName = QFileInfo(bamUrl).fileName();
    return workingDir + "/" + bamFileName + "_sorted";
}
59
/**
 * Builds "<workingDir>/<file name of sortedBamUrl>": the place inside the working
 * directory where run() puts the BAM file before creating an index for it.
 */
QString PrepareToImportTask::getIndexedBamUrl(const QString &sortedBamUrl) const {
    const QString sortedFileName = QFileInfo(sortedBamUrl).fileName();
    return workingDir + "/" + sortedFileName;
}
63
getFastaUrl() const64 QString PrepareToImportTask::getFastaUrl() const {
65 return workingDir + "/" + QFileInfo(refUrl).fileName();
66 }
67
/**
 * Formats the translated error text used when copying a file fails.
 *
 * @param url1 path of the file that was to be copied
 * @param url2 path of the intended copy
 */
QString PrepareToImportTask::getCopyError(const QString &url1, const QString &url2) const {
    const QString messageTemplate = LoadInfoTask::tr("Can not copy the '%1' file to '%2'");
    return messageTemplate.arg(url1).arg(url2);
}
71
72 namespace {
equalUrls(const QString & url1,const QString & url2)73 bool equalUrls(const QString &url1, const QString &url2) {
74 return QFileInfo(url1).absoluteFilePath() == QFileInfo(url2).absoluteFilePath();
75 }
76 } // namespace
77
needToCopyBam(const QString & sortedBamUrl) const78 bool PrepareToImportTask::needToCopyBam(const QString &sortedBamUrl) const {
79 const QString indexedBamUrl = getIndexedBamUrl(sortedBamUrl);
80 return !equalUrls(indexedBamUrl, sortedBamUrl);
81 }
82
needToCopyFasta() const83 bool PrepareToImportTask::needToCopyFasta() const {
84 return !equalUrls(getFastaUrl(), refUrl);
85 }
86
run()87 void PrepareToImportTask::run() {
88 // SAM to BAM if needed
89 QString bamUrl = getBamUrl();
90 if (samFormat) {
91 newURL = true;
92 stateInfo.setDescription(LoadInfoTask::tr("Converting SAM to BAM"));
93
94 checkReferenceFile();
95 CHECK_OP(stateInfo, );
96
97 BAMUtils::ConvertOption options(true /*SAM to BAM*/, refUrl);
98 BAMUtils::convertToSamOrBam(sourceURL, bamUrl, options, stateInfo);
99 CHECK_OP(stateInfo, );
100 }
101 stateInfo.setProgress(33);
102
103 bool sorted = BAMUtils::isSortedBam(bamUrl, stateInfo);
104 CHECK_OP(stateInfo, );
105
106 // Sort BAM if needed
107 QString sortedBamUrl;
108 if (sorted) {
109 sortedBamUrl = bamUrl;
110 } else {
111 newURL = true;
112 stateInfo.setDescription(LoadInfoTask::tr("Sorting BAM"));
113
114 sortedBamUrl = BAMUtils::sortBam(bamUrl, getSortedBamUrl(bamUrl), stateInfo).getURLString();
115 CHECK_OP(stateInfo, );
116 }
117 stateInfo.setProgress(66);
118
119 bool indexed = BAMUtils::hasValidBamIndex(sortedBamUrl);
120
121 // Index BAM if needed
122 QString indexedBamUrl;
123 if (indexed) {
124 indexedBamUrl = sortedBamUrl;
125 } else {
126 indexedBamUrl = getIndexedBamUrl(sortedBamUrl);
127 if (needToCopyBam(sortedBamUrl)) {
128 newURL = true;
129 stateInfo.setDescription(LoadInfoTask::tr("Coping sorted BAM"));
130
131 bool copied = QFile::copy(sortedBamUrl, indexedBamUrl);
132 CHECK_EXT(copied, setError(getCopyError(sortedBamUrl, indexedBamUrl)), );
133 }
134 stateInfo.setDescription(LoadInfoTask::tr("Creating BAM index"));
135
136 BAMUtils::createBamIndex(indexedBamUrl, stateInfo);
137 CHECK_OP(stateInfo, );
138 }
139 stateInfo.setProgress(100);
140 sourceURL = indexedBamUrl;
141 }
142
143 namespace {
isUnknownFormat(const QList<FormatDetectionResult> & formats)144 static bool isUnknownFormat(const QList<FormatDetectionResult> &formats) {
145 if (formats.isEmpty()) {
146 return true;
147 }
148 FormatDetectionResult f = formats.first();
149 if (nullptr == f.format && nullptr == f.importer) {
150 return true;
151 }
152 return false;
153 }
154
detectedFormatId(const FormatDetectionResult & f)155 static QString detectedFormatId(const FormatDetectionResult &f) {
156 if (nullptr == f.format && f.importer == nullptr) {
157 return "";
158 } else if (nullptr == f.format) {
159 return f.importer->getImporterName();
160 }
161 return f.format->getFormatId();
162 }
163 } // namespace
164
checkReferenceFile()165 void PrepareToImportTask::checkReferenceFile() {
166 CHECK(!refUrl.isEmpty(), );
167
168 FormatDetectionConfig cfg;
169 cfg.useImporters = true;
170 QList<FormatDetectionResult> formats = DocumentUtils::detectFormat(refUrl, cfg);
171 if (isUnknownFormat(formats)) {
172 setError(LoadInfoTask::tr("Unknown reference sequence format. Only FASTA is supported"));
173 return;
174 }
175 QString formatId = detectedFormatId(formats.first());
176 if (BaseDocumentFormats::FASTA != formatId) {
177 setError(LoadInfoTask::tr("The detected reference sequence format is '%1'. Only FASTA is supported").arg(formatId));
178 return;
179 }
180
181 if (!BAMUtils::hasValidFastaIndex(refUrl)) {
182 if (needToCopyFasta()) {
183 bool copied = QFile::copy(refUrl, getFastaUrl());
184 CHECK_EXT(copied, setError(getCopyError(refUrl, getFastaUrl())), );
185
186 refUrl = getFastaUrl();
187 }
188 }
189 }
190
getSourceUrl() const191 const GUrl &PrepareToImportTask::getSourceUrl() const {
192 return sourceURL;
193 }
194
isNewURL()195 bool PrepareToImportTask::isNewURL() {
196 return newURL;
197 }
198
199 } // namespace BAM
200 } // namespace U2
201