1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include "ace/ConvertAceToSqliteTask.h"
23
24 #include <QFile>
25
26 #include <U2Core/AppContext.h>
27 #include <U2Core/AssemblyImporter.h>
28 #include <U2Core/Counter.h>
29 #include <U2Core/DocumentModel.h>
30 #include <U2Core/IOAdapterUtils.h>
31 #include <U2Core/Timer.h>
32 #include <U2Core/U2AlphabetUtils.h>
33 #include <U2Core/U2AssemblyUtils.h>
34 #include <U2Core/U2AttributeDbi.h>
35 #include <U2Core/U2CoreAttributes.h>
36 #include <U2Core/U2CrossDatabaseReferenceDbi.h>
37 #include <U2Core/U2DbiRegistry.h>
38 #include <U2Core/U2DbiUtils.h>
39 #include <U2Core/U2ObjectDbi.h>
40 #include <U2Core/U2SafePoints.h>
41 #include <U2Core/U2SequenceDbi.h>
42 #include <U2Core/U2SequenceUtils.h>
43
44 namespace U2 {
45
ConvertAceToSqliteTask(const GUrl & _sourceUrl,const U2DbiRef & dstDbiRef)46 ConvertAceToSqliteTask::ConvertAceToSqliteTask(const GUrl &_sourceUrl, const U2DbiRef &dstDbiRef)
47 : Task(tr("Convert ACE to UGENE database (%1)").arg(_sourceUrl.fileName()), TaskFlag_None),
48 sourceUrl(_sourceUrl),
49 dstDbiRef(dstDbiRef),
50 dbi(nullptr),
51 databaseWasCreated(false),
52 countImportedAssembly(0) {
53 GCOUNTER(cvar, "ConvertAceToUgenedb");
54 tpm = Progress_Manual;
55 }
56
run()57 void ConvertAceToSqliteTask::run() {
58 taskLog.info(tr("Converting assembly from %1 to %2 started")
59 .arg(sourceUrl.fileName())
60 .arg(getDestinationUrl().fileName()));
61
62 qint64 startTime = TimeCounter::getCounter();
63
64 QScopedPointer<IOAdapter> ioAdapter;
65 IOAdapterFactory *factory = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(IOAdapterUtils::url2io(sourceUrl));
66 SAFE_POINT_EXT(factory, setError(tr("IOAdapterFactory is NULL")), );
67 ioAdapter.reset(factory->createIOAdapter());
68
69 if (!ioAdapter->open(sourceUrl, IOAdapterMode_Read)) {
70 setError(tr("Can't open file '%1'").arg(sourceUrl.getURLString()));
71 return;
72 }
73
74 U2OpStatusImpl os;
75 DbiConnection dbiHandle(dstDbiRef, false, os);
76 if (os.isCoR()) {
77 databaseWasCreated = true;
78 dbiHandle = DbiConnection(dstDbiRef, true, stateInfo);
79 CHECK_OP(stateInfo, );
80 }
81
82 dbi = dbiHandle.dbi;
83 SAFE_POINT(dbi, tr("DBI is NULL"), );
84 U2ObjectDbi *objDbi = dbi->getObjectDbi();
85 SAFE_POINT(objDbi, tr("Object DBI is NULL"), );
86
87 stateInfo.setDescription("Importing");
88 taskLog.details(tr("Importing"));
89
90 qint64 totalReadsImported = importAssemblies(*ioAdapter);
91 CHECK_OP(stateInfo, );
92
93 stateInfo.setDescription("Packing reads");
94 qint64 packTime = packReads();
95 CHECK_OP(stateInfo, );
96
97 stateInfo.setDescription("Updating attributes");
98 updateAttributeDbi();
99 CHECK_OP(stateInfo, );
100
101 qint64 totalTime = TimeCounter::getCounter() - startTime;
102 taskLog.info(QString("Converting assembly from %1 to %2 successfully finished: imported %3 reads, total time %4 s, pack time %5 s")
103 .arg(sourceUrl.fileName())
104 .arg(getDestinationUrl().fileName())
105 .arg(totalReadsImported)
106 .arg(totalTime)
107 .arg(packTime));
108 }
109
report()110 Task::ReportResult ConvertAceToSqliteTask::report() {
111 if (stateInfo.isCoR() &&
112 databaseWasCreated &&
113 getDestinationUrl().isLocalFile()) {
114 QFile::remove(getDestinationUrl().getURLString());
115 }
116
117 return ReportResult_Finished;
118 }
119
getDestinationUrl() const120 GUrl ConvertAceToSqliteTask::getDestinationUrl() const {
121 return GUrl(U2DbiUtils::ref2Url(dstDbiRef));
122 }
123
getImportedObjects() const124 QMap<U2Sequence, U2Assembly> ConvertAceToSqliteTask::getImportedObjects() const {
125 QMap<U2Sequence, U2Assembly> importedObjects;
126 foreach (int pairNum, importedReferences.keys()) {
127 importedObjects.insert(importedReferences[pairNum], assemblies[pairNum]);
128 }
129 return importedObjects;
130 }
131
importAssemblies(IOAdapter & ioAdapter)132 qint64 ConvertAceToSqliteTask::importAssemblies(IOAdapter &ioAdapter) {
133 qint64 totalReadsImported = 0;
134
135 U2SequenceDbi *seqDbi = dbi->getSequenceDbi();
136 SAFE_POINT(seqDbi, tr("Sequence DBI is NULL"), totalReadsImported);
137
138 U2OpStatusChildImpl os(&stateInfo, U2OpStatusMapping(0, 50));
139 QScopedPointer<AceReader> aceReader;
140 aceReader.reset(new AceReader(ioAdapter, os));
141 CHECK_OP(os, totalReadsImported);
142
143 QScopedPointer<AceIterator> iterator;
144 iterator.reset(new AceIterator(*aceReader, stateInfo));
145
146 while (iterator->hasNext()) {
147 CHECK(!isCanceled(), totalReadsImported);
148
149 TmpDbiObjects tmpObjects(dstDbiRef, os);
150
151 U2Assembly assembly;
152
153 Assembly aceAssembly = iterator->next();
154 CHECK_OP(stateInfo, totalReadsImported);
155 CHECK_EXT(aceAssembly.isValid(), setError(tr("Invalid source file")), totalReadsImported);
156 Assembly::Sequence aceReference = aceAssembly.getReference();
157 referencesData.insert(countImportedAssembly, aceReference);
158
159 U2Sequence reference;
160 reference.length = aceReference.data.length();
161 reference.visualName = aceReference.name;
162 reference.alphabet = U2AlphabetUtils::findBestAlphabet(aceReference.data)->getId();
163
164 seqDbi->createSequenceObject(reference, U2ObjectDbi::ROOT_FOLDER, stateInfo);
165 CHECK_OP(stateInfo, totalReadsImported);
166 importedReferences.insert(countImportedAssembly, reference);
167 tmpObjects.objects << reference.id;
168
169 QVariantMap refHints;
170 refHints[U2SequenceDbiHints::EMPTY_SEQUENCE] = true;
171 refHints[U2SequenceDbiHints::UPDATE_SEQUENCE_LENGTH] = true;
172 seqDbi->updateSequenceData(reference.id, U2_REGION_MAX, aceReference.data, refHints, stateInfo);
173 CHECK_OP(stateInfo, totalReadsImported);
174
175 assembly.visualName = aceAssembly.getName();
176 assembly.referenceId = reference.id;
177
178 U2AssemblyReadsImportInfo &importInfo = importInfos[countImportedAssembly];
179 AssemblyImporter importer(stateInfo);
180 importer.createAssembly(dstDbiRef, U2ObjectDbi::ROOT_FOLDER, nullptr, importInfo, assembly);
181 CHECK_OP(stateInfo, totalReadsImported);
182
183 importInfo.packed = false;
184 importInfo.nReads = aceAssembly.getReadsCount();
185 assemblies.insert(countImportedAssembly, assembly);
186
187 QList<U2AssemblyRead> reads = aceAssembly.getReads();
188
189 BufferedDbiIterator<U2AssemblyRead> readsIterator(reads);
190 importer.addReads(&readsIterator);
191 CHECK_OP(stateInfo, totalReadsImported);
192
193 tmpObjects.objects.removeAll(reference.id);
194
195 totalReadsImported += aceAssembly.getReadsCount();
196 countImportedAssembly++;
197 }
198 CHECK_EXT(aceReader->getContigsCount() == countImportedAssembly, setError(tr("Invalid source file")), totalReadsImported);
199
200 return totalReadsImported;
201 }
202
packReads()203 qint64 ConvertAceToSqliteTask::packReads() {
204 qint64 packStart = TimeCounter::getCounter();
205 int progressStep;
206 if (assemblies.count() > 0) {
207 progressStep = 40 / assemblies.count();
208 } else {
209 progressStep = 40;
210 }
211
212 U2AssemblyDbi *assDbi = dbi->getAssemblyDbi();
213 SAFE_POINT(assDbi, tr("Assembly DBI is NULL"), 0);
214
215 foreach (int assemblyNum, assemblies.keys()) {
216 U2AssemblyReadsImportInfo &importInfo = importInfos[assemblyNum];
217 // Pack reads only if it were not packed on import
218 if (!importInfo.packed) {
219 taskLog.details(tr("Packing reads for assembly '%1' (%2 of %3)")
220 .arg(assemblies[assemblyNum].visualName)
221 .arg(assemblyNum + 1)
222 .arg(assemblies.keys().count()));
223
224 U2AssemblyPackStat stat;
225 assDbi->pack(assemblies[assemblyNum].id, stat, stateInfo);
226 CHECK_OP(stateInfo, 0);
227
228 importInfo.packStat = stat;
229 }
230 stateInfo.setProgress(stateInfo.getProgress() + progressStep);
231 }
232
233 return TimeCounter::getCounter() - packStart;
234 }
235
updateAttributeDbi()236 void ConvertAceToSqliteTask::updateAttributeDbi() {
237 int progressStep;
238 if (assemblies.count() > 0) {
239 progressStep = 10 / assemblies.count();
240 } else {
241 progressStep = 10;
242 }
243
244 U2AttributeDbi *attrDbi = dbi->getAttributeDbi();
245 SAFE_POINT(attrDbi, tr("Attribute DBI is NULL"), );
246
247 foreach (int assemblyNum, assemblies.keys()) {
248 const U2Assembly &assembly = assemblies[assemblyNum];
249 const Assembly::Sequence &reference = referencesData[assemblyNum];
250 {
251 U2IntegerAttribute lenAttr;
252 lenAttr.objectId = assembly.id;
253 lenAttr.name = U2BaseAttributeName::reference_length;
254 lenAttr.version = assembly.version;
255 lenAttr.value = reference.data.length();
256 attrDbi->createIntegerAttribute(lenAttr, stateInfo);
257 CHECK_OP(stateInfo, );
258 }
259
260 U2AssemblyReadsImportInfo &importInfo = importInfos[assemblyNum];
261 qint64 maxProw = importInfo.packStat.maxProw;
262 qint64 readsCount = importInfo.packStat.readsCount;
263 const U2AssemblyCoverageStat &coverageStat = importInfo.coverageInfo.coverage;
264 if (maxProw > 0) {
265 U2IntegerAttribute maxProwAttr;
266 maxProwAttr.objectId = assembly.id;
267 maxProwAttr.name = U2BaseAttributeName::max_prow;
268 maxProwAttr.version = assembly.version;
269 maxProwAttr.value = maxProw;
270 attrDbi->createIntegerAttribute(maxProwAttr, stateInfo);
271 CHECK_OP(stateInfo, );
272 } else if (readsCount > 0) {
273 // if there are reads, but maxProw == 0 => error
274 taskLog.details(tr("Warning: incorrect maxProw == %1, probably packing was not done! Attribute was not set").arg(maxProw));
275 }
276
277 if (readsCount > 0) {
278 U2IntegerAttribute countReadsAttr;
279 countReadsAttr.objectId = assembly.id;
280 countReadsAttr.name = "count_reads_attribute";
281 countReadsAttr.version = assembly.version;
282 countReadsAttr.value = readsCount;
283 attrDbi->createIntegerAttribute(countReadsAttr, stateInfo);
284 CHECK_OP(stateInfo, );
285 }
286 if (!coverageStat.isEmpty()) {
287 U2ByteArrayAttribute attribute;
288 attribute.objectId = assembly.id;
289 attribute.name = U2BaseAttributeName::coverage_statistics;
290 attribute.value = U2AssemblyUtils::serializeCoverageStat(coverageStat);
291 attribute.version = assembly.version;
292 attrDbi->createByteArrayAttribute(attribute, stateInfo);
293 CHECK_OP(stateInfo, );
294 }
295 stateInfo.setProgress(stateInfo.getProgress() + progressStep);
296 }
297 }
298
299 } // namespace U2
300