1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include "ace/ConvertAceToSqliteTask.h"
23 
24 #include <QFile>
25 
26 #include <U2Core/AppContext.h>
27 #include <U2Core/AssemblyImporter.h>
28 #include <U2Core/Counter.h>
29 #include <U2Core/DocumentModel.h>
30 #include <U2Core/IOAdapterUtils.h>
31 #include <U2Core/Timer.h>
32 #include <U2Core/U2AlphabetUtils.h>
33 #include <U2Core/U2AssemblyUtils.h>
34 #include <U2Core/U2AttributeDbi.h>
35 #include <U2Core/U2CoreAttributes.h>
36 #include <U2Core/U2CrossDatabaseReferenceDbi.h>
37 #include <U2Core/U2DbiRegistry.h>
38 #include <U2Core/U2DbiUtils.h>
39 #include <U2Core/U2ObjectDbi.h>
40 #include <U2Core/U2SafePoints.h>
41 #include <U2Core/U2SequenceDbi.h>
42 #include <U2Core/U2SequenceUtils.h>
43 
44 namespace U2 {
45 
ConvertAceToSqliteTask(const GUrl & _sourceUrl,const U2DbiRef & dstDbiRef)46 ConvertAceToSqliteTask::ConvertAceToSqliteTask(const GUrl &_sourceUrl, const U2DbiRef &dstDbiRef)
47     : Task(tr("Convert ACE to UGENE database (%1)").arg(_sourceUrl.fileName()), TaskFlag_None),
48       sourceUrl(_sourceUrl),
49       dstDbiRef(dstDbiRef),
50       dbi(nullptr),
51       databaseWasCreated(false),
52       countImportedAssembly(0) {
53     GCOUNTER(cvar, "ConvertAceToUgenedb");
54     tpm = Progress_Manual;
55 }
56 
run()57 void ConvertAceToSqliteTask::run() {
58     taskLog.info(tr("Converting assembly from %1 to %2 started")
59                      .arg(sourceUrl.fileName())
60                      .arg(getDestinationUrl().fileName()));
61 
62     qint64 startTime = TimeCounter::getCounter();
63 
64     QScopedPointer<IOAdapter> ioAdapter;
65     IOAdapterFactory *factory = AppContext::getIOAdapterRegistry()->getIOAdapterFactoryById(IOAdapterUtils::url2io(sourceUrl));
66     SAFE_POINT_EXT(factory, setError(tr("IOAdapterFactory is NULL")), );
67     ioAdapter.reset(factory->createIOAdapter());
68 
69     if (!ioAdapter->open(sourceUrl, IOAdapterMode_Read)) {
70         setError(tr("Can't open file '%1'").arg(sourceUrl.getURLString()));
71         return;
72     }
73 
74     U2OpStatusImpl os;
75     DbiConnection dbiHandle(dstDbiRef, false, os);
76     if (os.isCoR()) {
77         databaseWasCreated = true;
78         dbiHandle = DbiConnection(dstDbiRef, true, stateInfo);
79         CHECK_OP(stateInfo, );
80     }
81 
82     dbi = dbiHandle.dbi;
83     SAFE_POINT(dbi, tr("DBI is NULL"), );
84     U2ObjectDbi *objDbi = dbi->getObjectDbi();
85     SAFE_POINT(objDbi, tr("Object DBI is NULL"), );
86 
87     stateInfo.setDescription("Importing");
88     taskLog.details(tr("Importing"));
89 
90     qint64 totalReadsImported = importAssemblies(*ioAdapter);
91     CHECK_OP(stateInfo, );
92 
93     stateInfo.setDescription("Packing reads");
94     qint64 packTime = packReads();
95     CHECK_OP(stateInfo, );
96 
97     stateInfo.setDescription("Updating attributes");
98     updateAttributeDbi();
99     CHECK_OP(stateInfo, );
100 
101     qint64 totalTime = TimeCounter::getCounter() - startTime;
102     taskLog.info(QString("Converting assembly from %1 to %2 successfully finished: imported %3 reads, total time %4 s, pack time %5 s")
103                      .arg(sourceUrl.fileName())
104                      .arg(getDestinationUrl().fileName())
105                      .arg(totalReadsImported)
106                      .arg(totalTime)
107                      .arg(packTime));
108 }
109 
report()110 Task::ReportResult ConvertAceToSqliteTask::report() {
111     if (stateInfo.isCoR() &&
112         databaseWasCreated &&
113         getDestinationUrl().isLocalFile()) {
114         QFile::remove(getDestinationUrl().getURLString());
115     }
116 
117     return ReportResult_Finished;
118 }
119 
getDestinationUrl() const120 GUrl ConvertAceToSqliteTask::getDestinationUrl() const {
121     return GUrl(U2DbiUtils::ref2Url(dstDbiRef));
122 }
123 
getImportedObjects() const124 QMap<U2Sequence, U2Assembly> ConvertAceToSqliteTask::getImportedObjects() const {
125     QMap<U2Sequence, U2Assembly> importedObjects;
126     foreach (int pairNum, importedReferences.keys()) {
127         importedObjects.insert(importedReferences[pairNum], assemblies[pairNum]);
128     }
129     return importedObjects;
130 }
131 
importAssemblies(IOAdapter & ioAdapter)132 qint64 ConvertAceToSqliteTask::importAssemblies(IOAdapter &ioAdapter) {
133     qint64 totalReadsImported = 0;
134 
135     U2SequenceDbi *seqDbi = dbi->getSequenceDbi();
136     SAFE_POINT(seqDbi, tr("Sequence DBI is NULL"), totalReadsImported);
137 
138     U2OpStatusChildImpl os(&stateInfo, U2OpStatusMapping(0, 50));
139     QScopedPointer<AceReader> aceReader;
140     aceReader.reset(new AceReader(ioAdapter, os));
141     CHECK_OP(os, totalReadsImported);
142 
143     QScopedPointer<AceIterator> iterator;
144     iterator.reset(new AceIterator(*aceReader, stateInfo));
145 
146     while (iterator->hasNext()) {
147         CHECK(!isCanceled(), totalReadsImported);
148 
149         TmpDbiObjects tmpObjects(dstDbiRef, os);
150 
151         U2Assembly assembly;
152 
153         Assembly aceAssembly = iterator->next();
154         CHECK_OP(stateInfo, totalReadsImported);
155         CHECK_EXT(aceAssembly.isValid(), setError(tr("Invalid source file")), totalReadsImported);
156         Assembly::Sequence aceReference = aceAssembly.getReference();
157         referencesData.insert(countImportedAssembly, aceReference);
158 
159         U2Sequence reference;
160         reference.length = aceReference.data.length();
161         reference.visualName = aceReference.name;
162         reference.alphabet = U2AlphabetUtils::findBestAlphabet(aceReference.data)->getId();
163 
164         seqDbi->createSequenceObject(reference, U2ObjectDbi::ROOT_FOLDER, stateInfo);
165         CHECK_OP(stateInfo, totalReadsImported);
166         importedReferences.insert(countImportedAssembly, reference);
167         tmpObjects.objects << reference.id;
168 
169         QVariantMap refHints;
170         refHints[U2SequenceDbiHints::EMPTY_SEQUENCE] = true;
171         refHints[U2SequenceDbiHints::UPDATE_SEQUENCE_LENGTH] = true;
172         seqDbi->updateSequenceData(reference.id, U2_REGION_MAX, aceReference.data, refHints, stateInfo);
173         CHECK_OP(stateInfo, totalReadsImported);
174 
175         assembly.visualName = aceAssembly.getName();
176         assembly.referenceId = reference.id;
177 
178         U2AssemblyReadsImportInfo &importInfo = importInfos[countImportedAssembly];
179         AssemblyImporter importer(stateInfo);
180         importer.createAssembly(dstDbiRef, U2ObjectDbi::ROOT_FOLDER, nullptr, importInfo, assembly);
181         CHECK_OP(stateInfo, totalReadsImported);
182 
183         importInfo.packed = false;
184         importInfo.nReads = aceAssembly.getReadsCount();
185         assemblies.insert(countImportedAssembly, assembly);
186 
187         QList<U2AssemblyRead> reads = aceAssembly.getReads();
188 
189         BufferedDbiIterator<U2AssemblyRead> readsIterator(reads);
190         importer.addReads(&readsIterator);
191         CHECK_OP(stateInfo, totalReadsImported);
192 
193         tmpObjects.objects.removeAll(reference.id);
194 
195         totalReadsImported += aceAssembly.getReadsCount();
196         countImportedAssembly++;
197     }
198     CHECK_EXT(aceReader->getContigsCount() == countImportedAssembly, setError(tr("Invalid source file")), totalReadsImported);
199 
200     return totalReadsImported;
201 }
202 
packReads()203 qint64 ConvertAceToSqliteTask::packReads() {
204     qint64 packStart = TimeCounter::getCounter();
205     int progressStep;
206     if (assemblies.count() > 0) {
207         progressStep = 40 / assemblies.count();
208     } else {
209         progressStep = 40;
210     }
211 
212     U2AssemblyDbi *assDbi = dbi->getAssemblyDbi();
213     SAFE_POINT(assDbi, tr("Assembly DBI is NULL"), 0);
214 
215     foreach (int assemblyNum, assemblies.keys()) {
216         U2AssemblyReadsImportInfo &importInfo = importInfos[assemblyNum];
217         // Pack reads only if it were not packed on import
218         if (!importInfo.packed) {
219             taskLog.details(tr("Packing reads for assembly '%1' (%2 of %3)")
220                                 .arg(assemblies[assemblyNum].visualName)
221                                 .arg(assemblyNum + 1)
222                                 .arg(assemblies.keys().count()));
223 
224             U2AssemblyPackStat stat;
225             assDbi->pack(assemblies[assemblyNum].id, stat, stateInfo);
226             CHECK_OP(stateInfo, 0);
227 
228             importInfo.packStat = stat;
229         }
230         stateInfo.setProgress(stateInfo.getProgress() + progressStep);
231     }
232 
233     return TimeCounter::getCounter() - packStart;
234 }
235 
updateAttributeDbi()236 void ConvertAceToSqliteTask::updateAttributeDbi() {
237     int progressStep;
238     if (assemblies.count() > 0) {
239         progressStep = 10 / assemblies.count();
240     } else {
241         progressStep = 10;
242     }
243 
244     U2AttributeDbi *attrDbi = dbi->getAttributeDbi();
245     SAFE_POINT(attrDbi, tr("Attribute DBI is NULL"), );
246 
247     foreach (int assemblyNum, assemblies.keys()) {
248         const U2Assembly &assembly = assemblies[assemblyNum];
249         const Assembly::Sequence &reference = referencesData[assemblyNum];
250         {
251             U2IntegerAttribute lenAttr;
252             lenAttr.objectId = assembly.id;
253             lenAttr.name = U2BaseAttributeName::reference_length;
254             lenAttr.version = assembly.version;
255             lenAttr.value = reference.data.length();
256             attrDbi->createIntegerAttribute(lenAttr, stateInfo);
257             CHECK_OP(stateInfo, );
258         }
259 
260         U2AssemblyReadsImportInfo &importInfo = importInfos[assemblyNum];
261         qint64 maxProw = importInfo.packStat.maxProw;
262         qint64 readsCount = importInfo.packStat.readsCount;
263         const U2AssemblyCoverageStat &coverageStat = importInfo.coverageInfo.coverage;
264         if (maxProw > 0) {
265             U2IntegerAttribute maxProwAttr;
266             maxProwAttr.objectId = assembly.id;
267             maxProwAttr.name = U2BaseAttributeName::max_prow;
268             maxProwAttr.version = assembly.version;
269             maxProwAttr.value = maxProw;
270             attrDbi->createIntegerAttribute(maxProwAttr, stateInfo);
271             CHECK_OP(stateInfo, );
272         } else if (readsCount > 0) {
273             // if there are reads, but maxProw == 0 => error
274             taskLog.details(tr("Warning: incorrect maxProw == %1, probably packing was not done! Attribute was not set").arg(maxProw));
275         }
276 
277         if (readsCount > 0) {
278             U2IntegerAttribute countReadsAttr;
279             countReadsAttr.objectId = assembly.id;
280             countReadsAttr.name = "count_reads_attribute";
281             countReadsAttr.version = assembly.version;
282             countReadsAttr.value = readsCount;
283             attrDbi->createIntegerAttribute(countReadsAttr, stateInfo);
284             CHECK_OP(stateInfo, );
285         }
286         if (!coverageStat.isEmpty()) {
287             U2ByteArrayAttribute attribute;
288             attribute.objectId = assembly.id;
289             attribute.name = U2BaseAttributeName::coverage_statistics;
290             attribute.value = U2AssemblyUtils::serializeCoverageStat(coverageStat);
291             attribute.version = assembly.version;
292             attrDbi->createByteArrayAttribute(attribute, stateInfo);
293             CHECK_OP(stateInfo, );
294         }
295         stateInfo.setProgress(stateInfo.getProgress() + progressStep);
296     }
297 }
298 
299 }  // namespace U2
300