1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include "GenomeAlignerIO.h"
23 
24 #include <U2Core/AppContext.h>
25 #include <U2Core/Counter.h>
26 #include <U2Core/Timer.h>
27 #include <U2Core/U2AlphabetUtils.h>
28 #include <U2Core/U2AssemblyDbi.h>
29 #include <U2Core/U2AttributeDbi.h>
30 #include <U2Core/U2CoreAttributes.h>
31 #include <U2Core/U2CrossDatabaseReferenceDbi.h>
32 #include <U2Core/U2DbiRegistry.h>
33 #include <U2Core/U2ObjectDbi.h>
34 #include <U2Core/U2OpStatusUtils.h>
35 
36 #include <U2Formats/DocumentFormatUtils.h>
37 
38 #include <U2Lang/BasePorts.h>
39 #include <U2Lang/BaseSlots.h>
40 
41 namespace U2 {
42 
43 /************************************************************************/
44 /* GenomeAlignerUrlReader                                               */
45 /************************************************************************/
46 
GenomeAlignerUrlReader(const QList<GUrl> & dnaList)47 GenomeAlignerUrlReader::GenomeAlignerUrlReader(const QList<GUrl> &dnaList) {
48     initOk = reader.init(dnaList);
49 }
50 
isEnd()51 bool GenomeAlignerUrlReader::isEnd() {
52     if (!initOk) {
53         return true;
54     }
55     return !reader.hasNext();
56 }
57 
getProgress()58 int GenomeAlignerUrlReader::getProgress() {
59     return reader.getProgress();
60 }
61 
read()62 SearchQuery *GenomeAlignerUrlReader::read() {
63     return new SearchQuery(reader.getNextSequenceObject());
64 }
65 
getMemberError()66 QString GenomeAlignerUrlReader::getMemberError() {
67     return reader.getErrorMessage();
68 }
69 
70 /************************************************************************/
71 /* GenomeAlignerUrlWriter                                               */
72 /************************************************************************/
73 
GenomeAlignerUrlWriter(const GUrl & resultFile,const QString & refName,int refLength)74 GenomeAlignerUrlWriter::GenomeAlignerUrlWriter(const GUrl &resultFile, const QString &refName, int refLength)
75     : seqWriter(resultFile, refName, refLength) {
76     writtenReadsCount = 0;
77 }
78 
write(SearchQuery * seq,SAType offset)79 void GenomeAlignerUrlWriter::write(SearchQuery *seq, SAType offset) {
80     seqWriter.writeNextAlignedRead(offset, DNASequence(seq->getName(), seq->constSequence()));
81     writtenReadsCount++;
82 }
83 
close()84 void GenomeAlignerUrlWriter::close() {
85     seqWriter.close();
86 }
87 
setReferenceName(const QString & refName)88 void GenomeAlignerUrlWriter::setReferenceName(const QString &refName) {
89     this->refName = refName;
90     seqWriter.setRefSeqName(refName);
91 }
92 
93 namespace LocalWorkflow {
94 
95 /************************************************************************/
96 /* GenomeAlignerCommunicationChanelReader                               */
97 /************************************************************************/
98 
GenomeAlignerCommunicationChanelReader(CommunicationChannel * reads)99 GenomeAlignerCommunicationChanelReader::GenomeAlignerCommunicationChanelReader(CommunicationChannel *reads) {
100     assert(reads != nullptr);
101     this->reads = reads;
102 }
103 
isEnd()104 bool GenomeAlignerCommunicationChanelReader::isEnd() {
105     return !reads->hasMessage() || reads->isEnded();
106 }
107 
getProgress()108 int GenomeAlignerCommunicationChanelReader::getProgress() {
109     return 100;
110 }
111 
read()112 SearchQuery *GenomeAlignerCommunicationChanelReader::read() {
113     DNASequence seq = reads->get().getData().toMap().value(BaseSlots::DNA_SEQUENCE_SLOT().getId()).value<DNASequence>();
114 
115     return new SearchQuery(&seq);
116 }
117 
~GenomeAlignerCommunicationChanelReader()118 GenomeAlignerCommunicationChanelReader::~GenomeAlignerCommunicationChanelReader() {
119 }
120 
121 /************************************************************************/
122 /* GenomeAlignerMsaWriter                                        */
123 /************************************************************************/
GenomeAlignerMsaWriter()124 GenomeAlignerMsaWriter::GenomeAlignerMsaWriter() {
125     writtenReadsCount = 0;
126 }
127 
close()128 void GenomeAlignerMsaWriter::close() {
129     // TODO: add some heuristic alphabet selection.
130     result->setAlphabet(AppContext::getDNAAlphabetRegistry()->findById(BaseDNAAlphabetIds::NUCL_DNA_DEFAULT()));
131 }
132 
getResult()133 MultipleSequenceAlignment &GenomeAlignerMsaWriter::getResult() {
134     return result;
135 }
136 
write(SearchQuery * seq,SAType offset)137 void GenomeAlignerMsaWriter::write(SearchQuery *seq, SAType offset) {
138     QByteArray offsetGaps;
139     offsetGaps.fill(U2Msa::GAP_CHAR, offset);
140     QByteArray seqWithOffset = seq->constSequence();
141     seqWithOffset.prepend(offsetGaps);
142     result->addRow(seq->getName(), seqWithOffset);
143     writtenReadsCount++;
144 }
145 
setReferenceName(const QString & refName)146 void GenomeAlignerMsaWriter::setReferenceName(const QString &refName) {
147     this->refName = refName;
148     result->setName(refName);
149 }
150 
151 }  // namespace LocalWorkflow
152 
153 /************************************************************************/
154 /* GenomeAlignerDbiReader                                               */
155 /************************************************************************/
156 const qint64 GenomeAlignerDbiReader::readBunchSize = 1000;
157 
GenomeAlignerDbiReader(U2AssemblyDbi * _rDbi,U2Assembly _assembly)158 GenomeAlignerDbiReader::GenomeAlignerDbiReader(U2AssemblyDbi *_rDbi, U2Assembly _assembly)
159     : rDbi(_rDbi), assembly(_assembly) {
160     wholeAssembly.startPos = 0;
161     wholeAssembly.length = rDbi->getMaxEndPos(assembly.id, status) + 1;
162     currentRead = reads.end();
163     readNumber = 0;
164     maxRow = rDbi->getMaxPackedRow(assembly.id, wholeAssembly, status);
165 
166     readsInAssembly = rDbi->countReads(assembly.id, wholeAssembly, status);
167     if (readsInAssembly <= 0 || status.hasError()) {
168         uiLog.error(QString("Genome Aligner -> Database Error: " + status.getError()).toLatin1().data());
169         end = true;
170         return;
171     }
172 
173     end = false;
174 }
175 
read()176 SearchQuery *GenomeAlignerDbiReader::read() {
177     if (end) {
178         return nullptr;
179     }
180     reads.clear();
181     if (dbiIterator.data() == nullptr) {
182         dbiIterator.reset(rDbi->getReads(assembly.id, wholeAssembly, status));
183     }
184     if (dbiIterator->hasNext()) {
185         U2AssemblyRead read = dbiIterator->next();
186         readNumber++;
187         return new SearchQuery(read);
188     } else {
189         end = true;
190         return nullptr;
191     }
192 }
193 
isEnd()194 bool GenomeAlignerDbiReader::isEnd() {
195     return end;
196 }
197 
getProgress()198 int GenomeAlignerDbiReader::getProgress() {
199     return (int)(100 * (double)readNumber / readsInAssembly);
200 }
201 
202 /************************************************************************/
203 /* GenomeAlignerDbiWriter                                               */
204 /************************************************************************/
205 const qint64 GenomeAlignerDbiWriter::readBunchSize = 10000;
206 
checkOperationStatus(const U2OpStatus & status)207 inline void checkOperationStatus(const U2OpStatus &status) {
208     if (status.hasError()) {
209         coreLog.error(status.getError());
210         throw status.getError();
211     }
212 }
213 
GenomeAlignerDbiWriter(const QString & dbiFilePath,const QString & assemblyName,int refLength,const QString & referenceObjectName,const QString & referenceUrlForCrossLink)214 GenomeAlignerDbiWriter::GenomeAlignerDbiWriter(const QString &dbiFilePath,
215                                                const QString &assemblyName,
216                                                int refLength,
217                                                const QString &referenceObjectName,
218                                                const QString &referenceUrlForCrossLink)
219     : importer(status) {
220     // TODO: support several assemblies.
221     dbiHandle = QSharedPointer<DbiConnection>(new DbiConnection(U2DbiRef(SQLITE_DBI_ID, dbiFilePath), true, status));
222     checkOperationStatus(status);
223     sqliteDbi = dbiHandle->dbi;
224     wDbi = sqliteDbi->getAssemblyDbi();
225 
226     const QString folder = U2ObjectDbi::ROOT_FOLDER;
227     if (!referenceObjectName.isEmpty() && !referenceUrlForCrossLink.isEmpty()) {
228         U2CrossDatabaseReference crossDbRef;
229         crossDbRef.dataRef.dbiRef.dbiId = referenceUrlForCrossLink;
230         crossDbRef.dataRef.dbiRef.dbiFactoryId = "document";
231         crossDbRef.dataRef.entityId = referenceObjectName.toUtf8();
232         crossDbRef.visualName = "cross_database_reference: " + referenceObjectName;
233         crossDbRef.dataRef.version = 1;
234         sqliteDbi->getCrossDatabaseReferenceDbi()->createCrossReference(crossDbRef, folder, status);
235         checkOperationStatus(status);
236 
237         assembly.referenceId = crossDbRef.id;
238     }
239 
240     assembly.visualName = assemblyName;
241 
242     importer.createAssembly(sqliteDbi->getDbiRef(), folder, assembly);
243     checkOperationStatus(status);
244 
245     U2IntegerAttribute lenAttr;
246     lenAttr.objectId = assembly.id;
247     lenAttr.name = U2BaseAttributeName::reference_length;
248     lenAttr.version = 1;
249     lenAttr.value = refLength;
250     sqliteDbi->getAttributeDbi()->createIntegerAttribute(lenAttr, status);
251 }
252 
write(SearchQuery * seq,SAType offset)253 void GenomeAlignerDbiWriter::write(SearchQuery *seq, SAType offset) {
254     writtenReadsCount++;
255 
256     U2AssemblyRead read(new U2AssemblyReadData());
257 
258     read->name = seq->getName().toLatin1();
259     read->leftmostPos = offset;
260     read->effectiveLen = seq->length();
261     read->readSequence = seq->constSequence();
262     read->quality = seq->hasQuality() ? seq->getQuality().qualCodes : "";
263     read->flags = None;
264     read->cigar.append(U2CigarToken(U2CigarOp_M, seq->length()));
265 
266     reads.append(read);
267     if (reads.size() >= readBunchSize) {
268         BufferedDbiIterator<U2AssemblyRead> readsIterator(reads);
269         importer.addReads(&readsIterator);
270         checkOperationStatus(status);
271         reads.clear();
272     }
273 }
274 
close()275 void GenomeAlignerDbiWriter::close() {
276     if (reads.size() > 0) {
277         BufferedDbiIterator<U2AssemblyRead> readsIterator(reads);
278         importer.addReads(&readsIterator);
279         checkOperationStatus(status);
280         reads.clear();
281     }
282 
283     U2AssemblyReadsImportInfo info;
284     importer.packReads(info);
285     checkOperationStatus(status);
286     sqliteDbi->flush(status);
287 }
288 
289 }  // namespace U2
290