1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include "GenomeAlignerIO.h"
23
24 #include <U2Core/AppContext.h>
25 #include <U2Core/Counter.h>
26 #include <U2Core/Timer.h>
27 #include <U2Core/U2AlphabetUtils.h>
28 #include <U2Core/U2AssemblyDbi.h>
29 #include <U2Core/U2AttributeDbi.h>
30 #include <U2Core/U2CoreAttributes.h>
31 #include <U2Core/U2CrossDatabaseReferenceDbi.h>
32 #include <U2Core/U2DbiRegistry.h>
33 #include <U2Core/U2ObjectDbi.h>
34 #include <U2Core/U2OpStatusUtils.h>
35
36 #include <U2Formats/DocumentFormatUtils.h>
37
38 #include <U2Lang/BasePorts.h>
39 #include <U2Lang/BaseSlots.h>
40
41 namespace U2 {
42
43 /************************************************************************/
44 /* GenomeAlignerUrlReader */
45 /************************************************************************/
46
GenomeAlignerUrlReader(const QList<GUrl> & dnaList)47 GenomeAlignerUrlReader::GenomeAlignerUrlReader(const QList<GUrl> &dnaList) {
48 initOk = reader.init(dnaList);
49 }
50
isEnd()51 bool GenomeAlignerUrlReader::isEnd() {
52 if (!initOk) {
53 return true;
54 }
55 return !reader.hasNext();
56 }
57
getProgress()58 int GenomeAlignerUrlReader::getProgress() {
59 return reader.getProgress();
60 }
61
read()62 SearchQuery *GenomeAlignerUrlReader::read() {
63 return new SearchQuery(reader.getNextSequenceObject());
64 }
65
getMemberError()66 QString GenomeAlignerUrlReader::getMemberError() {
67 return reader.getErrorMessage();
68 }
69
70 /************************************************************************/
71 /* GenomeAlignerUrlWriter */
72 /************************************************************************/
73
GenomeAlignerUrlWriter(const GUrl & resultFile,const QString & refName,int refLength)74 GenomeAlignerUrlWriter::GenomeAlignerUrlWriter(const GUrl &resultFile, const QString &refName, int refLength)
75 : seqWriter(resultFile, refName, refLength) {
76 writtenReadsCount = 0;
77 }
78
write(SearchQuery * seq,SAType offset)79 void GenomeAlignerUrlWriter::write(SearchQuery *seq, SAType offset) {
80 seqWriter.writeNextAlignedRead(offset, DNASequence(seq->getName(), seq->constSequence()));
81 writtenReadsCount++;
82 }
83
close()84 void GenomeAlignerUrlWriter::close() {
85 seqWriter.close();
86 }
87
setReferenceName(const QString & refName)88 void GenomeAlignerUrlWriter::setReferenceName(const QString &refName) {
89 this->refName = refName;
90 seqWriter.setRefSeqName(refName);
91 }
92
93 namespace LocalWorkflow {
94
95 /************************************************************************/
96 /* GenomeAlignerCommunicationChanelReader */
97 /************************************************************************/
98
GenomeAlignerCommunicationChanelReader(CommunicationChannel * reads)99 GenomeAlignerCommunicationChanelReader::GenomeAlignerCommunicationChanelReader(CommunicationChannel *reads) {
100 assert(reads != nullptr);
101 this->reads = reads;
102 }
103
isEnd()104 bool GenomeAlignerCommunicationChanelReader::isEnd() {
105 return !reads->hasMessage() || reads->isEnded();
106 }
107
getProgress()108 int GenomeAlignerCommunicationChanelReader::getProgress() {
109 return 100;
110 }
111
read()112 SearchQuery *GenomeAlignerCommunicationChanelReader::read() {
113 DNASequence seq = reads->get().getData().toMap().value(BaseSlots::DNA_SEQUENCE_SLOT().getId()).value<DNASequence>();
114
115 return new SearchQuery(&seq);
116 }
117
~GenomeAlignerCommunicationChanelReader()118 GenomeAlignerCommunicationChanelReader::~GenomeAlignerCommunicationChanelReader() {
119 }
120
121 /************************************************************************/
122 /* GenomeAlignerMsaWriter */
123 /************************************************************************/
GenomeAlignerMsaWriter()124 GenomeAlignerMsaWriter::GenomeAlignerMsaWriter() {
125 writtenReadsCount = 0;
126 }
127
close()128 void GenomeAlignerMsaWriter::close() {
129 // TODO: add some heuristic alphabet selection.
130 result->setAlphabet(AppContext::getDNAAlphabetRegistry()->findById(BaseDNAAlphabetIds::NUCL_DNA_DEFAULT()));
131 }
132
getResult()133 MultipleSequenceAlignment &GenomeAlignerMsaWriter::getResult() {
134 return result;
135 }
136
write(SearchQuery * seq,SAType offset)137 void GenomeAlignerMsaWriter::write(SearchQuery *seq, SAType offset) {
138 QByteArray offsetGaps;
139 offsetGaps.fill(U2Msa::GAP_CHAR, offset);
140 QByteArray seqWithOffset = seq->constSequence();
141 seqWithOffset.prepend(offsetGaps);
142 result->addRow(seq->getName(), seqWithOffset);
143 writtenReadsCount++;
144 }
145
setReferenceName(const QString & refName)146 void GenomeAlignerMsaWriter::setReferenceName(const QString &refName) {
147 this->refName = refName;
148 result->setName(refName);
149 }
150
151 } // namespace LocalWorkflow
152
153 /************************************************************************/
154 /* GenomeAlignerDbiReader */
155 /************************************************************************/
156 const qint64 GenomeAlignerDbiReader::readBunchSize = 1000;
157
GenomeAlignerDbiReader(U2AssemblyDbi * _rDbi,U2Assembly _assembly)158 GenomeAlignerDbiReader::GenomeAlignerDbiReader(U2AssemblyDbi *_rDbi, U2Assembly _assembly)
159 : rDbi(_rDbi), assembly(_assembly) {
160 wholeAssembly.startPos = 0;
161 wholeAssembly.length = rDbi->getMaxEndPos(assembly.id, status) + 1;
162 currentRead = reads.end();
163 readNumber = 0;
164 maxRow = rDbi->getMaxPackedRow(assembly.id, wholeAssembly, status);
165
166 readsInAssembly = rDbi->countReads(assembly.id, wholeAssembly, status);
167 if (readsInAssembly <= 0 || status.hasError()) {
168 uiLog.error(QString("Genome Aligner -> Database Error: " + status.getError()).toLatin1().data());
169 end = true;
170 return;
171 }
172
173 end = false;
174 }
175
read()176 SearchQuery *GenomeAlignerDbiReader::read() {
177 if (end) {
178 return nullptr;
179 }
180 reads.clear();
181 if (dbiIterator.data() == nullptr) {
182 dbiIterator.reset(rDbi->getReads(assembly.id, wholeAssembly, status));
183 }
184 if (dbiIterator->hasNext()) {
185 U2AssemblyRead read = dbiIterator->next();
186 readNumber++;
187 return new SearchQuery(read);
188 } else {
189 end = true;
190 return nullptr;
191 }
192 }
193
isEnd()194 bool GenomeAlignerDbiReader::isEnd() {
195 return end;
196 }
197
getProgress()198 int GenomeAlignerDbiReader::getProgress() {
199 return (int)(100 * (double)readNumber / readsInAssembly);
200 }
201
202 /************************************************************************/
203 /* GenomeAlignerDbiWriter */
204 /************************************************************************/
205 const qint64 GenomeAlignerDbiWriter::readBunchSize = 10000;
206
checkOperationStatus(const U2OpStatus & status)207 inline void checkOperationStatus(const U2OpStatus &status) {
208 if (status.hasError()) {
209 coreLog.error(status.getError());
210 throw status.getError();
211 }
212 }
213
GenomeAlignerDbiWriter(const QString & dbiFilePath,const QString & assemblyName,int refLength,const QString & referenceObjectName,const QString & referenceUrlForCrossLink)214 GenomeAlignerDbiWriter::GenomeAlignerDbiWriter(const QString &dbiFilePath,
215 const QString &assemblyName,
216 int refLength,
217 const QString &referenceObjectName,
218 const QString &referenceUrlForCrossLink)
219 : importer(status) {
220 // TODO: support several assemblies.
221 dbiHandle = QSharedPointer<DbiConnection>(new DbiConnection(U2DbiRef(SQLITE_DBI_ID, dbiFilePath), true, status));
222 checkOperationStatus(status);
223 sqliteDbi = dbiHandle->dbi;
224 wDbi = sqliteDbi->getAssemblyDbi();
225
226 const QString folder = U2ObjectDbi::ROOT_FOLDER;
227 if (!referenceObjectName.isEmpty() && !referenceUrlForCrossLink.isEmpty()) {
228 U2CrossDatabaseReference crossDbRef;
229 crossDbRef.dataRef.dbiRef.dbiId = referenceUrlForCrossLink;
230 crossDbRef.dataRef.dbiRef.dbiFactoryId = "document";
231 crossDbRef.dataRef.entityId = referenceObjectName.toUtf8();
232 crossDbRef.visualName = "cross_database_reference: " + referenceObjectName;
233 crossDbRef.dataRef.version = 1;
234 sqliteDbi->getCrossDatabaseReferenceDbi()->createCrossReference(crossDbRef, folder, status);
235 checkOperationStatus(status);
236
237 assembly.referenceId = crossDbRef.id;
238 }
239
240 assembly.visualName = assemblyName;
241
242 importer.createAssembly(sqliteDbi->getDbiRef(), folder, assembly);
243 checkOperationStatus(status);
244
245 U2IntegerAttribute lenAttr;
246 lenAttr.objectId = assembly.id;
247 lenAttr.name = U2BaseAttributeName::reference_length;
248 lenAttr.version = 1;
249 lenAttr.value = refLength;
250 sqliteDbi->getAttributeDbi()->createIntegerAttribute(lenAttr, status);
251 }
252
write(SearchQuery * seq,SAType offset)253 void GenomeAlignerDbiWriter::write(SearchQuery *seq, SAType offset) {
254 writtenReadsCount++;
255
256 U2AssemblyRead read(new U2AssemblyReadData());
257
258 read->name = seq->getName().toLatin1();
259 read->leftmostPos = offset;
260 read->effectiveLen = seq->length();
261 read->readSequence = seq->constSequence();
262 read->quality = seq->hasQuality() ? seq->getQuality().qualCodes : "";
263 read->flags = None;
264 read->cigar.append(U2CigarToken(U2CigarOp_M, seq->length()));
265
266 reads.append(read);
267 if (reads.size() >= readBunchSize) {
268 BufferedDbiIterator<U2AssemblyRead> readsIterator(reads);
269 importer.addReads(&readsIterator);
270 checkOperationStatus(status);
271 reads.clear();
272 }
273 }
274
close()275 void GenomeAlignerDbiWriter::close() {
276 if (reads.size() > 0) {
277 BufferedDbiIterator<U2AssemblyRead> readsIterator(reads);
278 importer.addReads(&readsIterator);
279 checkOperationStatus(status);
280 reads.clear();
281 }
282
283 U2AssemblyReadsImportInfo info;
284 importer.packReads(info);
285 checkOperationStatus(status);
286 sqliteDbi->flush(status);
287 }
288
289 } // namespace U2
290