1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include <SamtoolsAdapter.h>
23 
24 #include <QFile>
25 #include <QFileInfo>
26 
27 #include <U2Core/IOAdapterUtils.h>
28 #include <U2Core/U2AssemblyUtils.h>
29 #include <U2Core/U2CoreAttributes.h>
30 #include <U2Core/U2DbiRegistry.h>
31 #include <U2Core/U2OpStatusUtils.h>
32 #include <U2Core/U2SafePoints.h>
33 
34 #include <U2Formats/BAMUtils.h>
35 
36 #include "BAMDbiPlugin.h"
37 #include "BAMFormat.h"
38 #include "Exception.h"
39 #include "IOException.h"
40 #include "SamtoolsBasedDbi.h"
41 
42 namespace U2 {
43 namespace BAM {
44 
45 static const QByteArray ATTRIBUTE_SEP(":~!ugene-attribute!~:");
46 
47 /************************************************************************/
48 /* SamtoolsBasedDbi */
49 /************************************************************************/
SamtoolsBasedDbi()50 SamtoolsBasedDbi::SamtoolsBasedDbi()
51     : U2AbstractDbi(SamtoolsBasedDbiFactory::ID), assembliesCount(0), bamHandler(nullptr), header(nullptr), index(nullptr) {
52 }
53 
~SamtoolsBasedDbi()54 SamtoolsBasedDbi::~SamtoolsBasedDbi() {
55     this->cleanup();
56 }
57 
shutdown(U2OpStatus &)58 QVariantMap SamtoolsBasedDbi::shutdown(U2OpStatus & /*os*/) {
59     cleanup();
60     return QVariantMap();
61 }
62 
init(const QHash<QString,QString> & properties,const QVariantMap &,U2OpStatus & os)63 void SamtoolsBasedDbi::init(const QHash<QString, QString> &properties, const QVariantMap & /*persistentData*/, U2OpStatus &os) {
64     try {
65         if (U2DbiState_Void != state) {
66             throw Exception(BAMDbiPlugin::tr("Invalid DBI state"));
67         }
68         state = U2DbiState_Starting;
69         if (properties.value(U2DbiOptions::U2_DBI_OPTION_URL).isEmpty()) {
70             throw Exception(BAMDbiPlugin::tr("URL is not specified"));
71         }
72         url = GUrl(properties.value(U2DbiOptions::U2_DBI_OPTION_URL));
73         if (!url.isLocalFile()) {
74             throw Exception(BAMDbiPlugin::tr("Non-local files are not supported"));
75         }
76         bool sorted = BAMUtils::isSortedBam(url, os);
77         CHECK_OP_EXT(os, throw Exception(os.getError()), );
78         if (!sorted) {
79             throw Exception("Only indexed sorted BAM files could be used by this DBI");
80         }
81 
82         bool ok = this->initBamStructures(url);
83         if (!ok) {
84             throw Exception(BAMDbiPlugin::tr("Can't build index for: %1").arg(url.getURLString()));
85         }
86 
87         assembliesCount = header->n_targets;
88         assemblyDbi.reset(new SamtoolsBasedAssemblyDbi(*this));
89         attributeDbi.reset(new SamtoolsBasedAttributeDbi(*this));
90         createObjectDbi();
91 
92         initProperties = properties;
93         features.insert(U2DbiFeature_ReadSequence);
94         features.insert(U2DbiFeature_ReadAssembly);
95         features.insert(U2DbiFeature_ReadAttributes);
96         dbiId = url.getURLString();
97         state = U2DbiState_Ready;
98     } catch (const Exception &e) {
99         os.setError(e.getMessage());
100         this->cleanup();
101     }
102 }
103 
initBamStructures(const GUrl & fileName)104 bool SamtoolsBasedDbi::initBamStructures(const GUrl &fileName) {
105     QByteArray urlBA = fileName.getURLString().toLocal8Bit();
106     const char *url = urlBA.constData();
107     bamHandler = bam_open(url, "r");
108     if (nullptr == bamHandler) {
109         throw IOException(BAMDbiPlugin::tr("Can't open file '%1'").arg(url));
110     }
111 
112     bool indexed = BAMUtils::hasValidBamIndex(fileName);
113     if (indexed) {
114         index = bam_index_load(url);
115     } else {
116         throw Exception("Only indexed sorted BAM files could be used by this DBI");
117     }
118     if (nullptr == index) {
119         throw IOException(BAMDbiPlugin::tr("Can't load index file for '%1'").arg(url));
120     }
121 
122     header = bam_header_read(bamHandler);
123     if (nullptr == header) {
124         throw IOException(BAMDbiPlugin::tr("Can't read header from file '%1'").arg(url));
125     }
126     return true;
127 }
128 
cleanup()129 void SamtoolsBasedDbi::cleanup() {
130     assemblyDbi.reset();
131     objectDbi.reset();
132     attributeDbi.reset();
133     if (nullptr != header) {
134         bam_header_destroy(header);
135         header = nullptr;
136     }
137     if (nullptr != index) {
138         bam_index_destroy(index);
139         index = nullptr;
140     }
141     if (nullptr != bamHandler) {
142         bam_close(bamHandler);
143         bamHandler = nullptr;
144     }
145     state = U2DbiState_Void;
146 }
147 
createObjectDbi()148 void SamtoolsBasedDbi::createObjectDbi() {
149     QList<U2DataId> assemblyObjectIds;
150     for (int i = 0; i < header->n_targets; i++) {
151         assemblyObjectIds << QByteArray::number(i);
152     }
153     objectDbi.reset(new SamtoolsBasedObjectDbi(*this, assemblyObjectIds));
154 }
155 
getEntityTypeById(const U2DataId & id) const156 U2DataType SamtoolsBasedDbi::getEntityTypeById(const U2DataId &id) const {
157     QString idStr = id;
158     if (idStr.endsWith(ATTRIBUTE_SEP + U2BaseAttributeName::reference_length)) {
159         return U2Type::AttributeInteger;
160     }
161     CHECK(!idStr.isEmpty(), U2Type::Unknown);
162 
163     U2OpStatusImpl os;
164     int dbId = SamtoolsBasedAssemblyDbi::toSamtoolsId(id, os);
165     CHECK_OP(os, U2Type::Unknown);
166 
167     if (dbId <= assembliesCount) {
168         return U2Type::Assembly;
169     } else {
170         return U2Type::Unknown;
171     }
172 }
173 
getBamFile() const174 bamFile SamtoolsBasedDbi::getBamFile() const {
175     return bamHandler;
176 }
177 
getHeader() const178 const bam_header_t *SamtoolsBasedDbi::getHeader() const {
179     return header;
180 }
181 
getIndex() const182 const bam_index_t *SamtoolsBasedDbi::getIndex() const {
183     return index;
184 }
185 
getAssemblyDbi()186 U2AssemblyDbi *SamtoolsBasedDbi::getAssemblyDbi() {
187     if (U2DbiState_Ready == state) {
188         return assemblyDbi.data();
189     } else {
190         return nullptr;
191     }
192 }
193 
getObjectDbi()194 U2ObjectDbi *SamtoolsBasedDbi::getObjectDbi() {
195     if (U2DbiState_Ready == state) {
196         return objectDbi.data();
197     } else {
198         return nullptr;
199     }
200 }
201 
getAttributeDbi()202 U2AttributeDbi *SamtoolsBasedDbi::getAttributeDbi() {
203     if (U2DbiState_Ready == state) {
204         return attributeDbi.data();
205     } else {
206         return nullptr;
207     }
208 }
209 
isReadOnly() const210 bool SamtoolsBasedDbi::isReadOnly() const {
211     return !QFileInfo(url.getURLString()).permission(QFile::WriteUser);
212 }
213 
214 /************************************************************************/
215 /* SamtoolsBasedObjectDbi */
216 /************************************************************************/
SamtoolsBasedObjectDbi(SamtoolsBasedDbi & dbi,const QList<U2DataId> & assemblyObjectIds)217 SamtoolsBasedObjectDbi::SamtoolsBasedObjectDbi(SamtoolsBasedDbi &dbi, const QList<U2DataId> &assemblyObjectIds)
218     : U2SimpleObjectDbi(&dbi), dbi(dbi), assemblyObjectIds(assemblyObjectIds) {
219 }
220 
countObjects(U2OpStatus & os)221 qint64 SamtoolsBasedObjectDbi::countObjects(U2OpStatus &os) {
222     return countObjects(U2Type::Assembly, os);
223 }
224 
countObjects(U2DataType type,U2OpStatus & os)225 qint64 SamtoolsBasedObjectDbi::countObjects(U2DataType type, U2OpStatus &os) {
226     CHECK_EXT(U2DbiState_Ready == dbi.getState(),
227               os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")),
228               0);
229 
230     if (U2Type::Assembly == type) {
231         return assemblyObjectIds.size();
232     } else {
233         return 0;
234     }
235 }
236 
getObjectNames(qint64,qint64,U2OpStatus & os)237 QHash<U2DataId, QString> SamtoolsBasedObjectDbi::getObjectNames(qint64 /*offset*/, qint64 /*count*/, U2OpStatus &os) {
238     QHash<U2DataId, QString> result;
239     CHECK_EXT(U2DbiState_Ready == dbi.getState(),
240               os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")),
241               result);
242 
243     return result;
244 }
245 
getObject(U2Object & object,const U2DataId & id,U2OpStatus & os)246 void SamtoolsBasedObjectDbi::getObject(U2Object &object, const U2DataId &id, U2OpStatus &os) {
247     CHECK_EXT(U2DbiState_Ready == dbi.getState(),
248               os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")), );
249 
250     CHECK_EXT(assemblyObjectIds.contains(id), os.setError(BAMDbiPlugin::tr("Object not found")), );
251     object = dbi.getAssemblyDbi()->getAssemblyObject(id, os);
252 }
253 
getObjects(qint64 offset,qint64 count,U2OpStatus & os)254 QList<U2DataId> SamtoolsBasedObjectDbi::getObjects(qint64 offset, qint64 count, U2OpStatus &os) {
255     return getObjects(U2Type::Assembly, offset, count, os);
256 }
257 
getObjects(U2DataType type,qint64 offset,qint64 count,U2OpStatus & os)258 QList<U2DataId> SamtoolsBasedObjectDbi::getObjects(U2DataType type, qint64 offset, qint64 count, U2OpStatus &os) {
259     CHECK_EXT(U2DbiState_Ready == dbi.getState(),
260               os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")),
261               QList<U2DataId>());
262 
263     if (U2Type::Assembly == type) {
264         qint64 lastExc = offset + count;
265         if (U2DbiOptions::U2_DBI_NO_LIMIT == count) {
266             lastExc = assemblyObjectIds.size();
267         }
268         QList<U2DataId> result = assemblyObjectIds.mid(offset, lastExc);
269         return result;
270     } else {
271         return QList<U2DataId>();
272     }
273 }
274 
getParents(const U2DataId &,U2OpStatus & os)275 QList<U2DataId> SamtoolsBasedObjectDbi::getParents(const U2DataId & /*entityId*/, U2OpStatus &os) {
276     CHECK_EXT(U2DbiState_Ready == dbi.getState(),
277               os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")),
278               QList<U2DataId>());
279     return QList<U2DataId>();
280 }
281 
getFolders(U2OpStatus & os)282 QStringList SamtoolsBasedObjectDbi::getFolders(U2OpStatus &os) {
283     CHECK_EXT(U2DbiState_Ready == dbi.getState(),
284               os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")),
285               QStringList());
286     return QStringList(U2ObjectDbi::ROOT_FOLDER);
287 }
288 
countObjects(const QString & folder,U2OpStatus & os)289 qint64 SamtoolsBasedObjectDbi::countObjects(const QString &folder, U2OpStatus &os) {
290     CHECK_EXT(U2DbiState_Ready == dbi.getState(),
291               os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")),
292               0);
293 
294     CHECK_EXT(U2ObjectDbi::ROOT_FOLDER == folder,
295               os.setError(BAMDbiPlugin::tr("No such folder: %1").arg(folder)),
296               0);
297 
298     return countObjects(os);
299 }
300 
getObjects(const QString & folder,qint64 offset,qint64 count,U2OpStatus & os)301 QList<U2DataId> SamtoolsBasedObjectDbi::getObjects(const QString &folder, qint64 offset, qint64 count, U2OpStatus &os) {
302     CHECK_EXT(U2DbiState_Ready == dbi.getState(),
303               os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")),
304               QList<U2DataId>());
305 
306     CHECK_EXT(U2ObjectDbi::ROOT_FOLDER == folder,
307               os.setError(BAMDbiPlugin::tr("No such folder: %1").arg(folder)),
308               QList<U2DataId>());
309 
310     return getObjects(offset, count, os);
311 }
312 
getObjectFolders(const U2DataId & objectId,U2OpStatus & os)313 QStringList SamtoolsBasedObjectDbi::getObjectFolders(const U2DataId &objectId, U2OpStatus &os) {
314     CHECK_EXT(U2DbiState_Ready == dbi.getState(),
315               os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")),
316               QStringList());
317 
318     if (U2Type::Assembly == dbi.getEntityTypeById(objectId)) {
319         return QStringList(U2ObjectDbi::ROOT_FOLDER);
320     } else {
321         return QStringList();
322     }
323 }
324 
getObjectVersion(const U2DataId &,U2OpStatus & os)325 qint64 SamtoolsBasedObjectDbi::getObjectVersion(const U2DataId & /*objectId*/, U2OpStatus &os) {
326     CHECK_EXT(U2DbiState_Ready == dbi.getState(),
327               os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")),
328               0);
329 
330     return 0;
331 }
332 
getFolderLocalVersion(const QString & folder,U2OpStatus & os)333 qint64 SamtoolsBasedObjectDbi::getFolderLocalVersion(const QString &folder, U2OpStatus &os) {
334     CHECK_EXT(U2DbiState_Ready == dbi.getState(),
335               os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")),
336               0);
337 
338     CHECK_EXT(U2ObjectDbi::ROOT_FOLDER == folder,
339               os.setError(BAMDbiPlugin::tr("No such folder: %1").arg(folder)),
340               0);
341 
342     return 0;
343 }
344 
getFolderGlobalVersion(const QString & folder,U2OpStatus & os)345 qint64 SamtoolsBasedObjectDbi::getFolderGlobalVersion(const QString &folder, U2OpStatus &os) {
346     CHECK_EXT(U2DbiState_Ready == dbi.getState(),
347               os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")),
348               0);
349 
350     CHECK_EXT(U2ObjectDbi::ROOT_FOLDER == folder,
351               os.setError(BAMDbiPlugin::tr("No such folder: %1").arg(folder)),
352               0);
353 
354     return 0;
355 }
356 
getObjectsByVisualName(const QString &,U2DataType,U2OpStatus &)357 U2DbiIterator<U2DataId> *SamtoolsBasedObjectDbi::getObjectsByVisualName(const QString &, U2DataType, U2OpStatus &) {
358     // TODO:
359     return nullptr;
360 }
361 
renameObject(const U2DataId &,const QString &,U2OpStatus & os)362 void SamtoolsBasedObjectDbi::renameObject(const U2DataId & /*id*/, const QString & /*newName*/, U2OpStatus &os) {
363     os.setError("Not implemented!");
364 }
365 
setObjectRank(const U2DataId &,U2DbiObjectRank,U2OpStatus & os)366 void SamtoolsBasedObjectDbi::setObjectRank(const U2DataId & /*objectId*/, U2DbiObjectRank /*newRank*/, U2OpStatus &os) {
367     os.setError("Not implemented!");
368 }
369 
getObjectRank(const U2DataId &,U2OpStatus & os)370 U2DbiObjectRank SamtoolsBasedObjectDbi::getObjectRank(const U2DataId & /*objectId*/, U2OpStatus &os) {
371     os.setError("Not implemented!");
372     return U2DbiObjectRank_TopLevel;
373 }
374 
setParent(const U2DataId &,const U2DataId &,U2OpStatus & os)375 void SamtoolsBasedObjectDbi::setParent(const U2DataId & /*parentId*/, const U2DataId & /*childId*/, U2OpStatus &os) {
376     os.setError("Not implemented!");
377 }
378 
379 /************************************************************************/
380 /* SamtoolsBasedReadsIterator */
381 /************************************************************************/
382 const int SamtoolsBasedReadsIterator::BUFFERED_INTERVAL_SIZE = 1000;
383 
SamtoolsBasedReadsIterator(int assemblyId,const U2Region & _r,SamtoolsBasedDbi & _dbi,const QByteArray & _nameFilter)384 SamtoolsBasedReadsIterator::SamtoolsBasedReadsIterator(
385     int assemblyId,
386     const U2Region &_r,
387     SamtoolsBasedDbi &_dbi,
388     const QByteArray &_nameFilter)
389     : U2DbiIterator<U2AssemblyRead>(), assemblyId(assemblyId), dbi(_dbi), nameFilter(_nameFilter) {
390     current = reads.begin();
391     bool errorRegion = false;
392     qint64 startPos = _r.startPos;
393     qint64 endPos = _r.endPos() - 1;
394 
395     // region must be between 0 and INT_MAX
396     if (startPos < 0) {
397         startPos = 0;
398     } else if (startPos > INT_MAX) {
399         startPos = INT_MAX;
400         errorRegion = true;
401     }
402     if (endPos < 0) {
403         endPos = 0;
404         errorRegion = true;
405     } else if (endPos > INT_MAX) {
406         endPos = INT_MAX;
407     }
408 
409     qint64 length = endPos - startPos + 1;
410     r = U2Region(startPos, length);
411     nextPosToRead = r.startPos;
412 
413     SAFE_POINT(!errorRegion, QString("Bad region for samtools reads fetching: %1 - %2").arg(_r.startPos).arg(_r.endPos()), );
414 }
415 
hasNext()416 bool SamtoolsBasedReadsIterator::hasNext() {
417     applyNameFilter();
418 
419     bool fetch = false;
420     if (reads.isEmpty()) {
421         fetch = true;
422     } else if (reads.end() == current) {
423         fetch = true;
424     }
425     if (!fetch) {
426         return true;
427     }
428 
429     reads.clear();
430     current = reads.begin();
431     qint64 endPosExc = r.endPos();
432     while (reads.isEmpty() && nextPosToRead < endPosExc) {
433         fetchNextChunk();
434         applyNameFilter();
435     }
436 
437     if (!reads.isEmpty()) {
438         return true;
439     }
440     return false;
441 }
442 
applyNameFilter()443 void SamtoolsBasedReadsIterator::applyNameFilter() {
444     if (nameFilter.isEmpty()) {
445         return;
446     }
447     while (current != reads.end()) {
448         if ((*current)->name == nameFilter) {
449             return;
450         }
451         current++;
452     }
453     if (current == reads.end()) {
454         reads.clear();
455         current = reads.begin();
456     }
457 }
458 
next()459 U2AssemblyRead SamtoolsBasedReadsIterator::next() {
460     if (this->hasNext()) {
461         U2AssemblyRead res = *current;
462         current++;
463         return res;
464     }
465     return U2AssemblyRead();
466 }
467 
peek()468 U2AssemblyRead SamtoolsBasedReadsIterator::peek() {
469     if (this->hasNext()) {
470         U2AssemblyRead res = *current;
471         return res;
472     }
473     return U2AssemblyRead();
474 }
475 
476 static const int NAME_COL = 0;
477 static const int FLAGS_COL = 1;
478 static const int CIGAR_COL = 5;
479 static const int RNEXT_COL = 6;
480 static const int SEQ_COL = 9;
481 static const int QUAL_COL = 10;
482 
bamFetchFunction(const bam1_t * b,void * data)483 int bamFetchFunction(const bam1_t *b, void *data) {
484     SamtoolsBasedReadsIterator *it = (SamtoolsBasedReadsIterator *)data;
485     QList<U2AssemblyRead> &reads = it->reads;
486     SamtoolsBasedDbi &dbi = it->dbi;
487 
488     U2AssemblyRead read(new U2AssemblyReadData());
489     {
490         char *samStr = bam_format1(dbi.getHeader(), b);
491         QByteArray samArr(samStr);
492         QList<QByteArray> values = samArr.split('\t');
493 
494         read->name = values[NAME_COL];
495         read->flags = values[FLAGS_COL].toLongLong();
496         read->leftmostPos = b->core.pos;
497         read->mappingQuality = b->core.qual;
498         QString error;
499         QList<U2CigarToken> tokens = U2AssemblyUtils::parseCigar(values[CIGAR_COL], error);
500         if (error.isEmpty()) {
501             read->cigar = tokens;
502         }
503         read->readSequence = values[SEQ_COL];
504         if ("*" != values[QUAL_COL]) {
505             read->quality = values[QUAL_COL];
506         }
507         read->effectiveLen = Alignment::computeLength(read->cigar);
508         delete[] samStr;
509         read->id = read->name + ";" + QByteArray::number(read->leftmostPos) + ";" + QByteArray::number(read->effectiveLen);
510         read->rnext = values[RNEXT_COL];
511         read->pnext = b->core.mpos;
512         QByteArray auxStr((const char *)bam1_aux(b), b->l_aux);
513         read->aux = SamtoolsAdapter::string2aux(auxStr);
514     }
515 
516     // add new border intersected reads
517     qint64 endPos = read->leftmostPos + read->effectiveLen;
518     if (endPos >= (qint64)it->nextPosToRead) {
519         it->newBorderReadIds << read->id;
520     }
521 
522     if (!it->borderReadIds.contains(read->id)) {
523         reads.append(read);
524     }
525     return 0;
526 }
527 
fetchNextChunk()528 void SamtoolsBasedReadsIterator::fetchNextChunk() {
529     bamFile bam = dbi.getBamFile();
530     const bam_index_t *idx = dbi.getIndex();
531     SAFE_POINT_EXT(nullptr != bam, nextPosToRead = INT_MAX, );
532     SAFE_POINT_EXT(nullptr != idx, nextPosToRead = INT_MAX, );
533 
534     void *data = (void *)(this);
535     borderReadIds = newBorderReadIds;
536     newBorderReadIds.clear();
537     int startPos = (int)nextPosToRead;
538     int endPos = (int)(nextPosToRead + BUFFERED_INTERVAL_SIZE);
539     nextPosToRead += BUFFERED_INTERVAL_SIZE;
540     bam_fetch(bam, idx, assemblyId, startPos, endPos, data, bamFetchFunction);
541 
542     current = reads.begin();
543 }
544 
545 /************************************************************************/
546 /* SamtoolsBasedAssemblyDbi */
547 /************************************************************************/
SamtoolsBasedAssemblyDbi(SamtoolsBasedDbi & dbi)548 SamtoolsBasedAssemblyDbi::SamtoolsBasedAssemblyDbi(SamtoolsBasedDbi &dbi)
549     : U2SimpleAssemblyDbi(&dbi), dbi(dbi) {
550 }
551 
toSamtoolsId(const U2DataId & assemblyId,U2OpStatus & os)552 int SamtoolsBasedAssemblyDbi::toSamtoolsId(const U2DataId &assemblyId, U2OpStatus &os) {
553     bool ok = false;
554     int dbId = assemblyId.toInt(&ok);
555     if (!ok) {
556         os.setError(QString("Incorrect samtools assembly id: %1").arg(assemblyId.data()));
557     }
558     return dbId;
559 }
560 
getAssemblyObject(const U2DataId & id,U2OpStatus & os)561 U2Assembly SamtoolsBasedAssemblyDbi::getAssemblyObject(const U2DataId &id, U2OpStatus &os) {
562     CHECK_EXT(U2DbiState_Ready == dbi.getState(),
563               os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")),
564               U2Assembly());
565 
566     const bam_header_t *header = dbi.getHeader();
567     SAFE_POINT(nullptr != header, "NULL BAM header", U2Assembly());
568 
569     CHECK_EXT(U2Type::Assembly == dbi.getEntityTypeById(id),
570               os.setError(BAMDbiPlugin::tr("The specified object is not an assembly")),
571               U2Assembly());
572 
573     int dbId = SamtoolsBasedAssemblyDbi::toSamtoolsId(id, os);
574     CHECK_OP(os, U2Assembly());
575     CHECK(dbId < header->n_targets, U2Assembly());
576 
577     U2Assembly result;
578     result.id = id;
579     result.dbiId = dbi.getDbiId();
580     result.visualName = header->target_name[dbId];
581 
582     return result;
583 }
584 
bamCountFunction(const bam1_t *,void * data)585 int bamCountFunction(const bam1_t * /*b*/, void *data) {
586     qint64 *count = (qint64 *)data;
587     (*count)++;
588     return 0;
589 }
590 
countReads(const U2DataId & assemblyId,const U2Region & r,U2OpStatus & os)591 qint64 SamtoolsBasedAssemblyDbi::countReads(const U2DataId &assemblyId, const U2Region &r, U2OpStatus &os) {
592     int id = SamtoolsBasedAssemblyDbi::toSamtoolsId(assemblyId, os);
593     CHECK_OP(os, 0);
594 
595     qint64 result = 0;
596     void *data = &result;
597     U2Region targetReg = this->getCorrectRegion(assemblyId, r, os);
598     CHECK_OP(os, 0);
599     qint64 endPos = targetReg.endPos() - 1;
600     bam_fetch(dbi.getBamFile(), dbi.getIndex(), id, (int)targetReg.startPos, (int)endPos, data, bamCountFunction);
601 
602     return result;
603 }
604 
getReads(const U2DataId & assemblyId,const U2Region & r,U2OpStatus & os,bool)605 U2DbiIterator<U2AssemblyRead> *SamtoolsBasedAssemblyDbi::getReads(const U2DataId &assemblyId, const U2Region &r, U2OpStatus &os, bool /*sortedHint*/) {
606     int id = SamtoolsBasedAssemblyDbi::toSamtoolsId(assemblyId, os);
607     CHECK_OP(os, nullptr);
608     U2Region targetReg = this->getCorrectRegion(assemblyId, r, os);
609     return new SamtoolsBasedReadsIterator(id, targetReg, dbi);
610 }
611 
getMaxPackedRow(const U2DataId &,const U2Region &,U2OpStatus & os)612 qint64 SamtoolsBasedAssemblyDbi::getMaxPackedRow(const U2DataId &, const U2Region &, U2OpStatus &os) {
613     os.setError("Operation not supported: BAM::SamtoolsBasedAssemblyDbi::getMaxPackedRow");
614     return 0;
615 }
616 
getReadsByRow(const U2DataId &,const U2Region &,qint64,qint64,U2OpStatus & os)617 U2DbiIterator<U2AssemblyRead> *SamtoolsBasedAssemblyDbi::getReadsByRow(const U2DataId &, const U2Region &, qint64, qint64, U2OpStatus &os) {
618     os.setError("Operation not supported: BAM::SamtoolsBasedAssemblyDbi::getReadsByRow");
619     return nullptr;
620 }
621 
getReadsByName(const U2DataId & assemblyId,const QByteArray & name,U2OpStatus & os)622 U2DbiIterator<U2AssemblyRead> *SamtoolsBasedAssemblyDbi::getReadsByName(const U2DataId &assemblyId, const QByteArray &name, U2OpStatus &os) {
623     int id = SamtoolsBasedAssemblyDbi::toSamtoolsId(assemblyId, os);
624     CHECK_OP(os, nullptr);
625     U2Region targetReg = this->getCorrectRegion(assemblyId, U2_REGION_MAX, os);
626     return new SamtoolsBasedReadsIterator(id, targetReg, dbi, name);
627 }
628 
getMaxEndPos(const U2DataId & assemblyId,U2OpStatus & os)629 qint64 SamtoolsBasedAssemblyDbi::getMaxEndPos(const U2DataId &assemblyId, U2OpStatus &os) {
630     int id = SamtoolsBasedAssemblyDbi::toSamtoolsId(assemblyId, os);
631     CHECK_OP(os, 0);
632 
633     const bam_header_t *header = dbi.getHeader();
634     CHECK_EXT(nullptr != header, os.setError("NULL header"), 0);
635     CHECK_EXT(id < header->n_targets, os.setError("Unknown assembly id"), 0);
636 
637     qint64 targetLength = header->target_len[id];
638     // Avoid returning '-1' (object-not-found) for empty assemblies.
639     return targetLength == 0 ? 0 : targetLength - 1;
640 }
641 
getCorrectRegion(const U2DataId & assemblyId,const U2Region & r,U2OpStatus & os)642 U2Region SamtoolsBasedAssemblyDbi::getCorrectRegion(const U2DataId &assemblyId, const U2Region &r, U2OpStatus &os) {
643     qint64 assemblyLength = getMaxEndPos(assemblyId, os) + 1;
644     CHECK_OP(os, U2Region());
645     qint64 startPos = r.startPos;
646     qint64 endPos = r.endPos() - 1;
647 
648     U2Region outOfRangeRegion(assemblyLength + 1, 0);
649 
650     if (startPos < 0) {
651         startPos = 0;
652     } else if (startPos >= assemblyLength) {
653         return outOfRangeRegion;
654     }
655     if (endPos < 0) {
656         return outOfRangeRegion;
657     } else if (endPos >= assemblyLength) {
658         endPos = assemblyLength - 1;
659     }
660 
661     qint64 length = endPos - startPos + 1;
662     CHECK(length >= 0, outOfRangeRegion);
663 
664     U2Region result(startPos, length);
665     return result;
666 }
667 
668 /************************************************************************/
669 /* SamtoolsBasedAttributeDbi */
670 /************************************************************************/
SamtoolsBasedAttributeDbi(SamtoolsBasedDbi & _dbi)671 SamtoolsBasedAttributeDbi::SamtoolsBasedAttributeDbi(SamtoolsBasedDbi &_dbi)
672     : U2SimpleAttributeDbi(&_dbi), dbi(_dbi) {
673 }
674 
getAvailableAttributeNames(U2OpStatus &)675 QStringList SamtoolsBasedAttributeDbi::getAvailableAttributeNames(U2OpStatus & /*os*/) {
676     QStringList result;
677     result << U2BaseAttributeName::reference_length;
678 
679     return result;
680 }
681 
getObjectAttributes(const U2DataId & objectId,const QString & attributeName,U2OpStatus &)682 QList<U2DataId> SamtoolsBasedAttributeDbi::getObjectAttributes(const U2DataId &objectId, const QString &attributeName, U2OpStatus & /*os*/) {
683     QList<U2DataId> result;
684     if (attributeName.isEmpty()) {
685         result << objectId + ATTRIBUTE_SEP + U2BaseAttributeName::reference_length.toLatin1();
686     } else if (U2BaseAttributeName::reference_length == attributeName) {
687         result << objectId + ATTRIBUTE_SEP + U2BaseAttributeName::reference_length.toLatin1();
688     }
689 
690     return result;
691 }
692 
getObjectPairAttributes(const U2DataId &,const U2DataId &,const QString &,U2OpStatus &)693 QList<U2DataId> SamtoolsBasedAttributeDbi::getObjectPairAttributes(const U2DataId & /*objectId*/, const U2DataId & /*childId*/, const QString & /*attributeName*/, U2OpStatus & /*os*/) {
694     return QList<U2DataId>();
695 }
696 
getIntegerAttribute(const U2DataId & attributeId,U2OpStatus & os)697 U2IntegerAttribute SamtoolsBasedAttributeDbi::getIntegerAttribute(const U2DataId &attributeId, U2OpStatus &os) {
698     U2IntegerAttribute result;
699     QString idStr = attributeId;
700     QStringList tokens = idStr.split(ATTRIBUTE_SEP);
701     CHECK(2 == tokens.size(), result);
702 
703     QString attrName = tokens[1];
704     if (U2BaseAttributeName::reference_length == attrName) {
705         U2DataId objIdStr = U2DataId(tokens[0].toLatin1());
706         int id = SamtoolsBasedAssemblyDbi::toSamtoolsId(objIdStr, os);
707         CHECK_OP(os, result);
708 
709         const bam_header_t *header = dbi.getHeader();
710         CHECK_EXT(nullptr != header, os.setError("NULL header"), result);
711         CHECK_EXT(id < header->n_targets, os.setError("Unknown assembly id"), result);
712         qint64 length = header->target_len[id];
713         result = U2IntegerAttribute(objIdStr, U2BaseAttributeName::reference_length, length);
714         result.id = attributeId;
715         result.value = length;
716     }
717 
718     return result;
719 }
720 
getRealAttribute(const U2DataId &,U2OpStatus &)721 U2RealAttribute SamtoolsBasedAttributeDbi::getRealAttribute(const U2DataId & /*attributeId*/, U2OpStatus & /*os*/) {
722     return U2RealAttribute();
723 }
724 
getStringAttribute(const U2DataId &,U2OpStatus &)725 U2StringAttribute SamtoolsBasedAttributeDbi::getStringAttribute(const U2DataId & /*attributeId*/, U2OpStatus & /*os*/) {
726     return U2StringAttribute();
727 }
728 
getByteArrayAttribute(const U2DataId &,U2OpStatus &)729 U2ByteArrayAttribute SamtoolsBasedAttributeDbi::getByteArrayAttribute(const U2DataId & /*attributeId*/, U2OpStatus & /*os*/) {
730     return U2ByteArrayAttribute();
731 }
732 
sort(const U2DbiSortConfig &,qint64,qint64,U2OpStatus & os)733 QList<U2DataId> SamtoolsBasedAttributeDbi::sort(const U2DbiSortConfig & /*sc*/, qint64 /*offset*/, qint64 /*count*/, U2OpStatus &os) {
734     U2DbiUtils::logNotSupported(U2DbiFeature_WriteAttributes, getRootDbi(), os);
735     return QList<U2DataId>();
736 }
737 
738 /************************************************************************/
739 /* SamtoolsBasedDbiFactory */
740 /************************************************************************/
741 const QString SamtoolsBasedDbiFactory::ID = BAM_DBI_ID;
742 
SamtoolsBasedDbiFactory()743 SamtoolsBasedDbiFactory::SamtoolsBasedDbiFactory()
744     : U2DbiFactory() {
745 }
746 
createDbi()747 U2Dbi *SamtoolsBasedDbiFactory::createDbi() {
748     return new SamtoolsBasedDbi();
749 }
750 
getId() const751 U2DbiFactoryId SamtoolsBasedDbiFactory::getId() const {
752     return ID;
753 }
754 
isValidDbi(const QHash<QString,QString> & properties,const QByteArray & rawData,U2OpStatus &) const755 FormatCheckResult SamtoolsBasedDbiFactory::isValidDbi(const QHash<QString, QString> &properties, const QByteArray &rawData, U2OpStatus & /*os*/) const {
756     BAMFormatUtils f;
757     FormatCheckResult res = f.checkRawData(rawData, properties.value(U2DbiOptions::U2_DBI_OPTION_URL));
758     return res;
759 }
760 
isDbiExists(const U2DbiId & id) const761 bool SamtoolsBasedDbiFactory::isDbiExists(const U2DbiId &id) const {
762     return QFile::exists(id);
763 }
764 
765 }  // namespace BAM
766 }  // namespace U2
767