1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include <SamtoolsAdapter.h>
23
24 #include <QFile>
25 #include <QFileInfo>
26
27 #include <U2Core/IOAdapterUtils.h>
28 #include <U2Core/U2AssemblyUtils.h>
29 #include <U2Core/U2CoreAttributes.h>
30 #include <U2Core/U2DbiRegistry.h>
31 #include <U2Core/U2OpStatusUtils.h>
32 #include <U2Core/U2SafePoints.h>
33
34 #include <U2Formats/BAMUtils.h>
35
36 #include "BAMDbiPlugin.h"
37 #include "BAMFormat.h"
38 #include "Exception.h"
39 #include "IOException.h"
40 #include "SamtoolsBasedDbi.h"
41
42 namespace U2 {
43 namespace BAM {
44
45 static const QByteArray ATTRIBUTE_SEP(":~!ugene-attribute!~:");
46
47 /************************************************************************/
48 /* SamtoolsBasedDbi */
49 /************************************************************************/
SamtoolsBasedDbi()50 SamtoolsBasedDbi::SamtoolsBasedDbi()
51 : U2AbstractDbi(SamtoolsBasedDbiFactory::ID), assembliesCount(0), bamHandler(nullptr), header(nullptr), index(nullptr) {
52 }
53
~SamtoolsBasedDbi()54 SamtoolsBasedDbi::~SamtoolsBasedDbi() {
55 this->cleanup();
56 }
57
shutdown(U2OpStatus &)58 QVariantMap SamtoolsBasedDbi::shutdown(U2OpStatus & /*os*/) {
59 cleanup();
60 return QVariantMap();
61 }
62
init(const QHash<QString,QString> & properties,const QVariantMap &,U2OpStatus & os)63 void SamtoolsBasedDbi::init(const QHash<QString, QString> &properties, const QVariantMap & /*persistentData*/, U2OpStatus &os) {
64 try {
65 if (U2DbiState_Void != state) {
66 throw Exception(BAMDbiPlugin::tr("Invalid DBI state"));
67 }
68 state = U2DbiState_Starting;
69 if (properties.value(U2DbiOptions::U2_DBI_OPTION_URL).isEmpty()) {
70 throw Exception(BAMDbiPlugin::tr("URL is not specified"));
71 }
72 url = GUrl(properties.value(U2DbiOptions::U2_DBI_OPTION_URL));
73 if (!url.isLocalFile()) {
74 throw Exception(BAMDbiPlugin::tr("Non-local files are not supported"));
75 }
76 bool sorted = BAMUtils::isSortedBam(url, os);
77 CHECK_OP_EXT(os, throw Exception(os.getError()), );
78 if (!sorted) {
79 throw Exception("Only indexed sorted BAM files could be used by this DBI");
80 }
81
82 bool ok = this->initBamStructures(url);
83 if (!ok) {
84 throw Exception(BAMDbiPlugin::tr("Can't build index for: %1").arg(url.getURLString()));
85 }
86
87 assembliesCount = header->n_targets;
88 assemblyDbi.reset(new SamtoolsBasedAssemblyDbi(*this));
89 attributeDbi.reset(new SamtoolsBasedAttributeDbi(*this));
90 createObjectDbi();
91
92 initProperties = properties;
93 features.insert(U2DbiFeature_ReadSequence);
94 features.insert(U2DbiFeature_ReadAssembly);
95 features.insert(U2DbiFeature_ReadAttributes);
96 dbiId = url.getURLString();
97 state = U2DbiState_Ready;
98 } catch (const Exception &e) {
99 os.setError(e.getMessage());
100 this->cleanup();
101 }
102 }
103
initBamStructures(const GUrl & fileName)104 bool SamtoolsBasedDbi::initBamStructures(const GUrl &fileName) {
105 QByteArray urlBA = fileName.getURLString().toLocal8Bit();
106 const char *url = urlBA.constData();
107 bamHandler = bam_open(url, "r");
108 if (nullptr == bamHandler) {
109 throw IOException(BAMDbiPlugin::tr("Can't open file '%1'").arg(url));
110 }
111
112 bool indexed = BAMUtils::hasValidBamIndex(fileName);
113 if (indexed) {
114 index = bam_index_load(url);
115 } else {
116 throw Exception("Only indexed sorted BAM files could be used by this DBI");
117 }
118 if (nullptr == index) {
119 throw IOException(BAMDbiPlugin::tr("Can't load index file for '%1'").arg(url));
120 }
121
122 header = bam_header_read(bamHandler);
123 if (nullptr == header) {
124 throw IOException(BAMDbiPlugin::tr("Can't read header from file '%1'").arg(url));
125 }
126 return true;
127 }
128
cleanup()129 void SamtoolsBasedDbi::cleanup() {
130 assemblyDbi.reset();
131 objectDbi.reset();
132 attributeDbi.reset();
133 if (nullptr != header) {
134 bam_header_destroy(header);
135 header = nullptr;
136 }
137 if (nullptr != index) {
138 bam_index_destroy(index);
139 index = nullptr;
140 }
141 if (nullptr != bamHandler) {
142 bam_close(bamHandler);
143 bamHandler = nullptr;
144 }
145 state = U2DbiState_Void;
146 }
147
createObjectDbi()148 void SamtoolsBasedDbi::createObjectDbi() {
149 QList<U2DataId> assemblyObjectIds;
150 for (int i = 0; i < header->n_targets; i++) {
151 assemblyObjectIds << QByteArray::number(i);
152 }
153 objectDbi.reset(new SamtoolsBasedObjectDbi(*this, assemblyObjectIds));
154 }
155
getEntityTypeById(const U2DataId & id) const156 U2DataType SamtoolsBasedDbi::getEntityTypeById(const U2DataId &id) const {
157 QString idStr = id;
158 if (idStr.endsWith(ATTRIBUTE_SEP + U2BaseAttributeName::reference_length)) {
159 return U2Type::AttributeInteger;
160 }
161 CHECK(!idStr.isEmpty(), U2Type::Unknown);
162
163 U2OpStatusImpl os;
164 int dbId = SamtoolsBasedAssemblyDbi::toSamtoolsId(id, os);
165 CHECK_OP(os, U2Type::Unknown);
166
167 if (dbId <= assembliesCount) {
168 return U2Type::Assembly;
169 } else {
170 return U2Type::Unknown;
171 }
172 }
173
getBamFile() const174 bamFile SamtoolsBasedDbi::getBamFile() const {
175 return bamHandler;
176 }
177
getHeader() const178 const bam_header_t *SamtoolsBasedDbi::getHeader() const {
179 return header;
180 }
181
getIndex() const182 const bam_index_t *SamtoolsBasedDbi::getIndex() const {
183 return index;
184 }
185
getAssemblyDbi()186 U2AssemblyDbi *SamtoolsBasedDbi::getAssemblyDbi() {
187 if (U2DbiState_Ready == state) {
188 return assemblyDbi.data();
189 } else {
190 return nullptr;
191 }
192 }
193
getObjectDbi()194 U2ObjectDbi *SamtoolsBasedDbi::getObjectDbi() {
195 if (U2DbiState_Ready == state) {
196 return objectDbi.data();
197 } else {
198 return nullptr;
199 }
200 }
201
getAttributeDbi()202 U2AttributeDbi *SamtoolsBasedDbi::getAttributeDbi() {
203 if (U2DbiState_Ready == state) {
204 return attributeDbi.data();
205 } else {
206 return nullptr;
207 }
208 }
209
isReadOnly() const210 bool SamtoolsBasedDbi::isReadOnly() const {
211 return !QFileInfo(url.getURLString()).permission(QFile::WriteUser);
212 }
213
214 /************************************************************************/
215 /* SamtoolsBasedObjectDbi */
216 /************************************************************************/
SamtoolsBasedObjectDbi(SamtoolsBasedDbi & dbi,const QList<U2DataId> & assemblyObjectIds)217 SamtoolsBasedObjectDbi::SamtoolsBasedObjectDbi(SamtoolsBasedDbi &dbi, const QList<U2DataId> &assemblyObjectIds)
218 : U2SimpleObjectDbi(&dbi), dbi(dbi), assemblyObjectIds(assemblyObjectIds) {
219 }
220
countObjects(U2OpStatus & os)221 qint64 SamtoolsBasedObjectDbi::countObjects(U2OpStatus &os) {
222 return countObjects(U2Type::Assembly, os);
223 }
224
countObjects(U2DataType type,U2OpStatus & os)225 qint64 SamtoolsBasedObjectDbi::countObjects(U2DataType type, U2OpStatus &os) {
226 CHECK_EXT(U2DbiState_Ready == dbi.getState(),
227 os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")),
228 0);
229
230 if (U2Type::Assembly == type) {
231 return assemblyObjectIds.size();
232 } else {
233 return 0;
234 }
235 }
236
getObjectNames(qint64,qint64,U2OpStatus & os)237 QHash<U2DataId, QString> SamtoolsBasedObjectDbi::getObjectNames(qint64 /*offset*/, qint64 /*count*/, U2OpStatus &os) {
238 QHash<U2DataId, QString> result;
239 CHECK_EXT(U2DbiState_Ready == dbi.getState(),
240 os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")),
241 result);
242
243 return result;
244 }
245
getObject(U2Object & object,const U2DataId & id,U2OpStatus & os)246 void SamtoolsBasedObjectDbi::getObject(U2Object &object, const U2DataId &id, U2OpStatus &os) {
247 CHECK_EXT(U2DbiState_Ready == dbi.getState(),
248 os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")), );
249
250 CHECK_EXT(assemblyObjectIds.contains(id), os.setError(BAMDbiPlugin::tr("Object not found")), );
251 object = dbi.getAssemblyDbi()->getAssemblyObject(id, os);
252 }
253
getObjects(qint64 offset,qint64 count,U2OpStatus & os)254 QList<U2DataId> SamtoolsBasedObjectDbi::getObjects(qint64 offset, qint64 count, U2OpStatus &os) {
255 return getObjects(U2Type::Assembly, offset, count, os);
256 }
257
getObjects(U2DataType type,qint64 offset,qint64 count,U2OpStatus & os)258 QList<U2DataId> SamtoolsBasedObjectDbi::getObjects(U2DataType type, qint64 offset, qint64 count, U2OpStatus &os) {
259 CHECK_EXT(U2DbiState_Ready == dbi.getState(),
260 os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")),
261 QList<U2DataId>());
262
263 if (U2Type::Assembly == type) {
264 qint64 lastExc = offset + count;
265 if (U2DbiOptions::U2_DBI_NO_LIMIT == count) {
266 lastExc = assemblyObjectIds.size();
267 }
268 QList<U2DataId> result = assemblyObjectIds.mid(offset, lastExc);
269 return result;
270 } else {
271 return QList<U2DataId>();
272 }
273 }
274
getParents(const U2DataId &,U2OpStatus & os)275 QList<U2DataId> SamtoolsBasedObjectDbi::getParents(const U2DataId & /*entityId*/, U2OpStatus &os) {
276 CHECK_EXT(U2DbiState_Ready == dbi.getState(),
277 os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")),
278 QList<U2DataId>());
279 return QList<U2DataId>();
280 }
281
getFolders(U2OpStatus & os)282 QStringList SamtoolsBasedObjectDbi::getFolders(U2OpStatus &os) {
283 CHECK_EXT(U2DbiState_Ready == dbi.getState(),
284 os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")),
285 QStringList());
286 return QStringList(U2ObjectDbi::ROOT_FOLDER);
287 }
288
countObjects(const QString & folder,U2OpStatus & os)289 qint64 SamtoolsBasedObjectDbi::countObjects(const QString &folder, U2OpStatus &os) {
290 CHECK_EXT(U2DbiState_Ready == dbi.getState(),
291 os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")),
292 0);
293
294 CHECK_EXT(U2ObjectDbi::ROOT_FOLDER == folder,
295 os.setError(BAMDbiPlugin::tr("No such folder: %1").arg(folder)),
296 0);
297
298 return countObjects(os);
299 }
300
getObjects(const QString & folder,qint64 offset,qint64 count,U2OpStatus & os)301 QList<U2DataId> SamtoolsBasedObjectDbi::getObjects(const QString &folder, qint64 offset, qint64 count, U2OpStatus &os) {
302 CHECK_EXT(U2DbiState_Ready == dbi.getState(),
303 os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")),
304 QList<U2DataId>());
305
306 CHECK_EXT(U2ObjectDbi::ROOT_FOLDER == folder,
307 os.setError(BAMDbiPlugin::tr("No such folder: %1").arg(folder)),
308 QList<U2DataId>());
309
310 return getObjects(offset, count, os);
311 }
312
getObjectFolders(const U2DataId & objectId,U2OpStatus & os)313 QStringList SamtoolsBasedObjectDbi::getObjectFolders(const U2DataId &objectId, U2OpStatus &os) {
314 CHECK_EXT(U2DbiState_Ready == dbi.getState(),
315 os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")),
316 QStringList());
317
318 if (U2Type::Assembly == dbi.getEntityTypeById(objectId)) {
319 return QStringList(U2ObjectDbi::ROOT_FOLDER);
320 } else {
321 return QStringList();
322 }
323 }
324
getObjectVersion(const U2DataId &,U2OpStatus & os)325 qint64 SamtoolsBasedObjectDbi::getObjectVersion(const U2DataId & /*objectId*/, U2OpStatus &os) {
326 CHECK_EXT(U2DbiState_Ready == dbi.getState(),
327 os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")),
328 0);
329
330 return 0;
331 }
332
getFolderLocalVersion(const QString & folder,U2OpStatus & os)333 qint64 SamtoolsBasedObjectDbi::getFolderLocalVersion(const QString &folder, U2OpStatus &os) {
334 CHECK_EXT(U2DbiState_Ready == dbi.getState(),
335 os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")),
336 0);
337
338 CHECK_EXT(U2ObjectDbi::ROOT_FOLDER == folder,
339 os.setError(BAMDbiPlugin::tr("No such folder: %1").arg(folder)),
340 0);
341
342 return 0;
343 }
344
getFolderGlobalVersion(const QString & folder,U2OpStatus & os)345 qint64 SamtoolsBasedObjectDbi::getFolderGlobalVersion(const QString &folder, U2OpStatus &os) {
346 CHECK_EXT(U2DbiState_Ready == dbi.getState(),
347 os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")),
348 0);
349
350 CHECK_EXT(U2ObjectDbi::ROOT_FOLDER == folder,
351 os.setError(BAMDbiPlugin::tr("No such folder: %1").arg(folder)),
352 0);
353
354 return 0;
355 }
356
getObjectsByVisualName(const QString &,U2DataType,U2OpStatus &)357 U2DbiIterator<U2DataId> *SamtoolsBasedObjectDbi::getObjectsByVisualName(const QString &, U2DataType, U2OpStatus &) {
358 // TODO:
359 return nullptr;
360 }
361
renameObject(const U2DataId &,const QString &,U2OpStatus & os)362 void SamtoolsBasedObjectDbi::renameObject(const U2DataId & /*id*/, const QString & /*newName*/, U2OpStatus &os) {
363 os.setError("Not implemented!");
364 }
365
setObjectRank(const U2DataId &,U2DbiObjectRank,U2OpStatus & os)366 void SamtoolsBasedObjectDbi::setObjectRank(const U2DataId & /*objectId*/, U2DbiObjectRank /*newRank*/, U2OpStatus &os) {
367 os.setError("Not implemented!");
368 }
369
getObjectRank(const U2DataId &,U2OpStatus & os)370 U2DbiObjectRank SamtoolsBasedObjectDbi::getObjectRank(const U2DataId & /*objectId*/, U2OpStatus &os) {
371 os.setError("Not implemented!");
372 return U2DbiObjectRank_TopLevel;
373 }
374
setParent(const U2DataId &,const U2DataId &,U2OpStatus & os)375 void SamtoolsBasedObjectDbi::setParent(const U2DataId & /*parentId*/, const U2DataId & /*childId*/, U2OpStatus &os) {
376 os.setError("Not implemented!");
377 }
378
379 /************************************************************************/
380 /* SamtoolsBasedReadsIterator */
381 /************************************************************************/
382 const int SamtoolsBasedReadsIterator::BUFFERED_INTERVAL_SIZE = 1000;
383
SamtoolsBasedReadsIterator(int assemblyId,const U2Region & _r,SamtoolsBasedDbi & _dbi,const QByteArray & _nameFilter)384 SamtoolsBasedReadsIterator::SamtoolsBasedReadsIterator(
385 int assemblyId,
386 const U2Region &_r,
387 SamtoolsBasedDbi &_dbi,
388 const QByteArray &_nameFilter)
389 : U2DbiIterator<U2AssemblyRead>(), assemblyId(assemblyId), dbi(_dbi), nameFilter(_nameFilter) {
390 current = reads.begin();
391 bool errorRegion = false;
392 qint64 startPos = _r.startPos;
393 qint64 endPos = _r.endPos() - 1;
394
395 // region must be between 0 and INT_MAX
396 if (startPos < 0) {
397 startPos = 0;
398 } else if (startPos > INT_MAX) {
399 startPos = INT_MAX;
400 errorRegion = true;
401 }
402 if (endPos < 0) {
403 endPos = 0;
404 errorRegion = true;
405 } else if (endPos > INT_MAX) {
406 endPos = INT_MAX;
407 }
408
409 qint64 length = endPos - startPos + 1;
410 r = U2Region(startPos, length);
411 nextPosToRead = r.startPos;
412
413 SAFE_POINT(!errorRegion, QString("Bad region for samtools reads fetching: %1 - %2").arg(_r.startPos).arg(_r.endPos()), );
414 }
415
hasNext()416 bool SamtoolsBasedReadsIterator::hasNext() {
417 applyNameFilter();
418
419 bool fetch = false;
420 if (reads.isEmpty()) {
421 fetch = true;
422 } else if (reads.end() == current) {
423 fetch = true;
424 }
425 if (!fetch) {
426 return true;
427 }
428
429 reads.clear();
430 current = reads.begin();
431 qint64 endPosExc = r.endPos();
432 while (reads.isEmpty() && nextPosToRead < endPosExc) {
433 fetchNextChunk();
434 applyNameFilter();
435 }
436
437 if (!reads.isEmpty()) {
438 return true;
439 }
440 return false;
441 }
442
applyNameFilter()443 void SamtoolsBasedReadsIterator::applyNameFilter() {
444 if (nameFilter.isEmpty()) {
445 return;
446 }
447 while (current != reads.end()) {
448 if ((*current)->name == nameFilter) {
449 return;
450 }
451 current++;
452 }
453 if (current == reads.end()) {
454 reads.clear();
455 current = reads.begin();
456 }
457 }
458
next()459 U2AssemblyRead SamtoolsBasedReadsIterator::next() {
460 if (this->hasNext()) {
461 U2AssemblyRead res = *current;
462 current++;
463 return res;
464 }
465 return U2AssemblyRead();
466 }
467
peek()468 U2AssemblyRead SamtoolsBasedReadsIterator::peek() {
469 if (this->hasNext()) {
470 U2AssemblyRead res = *current;
471 return res;
472 }
473 return U2AssemblyRead();
474 }
475
476 static const int NAME_COL = 0;
477 static const int FLAGS_COL = 1;
478 static const int CIGAR_COL = 5;
479 static const int RNEXT_COL = 6;
480 static const int SEQ_COL = 9;
481 static const int QUAL_COL = 10;
482
bamFetchFunction(const bam1_t * b,void * data)483 int bamFetchFunction(const bam1_t *b, void *data) {
484 SamtoolsBasedReadsIterator *it = (SamtoolsBasedReadsIterator *)data;
485 QList<U2AssemblyRead> &reads = it->reads;
486 SamtoolsBasedDbi &dbi = it->dbi;
487
488 U2AssemblyRead read(new U2AssemblyReadData());
489 {
490 char *samStr = bam_format1(dbi.getHeader(), b);
491 QByteArray samArr(samStr);
492 QList<QByteArray> values = samArr.split('\t');
493
494 read->name = values[NAME_COL];
495 read->flags = values[FLAGS_COL].toLongLong();
496 read->leftmostPos = b->core.pos;
497 read->mappingQuality = b->core.qual;
498 QString error;
499 QList<U2CigarToken> tokens = U2AssemblyUtils::parseCigar(values[CIGAR_COL], error);
500 if (error.isEmpty()) {
501 read->cigar = tokens;
502 }
503 read->readSequence = values[SEQ_COL];
504 if ("*" != values[QUAL_COL]) {
505 read->quality = values[QUAL_COL];
506 }
507 read->effectiveLen = Alignment::computeLength(read->cigar);
508 delete[] samStr;
509 read->id = read->name + ";" + QByteArray::number(read->leftmostPos) + ";" + QByteArray::number(read->effectiveLen);
510 read->rnext = values[RNEXT_COL];
511 read->pnext = b->core.mpos;
512 QByteArray auxStr((const char *)bam1_aux(b), b->l_aux);
513 read->aux = SamtoolsAdapter::string2aux(auxStr);
514 }
515
516 // add new border intersected reads
517 qint64 endPos = read->leftmostPos + read->effectiveLen;
518 if (endPos >= (qint64)it->nextPosToRead) {
519 it->newBorderReadIds << read->id;
520 }
521
522 if (!it->borderReadIds.contains(read->id)) {
523 reads.append(read);
524 }
525 return 0;
526 }
527
fetchNextChunk()528 void SamtoolsBasedReadsIterator::fetchNextChunk() {
529 bamFile bam = dbi.getBamFile();
530 const bam_index_t *idx = dbi.getIndex();
531 SAFE_POINT_EXT(nullptr != bam, nextPosToRead = INT_MAX, );
532 SAFE_POINT_EXT(nullptr != idx, nextPosToRead = INT_MAX, );
533
534 void *data = (void *)(this);
535 borderReadIds = newBorderReadIds;
536 newBorderReadIds.clear();
537 int startPos = (int)nextPosToRead;
538 int endPos = (int)(nextPosToRead + BUFFERED_INTERVAL_SIZE);
539 nextPosToRead += BUFFERED_INTERVAL_SIZE;
540 bam_fetch(bam, idx, assemblyId, startPos, endPos, data, bamFetchFunction);
541
542 current = reads.begin();
543 }
544
545 /************************************************************************/
546 /* SamtoolsBasedAssemblyDbi */
547 /************************************************************************/
SamtoolsBasedAssemblyDbi(SamtoolsBasedDbi & dbi)548 SamtoolsBasedAssemblyDbi::SamtoolsBasedAssemblyDbi(SamtoolsBasedDbi &dbi)
549 : U2SimpleAssemblyDbi(&dbi), dbi(dbi) {
550 }
551
toSamtoolsId(const U2DataId & assemblyId,U2OpStatus & os)552 int SamtoolsBasedAssemblyDbi::toSamtoolsId(const U2DataId &assemblyId, U2OpStatus &os) {
553 bool ok = false;
554 int dbId = assemblyId.toInt(&ok);
555 if (!ok) {
556 os.setError(QString("Incorrect samtools assembly id: %1").arg(assemblyId.data()));
557 }
558 return dbId;
559 }
560
getAssemblyObject(const U2DataId & id,U2OpStatus & os)561 U2Assembly SamtoolsBasedAssemblyDbi::getAssemblyObject(const U2DataId &id, U2OpStatus &os) {
562 CHECK_EXT(U2DbiState_Ready == dbi.getState(),
563 os.setError(BAMDbiPlugin::tr("Invalid samtools DBI state")),
564 U2Assembly());
565
566 const bam_header_t *header = dbi.getHeader();
567 SAFE_POINT(nullptr != header, "NULL BAM header", U2Assembly());
568
569 CHECK_EXT(U2Type::Assembly == dbi.getEntityTypeById(id),
570 os.setError(BAMDbiPlugin::tr("The specified object is not an assembly")),
571 U2Assembly());
572
573 int dbId = SamtoolsBasedAssemblyDbi::toSamtoolsId(id, os);
574 CHECK_OP(os, U2Assembly());
575 CHECK(dbId < header->n_targets, U2Assembly());
576
577 U2Assembly result;
578 result.id = id;
579 result.dbiId = dbi.getDbiId();
580 result.visualName = header->target_name[dbId];
581
582 return result;
583 }
584
bamCountFunction(const bam1_t *,void * data)585 int bamCountFunction(const bam1_t * /*b*/, void *data) {
586 qint64 *count = (qint64 *)data;
587 (*count)++;
588 return 0;
589 }
590
countReads(const U2DataId & assemblyId,const U2Region & r,U2OpStatus & os)591 qint64 SamtoolsBasedAssemblyDbi::countReads(const U2DataId &assemblyId, const U2Region &r, U2OpStatus &os) {
592 int id = SamtoolsBasedAssemblyDbi::toSamtoolsId(assemblyId, os);
593 CHECK_OP(os, 0);
594
595 qint64 result = 0;
596 void *data = &result;
597 U2Region targetReg = this->getCorrectRegion(assemblyId, r, os);
598 CHECK_OP(os, 0);
599 qint64 endPos = targetReg.endPos() - 1;
600 bam_fetch(dbi.getBamFile(), dbi.getIndex(), id, (int)targetReg.startPos, (int)endPos, data, bamCountFunction);
601
602 return result;
603 }
604
getReads(const U2DataId & assemblyId,const U2Region & r,U2OpStatus & os,bool)605 U2DbiIterator<U2AssemblyRead> *SamtoolsBasedAssemblyDbi::getReads(const U2DataId &assemblyId, const U2Region &r, U2OpStatus &os, bool /*sortedHint*/) {
606 int id = SamtoolsBasedAssemblyDbi::toSamtoolsId(assemblyId, os);
607 CHECK_OP(os, nullptr);
608 U2Region targetReg = this->getCorrectRegion(assemblyId, r, os);
609 return new SamtoolsBasedReadsIterator(id, targetReg, dbi);
610 }
611
getMaxPackedRow(const U2DataId &,const U2Region &,U2OpStatus & os)612 qint64 SamtoolsBasedAssemblyDbi::getMaxPackedRow(const U2DataId &, const U2Region &, U2OpStatus &os) {
613 os.setError("Operation not supported: BAM::SamtoolsBasedAssemblyDbi::getMaxPackedRow");
614 return 0;
615 }
616
getReadsByRow(const U2DataId &,const U2Region &,qint64,qint64,U2OpStatus & os)617 U2DbiIterator<U2AssemblyRead> *SamtoolsBasedAssemblyDbi::getReadsByRow(const U2DataId &, const U2Region &, qint64, qint64, U2OpStatus &os) {
618 os.setError("Operation not supported: BAM::SamtoolsBasedAssemblyDbi::getReadsByRow");
619 return nullptr;
620 }
621
getReadsByName(const U2DataId & assemblyId,const QByteArray & name,U2OpStatus & os)622 U2DbiIterator<U2AssemblyRead> *SamtoolsBasedAssemblyDbi::getReadsByName(const U2DataId &assemblyId, const QByteArray &name, U2OpStatus &os) {
623 int id = SamtoolsBasedAssemblyDbi::toSamtoolsId(assemblyId, os);
624 CHECK_OP(os, nullptr);
625 U2Region targetReg = this->getCorrectRegion(assemblyId, U2_REGION_MAX, os);
626 return new SamtoolsBasedReadsIterator(id, targetReg, dbi, name);
627 }
628
getMaxEndPos(const U2DataId & assemblyId,U2OpStatus & os)629 qint64 SamtoolsBasedAssemblyDbi::getMaxEndPos(const U2DataId &assemblyId, U2OpStatus &os) {
630 int id = SamtoolsBasedAssemblyDbi::toSamtoolsId(assemblyId, os);
631 CHECK_OP(os, 0);
632
633 const bam_header_t *header = dbi.getHeader();
634 CHECK_EXT(nullptr != header, os.setError("NULL header"), 0);
635 CHECK_EXT(id < header->n_targets, os.setError("Unknown assembly id"), 0);
636
637 qint64 targetLength = header->target_len[id];
638 // Avoid returning '-1' (object-not-found) for empty assemblies.
639 return targetLength == 0 ? 0 : targetLength - 1;
640 }
641
getCorrectRegion(const U2DataId & assemblyId,const U2Region & r,U2OpStatus & os)642 U2Region SamtoolsBasedAssemblyDbi::getCorrectRegion(const U2DataId &assemblyId, const U2Region &r, U2OpStatus &os) {
643 qint64 assemblyLength = getMaxEndPos(assemblyId, os) + 1;
644 CHECK_OP(os, U2Region());
645 qint64 startPos = r.startPos;
646 qint64 endPos = r.endPos() - 1;
647
648 U2Region outOfRangeRegion(assemblyLength + 1, 0);
649
650 if (startPos < 0) {
651 startPos = 0;
652 } else if (startPos >= assemblyLength) {
653 return outOfRangeRegion;
654 }
655 if (endPos < 0) {
656 return outOfRangeRegion;
657 } else if (endPos >= assemblyLength) {
658 endPos = assemblyLength - 1;
659 }
660
661 qint64 length = endPos - startPos + 1;
662 CHECK(length >= 0, outOfRangeRegion);
663
664 U2Region result(startPos, length);
665 return result;
666 }
667
668 /************************************************************************/
669 /* SamtoolsBasedAttributeDbi */
670 /************************************************************************/
SamtoolsBasedAttributeDbi(SamtoolsBasedDbi & _dbi)671 SamtoolsBasedAttributeDbi::SamtoolsBasedAttributeDbi(SamtoolsBasedDbi &_dbi)
672 : U2SimpleAttributeDbi(&_dbi), dbi(_dbi) {
673 }
674
getAvailableAttributeNames(U2OpStatus &)675 QStringList SamtoolsBasedAttributeDbi::getAvailableAttributeNames(U2OpStatus & /*os*/) {
676 QStringList result;
677 result << U2BaseAttributeName::reference_length;
678
679 return result;
680 }
681
getObjectAttributes(const U2DataId & objectId,const QString & attributeName,U2OpStatus &)682 QList<U2DataId> SamtoolsBasedAttributeDbi::getObjectAttributes(const U2DataId &objectId, const QString &attributeName, U2OpStatus & /*os*/) {
683 QList<U2DataId> result;
684 if (attributeName.isEmpty()) {
685 result << objectId + ATTRIBUTE_SEP + U2BaseAttributeName::reference_length.toLatin1();
686 } else if (U2BaseAttributeName::reference_length == attributeName) {
687 result << objectId + ATTRIBUTE_SEP + U2BaseAttributeName::reference_length.toLatin1();
688 }
689
690 return result;
691 }
692
getObjectPairAttributes(const U2DataId &,const U2DataId &,const QString &,U2OpStatus &)693 QList<U2DataId> SamtoolsBasedAttributeDbi::getObjectPairAttributes(const U2DataId & /*objectId*/, const U2DataId & /*childId*/, const QString & /*attributeName*/, U2OpStatus & /*os*/) {
694 return QList<U2DataId>();
695 }
696
getIntegerAttribute(const U2DataId & attributeId,U2OpStatus & os)697 U2IntegerAttribute SamtoolsBasedAttributeDbi::getIntegerAttribute(const U2DataId &attributeId, U2OpStatus &os) {
698 U2IntegerAttribute result;
699 QString idStr = attributeId;
700 QStringList tokens = idStr.split(ATTRIBUTE_SEP);
701 CHECK(2 == tokens.size(), result);
702
703 QString attrName = tokens[1];
704 if (U2BaseAttributeName::reference_length == attrName) {
705 U2DataId objIdStr = U2DataId(tokens[0].toLatin1());
706 int id = SamtoolsBasedAssemblyDbi::toSamtoolsId(objIdStr, os);
707 CHECK_OP(os, result);
708
709 const bam_header_t *header = dbi.getHeader();
710 CHECK_EXT(nullptr != header, os.setError("NULL header"), result);
711 CHECK_EXT(id < header->n_targets, os.setError("Unknown assembly id"), result);
712 qint64 length = header->target_len[id];
713 result = U2IntegerAttribute(objIdStr, U2BaseAttributeName::reference_length, length);
714 result.id = attributeId;
715 result.value = length;
716 }
717
718 return result;
719 }
720
getRealAttribute(const U2DataId &,U2OpStatus &)721 U2RealAttribute SamtoolsBasedAttributeDbi::getRealAttribute(const U2DataId & /*attributeId*/, U2OpStatus & /*os*/) {
722 return U2RealAttribute();
723 }
724
getStringAttribute(const U2DataId &,U2OpStatus &)725 U2StringAttribute SamtoolsBasedAttributeDbi::getStringAttribute(const U2DataId & /*attributeId*/, U2OpStatus & /*os*/) {
726 return U2StringAttribute();
727 }
728
getByteArrayAttribute(const U2DataId &,U2OpStatus &)729 U2ByteArrayAttribute SamtoolsBasedAttributeDbi::getByteArrayAttribute(const U2DataId & /*attributeId*/, U2OpStatus & /*os*/) {
730 return U2ByteArrayAttribute();
731 }
732
sort(const U2DbiSortConfig &,qint64,qint64,U2OpStatus & os)733 QList<U2DataId> SamtoolsBasedAttributeDbi::sort(const U2DbiSortConfig & /*sc*/, qint64 /*offset*/, qint64 /*count*/, U2OpStatus &os) {
734 U2DbiUtils::logNotSupported(U2DbiFeature_WriteAttributes, getRootDbi(), os);
735 return QList<U2DataId>();
736 }
737
738 /************************************************************************/
739 /* SamtoolsBasedDbiFactory */
740 /************************************************************************/
741 const QString SamtoolsBasedDbiFactory::ID = BAM_DBI_ID;
742
SamtoolsBasedDbiFactory()743 SamtoolsBasedDbiFactory::SamtoolsBasedDbiFactory()
744 : U2DbiFactory() {
745 }
746
createDbi()747 U2Dbi *SamtoolsBasedDbiFactory::createDbi() {
748 return new SamtoolsBasedDbi();
749 }
750
getId() const751 U2DbiFactoryId SamtoolsBasedDbiFactory::getId() const {
752 return ID;
753 }
754
isValidDbi(const QHash<QString,QString> & properties,const QByteArray & rawData,U2OpStatus &) const755 FormatCheckResult SamtoolsBasedDbiFactory::isValidDbi(const QHash<QString, QString> &properties, const QByteArray &rawData, U2OpStatus & /*os*/) const {
756 BAMFormatUtils f;
757 FormatCheckResult res = f.checkRawData(rawData, properties.value(U2DbiOptions::U2_DBI_OPTION_URL));
758 return res;
759 }
760
isDbiExists(const U2DbiId & id) const761 bool SamtoolsBasedDbiFactory::isDbiExists(const U2DbiId &id) const {
762 return QFile::exists(id);
763 }
764
765 } // namespace BAM
766 } // namespace U2
767