1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include "CalculateCoveragePerBaseTask.h"
23
24 #include <U2Core/U2AssemblyDbi.h>
25 #include <U2Core/U2AssemblyUtils.h>
26 #include <U2Core/U2AttributeDbi.h>
27 #include <U2Core/U2AttributeUtils.h>
28 #include <U2Core/U2CoreAttributes.h>
29 #include <U2Core/U2DbiUtils.h>
30 #include <U2Core/U2SafePoints.h>
31
32 namespace U2 {
33
CalculateCoveragePerBaseOnRegionTask(const U2DbiRef & dbiRef,const U2DataId & assemblyId,const U2Region & region)34 CalculateCoveragePerBaseOnRegionTask::CalculateCoveragePerBaseOnRegionTask(const U2DbiRef &dbiRef, const U2DataId &assemblyId, const U2Region ®ion)
35 : Task(tr("Calculate coverage per base for assembly on region (%1, %2)").arg(region.startPos).arg(region.endPos()), TaskFlag_None),
36 dbiRef(dbiRef),
37 assemblyId(assemblyId),
38 region(region),
39 results(new QVector<CoveragePerBaseInfo>) {
40 SAFE_POINT_EXT(dbiRef.isValid(), setError(tr("Invalid database reference")), );
41 SAFE_POINT_EXT(!assemblyId.isEmpty(), setError(tr("Invalid assembly ID")), );
42 }
43
~CalculateCoveragePerBaseOnRegionTask()44 CalculateCoveragePerBaseOnRegionTask::~CalculateCoveragePerBaseOnRegionTask() {
45 delete results;
46 }
47
run()48 void CalculateCoveragePerBaseOnRegionTask::run() {
49 DbiConnection con(dbiRef, stateInfo);
50 CHECK_OP(stateInfo, );
51 U2AssemblyDbi *assemblyDbi = con.dbi->getAssemblyDbi();
52 SAFE_POINT_EXT(nullptr != assemblyDbi, setError(tr("Assembly DBI is NULL")), );
53
54 results->resize(region.length);
55
56 QScopedPointer<U2DbiIterator<U2AssemblyRead>> readsIterator(assemblyDbi->getReads(assemblyId, region, stateInfo));
57 while (readsIterator->hasNext()) {
58 const U2AssemblyRead read = readsIterator->next();
59 processRead(read);
60 CHECK_OP(stateInfo, );
61 }
62 }
63
getRegion() const64 const U2Region &CalculateCoveragePerBaseOnRegionTask::getRegion() const {
65 return region;
66 }
67
takeResult()68 QVector<CoveragePerBaseInfo> *CalculateCoveragePerBaseOnRegionTask::takeResult() {
69 QVector<CoveragePerBaseInfo> *result = results;
70 results = nullptr;
71 return result;
72 }
73
processRead(const U2AssemblyRead & read)74 void CalculateCoveragePerBaseOnRegionTask::processRead(const U2AssemblyRead &read) {
75 const qint64 startPos = qMax(read->leftmostPos, region.startPos);
76 const qint64 endPos = qMin(read->leftmostPos + read->effectiveLen, region.endPos());
77 const U2Region regionToProcess = U2Region(startPos, endPos - startPos);
78
79 // we have used effective length of the read, so insertions/deletions are already taken into account
80 // cigarVector can be longer than needed
81 QVector<U2CigarOp> cigarVector;
82 foreach (const U2CigarToken &cigar, read->cigar) {
83 cigarVector += QVector<U2CigarOp>(cigar.count, cigar.op);
84 }
85
86 if (read->leftmostPos < regionToProcess.startPos) {
87 cigarVector = cigarVector.mid(regionToProcess.startPos - read->leftmostPos); // cut unneeded cigar string
88 }
89
90 for (int positionOffset = 0, cigarOffset = 0, deletionsCount = 0, insertionsCount = 0; regionToProcess.startPos + positionOffset < regionToProcess.endPos(); positionOffset++) {
91 char currentBase = 'N';
92 CoveragePerBaseInfo &info = (*results)[regionToProcess.startPos + positionOffset - region.startPos];
93 const U2CigarOp cigarOp = nextCigarOp(cigarVector, cigarOffset, insertionsCount);
94 CHECK_OP(stateInfo, );
95
96 switch (cigarOp) {
97 case U2CigarOp_I:
98 case U2CigarOp_S:
99 // skip the insertion
100 continue;
101 case U2CigarOp_D:
102 // skip the deletion
103 deletionsCount++;
104 continue;
105 case U2CigarOp_N:
106 // skip the deletion
107 deletionsCount++;
108 continue;
109 default:
110 currentBase = read->readSequence[positionOffset - deletionsCount + insertionsCount];
111 break;
112 }
113 info.basesCount[currentBase] = info.basesCount[currentBase] + 1;
114 info.coverage++;
115 }
116 }
117
nextCigarOp(const QVector<U2CigarOp> & cigarVector,int & index,int & insertionsCount)118 U2CigarOp CalculateCoveragePerBaseOnRegionTask::nextCigarOp(const QVector<U2CigarOp> &cigarVector, int &index, int &insertionsCount) {
119 U2CigarOp cigarOp = U2CigarOp_Invalid;
120
121 do {
122 SAFE_POINT_EXT(index < cigarVector.length(), setError(tr("Cigar string: out of bounds")), U2CigarOp_Invalid);
123 cigarOp = cigarVector[index];
124 index++;
125
126 if (U2CigarOp_I == cigarOp || U2CigarOp_S == cigarOp) {
127 insertionsCount++;
128 }
129 } while (U2CigarOp_I == cigarOp || U2CigarOp_S == cigarOp || U2CigarOp_P == cigarOp);
130
131 return cigarOp;
132 }
133
CalculateCoveragePerBaseTask(const U2DbiRef & _dbiRef,const U2DataId & _assemblyId)134 CalculateCoveragePerBaseTask::CalculateCoveragePerBaseTask(const U2DbiRef &_dbiRef, const U2DataId &_assemblyId)
135 : Task(tr("Calculate coverage per base for assembly"), TaskFlags_NR_FOSE_COSC),
136 dbiRef(_dbiRef),
137 assemblyId(_assemblyId),
138 getLengthTask(nullptr) {
139 SAFE_POINT_EXT(dbiRef.isValid(), setError(tr("Invalid database reference")), );
140 SAFE_POINT_EXT(!assemblyId.isEmpty(), setError(tr("Invalid assembly ID")), );
141 }
142
~CalculateCoveragePerBaseTask()143 CalculateCoveragePerBaseTask::~CalculateCoveragePerBaseTask() {
144 qDeleteAll(results.values());
145 }
146
prepare()147 void CalculateCoveragePerBaseTask::prepare() {
148 getLengthTask = new GetAssemblyLengthTask(dbiRef, assemblyId);
149 addSubTask(getLengthTask);
150 }
151
onSubTaskFinished(Task * subTask)152 QList<Task *> CalculateCoveragePerBaseTask::onSubTaskFinished(Task *subTask) {
153 QList<Task *> res;
154 CHECK_OP(stateInfo, res);
155
156 if (subTask == getLengthTask) {
157 const qint64 length = getLengthTask->getAssemblyLength();
158 qint64 tasksCount = length / MAX_REGION_LENGTH + (length % MAX_REGION_LENGTH > 0 ? 1 : 0);
159 for (qint64 i = 0; i < tasksCount; i++) {
160 const U2Region region(i * MAX_REGION_LENGTH, (i == tasksCount - 1 ? length % MAX_REGION_LENGTH : MAX_REGION_LENGTH));
161 res.append(new CalculateCoveragePerBaseOnRegionTask(dbiRef, assemblyId, region));
162 }
163 } else {
164 CalculateCoveragePerBaseOnRegionTask *calculateTask = qobject_cast<CalculateCoveragePerBaseOnRegionTask *>(subTask);
165 SAFE_POINT_EXT(nullptr != calculateTask, setError(tr("An unexpected subtask")), res);
166
167 results.insert(calculateTask->getRegion().startPos, calculateTask->takeResult());
168 emit si_regionIsProcessed(calculateTask->getRegion().startPos);
169 }
170
171 return res;
172 }
173
isResultReady(qint64 startPos) const174 bool CalculateCoveragePerBaseTask::isResultReady(qint64 startPos) const {
175 return results.contains(startPos);
176 }
177
areThereUnprocessedResults() const178 bool CalculateCoveragePerBaseTask::areThereUnprocessedResults() const {
179 return !results.isEmpty();
180 }
181
takeResult(qint64 startPos)182 QVector<CoveragePerBaseInfo> *CalculateCoveragePerBaseTask::takeResult(qint64 startPos) {
183 QVector<CoveragePerBaseInfo> *result = results.value(startPos, nullptr);
184 results.remove(startPos);
185 return result;
186 }
187
run()188 void GetAssemblyLengthTask::run() {
189 DbiConnection con(dbiRef, stateInfo);
190 CHECK_OP(stateInfo, );
191 U2AttributeDbi *attributeDbi = con.dbi->getAttributeDbi();
192 SAFE_POINT_EXT(nullptr != attributeDbi, setError(tr("Attribute DBI is NULL")), );
193
194 const U2IntegerAttribute lengthAttribute = U2AttributeUtils::findIntegerAttribute(attributeDbi, assemblyId, U2BaseAttributeName::reference_length, stateInfo);
195 CHECK_OP(stateInfo, );
196 CHECK_EXT(lengthAttribute.hasValidId(), setError(tr("Can't get the assembly length: attribute is missing")), );
197 CHECK_EXT(lengthAttribute.value > 0, setError(tr("Assembly length must be greater than zero")), );
198
199 length = lengthAttribute.value;
200 }
201 } // namespace U2
202