1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include "CalculateCoveragePerBaseTask.h"
23 
24 #include <U2Core/U2AssemblyDbi.h>
25 #include <U2Core/U2AssemblyUtils.h>
26 #include <U2Core/U2AttributeDbi.h>
27 #include <U2Core/U2AttributeUtils.h>
28 #include <U2Core/U2CoreAttributes.h>
29 #include <U2Core/U2DbiUtils.h>
30 #include <U2Core/U2SafePoints.h>
31 
32 namespace U2 {
33 
CalculateCoveragePerBaseOnRegionTask(const U2DbiRef & dbiRef,const U2DataId & assemblyId,const U2Region & region)34 CalculateCoveragePerBaseOnRegionTask::CalculateCoveragePerBaseOnRegionTask(const U2DbiRef &dbiRef, const U2DataId &assemblyId, const U2Region &region)
35     : Task(tr("Calculate coverage per base for assembly on region (%1, %2)").arg(region.startPos).arg(region.endPos()), TaskFlag_None),
36       dbiRef(dbiRef),
37       assemblyId(assemblyId),
38       region(region),
39       results(new QVector<CoveragePerBaseInfo>) {
40     SAFE_POINT_EXT(dbiRef.isValid(), setError(tr("Invalid database reference")), );
41     SAFE_POINT_EXT(!assemblyId.isEmpty(), setError(tr("Invalid assembly ID")), );
42 }
43 
~CalculateCoveragePerBaseOnRegionTask()44 CalculateCoveragePerBaseOnRegionTask::~CalculateCoveragePerBaseOnRegionTask() {
45     delete results;
46 }
47 
run()48 void CalculateCoveragePerBaseOnRegionTask::run() {
49     DbiConnection con(dbiRef, stateInfo);
50     CHECK_OP(stateInfo, );
51     U2AssemblyDbi *assemblyDbi = con.dbi->getAssemblyDbi();
52     SAFE_POINT_EXT(nullptr != assemblyDbi, setError(tr("Assembly DBI is NULL")), );
53 
54     results->resize(region.length);
55 
56     QScopedPointer<U2DbiIterator<U2AssemblyRead>> readsIterator(assemblyDbi->getReads(assemblyId, region, stateInfo));
57     while (readsIterator->hasNext()) {
58         const U2AssemblyRead read = readsIterator->next();
59         processRead(read);
60         CHECK_OP(stateInfo, );
61     }
62 }
63 
getRegion() const64 const U2Region &CalculateCoveragePerBaseOnRegionTask::getRegion() const {
65     return region;
66 }
67 
takeResult()68 QVector<CoveragePerBaseInfo> *CalculateCoveragePerBaseOnRegionTask::takeResult() {
69     QVector<CoveragePerBaseInfo> *result = results;
70     results = nullptr;
71     return result;
72 }
73 
processRead(const U2AssemblyRead & read)74 void CalculateCoveragePerBaseOnRegionTask::processRead(const U2AssemblyRead &read) {
75     const qint64 startPos = qMax(read->leftmostPos, region.startPos);
76     const qint64 endPos = qMin(read->leftmostPos + read->effectiveLen, region.endPos());
77     const U2Region regionToProcess = U2Region(startPos, endPos - startPos);
78 
79     // we have used effective length of the read, so insertions/deletions are already taken into account
80     // cigarVector can be longer than needed
81     QVector<U2CigarOp> cigarVector;
82     foreach (const U2CigarToken &cigar, read->cigar) {
83         cigarVector += QVector<U2CigarOp>(cigar.count, cigar.op);
84     }
85 
86     if (read->leftmostPos < regionToProcess.startPos) {
87         cigarVector = cigarVector.mid(regionToProcess.startPos - read->leftmostPos);  // cut unneeded cigar string
88     }
89 
90     for (int positionOffset = 0, cigarOffset = 0, deletionsCount = 0, insertionsCount = 0; regionToProcess.startPos + positionOffset < regionToProcess.endPos(); positionOffset++) {
91         char currentBase = 'N';
92         CoveragePerBaseInfo &info = (*results)[regionToProcess.startPos + positionOffset - region.startPos];
93         const U2CigarOp cigarOp = nextCigarOp(cigarVector, cigarOffset, insertionsCount);
94         CHECK_OP(stateInfo, );
95 
96         switch (cigarOp) {
97             case U2CigarOp_I:
98             case U2CigarOp_S:
99                 // skip the insertion
100                 continue;
101             case U2CigarOp_D:
102                 // skip the deletion
103                 deletionsCount++;
104                 continue;
105             case U2CigarOp_N:
106                 // skip the deletion
107                 deletionsCount++;
108                 continue;
109             default:
110                 currentBase = read->readSequence[positionOffset - deletionsCount + insertionsCount];
111                 break;
112         }
113         info.basesCount[currentBase] = info.basesCount[currentBase] + 1;
114         info.coverage++;
115     }
116 }
117 
nextCigarOp(const QVector<U2CigarOp> & cigarVector,int & index,int & insertionsCount)118 U2CigarOp CalculateCoveragePerBaseOnRegionTask::nextCigarOp(const QVector<U2CigarOp> &cigarVector, int &index, int &insertionsCount) {
119     U2CigarOp cigarOp = U2CigarOp_Invalid;
120 
121     do {
122         SAFE_POINT_EXT(index < cigarVector.length(), setError(tr("Cigar string: out of bounds")), U2CigarOp_Invalid);
123         cigarOp = cigarVector[index];
124         index++;
125 
126         if (U2CigarOp_I == cigarOp || U2CigarOp_S == cigarOp) {
127             insertionsCount++;
128         }
129     } while (U2CigarOp_I == cigarOp || U2CigarOp_S == cigarOp || U2CigarOp_P == cigarOp);
130 
131     return cigarOp;
132 }
133 
CalculateCoveragePerBaseTask(const U2DbiRef & _dbiRef,const U2DataId & _assemblyId)134 CalculateCoveragePerBaseTask::CalculateCoveragePerBaseTask(const U2DbiRef &_dbiRef, const U2DataId &_assemblyId)
135     : Task(tr("Calculate coverage per base for assembly"), TaskFlags_NR_FOSE_COSC),
136       dbiRef(_dbiRef),
137       assemblyId(_assemblyId),
138       getLengthTask(nullptr) {
139     SAFE_POINT_EXT(dbiRef.isValid(), setError(tr("Invalid database reference")), );
140     SAFE_POINT_EXT(!assemblyId.isEmpty(), setError(tr("Invalid assembly ID")), );
141 }
142 
~CalculateCoveragePerBaseTask()143 CalculateCoveragePerBaseTask::~CalculateCoveragePerBaseTask() {
144     qDeleteAll(results.values());
145 }
146 
prepare()147 void CalculateCoveragePerBaseTask::prepare() {
148     getLengthTask = new GetAssemblyLengthTask(dbiRef, assemblyId);
149     addSubTask(getLengthTask);
150 }
151 
onSubTaskFinished(Task * subTask)152 QList<Task *> CalculateCoveragePerBaseTask::onSubTaskFinished(Task *subTask) {
153     QList<Task *> res;
154     CHECK_OP(stateInfo, res);
155 
156     if (subTask == getLengthTask) {
157         const qint64 length = getLengthTask->getAssemblyLength();
158         qint64 tasksCount = length / MAX_REGION_LENGTH + (length % MAX_REGION_LENGTH > 0 ? 1 : 0);
159         for (qint64 i = 0; i < tasksCount; i++) {
160             const U2Region region(i * MAX_REGION_LENGTH, (i == tasksCount - 1 ? length % MAX_REGION_LENGTH : MAX_REGION_LENGTH));
161             res.append(new CalculateCoveragePerBaseOnRegionTask(dbiRef, assemblyId, region));
162         }
163     } else {
164         CalculateCoveragePerBaseOnRegionTask *calculateTask = qobject_cast<CalculateCoveragePerBaseOnRegionTask *>(subTask);
165         SAFE_POINT_EXT(nullptr != calculateTask, setError(tr("An unexpected subtask")), res);
166 
167         results.insert(calculateTask->getRegion().startPos, calculateTask->takeResult());
168         emit si_regionIsProcessed(calculateTask->getRegion().startPos);
169     }
170 
171     return res;
172 }
173 
isResultReady(qint64 startPos) const174 bool CalculateCoveragePerBaseTask::isResultReady(qint64 startPos) const {
175     return results.contains(startPos);
176 }
177 
areThereUnprocessedResults() const178 bool CalculateCoveragePerBaseTask::areThereUnprocessedResults() const {
179     return !results.isEmpty();
180 }
181 
takeResult(qint64 startPos)182 QVector<CoveragePerBaseInfo> *CalculateCoveragePerBaseTask::takeResult(qint64 startPos) {
183     QVector<CoveragePerBaseInfo> *result = results.value(startPos, nullptr);
184     results.remove(startPos);
185     return result;
186 }
187 
run()188 void GetAssemblyLengthTask::run() {
189     DbiConnection con(dbiRef, stateInfo);
190     CHECK_OP(stateInfo, );
191     U2AttributeDbi *attributeDbi = con.dbi->getAttributeDbi();
192     SAFE_POINT_EXT(nullptr != attributeDbi, setError(tr("Attribute DBI is NULL")), );
193 
194     const U2IntegerAttribute lengthAttribute = U2AttributeUtils::findIntegerAttribute(attributeDbi, assemblyId, U2BaseAttributeName::reference_length, stateInfo);
195     CHECK_OP(stateInfo, );
196     CHECK_EXT(lengthAttribute.hasValidId(), setError(tr("Can't get the assembly length: attribute is missing")), );
197     CHECK_EXT(lengthAttribute.value > 0, setError(tr("Assembly length must be greater than zero")), );
198 
199     length = lengthAttribute.value;
200 }
201 }  // namespace U2
202