1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include "MsaRowUtils.h"
23
24 #include <U2Core/DNASequence.h>
25 #include <U2Core/MultipleSequenceAlignment.h>
26 #include <U2Core/U2OpStatus.h>
27 #include <U2Core/U2Region.h>
28 #include <U2Core/U2SafePoints.h>
29
30 namespace U2 {
31
getRowLength(const QByteArray & seq,const U2MsaRowGapModel & gaps)32 int MsaRowUtils::getRowLength(const QByteArray &seq, const U2MsaRowGapModel &gaps) {
33 return seq.length() + getGapsLength(gaps);
34 }
35
getGapsLength(const U2MsaRowGapModel & gaps)36 int MsaRowUtils::getGapsLength(const U2MsaRowGapModel &gaps) {
37 int length = 0;
38 foreach (const U2MsaGap &elt, gaps) {
39 length += elt.gap;
40 }
41 return length;
42 }
43
charAt(const QByteArray & seq,const U2MsaRowGapModel & gaps,int pos)44 char MsaRowUtils::charAt(const QByteArray &seq, const U2MsaRowGapModel &gaps, int pos) {
45 if (pos < 0 || pos >= getRowLength(seq, gaps)) {
46 return U2Msa::GAP_CHAR;
47 }
48
49 int gapsLength = 0;
50 foreach (const U2MsaGap &gap, gaps) {
51 // Current gap is somewhere further in the row
52 if (gap.offset > pos) {
53 break;
54 }
55 // Inside the gap
56 else if ((pos >= gap.offset) && (pos < gap.offset + gap.gap)) {
57 return U2Msa::GAP_CHAR;
58 }
59 // Go further in the row, calculating the current gaps length
60 else {
61 gapsLength += gap.gap;
62 }
63 }
64
65 if (pos >= gapsLength + seq.length()) {
66 return U2Msa::GAP_CHAR;
67 }
68
69 int index = pos - gapsLength;
70 bool indexIsInBounds = (index < seq.length()) && (index >= 0);
71
72 SAFE_POINT(indexIsInBounds,
73 QString("Internal error detected in MultipleSequenceAlignmentRow::charAt,"
74 " row length is '%1', gapsLength is '%2'!")
75 .arg(getRowLength(seq, gaps))
76 .arg(index),
77 U2Msa::GAP_CHAR);
78 return seq[index];
79 }
80
getRowLengthWithoutTrailing(const QByteArray & seq,const U2MsaRowGapModel & gaps)81 qint64 MsaRowUtils::getRowLengthWithoutTrailing(const QByteArray &seq, const U2MsaRowGapModel &gaps) {
82 int rowLength = getRowLength(seq, gaps);
83 int rowLengthWithoutTrailingGap = rowLength;
84 if (!gaps.isEmpty()) {
85 if (U2Msa::GAP_CHAR == charAt(seq, gaps, rowLength - 1)) {
86 U2MsaGap lastGap = gaps.last();
87 rowLengthWithoutTrailingGap -= lastGap.gap;
88 }
89 }
90 return rowLengthWithoutTrailingGap;
91 }
92
getRowLengthWithoutTrailing(qint64 dataLength,const U2MsaRowGapModel & gaps)93 qint64 MsaRowUtils::getRowLengthWithoutTrailing(qint64 dataLength, const U2MsaRowGapModel &gaps) {
94 qint64 gappedDataLength = dataLength;
95 foreach (const U2MsaGap &gap, gaps) {
96 if (gap.offset > gappedDataLength) {
97 break;
98 }
99 gappedDataLength += gap.gap;
100 }
101 return gappedDataLength;
102 }
103
getUngappedPosition(const U2MsaRowGapModel & gaps,qint64 dataLength,qint64 position,bool allowGapInPos)104 qint64 MsaRowUtils::getUngappedPosition(const U2MsaRowGapModel &gaps, qint64 dataLength, qint64 position, bool allowGapInPos) {
105 if (isGap(dataLength, gaps, position) && !allowGapInPos) {
106 return -1;
107 }
108
109 int gapsLength = 0;
110 foreach (const U2MsaGap &gap, gaps) {
111 if (gap.offset < position) {
112 if (allowGapInPos) {
113 gapsLength += (gap.offset + gap.gap < position) ? gap.gap : gap.gap - (gap.offset + gap.gap - position);
114 } else {
115 gapsLength += gap.gap;
116 }
117 } else {
118 break;
119 }
120 }
121
122 return position - gapsLength;
123 }
124
getGappedRegion(const U2MsaRowGapModel & gaps,const U2Region & ungappedRegion)125 U2Region MsaRowUtils::getGappedRegion(const U2MsaRowGapModel &gaps, const U2Region &ungappedRegion) {
126 U2Region result(ungappedRegion);
127 foreach (const U2MsaGap &gap, gaps) {
128 if (gap.offset <= result.startPos) { // leading gaps
129 result.startPos += gap.gap;
130 } else if (gap.offset > result.startPos && gap.offset < result.endPos()) { // inner gaps
131 result.length += gap.gap;
132 } else { // trailing
133 break;
134 }
135 }
136 return result;
137 }
138
getUngappedRegion(const U2MsaRowGapModel & gaps,const U2Region & selection)139 U2Region MsaRowUtils::getUngappedRegion(const U2MsaRowGapModel &gaps, const U2Region &selection) {
140 int shiftStartPos = 0;
141 int decreaseLength = 0;
142 foreach (const U2MsaGap &gap, gaps) {
143 if (gap.endPos() < selection.startPos) {
144 shiftStartPos += gap.gap;
145 } else if (gap.offset < selection.startPos && gap.offset + gap.gap >= selection.startPos) {
146 shiftStartPos = selection.startPos - gap.offset;
147 decreaseLength += gap.offset + gap.gap - selection.startPos;
148 } else if (gap.offset < selection.endPos() && gap.offset >= selection.startPos) {
149 if (gap.endPos() >= selection.endPos()) {
150 decreaseLength += selection.endPos() - gap.offset;
151 } else {
152 decreaseLength += gap.gap;
153 }
154 } else if (gap.offset <= selection.startPos && gap.offset + gap.gap >= selection.endPos()) {
155 return U2Region(0, 0);
156 } else {
157 break;
158 }
159 }
160 U2Region result(selection.startPos - shiftStartPos, selection.length - decreaseLength);
161 SAFE_POINT(result.startPos >= 0, "Error with calculation ungapped region", U2Region(0, 0));
162 SAFE_POINT(result.length > 0, "Error with calculation ungapped region", U2Region(0, 0));
163 return result;
164 }
165
getCoreStart(const U2MsaRowGapModel & gaps)166 int MsaRowUtils::getCoreStart(const U2MsaRowGapModel &gaps) {
167 if (!gaps.isEmpty() && gaps.first().offset == 0) {
168 return gaps.first().gap;
169 }
170 return 0;
171 }
172
insertGaps(U2OpStatus & os,U2MsaRowGapModel & gaps,int rowLengthWithoutTrailing,int position,int count)173 void MsaRowUtils::insertGaps(U2OpStatus &os, U2MsaRowGapModel &gaps, int rowLengthWithoutTrailing, int position, int count) {
174 SAFE_POINT_EXT(0 <= count, os.setError(QString("Internal error: incorrect parameters were passed to MsaRowUtils::insertGaps, "
175 "pos '%1', count '%2'")
176 .arg(position)
177 .arg(count)), );
178 CHECK(0 <= position && position < rowLengthWithoutTrailing, );
179
180 if (0 == position) {
181 addOffsetToGapModel(gaps, count);
182 } else {
183 const int dataLength = rowLengthWithoutTrailing - getGapsLength(gaps);
184 if (isGap(dataLength, gaps, position) || isGap(dataLength, gaps, position - 1)) {
185 // A gap is near
186 // Find the gaps and append 'count' gaps to it
187 // Shift all gaps that further in the row
188 for (int i = 0; i < gaps.count(); ++i) {
189 if (position >= gaps[i].offset) {
190 if (position <= gaps[i].offset + gaps[i].gap) {
191 gaps[i].gap += count;
192 }
193 } else {
194 gaps[i].offset += count;
195 }
196 }
197 } else {
198 // Insert between chars
199 bool found = false;
200
201 int indexGreaterGaps = 0;
202 for (int i = 0; i < gaps.count(); ++i) {
203 if (position > gaps[i].offset + gaps[i].gap) {
204 continue;
205 } else {
206 found = true;
207 U2MsaGap newGap(position, count);
208 gaps.insert(i, newGap);
209 indexGreaterGaps = i;
210 break;
211 }
212 }
213
214 // If found somewhere between existent gaps
215 if (found) {
216 // Shift further gaps
217 for (int i = indexGreaterGaps + 1; i < gaps.count(); ++i) {
218 gaps[i].offset += count;
219 }
220 } else {
221 // This is the last gap
222 U2MsaGap newGap(position, count);
223 gaps.append(newGap);
224 return;
225 }
226 }
227 }
228 }
229
removeGaps(U2OpStatus & os,U2MsaRowGapModel & gaps,int rowLengthWithoutTrailing,int position,int count)230 void MsaRowUtils::removeGaps(U2OpStatus &os, U2MsaRowGapModel &gaps, int rowLengthWithoutTrailing, int position, int count) {
231 SAFE_POINT_EXT(0 <= position && 0 <= count, os.setError(QString("Internal error: incorrect parameters were passed to MsaRowUtils::removeGaps, "
232 "pos '%1', count '%2'")
233 .arg(position)
234 .arg(count)), );
235 CHECK(position <= rowLengthWithoutTrailing, );
236
237 QList<U2MsaGap> newGapModel;
238 int endRegionPos = position + count; // non-inclusive
239 foreach (U2MsaGap gap, gaps) {
240 qint64 gapEnd = gap.offset + gap.gap;
241 if (gapEnd < position) {
242 newGapModel << gap;
243 } else if (gapEnd <= endRegionPos) {
244 if (gap.offset < position) {
245 gap.gap = position - gap.offset;
246 newGapModel << gap;
247 }
248 // Otherwise just remove the gap (do not write to the new gap model)
249 } else {
250 if (gap.offset < position) {
251 gap.gap -= count;
252 SAFE_POINT(gap.gap >= 0, "Non-positive gap length", );
253 newGapModel << gap;
254 } else if (gap.offset < endRegionPos) {
255 gap.gap = gapEnd - endRegionPos;
256 gap.offset = position;
257 SAFE_POINT(gap.gap > 0, "Non-positive gap length", );
258 SAFE_POINT(gap.offset >= 0, "Negative gap offset", );
259 newGapModel << gap;
260 } else {
261 // Shift the gap
262 gap.offset -= count;
263 SAFE_POINT(gap.offset >= 0, "Negative gap offset", );
264 newGapModel << gap;
265 }
266 }
267 }
268
269 gaps = newGapModel;
270 }
271
addOffsetToGapModel(U2MsaRowGapModel & gapModel,int offset)272 void MsaRowUtils::addOffsetToGapModel(U2MsaRowGapModel &gapModel, int offset) {
273 if (0 == offset) {
274 return;
275 }
276
277 if (!gapModel.isEmpty()) {
278 U2MsaGap &firstGap = gapModel[0];
279 if (0 == firstGap.offset) {
280 firstGap.gap += offset;
281 } else {
282 SAFE_POINT(offset >= 0, "Negative gap offset", );
283 U2MsaGap beginningGap(0, offset);
284 gapModel.insert(0, beginningGap);
285 }
286
287 // Shift other gaps
288 if (gapModel.count() > 1) {
289 for (int i = 1; i < gapModel.count(); ++i) {
290 qint64 newOffset = gapModel[i].offset + offset;
291 SAFE_POINT(newOffset >= 0, "Negative gap offset", );
292 gapModel[i].offset = newOffset;
293 }
294 }
295 } else {
296 SAFE_POINT(offset >= 0, "Negative gap offset", );
297 U2MsaGap gap(0, offset);
298 gapModel.append(gap);
299 }
300 }
301
shiftGapModel(U2MsaRowGapModel & gapModel,int shiftSize)302 void MsaRowUtils::shiftGapModel(U2MsaRowGapModel &gapModel, int shiftSize) {
303 CHECK(!gapModel.isEmpty(), );
304 CHECK(shiftSize != 0, );
305 CHECK(-shiftSize <= gapModel.first().offset, );
306 for (int i = 0; i < gapModel.size(); i++) {
307 gapModel[i].offset += shiftSize;
308 }
309 }
310
isGap(int dataLength,const U2MsaRowGapModel & gapModel,int position)311 bool MsaRowUtils::isGap(int dataLength, const U2MsaRowGapModel &gapModel, int position) {
312 int gapsLength = 0;
313 foreach (const U2MsaGap &gap, gapModel) {
314 if (gap.offset <= position && position < gap.offset + gap.gap) {
315 return true;
316 }
317 if (position < gap.offset) {
318 return false;
319 }
320 gapsLength += gap.gap;
321 }
322
323 if (dataLength + gapsLength <= position) {
324 return true;
325 }
326
327 return false;
328 }
329
isLeadingOrTrailingGap(int dataLength,const U2MsaRowGapModel & gapModel,int position)330 bool MsaRowUtils::isLeadingOrTrailingGap(int dataLength, const U2MsaRowGapModel &gapModel, int position) {
331 if (gapModel.isEmpty()) {
332 return false;
333 }
334 if (gapModel[0].offset == 0 && position < gapModel[0].endPos()) {
335 return true; // leading gap.
336 }
337 int totalGapsLen = 0;
338 for (const U2MsaGap &gap : qAsConst(gapModel)) {
339 totalGapsLen += gap.gap;
340 if (position < gap.offset) {
341 return false; // somewhere in the middle.
342 }
343 }
344 return position >= dataLength + totalGapsLen; // trailing gap.
345 }
346
chopGapModel(U2MsaRowGapModel & gapModel,qint64 maxLength)347 void MsaRowUtils::chopGapModel(U2MsaRowGapModel &gapModel, qint64 maxLength) {
348 chopGapModel(gapModel, U2Region(0, maxLength));
349 }
350
chopGapModel(U2MsaRowGapModel & gapModel,const U2Region & boundRegion)351 void MsaRowUtils::chopGapModel(U2MsaRowGapModel &gapModel, const U2Region &boundRegion) {
352 // Remove gaps after the region
353 while (!gapModel.isEmpty() && gapModel.last().offset >= boundRegion.endPos()) {
354 gapModel.removeLast();
355 }
356
357 if (!gapModel.isEmpty() && gapModel.last().endPos() > boundRegion.endPos()) {
358 gapModel.last().gap = boundRegion.endPos() - gapModel.last().offset;
359 }
360
361 // Remove gaps before the region
362 qint64 removedGapsLength = 0;
363 while (!gapModel.isEmpty() && gapModel.first().endPos() < boundRegion.startPos) {
364 removedGapsLength += gapModel.first().gap;
365 gapModel.removeFirst();
366 }
367
368 if (!gapModel.isEmpty() && gapModel.first().offset < boundRegion.startPos) {
369 removedGapsLength += boundRegion.startPos - gapModel.first().offset;
370 gapModel.first().gap -= boundRegion.startPos - gapModel.first().offset;
371 gapModel.first().offset = boundRegion.startPos;
372 }
373
374 shiftGapModel(gapModel, -removedGapsLength);
375 }
376
joinCharsAndGaps(const DNASequence & sequence,const U2MsaRowGapModel & gapModel,int rowLength,bool keepLeadingGaps,bool keepTrailingGaps)377 QByteArray MsaRowUtils::joinCharsAndGaps(const DNASequence &sequence, const U2MsaRowGapModel &gapModel, int rowLength, bool keepLeadingGaps, bool keepTrailingGaps) {
378 QByteArray bytes = sequence.constSequence();
379 int beginningOffset = 0;
380
381 if (gapModel.isEmpty()) {
382 return bytes;
383 }
384
385 for (int i = 0; i < gapModel.size(); ++i) {
386 QByteArray gapsBytes;
387 if (!keepLeadingGaps && (0 == gapModel[i].offset)) {
388 beginningOffset = gapModel[i].gap;
389 continue;
390 }
391
392 gapsBytes.fill(U2Msa::GAP_CHAR, gapModel[i].gap);
393 bytes.insert(gapModel[i].offset - beginningOffset, gapsBytes);
394 }
395
396 if (keepTrailingGaps && (bytes.size() < rowLength)) {
397 QByteArray gapsBytes;
398 gapsBytes.fill(U2Msa::GAP_CHAR, rowLength - bytes.size());
399 bytes.append(gapsBytes);
400 }
401
402 return bytes;
403 }
404
405 namespace {
406
getNextGap(QListIterator<U2MsaGap> & mainGapModelIterator,QListIterator<U2MsaGap> & additionalGapModelIterator,qint64 & gapsFromMainModelLength)407 U2MsaGap getNextGap(QListIterator<U2MsaGap> &mainGapModelIterator, QListIterator<U2MsaGap> &additionalGapModelIterator, qint64 &gapsFromMainModelLength) {
408 SAFE_POINT(mainGapModelIterator.hasNext() || additionalGapModelIterator.hasNext(), "Out of gap models boundaries", U2MsaGap());
409
410 if (!mainGapModelIterator.hasNext()) {
411 U2MsaGap gap = additionalGapModelIterator.next();
412 gap.offset += gapsFromMainModelLength;
413 return gap;
414 }
415
416 if (!additionalGapModelIterator.hasNext()) {
417 const U2MsaGap mainGap = mainGapModelIterator.next();
418 gapsFromMainModelLength += mainGap.gap;
419 return mainGap;
420 }
421
422 const U2MsaGap mainGap = mainGapModelIterator.peekNext();
423 const U2MsaGap additionalGap = additionalGapModelIterator.peekNext();
424 const U2MsaGap intersection = mainGap.intersect(additionalGap);
425
426 if (intersection.isValid()) {
427 const U2MsaGap unitedGap = U2MsaGap(qMin(mainGap.offset, additionalGap.offset + gapsFromMainModelLength), mainGap.gap + additionalGap.gap);
428 gapsFromMainModelLength += mainGap.gap;
429 mainGapModelIterator.next();
430 additionalGapModelIterator.next();
431 return unitedGap;
432 }
433
434 if (mainGap.offset <= additionalGap.offset + gapsFromMainModelLength) {
435 gapsFromMainModelLength += mainGap.gap;
436 mainGapModelIterator.next();
437 return mainGap;
438 } else {
439 U2MsaGap shiftedAdditionalGap = additionalGapModelIterator.next();
440 shiftedAdditionalGap.offset += gapsFromMainModelLength;
441 return shiftedAdditionalGap;
442 }
443 }
444
445 } // namespace
446
insertGapModel(const U2MsaRowGapModel & mainGapModel,const U2MsaRowGapModel & additionalGapModel)447 U2MsaRowGapModel MsaRowUtils::insertGapModel(const U2MsaRowGapModel &mainGapModel, const U2MsaRowGapModel &additionalGapModel) {
448 U2MsaRowGapModel mergedGapModel;
449 QListIterator<U2MsaGap> mainGapModelIterator(mainGapModel);
450 QListIterator<U2MsaGap> additionalGapModelIterator(additionalGapModel);
451 qint64 gapsFromMainModelLength = 0;
452 while (mainGapModelIterator.hasNext() || additionalGapModelIterator.hasNext()) {
453 mergedGapModel << getNextGap(mainGapModelIterator, additionalGapModelIterator, gapsFromMainModelLength);
454 }
455 mergeConsecutiveGaps(mergedGapModel);
456 return mergedGapModel;
457 }
458
mergeConsecutiveGaps(U2MsaRowGapModel & gapModel)459 void MsaRowUtils::mergeConsecutiveGaps(U2MsaRowGapModel &gapModel) {
460 CHECK(!gapModel.isEmpty(), );
461 QList<U2MsaGap> newGapModel;
462
463 newGapModel << gapModel[0];
464 int indexInNewGapModel = 0;
465 for (int i = 1; i < gapModel.count(); ++i) {
466 const qint64 previousGapEnd = newGapModel[indexInNewGapModel].offset + newGapModel[indexInNewGapModel].gap - 1;
467 const qint64 currectGapStart = gapModel[i].offset;
468 SAFE_POINT(currectGapStart > previousGapEnd, "Incorrect gap model during merging consecutive gaps", );
469 if (currectGapStart == previousGapEnd + 1) {
470 // Merge gaps
471 const qint64 newGapLength = newGapModel[indexInNewGapModel].gap + gapModel[i].gap;
472 SAFE_POINT(newGapLength > 0, "Non-positive gap length", )
473 newGapModel[indexInNewGapModel].gap = newGapLength;
474 } else {
475 // Add the gap to the list
476 newGapModel << gapModel[i];
477 indexInNewGapModel++;
478 }
479 }
480 gapModel = newGapModel;
481 }
482
483 namespace {
484
hasIntersection(const U2MsaGap & gap1,const U2MsaGap & gap2)485 bool hasIntersection(const U2MsaGap &gap1, const U2MsaGap &gap2) {
486 return gap1.offset < gap2.endPos() && gap2.offset < gap1.endPos();
487 }
488
489 // Moves the iterator that points to the less gap
490 // returns true, if step was successfully done
491 // returns false, if the end is already reached
stepForward(QMutableListIterator<U2MsaGap> & firstIterator,QMutableListIterator<U2MsaGap> & secondIterator)492 bool stepForward(QMutableListIterator<U2MsaGap> &firstIterator, QMutableListIterator<U2MsaGap> &secondIterator) {
493 CHECK(firstIterator.hasNext(), false);
494 CHECK(secondIterator.hasNext(), false);
495 const U2MsaGap &firstGap = firstIterator.peekNext();
496 const U2MsaGap &secondGap = secondIterator.peekNext();
497 if (firstGap.offset <= secondGap.offset) {
498 firstIterator.next();
499 } else {
500 secondIterator.next();
501 }
502 return true;
503 }
504
505 // forward iterators to the state, when the next values have an intersection
506 // returns true if there an intersection was found, otherwise return false
forwardToIntersection(QMutableListIterator<U2MsaGap> & firstIterator,QMutableListIterator<U2MsaGap> & secondIterator)507 bool forwardToIntersection(QMutableListIterator<U2MsaGap> &firstIterator, QMutableListIterator<U2MsaGap> &secondIterator) {
508 bool endReached = false;
509 while (!hasIntersection(firstIterator.peekNext(), secondIterator.peekNext())) {
510 endReached = !stepForward(firstIterator, secondIterator);
511 if (endReached) {
512 break;
513 }
514 }
515 return !endReached;
516 }
517
subGap(const U2MsaGap & subFrom,const U2MsaGap & subWhat)518 QPair<U2MsaGap, U2MsaGap> subGap(const U2MsaGap &subFrom, const U2MsaGap &subWhat) {
519 QPair<U2MsaGap, U2MsaGap> result;
520 if (subFrom.offset < subWhat.offset) {
521 result.first = U2MsaGap(subFrom.offset, subWhat.offset - subFrom.offset);
522 }
523 if (subFrom.endPos() > subWhat.endPos()) {
524 result.second = U2MsaGap(subWhat.endPos(), subFrom.endPos() - subWhat.endPos());
525 }
526 return result;
527 }
528
removeCommonPart(QMutableListIterator<U2MsaGap> & iterator,const U2MsaGap & commonPart)529 void removeCommonPart(QMutableListIterator<U2MsaGap> &iterator, const U2MsaGap &commonPart) {
530 const QPair<U2MsaGap, U2MsaGap> gapDifference = subGap(iterator.peekNext(), commonPart);
531 if (gapDifference.second.isValid()) {
532 iterator.peekNext() = gapDifference.second;
533 }
534 if (gapDifference.first.isValid()) {
535 iterator.insert(gapDifference.first);
536 }
537 if (!gapDifference.first.isValid() && !gapDifference.second.isValid()) {
538 iterator.next();
539 iterator.remove();
540 }
541 }
542
543 // extracts a common part from the next values, a difference between values is written back to the models
extractCommonPart(QMutableListIterator<U2MsaGap> & firstIterator,QMutableListIterator<U2MsaGap> & secondIterator)544 U2MsaGap extractCommonPart(QMutableListIterator<U2MsaGap> &firstIterator, QMutableListIterator<U2MsaGap> &secondIterator) {
545 SAFE_POINT(firstIterator.hasNext() && secondIterator.hasNext(), "Out of gap model boundaries", U2MsaGap());
546 U2MsaGap &firstGap = firstIterator.peekNext();
547 U2MsaGap &secondGap = secondIterator.peekNext();
548
549 const U2MsaGap commonPart = firstGap.intersect(secondGap);
550 SAFE_POINT(commonPart.isValid(), "Gaps don't have an intersection", U2MsaGap());
551 removeCommonPart(firstIterator, commonPart);
552 removeCommonPart(secondIterator, commonPart);
553
554 return commonPart;
555 }
556
557 } // namespace
558
getGapModelsDifference(const U2MsaRowGapModel & firstGapModel,const U2MsaRowGapModel & secondGapModel,U2MsaRowGapModel & commonPart,U2MsaRowGapModel & firstDifference,U2MsaRowGapModel & secondDifference)559 void MsaRowUtils::getGapModelsDifference(const U2MsaRowGapModel &firstGapModel, const U2MsaRowGapModel &secondGapModel, U2MsaRowGapModel &commonPart, U2MsaRowGapModel &firstDifference, U2MsaRowGapModel &secondDifference) {
560 commonPart.clear();
561 firstDifference = firstGapModel;
562 QMutableListIterator<U2MsaGap> firstIterator(firstDifference);
563 secondDifference = secondGapModel;
564 QMutableListIterator<U2MsaGap> secondIterator(secondDifference);
565
566 while (firstIterator.hasNext() && secondIterator.hasNext()) {
567 const bool intersectionFound = forwardToIntersection(firstIterator, secondIterator);
568 if (!intersectionFound) {
569 break;
570 }
571 commonPart << extractCommonPart(firstIterator, secondIterator);
572 }
573 mergeConsecutiveGaps(commonPart);
574 }
575
576 namespace {
577
insertGap(U2MsaRowGapModel & gapModel,const U2MsaGap & gap)578 void insertGap(U2MsaRowGapModel &gapModel, const U2MsaGap &gap) {
579 for (int i = 0; i < gapModel.size(); i++) {
580 if (gapModel[i].endPos() < gap.offset) {
581 // search the proper location
582 continue;
583 } else if (gapModel[i].offset > gap.endPos()) {
584 // no intersection, just insert
585 gapModel.insert(i, gap);
586 } else {
587 // there is an intersection
588 gapModel[i].offset = qMin(gapModel[i].offset, gap.offset);
589 gapModel[i].setEndPos(qMax(gapModel[i].endPos(), gap.endPos()));
590 int gapsToRemove = 0;
591 for (int j = i + 1; j < gapModel.size(); j++) {
592 if (gapModel[j].endPos() <= gapModel[i].endPos()) {
593 // this gap is fully covered by a new gap, just remove
594 gapsToRemove++;
595 } else if (gapModel[j].offset <= gapModel[i].endPos()) {
596 // this gap is partially covered by a new gap, enlarge the new gap and remove
597 gapModel[i].setEndPos(qMax(gapModel[i].endPos(), gapModel[j].endPos()));
598 gapsToRemove++;
599 } else {
600 break;
601 }
602 }
603
604 gapModel.erase(gapModel.begin() + i + 1, gapModel.begin() + i + gapsToRemove + 1);
605 }
606 }
607 }
608
subtitudeGap(QMutableListIterator<U2MsaGap> & minuendIterator,QMutableListIterator<U2MsaGap> & subtrahendIterator)609 void subtitudeGap(QMutableListIterator<U2MsaGap> &minuendIterator, QMutableListIterator<U2MsaGap> &subtrahendIterator) {
610 const QPair<U2MsaGap, U2MsaGap> substitutionResult = subGap(minuendIterator.next(), subtrahendIterator.peekNext());
611 minuendIterator.remove();
612
613 if (substitutionResult.second.isValid()) {
614 minuendIterator.insert(substitutionResult.second);
615 minuendIterator.previous();
616 }
617
618 if (substitutionResult.first.isValid()) {
619 minuendIterator.insert(substitutionResult.first);
620 minuendIterator.previous();
621 }
622 }
623
624 } // namespace
625
mergeGapModels(const U2MsaListGapModel & gapModels)626 U2MsaRowGapModel MsaRowUtils::mergeGapModels(const U2MsaListGapModel &gapModels) {
627 U2MsaRowGapModel mergedGapModel;
628 for (const U2MsaRowGapModel &gapModel : qAsConst(gapModels)) {
629 for (const U2MsaGap &gap : qAsConst(gapModel)) {
630 insertGap(mergedGapModel, gap);
631 }
632 }
633 return mergedGapModel;
634 }
635
subtitudeGapModel(const U2MsaRowGapModel & minuendGapModel,const U2MsaRowGapModel & subtrahendGapModel)636 U2MsaRowGapModel MsaRowUtils::subtitudeGapModel(const U2MsaRowGapModel &minuendGapModel, const U2MsaRowGapModel &subtrahendGapModel) {
637 U2MsaRowGapModel result = minuendGapModel;
638 U2MsaRowGapModel subtrahendGapModelCopy = subtrahendGapModel;
639 QMutableListIterator<U2MsaGap> minuendIterator(result);
640 QMutableListIterator<U2MsaGap> subtrahendIterator(subtrahendGapModelCopy);
641
642 while (minuendIterator.hasNext() && subtrahendIterator.hasNext()) {
643 const bool intersectionFound = forwardToIntersection(minuendIterator, subtrahendIterator);
644 if (!intersectionFound) {
645 break;
646 }
647 subtitudeGap(minuendIterator, subtrahendIterator);
648 }
649
650 return minuendGapModel;
651 }
652
reverseGapModel(const U2MsaRowGapModel & gapModel,qint64 rowLengthWithoutTrailing)653 U2MsaRowGapModel MsaRowUtils::reverseGapModel(const U2MsaRowGapModel &gapModel, qint64 rowLengthWithoutTrailing) {
654 U2MsaRowGapModel reversedGapModel = gapModel;
655
656 foreach (const U2MsaGap &gap, gapModel) {
657 if (rowLengthWithoutTrailing - gap.endPos() < 0) {
658 Q_ASSERT(false); // original model has gaps out of range or trailing gaps
659 continue;
660 }
661 reversedGapModel.prepend(U2MsaGap(rowLengthWithoutTrailing - gap.offset, gap.gap));
662 }
663
664 if (hasLeadingGaps(gapModel)) {
665 reversedGapModel.removeLast();
666 reversedGapModel.prepend(gapModel.first());
667 }
668
669 return reversedGapModel;
670 }
671
hasLeadingGaps(const U2MsaRowGapModel & gapModel)672 bool MsaRowUtils::hasLeadingGaps(const U2MsaRowGapModel &gapModel) {
673 return !gapModel.isEmpty() && gapModel.first().offset == 0;
674 }
675
removeTrailingGapsFromModel(qint64 length,U2MsaRowGapModel & gapModel)676 void MsaRowUtils::removeTrailingGapsFromModel(qint64 length, U2MsaRowGapModel &gapModel) {
677 for (int i = 0; i < gapModel.size(); i++) {
678 const U2MsaGap &gap = gapModel.at(i);
679 if (gap.offset >= length) {
680 while (gapModel.size() > i) {
681 gapModel.removeLast();
682 }
683 } else {
684 length += gap.gap;
685 }
686 }
687 }
688
689 } // namespace U2
690