1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include "MsaRowUtils.h"
23 
24 #include <U2Core/DNASequence.h>
25 #include <U2Core/MultipleSequenceAlignment.h>
26 #include <U2Core/U2OpStatus.h>
27 #include <U2Core/U2Region.h>
28 #include <U2Core/U2SafePoints.h>
29 
30 namespace U2 {
31 
getRowLength(const QByteArray & seq,const U2MsaRowGapModel & gaps)32 int MsaRowUtils::getRowLength(const QByteArray &seq, const U2MsaRowGapModel &gaps) {
33     return seq.length() + getGapsLength(gaps);
34 }
35 
getGapsLength(const U2MsaRowGapModel & gaps)36 int MsaRowUtils::getGapsLength(const U2MsaRowGapModel &gaps) {
37     int length = 0;
38     foreach (const U2MsaGap &elt, gaps) {
39         length += elt.gap;
40     }
41     return length;
42 }
43 
charAt(const QByteArray & seq,const U2MsaRowGapModel & gaps,int pos)44 char MsaRowUtils::charAt(const QByteArray &seq, const U2MsaRowGapModel &gaps, int pos) {
45     if (pos < 0 || pos >= getRowLength(seq, gaps)) {
46         return U2Msa::GAP_CHAR;
47     }
48 
49     int gapsLength = 0;
50     foreach (const U2MsaGap &gap, gaps) {
51         // Current gap is somewhere further in the row
52         if (gap.offset > pos) {
53             break;
54         }
55         // Inside the gap
56         else if ((pos >= gap.offset) && (pos < gap.offset + gap.gap)) {
57             return U2Msa::GAP_CHAR;
58         }
59         // Go further in the row, calculating the current gaps length
60         else {
61             gapsLength += gap.gap;
62         }
63     }
64 
65     if (pos >= gapsLength + seq.length()) {
66         return U2Msa::GAP_CHAR;
67     }
68 
69     int index = pos - gapsLength;
70     bool indexIsInBounds = (index < seq.length()) && (index >= 0);
71 
72     SAFE_POINT(indexIsInBounds,
73                QString("Internal error detected in MultipleSequenceAlignmentRow::charAt,"
74                        " row length is '%1', gapsLength is '%2'!")
75                    .arg(getRowLength(seq, gaps))
76                    .arg(index),
77                U2Msa::GAP_CHAR);
78     return seq[index];
79 }
80 
getRowLengthWithoutTrailing(const QByteArray & seq,const U2MsaRowGapModel & gaps)81 qint64 MsaRowUtils::getRowLengthWithoutTrailing(const QByteArray &seq, const U2MsaRowGapModel &gaps) {
82     int rowLength = getRowLength(seq, gaps);
83     int rowLengthWithoutTrailingGap = rowLength;
84     if (!gaps.isEmpty()) {
85         if (U2Msa::GAP_CHAR == charAt(seq, gaps, rowLength - 1)) {
86             U2MsaGap lastGap = gaps.last();
87             rowLengthWithoutTrailingGap -= lastGap.gap;
88         }
89     }
90     return rowLengthWithoutTrailingGap;
91 }
92 
getRowLengthWithoutTrailing(qint64 dataLength,const U2MsaRowGapModel & gaps)93 qint64 MsaRowUtils::getRowLengthWithoutTrailing(qint64 dataLength, const U2MsaRowGapModel &gaps) {
94     qint64 gappedDataLength = dataLength;
95     foreach (const U2MsaGap &gap, gaps) {
96         if (gap.offset > gappedDataLength) {
97             break;
98         }
99         gappedDataLength += gap.gap;
100     }
101     return gappedDataLength;
102 }
103 
getUngappedPosition(const U2MsaRowGapModel & gaps,qint64 dataLength,qint64 position,bool allowGapInPos)104 qint64 MsaRowUtils::getUngappedPosition(const U2MsaRowGapModel &gaps, qint64 dataLength, qint64 position, bool allowGapInPos) {
105     if (isGap(dataLength, gaps, position) && !allowGapInPos) {
106         return -1;
107     }
108 
109     int gapsLength = 0;
110     foreach (const U2MsaGap &gap, gaps) {
111         if (gap.offset < position) {
112             if (allowGapInPos) {
113                 gapsLength += (gap.offset + gap.gap < position) ? gap.gap : gap.gap - (gap.offset + gap.gap - position);
114             } else {
115                 gapsLength += gap.gap;
116             }
117         } else {
118             break;
119         }
120     }
121 
122     return position - gapsLength;
123 }
124 
getGappedRegion(const U2MsaRowGapModel & gaps,const U2Region & ungappedRegion)125 U2Region MsaRowUtils::getGappedRegion(const U2MsaRowGapModel &gaps, const U2Region &ungappedRegion) {
126     U2Region result(ungappedRegion);
127     foreach (const U2MsaGap &gap, gaps) {
128         if (gap.offset <= result.startPos) {  // leading gaps
129             result.startPos += gap.gap;
130         } else if (gap.offset > result.startPos && gap.offset < result.endPos()) {  // inner gaps
131             result.length += gap.gap;
132         } else {  // trailing
133             break;
134         }
135     }
136     return result;
137 }
138 
getUngappedRegion(const U2MsaRowGapModel & gaps,const U2Region & selection)139 U2Region MsaRowUtils::getUngappedRegion(const U2MsaRowGapModel &gaps, const U2Region &selection) {
140     int shiftStartPos = 0;
141     int decreaseLength = 0;
142     foreach (const U2MsaGap &gap, gaps) {
143         if (gap.endPos() < selection.startPos) {
144             shiftStartPos += gap.gap;
145         } else if (gap.offset < selection.startPos && gap.offset + gap.gap >= selection.startPos) {
146             shiftStartPos = selection.startPos - gap.offset;
147             decreaseLength += gap.offset + gap.gap - selection.startPos;
148         } else if (gap.offset < selection.endPos() && gap.offset >= selection.startPos) {
149             if (gap.endPos() >= selection.endPos()) {
150                 decreaseLength += selection.endPos() - gap.offset;
151             } else {
152                 decreaseLength += gap.gap;
153             }
154         } else if (gap.offset <= selection.startPos && gap.offset + gap.gap >= selection.endPos()) {
155             return U2Region(0, 0);
156         } else {
157             break;
158         }
159     }
160     U2Region result(selection.startPos - shiftStartPos, selection.length - decreaseLength);
161     SAFE_POINT(result.startPos >= 0, "Error with calculation ungapped region", U2Region(0, 0));
162     SAFE_POINT(result.length > 0, "Error with calculation ungapped region", U2Region(0, 0));
163     return result;
164 }
165 
getCoreStart(const U2MsaRowGapModel & gaps)166 int MsaRowUtils::getCoreStart(const U2MsaRowGapModel &gaps) {
167     if (!gaps.isEmpty() && gaps.first().offset == 0) {
168         return gaps.first().gap;
169     }
170     return 0;
171 }
172 
insertGaps(U2OpStatus & os,U2MsaRowGapModel & gaps,int rowLengthWithoutTrailing,int position,int count)173 void MsaRowUtils::insertGaps(U2OpStatus &os, U2MsaRowGapModel &gaps, int rowLengthWithoutTrailing, int position, int count) {
174     SAFE_POINT_EXT(0 <= count, os.setError(QString("Internal error: incorrect parameters were passed to MsaRowUtils::insertGaps, "
175                                                    "pos '%1', count '%2'")
176                                                .arg(position)
177                                                .arg(count)), );
178     CHECK(0 <= position && position < rowLengthWithoutTrailing, );
179 
180     if (0 == position) {
181         addOffsetToGapModel(gaps, count);
182     } else {
183         const int dataLength = rowLengthWithoutTrailing - getGapsLength(gaps);
184         if (isGap(dataLength, gaps, position) || isGap(dataLength, gaps, position - 1)) {
185             // A gap is near
186             // Find the gaps and append 'count' gaps to it
187             // Shift all gaps that further in the row
188             for (int i = 0; i < gaps.count(); ++i) {
189                 if (position >= gaps[i].offset) {
190                     if (position <= gaps[i].offset + gaps[i].gap) {
191                         gaps[i].gap += count;
192                     }
193                 } else {
194                     gaps[i].offset += count;
195                 }
196             }
197         } else {
198             // Insert between chars
199             bool found = false;
200 
201             int indexGreaterGaps = 0;
202             for (int i = 0; i < gaps.count(); ++i) {
203                 if (position > gaps[i].offset + gaps[i].gap) {
204                     continue;
205                 } else {
206                     found = true;
207                     U2MsaGap newGap(position, count);
208                     gaps.insert(i, newGap);
209                     indexGreaterGaps = i;
210                     break;
211                 }
212             }
213 
214             // If found somewhere between existent gaps
215             if (found) {
216                 // Shift further gaps
217                 for (int i = indexGreaterGaps + 1; i < gaps.count(); ++i) {
218                     gaps[i].offset += count;
219                 }
220             } else {
221                 // This is the last gap
222                 U2MsaGap newGap(position, count);
223                 gaps.append(newGap);
224                 return;
225             }
226         }
227     }
228 }
229 
removeGaps(U2OpStatus & os,U2MsaRowGapModel & gaps,int rowLengthWithoutTrailing,int position,int count)230 void MsaRowUtils::removeGaps(U2OpStatus &os, U2MsaRowGapModel &gaps, int rowLengthWithoutTrailing, int position, int count) {
231     SAFE_POINT_EXT(0 <= position && 0 <= count, os.setError(QString("Internal error: incorrect parameters were passed to MsaRowUtils::removeGaps, "
232                                                                     "pos '%1', count '%2'")
233                                                                 .arg(position)
234                                                                 .arg(count)), );
235     CHECK(position <= rowLengthWithoutTrailing, );
236 
237     QList<U2MsaGap> newGapModel;
238     int endRegionPos = position + count;  // non-inclusive
239     foreach (U2MsaGap gap, gaps) {
240         qint64 gapEnd = gap.offset + gap.gap;
241         if (gapEnd < position) {
242             newGapModel << gap;
243         } else if (gapEnd <= endRegionPos) {
244             if (gap.offset < position) {
245                 gap.gap = position - gap.offset;
246                 newGapModel << gap;
247             }
248             // Otherwise just remove the gap (do not write to the new gap model)
249         } else {
250             if (gap.offset < position) {
251                 gap.gap -= count;
252                 SAFE_POINT(gap.gap >= 0, "Non-positive gap length", );
253                 newGapModel << gap;
254             } else if (gap.offset < endRegionPos) {
255                 gap.gap = gapEnd - endRegionPos;
256                 gap.offset = position;
257                 SAFE_POINT(gap.gap > 0, "Non-positive gap length", );
258                 SAFE_POINT(gap.offset >= 0, "Negative gap offset", );
259                 newGapModel << gap;
260             } else {
261                 // Shift the gap
262                 gap.offset -= count;
263                 SAFE_POINT(gap.offset >= 0, "Negative gap offset", );
264                 newGapModel << gap;
265             }
266         }
267     }
268 
269     gaps = newGapModel;
270 }
271 
addOffsetToGapModel(U2MsaRowGapModel & gapModel,int offset)272 void MsaRowUtils::addOffsetToGapModel(U2MsaRowGapModel &gapModel, int offset) {
273     if (0 == offset) {
274         return;
275     }
276 
277     if (!gapModel.isEmpty()) {
278         U2MsaGap &firstGap = gapModel[0];
279         if (0 == firstGap.offset) {
280             firstGap.gap += offset;
281         } else {
282             SAFE_POINT(offset >= 0, "Negative gap offset", );
283             U2MsaGap beginningGap(0, offset);
284             gapModel.insert(0, beginningGap);
285         }
286 
287         // Shift other gaps
288         if (gapModel.count() > 1) {
289             for (int i = 1; i < gapModel.count(); ++i) {
290                 qint64 newOffset = gapModel[i].offset + offset;
291                 SAFE_POINT(newOffset >= 0, "Negative gap offset", );
292                 gapModel[i].offset = newOffset;
293             }
294         }
295     } else {
296         SAFE_POINT(offset >= 0, "Negative gap offset", );
297         U2MsaGap gap(0, offset);
298         gapModel.append(gap);
299     }
300 }
301 
shiftGapModel(U2MsaRowGapModel & gapModel,int shiftSize)302 void MsaRowUtils::shiftGapModel(U2MsaRowGapModel &gapModel, int shiftSize) {
303     CHECK(!gapModel.isEmpty(), );
304     CHECK(shiftSize != 0, );
305     CHECK(-shiftSize <= gapModel.first().offset, );
306     for (int i = 0; i < gapModel.size(); i++) {
307         gapModel[i].offset += shiftSize;
308     }
309 }
310 
isGap(int dataLength,const U2MsaRowGapModel & gapModel,int position)311 bool MsaRowUtils::isGap(int dataLength, const U2MsaRowGapModel &gapModel, int position) {
312     int gapsLength = 0;
313     foreach (const U2MsaGap &gap, gapModel) {
314         if (gap.offset <= position && position < gap.offset + gap.gap) {
315             return true;
316         }
317         if (position < gap.offset) {
318             return false;
319         }
320         gapsLength += gap.gap;
321     }
322 
323     if (dataLength + gapsLength <= position) {
324         return true;
325     }
326 
327     return false;
328 }
329 
isLeadingOrTrailingGap(int dataLength,const U2MsaRowGapModel & gapModel,int position)330 bool MsaRowUtils::isLeadingOrTrailingGap(int dataLength, const U2MsaRowGapModel &gapModel, int position) {
331     if (gapModel.isEmpty()) {
332         return false;
333     }
334     if (gapModel[0].offset == 0 && position < gapModel[0].endPos()) {
335         return true;  // leading gap.
336     }
337     int totalGapsLen = 0;
338     for (const U2MsaGap &gap : qAsConst(gapModel)) {
339         totalGapsLen += gap.gap;
340         if (position < gap.offset) {
341             return false;  // somewhere in the middle.
342         }
343     }
344     return position >= dataLength + totalGapsLen;  // trailing gap.
345 }
346 
chopGapModel(U2MsaRowGapModel & gapModel,qint64 maxLength)347 void MsaRowUtils::chopGapModel(U2MsaRowGapModel &gapModel, qint64 maxLength) {
348     chopGapModel(gapModel, U2Region(0, maxLength));
349 }
350 
chopGapModel(U2MsaRowGapModel & gapModel,const U2Region & boundRegion)351 void MsaRowUtils::chopGapModel(U2MsaRowGapModel &gapModel, const U2Region &boundRegion) {
352     // Remove gaps after the region
353     while (!gapModel.isEmpty() && gapModel.last().offset >= boundRegion.endPos()) {
354         gapModel.removeLast();
355     }
356 
357     if (!gapModel.isEmpty() && gapModel.last().endPos() > boundRegion.endPos()) {
358         gapModel.last().gap = boundRegion.endPos() - gapModel.last().offset;
359     }
360 
361     // Remove gaps before the region
362     qint64 removedGapsLength = 0;
363     while (!gapModel.isEmpty() && gapModel.first().endPos() < boundRegion.startPos) {
364         removedGapsLength += gapModel.first().gap;
365         gapModel.removeFirst();
366     }
367 
368     if (!gapModel.isEmpty() && gapModel.first().offset < boundRegion.startPos) {
369         removedGapsLength += boundRegion.startPos - gapModel.first().offset;
370         gapModel.first().gap -= boundRegion.startPos - gapModel.first().offset;
371         gapModel.first().offset = boundRegion.startPos;
372     }
373 
374     shiftGapModel(gapModel, -removedGapsLength);
375 }
376 
joinCharsAndGaps(const DNASequence & sequence,const U2MsaRowGapModel & gapModel,int rowLength,bool keepLeadingGaps,bool keepTrailingGaps)377 QByteArray MsaRowUtils::joinCharsAndGaps(const DNASequence &sequence, const U2MsaRowGapModel &gapModel, int rowLength, bool keepLeadingGaps, bool keepTrailingGaps) {
378     QByteArray bytes = sequence.constSequence();
379     int beginningOffset = 0;
380 
381     if (gapModel.isEmpty()) {
382         return bytes;
383     }
384 
385     for (int i = 0; i < gapModel.size(); ++i) {
386         QByteArray gapsBytes;
387         if (!keepLeadingGaps && (0 == gapModel[i].offset)) {
388             beginningOffset = gapModel[i].gap;
389             continue;
390         }
391 
392         gapsBytes.fill(U2Msa::GAP_CHAR, gapModel[i].gap);
393         bytes.insert(gapModel[i].offset - beginningOffset, gapsBytes);
394     }
395 
396     if (keepTrailingGaps && (bytes.size() < rowLength)) {
397         QByteArray gapsBytes;
398         gapsBytes.fill(U2Msa::GAP_CHAR, rowLength - bytes.size());
399         bytes.append(gapsBytes);
400     }
401 
402     return bytes;
403 }
404 
405 namespace {
406 
getNextGap(QListIterator<U2MsaGap> & mainGapModelIterator,QListIterator<U2MsaGap> & additionalGapModelIterator,qint64 & gapsFromMainModelLength)407 U2MsaGap getNextGap(QListIterator<U2MsaGap> &mainGapModelIterator, QListIterator<U2MsaGap> &additionalGapModelIterator, qint64 &gapsFromMainModelLength) {
408     SAFE_POINT(mainGapModelIterator.hasNext() || additionalGapModelIterator.hasNext(), "Out of gap models boundaries", U2MsaGap());
409 
410     if (!mainGapModelIterator.hasNext()) {
411         U2MsaGap gap = additionalGapModelIterator.next();
412         gap.offset += gapsFromMainModelLength;
413         return gap;
414     }
415 
416     if (!additionalGapModelIterator.hasNext()) {
417         const U2MsaGap mainGap = mainGapModelIterator.next();
418         gapsFromMainModelLength += mainGap.gap;
419         return mainGap;
420     }
421 
422     const U2MsaGap mainGap = mainGapModelIterator.peekNext();
423     const U2MsaGap additionalGap = additionalGapModelIterator.peekNext();
424     const U2MsaGap intersection = mainGap.intersect(additionalGap);
425 
426     if (intersection.isValid()) {
427         const U2MsaGap unitedGap = U2MsaGap(qMin(mainGap.offset, additionalGap.offset + gapsFromMainModelLength), mainGap.gap + additionalGap.gap);
428         gapsFromMainModelLength += mainGap.gap;
429         mainGapModelIterator.next();
430         additionalGapModelIterator.next();
431         return unitedGap;
432     }
433 
434     if (mainGap.offset <= additionalGap.offset + gapsFromMainModelLength) {
435         gapsFromMainModelLength += mainGap.gap;
436         mainGapModelIterator.next();
437         return mainGap;
438     } else {
439         U2MsaGap shiftedAdditionalGap = additionalGapModelIterator.next();
440         shiftedAdditionalGap.offset += gapsFromMainModelLength;
441         return shiftedAdditionalGap;
442     }
443 }
444 
445 }  // namespace
446 
insertGapModel(const U2MsaRowGapModel & mainGapModel,const U2MsaRowGapModel & additionalGapModel)447 U2MsaRowGapModel MsaRowUtils::insertGapModel(const U2MsaRowGapModel &mainGapModel, const U2MsaRowGapModel &additionalGapModel) {
448     U2MsaRowGapModel mergedGapModel;
449     QListIterator<U2MsaGap> mainGapModelIterator(mainGapModel);
450     QListIterator<U2MsaGap> additionalGapModelIterator(additionalGapModel);
451     qint64 gapsFromMainModelLength = 0;
452     while (mainGapModelIterator.hasNext() || additionalGapModelIterator.hasNext()) {
453         mergedGapModel << getNextGap(mainGapModelIterator, additionalGapModelIterator, gapsFromMainModelLength);
454     }
455     mergeConsecutiveGaps(mergedGapModel);
456     return mergedGapModel;
457 }
458 
mergeConsecutiveGaps(U2MsaRowGapModel & gapModel)459 void MsaRowUtils::mergeConsecutiveGaps(U2MsaRowGapModel &gapModel) {
460     CHECK(!gapModel.isEmpty(), );
461     QList<U2MsaGap> newGapModel;
462 
463     newGapModel << gapModel[0];
464     int indexInNewGapModel = 0;
465     for (int i = 1; i < gapModel.count(); ++i) {
466         const qint64 previousGapEnd = newGapModel[indexInNewGapModel].offset + newGapModel[indexInNewGapModel].gap - 1;
467         const qint64 currectGapStart = gapModel[i].offset;
468         SAFE_POINT(currectGapStart > previousGapEnd, "Incorrect gap model during merging consecutive gaps", );
469         if (currectGapStart == previousGapEnd + 1) {
470             // Merge gaps
471             const qint64 newGapLength = newGapModel[indexInNewGapModel].gap + gapModel[i].gap;
472             SAFE_POINT(newGapLength > 0, "Non-positive gap length", )
473             newGapModel[indexInNewGapModel].gap = newGapLength;
474         } else {
475             // Add the gap to the list
476             newGapModel << gapModel[i];
477             indexInNewGapModel++;
478         }
479     }
480     gapModel = newGapModel;
481 }
482 
483 namespace {
484 
hasIntersection(const U2MsaGap & gap1,const U2MsaGap & gap2)485 bool hasIntersection(const U2MsaGap &gap1, const U2MsaGap &gap2) {
486     return gap1.offset < gap2.endPos() && gap2.offset < gap1.endPos();
487 }
488 
489 // Moves the iterator that points to the less gap
490 // returns true, if step was successfully done
491 // returns false, if the end is already reached
stepForward(QMutableListIterator<U2MsaGap> & firstIterator,QMutableListIterator<U2MsaGap> & secondIterator)492 bool stepForward(QMutableListIterator<U2MsaGap> &firstIterator, QMutableListIterator<U2MsaGap> &secondIterator) {
493     CHECK(firstIterator.hasNext(), false);
494     CHECK(secondIterator.hasNext(), false);
495     const U2MsaGap &firstGap = firstIterator.peekNext();
496     const U2MsaGap &secondGap = secondIterator.peekNext();
497     if (firstGap.offset <= secondGap.offset) {
498         firstIterator.next();
499     } else {
500         secondIterator.next();
501     }
502     return true;
503 }
504 
505 // forward iterators to the state, when the next values have an intersection
506 // returns true if there an intersection was found, otherwise return false
forwardToIntersection(QMutableListIterator<U2MsaGap> & firstIterator,QMutableListIterator<U2MsaGap> & secondIterator)507 bool forwardToIntersection(QMutableListIterator<U2MsaGap> &firstIterator, QMutableListIterator<U2MsaGap> &secondIterator) {
508     bool endReached = false;
509     while (!hasIntersection(firstIterator.peekNext(), secondIterator.peekNext())) {
510         endReached = !stepForward(firstIterator, secondIterator);
511         if (endReached) {
512             break;
513         }
514     }
515     return !endReached;
516 }
517 
subGap(const U2MsaGap & subFrom,const U2MsaGap & subWhat)518 QPair<U2MsaGap, U2MsaGap> subGap(const U2MsaGap &subFrom, const U2MsaGap &subWhat) {
519     QPair<U2MsaGap, U2MsaGap> result;
520     if (subFrom.offset < subWhat.offset) {
521         result.first = U2MsaGap(subFrom.offset, subWhat.offset - subFrom.offset);
522     }
523     if (subFrom.endPos() > subWhat.endPos()) {
524         result.second = U2MsaGap(subWhat.endPos(), subFrom.endPos() - subWhat.endPos());
525     }
526     return result;
527 }
528 
removeCommonPart(QMutableListIterator<U2MsaGap> & iterator,const U2MsaGap & commonPart)529 void removeCommonPart(QMutableListIterator<U2MsaGap> &iterator, const U2MsaGap &commonPart) {
530     const QPair<U2MsaGap, U2MsaGap> gapDifference = subGap(iterator.peekNext(), commonPart);
531     if (gapDifference.second.isValid()) {
532         iterator.peekNext() = gapDifference.second;
533     }
534     if (gapDifference.first.isValid()) {
535         iterator.insert(gapDifference.first);
536     }
537     if (!gapDifference.first.isValid() && !gapDifference.second.isValid()) {
538         iterator.next();
539         iterator.remove();
540     }
541 }
542 
543 // extracts a common part from the next values, a difference between values is written back to the models
extractCommonPart(QMutableListIterator<U2MsaGap> & firstIterator,QMutableListIterator<U2MsaGap> & secondIterator)544 U2MsaGap extractCommonPart(QMutableListIterator<U2MsaGap> &firstIterator, QMutableListIterator<U2MsaGap> &secondIterator) {
545     SAFE_POINT(firstIterator.hasNext() && secondIterator.hasNext(), "Out of gap model boundaries", U2MsaGap());
546     U2MsaGap &firstGap = firstIterator.peekNext();
547     U2MsaGap &secondGap = secondIterator.peekNext();
548 
549     const U2MsaGap commonPart = firstGap.intersect(secondGap);
550     SAFE_POINT(commonPart.isValid(), "Gaps don't have an intersection", U2MsaGap());
551     removeCommonPart(firstIterator, commonPart);
552     removeCommonPart(secondIterator, commonPart);
553 
554     return commonPart;
555 }
556 
557 }  // namespace
558 
getGapModelsDifference(const U2MsaRowGapModel & firstGapModel,const U2MsaRowGapModel & secondGapModel,U2MsaRowGapModel & commonPart,U2MsaRowGapModel & firstDifference,U2MsaRowGapModel & secondDifference)559 void MsaRowUtils::getGapModelsDifference(const U2MsaRowGapModel &firstGapModel, const U2MsaRowGapModel &secondGapModel, U2MsaRowGapModel &commonPart, U2MsaRowGapModel &firstDifference, U2MsaRowGapModel &secondDifference) {
560     commonPart.clear();
561     firstDifference = firstGapModel;
562     QMutableListIterator<U2MsaGap> firstIterator(firstDifference);
563     secondDifference = secondGapModel;
564     QMutableListIterator<U2MsaGap> secondIterator(secondDifference);
565 
566     while (firstIterator.hasNext() && secondIterator.hasNext()) {
567         const bool intersectionFound = forwardToIntersection(firstIterator, secondIterator);
568         if (!intersectionFound) {
569             break;
570         }
571         commonPart << extractCommonPart(firstIterator, secondIterator);
572     }
573     mergeConsecutiveGaps(commonPart);
574 }
575 
576 namespace {
577 
insertGap(U2MsaRowGapModel & gapModel,const U2MsaGap & gap)578 void insertGap(U2MsaRowGapModel &gapModel, const U2MsaGap &gap) {
579     for (int i = 0; i < gapModel.size(); i++) {
580         if (gapModel[i].endPos() < gap.offset) {
581             // search the proper location
582             continue;
583         } else if (gapModel[i].offset > gap.endPos()) {
584             // no intersection, just insert
585             gapModel.insert(i, gap);
586         } else {
587             // there is an intersection
588             gapModel[i].offset = qMin(gapModel[i].offset, gap.offset);
589             gapModel[i].setEndPos(qMax(gapModel[i].endPos(), gap.endPos()));
590             int gapsToRemove = 0;
591             for (int j = i + 1; j < gapModel.size(); j++) {
592                 if (gapModel[j].endPos() <= gapModel[i].endPos()) {
593                     // this gap is fully covered by a new gap, just remove
594                     gapsToRemove++;
595                 } else if (gapModel[j].offset <= gapModel[i].endPos()) {
596                     // this gap is partially covered by a new gap, enlarge the new gap and remove
597                     gapModel[i].setEndPos(qMax(gapModel[i].endPos(), gapModel[j].endPos()));
598                     gapsToRemove++;
599                 } else {
600                     break;
601                 }
602             }
603 
604             gapModel.erase(gapModel.begin() + i + 1, gapModel.begin() + i + gapsToRemove + 1);
605         }
606     }
607 }
608 
subtitudeGap(QMutableListIterator<U2MsaGap> & minuendIterator,QMutableListIterator<U2MsaGap> & subtrahendIterator)609 void subtitudeGap(QMutableListIterator<U2MsaGap> &minuendIterator, QMutableListIterator<U2MsaGap> &subtrahendIterator) {
610     const QPair<U2MsaGap, U2MsaGap> substitutionResult = subGap(minuendIterator.next(), subtrahendIterator.peekNext());
611     minuendIterator.remove();
612 
613     if (substitutionResult.second.isValid()) {
614         minuendIterator.insert(substitutionResult.second);
615         minuendIterator.previous();
616     }
617 
618     if (substitutionResult.first.isValid()) {
619         minuendIterator.insert(substitutionResult.first);
620         minuendIterator.previous();
621     }
622 }
623 
624 }  // namespace
625 
mergeGapModels(const U2MsaListGapModel & gapModels)626 U2MsaRowGapModel MsaRowUtils::mergeGapModels(const U2MsaListGapModel &gapModels) {
627     U2MsaRowGapModel mergedGapModel;
628     for (const U2MsaRowGapModel &gapModel : qAsConst(gapModels)) {
629         for (const U2MsaGap &gap : qAsConst(gapModel)) {
630             insertGap(mergedGapModel, gap);
631         }
632     }
633     return mergedGapModel;
634 }
635 
subtitudeGapModel(const U2MsaRowGapModel & minuendGapModel,const U2MsaRowGapModel & subtrahendGapModel)636 U2MsaRowGapModel MsaRowUtils::subtitudeGapModel(const U2MsaRowGapModel &minuendGapModel, const U2MsaRowGapModel &subtrahendGapModel) {
637     U2MsaRowGapModel result = minuendGapModel;
638     U2MsaRowGapModel subtrahendGapModelCopy = subtrahendGapModel;
639     QMutableListIterator<U2MsaGap> minuendIterator(result);
640     QMutableListIterator<U2MsaGap> subtrahendIterator(subtrahendGapModelCopy);
641 
642     while (minuendIterator.hasNext() && subtrahendIterator.hasNext()) {
643         const bool intersectionFound = forwardToIntersection(minuendIterator, subtrahendIterator);
644         if (!intersectionFound) {
645             break;
646         }
647         subtitudeGap(minuendIterator, subtrahendIterator);
648     }
649 
650     return minuendGapModel;
651 }
652 
reverseGapModel(const U2MsaRowGapModel & gapModel,qint64 rowLengthWithoutTrailing)653 U2MsaRowGapModel MsaRowUtils::reverseGapModel(const U2MsaRowGapModel &gapModel, qint64 rowLengthWithoutTrailing) {
654     U2MsaRowGapModel reversedGapModel = gapModel;
655 
656     foreach (const U2MsaGap &gap, gapModel) {
657         if (rowLengthWithoutTrailing - gap.endPos() < 0) {
658             Q_ASSERT(false);  // original model has gaps out of range or trailing gaps
659             continue;
660         }
661         reversedGapModel.prepend(U2MsaGap(rowLengthWithoutTrailing - gap.offset, gap.gap));
662     }
663 
664     if (hasLeadingGaps(gapModel)) {
665         reversedGapModel.removeLast();
666         reversedGapModel.prepend(gapModel.first());
667     }
668 
669     return reversedGapModel;
670 }
671 
hasLeadingGaps(const U2MsaRowGapModel & gapModel)672 bool MsaRowUtils::hasLeadingGaps(const U2MsaRowGapModel &gapModel) {
673     return !gapModel.isEmpty() && gapModel.first().offset == 0;
674 }
675 
removeTrailingGapsFromModel(qint64 length,U2MsaRowGapModel & gapModel)676 void MsaRowUtils::removeTrailingGapsFromModel(qint64 length, U2MsaRowGapModel &gapModel) {
677     for (int i = 0; i < gapModel.size(); i++) {
678         const U2MsaGap &gap = gapModel.at(i);
679         if (gap.offset >= length) {
680             while (gapModel.size() > i) {
681                 gapModel.removeLast();
682             }
683         } else {
684             length += gap.gap;
685         }
686     }
687 }
688 
689 }  // namespace U2
690