1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include "MsaDbiUtils.h"
23 
24 #include <U2Core/DNASequenceUtils.h>
25 #include <U2Core/MultipleSequenceAlignmentExporter.h>
26 #include <U2Core/RawDataUdrSchema.h>
27 #include <U2Core/U2AlphabetUtils.h>
28 #include <U2Core/U2AttributeDbi.h>
29 #include <U2Core/U2OpStatusUtils.h>
30 #include <U2Core/U2SafePoints.h>
31 #include <U2Core/U2SequenceDbi.h>
32 
33 namespace U2 {
34 
35 /** Validates 'pos' in an alignment: it must be non-negative and less than or equal to the alignment length */
splitBytesToCharsAndGaps(const QByteArray & input,QByteArray & seqBytes,QList<U2MsaGap> & gapModel)36 void MaDbiUtils::splitBytesToCharsAndGaps(const QByteArray &input, QByteArray &seqBytes, QList<U2MsaGap> &gapModel) {
37     bool previousCharIsGap = false;
38     int gapsCount = 0;
39     int gapsOffset = 0;
40 
41     for (int i = 0; i < input.count(); ++i) {
42         // A char
43         if ((U2Msa::GAP_CHAR != input.at(i))) {
44             if (previousCharIsGap) {
45                 U2MsaGap gap(gapsOffset, gapsCount);
46                 gapModel.append(gap);
47                 gapsCount = 0;
48             }
49             seqBytes.append(input.at(i));
50             previousCharIsGap = false;
51         }
52         // A gap
53         else {
54             gapsCount++;
55             // A gap before the end of the row
56             if (i < input.count() - 1) {
57                 if (!previousCharIsGap) {
58                     gapsOffset = i;
59                 }
60                 previousCharIsGap = true;
61             }
62             // A gap at the end of the row
63             else {
64                 // Correct the offset if there is one gap at the end of the row
65                 if (1 == gapsCount) {
66                     gapsOffset = i;
67                 }
68                 SAFE_POINT(gapsOffset >= 0, "Negative gap offset!", );
69                 SAFE_POINT(gapsCount > 0, "Non-positive gap length!", );
70                 U2MsaGap gap(gapsOffset, gapsCount);
71                 gapModel.append(gap);
72             }
73         }
74     }
75 
76     SAFE_POINT(-1 == seqBytes.indexOf(U2Msa::GAP_CHAR), "Row sequence contains gaps!", );
77 }
78 
getMaLength(const U2EntityRef & maRef,U2OpStatus & os)79 qint64 MaDbiUtils::getMaLength(const U2EntityRef &maRef, U2OpStatus &os) {
80     DbiConnection con(maRef.dbiRef, os);
81     CHECK_OP(os, -1);
82 
83     U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
84     SAFE_POINT_EXT(nullptr != msaDbi, os.setError("NULL Msa Dbi!"), -1);
85 
86     qint64 length = msaDbi->getMsaLength(maRef.entityId, os);
87     CHECK_OP(os, -1);
88 
89     return length;
90 }
91 
updateMaLength(const U2EntityRef & maRef,qint64 newLen,U2OpStatus & os)92 void MaDbiUtils::updateMaLength(const U2EntityRef &maRef, qint64 newLen, U2OpStatus &os) {
93     // Prepare the connection
94     DbiConnection con(maRef.dbiRef, os);
95     CHECK_OP(os, );
96 
97     U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
98     SAFE_POINT_EXT(nullptr != msaDbi, os.setError("NULL Msa Dbi!"), );
99 
100     // Update the data
101     msaDbi->updateMsaLength(maRef.entityId, newLen, os);
102 }
103 
getMaAlphabet(const U2EntityRef & maRef,U2OpStatus & os)104 U2AlphabetId MaDbiUtils::getMaAlphabet(const U2EntityRef &maRef, U2OpStatus &os) {
105     DbiConnection con(maRef.dbiRef, os);
106     CHECK_OP(os, U2AlphabetId());
107 
108     U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
109     SAFE_POINT_EXT(nullptr != msaDbi, os.setError("NULL Msa Dbi!"), U2AlphabetId());
110 
111     U2AlphabetId alphabet = msaDbi->getMsaAlphabet(maRef.entityId, os);
112     CHECK_OP(os, U2AlphabetId());
113 
114     return alphabet;
115 }
116 
updateMaAlphabet(const U2EntityRef & maRef,const U2AlphabetId & alphabet,U2OpStatus & os)117 void MaDbiUtils::updateMaAlphabet(const U2EntityRef &maRef, const U2AlphabetId &alphabet, U2OpStatus &os) {
118     // Prepare the connection
119     SAFE_POINT_EXT(alphabet.isValid(), os.setError("Invalid alphabet was passed !"), );
120     DbiConnection con(maRef.dbiRef, os);
121     CHECK_OP(os, );
122 
123     U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
124     SAFE_POINT_EXT(nullptr != msaDbi, os.setError("NULL Msa Dbi!"), );
125 
126     // Update the data
127     msaDbi->updateMsaAlphabet(maRef.entityId, alphabet, os);
128 }
129 
renameMa(const U2EntityRef & maRef,const QString & newName,U2OpStatus & os)130 void MaDbiUtils::renameMa(const U2EntityRef &maRef, const QString &newName, U2OpStatus &os) {
131     if (newName.isEmpty()) {
132         os.setError(tr("Can't rename an alignment to an empty name!"));
133     }
134 
135     // Prepare the connection
136     DbiConnection con(maRef.dbiRef, os);
137     CHECK_OP(os, );
138 
139     U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
140     SAFE_POINT(nullptr != msaDbi, "NULL Msa Dbi!", );
141 
142     // Update the name
143     msaDbi->updateMsaName(maRef.entityId, newName, os);
144 }
145 
updateRowGapModel(const U2EntityRef & msaRef,qint64 rowId,const QList<U2MsaGap> & gaps,U2OpStatus & os)146 void MaDbiUtils::updateRowGapModel(const U2EntityRef &msaRef, qint64 rowId, const QList<U2MsaGap> &gaps, U2OpStatus &os) {
147     // Prepare the connection
148     DbiConnection con(msaRef.dbiRef, os);
149     CHECK_OP(os, );
150 
151     U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
152     SAFE_POINT(nullptr != msaDbi, "NULL Msa Dbi!", );
153 
154     // Update the data
155     msaDbi->updateGapModel(msaRef.entityId, rowId, gaps, os);
156 }
157 
updateRowsOrder(const U2EntityRef & meRef,const QList<qint64> & rowsOrder,U2OpStatus & os)158 void MaDbiUtils::updateRowsOrder(const U2EntityRef &meRef, const QList<qint64> &rowsOrder, U2OpStatus &os) {
159     // Prepare the connection
160     DbiConnection con(meRef.dbiRef, os);
161     CHECK_OP(os, );
162 
163     U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
164     SAFE_POINT(msaDbi != nullptr, "Msa Dbi is null!", );
165 
166     // Update the data
167     msaDbi->setNewRowsOrder(meRef.entityId, rowsOrder, os);
168 }
169 
renameRow(const U2EntityRef & msaRef,qint64 rowId,const QString & newName,U2OpStatus & os)170 void MaDbiUtils::renameRow(const U2EntityRef &msaRef, qint64 rowId, const QString &newName, U2OpStatus &os) {
171     if (newName.isEmpty()) {
172         os.setError(tr("Can't rename a row to an empty name!"));
173     }
174 
175     // Prepare the connection
176     DbiConnection con(msaRef.dbiRef, os);
177     CHECK_OP(os, );
178 
179     U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
180     SAFE_POINT(nullptr != msaDbi, "NULL Msa Dbi!", );
181 
182     // Update the row name
183     msaDbi->updateRowName(msaRef.entityId, rowId, newName, os);
184 }
185 
moveRows(const U2EntityRef & msaRef,const QList<qint64> & rowsToMove,const int delta,U2OpStatus & os)186 void MaDbiUtils::moveRows(const U2EntityRef &msaRef, const QList<qint64> &rowsToMove, const int delta, U2OpStatus &os) {
187     DbiConnection con(msaRef.dbiRef, false, os);
188     CHECK_OP(os, );
189 
190     U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
191     SAFE_POINT(nullptr != msaDbi, "NULL Msa Dbi!", );
192 
193     if (delta == 0 || rowsToMove.isEmpty()) {
194         return;
195     }
196 
197     QList<U2MsaRow> rows = msaDbi->getRows(msaRef.entityId, os);
198     CHECK_OP(os, );
199 
200     QList<qint64> rowIds;
201     for (int i = 0; i < rows.length(); ++i) {
202         rowIds << rows[i].rowId;
203     }
204 
205     QList<QPair<int, int>> from_To;
206 
207     if (delta < 0) {
208         int rowIndex = rowIds.indexOf(rowsToMove.first());
209         if (rowIndex == -1) {
210             os.setError("Invalid row list");
211             return;
212         }
213         int moveToIndex = rowIndex + delta >= 0 ? rowIndex + delta : 0;
214         from_To.append(QPair<int, int>(rowIndex, moveToIndex));
215         for (int i = 1; i < rowsToMove.length(); ++i) {
216             rowIndex = rowIds.indexOf(rowsToMove[i]);
217             if (rowIndex == -1) {
218                 os.setError("Invalid row list");
219                 return;
220             }
221             if (rowIndex <= from_To[i - 1].first) {
222                 os.setError("List of rows to move is not ordered");
223                 return;
224             }
225             moveToIndex = rowIndex + delta > from_To[i - 1].second ? rowIndex + delta : from_To[i - 1].second + 1;
226             from_To.append(QPair<int, int>(rowIndex, moveToIndex));
227         }
228     } else {
229         int rowIndex = rowIds.indexOf(rowsToMove.last());
230         if (rowIndex == -1) {
231             os.setError("Invalid row list");
232             return;
233         }
234         int moveToIndex = rowIndex + delta < rowIds.length() ? rowIndex + delta : rowIds.length() - 1;
235         from_To.append(QPair<int, int>(rowIndex, moveToIndex));
236         for (int i = 1; i < rowsToMove.length(); ++i) {
237             rowIndex = rowIds.indexOf(rowsToMove[rowsToMove.length() - i - 1]);
238             if (rowIndex == -1) {
239                 os.setError("Invalid row list");
240                 return;
241             }
242             if (rowIndex >= from_To[i - 1].first) {
243                 os.setError("List of rows to move is not ordered");
244                 return;
245             }
246             moveToIndex = rowIndex + delta < from_To[i - 1].second ? rowIndex + delta : from_To[i - 1].second - 1;
247             from_To.append(QPair<int, int>(rowIndex, moveToIndex));
248         }
249     }
250     QPair<int, int> coords;
251     foreach (coords, from_To) {
252         rowIds.move(coords.first, coords.second);
253     }
254     msaDbi->setNewRowsOrder(msaRef.entityId, rowIds, os);
255     CHECK_OP(os, );
256 }
257 
getStartAndEndSequencePositions(const QByteArray & seq,const QList<U2MsaGap> & gaps,qint64 pos,qint64 count,qint64 & startPosInSeq,qint64 & endPosInSeq)258 void MaDbiUtils::getStartAndEndSequencePositions(const QByteArray &seq, const QList<U2MsaGap> &gaps, qint64 pos, qint64 count, qint64 &startPosInSeq, qint64 &endPosInSeq) {
259     int rowLengthWithoutTrailingGap = MsaRowUtils::getRowLengthWithoutTrailing(seq, gaps);
260     SAFE_POINT(pos < rowLengthWithoutTrailingGap, "Incorrect position!", );
261 
262     // Remove chars from the sequence
263     // Calculate start position in the sequence
264     if (U2Msa::GAP_CHAR == MsaRowUtils::charAt(seq, gaps, pos)) {
265         int i = 1;
266         while (U2Msa::GAP_CHAR == MsaRowUtils::charAt(seq, gaps, pos + i)) {
267             if (MsaRowUtils::getRowLength(seq, gaps) == pos + i) {
268                 break;
269             }
270             i++;
271         }
272         startPosInSeq = MsaRowUtils::getUngappedPosition(gaps, seq.length(), pos + i);
273     } else {
274         startPosInSeq = MsaRowUtils::getUngappedPosition(gaps, seq.length(), pos);
275     }
276 
277     // Calculate end position in the sequence
278     int endRegionPos = pos + count;  // non-inclusive
279 
280     if (endRegionPos > rowLengthWithoutTrailingGap) {
281         endRegionPos = rowLengthWithoutTrailingGap;
282     }
283 
284     if (endRegionPos == rowLengthWithoutTrailingGap) {
285         endPosInSeq = seq.length();
286     } else {
287         if (U2Msa::GAP_CHAR == MsaRowUtils::charAt(seq, gaps, endRegionPos)) {
288             int i = 1;
289             while (U2Msa::GAP_CHAR == MsaRowUtils::charAt(seq, gaps, endRegionPos + i)) {
290                 if (MsaRowUtils::getRowLength(seq, gaps) == endRegionPos + i) {
291                     break;
292                 }
293                 i++;
294             }
295             endPosInSeq = MsaRowUtils::getUngappedPosition(gaps, seq.length(), endRegionPos + i);
296         } else {
297             endPosInSeq = MsaRowUtils::getUngappedPosition(gaps, seq.length(), endRegionPos);
298         }
299     }
300 }
301 
getCheckedConnection(const U2DbiRef & dbiRef,U2OpStatus & os)302 DbiConnection *MaDbiUtils::getCheckedConnection(const U2DbiRef &dbiRef, U2OpStatus &os) {
303     QScopedPointer<DbiConnection> con(new DbiConnection(dbiRef, os));
304     CHECK_OP(os, nullptr);
305 
306     if (nullptr == con->dbi) {
307         os.setError("NULL root dbi");
308         return nullptr;
309     }
310 
311     if (nullptr == con->dbi->getMsaDbi()) {
312         os.setError("NULL MSA dbi");
313         return nullptr;
314     }
315 
316     if (con->dbi->getSequenceDbi() == nullptr) {
317         os.setError("NULL sequence dbi");
318         return nullptr;
319     }
320     return con.take();
321 }
322 
323 /** Validates that all 'rowIds' contains in the alignment rows */
validateRowIds(const MultipleSequenceAlignment & al,const QList<qint64> & rowIds)324 bool MaDbiUtils::validateRowIds(const MultipleSequenceAlignment &al, const QList<qint64> &rowIds) {
325     QSet<qint64> alRowIds = al->getRowsIds().toSet();
326     for (qint64 rowId : qAsConst(rowIds)) {
327         if (!alRowIds.contains(rowId)) {
328             coreLog.trace(QString("No row ID '%1' in '%2' alignment!").arg(rowId).arg(al->getName()));
329             return false;
330         }
331     }
332     return true;
333 }
334 
validateRowIds(U2MsaDbi * msaDbi,const U2DataId & msaId,const QList<qint64> & rowIds,U2OpStatus & os)335 void MaDbiUtils::validateRowIds(U2MsaDbi *msaDbi, const U2DataId &msaId, const QList<qint64> &rowIds, U2OpStatus &os) {
336     QList<U2MsaRow> allRows = msaDbi->getRows(msaId, os);
337     CHECK_OP(os, );
338     QList<qint64> allRowIds;
339     foreach (const U2MsaRow &row, allRows) {
340         allRowIds << row.rowId;
341     }
342     foreach (qint64 rowId, rowIds) {
343         if (!allRowIds.contains(rowId)) {
344             os.setError(QString("No row ID '%1' in an alignment!").arg(rowId));
345             return;
346         }
347     }
348 }
349 
350 /////////////////////////////////////////////////////////////////
351 // Helper-methods for additional calculations
352 
calculateGapModelAfterInsert(QList<U2MsaGap> & gapModel,qint64 pos,qint64 count)353 void MsaDbiUtils::calculateGapModelAfterInsert(QList<U2MsaGap> &gapModel, qint64 pos, qint64 count) {
354     SAFE_POINT(pos >= 0, QString("Invalid position '%1'!").arg(pos), );
355     SAFE_POINT(count > 0, QString("Invalid characters count '%1'!").arg(count), );
356 
357     // There are no gaps yet
358     if (gapModel.isEmpty()) {
359         U2MsaGap gap(pos, count);
360         gapModel.append(gap);
361         return;
362     }
363     // There are gaps in the row
364     else {
365         // Insert gaps to the row beginning
366         if (0 == pos) {
367             U2MsaGap &firstGap = gapModel[0];
368             if (0 == firstGap.offset) {
369                 firstGap.gap += count;
370             } else {
371                 U2MsaGap beginningGap(0, count);
372                 gapModel.insert(0, beginningGap);
373             }
374 
375             // Shift other gaps
376             if (gapModel.count() > 1) {
377                 for (int i = 1; i < gapModel.count(); ++i) {
378                     qint64 newOffset = gapModel[i].offset + count;
379                     gapModel[i].offset = newOffset;
380                 }
381             }
382         }
383         // Gaps are inserted to the middle of the row
384         else {
385             // A gap is near
386             if (gapInPosition(gapModel, pos) ||
387                 gapInPosition(gapModel, pos - 1)) {
388                 // Find the gaps and append 'count' gaps to it
389                 // Shift all gaps that further in the row
390                 for (int i = 0; i < gapModel.count(); ++i) {
391                     if (pos >= gapModel[i].offset) {
392                         if (pos <= gapModel[i].offset + gapModel[i].gap) {
393                             gapModel[i].gap += count;
394                         }
395                     } else {
396                         gapModel[i].offset += count;
397                     }
398                 }
399             }
400             // Insert between chars
401             else {
402                 bool found = false;
403 
404                 int indexGreaterGaps = 0;
405                 for (int i = 0; i < gapModel.count(); ++i) {
406                     if (pos > gapModel[i].offset + gapModel[i].gap) {
407                         continue;
408                     } else {
409                         found = true;
410                         U2MsaGap newGap(pos, count);
411                         gapModel.insert(i, newGap);
412                         indexGreaterGaps = i;
413                         break;
414                     }
415                 }
416 
417                 // If found somewhere between existent gaps
418                 if (found) {
419                     // Shift further gaps
420                     for (int i = indexGreaterGaps + 1; i < gapModel.count(); ++i) {
421                         gapModel[i].offset += count;
422                     }
423                 }
424                 // This is the last gap
425                 else {
426                     U2MsaGap newGap(pos, count);
427                     gapModel.append(newGap);
428                 }
429             }
430         }
431     }
432 }
433 
cutOffLeadingGaps(QList<U2MsaRow> & rows)434 QList<U2MsaRow> MsaDbiUtils::cutOffLeadingGaps(QList<U2MsaRow> &rows) {
435     qint64 leadingGapsToRemove = LLONG_MAX;
436     for (qint64 i = 0; i < rows.length(); ++i) {
437         // If some rows haven't any leading gaps,
438         // If some rows haven't any gaps
439         // If some rows first gap's offset isn't zero
440         // return
441         if (leadingGapsToRemove == 0 || rows[i].gaps.isEmpty() || rows[i].gaps.first().offset != 0) {
442             leadingGapsToRemove = 0;
443             return QList<U2MsaRow>();
444         }
445         leadingGapsToRemove = qMin(leadingGapsToRemove, rows[i].gaps.first().gap);
446     }
447 
448     // If there is any leading gaps after all, they should be removed.
449     if (leadingGapsToRemove != 0) {
450         for (qint64 i = 0; i < rows.length(); ++i) {
451             calculateGapModelAfterRemove(rows[i].gaps, 0, leadingGapsToRemove);
452         }
453     }
454     return rows;
455 }
456 
cutOffTrailingGaps(QList<U2MsaRow> & rows,const qint64 msaLength)457 QList<U2MsaRow> MsaDbiUtils::cutOffTrailingGaps(QList<U2MsaRow> &rows, const qint64 msaLength) {
458     QList<U2MsaRow> affectedRows;
459     for (QList<U2MsaRow>::iterator rowIt = rows.begin(); rowIt < rows.end(); ++rowIt) {
460         // If there are no gaps in the row, skip this row.
461         if (rowIt->gaps.isEmpty()) {
462             continue;
463         }
464 
465         // Delete all gaps with offset after msa length.
466         for (int gapReverseIndex = rowIt->gaps.size() - 1;
467              gapReverseIndex >= 0 && gapReverseIndex < rowIt->gaps.size() && rowIt->gaps.at(gapReverseIndex).offset > msaLength - 1;
468              --gapReverseIndex) {
469             rowIt->gaps.removeAt(gapReverseIndex++);
470             affectedRows << *rowIt;
471         }
472 
473         // Cut off all gaps with offset before msa length and end after msa length
474         if (!rowIt->gaps.isEmpty() && rowIt->gaps.last().gap + rowIt->gaps.last().offset > msaLength) {
475             rowIt->gaps.last().gap = msaLength - rowIt->gaps.last().offset;
476             affectedRows << *rowIt;
477         }
478     }
479     return affectedRows;
480 }
481 
calculateGapModelAfterRemove(QList<U2MsaGap> & gapModel,qint64 pos,qint64 count)482 void MsaDbiUtils::calculateGapModelAfterRemove(QList<U2MsaGap> &gapModel, qint64 pos, qint64 count) {
483     QList<U2MsaGap> newGapModel;
484     qint64 endRegionPos = pos + count;  // non-inclusive
485     foreach (U2MsaGap gap, gapModel) {
486         qint64 gapEnd = gap.offset + gap.gap;
487         if (gapEnd < pos) {
488             newGapModel << gap;
489         } else if (gapEnd <= endRegionPos) {
490             if (gap.offset < pos) {
491                 gap.gap = pos - gap.offset;
492                 newGapModel << gap;
493             }
494             // Otherwise just remove the gap (do not write to the new gap model)
495         } else {
496             if (gap.offset < pos) {
497                 gap.gap -= count;
498                 SAFE_POINT(gap.gap >= 0, "Non-positive gap length!", );
499                 newGapModel << gap;
500             } else if (gap.offset < endRegionPos) {
501                 gap.gap = gapEnd - endRegionPos;
502                 gap.offset = pos;
503                 SAFE_POINT(gap.gap > 0, "Non-positive gap length!", );
504                 SAFE_POINT(gap.offset >= 0, "Negative gap offset!", );
505                 newGapModel << gap;
506             } else {
507                 // Shift the gap
508                 gap.offset -= count;
509                 SAFE_POINT(gap.offset >= 0, "Negative gap offset!", );
510                 newGapModel << gap;
511             }
512         }
513     }
514 
515     gapModel = newGapModel;
516 }
517 
calculateGapsLength(const QList<U2MsaGap> & gapModel)518 qint64 MsaDbiUtils::calculateGapsLength(const QList<U2MsaGap> &gapModel) {
519     qint64 length = 0;
520     foreach (U2MsaGap elt, gapModel) {
521         length += elt.gap;
522     }
523     return length;
524 }
525 
calculateRowLength(const U2MsaRow & row)526 qint64 MsaDbiUtils::calculateRowLength(const U2MsaRow &row) {
527     qint64 seqLength = row.gend - row.gstart;
528     qint64 gapsLength = calculateGapsLength(row.gaps);
529     return seqLength + gapsLength;
530 }
531 
mergeConsecutiveGaps(QList<U2MsaGap> & gapModel)532 void MsaDbiUtils::mergeConsecutiveGaps(QList<U2MsaGap> &gapModel) {
533     QList<U2MsaGap> newGapModel;
534     if (gapModel.isEmpty()) {
535         return;
536     }
537 
538     newGapModel << gapModel[0];
539     int indexInNewGapModel = 0;
540     for (int i = 1; i < gapModel.count(); ++i) {
541         int previousGapEnd = newGapModel[indexInNewGapModel].offset +
542                              newGapModel[indexInNewGapModel].gap - 1;
543         int currectGapStart = gapModel[i].offset;
544         SAFE_POINT(currectGapStart > previousGapEnd,
545                    "Incorrect gap model during merging consecutive gaps!", );
546         if (currectGapStart == previousGapEnd + 1) {
547             // Merge gaps
548             qint64 newGapLength = newGapModel[indexInNewGapModel].gap + gapModel[i].gap;
549             SAFE_POINT(newGapLength > 0, "Non-positive gap length!", )
550             newGapModel[indexInNewGapModel].gap = newGapLength;
551         } else {
552             // Add the gap to the list
553             newGapModel << gapModel[i];
554             indexInNewGapModel++;
555         }
556     }
557     gapModel = newGapModel;
558 }
559 
removeCharsFromRow(QByteArray & seq,QList<U2MsaGap> & gaps,qint64 pos,qint64 count)560 void MsaDbiUtils::removeCharsFromRow(QByteArray &seq, QList<U2MsaGap> &gaps, qint64 pos, qint64 count) {
561     SAFE_POINT(pos >= 0, "Incorrect position!", );
562     SAFE_POINT(count > 0, "Incorrect characters count!", );
563 
564     if (pos >= MsaRowUtils::getRowLength(seq, gaps)) {
565         return;
566     }
567 
568     if (pos < MsaRowUtils::getRowLengthWithoutTrailing(seq, gaps)) {
569         qint64 startPosInSeq = -1;
570         qint64 endPosInSeq = -1;
571         MaDbiUtils::getStartAndEndSequencePositions(seq, gaps, pos, count, startPosInSeq, endPosInSeq);
572 
573         if ((startPosInSeq < endPosInSeq) && (-1 != startPosInSeq) && (-1 != endPosInSeq)) {
574             U2OpStatus2Log os;
575             DNASequenceUtils::removeChars(seq, startPosInSeq, endPosInSeq, os);
576             SAFE_POINT_OP(os, );
577         }
578     }
579 
580     calculateGapModelAfterRemove(gaps, pos, count);
581     mergeConsecutiveGaps(gaps);
582 }
583 
calculateGapModelAfterReplaceChar(QList<U2MsaGap> & gapModel,qint64 pos)584 void MaDbiUtils::calculateGapModelAfterReplaceChar(QList<U2MsaGap> &gapModel, qint64 pos) {
585     SAFE_POINT(pos >= 0, QString("Invalid position '%1'!").arg(pos), );
586 
587     for (int i = 0; i < gapModel.count(); ++i) {
588         U2MsaGap &curGap = gapModel[i];
589         qint64 gapStart = curGap.offset;
590         qint64 gapEnd = gapStart + curGap.gap;
591         if (pos >= gapStart && pos <= gapEnd) {
592             if (pos == gapStart) {
593                 if (curGap.gap == 1) {
594                     gapModel.removeAt(i);
595                 } else {
596                     curGap.offset++;
597                     curGap.gap--;
598                 }
599             } else if (pos == gapEnd - 1) {
600                 if (curGap.gap == 1) {
601                     gapModel.removeAt(i);
602                 } else {
603                     curGap.gap--;
604                 }
605             } else {
606                 gapModel.removeAt(i);
607                 U2MsaGap firstGap(gapStart, pos - gapStart);
608                 U2MsaGap secondGap(pos + 1, gapEnd - pos - 1);
609                 gapModel.insert(i, secondGap);
610                 gapModel.insert(i, firstGap);
611             }
612             break;
613         }
614     }
615 }
616 
replaceCharsInRow(QByteArray & sequence,QList<U2MsaGap> & gaps,const U2Region & range,char newChar,U2OpStatus & os)617 void MsaDbiUtils::replaceCharsInRow(QByteArray &sequence, QList<U2MsaGap> &gaps, const U2Region &range, char newChar, U2OpStatus &os) {
618     for (qint64 pos = range.startPos; pos < range.endPos(); pos++) {
619         qint64 lengthWithNoTrail = MsaRowUtils::getRowLengthWithoutTrailing(sequence, gaps);
620         if (pos < lengthWithNoTrail) {
621             qint64 posInSeq = -1;
622             qint64 endPosInSeq = -1;
623             MaDbiUtils::getStartAndEndSequencePositions(sequence, gaps, pos, 1, posInSeq, endPosInSeq);
624             if (posInSeq >= 0 && endPosInSeq > posInSeq) {
625                 DNASequenceUtils::replaceChars(sequence, posInSeq, QByteArray(1, newChar), os);
626                 CHECK_OP(os, );
627             } else {
628                 DNASequenceUtils::insertChars(sequence, posInSeq, QByteArray(1, newChar), os);
629                 CHECK_OP(os, );
630                 MaDbiUtils::calculateGapModelAfterReplaceChar(gaps, pos);
631             }
632         } else {
633             sequence.append(newChar);
634             if (pos != lengthWithNoTrail) {
635                 gaps.append(U2MsaGap(lengthWithNoTrail, pos - lengthWithNoTrail));
636             }
637         }
638     }
639 }
640 
cropCharsFromRow(MultipleSequenceAlignmentRow & alRow,qint64 pos,qint64 count)641 void MsaDbiUtils::cropCharsFromRow(MultipleSequenceAlignmentRow &alRow, qint64 pos, qint64 count) {
642     SAFE_POINT(pos >= 0, "Incorrect position!", );
643     SAFE_POINT(count > 0, "Incorrect characters count!", );
644 
645     // Change the sequence
646     qint64 initialRowLength = alRow->getRowLength();
647     qint64 initialSeqLength = alRow->getUngappedLength();
648     DNASequence modifiedSeq = alRow->getSequence();
649 
650     if (pos >= alRow->getRowLengthWithoutTrailing()) {
651         DNASequenceUtils::makeEmpty(modifiedSeq);
652     } else {
653         qint64 startPosInSeq = -1;
654         qint64 endPosInSeq = -1;
655         MaDbiUtils::getStartAndEndSequencePositions(alRow->getSequence().seq, alRow->getGapModel(), pos, count, startPosInSeq, endPosInSeq);
656 
657         // Remove inside a gap
658         if ((startPosInSeq <= endPosInSeq) && (-1 != startPosInSeq) && (-1 != endPosInSeq)) {
659             U2OpStatus2Log os;
660             if (endPosInSeq < initialSeqLength) {
661                 DNASequenceUtils::removeChars(modifiedSeq, endPosInSeq, initialSeqLength, os);
662                 SAFE_POINT_OP(os, );
663             }
664 
665             if (startPosInSeq > 0) {
666                 DNASequenceUtils::removeChars(modifiedSeq, 0, startPosInSeq, os);
667                 SAFE_POINT_OP(os, );
668             }
669         }
670     }
671 
672     // Change the gap model
673     QList<U2MsaGap> gapModel = alRow->getGapModel();
674     if (pos + count < initialRowLength) {
675         calculateGapModelAfterRemove(gapModel, pos + count, initialRowLength - pos - count);
676     }
677 
678     if (pos > 0) {
679         calculateGapModelAfterRemove(gapModel, 0, pos);
680     }
681     U2OpStatusImpl os;
682     alRow->setRowContent(modifiedSeq, gapModel, os);
683     CHECK_OP(os, );
684 }
685 
686 /** Returns "true" if there is a gap on position "pos" */
gapInPosition(const QList<U2MsaGap> & gapModel,qint64 pos)687 bool MsaDbiUtils::gapInPosition(const QList<U2MsaGap> &gapModel, qint64 pos) {
688     foreach (const U2MsaGap &gap, gapModel) {
689         if (gap.offset + gap.gap - 1 < pos) {
690             continue;
691         }
692         if (gap.offset > pos) {
693             return false;
694         }
695         return true;
696     }
697     return false;
698 }
699 
700 /////////////////////////////////////////////////////////////////
701 // MSA DBI Utilities
702 
updateMsa(const U2EntityRef & msaRef,const MultipleSequenceAlignment & ma,U2OpStatus & os)703 void MsaDbiUtils::updateMsa(const U2EntityRef &msaRef, const MultipleSequenceAlignment &ma, U2OpStatus &os) {
704     // Prepare the connection
705     DbiConnection con(msaRef.dbiRef, os);
706     CHECK_OP(os, );
707 
708     U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
709     SAFE_POINT(msaDbi != nullptr, "NULL Msa Dbi!", );
710 
711     U2SequenceDbi *seqDbi = con.dbi->getSequenceDbi();
712     SAFE_POINT(seqDbi != nullptr, "NULL Sequence Dbi!", );
713 
714     U2AttributeDbi *attrDbi = con.dbi->getAttributeDbi();
715     SAFE_POINT(attrDbi != nullptr, "NULL Attribute Dbi!", );
716 
717     //// UPDATE MSA OBJECT
718     const DNAAlphabet *originalMaAlphabet = ma->getAlphabet();
719     SAFE_POINT(originalMaAlphabet != nullptr, "The alignment alphabet is NULL!", );
720 
721     U2Msa msaObj;
722     msaObj.id = msaRef.entityId;
723     msaObj.visualName = ma->getName();
724     msaObj.alphabet.id = originalMaAlphabet->getId();
725     msaObj.length = ma->getLength();
726 
727     msaDbi->updateMsaName(msaRef.entityId, ma->getName(), os);
728     CHECK_OP(os, );
729 
730     msaDbi->updateMsaAlphabet(msaRef.entityId, originalMaAlphabet->getId(), os);
731     CHECK_OP(os, );
732 
733     msaDbi->updateMsaLength(msaRef.entityId, ma->getLength(), os);
734     CHECK_OP(os, );
735 
736     //// UPDATE ROWS AND SEQUENCES
737     // Get rows that are currently stored in the database
738     QList<U2MsaRow> currentRows = msaDbi->getRows(msaRef.entityId, os);
739     QList<qint64> currentRowIds;
740     CHECK_OP(os, );
741 
742     QList<qint64> newRowsIds = ma->getRowsIds();
743     QList<qint64> eliminatedRows;
744 
745     for (const U2MsaRow &currentRow: qAsConst(currentRows)) {
746         currentRowIds.append(currentRow.rowId);
747 
748         // Update data for rows with the same row and sequence IDs
749         if (newRowsIds.contains(currentRow.rowId)) {
750             // Update sequence and row info
751             U2MsaRow newRow = ma->getMsaRowByRowId(currentRow.rowId, os)->getRowDbInfo();
752             CHECK_OP(os, );
753 
754             if (newRow.sequenceId != currentRow.sequenceId) {
755                 // Kill the row from the current alignment, it is incorrect. New row with this ID will be created later.
756                 MsaDbiUtils::removeRow(msaRef, currentRow.rowId, os);
757                 CHECK_OP(os, );
758 
759                 currentRowIds.removeOne(currentRow.rowId);
760                 continue;
761             }
762 
763             DNASequence sequence = ma->getMsaRowByRowId(newRow.rowId, os)->getSequence();
764             CHECK_OP(os, );
765 
766             msaDbi->updateRowName(msaRef.entityId, newRow.rowId, sequence.getName(), os);
767             CHECK_OP(os, );
768 
769             msaDbi->updateRowContent(msaRef.entityId, newRow.rowId, sequence.seq, newRow.gaps, os);
770             CHECK_OP(os, );
771         } else {
772             // Remove rows that are no more present in the alignment
773             eliminatedRows.append(currentRow.rowId);
774         }
775     }
776 
777     msaDbi->removeRows(msaRef.entityId, eliminatedRows, os);
778     CHECK_OP(os, );
779 
780     // Add rows that are stored in memory, but are not present in the database,
781     // remember the rows order
782     QList<qint64> rowsOrder;
783     for (int i = 0, n = ma->getNumRows(); i < n; ++i) {
784         const MultipleSequenceAlignmentRow alRow = ma->getMsaRow(i);
785         U2MsaRow row = alRow->getRowDbInfo();
786 
787         if (row.sequenceId.isEmpty() || !currentRowIds.contains(row.rowId)) {
788             // Import the sequence
789             DNASequence rowSeq = alRow->getSequence();
790             U2Sequence sequence = U2Sequence();
791             sequence.visualName = rowSeq.getName();
792             sequence.circular = rowSeq.circular;
793             sequence.length = rowSeq.length();
794 
795             const DNAAlphabet *rowSequenceAlphabet = rowSeq.alphabet;
796             if (rowSequenceAlphabet == nullptr) {
797                 rowSequenceAlphabet = U2AlphabetUtils::findBestAlphabet(rowSeq.constData(), rowSeq.length());
798             }
799             SAFE_POINT(rowSequenceAlphabet != nullptr, "Failed to get alphabet for a sequence!", );
800             sequence.alphabet.id = rowSequenceAlphabet->getId();
801 
802             seqDbi->createSequenceObject(sequence, "", os);
803             CHECK_OP(os, );
804 
805             QVariantMap hints;
806             const QByteArray &seqData = rowSeq.constSequence();
807             seqDbi->updateSequenceData(sequence.id, U2_REGION_MAX, seqData, hints, os);
808             CHECK_OP(os, );
809 
810             // Create the row
811             row.rowId = -1;  // set the row ID automatically
812             row.sequenceId = sequence.id;
813             row.gstart = 0;
814             row.gend = sequence.length;
815             row.gaps = alRow->getGapModel();
816             MsaDbiUtils::addRow(msaRef, -1, row, os);
817             CHECK_OP(os, );
818         }
819         rowsOrder.append(row.rowId);
820     }
821 
822     //// UPDATE ROWS POSITIONS
823     msaDbi->setNewRowsOrder(msaRef.entityId, rowsOrder, os);
824 
825     //// UPDATE MALIGNMENT ATTRIBUTES
826     QVariantMap alInfo = ma->getInfo();
827 
828     foreach (QString key, alInfo.keys()) {
829         QString val = alInfo.value(key).value<QString>();
830         U2StringAttribute attr(msaRef.entityId, key, val);
831 
832         attrDbi->createStringAttribute(attr, os);
833         CHECK_OP(os, );
834     }
835 }
836 
updateRowContent(const U2EntityRef & msaRef,qint64 rowId,const QByteArray & seqBytes,const QList<U2MsaGap> & gaps,U2OpStatus & os)837 void MsaDbiUtils::updateRowContent(const U2EntityRef &msaRef, qint64 rowId, const QByteArray &seqBytes, const QList<U2MsaGap> &gaps, U2OpStatus &os) {
838     // Prepare the connection
839     DbiConnection con(msaRef.dbiRef, os);
840     CHECK_OP(os, );
841 
842     U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
843     SAFE_POINT(nullptr != msaDbi, "NULL Msa Dbi!", );
844 
845     // Update the data
846     msaDbi->updateRowContent(msaRef.entityId, rowId, seqBytes, gaps, os);
847 }
848 
insertGaps(const U2EntityRef & msaRef,const QList<qint64> & rowIds,qint64 pos,qint64 count,U2OpStatus & os,bool collapseTrailingGaps)849 void MsaDbiUtils::insertGaps(const U2EntityRef &msaRef, const QList<qint64> &rowIds, qint64 pos, qint64 count, U2OpStatus &os, bool collapseTrailingGaps) {
850     // Prepare the connection
851     DbiConnection con(msaRef.dbiRef, os);
852     CHECK_OP(os, );
853 
854     U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
855     SAFE_POINT(nullptr != msaDbi, "NULL Msa Dbi!", );
856 
857     // Get the MSA properties
858     const U2Msa msaObj = msaDbi->getMsaObject(msaRef.entityId, os);
859     const qint64 alLength = msaObj.length;
860 
861     // Validate the position
862     if (pos < 0 || pos > alLength) {
863         coreLog.trace(QString("Invalid position '%1' in '%2' alignment!").arg(pos).arg(msaObj.visualName));
864         os.setError(tr("Failed to insert gaps into an alignment!"));
865         return;
866     }
867 
868     // Validate the count of gaps
869     if (count <= 0) {
870         coreLog.trace(QString("Invalid value of characters count '%1'!").arg(count));
871         os.setError(tr("Failed to insert gaps into an alignment!"));
872         return;
873     }
874 
875     // Insert gaps into rows
876     QList<U2MsaRow> rows;
877     foreach (qint64 rowId, rowIds) {
878         const U2MsaRow row = msaDbi->getRow(msaRef.entityId, rowId, os);
879         CHECK_OP(os, );
880 
881         rows.append(row);
882     }
883 
884     int trailingGapsColumns = count;
885     foreach (U2MsaRow row, rows) {
886         // Calculate the new gap model
887         calculateGapModelAfterInsert(row.gaps, pos, count);
888 
889         // Trim trailing gap (if any)
890         qint64 seqLength = row.gend - row.gstart;
891         qint64 gapsLength = 0;
892         int diff = alLength - row.length;
893         trailingGapsColumns = diff < trailingGapsColumns ? diff : trailingGapsColumns;
894 
895         for (int i = 0, n = row.gaps.count(); i < n; ++i) {
896             const U2MsaGap &gap = row.gaps[i];
897             if ((i == n - 1) && (gap.offset >= seqLength + gapsLength)) {
898                 row.gaps.removeAt(i);
899                 break;
900             }
901             gapsLength += gap.gap;
902         }
903 
904         // Put the new gap model into the database
905         msaDbi->updateGapModel(msaRef.entityId, row.rowId, row.gaps, os);
906         CHECK_OP(os, );
907     }
908 
909     if (collapseTrailingGaps) {
910         qint64 enlargeAlignmentLength = 0;
911         foreach (qint64 rowId, rowIds) {
912             enlargeAlignmentLength = qMax(enlargeAlignmentLength, msaDbi->getRow(msaRef.entityId, rowId, os).length);
913             CHECK_OP(os, );
914         }
915         if (msaObj.length < enlargeAlignmentLength) {
916             msaDbi->updateMsaLength(msaRef.entityId, enlargeAlignmentLength, os);
917             CHECK_OP(os, );
918         }
919     } else {
920         int newLength = msaObj.length + count - trailingGapsColumns;
921         if (msaObj.length < newLength) {
922             msaDbi->updateMsaLength(msaRef.entityId, msaObj.length + count, os);
923         }
924         CHECK_OP(os, );
925     }
926 }
927 
removeRegion(const U2EntityRef & msaRef,const QList<qint64> & rowIds,qint64 pos,qint64 count,U2OpStatus & os)928 void MsaDbiUtils::removeRegion(const U2EntityRef &msaRef, const QList<qint64> &rowIds, qint64 pos, qint64 count, U2OpStatus &os) {
929     // Check parameters
930     CHECK_EXT(pos >= 0, os.setError(QString("Negative MSA pos: %1").arg(pos)), );
931     CHECK_EXT(count > 0, os.setError(QString("Wrong MSA base count: %1").arg(count)), );
932 
933     // Prepare the connection
934     QScopedPointer<DbiConnection> con(MaDbiUtils::getCheckedConnection(msaRef.dbiRef, os));
935     SAFE_POINT_OP(os, );
936     U2MsaDbi *msaDbi = con->dbi->getMsaDbi();
937     U2SequenceDbi *sequenceDbi = con->dbi->getSequenceDbi();
938 
939     U2Msa msa = msaDbi->getMsaObject(msaRef.entityId, os);
940     SAFE_POINT_OP(os, );
941 
942     MaDbiUtils::validateRowIds(msaDbi, msaRef.entityId, rowIds, os);
943     CHECK_OP(os, );
944 
945     qint64 rowNum = msaDbi->getNumOfRows(msaRef.entityId, os);
946     bool keepAlignmentLength = true;
947     if (rowNum == rowIds.size()) {
948         keepAlignmentLength = false;
949     }
950 
951     // Remove region for each row from the list
952     foreach (qint64 rowId, rowIds) {
953         U2MsaRow row = msaDbi->getRow(msaRef.entityId, rowId, os);
954         SAFE_POINT_OP(os, );
955 
956         U2Region seqReg(row.gstart, row.gend - row.gstart);
957         QByteArray seq = sequenceDbi->getSequenceData(row.sequenceId, seqReg, os);
958         SAFE_POINT_OP(os, );
959 
960         // Calculate the modified row
961         removeCharsFromRow(seq, row.gaps, pos, count);
962 
963         msaDbi->updateRowContent(msaRef.entityId, rowId, seq, row.gaps, os);
964         SAFE_POINT_OP(os, );
965     }
966     if (!keepAlignmentLength) {
967         msaDbi->updateMsaLength(msaRef.entityId, msa.length - count, os);
968     }
969 }
970 
replaceCharactersInRow(const U2EntityRef & msaRef,qint64 rowId,const U2Region & range,char newChar,U2OpStatus & os)971 void MsaDbiUtils::replaceCharactersInRow(const U2EntityRef &msaRef, qint64 rowId, const U2Region &range, char newChar, U2OpStatus &os) {
972     SAFE_POINT_EXT(newChar != U2Msa::GAP_CHAR, os.setError("Can't use GAP for replacement!"), );
973 
974     QScopedPointer<DbiConnection> con(MaDbiUtils::getCheckedConnection(msaRef.dbiRef, os));
975     CHECK_OP(os, );
976 
977     U2MsaDbi *msaDbi = con->dbi->getMsaDbi();
978     U2SequenceDbi *sequenceDbi = con->dbi->getSequenceDbi();
979 
980     U2Msa msa = msaDbi->getMsaObject(msaRef.entityId, os);
981     CHECK_OP(os, );
982 
983     MaDbiUtils::validateRowIds(msaDbi, msaRef.entityId, {rowId}, os);
984     CHECK_OP(os, );
985 
986     U2MsaRow row = msaDbi->getRow(msaRef.entityId, rowId, os);
987     CHECK_OP(os, );
988 
989     qint64 msaLength = msaDbi->getMsaLength(msaRef.entityId, os);
990     CHECK_OP(os, );
991     CHECK_EXT(U2Region(0, msaLength).contains(range), os.setError(tr("Invalid range: %1 %2").arg(range.startPos).arg(range.endPos())), );
992 
993     U2Region sequenceRange(row.gstart, row.gend - row.gstart);
994     QByteArray sequence = sequenceDbi->getSequenceData(row.sequenceId, sequenceRange, os);
995     CHECK_OP(os, );
996 
997     replaceCharsInRow(sequence, row.gaps, range, newChar, os);
998     CHECK_OP(os, );
999 
1000     msaDbi->updateRowContent(msaRef.entityId, rowId, sequence, row.gaps, os);
1001 }
1002 
replaceNonGapCharacter(const U2EntityRef & msaRef,char oldChar,char newChar,U2OpStatus & os)1003 QList<qint64> MsaDbiUtils::replaceNonGapCharacter(const U2EntityRef &msaRef, char oldChar, char newChar, U2OpStatus &os) {
1004     QList<qint64> modifiedRowIds;
1005     if (oldChar == newChar) {
1006         return modifiedRowIds;
1007     }
1008     QScopedPointer<DbiConnection> con(MaDbiUtils::getCheckedConnection(msaRef.dbiRef, os));
1009     CHECK_OP(os, modifiedRowIds);
1010 
1011     U2MsaDbi *msaDbi = con->dbi->getMsaDbi();
1012     U2SequenceDbi *sequenceDbi = con->dbi->getSequenceDbi();
1013 
1014     QList<qint64> rowIds = msaDbi->getOrderedRowIds(msaRef.entityId, os);
1015     CHECK_OP(os, modifiedRowIds);
1016     for (qint64 rowId : qAsConst(rowIds)) {
1017         U2MsaRow msaRow = msaDbi->getRow(msaRef.entityId, rowId, os);
1018         CHECK_OP(os, modifiedRowIds);
1019         U2Region sequenceRegion(msaRow.gstart, msaRow.gend - msaRow.gstart);
1020         QByteArray sequenceData = sequenceDbi->getSequenceData(msaRow.sequenceId, sequenceRegion, os);
1021         if (sequenceData.contains(oldChar)) {
1022             sequenceData.replace(oldChar, newChar);
1023             msaDbi->updateRowContent(msaRef.entityId, rowId, sequenceData, msaRow.gaps, os);
1024             CHECK_OP(os, modifiedRowIds);
1025             modifiedRowIds << rowId;
1026         }
1027     }
1028     return modifiedRowIds;
1029 }
1030 
keepOnlyAlphabetChars(const U2EntityRef & msaRef,const DNAAlphabet * alphabet,const QByteArray & replacementMap,U2OpStatus & os)1031 QList<qint64> MsaDbiUtils::keepOnlyAlphabetChars(const U2EntityRef &msaRef, const DNAAlphabet *alphabet, const QByteArray &replacementMap, U2OpStatus &os) {
1032     QList<qint64> modifiedRowIds;
1033     QScopedPointer<DbiConnection> con(MaDbiUtils::getCheckedConnection(msaRef.dbiRef, os));
1034     CHECK_OP(os, modifiedRowIds);
1035 
1036     bool hasReplacementMap = replacementMap.size() == 256;
1037     SAFE_POINT(hasReplacementMap || replacementMap.isEmpty(), "replacementMap has invalid size: " + QString::number(replacementMap.size()), modifiedRowIds);
1038 
1039     U2MsaDbi *msaDbi = con->dbi->getMsaDbi();
1040     U2SequenceDbi *sequenceDbi = con->dbi->getSequenceDbi();
1041 
1042     QList<qint64> rowIds = msaDbi->getOrderedRowIds(msaRef.entityId, os);
1043     CHECK_OP(os, modifiedRowIds);
1044     QByteArray alphabetChars = alphabet->getAlphabetChars();
1045     QBitArray validCharsMap = TextUtils::createBitMap(alphabetChars);
1046     char defaultChar = alphabet->getDefaultSymbol();
1047     for (qint64 rowId : qAsConst(rowIds)) {
1048         U2MsaRow msaRow = msaDbi->getRow(msaRef.entityId, rowId, os);
1049         CHECK_OP(os, modifiedRowIds);
1050         U2Region sequenceRegion(msaRow.gstart, msaRow.gend - msaRow.gstart);
1051         QByteArray sequenceData = sequenceDbi->getSequenceData(msaRow.sequenceId, sequenceRegion, os);
1052         bool isModified = false;
1053         for (int i = 0, n = sequenceData.length(); i < n; i++) {
1054             char c = sequenceData[i];
1055             if (!validCharsMap.testBit(c)) {
1056                 isModified = true;
1057                 char newChar = hasReplacementMap ? replacementMap[(quint8)c] : '\0';
1058                 sequenceData[i] = validCharsMap.testBit(newChar) ? newChar : defaultChar;
1059             }
1060         }
1061         if (isModified) {
1062             msaDbi->updateRowContent(msaRef.entityId, rowId, sequenceData, msaRow.gaps, os);
1063             CHECK_OP(os, modifiedRowIds);
1064             modifiedRowIds << rowId;
1065         }
1066     }
1067     return modifiedRowIds;
1068 }
1069 
removeEmptyRows(const U2EntityRef & msaRef,const QList<qint64> & rowIds,U2OpStatus & os)1070 QList<qint64> MsaDbiUtils::removeEmptyRows(const U2EntityRef &msaRef, const QList<qint64> &rowIds, U2OpStatus &os) {
1071     QScopedPointer<DbiConnection> con(MaDbiUtils::getCheckedConnection(msaRef.dbiRef, os));
1072     SAFE_POINT_OP(os, QList<qint64>());
1073     U2MsaDbi *msaDbi = con->dbi->getMsaDbi();
1074     U2SequenceDbi *sequenceDbi = con->dbi->getSequenceDbi();
1075 
1076     MaDbiUtils::validateRowIds(msaDbi, msaRef.entityId, rowIds, os);
1077     CHECK_OP(os, QList<qint64>());
1078 
1079     // find empty rows
1080     QList<qint64> emptyRowIds;
1081     foreach (qint64 rowId, rowIds) {
1082         U2MsaRow row = msaDbi->getRow(msaRef.entityId, rowId, os);
1083         SAFE_POINT_OP(os, QList<qint64>());
1084         U2Sequence seq = sequenceDbi->getSequenceObject(row.sequenceId, os);
1085         SAFE_POINT_OP(os, QList<qint64>());
1086         if (0 == seq.length) {
1087             emptyRowIds << row.rowId;
1088         }
1089     }
1090     if (!emptyRowIds.isEmpty()) {
1091         // remove empty rows
1092         msaDbi->removeRows(msaRef.entityId, emptyRowIds, os);
1093         SAFE_POINT_OP(os, QList<qint64>());
1094     }
1095     return emptyRowIds;
1096 }
1097 
crop(const U2EntityRef & msaRef,const QList<qint64> & rowIds,const U2Region & columnRange,U2OpStatus & os)1098 void MsaDbiUtils::crop(const U2EntityRef &msaRef, const QList<qint64> &rowIds, const U2Region &columnRange, U2OpStatus &os) {
1099     // Get the alignment.
1100     MultipleSequenceAlignmentExporter alExporter;
1101     MultipleSequenceAlignment al = alExporter.getAlignment(msaRef.dbiRef, msaRef.entityId, os);
1102 
1103     // Validate the parameters.
1104     if (columnRange.startPos < 0 || columnRange.endPos() > al->getLength()) {
1105         os.setError(tr("Invalid crop column range: %1..%2").arg(columnRange.startPos + 1).arg(columnRange.endPos()));
1106         uiLog.error(os.getError());
1107         return;
1108     }
1109     if (rowIds.isEmpty()) {
1110         os.setError(tr("List of ids to crop is empty"));
1111         uiLog.error(os.getError());
1112         return;
1113     }
1114     if (!MaDbiUtils::validateRowIds(al, rowIds)) {
1115         os.setError(tr("Invalid crop row ids"));
1116         uiLog.error(os.getError());
1117         return;
1118     }
1119     // Prepare the connection
1120     DbiConnection con(msaRef.dbiRef, os);
1121     CHECK_OP(os, );
1122 
1123     U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
1124     SAFE_POINT(msaDbi != nullptr, "NULL Msa Dbi!", );
1125 
1126     bool isRowLengthChanged = columnRange.length < al->getLength();
1127 
1128     // Crop or remove each row.
1129     for (int i = 0, n = al->getNumRows(); i < n; i++) {
1130         MultipleSequenceAlignmentRow row = al->getMsaRow(i)->getExplicitCopy();
1131         qint64 rowId = row->getRowId();
1132         if (!rowIds.contains(rowId)) {
1133             MsaDbiUtils::removeRow(msaRef, row->getRowId(), os);
1134             CHECK_OP(os, );
1135             continue;
1136         }
1137         if (!isRowLengthChanged) {
1138             continue;  // do not touch this column at all.
1139         }
1140         U2DataId sequenceId = row->getRowDbInfo().sequenceId;
1141         SAFE_POINT(!sequenceId.isEmpty(), "Empty sequence ID!", );
1142 
1143         // Calculate the modified row.
1144         cropCharsFromRow(row, columnRange.startPos, columnRange.length);
1145 
1146         // Put the new sequence and gap model into the database.
1147         msaDbi->updateRowContent(msaRef.entityId, rowId, row->getSequence().constSequence(), row->getGapModel(), os);
1148         CHECK_OP(os, );
1149     }
1150     if (isRowLengthChanged) {
1151         msaDbi->updateMsaLength(msaRef.entityId, columnRange.length, os);
1152     }
1153     CHECK_OP(os, );
1154 }
1155 
trim(const U2EntityRef & msaRef,U2OpStatus & os)1156 QList<qint64> MsaDbiUtils::trim(const U2EntityRef &msaRef, U2OpStatus &os) {
1157     const QList<qint64> invalidResult;
1158     // Prepare the connection
1159     DbiConnection con(msaRef.dbiRef, os);
1160     CHECK_OP(os, invalidResult);
1161 
1162     U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
1163     SAFE_POINT(nullptr != msaDbi, "NULL Msa Dbi!", invalidResult);
1164 
1165     qint64 msaLength = msaDbi->getMsaObject(msaRef.entityId, os).length;
1166     CHECK_OP(os, invalidResult);
1167     SAFE_POINT(msaLength >= 0, "Msa length is negative.", invalidResult);
1168 
1169     QList<U2MsaRow> rows = msaDbi->getRows(msaRef.entityId, os);
1170     CHECK_OP(os, invalidResult);
1171     SAFE_POINT(!rows.isEmpty(), "Msa rows list is empty.", invalidResult);
1172 
1173     // Trim trailing gaps from gap model
1174     QList<U2MsaRow> modifiedRows = cutOffTrailingGaps(rows, msaLength);
1175 
1176     // Trim leading gaps, it changes length of msa, msaLength doesn`t update.
1177     const QList<U2MsaRow> cutOffStartResult = cutOffLeadingGaps(rows);
1178     // if cutting off leading gaps was performed then all rows has changed
1179 
1180     if (!cutOffStartResult.isEmpty()) {
1181         modifiedRows << cutOffStartResult;
1182     }
1183 
1184     QList<qint64> modifiedRowIds;
1185     // Update gap model
1186     foreach (U2MsaRow row, modifiedRows) {
1187         msaDbi->updateGapModel(msaRef.entityId, row.rowId, row.gaps, os);
1188         CHECK_OP(os, invalidResult);
1189         modifiedRowIds.append(row.rowId);
1190     }
1191 
1192     qint64 newMsaLen = -1;
1193     // check if rows contains modified rows whitoud leading gaps
1194     rows = msaDbi->getRows(msaRef.entityId, os);
1195     CHECK_OP(os, invalidResult);
1196     SAFE_POINT(!rows.isEmpty(), "Msa rows list is empty.", invalidResult);
1197     foreach (U2MsaRow row, rows) {
1198         if (row.length != 0) {
1199             // row is not empty
1200             if (newMsaLen == -1) {
1201                 newMsaLen = row.length;
1202             } else {
1203                 newMsaLen = qMax(newMsaLen, row.length);
1204             }
1205         }
1206     }
1207     if (newMsaLen == -1) {
1208         // alignment is empty or full of gaps
1209         newMsaLen = 0;
1210     }
1211 
1212     msaDbi->updateMsaLength(msaRef.entityId, newMsaLen, os);
1213     return modifiedRowIds;
1214 }
1215 
addRow(const U2EntityRef & msaRef,qint64 posInMsa,U2MsaRow & row,U2OpStatus & os)1216 void MsaDbiUtils::addRow(const U2EntityRef &msaRef, qint64 posInMsa, U2MsaRow &row, U2OpStatus &os) {
1217     // Validate the parameters
1218     SAFE_POINT_EXT(!row.sequenceId.isEmpty(), os.setError("Invalid sequence reference"), );
1219 
1220     // Prepare the connection
1221     DbiConnection con(msaRef.dbiRef, os);
1222     CHECK_OP(os, );
1223 
1224     U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
1225     SAFE_POINT(nullptr != msaDbi, "NULL Msa Dbi!", );
1226 
1227     // Add the row
1228     msaDbi->addRow(msaRef.entityId, posInMsa, row, os);
1229     CHECK_OP(os, );
1230 }
1231 
removeRow(const U2EntityRef & msaRef,qint64 rowId,U2OpStatus & os)1232 void MsaDbiUtils::removeRow(const U2EntityRef &msaRef, qint64 rowId, U2OpStatus &os) {
1233     // Prepare the connection
1234     DbiConnection con(msaRef.dbiRef, os);
1235     CHECK_OP(os, );
1236 
1237     U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
1238     SAFE_POINT(nullptr != msaDbi, "NULL Msa Dbi!", );
1239 
1240     // Remove the row
1241     msaDbi->removeRow(msaRef.entityId, rowId, os);
1242 }
1243 
1244 }  // namespace U2
1245