1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include "MsaDbiUtils.h"
23
24 #include <U2Core/DNASequenceUtils.h>
25 #include <U2Core/MultipleSequenceAlignmentExporter.h>
26 #include <U2Core/RawDataUdrSchema.h>
27 #include <U2Core/U2AlphabetUtils.h>
28 #include <U2Core/U2AttributeDbi.h>
29 #include <U2Core/U2OpStatusUtils.h>
30 #include <U2Core/U2SafePoints.h>
31 #include <U2Core/U2SequenceDbi.h>
32
33 namespace U2 {
34
35 /** Validates 'pos' in an alignment: it must be non-negative and less than or equal to the alignment length */
splitBytesToCharsAndGaps(const QByteArray & input,QByteArray & seqBytes,QList<U2MsaGap> & gapModel)36 void MaDbiUtils::splitBytesToCharsAndGaps(const QByteArray &input, QByteArray &seqBytes, QList<U2MsaGap> &gapModel) {
37 bool previousCharIsGap = false;
38 int gapsCount = 0;
39 int gapsOffset = 0;
40
41 for (int i = 0; i < input.count(); ++i) {
42 // A char
43 if ((U2Msa::GAP_CHAR != input.at(i))) {
44 if (previousCharIsGap) {
45 U2MsaGap gap(gapsOffset, gapsCount);
46 gapModel.append(gap);
47 gapsCount = 0;
48 }
49 seqBytes.append(input.at(i));
50 previousCharIsGap = false;
51 }
52 // A gap
53 else {
54 gapsCount++;
55 // A gap before the end of the row
56 if (i < input.count() - 1) {
57 if (!previousCharIsGap) {
58 gapsOffset = i;
59 }
60 previousCharIsGap = true;
61 }
62 // A gap at the end of the row
63 else {
64 // Correct the offset if there is one gap at the end of the row
65 if (1 == gapsCount) {
66 gapsOffset = i;
67 }
68 SAFE_POINT(gapsOffset >= 0, "Negative gap offset!", );
69 SAFE_POINT(gapsCount > 0, "Non-positive gap length!", );
70 U2MsaGap gap(gapsOffset, gapsCount);
71 gapModel.append(gap);
72 }
73 }
74 }
75
76 SAFE_POINT(-1 == seqBytes.indexOf(U2Msa::GAP_CHAR), "Row sequence contains gaps!", );
77 }
78
getMaLength(const U2EntityRef & maRef,U2OpStatus & os)79 qint64 MaDbiUtils::getMaLength(const U2EntityRef &maRef, U2OpStatus &os) {
80 DbiConnection con(maRef.dbiRef, os);
81 CHECK_OP(os, -1);
82
83 U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
84 SAFE_POINT_EXT(nullptr != msaDbi, os.setError("NULL Msa Dbi!"), -1);
85
86 qint64 length = msaDbi->getMsaLength(maRef.entityId, os);
87 CHECK_OP(os, -1);
88
89 return length;
90 }
91
updateMaLength(const U2EntityRef & maRef,qint64 newLen,U2OpStatus & os)92 void MaDbiUtils::updateMaLength(const U2EntityRef &maRef, qint64 newLen, U2OpStatus &os) {
93 // Prepare the connection
94 DbiConnection con(maRef.dbiRef, os);
95 CHECK_OP(os, );
96
97 U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
98 SAFE_POINT_EXT(nullptr != msaDbi, os.setError("NULL Msa Dbi!"), );
99
100 // Update the data
101 msaDbi->updateMsaLength(maRef.entityId, newLen, os);
102 }
103
getMaAlphabet(const U2EntityRef & maRef,U2OpStatus & os)104 U2AlphabetId MaDbiUtils::getMaAlphabet(const U2EntityRef &maRef, U2OpStatus &os) {
105 DbiConnection con(maRef.dbiRef, os);
106 CHECK_OP(os, U2AlphabetId());
107
108 U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
109 SAFE_POINT_EXT(nullptr != msaDbi, os.setError("NULL Msa Dbi!"), U2AlphabetId());
110
111 U2AlphabetId alphabet = msaDbi->getMsaAlphabet(maRef.entityId, os);
112 CHECK_OP(os, U2AlphabetId());
113
114 return alphabet;
115 }
116
updateMaAlphabet(const U2EntityRef & maRef,const U2AlphabetId & alphabet,U2OpStatus & os)117 void MaDbiUtils::updateMaAlphabet(const U2EntityRef &maRef, const U2AlphabetId &alphabet, U2OpStatus &os) {
118 // Prepare the connection
119 SAFE_POINT_EXT(alphabet.isValid(), os.setError("Invalid alphabet was passed !"), );
120 DbiConnection con(maRef.dbiRef, os);
121 CHECK_OP(os, );
122
123 U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
124 SAFE_POINT_EXT(nullptr != msaDbi, os.setError("NULL Msa Dbi!"), );
125
126 // Update the data
127 msaDbi->updateMsaAlphabet(maRef.entityId, alphabet, os);
128 }
129
renameMa(const U2EntityRef & maRef,const QString & newName,U2OpStatus & os)130 void MaDbiUtils::renameMa(const U2EntityRef &maRef, const QString &newName, U2OpStatus &os) {
131 if (newName.isEmpty()) {
132 os.setError(tr("Can't rename an alignment to an empty name!"));
133 }
134
135 // Prepare the connection
136 DbiConnection con(maRef.dbiRef, os);
137 CHECK_OP(os, );
138
139 U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
140 SAFE_POINT(nullptr != msaDbi, "NULL Msa Dbi!", );
141
142 // Update the name
143 msaDbi->updateMsaName(maRef.entityId, newName, os);
144 }
145
updateRowGapModel(const U2EntityRef & msaRef,qint64 rowId,const QList<U2MsaGap> & gaps,U2OpStatus & os)146 void MaDbiUtils::updateRowGapModel(const U2EntityRef &msaRef, qint64 rowId, const QList<U2MsaGap> &gaps, U2OpStatus &os) {
147 // Prepare the connection
148 DbiConnection con(msaRef.dbiRef, os);
149 CHECK_OP(os, );
150
151 U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
152 SAFE_POINT(nullptr != msaDbi, "NULL Msa Dbi!", );
153
154 // Update the data
155 msaDbi->updateGapModel(msaRef.entityId, rowId, gaps, os);
156 }
157
updateRowsOrder(const U2EntityRef & meRef,const QList<qint64> & rowsOrder,U2OpStatus & os)158 void MaDbiUtils::updateRowsOrder(const U2EntityRef &meRef, const QList<qint64> &rowsOrder, U2OpStatus &os) {
159 // Prepare the connection
160 DbiConnection con(meRef.dbiRef, os);
161 CHECK_OP(os, );
162
163 U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
164 SAFE_POINT(msaDbi != nullptr, "Msa Dbi is null!", );
165
166 // Update the data
167 msaDbi->setNewRowsOrder(meRef.entityId, rowsOrder, os);
168 }
169
renameRow(const U2EntityRef & msaRef,qint64 rowId,const QString & newName,U2OpStatus & os)170 void MaDbiUtils::renameRow(const U2EntityRef &msaRef, qint64 rowId, const QString &newName, U2OpStatus &os) {
171 if (newName.isEmpty()) {
172 os.setError(tr("Can't rename a row to an empty name!"));
173 }
174
175 // Prepare the connection
176 DbiConnection con(msaRef.dbiRef, os);
177 CHECK_OP(os, );
178
179 U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
180 SAFE_POINT(nullptr != msaDbi, "NULL Msa Dbi!", );
181
182 // Update the row name
183 msaDbi->updateRowName(msaRef.entityId, rowId, newName, os);
184 }
185
moveRows(const U2EntityRef & msaRef,const QList<qint64> & rowsToMove,const int delta,U2OpStatus & os)186 void MaDbiUtils::moveRows(const U2EntityRef &msaRef, const QList<qint64> &rowsToMove, const int delta, U2OpStatus &os) {
187 DbiConnection con(msaRef.dbiRef, false, os);
188 CHECK_OP(os, );
189
190 U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
191 SAFE_POINT(nullptr != msaDbi, "NULL Msa Dbi!", );
192
193 if (delta == 0 || rowsToMove.isEmpty()) {
194 return;
195 }
196
197 QList<U2MsaRow> rows = msaDbi->getRows(msaRef.entityId, os);
198 CHECK_OP(os, );
199
200 QList<qint64> rowIds;
201 for (int i = 0; i < rows.length(); ++i) {
202 rowIds << rows[i].rowId;
203 }
204
205 QList<QPair<int, int>> from_To;
206
207 if (delta < 0) {
208 int rowIndex = rowIds.indexOf(rowsToMove.first());
209 if (rowIndex == -1) {
210 os.setError("Invalid row list");
211 return;
212 }
213 int moveToIndex = rowIndex + delta >= 0 ? rowIndex + delta : 0;
214 from_To.append(QPair<int, int>(rowIndex, moveToIndex));
215 for (int i = 1; i < rowsToMove.length(); ++i) {
216 rowIndex = rowIds.indexOf(rowsToMove[i]);
217 if (rowIndex == -1) {
218 os.setError("Invalid row list");
219 return;
220 }
221 if (rowIndex <= from_To[i - 1].first) {
222 os.setError("List of rows to move is not ordered");
223 return;
224 }
225 moveToIndex = rowIndex + delta > from_To[i - 1].second ? rowIndex + delta : from_To[i - 1].second + 1;
226 from_To.append(QPair<int, int>(rowIndex, moveToIndex));
227 }
228 } else {
229 int rowIndex = rowIds.indexOf(rowsToMove.last());
230 if (rowIndex == -1) {
231 os.setError("Invalid row list");
232 return;
233 }
234 int moveToIndex = rowIndex + delta < rowIds.length() ? rowIndex + delta : rowIds.length() - 1;
235 from_To.append(QPair<int, int>(rowIndex, moveToIndex));
236 for (int i = 1; i < rowsToMove.length(); ++i) {
237 rowIndex = rowIds.indexOf(rowsToMove[rowsToMove.length() - i - 1]);
238 if (rowIndex == -1) {
239 os.setError("Invalid row list");
240 return;
241 }
242 if (rowIndex >= from_To[i - 1].first) {
243 os.setError("List of rows to move is not ordered");
244 return;
245 }
246 moveToIndex = rowIndex + delta < from_To[i - 1].second ? rowIndex + delta : from_To[i - 1].second - 1;
247 from_To.append(QPair<int, int>(rowIndex, moveToIndex));
248 }
249 }
250 QPair<int, int> coords;
251 foreach (coords, from_To) {
252 rowIds.move(coords.first, coords.second);
253 }
254 msaDbi->setNewRowsOrder(msaRef.entityId, rowIds, os);
255 CHECK_OP(os, );
256 }
257
getStartAndEndSequencePositions(const QByteArray & seq,const QList<U2MsaGap> & gaps,qint64 pos,qint64 count,qint64 & startPosInSeq,qint64 & endPosInSeq)258 void MaDbiUtils::getStartAndEndSequencePositions(const QByteArray &seq, const QList<U2MsaGap> &gaps, qint64 pos, qint64 count, qint64 &startPosInSeq, qint64 &endPosInSeq) {
259 int rowLengthWithoutTrailingGap = MsaRowUtils::getRowLengthWithoutTrailing(seq, gaps);
260 SAFE_POINT(pos < rowLengthWithoutTrailingGap, "Incorrect position!", );
261
262 // Remove chars from the sequence
263 // Calculate start position in the sequence
264 if (U2Msa::GAP_CHAR == MsaRowUtils::charAt(seq, gaps, pos)) {
265 int i = 1;
266 while (U2Msa::GAP_CHAR == MsaRowUtils::charAt(seq, gaps, pos + i)) {
267 if (MsaRowUtils::getRowLength(seq, gaps) == pos + i) {
268 break;
269 }
270 i++;
271 }
272 startPosInSeq = MsaRowUtils::getUngappedPosition(gaps, seq.length(), pos + i);
273 } else {
274 startPosInSeq = MsaRowUtils::getUngappedPosition(gaps, seq.length(), pos);
275 }
276
277 // Calculate end position in the sequence
278 int endRegionPos = pos + count; // non-inclusive
279
280 if (endRegionPos > rowLengthWithoutTrailingGap) {
281 endRegionPos = rowLengthWithoutTrailingGap;
282 }
283
284 if (endRegionPos == rowLengthWithoutTrailingGap) {
285 endPosInSeq = seq.length();
286 } else {
287 if (U2Msa::GAP_CHAR == MsaRowUtils::charAt(seq, gaps, endRegionPos)) {
288 int i = 1;
289 while (U2Msa::GAP_CHAR == MsaRowUtils::charAt(seq, gaps, endRegionPos + i)) {
290 if (MsaRowUtils::getRowLength(seq, gaps) == endRegionPos + i) {
291 break;
292 }
293 i++;
294 }
295 endPosInSeq = MsaRowUtils::getUngappedPosition(gaps, seq.length(), endRegionPos + i);
296 } else {
297 endPosInSeq = MsaRowUtils::getUngappedPosition(gaps, seq.length(), endRegionPos);
298 }
299 }
300 }
301
getCheckedConnection(const U2DbiRef & dbiRef,U2OpStatus & os)302 DbiConnection *MaDbiUtils::getCheckedConnection(const U2DbiRef &dbiRef, U2OpStatus &os) {
303 QScopedPointer<DbiConnection> con(new DbiConnection(dbiRef, os));
304 CHECK_OP(os, nullptr);
305
306 if (nullptr == con->dbi) {
307 os.setError("NULL root dbi");
308 return nullptr;
309 }
310
311 if (nullptr == con->dbi->getMsaDbi()) {
312 os.setError("NULL MSA dbi");
313 return nullptr;
314 }
315
316 if (con->dbi->getSequenceDbi() == nullptr) {
317 os.setError("NULL sequence dbi");
318 return nullptr;
319 }
320 return con.take();
321 }
322
323 /** Validates that all 'rowIds' contains in the alignment rows */
validateRowIds(const MultipleSequenceAlignment & al,const QList<qint64> & rowIds)324 bool MaDbiUtils::validateRowIds(const MultipleSequenceAlignment &al, const QList<qint64> &rowIds) {
325 QSet<qint64> alRowIds = al->getRowsIds().toSet();
326 for (qint64 rowId : qAsConst(rowIds)) {
327 if (!alRowIds.contains(rowId)) {
328 coreLog.trace(QString("No row ID '%1' in '%2' alignment!").arg(rowId).arg(al->getName()));
329 return false;
330 }
331 }
332 return true;
333 }
334
validateRowIds(U2MsaDbi * msaDbi,const U2DataId & msaId,const QList<qint64> & rowIds,U2OpStatus & os)335 void MaDbiUtils::validateRowIds(U2MsaDbi *msaDbi, const U2DataId &msaId, const QList<qint64> &rowIds, U2OpStatus &os) {
336 QList<U2MsaRow> allRows = msaDbi->getRows(msaId, os);
337 CHECK_OP(os, );
338 QList<qint64> allRowIds;
339 foreach (const U2MsaRow &row, allRows) {
340 allRowIds << row.rowId;
341 }
342 foreach (qint64 rowId, rowIds) {
343 if (!allRowIds.contains(rowId)) {
344 os.setError(QString("No row ID '%1' in an alignment!").arg(rowId));
345 return;
346 }
347 }
348 }
349
350 /////////////////////////////////////////////////////////////////
351 // Helper-methods for additional calculations
352
calculateGapModelAfterInsert(QList<U2MsaGap> & gapModel,qint64 pos,qint64 count)353 void MsaDbiUtils::calculateGapModelAfterInsert(QList<U2MsaGap> &gapModel, qint64 pos, qint64 count) {
354 SAFE_POINT(pos >= 0, QString("Invalid position '%1'!").arg(pos), );
355 SAFE_POINT(count > 0, QString("Invalid characters count '%1'!").arg(count), );
356
357 // There are no gaps yet
358 if (gapModel.isEmpty()) {
359 U2MsaGap gap(pos, count);
360 gapModel.append(gap);
361 return;
362 }
363 // There are gaps in the row
364 else {
365 // Insert gaps to the row beginning
366 if (0 == pos) {
367 U2MsaGap &firstGap = gapModel[0];
368 if (0 == firstGap.offset) {
369 firstGap.gap += count;
370 } else {
371 U2MsaGap beginningGap(0, count);
372 gapModel.insert(0, beginningGap);
373 }
374
375 // Shift other gaps
376 if (gapModel.count() > 1) {
377 for (int i = 1; i < gapModel.count(); ++i) {
378 qint64 newOffset = gapModel[i].offset + count;
379 gapModel[i].offset = newOffset;
380 }
381 }
382 }
383 // Gaps are inserted to the middle of the row
384 else {
385 // A gap is near
386 if (gapInPosition(gapModel, pos) ||
387 gapInPosition(gapModel, pos - 1)) {
388 // Find the gaps and append 'count' gaps to it
389 // Shift all gaps that further in the row
390 for (int i = 0; i < gapModel.count(); ++i) {
391 if (pos >= gapModel[i].offset) {
392 if (pos <= gapModel[i].offset + gapModel[i].gap) {
393 gapModel[i].gap += count;
394 }
395 } else {
396 gapModel[i].offset += count;
397 }
398 }
399 }
400 // Insert between chars
401 else {
402 bool found = false;
403
404 int indexGreaterGaps = 0;
405 for (int i = 0; i < gapModel.count(); ++i) {
406 if (pos > gapModel[i].offset + gapModel[i].gap) {
407 continue;
408 } else {
409 found = true;
410 U2MsaGap newGap(pos, count);
411 gapModel.insert(i, newGap);
412 indexGreaterGaps = i;
413 break;
414 }
415 }
416
417 // If found somewhere between existent gaps
418 if (found) {
419 // Shift further gaps
420 for (int i = indexGreaterGaps + 1; i < gapModel.count(); ++i) {
421 gapModel[i].offset += count;
422 }
423 }
424 // This is the last gap
425 else {
426 U2MsaGap newGap(pos, count);
427 gapModel.append(newGap);
428 }
429 }
430 }
431 }
432 }
433
cutOffLeadingGaps(QList<U2MsaRow> & rows)434 QList<U2MsaRow> MsaDbiUtils::cutOffLeadingGaps(QList<U2MsaRow> &rows) {
435 qint64 leadingGapsToRemove = LLONG_MAX;
436 for (qint64 i = 0; i < rows.length(); ++i) {
437 // If some rows haven't any leading gaps,
438 // If some rows haven't any gaps
439 // If some rows first gap's offset isn't zero
440 // return
441 if (leadingGapsToRemove == 0 || rows[i].gaps.isEmpty() || rows[i].gaps.first().offset != 0) {
442 leadingGapsToRemove = 0;
443 return QList<U2MsaRow>();
444 }
445 leadingGapsToRemove = qMin(leadingGapsToRemove, rows[i].gaps.first().gap);
446 }
447
448 // If there is any leading gaps after all, they should be removed.
449 if (leadingGapsToRemove != 0) {
450 for (qint64 i = 0; i < rows.length(); ++i) {
451 calculateGapModelAfterRemove(rows[i].gaps, 0, leadingGapsToRemove);
452 }
453 }
454 return rows;
455 }
456
cutOffTrailingGaps(QList<U2MsaRow> & rows,const qint64 msaLength)457 QList<U2MsaRow> MsaDbiUtils::cutOffTrailingGaps(QList<U2MsaRow> &rows, const qint64 msaLength) {
458 QList<U2MsaRow> affectedRows;
459 for (QList<U2MsaRow>::iterator rowIt = rows.begin(); rowIt < rows.end(); ++rowIt) {
460 // If there are no gaps in the row, skip this row.
461 if (rowIt->gaps.isEmpty()) {
462 continue;
463 }
464
465 // Delete all gaps with offset after msa length.
466 for (int gapReverseIndex = rowIt->gaps.size() - 1;
467 gapReverseIndex >= 0 && gapReverseIndex < rowIt->gaps.size() && rowIt->gaps.at(gapReverseIndex).offset > msaLength - 1;
468 --gapReverseIndex) {
469 rowIt->gaps.removeAt(gapReverseIndex++);
470 affectedRows << *rowIt;
471 }
472
473 // Cut off all gaps with offset before msa length and end after msa length
474 if (!rowIt->gaps.isEmpty() && rowIt->gaps.last().gap + rowIt->gaps.last().offset > msaLength) {
475 rowIt->gaps.last().gap = msaLength - rowIt->gaps.last().offset;
476 affectedRows << *rowIt;
477 }
478 }
479 return affectedRows;
480 }
481
calculateGapModelAfterRemove(QList<U2MsaGap> & gapModel,qint64 pos,qint64 count)482 void MsaDbiUtils::calculateGapModelAfterRemove(QList<U2MsaGap> &gapModel, qint64 pos, qint64 count) {
483 QList<U2MsaGap> newGapModel;
484 qint64 endRegionPos = pos + count; // non-inclusive
485 foreach (U2MsaGap gap, gapModel) {
486 qint64 gapEnd = gap.offset + gap.gap;
487 if (gapEnd < pos) {
488 newGapModel << gap;
489 } else if (gapEnd <= endRegionPos) {
490 if (gap.offset < pos) {
491 gap.gap = pos - gap.offset;
492 newGapModel << gap;
493 }
494 // Otherwise just remove the gap (do not write to the new gap model)
495 } else {
496 if (gap.offset < pos) {
497 gap.gap -= count;
498 SAFE_POINT(gap.gap >= 0, "Non-positive gap length!", );
499 newGapModel << gap;
500 } else if (gap.offset < endRegionPos) {
501 gap.gap = gapEnd - endRegionPos;
502 gap.offset = pos;
503 SAFE_POINT(gap.gap > 0, "Non-positive gap length!", );
504 SAFE_POINT(gap.offset >= 0, "Negative gap offset!", );
505 newGapModel << gap;
506 } else {
507 // Shift the gap
508 gap.offset -= count;
509 SAFE_POINT(gap.offset >= 0, "Negative gap offset!", );
510 newGapModel << gap;
511 }
512 }
513 }
514
515 gapModel = newGapModel;
516 }
517
calculateGapsLength(const QList<U2MsaGap> & gapModel)518 qint64 MsaDbiUtils::calculateGapsLength(const QList<U2MsaGap> &gapModel) {
519 qint64 length = 0;
520 foreach (U2MsaGap elt, gapModel) {
521 length += elt.gap;
522 }
523 return length;
524 }
525
calculateRowLength(const U2MsaRow & row)526 qint64 MsaDbiUtils::calculateRowLength(const U2MsaRow &row) {
527 qint64 seqLength = row.gend - row.gstart;
528 qint64 gapsLength = calculateGapsLength(row.gaps);
529 return seqLength + gapsLength;
530 }
531
mergeConsecutiveGaps(QList<U2MsaGap> & gapModel)532 void MsaDbiUtils::mergeConsecutiveGaps(QList<U2MsaGap> &gapModel) {
533 QList<U2MsaGap> newGapModel;
534 if (gapModel.isEmpty()) {
535 return;
536 }
537
538 newGapModel << gapModel[0];
539 int indexInNewGapModel = 0;
540 for (int i = 1; i < gapModel.count(); ++i) {
541 int previousGapEnd = newGapModel[indexInNewGapModel].offset +
542 newGapModel[indexInNewGapModel].gap - 1;
543 int currectGapStart = gapModel[i].offset;
544 SAFE_POINT(currectGapStart > previousGapEnd,
545 "Incorrect gap model during merging consecutive gaps!", );
546 if (currectGapStart == previousGapEnd + 1) {
547 // Merge gaps
548 qint64 newGapLength = newGapModel[indexInNewGapModel].gap + gapModel[i].gap;
549 SAFE_POINT(newGapLength > 0, "Non-positive gap length!", )
550 newGapModel[indexInNewGapModel].gap = newGapLength;
551 } else {
552 // Add the gap to the list
553 newGapModel << gapModel[i];
554 indexInNewGapModel++;
555 }
556 }
557 gapModel = newGapModel;
558 }
559
removeCharsFromRow(QByteArray & seq,QList<U2MsaGap> & gaps,qint64 pos,qint64 count)560 void MsaDbiUtils::removeCharsFromRow(QByteArray &seq, QList<U2MsaGap> &gaps, qint64 pos, qint64 count) {
561 SAFE_POINT(pos >= 0, "Incorrect position!", );
562 SAFE_POINT(count > 0, "Incorrect characters count!", );
563
564 if (pos >= MsaRowUtils::getRowLength(seq, gaps)) {
565 return;
566 }
567
568 if (pos < MsaRowUtils::getRowLengthWithoutTrailing(seq, gaps)) {
569 qint64 startPosInSeq = -1;
570 qint64 endPosInSeq = -1;
571 MaDbiUtils::getStartAndEndSequencePositions(seq, gaps, pos, count, startPosInSeq, endPosInSeq);
572
573 if ((startPosInSeq < endPosInSeq) && (-1 != startPosInSeq) && (-1 != endPosInSeq)) {
574 U2OpStatus2Log os;
575 DNASequenceUtils::removeChars(seq, startPosInSeq, endPosInSeq, os);
576 SAFE_POINT_OP(os, );
577 }
578 }
579
580 calculateGapModelAfterRemove(gaps, pos, count);
581 mergeConsecutiveGaps(gaps);
582 }
583
calculateGapModelAfterReplaceChar(QList<U2MsaGap> & gapModel,qint64 pos)584 void MaDbiUtils::calculateGapModelAfterReplaceChar(QList<U2MsaGap> &gapModel, qint64 pos) {
585 SAFE_POINT(pos >= 0, QString("Invalid position '%1'!").arg(pos), );
586
587 for (int i = 0; i < gapModel.count(); ++i) {
588 U2MsaGap &curGap = gapModel[i];
589 qint64 gapStart = curGap.offset;
590 qint64 gapEnd = gapStart + curGap.gap;
591 if (pos >= gapStart && pos <= gapEnd) {
592 if (pos == gapStart) {
593 if (curGap.gap == 1) {
594 gapModel.removeAt(i);
595 } else {
596 curGap.offset++;
597 curGap.gap--;
598 }
599 } else if (pos == gapEnd - 1) {
600 if (curGap.gap == 1) {
601 gapModel.removeAt(i);
602 } else {
603 curGap.gap--;
604 }
605 } else {
606 gapModel.removeAt(i);
607 U2MsaGap firstGap(gapStart, pos - gapStart);
608 U2MsaGap secondGap(pos + 1, gapEnd - pos - 1);
609 gapModel.insert(i, secondGap);
610 gapModel.insert(i, firstGap);
611 }
612 break;
613 }
614 }
615 }
616
replaceCharsInRow(QByteArray & sequence,QList<U2MsaGap> & gaps,const U2Region & range,char newChar,U2OpStatus & os)617 void MsaDbiUtils::replaceCharsInRow(QByteArray &sequence, QList<U2MsaGap> &gaps, const U2Region &range, char newChar, U2OpStatus &os) {
618 for (qint64 pos = range.startPos; pos < range.endPos(); pos++) {
619 qint64 lengthWithNoTrail = MsaRowUtils::getRowLengthWithoutTrailing(sequence, gaps);
620 if (pos < lengthWithNoTrail) {
621 qint64 posInSeq = -1;
622 qint64 endPosInSeq = -1;
623 MaDbiUtils::getStartAndEndSequencePositions(sequence, gaps, pos, 1, posInSeq, endPosInSeq);
624 if (posInSeq >= 0 && endPosInSeq > posInSeq) {
625 DNASequenceUtils::replaceChars(sequence, posInSeq, QByteArray(1, newChar), os);
626 CHECK_OP(os, );
627 } else {
628 DNASequenceUtils::insertChars(sequence, posInSeq, QByteArray(1, newChar), os);
629 CHECK_OP(os, );
630 MaDbiUtils::calculateGapModelAfterReplaceChar(gaps, pos);
631 }
632 } else {
633 sequence.append(newChar);
634 if (pos != lengthWithNoTrail) {
635 gaps.append(U2MsaGap(lengthWithNoTrail, pos - lengthWithNoTrail));
636 }
637 }
638 }
639 }
640
cropCharsFromRow(MultipleSequenceAlignmentRow & alRow,qint64 pos,qint64 count)641 void MsaDbiUtils::cropCharsFromRow(MultipleSequenceAlignmentRow &alRow, qint64 pos, qint64 count) {
642 SAFE_POINT(pos >= 0, "Incorrect position!", );
643 SAFE_POINT(count > 0, "Incorrect characters count!", );
644
645 // Change the sequence
646 qint64 initialRowLength = alRow->getRowLength();
647 qint64 initialSeqLength = alRow->getUngappedLength();
648 DNASequence modifiedSeq = alRow->getSequence();
649
650 if (pos >= alRow->getRowLengthWithoutTrailing()) {
651 DNASequenceUtils::makeEmpty(modifiedSeq);
652 } else {
653 qint64 startPosInSeq = -1;
654 qint64 endPosInSeq = -1;
655 MaDbiUtils::getStartAndEndSequencePositions(alRow->getSequence().seq, alRow->getGapModel(), pos, count, startPosInSeq, endPosInSeq);
656
657 // Remove inside a gap
658 if ((startPosInSeq <= endPosInSeq) && (-1 != startPosInSeq) && (-1 != endPosInSeq)) {
659 U2OpStatus2Log os;
660 if (endPosInSeq < initialSeqLength) {
661 DNASequenceUtils::removeChars(modifiedSeq, endPosInSeq, initialSeqLength, os);
662 SAFE_POINT_OP(os, );
663 }
664
665 if (startPosInSeq > 0) {
666 DNASequenceUtils::removeChars(modifiedSeq, 0, startPosInSeq, os);
667 SAFE_POINT_OP(os, );
668 }
669 }
670 }
671
672 // Change the gap model
673 QList<U2MsaGap> gapModel = alRow->getGapModel();
674 if (pos + count < initialRowLength) {
675 calculateGapModelAfterRemove(gapModel, pos + count, initialRowLength - pos - count);
676 }
677
678 if (pos > 0) {
679 calculateGapModelAfterRemove(gapModel, 0, pos);
680 }
681 U2OpStatusImpl os;
682 alRow->setRowContent(modifiedSeq, gapModel, os);
683 CHECK_OP(os, );
684 }
685
686 /** Returns "true" if there is a gap on position "pos" */
gapInPosition(const QList<U2MsaGap> & gapModel,qint64 pos)687 bool MsaDbiUtils::gapInPosition(const QList<U2MsaGap> &gapModel, qint64 pos) {
688 foreach (const U2MsaGap &gap, gapModel) {
689 if (gap.offset + gap.gap - 1 < pos) {
690 continue;
691 }
692 if (gap.offset > pos) {
693 return false;
694 }
695 return true;
696 }
697 return false;
698 }
699
700 /////////////////////////////////////////////////////////////////
701 // MSA DBI Utilities
702
updateMsa(const U2EntityRef & msaRef,const MultipleSequenceAlignment & ma,U2OpStatus & os)703 void MsaDbiUtils::updateMsa(const U2EntityRef &msaRef, const MultipleSequenceAlignment &ma, U2OpStatus &os) {
704 // Prepare the connection
705 DbiConnection con(msaRef.dbiRef, os);
706 CHECK_OP(os, );
707
708 U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
709 SAFE_POINT(msaDbi != nullptr, "NULL Msa Dbi!", );
710
711 U2SequenceDbi *seqDbi = con.dbi->getSequenceDbi();
712 SAFE_POINT(seqDbi != nullptr, "NULL Sequence Dbi!", );
713
714 U2AttributeDbi *attrDbi = con.dbi->getAttributeDbi();
715 SAFE_POINT(attrDbi != nullptr, "NULL Attribute Dbi!", );
716
717 //// UPDATE MSA OBJECT
718 const DNAAlphabet *originalMaAlphabet = ma->getAlphabet();
719 SAFE_POINT(originalMaAlphabet != nullptr, "The alignment alphabet is NULL!", );
720
721 U2Msa msaObj;
722 msaObj.id = msaRef.entityId;
723 msaObj.visualName = ma->getName();
724 msaObj.alphabet.id = originalMaAlphabet->getId();
725 msaObj.length = ma->getLength();
726
727 msaDbi->updateMsaName(msaRef.entityId, ma->getName(), os);
728 CHECK_OP(os, );
729
730 msaDbi->updateMsaAlphabet(msaRef.entityId, originalMaAlphabet->getId(), os);
731 CHECK_OP(os, );
732
733 msaDbi->updateMsaLength(msaRef.entityId, ma->getLength(), os);
734 CHECK_OP(os, );
735
736 //// UPDATE ROWS AND SEQUENCES
737 // Get rows that are currently stored in the database
738 QList<U2MsaRow> currentRows = msaDbi->getRows(msaRef.entityId, os);
739 QList<qint64> currentRowIds;
740 CHECK_OP(os, );
741
742 QList<qint64> newRowsIds = ma->getRowsIds();
743 QList<qint64> eliminatedRows;
744
745 for (const U2MsaRow ¤tRow: qAsConst(currentRows)) {
746 currentRowIds.append(currentRow.rowId);
747
748 // Update data for rows with the same row and sequence IDs
749 if (newRowsIds.contains(currentRow.rowId)) {
750 // Update sequence and row info
751 U2MsaRow newRow = ma->getMsaRowByRowId(currentRow.rowId, os)->getRowDbInfo();
752 CHECK_OP(os, );
753
754 if (newRow.sequenceId != currentRow.sequenceId) {
755 // Kill the row from the current alignment, it is incorrect. New row with this ID will be created later.
756 MsaDbiUtils::removeRow(msaRef, currentRow.rowId, os);
757 CHECK_OP(os, );
758
759 currentRowIds.removeOne(currentRow.rowId);
760 continue;
761 }
762
763 DNASequence sequence = ma->getMsaRowByRowId(newRow.rowId, os)->getSequence();
764 CHECK_OP(os, );
765
766 msaDbi->updateRowName(msaRef.entityId, newRow.rowId, sequence.getName(), os);
767 CHECK_OP(os, );
768
769 msaDbi->updateRowContent(msaRef.entityId, newRow.rowId, sequence.seq, newRow.gaps, os);
770 CHECK_OP(os, );
771 } else {
772 // Remove rows that are no more present in the alignment
773 eliminatedRows.append(currentRow.rowId);
774 }
775 }
776
777 msaDbi->removeRows(msaRef.entityId, eliminatedRows, os);
778 CHECK_OP(os, );
779
780 // Add rows that are stored in memory, but are not present in the database,
781 // remember the rows order
782 QList<qint64> rowsOrder;
783 for (int i = 0, n = ma->getNumRows(); i < n; ++i) {
784 const MultipleSequenceAlignmentRow alRow = ma->getMsaRow(i);
785 U2MsaRow row = alRow->getRowDbInfo();
786
787 if (row.sequenceId.isEmpty() || !currentRowIds.contains(row.rowId)) {
788 // Import the sequence
789 DNASequence rowSeq = alRow->getSequence();
790 U2Sequence sequence = U2Sequence();
791 sequence.visualName = rowSeq.getName();
792 sequence.circular = rowSeq.circular;
793 sequence.length = rowSeq.length();
794
795 const DNAAlphabet *rowSequenceAlphabet = rowSeq.alphabet;
796 if (rowSequenceAlphabet == nullptr) {
797 rowSequenceAlphabet = U2AlphabetUtils::findBestAlphabet(rowSeq.constData(), rowSeq.length());
798 }
799 SAFE_POINT(rowSequenceAlphabet != nullptr, "Failed to get alphabet for a sequence!", );
800 sequence.alphabet.id = rowSequenceAlphabet->getId();
801
802 seqDbi->createSequenceObject(sequence, "", os);
803 CHECK_OP(os, );
804
805 QVariantMap hints;
806 const QByteArray &seqData = rowSeq.constSequence();
807 seqDbi->updateSequenceData(sequence.id, U2_REGION_MAX, seqData, hints, os);
808 CHECK_OP(os, );
809
810 // Create the row
811 row.rowId = -1; // set the row ID automatically
812 row.sequenceId = sequence.id;
813 row.gstart = 0;
814 row.gend = sequence.length;
815 row.gaps = alRow->getGapModel();
816 MsaDbiUtils::addRow(msaRef, -1, row, os);
817 CHECK_OP(os, );
818 }
819 rowsOrder.append(row.rowId);
820 }
821
822 //// UPDATE ROWS POSITIONS
823 msaDbi->setNewRowsOrder(msaRef.entityId, rowsOrder, os);
824
825 //// UPDATE MALIGNMENT ATTRIBUTES
826 QVariantMap alInfo = ma->getInfo();
827
828 foreach (QString key, alInfo.keys()) {
829 QString val = alInfo.value(key).value<QString>();
830 U2StringAttribute attr(msaRef.entityId, key, val);
831
832 attrDbi->createStringAttribute(attr, os);
833 CHECK_OP(os, );
834 }
835 }
836
updateRowContent(const U2EntityRef & msaRef,qint64 rowId,const QByteArray & seqBytes,const QList<U2MsaGap> & gaps,U2OpStatus & os)837 void MsaDbiUtils::updateRowContent(const U2EntityRef &msaRef, qint64 rowId, const QByteArray &seqBytes, const QList<U2MsaGap> &gaps, U2OpStatus &os) {
838 // Prepare the connection
839 DbiConnection con(msaRef.dbiRef, os);
840 CHECK_OP(os, );
841
842 U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
843 SAFE_POINT(nullptr != msaDbi, "NULL Msa Dbi!", );
844
845 // Update the data
846 msaDbi->updateRowContent(msaRef.entityId, rowId, seqBytes, gaps, os);
847 }
848
insertGaps(const U2EntityRef & msaRef,const QList<qint64> & rowIds,qint64 pos,qint64 count,U2OpStatus & os,bool collapseTrailingGaps)849 void MsaDbiUtils::insertGaps(const U2EntityRef &msaRef, const QList<qint64> &rowIds, qint64 pos, qint64 count, U2OpStatus &os, bool collapseTrailingGaps) {
850 // Prepare the connection
851 DbiConnection con(msaRef.dbiRef, os);
852 CHECK_OP(os, );
853
854 U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
855 SAFE_POINT(nullptr != msaDbi, "NULL Msa Dbi!", );
856
857 // Get the MSA properties
858 const U2Msa msaObj = msaDbi->getMsaObject(msaRef.entityId, os);
859 const qint64 alLength = msaObj.length;
860
861 // Validate the position
862 if (pos < 0 || pos > alLength) {
863 coreLog.trace(QString("Invalid position '%1' in '%2' alignment!").arg(pos).arg(msaObj.visualName));
864 os.setError(tr("Failed to insert gaps into an alignment!"));
865 return;
866 }
867
868 // Validate the count of gaps
869 if (count <= 0) {
870 coreLog.trace(QString("Invalid value of characters count '%1'!").arg(count));
871 os.setError(tr("Failed to insert gaps into an alignment!"));
872 return;
873 }
874
875 // Insert gaps into rows
876 QList<U2MsaRow> rows;
877 foreach (qint64 rowId, rowIds) {
878 const U2MsaRow row = msaDbi->getRow(msaRef.entityId, rowId, os);
879 CHECK_OP(os, );
880
881 rows.append(row);
882 }
883
884 int trailingGapsColumns = count;
885 foreach (U2MsaRow row, rows) {
886 // Calculate the new gap model
887 calculateGapModelAfterInsert(row.gaps, pos, count);
888
889 // Trim trailing gap (if any)
890 qint64 seqLength = row.gend - row.gstart;
891 qint64 gapsLength = 0;
892 int diff = alLength - row.length;
893 trailingGapsColumns = diff < trailingGapsColumns ? diff : trailingGapsColumns;
894
895 for (int i = 0, n = row.gaps.count(); i < n; ++i) {
896 const U2MsaGap &gap = row.gaps[i];
897 if ((i == n - 1) && (gap.offset >= seqLength + gapsLength)) {
898 row.gaps.removeAt(i);
899 break;
900 }
901 gapsLength += gap.gap;
902 }
903
904 // Put the new gap model into the database
905 msaDbi->updateGapModel(msaRef.entityId, row.rowId, row.gaps, os);
906 CHECK_OP(os, );
907 }
908
909 if (collapseTrailingGaps) {
910 qint64 enlargeAlignmentLength = 0;
911 foreach (qint64 rowId, rowIds) {
912 enlargeAlignmentLength = qMax(enlargeAlignmentLength, msaDbi->getRow(msaRef.entityId, rowId, os).length);
913 CHECK_OP(os, );
914 }
915 if (msaObj.length < enlargeAlignmentLength) {
916 msaDbi->updateMsaLength(msaRef.entityId, enlargeAlignmentLength, os);
917 CHECK_OP(os, );
918 }
919 } else {
920 int newLength = msaObj.length + count - trailingGapsColumns;
921 if (msaObj.length < newLength) {
922 msaDbi->updateMsaLength(msaRef.entityId, msaObj.length + count, os);
923 }
924 CHECK_OP(os, );
925 }
926 }
927
removeRegion(const U2EntityRef & msaRef,const QList<qint64> & rowIds,qint64 pos,qint64 count,U2OpStatus & os)928 void MsaDbiUtils::removeRegion(const U2EntityRef &msaRef, const QList<qint64> &rowIds, qint64 pos, qint64 count, U2OpStatus &os) {
929 // Check parameters
930 CHECK_EXT(pos >= 0, os.setError(QString("Negative MSA pos: %1").arg(pos)), );
931 CHECK_EXT(count > 0, os.setError(QString("Wrong MSA base count: %1").arg(count)), );
932
933 // Prepare the connection
934 QScopedPointer<DbiConnection> con(MaDbiUtils::getCheckedConnection(msaRef.dbiRef, os));
935 SAFE_POINT_OP(os, );
936 U2MsaDbi *msaDbi = con->dbi->getMsaDbi();
937 U2SequenceDbi *sequenceDbi = con->dbi->getSequenceDbi();
938
939 U2Msa msa = msaDbi->getMsaObject(msaRef.entityId, os);
940 SAFE_POINT_OP(os, );
941
942 MaDbiUtils::validateRowIds(msaDbi, msaRef.entityId, rowIds, os);
943 CHECK_OP(os, );
944
945 qint64 rowNum = msaDbi->getNumOfRows(msaRef.entityId, os);
946 bool keepAlignmentLength = true;
947 if (rowNum == rowIds.size()) {
948 keepAlignmentLength = false;
949 }
950
951 // Remove region for each row from the list
952 foreach (qint64 rowId, rowIds) {
953 U2MsaRow row = msaDbi->getRow(msaRef.entityId, rowId, os);
954 SAFE_POINT_OP(os, );
955
956 U2Region seqReg(row.gstart, row.gend - row.gstart);
957 QByteArray seq = sequenceDbi->getSequenceData(row.sequenceId, seqReg, os);
958 SAFE_POINT_OP(os, );
959
960 // Calculate the modified row
961 removeCharsFromRow(seq, row.gaps, pos, count);
962
963 msaDbi->updateRowContent(msaRef.entityId, rowId, seq, row.gaps, os);
964 SAFE_POINT_OP(os, );
965 }
966 if (!keepAlignmentLength) {
967 msaDbi->updateMsaLength(msaRef.entityId, msa.length - count, os);
968 }
969 }
970
replaceCharactersInRow(const U2EntityRef & msaRef,qint64 rowId,const U2Region & range,char newChar,U2OpStatus & os)971 void MsaDbiUtils::replaceCharactersInRow(const U2EntityRef &msaRef, qint64 rowId, const U2Region &range, char newChar, U2OpStatus &os) {
972 SAFE_POINT_EXT(newChar != U2Msa::GAP_CHAR, os.setError("Can't use GAP for replacement!"), );
973
974 QScopedPointer<DbiConnection> con(MaDbiUtils::getCheckedConnection(msaRef.dbiRef, os));
975 CHECK_OP(os, );
976
977 U2MsaDbi *msaDbi = con->dbi->getMsaDbi();
978 U2SequenceDbi *sequenceDbi = con->dbi->getSequenceDbi();
979
980 U2Msa msa = msaDbi->getMsaObject(msaRef.entityId, os);
981 CHECK_OP(os, );
982
983 MaDbiUtils::validateRowIds(msaDbi, msaRef.entityId, {rowId}, os);
984 CHECK_OP(os, );
985
986 U2MsaRow row = msaDbi->getRow(msaRef.entityId, rowId, os);
987 CHECK_OP(os, );
988
989 qint64 msaLength = msaDbi->getMsaLength(msaRef.entityId, os);
990 CHECK_OP(os, );
991 CHECK_EXT(U2Region(0, msaLength).contains(range), os.setError(tr("Invalid range: %1 %2").arg(range.startPos).arg(range.endPos())), );
992
993 U2Region sequenceRange(row.gstart, row.gend - row.gstart);
994 QByteArray sequence = sequenceDbi->getSequenceData(row.sequenceId, sequenceRange, os);
995 CHECK_OP(os, );
996
997 replaceCharsInRow(sequence, row.gaps, range, newChar, os);
998 CHECK_OP(os, );
999
1000 msaDbi->updateRowContent(msaRef.entityId, rowId, sequence, row.gaps, os);
1001 }
1002
replaceNonGapCharacter(const U2EntityRef & msaRef,char oldChar,char newChar,U2OpStatus & os)1003 QList<qint64> MsaDbiUtils::replaceNonGapCharacter(const U2EntityRef &msaRef, char oldChar, char newChar, U2OpStatus &os) {
1004 QList<qint64> modifiedRowIds;
1005 if (oldChar == newChar) {
1006 return modifiedRowIds;
1007 }
1008 QScopedPointer<DbiConnection> con(MaDbiUtils::getCheckedConnection(msaRef.dbiRef, os));
1009 CHECK_OP(os, modifiedRowIds);
1010
1011 U2MsaDbi *msaDbi = con->dbi->getMsaDbi();
1012 U2SequenceDbi *sequenceDbi = con->dbi->getSequenceDbi();
1013
1014 QList<qint64> rowIds = msaDbi->getOrderedRowIds(msaRef.entityId, os);
1015 CHECK_OP(os, modifiedRowIds);
1016 for (qint64 rowId : qAsConst(rowIds)) {
1017 U2MsaRow msaRow = msaDbi->getRow(msaRef.entityId, rowId, os);
1018 CHECK_OP(os, modifiedRowIds);
1019 U2Region sequenceRegion(msaRow.gstart, msaRow.gend - msaRow.gstart);
1020 QByteArray sequenceData = sequenceDbi->getSequenceData(msaRow.sequenceId, sequenceRegion, os);
1021 if (sequenceData.contains(oldChar)) {
1022 sequenceData.replace(oldChar, newChar);
1023 msaDbi->updateRowContent(msaRef.entityId, rowId, sequenceData, msaRow.gaps, os);
1024 CHECK_OP(os, modifiedRowIds);
1025 modifiedRowIds << rowId;
1026 }
1027 }
1028 return modifiedRowIds;
1029 }
1030
keepOnlyAlphabetChars(const U2EntityRef & msaRef,const DNAAlphabet * alphabet,const QByteArray & replacementMap,U2OpStatus & os)1031 QList<qint64> MsaDbiUtils::keepOnlyAlphabetChars(const U2EntityRef &msaRef, const DNAAlphabet *alphabet, const QByteArray &replacementMap, U2OpStatus &os) {
1032 QList<qint64> modifiedRowIds;
1033 QScopedPointer<DbiConnection> con(MaDbiUtils::getCheckedConnection(msaRef.dbiRef, os));
1034 CHECK_OP(os, modifiedRowIds);
1035
1036 bool hasReplacementMap = replacementMap.size() == 256;
1037 SAFE_POINT(hasReplacementMap || replacementMap.isEmpty(), "replacementMap has invalid size: " + QString::number(replacementMap.size()), modifiedRowIds);
1038
1039 U2MsaDbi *msaDbi = con->dbi->getMsaDbi();
1040 U2SequenceDbi *sequenceDbi = con->dbi->getSequenceDbi();
1041
1042 QList<qint64> rowIds = msaDbi->getOrderedRowIds(msaRef.entityId, os);
1043 CHECK_OP(os, modifiedRowIds);
1044 QByteArray alphabetChars = alphabet->getAlphabetChars();
1045 QBitArray validCharsMap = TextUtils::createBitMap(alphabetChars);
1046 char defaultChar = alphabet->getDefaultSymbol();
1047 for (qint64 rowId : qAsConst(rowIds)) {
1048 U2MsaRow msaRow = msaDbi->getRow(msaRef.entityId, rowId, os);
1049 CHECK_OP(os, modifiedRowIds);
1050 U2Region sequenceRegion(msaRow.gstart, msaRow.gend - msaRow.gstart);
1051 QByteArray sequenceData = sequenceDbi->getSequenceData(msaRow.sequenceId, sequenceRegion, os);
1052 bool isModified = false;
1053 for (int i = 0, n = sequenceData.length(); i < n; i++) {
1054 char c = sequenceData[i];
1055 if (!validCharsMap.testBit(c)) {
1056 isModified = true;
1057 char newChar = hasReplacementMap ? replacementMap[(quint8)c] : '\0';
1058 sequenceData[i] = validCharsMap.testBit(newChar) ? newChar : defaultChar;
1059 }
1060 }
1061 if (isModified) {
1062 msaDbi->updateRowContent(msaRef.entityId, rowId, sequenceData, msaRow.gaps, os);
1063 CHECK_OP(os, modifiedRowIds);
1064 modifiedRowIds << rowId;
1065 }
1066 }
1067 return modifiedRowIds;
1068 }
1069
removeEmptyRows(const U2EntityRef & msaRef,const QList<qint64> & rowIds,U2OpStatus & os)1070 QList<qint64> MsaDbiUtils::removeEmptyRows(const U2EntityRef &msaRef, const QList<qint64> &rowIds, U2OpStatus &os) {
1071 QScopedPointer<DbiConnection> con(MaDbiUtils::getCheckedConnection(msaRef.dbiRef, os));
1072 SAFE_POINT_OP(os, QList<qint64>());
1073 U2MsaDbi *msaDbi = con->dbi->getMsaDbi();
1074 U2SequenceDbi *sequenceDbi = con->dbi->getSequenceDbi();
1075
1076 MaDbiUtils::validateRowIds(msaDbi, msaRef.entityId, rowIds, os);
1077 CHECK_OP(os, QList<qint64>());
1078
1079 // find empty rows
1080 QList<qint64> emptyRowIds;
1081 foreach (qint64 rowId, rowIds) {
1082 U2MsaRow row = msaDbi->getRow(msaRef.entityId, rowId, os);
1083 SAFE_POINT_OP(os, QList<qint64>());
1084 U2Sequence seq = sequenceDbi->getSequenceObject(row.sequenceId, os);
1085 SAFE_POINT_OP(os, QList<qint64>());
1086 if (0 == seq.length) {
1087 emptyRowIds << row.rowId;
1088 }
1089 }
1090 if (!emptyRowIds.isEmpty()) {
1091 // remove empty rows
1092 msaDbi->removeRows(msaRef.entityId, emptyRowIds, os);
1093 SAFE_POINT_OP(os, QList<qint64>());
1094 }
1095 return emptyRowIds;
1096 }
1097
crop(const U2EntityRef & msaRef,const QList<qint64> & rowIds,const U2Region & columnRange,U2OpStatus & os)1098 void MsaDbiUtils::crop(const U2EntityRef &msaRef, const QList<qint64> &rowIds, const U2Region &columnRange, U2OpStatus &os) {
1099 // Get the alignment.
1100 MultipleSequenceAlignmentExporter alExporter;
1101 MultipleSequenceAlignment al = alExporter.getAlignment(msaRef.dbiRef, msaRef.entityId, os);
1102
1103 // Validate the parameters.
1104 if (columnRange.startPos < 0 || columnRange.endPos() > al->getLength()) {
1105 os.setError(tr("Invalid crop column range: %1..%2").arg(columnRange.startPos + 1).arg(columnRange.endPos()));
1106 uiLog.error(os.getError());
1107 return;
1108 }
1109 if (rowIds.isEmpty()) {
1110 os.setError(tr("List of ids to crop is empty"));
1111 uiLog.error(os.getError());
1112 return;
1113 }
1114 if (!MaDbiUtils::validateRowIds(al, rowIds)) {
1115 os.setError(tr("Invalid crop row ids"));
1116 uiLog.error(os.getError());
1117 return;
1118 }
1119 // Prepare the connection
1120 DbiConnection con(msaRef.dbiRef, os);
1121 CHECK_OP(os, );
1122
1123 U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
1124 SAFE_POINT(msaDbi != nullptr, "NULL Msa Dbi!", );
1125
1126 bool isRowLengthChanged = columnRange.length < al->getLength();
1127
1128 // Crop or remove each row.
1129 for (int i = 0, n = al->getNumRows(); i < n; i++) {
1130 MultipleSequenceAlignmentRow row = al->getMsaRow(i)->getExplicitCopy();
1131 qint64 rowId = row->getRowId();
1132 if (!rowIds.contains(rowId)) {
1133 MsaDbiUtils::removeRow(msaRef, row->getRowId(), os);
1134 CHECK_OP(os, );
1135 continue;
1136 }
1137 if (!isRowLengthChanged) {
1138 continue; // do not touch this column at all.
1139 }
1140 U2DataId sequenceId = row->getRowDbInfo().sequenceId;
1141 SAFE_POINT(!sequenceId.isEmpty(), "Empty sequence ID!", );
1142
1143 // Calculate the modified row.
1144 cropCharsFromRow(row, columnRange.startPos, columnRange.length);
1145
1146 // Put the new sequence and gap model into the database.
1147 msaDbi->updateRowContent(msaRef.entityId, rowId, row->getSequence().constSequence(), row->getGapModel(), os);
1148 CHECK_OP(os, );
1149 }
1150 if (isRowLengthChanged) {
1151 msaDbi->updateMsaLength(msaRef.entityId, columnRange.length, os);
1152 }
1153 CHECK_OP(os, );
1154 }
1155
trim(const U2EntityRef & msaRef,U2OpStatus & os)1156 QList<qint64> MsaDbiUtils::trim(const U2EntityRef &msaRef, U2OpStatus &os) {
1157 const QList<qint64> invalidResult;
1158 // Prepare the connection
1159 DbiConnection con(msaRef.dbiRef, os);
1160 CHECK_OP(os, invalidResult);
1161
1162 U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
1163 SAFE_POINT(nullptr != msaDbi, "NULL Msa Dbi!", invalidResult);
1164
1165 qint64 msaLength = msaDbi->getMsaObject(msaRef.entityId, os).length;
1166 CHECK_OP(os, invalidResult);
1167 SAFE_POINT(msaLength >= 0, "Msa length is negative.", invalidResult);
1168
1169 QList<U2MsaRow> rows = msaDbi->getRows(msaRef.entityId, os);
1170 CHECK_OP(os, invalidResult);
1171 SAFE_POINT(!rows.isEmpty(), "Msa rows list is empty.", invalidResult);
1172
1173 // Trim trailing gaps from gap model
1174 QList<U2MsaRow> modifiedRows = cutOffTrailingGaps(rows, msaLength);
1175
1176 // Trim leading gaps, it changes length of msa, msaLength doesn`t update.
1177 const QList<U2MsaRow> cutOffStartResult = cutOffLeadingGaps(rows);
1178 // if cutting off leading gaps was performed then all rows has changed
1179
1180 if (!cutOffStartResult.isEmpty()) {
1181 modifiedRows << cutOffStartResult;
1182 }
1183
1184 QList<qint64> modifiedRowIds;
1185 // Update gap model
1186 foreach (U2MsaRow row, modifiedRows) {
1187 msaDbi->updateGapModel(msaRef.entityId, row.rowId, row.gaps, os);
1188 CHECK_OP(os, invalidResult);
1189 modifiedRowIds.append(row.rowId);
1190 }
1191
1192 qint64 newMsaLen = -1;
1193 // check if rows contains modified rows whitoud leading gaps
1194 rows = msaDbi->getRows(msaRef.entityId, os);
1195 CHECK_OP(os, invalidResult);
1196 SAFE_POINT(!rows.isEmpty(), "Msa rows list is empty.", invalidResult);
1197 foreach (U2MsaRow row, rows) {
1198 if (row.length != 0) {
1199 // row is not empty
1200 if (newMsaLen == -1) {
1201 newMsaLen = row.length;
1202 } else {
1203 newMsaLen = qMax(newMsaLen, row.length);
1204 }
1205 }
1206 }
1207 if (newMsaLen == -1) {
1208 // alignment is empty or full of gaps
1209 newMsaLen = 0;
1210 }
1211
1212 msaDbi->updateMsaLength(msaRef.entityId, newMsaLen, os);
1213 return modifiedRowIds;
1214 }
1215
addRow(const U2EntityRef & msaRef,qint64 posInMsa,U2MsaRow & row,U2OpStatus & os)1216 void MsaDbiUtils::addRow(const U2EntityRef &msaRef, qint64 posInMsa, U2MsaRow &row, U2OpStatus &os) {
1217 // Validate the parameters
1218 SAFE_POINT_EXT(!row.sequenceId.isEmpty(), os.setError("Invalid sequence reference"), );
1219
1220 // Prepare the connection
1221 DbiConnection con(msaRef.dbiRef, os);
1222 CHECK_OP(os, );
1223
1224 U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
1225 SAFE_POINT(nullptr != msaDbi, "NULL Msa Dbi!", );
1226
1227 // Add the row
1228 msaDbi->addRow(msaRef.entityId, posInMsa, row, os);
1229 CHECK_OP(os, );
1230 }
1231
removeRow(const U2EntityRef & msaRef,qint64 rowId,U2OpStatus & os)1232 void MsaDbiUtils::removeRow(const U2EntityRef &msaRef, qint64 rowId, U2OpStatus &os) {
1233 // Prepare the connection
1234 DbiConnection con(msaRef.dbiRef, os);
1235 CHECK_OP(os, );
1236
1237 U2MsaDbi *msaDbi = con.dbi->getMsaDbi();
1238 SAFE_POINT(nullptr != msaDbi, "NULL Msa Dbi!", );
1239
1240 // Remove the row
1241 msaDbi->removeRow(msaRef.entityId, rowId, os);
1242 }
1243
1244 } // namespace U2
1245