1 /** 2 * UGENE - Integrated Bioinformatics Tools. 3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru> 4 * http://ugene.net 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 2 9 * of the License, or (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 19 * MA 02110-1301, USA. 20 */ 21 22 #ifndef _U2_MSA_DBI_UTILS_ 23 #define _U2_MSA_DBI_UTILS_ 24 25 #include <U2Core/MultipleSequenceAlignment.h> 26 #include <U2Core/U2MsaDbi.h> 27 28 namespace U2 { 29 30 class U2CORE_EXPORT MaDbiUtils : public QObject { 31 Q_OBJECT 32 public: 33 /** Split 'input' bytes into sequence bytes (chars) and a gap model */ 34 static void splitBytesToCharsAndGaps(const QByteArray &input, QByteArray &seqBytes, QList<U2MsaGap> &gapModel); 35 36 /** 37 * Get the length of the alignment in the database. 38 */ 39 static qint64 getMaLength(const U2EntityRef &maRef, U2OpStatus &os); 40 41 /** 42 * Update the length of the alignment in the database. 43 */ 44 static void updateMaLength(const U2EntityRef &maRef, qint64 newLen, U2OpStatus &os); 45 46 /** 47 * Get the alphabet of the alignment in the database. 48 */ 49 static U2AlphabetId getMaAlphabet(const U2EntityRef &maRef, U2OpStatus &os); 50 51 /** 52 * Update the alphabet of the alignment in the database. 53 */ 54 static void updateMaAlphabet(const U2EntityRef &maRef, const U2AlphabetId &alphabet, U2OpStatus &os); 55 56 /** 57 * Renames an alignment. 58 * Parameter 'newName' must be NOT empty! 59 */ 60 static void renameMa(const U2EntityRef &maRef, const QString &newName, U2OpStatus &os); 61 62 /** 63 * Updates a gap model of the specified row in the database. 64 * Parameter 'rowId' must contain a valid row ID in the database. 65 */ 66 static void updateRowGapModel(const U2EntityRef &maRef, qint64 rowId, const QList<U2MsaGap> &gaps, U2OpStatus &os); 67 68 /** 69 * Updates positions of the rows in the database according to the order in the list. 70 * All IDs must exactly match IDs of the MSA! 71 */ 72 static void updateRowsOrder(const U2EntityRef &maRef, const QList<qint64> &rowsOrder, U2OpStatus &os); 73 74 /** 75 * Renames a row of the alignment, i.e. the corresponding sequence. 76 * Parameter 'rowId' must contain a valid row ID in the database. 77 * Parameter 'newName' must be NOT empty! 78 */ 79 static void renameRow(const U2EntityRef &msaRef, qint64 rowId, const QString &newName, U2OpStatus &os); 80 81 /** 82 * Updates positions of the rows in the database according to the delta. 83 * If some rows can`t move, the other rows will continue to move until there is space. 84 * rowsToMove must have the same relative order as rows in database have. 85 * All IDs must exactly match IDs of the MSA! 86 */ 87 static void moveRows(const U2EntityRef &msaRef, const QList<qint64> &rowsToMove, const int delta, U2OpStatus &os); 88 89 /** 90 * Calculates start and end position in the sequence, 91 * depending on the start position in the row and the 'count' character from it 92 */ 93 static void getStartAndEndSequencePositions(const QByteArray &seq, const QList<U2MsaGap> &gaps, qint64 pos, qint64 count, qint64 &startPosInSeq, qint64 &endPosInSeq); 94 95 static DbiConnection *getCheckedConnection(const U2DbiRef &dbiRef, U2OpStatus &os); 96 static bool validateRowIds(const MultipleSequenceAlignment &al, const QList<qint64> &rowIds); 97 static void validateRowIds(U2MsaDbi *msaDbi, const U2DataId &msaId, const QList<qint64> &rowIds, U2OpStatus &os); 98 99 static void calculateGapModelAfterReplaceChar(QList<U2MsaGap> &gapModel, qint64 pos); 100 }; 101 102 class U2CORE_EXPORT MsaDbiUtils : public QObject { 103 Q_OBJECT 104 friend class McaDbiUtils; 105 106 public: 107 /** 108 * Updates the whole alignment in the database: 109 * 1) Updates the MSA object 110 * 2) If 'al' rows have IDs of the rows and sequences in the database, updates these data. 111 * Otherwise adds or removes the corresponding rows and sequences. 112 * 3) Updates rows positions 113 */ 114 static void updateMsa(const U2EntityRef &msaRef, const MultipleSequenceAlignment &ma, U2OpStatus &os); 115 116 /** 117 * Inserts 'count' gaps to rows with specified IDs from 'pos' position. 118 * Updates the alignment length. 119 * Parameter 'rowIds' must contain valid IDs of the alignment rows in the database! 120 * Parameter 'pos' must be >=0 and < the alignment length. 121 * Parameter 'count' must be > 0. 122 */ 123 static void insertGaps(const U2EntityRef &msaRef, const QList<qint64> &rowIds, qint64 pos, qint64 count, U2OpStatus &os, bool collapseTrailingGaps); 124 125 /** 126 * Removes a region from an alignment. 127 * Does NOT trim the result alignment! 128 * Parameter 'rowIds' must contain valid IDs of the alignment rows in the database! 129 * Parameter 'pos' must be >=0. 130 * Parameter 'count' must be > 0. 131 * The sum 'pos' + 'count' must be <= the alignment length. 132 */ 133 static void removeRegion(const U2EntityRef &msaRef, const QList<qint64> &rowIds, qint64 pos, qint64 count, U2OpStatus &os); 134 135 /** Replaces all characters in the given column range with a new character. */ 136 static void replaceCharactersInRow(const U2EntityRef &msaRef, qint64 rowId, const U2Region &range, char newChar, U2OpStatus &os); 137 138 /** 139 * Replaces a non-gap character in the whole alignment. 140 * Returns list of modified row ids. 141 */ 142 static QList<qint64> replaceNonGapCharacter(const U2EntityRef &msaRef, char oldChar, char newChar, U2OpStatus &os); 143 144 /** 145 * Keeps only valid characters all characters from the given alphabet. 146 * Tries to use 'replacementMap' for all invalid characters first: if the replacementMap contains non-'\0' character - uses the mapped value. 147 * The 'replacementMap' can be either empty of should contain mapping for all possible 256 Latin1 chars. 148 */ 149 static QList<qint64> keepOnlyAlphabetChars(const U2EntityRef &msaRef, const DNAAlphabet *alphabet, const QByteArray &replacementMap, U2OpStatus &os); 150 151 /** 152 * Keeps only the specified rows in the alignment - 'count' characters from position 'pos'. 153 * If a row length is less than 'pos', the sequence and gap model becomes empty. 154 * Updates the alignment length. 155 * Parameter 'rowIds' must contain valid IDs of the alignment rows in the database! 156 * Parameter 'columnRange' must be a valid non-empty column range. 157 */ 158 static void crop(const U2EntityRef &msaRef, const QList<qint64> &rowIds, const U2Region &columnRange, U2OpStatus &os); 159 160 /** 161 * Removes leading and trailing gaps, if required. 162 * Updates the alignment length in this case. 163 * Gap mode should be correct, else some gaps may be not trimmed. 164 * Returns list of modified rows. 165 */ 166 static QList<qint64> trim(const U2EntityRef &msaRef, U2OpStatus &os); 167 168 /** 169 * Adds a row to the alignment and updates 'row'. 170 * Updates the alignment length. 171 * Parameter 'posInMsa' must be within bounds [0, numRows] or '-1' (row is appended). 172 * Parameter 'row' must contain valid values! 173 */ 174 static void addRow(const U2EntityRef &msaRef, qint64 posInMsa, U2MsaRow &row, U2OpStatus &os); 175 176 /** 177 * Removes a row from the alignment. 178 * Parameter 'rowId' must contain a valid row ID in the database. 179 */ 180 static void removeRow(const U2EntityRef &msaRef, qint64 rowId, U2OpStatus &os); 181 182 /** 183 * Updates the row sequence data and the row info (gaps, etc.) in the database. 184 * Parameter 'rowId' must contain a valid row ID in the database. 185 */ 186 static void updateRowContent(const U2EntityRef &msaRef, qint64 rowId, const QByteArray &seqBytes, const QList<U2MsaGap> &gaps, U2OpStatus &os); 187 188 /** 189 * If some of specified rows is empty it will be removed. 190 * Returns list of removed row ids. 191 */ 192 static QList<qint64> removeEmptyRows(const U2EntityRef &msaRef, const QList<qint64> &rowIds, U2OpStatus &os); 193 194 /** Calculates a new gap model when 'count' gaps are inserted to 'pos' position */ 195 static void calculateGapModelAfterInsert(QList<U2MsaGap> &gapModel, qint64 pos, qint64 count); 196 197 private: 198 /** 199 * Verifies if the alignment contains columns of gaps at the beginning. 200 * Delete this columns of gaps from each row. 201 * Gaps should be ordered and should not stick together. 202 * Returns a list containing modified rows 203 */ 204 static QList<U2MsaRow> cutOffLeadingGaps(QList<U2MsaRow> &rows); 205 206 /** 207 * Delete all gaps from gapModel after msaLength. 208 * If gap begins before the end of alignment and end after it, it will be modified: 209 * the gap`s end will be equal to the alignment`s end. 210 * Returns a list containing modified rows 211 */ 212 static QList<U2MsaRow> cutOffTrailingGaps(QList<U2MsaRow> &rows, const qint64 msaLength); 213 214 /** 215 * Removes gaps from the row between position 'pos' and 'pos + count'. 216 * Shifts the remaining gaps, if required. 217 */ 218 static void calculateGapModelAfterRemove(QList<U2MsaGap> &gapModel, qint64 pos, qint64 count); 219 220 /** Length of all gaps in the gap model */ 221 static qint64 calculateGapsLength(const QList<U2MsaGap> &gapModel); 222 223 /** Length of the sequence and gap model for the row */ 224 static qint64 calculateRowLength(const U2MsaRow &row); 225 226 /** If there are consecutive gaps in the gaps model, merges them into one gap */ 227 static void mergeConsecutiveGaps(QList<U2MsaGap> &gapModel); 228 229 /** Removes chars/gaps from the row */ 230 static void removeCharsFromRow(QByteArray &seq, QList<U2MsaGap> &gaps, qint64 pos, qint64 count); 231 232 /** Replace characters in the row with a new character. Updates sequence & gap model. */ 233 static void replaceCharsInRow(QByteArray &sequence, QList<U2MsaGap> &gaps, const U2Region &range, char newChar, U2OpStatus &os); 234 235 /** 236 * Crops a row to region from 'pos' to 'pos' + 'count', 237 * or a lesser region, if the length of the row is smaller. 238 * Parameter 'pos' can even be greater than the length of the row. 239 * The row sequence and gap model are set to empty values in this case. 240 */ 241 static void cropCharsFromRow(MultipleSequenceAlignmentRow &alRow, qint64 pos, qint64 count); 242 243 /** Returns "true" if there is a gap on position "pos" */ 244 static bool gapInPosition(const QList<U2MsaGap> &gapModel, qint64 pos); 245 }; 246 247 } // namespace U2 248 249 #endif 250