1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #ifndef _U2_MSA_DBI_UTILS_
23 #define _U2_MSA_DBI_UTILS_
24 
25 #include <U2Core/MultipleSequenceAlignment.h>
26 #include <U2Core/U2MsaDbi.h>
27 
28 namespace U2 {
29 
/**
 * Static helper methods for a multiple alignment stored in a database.
 * Most methods address the alignment through a U2EntityRef.
 * NOTE(review): methods that take 'U2OpStatus &os' presumably report failures through it;
 * the implementations are not visible in this header - confirm there.
 */
30 class U2CORE_EXPORT MaDbiUtils : public QObject {
31     Q_OBJECT
32 public:
33     /** Splits 'input' bytes into sequence bytes (chars, written to 'seqBytes') and a gap model (written to 'gapModel'). */
34     static void splitBytesToCharsAndGaps(const QByteArray &input, QByteArray &seqBytes, QList<U2MsaGap> &gapModel);
35
36     /**
37      * Returns the length of the alignment stored in the database.
38      */
39     static qint64 getMaLength(const U2EntityRef &maRef, U2OpStatus &os);
40
41     /**
42      * Sets the length of the alignment stored in the database to 'newLen'.
43      */
44     static void updateMaLength(const U2EntityRef &maRef, qint64 newLen, U2OpStatus &os);
45
46     /**
47      * Returns the alphabet ID of the alignment stored in the database.
48      */
49     static U2AlphabetId getMaAlphabet(const U2EntityRef &maRef, U2OpStatus &os);
50
51     /**
52      * Sets the alphabet of the alignment stored in the database to 'alphabet'.
53      */
54     static void updateMaAlphabet(const U2EntityRef &maRef, const U2AlphabetId &alphabet, U2OpStatus &os);
55
56     /**
57      * Renames an alignment.
58      * Parameter 'newName' must NOT be empty!
59      */
60     static void renameMa(const U2EntityRef &maRef, const QString &newName, U2OpStatus &os);
61
62     /**
63      * Updates the gap model of the specified row in the database with 'gaps'.
64      * Parameter 'rowId' must contain a valid row ID in the database.
65      */
66     static void updateRowGapModel(const U2EntityRef &maRef, qint64 rowId, const QList<U2MsaGap> &gaps, U2OpStatus &os);
67
68     /**
69      * Updates positions of the rows in the database according to their order in the 'rowsOrder' list.
70      * All IDs must exactly match the row IDs of the MSA!
71      */
72     static void updateRowsOrder(const U2EntityRef &maRef, const QList<qint64> &rowsOrder, U2OpStatus &os);
73
74     /**
75      * Renames a row of the alignment, i.e. the corresponding sequence.
76      * Parameter 'rowId' must contain a valid row ID in the database.
77      * Parameter 'newName' must NOT be empty!
78      */
79     static void renameRow(const U2EntityRef &msaRef, qint64 rowId, const QString &newName, U2OpStatus &os);
80
81     /**
82      * Updates positions of the rows in the database according to 'delta'.
83      * If some rows can't move, the other rows will continue to move until there is space.
84      * 'rowsToMove' must have the same relative order as the rows have in the database.
85      * All IDs must exactly match the row IDs of the MSA!
86      */
87     static void moveRows(const U2EntityRef &msaRef, const QList<qint64> &rowsToMove, const int delta, U2OpStatus &os);
88
89     /**
90      * Calculates the start and end positions in the sequence (written to 'startPosInSeq' and 'endPosInSeq')
91      * for the region of the row that starts at row position 'pos' and spans 'count' characters.
92      */
93     static void getStartAndEndSequencePositions(const QByteArray &seq, const QList<U2MsaGap> &gaps, qint64 pos, qint64 count, qint64 &startPosInSeq, qint64 &endPosInSeq);
94
       /**
        * NOTE(review): undocumented in the original header. Judging by the signature, returns a
        * connection to the database referenced by 'dbiRef'. Ownership of the returned pointer and the
        * value returned on failure are not visible here - confirm in the implementation.
        */
95     static DbiConnection *getCheckedConnection(const U2DbiRef &dbiRef, U2OpStatus &os);
       /** NOTE(review): presumably returns true when every ID in 'rowIds' is a row ID of the in-memory alignment 'al' - confirm. */
96     static bool validateRowIds(const MultipleSequenceAlignment &al, const QList<qint64> &rowIds);
       /** NOTE(review): presumably validates 'rowIds' against the rows of the alignment 'msaId' in the database and reports the result through 'os' - confirm. */
97     static void validateRowIds(U2MsaDbi *msaDbi, const U2DataId &msaId, const QList<qint64> &rowIds, U2OpStatus &os);
98
       /** NOTE(review): judging by the name, recalculates 'gapModel' in place after a character is replaced at position 'pos' - confirm exact semantics. */
99     static void calculateGapModelAfterReplaceChar(QList<U2MsaGap> &gapModel, qint64 pos);
100 };
101 
/**
 * Static helper methods for editing a multiple sequence alignment (MSA) stored in a database:
 * whole-alignment updates, gap insertion, region removal, character replacement, cropping,
 * trimming and row management. Private members are gap-model and row helpers.
 * NOTE(review): methods that take 'U2OpStatus &os' presumably report failures through it;
 * the implementations are not visible in this header - confirm there.
 */
102 class U2CORE_EXPORT MsaDbiUtils : public QObject {
103     Q_OBJECT
        // Gives McaDbiUtils access to the private helpers declared below.
104     friend class McaDbiUtils;
105
106 public:
107     /**
108      * Updates the whole alignment in the database:
109      * 1) Updates the MSA object
110      * 2) If 'ma' rows have IDs of the rows and sequences in the database, updates these data.
111      *    Otherwise adds or removes the corresponding rows and sequences.
112      * 3) Updates rows positions
113      */
114     static void updateMsa(const U2EntityRef &msaRef, const MultipleSequenceAlignment &ma, U2OpStatus &os);
115
116     /**
117      * Inserts 'count' gaps into the rows with the specified IDs, starting from position 'pos'.
118      * Updates the alignment length.
119      * Parameter 'rowIds' must contain valid IDs of the alignment rows in the database!
120      * Parameter 'pos' must be >=0 and < the alignment length.
121      * Parameter 'count' must be > 0.
         * NOTE(review): 'collapseTrailingGaps' is not documented in the original header; presumably it
         * controls whether trailing gaps are collapsed after the insertion - confirm in the implementation.
122      */
123     static void insertGaps(const U2EntityRef &msaRef, const QList<qint64> &rowIds, qint64 pos, qint64 count, U2OpStatus &os, bool collapseTrailingGaps);
124
125     /**
126      * Removes a region from an alignment.
127      * Does NOT trim the resulting alignment!
128      * Parameter 'rowIds' must contain valid IDs of the alignment rows in the database!
129      * Parameter 'pos' must be >=0.
130      * Parameter 'count' must be > 0.
131      * The sum 'pos' + 'count' must be <= the alignment length.
132      */
133     static void removeRegion(const U2EntityRef &msaRef, const QList<qint64> &rowIds, qint64 pos, qint64 count, U2OpStatus &os);
134
135     /** Replaces all characters of the row 'rowId' in the given column range with the new character 'newChar'. */
136     static void replaceCharactersInRow(const U2EntityRef &msaRef, qint64 rowId, const U2Region &range, char newChar, U2OpStatus &os);
137
138     /**
139      * Replaces the non-gap character 'oldChar' with 'newChar' in the whole alignment.
140      * Returns the list of modified row IDs.
141      */
142     static QList<qint64> replaceNonGapCharacter(const U2EntityRef &msaRef, char oldChar, char newChar, U2OpStatus &os);
143
144     /**
145      * Keeps only the characters that are valid for the given alphabet.
146      * Tries to use 'replacementMap' for all invalid characters first: if the replacementMap contains a non-'\0' character - uses the mapped value.
147      * The 'replacementMap' can be either empty or should contain a mapping for all possible 256 Latin1 chars.
         * NOTE(review): the returned list is presumably the IDs of the modified rows - confirm in the implementation.
148      */
149     static QList<qint64> keepOnlyAlphabetChars(const U2EntityRef &msaRef, const DNAAlphabet *alphabet, const QByteArray &replacementMap, U2OpStatus &os);
150
151     /**
152      * Keeps only the specified rows in the alignment, and only their characters inside 'columnRange'.
153      * If a row length is less than the start of 'columnRange', the sequence and gap model of the row become empty.
154      * Updates the alignment length.
155      * Parameter 'rowIds' must contain valid IDs of the alignment rows in the database!
156      * Parameter 'columnRange' must be a valid non-empty column range.
157      */
158     static void crop(const U2EntityRef &msaRef, const QList<qint64> &rowIds, const U2Region &columnRange, U2OpStatus &os);
159
160     /**
161      * Removes leading and trailing gaps, if required.
162      * Updates the alignment length in this case.
163      * The gap model should be correct, otherwise some gaps may not be trimmed.
164      * Returns the list of modified rows.
165      */
166     static QList<qint64> trim(const U2EntityRef &msaRef, U2OpStatus &os);
167
168     /**
169      * Adds a row to the alignment and updates 'row'.
170      * Updates the alignment length.
171      * Parameter 'posInMsa' must be within bounds [0, numRows] or '-1' (row is appended).
172      * Parameter 'row' must contain valid values!
173      */
174     static void addRow(const U2EntityRef &msaRef, qint64 posInMsa, U2MsaRow &row, U2OpStatus &os);
175
176     /**
177      * Removes a row from the alignment.
178      * Parameter 'rowId' must contain a valid row ID in the database.
179      */
180     static void removeRow(const U2EntityRef &msaRef, qint64 rowId, U2OpStatus &os);
181
182     /**
183      * Updates the row sequence data and the row info (gaps, etc.) in the database.
184      * Parameter 'rowId' must contain a valid row ID in the database.
185      */
186     static void updateRowContent(const U2EntityRef &msaRef, qint64 rowId, const QByteArray &seqBytes, const QList<U2MsaGap> &gaps, U2OpStatus &os);
187
188     /**
189      * Removes those of the specified rows that are empty.
190      * Returns the list of removed row IDs.
191      */
192     static QList<qint64> removeEmptyRows(const U2EntityRef &msaRef, const QList<qint64> &rowIds, U2OpStatus &os);
193
194     /** Calculates a new gap model (in place, in 'gapModel') for the case when 'count' gaps are inserted at position 'pos' */
195     static void calculateGapModelAfterInsert(QList<U2MsaGap> &gapModel, qint64 pos, qint64 count);
196
197 private:
198     /**
199      * Verifies whether the alignment contains columns of gaps at the beginning.
200      * Deletes these columns of gaps from each row.
201      * Gaps should be ordered and should not stick together.
202      * Returns a list containing the modified rows.
203      */
204     static QList<U2MsaRow> cutOffLeadingGaps(QList<U2MsaRow> &rows);
205
206     /**
207      * Deletes all gaps from the gap model that are located after 'msaLength'.
208      * If a gap begins before the end of the alignment and ends after it, it will be modified:
209      * the gap's end will be equal to the alignment's end.
210      * Returns a list containing the modified rows.
211      */
212     static QList<U2MsaRow> cutOffTrailingGaps(QList<U2MsaRow> &rows, const qint64 msaLength);
213
214     /**
215      * Removes gaps from the row between position 'pos' and 'pos + count'.
216      * Shifts the remaining gaps, if required.
217      */
218     static void calculateGapModelAfterRemove(QList<U2MsaGap> &gapModel, qint64 pos, qint64 count);
219
220     /** Returns the total length of all gaps in the gap model */
221     static qint64 calculateGapsLength(const QList<U2MsaGap> &gapModel);
222
223     /** Returns the length of the sequence and gap model for the row */
224     static qint64 calculateRowLength(const U2MsaRow &row);
225
226     /** If there are consecutive gaps in the gap model, merges them into one gap */
227     static void mergeConsecutiveGaps(QList<U2MsaGap> &gapModel);
228
229     /** Removes 'count' chars/gaps from the row, starting at position 'pos'. Updates 'seq' and 'gaps' in place. */
230     static void removeCharsFromRow(QByteArray &seq, QList<U2MsaGap> &gaps, qint64 pos, qint64 count);
231
232     /** Replaces characters of the row inside 'range' with the new character. Updates the sequence & gap model in place. */
233     static void replaceCharsInRow(QByteArray &sequence, QList<U2MsaGap> &gaps, const U2Region &range, char newChar, U2OpStatus &os);
234
235     /**
236      * Crops a row to the region from 'pos' to 'pos' + 'count',
237      * or to a smaller region, if the length of the row is smaller.
238      * Parameter 'pos' can even be greater than the length of the row.
239      * The row sequence and gap model are set to empty values in this case.
240      */
241     static void cropCharsFromRow(MultipleSequenceAlignmentRow &alRow, qint64 pos, qint64 count);
242
243     /** Returns "true" if there is a gap at position "pos" */
244     static bool gapInPosition(const QList<U2MsaGap> &gapModel, qint64 pos);
245 };
246 
247 }  // namespace U2
248 
249 #endif
250