1 /**
2 * UGENE - Integrated Bioinformatics Tools.
3 * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4 * http://ugene.net
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19 * MA 02110-1301, USA.
20 */
21
22 #include "MsaColorSchemeClustalX.h"
23
24 #include <U2Algorithm/MSAConsensusUtils.h>
25
26 #include <U2Core/MultipleAlignmentObject.h>
27
28 namespace U2 {
29
MsaColorSchemeClustalX(QObject * parent,const MsaColorSchemeFactory * factory,MultipleAlignmentObject * maObj)30 MsaColorSchemeClustalX::MsaColorSchemeClustalX(QObject *parent, const MsaColorSchemeFactory *factory, MultipleAlignmentObject *maObj)
31 : MsaColorScheme(parent, factory, maObj),
32 objVersion(1),
33 cacheVersion(0),
34 aliLen(maObj->getLength()) {
35 colorByIdx[ClustalColor_BLUE] = "#80a0f0";
36 colorByIdx[ClustalColor_RED] = "#f01505";
37 colorByIdx[ClustalColor_GREEN] = "#15c015";
38 colorByIdx[ClustalColor_PINK] = "#f08080";
39 colorByIdx[ClustalColor_MAGENTA] = "#c048c0";
40 colorByIdx[ClustalColor_ORANGE] = "#f09048";
41 colorByIdx[ClustalColor_CYAN] = "#15a4a4";
42 colorByIdx[ClustalColor_YELLOW] = "#c0c000";
43
44 connect(maObj, SIGNAL(si_alignmentChanged(const MultipleAlignment &, const MaModificationInfo &)), SLOT(sl_alignmentChanged()));
45 }
46
getBackgroundColor(int seq,int pos,char) const47 QColor MsaColorSchemeClustalX::getBackgroundColor(int seq, int pos, char) const {
48 if (cacheVersion != objVersion) {
49 updateCache();
50 }
51
52 int idx = getColorIdx(seq, pos);
53 assert(idx >= 0 && idx < ClustalColor_NUM_COLORS);
54 return colorByIdx[idx];
55 }
56
getFontColor(int seq,int pos,char c) const57 QColor MsaColorSchemeClustalX::getFontColor(int seq, int pos, char c) const {
58 Q_UNUSED(seq);
59 Q_UNUSED(pos);
60 Q_UNUSED(c);
61
62 return QColor();
63 }
64
sl_alignmentChanged()65 void MsaColorSchemeClustalX::sl_alignmentChanged() {
66 objVersion++;
67 }
68
namespace {

/**
 * Sums the frequency-table entries for the first len characters of str.
 *
 * @param freqs table indexed by character code (each character is read as unsigned char)
 * @param str   characters whose frequencies are added up
 * @param len   number of characters of str to process
 * @return the sum of freqs[c] over the processed characters; 0 when len <= 0
 */
int basesContent(const int *freqs, const char *str, int len) {
    int total = 0;
    int idx = 0;
    while (idx < len) {
        // Index as unsigned so characters above 127 never yield a negative offset.
        total += freqs[static_cast<unsigned char>(str[idx])];
        ++idx;
    }
    return total;
}

}  // namespace
81
updateCache() const82 void MsaColorSchemeClustalX::updateCache() const {
83 if (cacheVersion == objVersion) {
84 return;
85 }
86
87 // compute colors for whole ali
88 // use 4 bits per color
89 const MultipleAlignment msa = maObj->getMultipleAlignment();
90 int nSeq = msa->getNumRows();
91 aliLen = maObj->getLength();
92 cacheVersion = objVersion;
93
94 bool stub = false;
95 int cacheSize = getCacheIdx(nSeq, aliLen, stub) + 1;
96 colorsCache.resize(cacheSize);
97
98 /* source: http://ekhidna.biocenter.helsinki.fi/pfam2/clustal_colours
99
100 BLUE
101 (W,L,V,I,M,F): {50%, P}{60%, WLVIMAFCYHP}
102 (A): {50%, P}{60%, WLVIMAFCYHP}{85%, T,S,G}
103 (C): {50%, P}{60%, WLVIMAFCYHP}{85%, S}
104 RED
105 (K,R): {60%, KR}{85%, Q}
106 GREEN
107 (T): {50%, TS}{60%, WLVIMAFCYHP}
108 (S): {50%, TS}{80%, WLVIMAFCYHP}
109 (N): {50%, N}{85%, D}
110 (Q): {50%, QE}{60%, KR}
111 PINK
112 (C): {85%, C}
113 MAGENTA
114 (D): {50%, DE,N}
115 (E): {50%, DE,QE}
116 ORANGE
117 (G): {ALWAYS}
118 CYAN
119 (H,Y): {50%, P}{60%, WLVIMAFCYHP}
120 YELLOW
121 (P): {ALWAYS}
122
123 WARN: do not count gaps in percents!
124 */
125
126 QVector<int> freqsByChar(256);
127 const int *freqs = freqsByChar.data();
128
129 for (int pos = 0; pos < aliLen; pos++) {
130 int nonGapChars = 0;
131 MSAConsensusUtils::getColumnFreqs(msa, pos, freqsByChar, nonGapChars);
132 int content50 = int(nonGapChars * 50.0 / 100);
133 int content60 = int(nonGapChars * 60.0 / 100);
134 int content80 = int(nonGapChars * 80.0 / 100);
135 int content85 = int(nonGapChars * 85.0 / 100);
136
137 for (int seq = 0; seq < nSeq; seq++) {
138 char c = msa->charAt(seq, pos);
139 int colorIdx = ClustalColor_NO_COLOR;
140 switch (c) {
141 case 'W': //(W,L,V,I,M,F): {50%, P}{60%, WLVIMAFCYHP} -> BLUE
142 case 'L':
143 case 'V':
144 case 'I':
145 case 'M':
146 case 'F':
147 if (freqs['P'] > content50 || basesContent(freqs, "WLVIMAFCYHP", 11) > content60) {
148 colorIdx = ClustalColor_BLUE;
149 }
150 break;
151 case 'A': // {50%, P}{60%, WLVIMAFCYHP}{85%, T,S,G} -> BLUE
152 if (freqs['P'] > content50 || basesContent(freqs, "WLVIMAFCYHP", 11) > content60) {
153 colorIdx = ClustalColor_BLUE;
154 } else if (freqs['T'] > content85 || freqs['S'] > content85 || freqs['G'] > 85) {
155 colorIdx = ClustalColor_BLUE;
156 }
157 break;
158
159 case 'K': //{60%, KR}{85%, Q} -> RED
160 case 'R':
161 if ((freqs['K'] + freqs['R'] > content60) || freqs['Q'] > content85) {
162 colorIdx = ClustalColor_RED;
163 }
164 break;
165
166 case 'T': // {50%, TS}{60%, WLVIMAFCYHP} -> GREEN
167 if ((freqs['T'] + freqs['S'] > content50) || basesContent(freqs, "WLVIMAFCYHP", 11) > content60) {
168 colorIdx = ClustalColor_GREEN;
169 }
170 break;
171
172 case 'S': // {50%, TS}{80%, WLVIMAFCYHP} -> GREEN
173 if ((freqs['T'] + freqs['S'] > content50) || basesContent(freqs, "WLVIMAFCYHP", 11) > content80) {
174 colorIdx = ClustalColor_GREEN;
175 }
176 break;
177
178 case 'N': // {50%, N}{85%, D} -> GREEN
179 if (freqs['N'] > content50 || freqs['D'] > content85) {
180 colorIdx = ClustalColor_GREEN;
181 }
182 break;
183
184 case 'Q': // {50%, QE}{60%, KR} -> GREEN
185 if ((freqs['Q'] + freqs['E']) > content50 || (freqs['K'] + freqs['R']) > content60) {
186 colorIdx = ClustalColor_GREEN;
187 }
188 break;
189
190 case 'C': //{85%, C} -> PINK
191 //{50%, P}{60%, WLVIMAFCYHP}{85%, S} -> BLUE
192 if (freqs['C'] > content85) {
193 colorIdx = ClustalColor_PINK;
194 } else if (freqs['P'] > content50 || basesContent(freqs, "WLVIMAFCYHP", 11) > content60 || freqs['S'] > content85) {
195 colorIdx = ClustalColor_BLUE;
196 }
197 break;
198
199 case 'D': //{50%, DE,N} -> MAGENTA
200 if ((freqs['D'] + freqs['E']) > content50 || freqs['N'] > content50) {
201 colorIdx = ClustalColor_MAGENTA;
202 }
203 break;
204 case 'E': //{50%, DE,QE} -> MAGENTA
205 if ((freqs['D'] + freqs['E']) > content50 || (freqs['Q'] + freqs['E']) > content50) {
206 colorIdx = ClustalColor_MAGENTA;
207 }
208 break;
209 case 'G': //{ALWAYS} -> ORANGE
210 colorIdx = ClustalColor_ORANGE;
211 break;
212
213 case 'H': // {50%, P}{60%, WLVIMAFCYHP} -> CYAN
214 case 'Y':
215 if (freqs['P'] > content50 || basesContent(freqs, "WLVIMAFCYHP", 11) > content60) {
216 colorIdx = ClustalColor_CYAN;
217 }
218 break;
219
220 case 'P': //{ALWAYS} -> YELLOW
221 colorIdx = ClustalColor_YELLOW;
222 break;
223 default:
224 break;
225 }
226 setColorIdx(seq, pos, colorIdx);
227 }
228 }
229 }
230
getCacheIdx(int seq,int pos,bool & low) const231 int MsaColorSchemeClustalX::getCacheIdx(int seq, int pos, bool &low) const {
232 assert(objVersion == cacheVersion);
233 int res = seq * aliLen + pos;
234 low = !(res & 0x1);
235 return res / 2;
236 }
237
getColorIdx(int seq,int pos) const238 int MsaColorSchemeClustalX::getColorIdx(int seq, int pos) const {
239 bool low = false;
240 int cacheIdx = getCacheIdx(seq, pos, low);
241 quint8 val = colorsCache[cacheIdx];
242 int colorIdx = low ? val & 0x0F : (val & 0xF0) >> 4;
243 assert(colorIdx >= 0 && colorIdx < ClustalColor_NUM_COLORS);
244 return colorIdx;
245 }
246
setColorIdx(int seq,int pos,int colorIdx) const247 void MsaColorSchemeClustalX::setColorIdx(int seq, int pos, int colorIdx) const {
248 assert(colorIdx >= 0 && colorIdx < ClustalColor_NUM_COLORS);
249 bool low = false;
250 int cacheIdx = getCacheIdx(seq, pos, low);
251 quint8 val = colorsCache[cacheIdx];
252 if (low) {
253 val = (val & 0xF0) | colorIdx;
254 } else {
255 val = (val & 0x0F) | (colorIdx << 4);
256 }
257 colorsCache[cacheIdx] = val;
258 }
259
MsaColorSchemeClustalXFactory(QObject * parent,const QString & id,const QString & name,const AlphabetFlags & supportedAlphabets)260 MsaColorSchemeClustalXFactory::MsaColorSchemeClustalXFactory(QObject *parent, const QString &id, const QString &name, const AlphabetFlags &supportedAlphabets)
261 : MsaColorSchemeFactory(parent, id, name, supportedAlphabets) {
262 }
263
create(QObject * parent,MultipleAlignmentObject * maObj) const264 MsaColorScheme *MsaColorSchemeClustalXFactory::create(QObject *parent, MultipleAlignmentObject *maObj) const {
265 return new MsaColorSchemeClustalX(parent, this, maObj);
266 }
267
268 } // namespace U2
269