1 /**
2  * UGENE - Integrated Bioinformatics Tools.
3  * Copyright (C) 2008-2021 UniPro <ugene@unipro.ru>
4  * http://ugene.net
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
19  * MA 02110-1301, USA.
20  */
21 
22 #include "MsaColorSchemeClustalX.h"
23 
24 #include <U2Algorithm/MSAConsensusUtils.h>
25 
26 #include <U2Core/MultipleAlignmentObject.h>
27 
28 namespace U2 {
29 
MsaColorSchemeClustalX(QObject * parent,const MsaColorSchemeFactory * factory,MultipleAlignmentObject * maObj)30 MsaColorSchemeClustalX::MsaColorSchemeClustalX(QObject *parent, const MsaColorSchemeFactory *factory, MultipleAlignmentObject *maObj)
31     : MsaColorScheme(parent, factory, maObj),
32       objVersion(1),
33       cacheVersion(0),
34       aliLen(maObj->getLength()) {
35     colorByIdx[ClustalColor_BLUE] = "#80a0f0";
36     colorByIdx[ClustalColor_RED] = "#f01505";
37     colorByIdx[ClustalColor_GREEN] = "#15c015";
38     colorByIdx[ClustalColor_PINK] = "#f08080";
39     colorByIdx[ClustalColor_MAGENTA] = "#c048c0";
40     colorByIdx[ClustalColor_ORANGE] = "#f09048";
41     colorByIdx[ClustalColor_CYAN] = "#15a4a4";
42     colorByIdx[ClustalColor_YELLOW] = "#c0c000";
43 
44     connect(maObj, SIGNAL(si_alignmentChanged(const MultipleAlignment &, const MaModificationInfo &)), SLOT(sl_alignmentChanged()));
45 }
46 
getBackgroundColor(int seq,int pos,char) const47 QColor MsaColorSchemeClustalX::getBackgroundColor(int seq, int pos, char) const {
48     if (cacheVersion != objVersion) {
49         updateCache();
50     }
51 
52     int idx = getColorIdx(seq, pos);
53     assert(idx >= 0 && idx < ClustalColor_NUM_COLORS);
54     return colorByIdx[idx];
55 }
56 
getFontColor(int seq,int pos,char c) const57 QColor MsaColorSchemeClustalX::getFontColor(int seq, int pos, char c) const {
58     Q_UNUSED(seq);
59     Q_UNUSED(pos);
60     Q_UNUSED(c);
61 
62     return QColor();
63 }
64 
sl_alignmentChanged()65 void MsaColorSchemeClustalX::sl_alignmentChanged() {
66     objVersion++;
67 }
68 
namespace {

/**
 * Sums the frequencies of the first 'len' characters of 'str'.
 *
 * @param freqs frequency table indexed by the unsigned value of a character
 * @param str   characters whose frequencies are added up
 * @param len   number of characters of 'str' to process
 * @return the total of freqs[ch] over the processed characters (0 when len <= 0)
 */
int basesContent(const int *freqs, const char *str, int len) {
    int total = 0;
    const char *const end = str + (len > 0 ? len : 0);
    for (const char *cur = str; cur != end; ++cur) {
        // Index through unsigned char so that characters above 127 never give a negative index.
        total += freqs[static_cast<unsigned char>(*cur)];
    }
    return total;
}

}  // namespace
81 
updateCache() const82 void MsaColorSchemeClustalX::updateCache() const {
83     if (cacheVersion == objVersion) {
84         return;
85     }
86 
87     // compute colors for whole ali
88     // use 4 bits per color
89     const MultipleAlignment msa = maObj->getMultipleAlignment();
90     int nSeq = msa->getNumRows();
91     aliLen = maObj->getLength();
92     cacheVersion = objVersion;
93 
94     bool stub = false;
95     int cacheSize = getCacheIdx(nSeq, aliLen, stub) + 1;
96     colorsCache.resize(cacheSize);
97 
98     /*  source: http://ekhidna.biocenter.helsinki.fi/pfam2/clustal_colours
99 
100         BLUE
101             (W,L,V,I,M,F):  {50%, P}{60%, WLVIMAFCYHP}
102             (A):            {50%, P}{60%, WLVIMAFCYHP}{85%, T,S,G}
103             (C):            {50%, P}{60%, WLVIMAFCYHP}{85%, S}
104         RED
105             (K,R):          {60%, KR}{85%, Q}
106         GREEN
107             (T):            {50%, TS}{60%, WLVIMAFCYHP}
108             (S):            {50%, TS}{80%, WLVIMAFCYHP}
109             (N):            {50%, N}{85%, D}
110             (Q):            {50%, QE}{60%, KR}
111         PINK
112             (C):            {85%, C}
113         MAGENTA
114             (D):            {50%, DE,N}
115             (E):            {50%, DE,QE}
116         ORANGE
117             (G):            {ALWAYS}
118         CYAN
119             (H,Y):          {50%, P}{60%, WLVIMAFCYHP}
120         YELLOW
121             (P):            {ALWAYS}
122 
123         WARN: do not count gaps in percents!
124     */
125 
126     QVector<int> freqsByChar(256);
127     const int *freqs = freqsByChar.data();
128 
129     for (int pos = 0; pos < aliLen; pos++) {
130         int nonGapChars = 0;
131         MSAConsensusUtils::getColumnFreqs(msa, pos, freqsByChar, nonGapChars);
132         int content50 = int(nonGapChars * 50.0 / 100);
133         int content60 = int(nonGapChars * 60.0 / 100);
134         int content80 = int(nonGapChars * 80.0 / 100);
135         int content85 = int(nonGapChars * 85.0 / 100);
136 
137         for (int seq = 0; seq < nSeq; seq++) {
138             char c = msa->charAt(seq, pos);
139             int colorIdx = ClustalColor_NO_COLOR;
140             switch (c) {
141                 case 'W':  //(W,L,V,I,M,F): {50%, P}{60%, WLVIMAFCYHP} -> BLUE
142                 case 'L':
143                 case 'V':
144                 case 'I':
145                 case 'M':
146                 case 'F':
147                     if (freqs['P'] > content50 || basesContent(freqs, "WLVIMAFCYHP", 11) > content60) {
148                         colorIdx = ClustalColor_BLUE;
149                     }
150                     break;
151                 case 'A':  // {50%, P}{60%, WLVIMAFCYHP}{85%, T,S,G} -> BLUE
152                     if (freqs['P'] > content50 || basesContent(freqs, "WLVIMAFCYHP", 11) > content60) {
153                         colorIdx = ClustalColor_BLUE;
154                     } else if (freqs['T'] > content85 || freqs['S'] > content85 || freqs['G'] > 85) {
155                         colorIdx = ClustalColor_BLUE;
156                     }
157                     break;
158 
159                 case 'K':  //{60%, KR}{85%, Q} -> RED
160                 case 'R':
161                     if ((freqs['K'] + freqs['R'] > content60) || freqs['Q'] > content85) {
162                         colorIdx = ClustalColor_RED;
163                     }
164                     break;
165 
166                 case 'T':  // {50%, TS}{60%, WLVIMAFCYHP} -> GREEN
167                     if ((freqs['T'] + freqs['S'] > content50) || basesContent(freqs, "WLVIMAFCYHP", 11) > content60) {
168                         colorIdx = ClustalColor_GREEN;
169                     }
170                     break;
171 
172                 case 'S':  // {50%, TS}{80%, WLVIMAFCYHP} -> GREEN
173                     if ((freqs['T'] + freqs['S'] > content50) || basesContent(freqs, "WLVIMAFCYHP", 11) > content80) {
174                         colorIdx = ClustalColor_GREEN;
175                     }
176                     break;
177 
178                 case 'N':  // {50%, N}{85%, D} -> GREEN
179                     if (freqs['N'] > content50 || freqs['D'] > content85) {
180                         colorIdx = ClustalColor_GREEN;
181                     }
182                     break;
183 
184                 case 'Q':  // {50%, QE}{60%, KR} -> GREEN
185                     if ((freqs['Q'] + freqs['E']) > content50 || (freqs['K'] + freqs['R']) > content60) {
186                         colorIdx = ClustalColor_GREEN;
187                     }
188                     break;
189 
190                 case 'C':  //{85%, C} -> PINK
191                     //{50%, P}{60%, WLVIMAFCYHP}{85%, S} -> BLUE
192                     if (freqs['C'] > content85) {
193                         colorIdx = ClustalColor_PINK;
194                     } else if (freqs['P'] > content50 || basesContent(freqs, "WLVIMAFCYHP", 11) > content60 || freqs['S'] > content85) {
195                         colorIdx = ClustalColor_BLUE;
196                     }
197                     break;
198 
199                 case 'D':  //{50%, DE,N} -> MAGENTA
200                     if ((freqs['D'] + freqs['E']) > content50 || freqs['N'] > content50) {
201                         colorIdx = ClustalColor_MAGENTA;
202                     }
203                     break;
204                 case 'E':  //{50%, DE,QE} -> MAGENTA
205                     if ((freqs['D'] + freqs['E']) > content50 || (freqs['Q'] + freqs['E']) > content50) {
206                         colorIdx = ClustalColor_MAGENTA;
207                     }
208                     break;
209                 case 'G':  //{ALWAYS} -> ORANGE
210                     colorIdx = ClustalColor_ORANGE;
211                     break;
212 
213                 case 'H':  // {50%, P}{60%, WLVIMAFCYHP} -> CYAN
214                 case 'Y':
215                     if (freqs['P'] > content50 || basesContent(freqs, "WLVIMAFCYHP", 11) > content60) {
216                         colorIdx = ClustalColor_CYAN;
217                     }
218                     break;
219 
220                 case 'P':  //{ALWAYS} -> YELLOW
221                     colorIdx = ClustalColor_YELLOW;
222                     break;
223                 default:
224                     break;
225             }
226             setColorIdx(seq, pos, colorIdx);
227         }
228     }
229 }
230 
getCacheIdx(int seq,int pos,bool & low) const231 int MsaColorSchemeClustalX::getCacheIdx(int seq, int pos, bool &low) const {
232     assert(objVersion == cacheVersion);
233     int res = seq * aliLen + pos;
234     low = !(res & 0x1);
235     return res / 2;
236 }
237 
getColorIdx(int seq,int pos) const238 int MsaColorSchemeClustalX::getColorIdx(int seq, int pos) const {
239     bool low = false;
240     int cacheIdx = getCacheIdx(seq, pos, low);
241     quint8 val = colorsCache[cacheIdx];
242     int colorIdx = low ? val & 0x0F : (val & 0xF0) >> 4;
243     assert(colorIdx >= 0 && colorIdx < ClustalColor_NUM_COLORS);
244     return colorIdx;
245 }
246 
setColorIdx(int seq,int pos,int colorIdx) const247 void MsaColorSchemeClustalX::setColorIdx(int seq, int pos, int colorIdx) const {
248     assert(colorIdx >= 0 && colorIdx < ClustalColor_NUM_COLORS);
249     bool low = false;
250     int cacheIdx = getCacheIdx(seq, pos, low);
251     quint8 val = colorsCache[cacheIdx];
252     if (low) {
253         val = (val & 0xF0) | colorIdx;
254     } else {
255         val = (val & 0x0F) | (colorIdx << 4);
256     }
257     colorsCache[cacheIdx] = val;
258 }
259 
MsaColorSchemeClustalXFactory(QObject * parent,const QString & id,const QString & name,const AlphabetFlags & supportedAlphabets)260 MsaColorSchemeClustalXFactory::MsaColorSchemeClustalXFactory(QObject *parent, const QString &id, const QString &name, const AlphabetFlags &supportedAlphabets)
261     : MsaColorSchemeFactory(parent, id, name, supportedAlphabets) {
262 }
263 
create(QObject * parent,MultipleAlignmentObject * maObj) const264 MsaColorScheme *MsaColorSchemeClustalXFactory::create(QObject *parent, MultipleAlignmentObject *maObj) const {
265     return new MsaColorSchemeClustalX(parent, this, maObj);
266 }
267 
268 }  // namespace U2
269