1 /* CoilFeatureAlgorithm.java
2  *
3  * created: Mon Dec 21 1998
4  *
5  * This file is part of Artemis
6  *
7  * Copyright (C) 1998,1999,2000  Genome Research Limited
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License
11  * as published by the Free Software Foundation; either version 2
12  * of the License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
22  *
23  * $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/plot/CoilFeatureAlgorithm.java,v 1.1 2004-06-09 09:51:24 tjc Exp $
24  **/
25 
26 package uk.ac.sanger.artemis.plot;
27 
28 import uk.ac.sanger.artemis.*;
29 import uk.ac.sanger.artemis.sequence.*;
30 import uk.ac.sanger.artemis.io.Range;
31 
32 /**
33  *  Objects of this class have one useful method - getValues (), which takes a
34  *  range of bases and returns a single floating point number.  See
35  *  "Predicting Coiled Coils from Protein Sequences", Science Vol. 252 page
36  *  1162 for details of the alogrithm used.
37  *
38  *  @author Kim Rutherford
39  *  @version $Id: CoilFeatureAlgorithm.java,v 1.1 2004-06-09 09:51:24 tjc Exp $
40  **/
41 
42 public class CoilFeatureAlgorithm extends FeatureAlgorithm {
43   /**
44    *  Create a new CoilFeatureAlgorithm object for the given Feature.
45    **/
CoilFeatureAlgorithm(Feature feature)46   public CoilFeatureAlgorithm (Feature feature) {
47     super (feature, "Coiled Coils", "coiled_coil");
48   }
49 
50   private static final int WINDOW_SIZE = 28;
51 
52   /**
53    *  Return the coiled coil score of the (28) residues in the given range.
54    *  The start and end values should be 28*3 bases apart (inclusive).
55    *  @param start The start base (included in the range).
56    *  @param end The end base (included in the range).
57    *  @param values The one return value for this algorithm is returned in
58    *    this array.
59    **/
getValues(int start, int end, float [] values)60   public void getValues (int start, int end, float [] values) {
61 
62     final int number_of_weights = weight_array[0].length;
63 
64     final AminoAcidSequence translation = getFeature ().getTranslation ();
65 
66     final String translation_string =
67       translation.toString ().substring (start, end + 1);
68 
69     final float [] scores = new float [WINDOW_SIZE];
70 
71     float max_score = -1.0F;
72 
73     int k = 0;
74 
75     for (int frame = 0 ; frame < 7 ; ++frame) {
76       k = frame - start % 7;
77       if (k < 0) {
78         k = k + 7;
79       }
80       for (int j = 0 ; j < WINDOW_SIZE ; ++j) {
81 //        System.out.println (k + " " + frame + " " + j);
82 
83         final char this_char = translation_string.charAt (j);
84         final int index =
85           AminoAcidSequence.getSymbolIndex (this_char);
86 
87         final float this_score = weight_array [index][k];
88 
89         scores [j] = this_score;
90 
91         ++k;
92 
93         if (k >= 7) {
94           k = 0;
95         }
96       }
97 
98       final float score = geometricMean (scores);
99 
100       if (score > max_score) {
101         max_score = score;
102       }
103     }
104 
105     values[0] = probCoil (max_score);
106   }
107 
108   /**
109    *  Return the geometric mean of the floating point values in the argument.
110    **/
geometricMean(final float [] scores)111   private float geometricMean (final float [] scores) {
112     float total = 1;
113 
114     for (int i = 0 ; i < scores.length ; ++i) {
115       total *= scores [i];
116     }
117 
118     return (float) Math.pow (total, 1.0 / scores.length);
119   }
120 
121 
122   /**
123    *  Returns probability of coiled-coil for the given score from the
124    *  statistics in the original paper.
125    **/
probCoil(final float score)126   private float probCoil (final float score) {
127     final float gcc_mean = 1.63F;
128     final float gg_mean = 0.77F;
129     final float gcc_sd = 0.24F;
130     final float gg_sd =0.20F;
131     final float gcc = gauss (gcc_mean, gcc_sd, score);
132     final float gg = gauss (gg_mean,  gg_sd,  score);
133 
134     return (float) (gcc/(30.*gg+gcc));
135   }
136 
137   /**
138    *  Calculates probability based on a Gaussian distribution.
139    **/
gauss(final float mean, final float sd, final float score)140   private float gauss (final float mean, final float sd, final float score) {
141     return
142       (float) (Math.pow (Math.E, -0.5 * Math.pow ((score - mean) / sd, 2)) /
143       (sd * 2.0 * Math.PI));
144   }
145 
146   /**
147    *  Return the number of values a call to getValues () will return - one
148    *  in this case.
149    **/
getValueCount()150   public int getValueCount () {
151     return 1;
152   }
153 
154   /**
155    *  Return the default or optimal window size.
156    *  @return null is returned if this algorithm doesn't have optimal window
157    *    size.
158    **/
getDefaultWindowSize()159   public Integer getDefaultWindowSize () {
160     return new Integer (WINDOW_SIZE);
161   }
162 
163   /**
164    *  Return the default maximum window size for this algorithm.
165    *  @return null is returned if this algorithm doesn't have maximum window
166    *    size.
167    **/
getDefaultMaxWindowSize()168   public Integer getDefaultMaxWindowSize () {
169     return new Integer (WINDOW_SIZE);
170   }
171 
172   /**
173    *  Return the default minimum window size for this algorithm.
174    *  @return null is returned if this algorithm doesn't have minimum window
175    *    size.
176    **/
getDefaultMinWindowSize()177   public Integer getDefaultMinWindowSize () {
178     return new Integer (WINDOW_SIZE);
179   }
180 
181   /**
182    *  Return the default or optimal step size.
183    *  @return null is returned if this algorithm doesn't have optimal step
184    *    size.
185    **/
getDefaultStepSize(int window_size)186   public Integer getDefaultStepSize (int window_size) {
187     return new Integer (1);
188   }
189   /**
190    *  Return the maximum value of this algorithm.
191    *  @return The maximum is 4.
192    **/
getMaximumInternal()193   protected Float getMaximumInternal () {
194     return new Float (1.01);
195   }
196 
197   /**
198    *  Return the minimum value of this algorithm.
199    *  @return The minimum is -4..
200    **/
getMinimumInternal()201   protected Float getMinimumInternal () {
202     return new Float (-0.01);
203   }
204 
205   /**
206    *  Return the average value of function over the whole strand.
207    *  @return null is returned if this algorithm doesn't have an average or if
208    *    the average can't be calculated.
209    **/
getAverage()210   public Float getAverage () {
211     return null;
212   }
213 
214   /**
215    *  The weighting matrix from the Science paper.
216    **/
217   private final static float [] [] weight_array = {
218     { 1.297F,1.551F,1.084F,2.612F,0.377F,1.248F,0.877F, },  // Ala A    7.59
219     { 0.659F,1.163F,1.210F,0.031F,1.358F,1.937F,1.798F, },  // arg R    5.39
220     { 0.835F,1.475F,1.534F,0.039F,1.722F,2.456F,2.280F, },  // Asn N    4.25
221     { 0.030F,2.352F,2.268F,0.237F,0.663F,1.620F,1.448F, },  // Asp D    5.03
222     { 0.824F,0.022F,0.308F,0.152F,0.180F,0.156F,0.044F, },  // Cys C    1.86
223     { 0.179F,2.114F,1.778F,0.631F,2.550F,1.578F,2.526F, },  // Gln Q    4.27
224     { 0.262F,3.496F,3.108F,0.998F,5.685F,2.494F,3.048F, },  // Glu E    6.10
225     { 0.045F,0.275F,0.578F,0.216F,0.211F,0.426F,0.156F, },  // Gly G    7.10
226     { 0.347F,0.275F,0.679F,0.395F,0.294F,0.579F,0.213F, },  // His H    2.25
227     { 2.597F,0.098F,0.345F,0.894F,0.514F,0.471F,0.431F, },  // Ile I    5.35
228     { 3.167F,0.297F,0.398F,3.902F,0.585F,0.501F,0.483F, },  // Leu L    9.33
229     { 1.375F,2.639F,1.763F,0.191F,1.815F,1.961F,2.795F, },  // Lys K    5.72
230     { 2.240F,0.370F,0.480F,1.409F,0.541F,0.772F,0.663F, },  // Met M    2.34
231     { 0.531F,0.076F,0.403F,0.662F,0.189F,0.106F,0.013F, },  // Phe F    3.88
232     { 0.0F,  0.008F,0.0F,  0.013F,0.0F,  0.0F,  0.0F,   },  // Pro P    5.28
233     { 0.382F,0.583F,1.052F,0.419F,0.525F,0.916F,0.628F, },  // Ser S    7.28
234     { 0.169F,0.702F,0.955F,0.654F,0.791F,0.843F,0.647F, },  // Thr T    5.97
235     { 0.240F,0.0F,  0.0F,  0.456F,0.019F,0.0F,  0.0F,   },  // Trp W    1.41
236     { 1.417F,0.090F,0.122F,1.659F,0.190F,0.130F,0.155F, },  // Tyr Y    3.16
237     { 1.665F,0.403F,0.386F,0.949F,0.211F,0.342F,0.360F, },  // Val V    6.42
238     { 0.000F,0.000F,0.000F,0.000F,0.000F,0.000F,0.000F, },  // Opl *    0.00
239     { 0.000F,0.000F,0.000F,0.000F,0.000F,0.000F,0.000F, },  // Ocr #    0.00
240     { 0.000F,0.000F,0.000F,0.000F,0.000F,0.000F,0.000F, },  // Amb +    0.00
241     { 0.000F,0.000F,0.000F,0.000F,0.000F,0.000F,0.000F, },  // --- .    0.00
242   };
243 }
244