1 /* CoilFeatureAlgorithm.java 2 * 3 * created: Mon Dec 21 1998 4 * 5 * This file is part of Artemis 6 * 7 * Copyright (C) 1998,1999,2000 Genome Research Limited 8 * 9 * This program is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU General Public License 11 * as published by the Free Software Foundation; either version 2 12 * of the License, or (at your option) any later version. 13 * 14 * This program is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 * GNU General Public License for more details. 18 * 19 * You should have received a copy of the GNU General Public License 20 * along with this program; if not, write to the Free Software 21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 22 * 23 * $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/plot/CoilFeatureAlgorithm.java,v 1.1 2004-06-09 09:51:24 tjc Exp $ 24 **/ 25 26 package uk.ac.sanger.artemis.plot; 27 28 import uk.ac.sanger.artemis.*; 29 import uk.ac.sanger.artemis.sequence.*; 30 import uk.ac.sanger.artemis.io.Range; 31 32 /** 33 * Objects of this class have one useful method - getValues (), which takes a 34 * range of bases and returns a single floating point number. See 35 * "Predicting Coiled Coils from Protein Sequences", Science Vol. 252 page 36 * 1162 for details of the alogrithm used. 37 * 38 * @author Kim Rutherford 39 * @version $Id: CoilFeatureAlgorithm.java,v 1.1 2004-06-09 09:51:24 tjc Exp $ 40 **/ 41 42 public class CoilFeatureAlgorithm extends FeatureAlgorithm { 43 /** 44 * Create a new CoilFeatureAlgorithm object for the given Feature. 45 **/ CoilFeatureAlgorithm(Feature feature)46 public CoilFeatureAlgorithm (Feature feature) { 47 super (feature, "Coiled Coils", "coiled_coil"); 48 } 49 50 private static final int WINDOW_SIZE = 28; 51 52 /** 53 * Return the coiled coil score of the (28) residues in the given range. 54 * The start and end values should be 28*3 bases apart (inclusive). 55 * @param start The start base (included in the range). 56 * @param end The end base (included in the range). 57 * @param values The one return value for this algorithm is returned in 58 * this array. 59 **/ getValues(int start, int end, float [] values)60 public void getValues (int start, int end, float [] values) { 61 62 final int number_of_weights = weight_array[0].length; 63 64 final AminoAcidSequence translation = getFeature ().getTranslation (); 65 66 final String translation_string = 67 translation.toString ().substring (start, end + 1); 68 69 final float [] scores = new float [WINDOW_SIZE]; 70 71 float max_score = -1.0F; 72 73 int k = 0; 74 75 for (int frame = 0 ; frame < 7 ; ++frame) { 76 k = frame - start % 7; 77 if (k < 0) { 78 k = k + 7; 79 } 80 for (int j = 0 ; j < WINDOW_SIZE ; ++j) { 81 // System.out.println (k + " " + frame + " " + j); 82 83 final char this_char = translation_string.charAt (j); 84 final int index = 85 AminoAcidSequence.getSymbolIndex (this_char); 86 87 final float this_score = weight_array [index][k]; 88 89 scores [j] = this_score; 90 91 ++k; 92 93 if (k >= 7) { 94 k = 0; 95 } 96 } 97 98 final float score = geometricMean (scores); 99 100 if (score > max_score) { 101 max_score = score; 102 } 103 } 104 105 values[0] = probCoil (max_score); 106 } 107 108 /** 109 * Return the geometric mean of the floating point values in the argument. 110 **/ geometricMean(final float [] scores)111 private float geometricMean (final float [] scores) { 112 float total = 1; 113 114 for (int i = 0 ; i < scores.length ; ++i) { 115 total *= scores [i]; 116 } 117 118 return (float) Math.pow (total, 1.0 / scores.length); 119 } 120 121 122 /** 123 * Returns probability of coiled-coil for the given score from the 124 * statistics in the original paper. 125 **/ probCoil(final float score)126 private float probCoil (final float score) { 127 final float gcc_mean = 1.63F; 128 final float gg_mean = 0.77F; 129 final float gcc_sd = 0.24F; 130 final float gg_sd =0.20F; 131 final float gcc = gauss (gcc_mean, gcc_sd, score); 132 final float gg = gauss (gg_mean, gg_sd, score); 133 134 return (float) (gcc/(30.*gg+gcc)); 135 } 136 137 /** 138 * Calculates probability based on a Gaussian distribution. 139 **/ gauss(final float mean, final float sd, final float score)140 private float gauss (final float mean, final float sd, final float score) { 141 return 142 (float) (Math.pow (Math.E, -0.5 * Math.pow ((score - mean) / sd, 2)) / 143 (sd * 2.0 * Math.PI)); 144 } 145 146 /** 147 * Return the number of values a call to getValues () will return - one 148 * in this case. 149 **/ getValueCount()150 public int getValueCount () { 151 return 1; 152 } 153 154 /** 155 * Return the default or optimal window size. 156 * @return null is returned if this algorithm doesn't have optimal window 157 * size. 158 **/ getDefaultWindowSize()159 public Integer getDefaultWindowSize () { 160 return new Integer (WINDOW_SIZE); 161 } 162 163 /** 164 * Return the default maximum window size for this algorithm. 165 * @return null is returned if this algorithm doesn't have maximum window 166 * size. 167 **/ getDefaultMaxWindowSize()168 public Integer getDefaultMaxWindowSize () { 169 return new Integer (WINDOW_SIZE); 170 } 171 172 /** 173 * Return the default minimum window size for this algorithm. 174 * @return null is returned if this algorithm doesn't have minimum window 175 * size. 176 **/ getDefaultMinWindowSize()177 public Integer getDefaultMinWindowSize () { 178 return new Integer (WINDOW_SIZE); 179 } 180 181 /** 182 * Return the default or optimal step size. 183 * @return null is returned if this algorithm doesn't have optimal step 184 * size. 185 **/ getDefaultStepSize(int window_size)186 public Integer getDefaultStepSize (int window_size) { 187 return new Integer (1); 188 } 189 /** 190 * Return the maximum value of this algorithm. 191 * @return The maximum is 4. 192 **/ getMaximumInternal()193 protected Float getMaximumInternal () { 194 return new Float (1.01); 195 } 196 197 /** 198 * Return the minimum value of this algorithm. 199 * @return The minimum is -4.. 200 **/ getMinimumInternal()201 protected Float getMinimumInternal () { 202 return new Float (-0.01); 203 } 204 205 /** 206 * Return the average value of function over the whole strand. 207 * @return null is returned if this algorithm doesn't have an average or if 208 * the average can't be calculated. 209 **/ getAverage()210 public Float getAverage () { 211 return null; 212 } 213 214 /** 215 * The weighting matrix from the Science paper. 216 **/ 217 private final static float [] [] weight_array = { 218 { 1.297F,1.551F,1.084F,2.612F,0.377F,1.248F,0.877F, }, // Ala A 7.59 219 { 0.659F,1.163F,1.210F,0.031F,1.358F,1.937F,1.798F, }, // arg R 5.39 220 { 0.835F,1.475F,1.534F,0.039F,1.722F,2.456F,2.280F, }, // Asn N 4.25 221 { 0.030F,2.352F,2.268F,0.237F,0.663F,1.620F,1.448F, }, // Asp D 5.03 222 { 0.824F,0.022F,0.308F,0.152F,0.180F,0.156F,0.044F, }, // Cys C 1.86 223 { 0.179F,2.114F,1.778F,0.631F,2.550F,1.578F,2.526F, }, // Gln Q 4.27 224 { 0.262F,3.496F,3.108F,0.998F,5.685F,2.494F,3.048F, }, // Glu E 6.10 225 { 0.045F,0.275F,0.578F,0.216F,0.211F,0.426F,0.156F, }, // Gly G 7.10 226 { 0.347F,0.275F,0.679F,0.395F,0.294F,0.579F,0.213F, }, // His H 2.25 227 { 2.597F,0.098F,0.345F,0.894F,0.514F,0.471F,0.431F, }, // Ile I 5.35 228 { 3.167F,0.297F,0.398F,3.902F,0.585F,0.501F,0.483F, }, // Leu L 9.33 229 { 1.375F,2.639F,1.763F,0.191F,1.815F,1.961F,2.795F, }, // Lys K 5.72 230 { 2.240F,0.370F,0.480F,1.409F,0.541F,0.772F,0.663F, }, // Met M 2.34 231 { 0.531F,0.076F,0.403F,0.662F,0.189F,0.106F,0.013F, }, // Phe F 3.88 232 { 0.0F, 0.008F,0.0F, 0.013F,0.0F, 0.0F, 0.0F, }, // Pro P 5.28 233 { 0.382F,0.583F,1.052F,0.419F,0.525F,0.916F,0.628F, }, // Ser S 7.28 234 { 0.169F,0.702F,0.955F,0.654F,0.791F,0.843F,0.647F, }, // Thr T 5.97 235 { 0.240F,0.0F, 0.0F, 0.456F,0.019F,0.0F, 0.0F, }, // Trp W 1.41 236 { 1.417F,0.090F,0.122F,1.659F,0.190F,0.130F,0.155F, }, // Tyr Y 3.16 237 { 1.665F,0.403F,0.386F,0.949F,0.211F,0.342F,0.360F, }, // Val V 6.42 238 { 0.000F,0.000F,0.000F,0.000F,0.000F,0.000F,0.000F, }, // Opl * 0.00 239 { 0.000F,0.000F,0.000F,0.000F,0.000F,0.000F,0.000F, }, // Ocr # 0.00 240 { 0.000F,0.000F,0.000F,0.000F,0.000F,0.000F,0.000F, }, // Amb + 0.00 241 { 0.000F,0.000F,0.000F,0.000F,0.000F,0.000F,0.000F, }, // --- . 0.00 242 }; 243 } 244