1 /* GCSDWindowAlgorithm.java 2 * 3 * created: Mon Oct 25 1999 4 * 5 * This file is part of Artemis 6 * 7 * Copyright (C) 1999 Genome Research Limited 8 * 9 * This program is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU General Public License 11 * as published by the Free Software Foundation; either version 2 12 * of the License, or (at your option) any later version. 13 * 14 * This program is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 * GNU General Public License for more details. 18 * 19 * You should have received a copy of the GNU General Public License 20 * along with this program; if not, write to the Free Software 21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 22 * 23 * $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/plot/GCSDWindowAlgorithm.java,v 1.4 2009-03-17 17:47:42 tjc Exp $ 24 */ 25 26 package uk.ac.sanger.artemis.plot; 27 28 import uk.ac.sanger.artemis.util.*; 29 import uk.ac.sanger.artemis.io.Range; 30 31 import uk.ac.sanger.artemis.sequence.*; 32 33 /** 34 * Objects of this class have one useful method - getValues (), which takes a 35 * range of bases and returns a single floating point number, which is the 36 * percent GC content of the range if the GC content of the range is more 37 * than 2.5 standard deviations from the average GC content of the sequence. 38 * It returns the average GC content of the sequence otherwise. The Strand 39 * to use is set in the constructor. 40 * 41 * @author Kim Rutherford 42 * @version $Id: GCSDWindowAlgorithm.java,v 1.4 2009-03-17 17:47:42 tjc Exp $ 43 **/ 44 45 public class GCSDWindowAlgorithm extends BaseAlgorithm { 46 /** 47 * Create a new GCSDWindowAlgorithm object. 48 * @param strand The strand to do the calculation on. 49 **/ GCSDWindowAlgorithm(final Strand strand)50 public GCSDWindowAlgorithm (final Strand strand) { 51 super (strand, "GC Content (%) With A 2.5 SD Cutoff", "sd_gc_content"); 52 53 setScalingFlag (false); 54 } 55 56 /** 57 * Return the percent GC between a pair of bases if the GC content of the 58 * range is more than 2.5 standard deviations from the average GC content 59 * of the sequence. It returns the average GC content of the sequence 60 * otherwise. 61 * @param start The start base (included in the range). 62 * @param end The end base (included in the range). 63 * @param values The one return value for this algorithm is returned in 64 * this array. 65 **/ getValues(int start, int end, float [] values)66 public void getValues (int start, int end, float [] values) { 67 final int window_size = end - start + 1; 68 69 final float standard_deviation; 70 71 if (window_size > getDefaultMaxWindowSize ().intValue ()) { 72 standard_deviation = calculateSD (window_size); 73 } else { 74 if (standard_deviations[window_size - 1] < 0) { 75 // set the cached value 76 standard_deviation = calculateSD (window_size); 77 standard_deviations[window_size - 1] = standard_deviation; 78 79 // System.err.println ("SD: " + standard_deviation); 80 } else { 81 standard_deviation = standard_deviations[window_size - 1]; 82 } 83 } 84 85 final String sequence; 86 87 try { 88 sequence = getStrand ().getSubSequence (new Range (start, end)); 89 } catch (OutOfRangeException e) { 90 throw new Error ("internal error - unexpected exception: " + e); 91 } 92 93 float gc_count = 0; 94 95 for (int i = 0 ; i < sequence.length () ; ++i) { 96 final char this_char = sequence.charAt (i); 97 // System.out.println (this_char); 98 99 if (this_char == 'g' || this_char == 'c') { 100 ++gc_count; 101 } 102 } 103 104 final float gc_content = gc_count/sequence.length () * 100; 105 106 final float gc_average = 107 getStrand ().getBases ().getAverageGCPercent (); 108 109 if (Math.abs (gc_content - gc_average) < standard_deviation * 2.5) { 110 values[0] = gc_average; 111 } else { 112 values[0] = gc_content; 113 } 114 } 115 116 /** 117 * Calculate and return the standard deviation of the GC content of the 118 * Bases object of the Strand that was passed to the constructor. 119 **/ calculateSD(final int window_size)120 private float calculateSD (final int window_size) { 121 // System.err.println ("calculateSD:"); 122 123 final int sequence_length = getStrand ().getBases ().getLength (); 124 125 final String bases = getStrand ().getBases ().toString (); 126 127 // the number of windows to search 128 final int window_count = sequence_length - window_size; 129 130 // System.err.println ("window_size: " + window_size); 131 // System.err.println ("window_count: " + window_count); 132 133 // this is the sum over all the windows of: 134 // (GC content) * (percent GC) / window_size * window_size 135 double sum_so_far = 0; 136 137 // this is the sum over all the windows of (GC content)/window_size 138 double gc_so_far = 0; 139 140 double current_gc_count = 0; 141 142 for (int i = - window_size ; i < window_count ; ++i) { 143 if (i > 0) { 144 final char previous_char = bases.charAt (i - 1); 145 146 if (previous_char == 'g' || previous_char == 'c') { 147 --current_gc_count; 148 } 149 } 150 151 final char new_char = bases.charAt (i + window_size); 152 153 if (new_char == 'g' || new_char == 'c') { 154 ++current_gc_count; 155 } 156 157 if (i >= 0) { 158 final double this_value = current_gc_count / window_size; 159 160 sum_so_far += this_value * this_value; 161 162 gc_so_far += this_value; 163 } 164 } 165 166 // System.err.println ("sum_so_far: " + sum_so_far); 167 // System.err.println ("gc_so_far: " + gc_so_far); 168 169 final double gc_average = gc_so_far / window_count; 170 171 // System.err.println ("gc_average: " + gc_average); 172 // System.err.println ("sum_so_far/window_count: " + 173 // (sum_so_far/window_count)); 174 // System.err.println ("gc_average*gc_average: " + (gc_average * gc_average)); 175 176 // System.err.println ("sd*sd: " + (sum_so_far / window_count - 177 // gc_average * gc_average)); 178 179 return (float) Math.sqrt (sum_so_far / window_count - 180 gc_average * gc_average) * 100; 181 } 182 183 /** 184 * Return the number of values a call to getValues () will return - one 185 * in this case. 186 **/ getValueCount()187 public int getValueCount () { 188 return 1; 189 } 190 191 /** 192 * Return the default or optimal window size. 193 * @return null is returned if this algorithm doesn't have optimal window 194 * size. 195 **/ getDefaultWindowSize()196 public Integer getDefaultWindowSize () { 197 final Integer super_window_size = super.getDefaultWindowSize (); 198 if (super_window_size != null) { 199 // the superclass version of getDefaultWindowSize () returns non-null 200 // iff the user has set the window size in the options file 201 return super_window_size; 202 } 203 return new Integer (1000); 204 } 205 206 /** 207 * Return the default maximum window size for this algorithm. 208 * @return null is returned if this algorithm doesn't have maximum window 209 * size. 210 **/ getDefaultMaxWindowSize()211 public Integer getDefaultMaxWindowSize () { 212 final Integer super_max_window_size = super.getDefaultMaxWindowSize (); 213 if (super_max_window_size != null) { 214 // the superclass version of getDefaultMaxWindowSize () returns non-null 215 // iff the user has set the max window size in the options file 216 return super_max_window_size; 217 } 218 return new Integer (5000); 219 } 220 221 /** 222 * Return the default minimum window size for this algorithm. 223 * @return null is returned if this algorithm doesn't have minimum window 224 * size. 225 **/ getDefaultMinWindowSize()226 public Integer getDefaultMinWindowSize () { 227 final Integer super_min_window_size = super.getDefaultMinWindowSize (); 228 if (super_min_window_size != null) { 229 // the superclass version of getDefaultMinWindowSize () returns non-null 230 // iff the user has set the minimum window size in the options file 231 return super_min_window_size; 232 } 233 return new Integer (100); 234 } 235 236 /** 237 * Return the default or optimal step size. 238 * @return null is returned if this algorithm doesn't have optimal step 239 * size. 240 **/ getDefaultStepSize(int window_size)241 public Integer getDefaultStepSize (int window_size) { 242 return new Integer (1); 243 } 244 245 /** 246 * Return the average value of function over the whole strand. 247 * @return null is returned if this algorithm doesn't have an average or if 248 * the average can't be calculated. 249 **/ getAverage()250 public Float getAverage () { 251 return new Float (getStrand ().getBases ().getAverageGCPercent ()); 252 } 253 254 /** 255 * Return the maximum value of this algorithm. 256 * @return The maximum is 100. 257 **/ getMaximumInternal()258 protected Float getMaximumInternal () { 259 return new Float (100); 260 } 261 262 /** 263 * Return the minimum value of this algorithm. 264 * @return The minimum is 0. 265 **/ getMinimumInternal()266 protected Float getMinimumInternal () { 267 return new Float (0); 268 } 269 270 /** 271 * This array contains a cache of the standard deviations of the GC content 272 * of the Bases object of the Strand that was passed to the constructor. 273 * (Set by the constructor). The array is indexed by window size. 274 **/ 275 float [] standard_deviations; 276 277 { 278 final int max_window_size = getDefaultMaxWindowSize ().intValue (); 279 standard_deviations = new float [max_window_size]; 280 281 for (int i = 0 ; i < max_window_size ; ++i) { 282 standard_deviations[i] = -1; 283 } 284 } 285 } 286