1 /*************************************************************** 2 * 3 * This program is free software; you can redistribute it and/or 4 * modify it under the terms of the GNU General Public License 5 * as published by the Free Software Foundation; either version 2 6 * of the License, or (at your option) any later version. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * GNU General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public License 14 * along with this program; if not, write to the Free Software 15 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 16 * 17 * @author: Copyright (C) Tim Carver 18 * 19 ***************************************************************/ 20 21 package org.emboss.jemboss.editor; 22 23 import java.io.*; 24 import java.util.Vector; 25 import java.util.Enumeration; 26 27 import javax.swing.JOptionPane; 28 29 30 /** 31 * 32 * Calculate a consensus using the same method as 'cons' 33 * in the EMBOSS suite. 34 * 35 */ 36 public class Consensus 37 { 38 private int matrix[][]; 39 private String cons = ""; 40 41 public static final String DEFAULT_SEQUENCE_NAME ="Consensus"; 42 43 /** 44 * 45 * @param matrixFile scoring matrix file 46 * @param seqs vector of Sequence objects 47 * @param fplural defines no. of +ve scoring matches below 48 * which there is no consensus. 49 * @param setcase upper/lower case given if score above/below 50 * user defined +ve matching threshold. 51 * @param identity defines the number of identical symbols 52 * requires in an alignment column for it to 53 * included in the consensus. 54 * 55 */ Consensus(File matrixFile, Vector seqs, float fplural, float setcase, int identity)56 public Consensus(File matrixFile, Vector seqs, float fplural, 57 float setcase, int identity) 58 { 59 Matrix mat = new Matrix(matrixFile); 60 matrix = mat.getMatrix(); 61 calculateCons(mat,seqs,fplural,setcase,identity); 62 } 63 64 /** 65 * 66 * @param matrixJar jar file containing scoring matrix 67 * @param matrixFileName scoring matrix file name 68 * @param seqs vector of Sequence objects 69 * @param fplural defines no. of +ve scoring matches below 70 * which there is no consensus. 71 * @param setcase upper/lower case given if score above/below 72 * user defined +ve matching threshold. 73 * @param identity defines the number of identical symbols 74 * requires in an alignment column for it to 75 * included in the consensus. 76 * 77 */ Consensus(String matrixJar, String matrixFileName, Vector seqs, float fplural, float setcase, int identity)78 public Consensus(String matrixJar, String matrixFileName, 79 Vector seqs, float fplural, 80 float setcase, int identity) 81 { 82 this(new Matrix(matrixJar,matrixFileName), 83 seqs,fplural,setcase,identity); 84 } 85 86 /** 87 * 88 * @param mat scoring matrix 89 * @param seqs vector of Sequence objects 90 * @param fplural defines no. of +ve scoring matches below 91 * which there is no consensus. 92 * @param setcase upper/lower case given if score above/below 93 * user defined +ve matching threshold. 94 * @param identity defines the number of identical symbols 95 * requires in an alignment column for it to 96 * included in the consensus. 97 * 98 */ Consensus(Matrix mat, Vector seqs, float fplural, float setcase, int identity)99 public Consensus(Matrix mat, 100 Vector seqs, float fplural, 101 float setcase, int identity) 102 { 103 matrix = mat.getMatrix(); 104 calculateCons(mat,seqs,fplural,setcase,identity); 105 } 106 107 108 /** 109 * 110 * Routine to calculate the consensus of a set of sequences 111 * 112 * @param mat scoring matrix 113 * @param seqs vector of Sequence objects 114 * @param fplural defines no. of +ve scoring matches below 115 * which there is no consensus. 116 * @param setcase upper/lower case given if score above/below 117 * user defined +ve matching threshold. 118 * @param identity defines the number of identical symbols 119 * requires in an alignment column for it to 120 * included in the consensus. 121 * 122 */ calculateCons(Matrix mat, Vector seqs, float fplural, float setcase, int identity)123 private void calculateCons(Matrix mat, Vector seqs, float fplural, 124 float setcase, int identity) 125 { 126 int nseqs = seqs.size(); 127 int mlen = getMaxSequenceLength(seqs); 128 129 String nocon = "-"; 130 if(((Sequence)seqs.get(0)).isProtein()) 131 nocon = "x"; 132 133 String res = ""; 134 135 int matsize = mat.getIDimension(); 136 float identical[] = new float[matsize]; 137 float matching[] = new float[matsize]; 138 float score[] = new float[nseqs]; 139 140 int i; 141 int j; 142 int m1; 143 int m2; 144 float contri = 0.f; 145 float contrj = 0.f; 146 String s1; 147 String s2; 148 149 for(int k=0; k< mlen; k++) 150 { 151 res = nocon; 152 for(i=0;i<matsize;i++) /* reset id's and +ve matches */ 153 { 154 identical[i] = 0.f; 155 matching[i] = 0.f; 156 } 157 158 for(i=0;i<nseqs;i++) 159 score[i] = 0.f; 160 161 for(i=0;i<nseqs;i++) /* generate score for columns */ 162 { 163 s1 = getResidue(seqs,i,k); 164 m1 = mat.getMatrixIndex(s1); 165 166 if(m1 >= 0) 167 identical[m1] += getSequenceWeight(seqs,i); 168 for(j=i+1;j<nseqs;j++) 169 { 170 s2 = getResidue(seqs,j,k); 171 m2 = mat.getMatrixIndex(s2); 172 if(m1 >= 0 && m2 >= 0) 173 { 174 contri = matrix[m1][m2]*getSequenceWeight(seqs,j)+score[i]; 175 contrj = matrix[m1][m2]*getSequenceWeight(seqs,i)+score[j]; 176 score[i] = contri; 177 score[j] = contrj; 178 } 179 } 180 } 181 182 int highindex = -1; 183 float max = -(float)Integer.MAX_VALUE; 184 for(i=0;i<nseqs;i++) 185 { 186 if( score[i] > max || 187 (score[i] == max && 188 (getResidue(seqs,highindex,k).equals("-") || 189 getResidue(seqs,highindex,k).equals(".")) )) 190 { 191 highindex = i; 192 max = score[i]; 193 } 194 } 195 196 197 for(i=0;i<nseqs;i++) /* find +ve matches in the column */ 198 { 199 s1 = getResidue(seqs,i,k); 200 m1 = mat.getMatrixIndex(s1); 201 202 if(m1 == -1) 203 { 204 JOptionPane.showMessageDialog(null, 205 "<html>Jemboss Alignment Viewer; " + 206 "cannot find the residue <em>'"+ s1 + 207 "'</em> in the scoring matrix <em>'" + 208 mat.getCurrentMatrixName()+"'</em></html>", 209 "Error", JOptionPane.ERROR_MESSAGE); 210 return; 211 } 212 213 if(matching[m1] == 0.f) 214 { 215 for(j=0;j<nseqs;j++) 216 { 217 if(i != j) 218 { 219 s2 = getResidue(seqs,j,k); 220 m2 = mat.getMatrixIndex(s2); 221 if(m1 >= 0 && m2 >= 0 && matrix[m1][m2] > 0) 222 matching[m1] += getSequenceWeight(seqs,j); 223 } 224 } 225 } 226 } 227 228 229 int matchingmaxindex = 0; /* get max matching and identical */ 230 int identicalmaxindex = 0; 231 for(i=0;i<nseqs;i++) 232 { 233 s1 = getResidue(seqs,i,k); 234 m1 = mat.getMatrixIndex(s1); 235 236 if(m1 >= 0) 237 { 238 if(identical[m1] > identical[identicalmaxindex]) 239 identicalmaxindex = m1; 240 } 241 } 242 243 for(i=0;i<nseqs;i++) 244 { 245 s1 = getResidue(seqs,i,k); 246 m1 = mat.getMatrixIndex(s1); 247 if(m1 >= 0) 248 { 249 if(matching[m1] > matching[matchingmaxindex]) 250 matchingmaxindex = m1; 251 else if(matching[m1] == matching[matchingmaxindex]) 252 { 253 if(identical[m1] > identical[matchingmaxindex]) 254 matchingmaxindex= m1; 255 } 256 } 257 } 258 259 260 /* plurality check */ 261 s1 = getResidue(seqs,highindex,k); 262 m1 = mat.getMatrixIndex(s1); 263 264 if(m1 >= 0 && matching[m1] >= fplural 265 && !s1.equals("-") && !s1.equals(".")) 266 res = s1; 267 268 if(matching[m1] <= setcase) 269 res = res.toLowerCase(); 270 271 if(identity>0) /* if just looking for id's */ 272 { 273 j=0; 274 for(i=0;i<nseqs;i++) 275 { 276 s1 = getResidue(seqs,i,k); 277 m1 = mat.getMatrixIndex(s1); 278 279 if(matchingmaxindex == m1 280 && !s1.equals("-") && !s1.equals(".")) 281 j++; 282 } 283 if(j<identity) 284 res = nocon; 285 } 286 287 cons = cons.concat(res); 288 } 289 } 290 291 /** 292 * 293 * Check all sequences are the same length 294 * @param seqs collection of sequences 295 * @return true if all sequences are the same length 296 * 297 */ isEqualSequenceLength(Vector seqs)298 public boolean isEqualSequenceLength(Vector seqs) 299 { 300 int len = 0; 301 int numseq=0; 302 Enumeration enumer = seqs.elements(); 303 while(enumer.hasMoreElements()) 304 { 305 Sequence seq = (Sequence)enumer.nextElement(); 306 if(numseq > 0) 307 if(len != seq.getLength()) 308 return false; 309 310 len = seq.getLength(); 311 numseq++; 312 } 313 return true; 314 } 315 316 /** 317 * 318 * Check all sequences lengths and return length of 319 * the longest sequence 320 * @param seqs collection of sequences 321 * @return length of longest sequence 322 * 323 */ getMaxSequenceLength(Vector seqs)324 public int getMaxSequenceLength(Vector seqs) 325 { 326 int len = 0; 327 328 Enumeration enumer = seqs.elements(); 329 while(enumer.hasMoreElements()) 330 { 331 Sequence seq = (Sequence)enumer.nextElement(); 332 if(len < seq.getLength()) 333 len = seq.getLength(); 334 } 335 return len; 336 } 337 338 /** 339 * 340 * Get the consensus sequence 341 * @return the consensus sequence 342 * 343 */ getConsensusSequence()344 public Sequence getConsensusSequence() 345 { 346 return new Sequence(DEFAULT_SEQUENCE_NAME,cons); 347 } 348 349 /** 350 * 351 * Get the sequence weight 352 * @param seqs set of sequences 353 * @param i index of a sequence in the set 354 * @return sequence weight 355 * 356 */ getSequenceWeight(Vector seqs, int i)357 public float getSequenceWeight(Vector seqs, int i) 358 { 359 return ((Sequence)seqs.get(i)).getWeight(); 360 } 361 362 /** 363 * 364 * Get the residue at a given position from a given sequence 365 * @param seqs set of sequences 366 * @param i index of a sequence in the set 367 * @param j residue position 368 * @return residue 369 * 370 */ getResidue(Vector seqs, int i, int k)371 public String getResidue(Vector seqs, int i, int k) 372 { 373 String res = "-"; 374 try 375 { 376 res = ((Sequence)seqs.get(i)).getSequence().substring(k,k+1); 377 } 378 catch(StringIndexOutOfBoundsException sexp) 379 { 380 } 381 return res; 382 } 383 main(String args[])384 public static void main(String args[]) 385 { 386 Vector seqs = new Vector(); 387 388 seqs.add(new Sequence("MHQDGISSMNQLGGLFVNGRPQ")); 389 seqs.add(new Sequence("-MQNSHSGVNQLGGVFVNGRPQ")); 390 seqs.add(new Sequence("STPLGQGRVNQLGGVFINGRPP")); 391 seqs.add(new Sequence("STPLGQGRVNQLGGVFINGRPP")); 392 seqs.add(new Sequence("-MEQTYGEVNQLGGVFVNGRPE")); 393 seqs.add(new Sequence("-MEQTYGEVNQLGGVFVNGRPE")); 394 seqs.add(new Sequence("MHQDGISSMNQLGGLFVNGRPH")); 395 seqs.add(new Sequence("MHQDGISSMNQLGGLFVNGRPR")); 396 seqs.add(new Sequence("MHQDGISSMNQLLGLFVNGRPR")); 397 seqs.add(new Sequence("MHQDGISSMNQLLGGGGGGGGR")); 398 399 new Consensus(new File("/packages/emboss_dev/tcarver/emboss/emboss/emboss/data/EBLOSUM62"), 400 seqs,49.75f,0.f,0); 401 } 402 } 403 404