1 /***************************************************************
2 *
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU General Public License
5 * as published by the Free Software Foundation; either version 2
6 * of the License, or (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
16 *
17 *  @author: Copyright (C) Tim Carver
18 *
19 ***************************************************************/
20 
21 package org.emboss.jemboss.editor;
22 
23 import java.io.*;
24 import java.util.Vector;
25 import java.util.Enumeration;
26 
27 import javax.swing.JOptionPane;
28 
29 
30 /**
31 *
32 * Calculate a consensus using the same method as 'cons'
33 * in the EMBOSS suite.
34 *
35 */
36 public class Consensus
37 {
38   private int matrix[][];
39   private String cons = "";
40 
41   public static final String DEFAULT_SEQUENCE_NAME ="Consensus";
42 
43   /**
44   *
45   * @param matrixFile	scoring matrix file
46   * @param seqs		vector of Sequence objects
47   * @param fplural	defines no. of +ve scoring matches below
48   *               	which there is no consensus.
49   * @param setcase	upper/lower case given if score above/below
50   *                     user defined +ve matching threshold.
51   * @param identity	defines the number of identical symbols
52   *                	requires in an alignment column for it to
53   *                	included in the consensus.
54   *
55   */
Consensus(File matrixFile, Vector seqs, float fplural, float setcase, int identity)56   public Consensus(File matrixFile, Vector seqs, float fplural,
57                    float setcase, int identity)
58   {
59     Matrix mat = new Matrix(matrixFile);
60     matrix = mat.getMatrix();
61     calculateCons(mat,seqs,fplural,setcase,identity);
62   }
63 
64   /**
65   *
66   * @param matrixJar    	jar file containing scoring matrix
67   * @param matrixFileName   	scoring matrix file name
68   * @param seqs         	vector of Sequence objects
69   * @param fplural      	defines no. of +ve scoring matches below
70   *                     	which there is no consensus.
71   * @param setcase      	upper/lower case given if score above/below
72   *                     	user defined +ve matching threshold.
73   * @param identity     	defines the number of identical symbols
74   *                     	requires in an alignment column for it to
75   *                     	included in the consensus.
76   *
77   */
Consensus(String matrixJar, String matrixFileName, Vector seqs, float fplural, float setcase, int identity)78   public Consensus(String matrixJar, String matrixFileName,
79                    Vector seqs, float fplural,
80                    float setcase, int identity)
81   {
82     this(new Matrix(matrixJar,matrixFileName),
83                seqs,fplural,setcase,identity);
84   }
85 
86   /**
87   *
88   * @param mat		scoring matrix
89   * @param seqs         vector of Sequence objects
90   * @param fplural      defines no. of +ve scoring matches below
91   *                     which there is no consensus.
92   * @param setcase      upper/lower case given if score above/below
93   *                     user defined +ve matching threshold.
94   * @param identity     defines the number of identical symbols
95   *                     requires in an alignment column for it to
96   *                     included in the consensus.
97   *
98   */
Consensus(Matrix mat, Vector seqs, float fplural, float setcase, int identity)99   public Consensus(Matrix mat,
100                    Vector seqs, float fplural,
101                    float setcase, int identity)
102   {
103     matrix = mat.getMatrix();
104     calculateCons(mat,seqs,fplural,setcase,identity);
105   }
106 
107 
108   /**
109   *
110   * Routine to calculate the consensus of a set of sequences
111   *
112   * @param mat          scoring matrix
113   * @param seqs         vector of Sequence objects
114   * @param fplural      defines no. of +ve scoring matches below
115   *                     which there is no consensus.
116   * @param setcase      upper/lower case given if score above/below
117   *                     user defined +ve matching threshold.
118   * @param identity     defines the number of identical symbols
119   *                     requires in an alignment column for it to
120   *                     included in the consensus.
121   *
122   */
calculateCons(Matrix mat, Vector seqs, float fplural, float setcase, int identity)123   private void calculateCons(Matrix mat, Vector seqs, float fplural,
124                             float setcase, int identity)
125   {
126     int nseqs = seqs.size();
127     int mlen = getMaxSequenceLength(seqs);
128 
129     String nocon = "-";
130     if(((Sequence)seqs.get(0)).isProtein())
131       nocon = "x";
132 
133     String res = "";
134 
135     int matsize = mat.getIDimension();
136     float identical[] = new float[matsize];
137     float matching[] = new float[matsize];
138     float score[]    = new float[nseqs];
139 
140     int i;
141     int j;
142     int m1;
143     int m2;
144     float contri = 0.f;
145     float contrj = 0.f;
146     String s1;
147     String s2;
148 
149     for(int k=0; k< mlen; k++)
150     {
151       res = nocon;
152       for(i=0;i<matsize;i++)          /* reset id's and +ve matches */
153       {
154         identical[i] = 0.f;
155         matching[i]  = 0.f;
156       }
157 
158       for(i=0;i<nseqs;i++)
159         score[i] = 0.f;
160 
161       for(i=0;i<nseqs;i++)            /* generate score for columns */
162       {
163         s1 = getResidue(seqs,i,k);
164         m1 = mat.getMatrixIndex(s1);
165 
166         if(m1 >= 0)
167           identical[m1] += getSequenceWeight(seqs,i);
168         for(j=i+1;j<nseqs;j++)
169         {
170           s2 = getResidue(seqs,j,k);
171           m2 = mat.getMatrixIndex(s2);
172           if(m1 >= 0 && m2 >= 0)
173           {
174             contri = matrix[m1][m2]*getSequenceWeight(seqs,j)+score[i];
175             contrj = matrix[m1][m2]*getSequenceWeight(seqs,i)+score[j];
176             score[i] = contri;
177             score[j] = contrj;
178           }
179         }
180       }
181 
182       int highindex = -1;
183       float max = -(float)Integer.MAX_VALUE;
184       for(i=0;i<nseqs;i++)
185       {
186         if( score[i] > max ||
187            (score[i] == max &&
188              (getResidue(seqs,highindex,k).equals("-") ||
189               getResidue(seqs,highindex,k).equals(".")) ))
190         {
191           highindex = i;
192           max       = score[i];
193         }
194       }
195 
196 
197       for(i=0;i<nseqs;i++)        /* find +ve matches in the column */
198       {
199         s1 = getResidue(seqs,i,k);
200         m1 = mat.getMatrixIndex(s1);
201 
202         if(m1 == -1)
203         {
204             JOptionPane.showMessageDialog(null,
205                     "<html>Jemboss Alignment Viewer; " +
206                     "cannot find the residue <em>'"+ s1 +
207                     "'</em> in the scoring matrix <em>'" +
208                     mat.getCurrentMatrixName()+"'</em></html>",
209                     "Error", JOptionPane.ERROR_MESSAGE);
210             return;
211         }
212 
213         if(matching[m1] == 0.f)
214         {
215           for(j=0;j<nseqs;j++)
216           {
217             if(i != j)
218             {
219               s2 = getResidue(seqs,j,k);
220               m2 = mat.getMatrixIndex(s2);
221               if(m1 >= 0 && m2 >= 0 && matrix[m1][m2] > 0)
222                 matching[m1] += getSequenceWeight(seqs,j);
223             }
224           }
225         }
226       }
227 
228 
229       int matchingmaxindex  = 0;  /* get max matching and identical */
230       int identicalmaxindex = 0;
231       for(i=0;i<nseqs;i++)
232       {
233         s1 = getResidue(seqs,i,k);
234         m1 = mat.getMatrixIndex(s1);
235 
236         if(m1 >= 0)
237         {
238           if(identical[m1] > identical[identicalmaxindex])
239             identicalmaxindex = m1;
240         }
241       }
242 
243       for(i=0;i<nseqs;i++)
244       {
245         s1 = getResidue(seqs,i,k);
246         m1 = mat.getMatrixIndex(s1);
247         if(m1 >= 0)
248         {
249           if(matching[m1] > matching[matchingmaxindex])
250             matchingmaxindex = m1;
251           else if(matching[m1] ==  matching[matchingmaxindex])
252           {
253             if(identical[m1] > identical[matchingmaxindex])
254               matchingmaxindex= m1;
255           }
256         }
257       }
258 
259 
260       /* plurality check */
261       s1 = getResidue(seqs,highindex,k);
262       m1 = mat.getMatrixIndex(s1);
263 
264       if(m1 >= 0 && matching[m1] >= fplural
265          && !s1.equals("-") && !s1.equals("."))
266          res = s1;
267 
268       if(matching[m1] <= setcase)
269         res = res.toLowerCase();
270 
271       if(identity>0)                      /* if just looking for id's */
272       {
273         j=0;
274         for(i=0;i<nseqs;i++)
275         {
276           s1 = getResidue(seqs,i,k);
277           m1 = mat.getMatrixIndex(s1);
278 
279           if(matchingmaxindex == m1
280              && !s1.equals("-") && !s1.equals("."))
281             j++;
282         }
283         if(j<identity)
284           res = nocon;
285       }
286 
287       cons = cons.concat(res);
288     }
289   }
290 
291 /**
292 *
293 * Check all sequences are the same length
294 * @param seqs	collection of sequences
295 * @return	true if all sequences are the same length
296 *
297 */
isEqualSequenceLength(Vector seqs)298   public boolean isEqualSequenceLength(Vector seqs)
299   {
300     int len = 0;
301     int numseq=0;
302     Enumeration enumer = seqs.elements();
303     while(enumer.hasMoreElements())
304     {
305       Sequence seq = (Sequence)enumer.nextElement();
306       if(numseq > 0)
307         if(len != seq.getLength())
308           return false;
309 
310       len = seq.getLength();
311       numseq++;
312     }
313     return true;
314   }
315 
316 /**
317 *
318 * Check all sequences lengths and return length of
319 * the longest sequence
320 * @param seqs   collection of sequences
321 * @return       length of longest sequence
322 *
323 */
getMaxSequenceLength(Vector seqs)324   public int getMaxSequenceLength(Vector seqs)
325   {
326     int len = 0;
327 
328     Enumeration enumer = seqs.elements();
329     while(enumer.hasMoreElements())
330     {
331       Sequence seq = (Sequence)enumer.nextElement();
332       if(len < seq.getLength())
333         len = seq.getLength();
334     }
335     return len;
336   }
337 
338 /**
339 *
340 * Get the consensus sequence
341 * @return 	the consensus sequence
342 *
343 */
getConsensusSequence()344   public Sequence getConsensusSequence()
345   {
346     return new Sequence(DEFAULT_SEQUENCE_NAME,cons);
347   }
348 
349 /**
350 *
351 * Get the sequence weight
352 * @param seqs	set of sequences
353 * @param i 	index of a sequence in the set
354 * @return	sequence weight
355 *
356 */
getSequenceWeight(Vector seqs, int i)357   public float getSequenceWeight(Vector seqs, int i)
358   {
359     return ((Sequence)seqs.get(i)).getWeight();
360   }
361 
362 /**
363 *
364 * Get the residue at a given position from a given sequence
365 * @param seqs   set of sequences
366 * @param i      index of a sequence in the set
367 * @param j	residue position
368 * @return       residue
369 *
370 */
getResidue(Vector seqs, int i, int k)371   public String getResidue(Vector seqs, int i, int k)
372   {
373     String res = "-";
374     try
375     {
376       res = ((Sequence)seqs.get(i)).getSequence().substring(k,k+1);
377     }
378     catch(StringIndexOutOfBoundsException sexp)
379     {
380     }
381     return res;
382   }
383 
main(String args[])384   public static void main(String args[])
385   {
386     Vector seqs = new Vector();
387 
388     seqs.add(new Sequence("MHQDGISSMNQLGGLFVNGRPQ"));
389     seqs.add(new Sequence("-MQNSHSGVNQLGGVFVNGRPQ"));
390     seqs.add(new Sequence("STPLGQGRVNQLGGVFINGRPP"));
391     seqs.add(new Sequence("STPLGQGRVNQLGGVFINGRPP"));
392     seqs.add(new Sequence("-MEQTYGEVNQLGGVFVNGRPE"));
393     seqs.add(new Sequence("-MEQTYGEVNQLGGVFVNGRPE"));
394     seqs.add(new Sequence("MHQDGISSMNQLGGLFVNGRPH"));
395     seqs.add(new Sequence("MHQDGISSMNQLGGLFVNGRPR"));
396     seqs.add(new Sequence("MHQDGISSMNQLLGLFVNGRPR"));
397     seqs.add(new Sequence("MHQDGISSMNQLLGGGGGGGGR"));
398 
399     new Consensus(new File("/packages/emboss_dev/tcarver/emboss/emboss/emboss/data/EBLOSUM62"),
400                   seqs,49.75f,0.f,0);
401   }
402 }
403 
404