1 /* Bases.java
2  *
3  * created: Sun Oct 11 1998
4  *
5  * This file is part of Artemis
6  *
7  * Copyright (C) 1998-2005  Genome Research Limited
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License
11  * as published by the Free Software Foundation; either version 2
12  * of the License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
22  *
23  * $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/sequence/Bases.java,v 1.26 2009-03-27 14:00:51 tjc Exp $
24  */
25 
26 package uk.ac.sanger.artemis.sequence;
27 
28 import uk.ac.sanger.artemis.Feature;
29 import uk.ac.sanger.artemis.util.*;
30 import uk.ac.sanger.artemis.io.Range;
31 import uk.ac.sanger.artemis.io.EmblStreamSequence;
32 import uk.ac.sanger.artemis.io.Sequence;
33 import uk.ac.sanger.artemis.io.StreamSequence;
34 
35 import org.biojava.bio.symbol.IllegalSymbolException;
36 
37 import java.util.WeakHashMap;
38 import java.util.Iterator;
39 
40 /**
41  *  This class is a wrapper for the uk.ac.sanger.artemis.io.Sequence class
42  *  that allows us to control what is done to the sequence and to send events
43  *  to interested objects when changes happen.  Note: a '@' character is used
44  *  as a marker when we don't have a base letter, for example complementing a
45  *  non-base letter returns '@'.
46  *
47  *  @author Kim Rutherford
48  *  @version $Id: Bases.java,v 1.26 2009-03-27 14:00:51 tjc Exp $ */
49 
50 public class Bases
51 {
52   /**
53    *  Indicates the bases should be read in the forward direction for a
54    *  particular operation.
55    **/
56   static public final int FORWARD = 1;
57 
58   /**
59    *  Indicates the bases should be read in the reverse direction for a
60    *  particular operation.
61    **/
62   static public final int REVERSE = 2;
63 
64   /**
65    *  The lowest possible value for use with addSequenceChangeListener ().
66    **/
67   static public final int MIN_PRIORITY = -5;
68 
69   /**
70    *  An arbitrary value for use with addSequenceChangeListener ().
71    **/
72   static public final int MEDIUM_PRIORITY = 0;
73 
74   /**
75    *  The highest possible value for use with addSequenceChangeListener ().
76    **/
77   static public final int MAX_PRIORITY = 5;
78 
79   /**
80    *  A cache of the forward & reverse stop codon positions.
81    *  0 means not set/cached yet, 1 not a stop codon, 2 and 3 are a
82    *  stop codon on fwd or reverse strand respectively.
83    **/
84   private byte [] stop_codon_cache = null;
85 
86   /**
87    *  A cache of the forward & reverse start codon positions.
88    *  0 means not set/cached yet, 1 not a start codon, 2 and 3 are a
89    *  start codon on fwd or reverse strand repectively.
90    **/
91   private byte [] start_codon_cache = null;
92 
93   /**
94    *  Create a new Bases object.
95    *  @param sequence The raw sequence that the new object will use.
96    **/
Bases(final Sequence sequence)97   public Bases(final Sequence sequence)
98   {
99     this.embl_sequence = sequence;
100 
101     stop_codon_cache = null;
102 
103     forward_strand = new Strand(this, FORWARD);
104     reverse_strand = new Strand(this, REVERSE);
105 
106     for(int i = 0 ; i < listener_hash_map_array.length ; ++i)
107       listener_hash_map_array [i] = new WeakHashMap();
108   }
109 
110   /**
111    *  Return the object representing the forward sequence of bases for this
112    *  object.
113    **/
getForwardStrand()114   public Strand getForwardStrand()
115   {
116     return forward_strand;
117   }
118 
119   /**
120    *  Return the object representing the reverse complemented sequence of
121    *  bases for this Bases objects.
122    **/
getReverseStrand()123   public Strand getReverseStrand()
124   {
125     return reverse_strand;
126   }
127 
128   /**
129    *  Returns the length of the sequence in bases.
130    **/
getLength()131   public int getLength()
132   {
133     return embl_sequence.length();
134   }
135 
136   /**
137    *  Return a String representation of the sequence.
138    **/
toString()139   public String toString()
140   {
141     return embl_sequence.getSubSequence(1,getLength());
142   }
143 
144   /**
145    *  Reverse and complement both of the Strand objects (by swapping them and
146    *  reverse complementing the sequence).
147    *  @exception ReadOnlyException If the Bases cannot be changed.
148    **/
reverseComplement()149   public void reverseComplement()
150       throws ReadOnlyException
151   {
152     stop_codon_cache = null;
153 
154     final Strand temp = forward_strand;
155     forward_strand = reverse_strand;
156     reverse_strand = temp;
157 
158 //  final String new_sequence =
159 //    reverseComplement(getSequence().getSubSequence(1, getLength()));
160 
161     final char[] new_sequence =
162       reverseComplement(getSequence().getCharSubSequence(1, getLength()));
163 
164     try
165     {
166 //    getSequence().setFromChar(new_sequence.toCharArray());
167       getSequence().setFromChar(new_sequence);
168     }
169     catch (IllegalSymbolException e)
170     {
171       throw new Error ("internal error - unexpected exception: " + e);
172     }
173 
174     final SequenceChangeEvent event =
175       new SequenceChangeEvent (this, SequenceChangeEvent.REVERSE_COMPLEMENT);
176 
177     fireSequenceChangeEvent (event);
178   }
179 
180   /**
181    *  This array is used to convert between bases and indices.  See
182    *  getIndexOfBase()
183    **/
184   public final static char[] letter_index =
185   {
186     't', 'c', 'a', 'g', 'n'
187   };
188 
189   /**
190    *  Given a base letter return its index where t = 0, c = 1, a = 2, g = 3, 4
191    *  otherwise.
192    *  See letter_index.
193    **/
getIndexOfBase(final char base)194   public final static int getIndexOfBase(final char base)
195   {
196     switch(base)
197     {
198       case 'c':
199         return 1;
200       case 'a':
201         return 2;
202       case 'g':
203         return 3;
204       case 't':
205       case 'u':
206         return 0;
207     }
208 
209     return 4;
210   }
211 
212   /**
213    *  Return the complement of the given Range.  eg. if the sequence length is
214    *  100 and the Range is 1..10 then the return value will be 90..100.
215    **/
complementRange(final Range range)216   private Range complementRange (final Range range) {
217     final int real_start = getComplementPosition (range.getEnd ());
218     final int real_end   = getComplementPosition (range.getStart ());
219 
220     try {
221       final Range real_range = new Range (real_start, real_end);
222 
223       return real_range;
224     } catch (OutOfRangeException e) {
225       throw new Error ("internal error - unexpected exception: " + e);
226     }
227   }
228 
229   /**
230    *  Return the complement of the given position on the sequence.  eg. if the
231    *  sequence length is 100 and the position is 10 then the return value will
232    *  be 90.
233    **/
getComplementPosition(final int position)234   public int getComplementPosition (final int position) {
235     return getLength () - position + 1;
236   }
237 
238   /**
239    *  Return the raw of a base position on this object.  The raw position of a
240    *  base on the forward strand is the same as the position itself.  The raw
241    *  position of a base on the reverse strand is position of the
242    *  corresponding bases on the forward strand.
243    *  @param position The position of the base.
244    *  @param direction The direction (strand) that the position refers to.
245    **/
getRawPosition(final int position, final int direction)246   public int getRawPosition (final int position, final int direction) {
247     if (direction == FORWARD) {
248       return position;
249     } else {
250       return getComplementPosition (position);
251     }
252   }
253 
254   /**
255    *  Translate a sequence of bases into the corresponding single letter amino
256    *  acid codes.
257    *  @param range The range of the bases to translated.  If the range.start
258    *    - range.end + 1 is not a multiple of three the last codon is
259    *    incomplete and will not be translated.  If the range is out of range
260    *    ie. it has a start or end less than one or greater than the length of
261    *    the sequence, then the out of range codons will be translated as '.'.
262    *  @param direction The direction of the translation.  If FORWARD the
263    *    translation will happen as expected, if REVERSE the translation will
264    *    be done on the reverse complement.
265    *  @param unknown_is_x If this parameter is true codons that contain
266    *    ambiguous bases will be translated as 'x', if false they will be
267    *    translated as '.'
268    *  @return The translated sequence in one letter abbreviated form.
269    **/
getTranslation(final Range range, final int direction, final boolean unknown_is_x)270   public AminoAcidSequence getTranslation(final Range range,
271                                           final int direction,
272                                           final boolean unknown_is_x)
273   {
274     // getSubSequenceC() will return a sequence going in the right direction
275     // so we don't have to worry.
276     final char[] sub_sequence = getSubSequenceC(range, direction);
277     return AminoAcidSequence.getTranslation(sub_sequence, unknown_is_x);
278   }
279 
280 
getSpacedTranslation(final Range range, final int direction, final boolean unknown_is_x)281   public AminoAcidSequence getSpacedTranslation(final Range range,
282                                            final int direction,
283                                            final boolean unknown_is_x)
284   {
285     // getSubSequenceC() will return a sequence going in the right direction
286     // so we don't have to worry.
287     final char[] sub_sequence = getSubSequenceC(range, direction);
288     return AminoAcidSequence.getSpacedTranslation(sub_sequence, unknown_is_x);
289   }
290 
291   /**
292    *  Return an array containing the positions of the codons that match the
293    *  strings given by the query_codons argument.  Only those codons that are
294    *  in the same frame as the first base of the range are returned.
295    *  @param range The inclusive range of bases to get the codons from.
296    *  @param direction The direction of the translation.  REVERSE means
297    *    translate the reverse complement bases (the positions in the range
298    *    argument are complemented first.)
299    *  @param query_codons The codons to search for.  Each element of this
300    *    vector should be a string that is 3 characters long.
301    *  @return An array containing the positions of the first base of the
302    *    codons.  This array is padded with zeros at the end.
303    **/
getMatchingCodons(final Range range, final int direction, final StringVector query_codons)304   public int [] getMatchingCodons (final Range range, final int direction,
305                                    final StringVector query_codons) {
306     final Range real_range;
307 
308     if(direction == FORWARD)
309       real_range = range;
310     else
311       real_range = complementRange(range);
312 
313     // guess the number of codons in getCount () bases - there are
314     // query_codons.size() search codons in every 64 codons if G+C is 50%
315     // and we have getCount()/3 codons to look at.
316 
317     float at_content = (100 - getAverageGCPercent()) / 100;
318 
319     int array_start_size =
320       (int) (range.getCount () *
321              at_content * at_content * (2-at_content) *
322              query_codons.size () / 64);
323 
324     if(array_start_size < 20)
325       array_start_size = 20;
326 
327     // this array will be resized as necessary
328     int[] return_positions = new int[array_start_size];
329 
330     int current_return_array_index = 0;
331 
332     final String sequence_string =
333       getSequence ().getSubSequence (1, getLength ());
334 
335     final int range_start_index = real_range.getStart () - 1;
336     final int range_end_index = real_range.getEnd () - 1;
337 
338     if(direction == FORWARD)
339     {
340       for (int i = range_start_index ; i < range_end_index - 2 ; i += 3) {
341         if (i < 0 || i >= sequence_string.length () - 2) {
342           continue;
343         }
344 
345         boolean is_matching_codon =
346           isMatchingCodon (sequence_string, i, direction, query_codons);
347 
348         if (is_matching_codon) {
349           if (current_return_array_index == return_positions.length) {
350             // first reallocate the array
351             final int [] new_array =
352               new int [return_positions.length * 3 / 2 + 1];
353 
354             System.arraycopy (return_positions, 0,
355                               new_array, 0,
356                               return_positions.length);
357             return_positions = new_array;
358           }
359 
360           return_positions[current_return_array_index] = i + 1;
361 
362           ++current_return_array_index;
363         }
364       }
365     } else {
366 
367       for (int i = range_end_index ; i > range_start_index + 2 ; i -= 3) {
368         if (i < 2 || i >= sequence_string.length ()) {
369           continue;
370         }
371 
372         boolean is_matching_codon =
373           isMatchingCodon (sequence_string, i, direction, query_codons);
374 
375         if (is_matching_codon) {
376           if (current_return_array_index == return_positions.length) {
377             // first reallocate the array
378             final int [] new_array =
379               new int [return_positions.length * 3 / 2 + 1];
380 
381             System.arraycopy (return_positions, 0,
382                               new_array, 0,
383                               return_positions.length);
384             return_positions = new_array;
385           }
386 
387           // return the complemented base position
388           return_positions[current_return_array_index] =
389             sequence_string.length () - i;
390 
391           ++current_return_array_index;
392         }
393       }
394     }
395 
396     return return_positions;
397 
398   }
399 
400   /**
401    *  Check a three character substring and return true if and only if the
402    *  three bases match an element of the query_codons argument.  If the
403    *  direction is REVERSE then the three bases to check are at start_index,
404    *  start_index - 1 and start_index - 2.  In that case true is returned if
405    *  and only the complement of those three bases matches.
406    **/
isMatchingCodon(final String sequence_string, final int start_index, final int direction, final StringVector query_codons)407   private boolean isMatchingCodon (final String sequence_string,
408                                    final int start_index,
409                                    final int direction,
410                                    final StringVector query_codons) {
411     for (int query_codon_index = 0 ;
412          query_codon_index < query_codons.size () ;
413          ++query_codon_index) {
414       if (isMatchingCodon (sequence_string, start_index, direction,
415                            (String)query_codons.elementAt (query_codon_index))) {
416         return true;
417       }
418     }
419 
420     return false;
421   }
422 
423   /**
424    *  Check a three character substring and return true if and only if the
425    *  three bases match the query_codon argument.  If the direction is
426    *  REVERSE then the three bases to check are at start_index, start_index -
427    *  1 and start_index - 2.  In that case true is returned if and only the
428    *  complement of those three bases matches.
429    **/
isMatchingCodon(final String sequence_string, final int start_index, final int direction, final String query_codon)430   private boolean isMatchingCodon (final String sequence_string,
431                                    final int start_index,
432                                    final int direction,
433                                    final String query_codon) {
434     if (direction == FORWARD) {
435       if (query_codon.charAt (0) == sequence_string.charAt (start_index) &&
436           query_codon.charAt (1) == sequence_string.charAt (start_index + 1) &&
437           query_codon.charAt (2) == sequence_string.charAt (start_index + 2)) {
438         return true;
439       }
440     } else {
441       final char first_letter =
442         complement (sequence_string.charAt (start_index));
443       final char second_letter =
444         complement (sequence_string.charAt (start_index - 1));
445       final char third_letter =
446         complement (sequence_string.charAt (start_index - 2));
447 
448       if (query_codon.charAt (0) == first_letter &&
449           query_codon.charAt (1) == second_letter &&
450           query_codon.charAt (2) == third_letter) {
451         return true;
452       }
453     }
454 
455     return false;
456   }
457 
458   /**
459    *  Returns stop_codon_cache after allocating it (if it is null).
460    **/
getStopCodonCache()461   private byte[] getStopCodonCache()
462   {
463     if (stop_codon_cache == null)
464     {
465       final int nbytes = getLength() >> 1 >> 1;
466       stop_codon_cache = new byte[nbytes+1];
467     }
468 
469     return stop_codon_cache;
470   }
471 
472 
473   /**
474    *  Returns start_codon_cache after allocating it (if it is null).
475    **/
getStartCodonCache()476   private byte[] getStartCodonCache()
477   {
478     if (start_codon_cache == null)
479     {
480       final int nbytes = getLength() >> 1 >> 1;
481       start_codon_cache = new byte[nbytes+1];
482     }
483 
484     return start_codon_cache;
485   }
486 
487   /**
488    *  Clear stop codon cache (forward and reverse).
489    **/
clearCodonCache()490   public void clearCodonCache()
491   {
492     stop_codon_cache = null;
493     start_codon_cache = null;
494   }
495 
496 
497   /**
498    *  Return an array containing the positions of the stop codons.  Only those
499    *  codons that are in the same frame as the first base of the range are
500    *  returned.
501    *  @param range The inclusive range of bases to get the stop codons from.
502    *  @param direction The direction of the translation.  REVERSE means
503    *    translate the reverse complement bases (the positions in the range
504    *    argument are complemented first.)
505    *  @return An array containing the positions of the first base of the stop
506    *    codons.  This array is padded with zeros at the end.
507    **/
getStopCodons(final Range range, final int direction)508   protected int[] getStopCodons(final Range range, final int direction)
509   {
510     final Range real_range;
511 
512     if(direction == FORWARD)
513       real_range = range;
514     else
515       real_range = complementRange (range);
516 
517     // guess the number of stop codons in getCount() bases - there are 3
518     // stop codons in every 64 codons if G+C is 50% and we have getCount()/3
519     // codons to look at.
520 
521     float at_content = (100 - getAverageGCPercent()) / 100;
522 
523     int array_start_size =
524       (int)(range.getCount() *
525              at_content * at_content * (2-at_content) * 3 / 64);
526 
527     if(array_start_size < 20)
528       array_start_size = 20;
529 
530     // this array will be resized as necessary
531     int[] return_positions = new int[array_start_size];
532 
533     int current_return_array_index = 0;
534     int range_start_index = real_range.getStart();
535     int range_end_index   = real_range.getEnd();
536 
537     final int sequence_length = getLength();
538 
539     if(range_start_index < 1)
540     {
541       if(direction == FORWARD)
542         range_start_index = 3 + (range_start_index % 3);
543       else
544         range_start_index =  1;
545     }
546     if(range_end_index > sequence_length)
547       range_end_index = sequence_length;
548 
549     final char sequence_string[] =
550       getSequence().getCharSubSequence(range_start_index, range_end_index);
551 
552     range_start_index--;
553     range_end_index--;
554 
555     // whether a codon is a stop codon or not is cached in
556     // 2 bit chunks (i.e. 4 per byte)
557     int ncurrent_byte;
558     int bit_position;
559     byte bitty;
560 
561     final byte[] this_stop_codon_flags = getStopCodonCache();
562     if(direction == FORWARD)
563     {
564 
565       for(int i = range_start_index; i < range_end_index + 1; i += 3)
566       {
567         if(i < 0 || i >= sequence_length-1)
568           continue;
569 
570         ncurrent_byte = i >> 1 >> 1;
571         bit_position  = i % 4;
572 
573         // determine if codon type is cached or not
574         bitty = (byte) ((this_stop_codon_flags[ncurrent_byte]
575                               >> (2*bit_position) ) & 0x0003);
576         if(bitty == 0)
577         {
578           // not cached yet
579           setCache(range_start_index, range_end_index, sequence_string, i,
580               null, this_stop_codon_flags, ncurrent_byte, bit_position);
581         }
582 
583         bitty = (byte) ((this_stop_codon_flags[ncurrent_byte]
584                            >> (2*bit_position) ) & 0x0003);
585         if( bitty == 1 || bitty == 3 )
586           continue;
587 
588         // if we reach here this is a stop codon
589         if(current_return_array_index == return_positions.length)
590         {
591             // first reallocate the array
592           final int[] new_array =
593             new int[return_positions.length * 3 / 2 + 1];
594 
595           System.arraycopy(return_positions, 0,
596                            new_array, 0,
597                            return_positions.length);
598           return_positions = new_array;
599         }
600 
601         return_positions[current_return_array_index] = i + 1;
602         ++current_return_array_index;
603       }
604     }
605     else
606     {
607       for (int i = range_end_index ; i > range_start_index + 2 ; i -= 3)
608       {
609         if(i < 2 || i >= sequence_length)
610           continue;
611 
612         ncurrent_byte = i >> 1 >> 1;
613         bit_position = i % 4;
614         bitty = (byte) ((this_stop_codon_flags[ncurrent_byte]
615                               >> (2*bit_position) ) & 0x0003);
616 
617         if(bitty == 0)
618         {
619           // not cached yet
620           setCache(range_start_index, range_end_index, sequence_string, i,
621               null, this_stop_codon_flags, ncurrent_byte, bit_position);
622         }
623 
624         bitty = (byte) ((this_stop_codon_flags[ncurrent_byte]
625                            >> (2*bit_position) ) & 0x0003);
626 
627         if( bitty == 1 || bitty != 3 )
628           continue;
629 
630         // if we reach here this is a stop codon
631         if(current_return_array_index == return_positions.length)
632         {
633           // first reallocate the array
634           final int[] new_array =
635             new int[return_positions.length * 3 / 2 + 1];
636 
637           System.arraycopy(return_positions, 0,
638                            new_array, 0,
639                            return_positions.length);
640           return_positions = new_array;
641         }
642 
643         return_positions[current_return_array_index] =
644             sequence_length - i;
645         ++current_return_array_index;
646       }
647     }
648 
649     return return_positions;
650   }
651 
652   /**
653    * Return an 2D array containing the stop or start codons in a range for
654    *  all 3 frames of the strand.
655    *  @param range The inclusive range of bases to get the codons from.
656    *  @param direction The direction of the translation.  REVERSE means
657    *    translate the reverse complement bases (the positions in the range
658    *    argument are complemented first.)
659    *  @param query_codons if this is NULL then this assumes we are looking
660    *    for stop codons, otherwise this is used to look for start codons.
661    *  @return An array containing the positions of the first base of the stop
662    *    codons.  This array is padded with zeros at the end.
663    **/
getStopOrStartCodons(final Range range, final int direction, final StringVector query_codons)664   protected int[][] getStopOrStartCodons(final Range range,
665                                          final int direction,
666                                          final StringVector query_codons)
667   {
668     final Range real_range;
669 
670     if(direction == FORWARD)
671       real_range = range;
672     else
673       real_range = complementRange(range);
674 
675     // guess the number of stop codons in getCount() bases - there are 3
676     // stop codons in every 64 codons if G+C is 50% and we have getCount()/3
677     // codons to look at.
678 
679     float at_content = (100 - getAverageGCPercent()) / 100;
680 
681     int array_start_size =
682       (int)(range.getCount() *
683             at_content * at_content * (2-at_content) * 3 / 64);
684 
685     if(array_start_size < 20)
686       array_start_size = 20;
687     // this array will be resized as necessary
688     int[][] return_positions = new int[3][array_start_size];
689 
690     int[] current_return_array_index = new int[3];
691     current_return_array_index[0] = 0;
692     current_return_array_index[1] = 0;
693     current_return_array_index[2] = 0;
694 
695     int range_start_index = real_range.getStart();
696     int range_end_index   = real_range.getEnd();
697 
698     final int sequence_length = getLength();
699 
700     if(range_start_index < 1)
701     {
702       if(direction == FORWARD)
703         range_start_index = 3 + (range_start_index % 3);
704       else
705         range_start_index =  1;
706     }
707 
708     if(range_end_index > sequence_length)
709       range_end_index = sequence_length;
710 
711     range_start_index--;
712     range_end_index--;
713     char[] sequence_string = null;
714 
715     // whether a codon is a stp codon or not is cached in
716     // 2 bit chunks (i.e. 4 per byte)
717     int ncurrent_byte;
718     int bit_position;
719     int nframe = 0;
720     byte bitty;
721 
722     final byte[] this_forward_codon_flags;
723     // if this is null then searching for stop codons
724     if(query_codons == null)
725       this_forward_codon_flags = getStopCodonCache();
726     else
727       this_forward_codon_flags = getStartCodonCache();
728 
729     for(int i = range_start_index; i < range_end_index+1; i += 1)
730     {
731       if(i < 0 || i >= sequence_length)
732         continue;
733 
734       ncurrent_byte = i >> 1 >> 1;
735       bit_position  = i % 4;
736 
737       // determine if codon type is cached or not
738       bitty = (byte) ((this_forward_codon_flags[ncurrent_byte]
739                                 >> (2*bit_position) ) & 0x0003);
740 
741       if(bitty == 0)  // not cached yet
742       {
743         if(sequence_string == null)
744           sequence_string = getSequence().getCharSubSequence(range_start_index+1,
745                                                                range_end_index+1);
746 
747         setCache(range_start_index, range_end_index, sequence_string, i,
748                  query_codons, this_forward_codon_flags, ncurrent_byte,
749                  bit_position);
750         bitty = (byte) ((this_forward_codon_flags[ncurrent_byte]
751                                  >> (2*bit_position) ) & 0x0003);
752       }
753 
754       if(  bitty == 1 ||                         // not a stop/start codon
755           (direction == FORWARD && bitty == 3) ||
756           (direction != FORWARD && bitty != 3 ))
757         continue;
758 
759       if(direction == FORWARD)
760         nframe = (i-range_start_index) % 3;
761       else
762         nframe = (range_end_index-i) % 3;
763 
764       // if we reach here this is a stop/start codon
765       if(current_return_array_index[nframe] == return_positions[nframe].length)
766       {
767         // first reallocate the array
768         final int[][] new_array =
769             new int[3][return_positions[nframe].length * 3 / 2 + 1];
770 
771         for(int j=0; j<3; j++)
772           System.arraycopy(return_positions[j], 0,
773                            new_array[j], 0,
774                            return_positions[j].length);
775         return_positions = new_array;
776       }
777 
778       if(direction == FORWARD)
779       {
780         if(i==0)
781           return_positions[nframe][current_return_array_index[nframe]] = i + 1;
782         else
783           return_positions[nframe][current_return_array_index[nframe]] = i;
784       }
785       else
786         return_positions[nframe][current_return_array_index[nframe]] =
787               sequence_length - i;
788       ++current_return_array_index[nframe];
789     }
790 
791     return return_positions;
792   }
793 
794   /**
795    * Set the codon cache for forward and reverse strand.
796    * @param range_start_index
797    * @param range_end_index
798    * @param sequence_string
799    * @param i
800    * @param query_codons
801    * @param this_codon_flags
802    * @param ncurrent_byte
803    * @param bit_position
804    */
setCache(int range_start_index, int range_end_index, char[] sequence_string, int i, final StringVector query_codons, final byte[] this_codon_flags, int ncurrent_byte, int bit_position)805   private void setCache(int range_start_index,
806                         int range_end_index,
807                         char[] sequence_string,
808                         int i,
809                         final StringVector query_codons,
810                         final byte[] this_codon_flags,
811                         int ncurrent_byte,
812                         int bit_position)
813   {
814     // test if stop (or start) codon
815     boolean ismatch = false;
816 
817     // forward codon
818     if(i < range_end_index-1)
819       if(query_codons == null)
820         ismatch = isStopCodon(sequence_string[i-range_start_index],
821                               sequence_string[i-range_start_index+1],
822                               sequence_string[i-range_start_index+2]);
823       else
824         ismatch = isCodon(sequence_string[i-range_start_index],
825                           sequence_string[i-range_start_index+1],
826                           sequence_string[i-range_start_index+2],
827                           query_codons);
828 
829     if(ismatch)
830     {
831       this_codon_flags[ncurrent_byte] =                // forward strand stop/start = 2
832              (byte)(this_codon_flags[ncurrent_byte]
833                      | (0x0002 << 2*bit_position));
834     }
835     else
836     {
837       this_codon_flags[ncurrent_byte] =                // cached no stop/start = 1
838         (byte)(this_codon_flags[ncurrent_byte]
839                | (0x0001 << 2*bit_position));
840     }
841 
842     // reverse codon
843     ismatch = false;
844     if(i-range_start_index > 1 && i-range_start_index < sequence_string.length)
845       if(query_codons == null)
846         ismatch = isStopCodon(complement(sequence_string[i-range_start_index]),
847                               complement(sequence_string[i-range_start_index-1]),
848                               complement(sequence_string[i-range_start_index-2]));
849       else
850         ismatch = isCodon(complement(sequence_string[i-range_start_index]),
851                           complement(sequence_string[i-range_start_index-1]),
852                           complement(sequence_string[i-range_start_index-2]),
853                           query_codons);
854     if(ismatch)
855       this_codon_flags[ncurrent_byte] =                // reverse strand stop/start = 3
856              (byte)(this_codon_flags[ncurrent_byte]
857                      | (0x0003 << 2*bit_position));
858   }
859 
860   /**
861    *  Return the base at the given position.
862    **/
getBaseAt(final int position)863   public char getBaseAt (final int position)
864       throws OutOfRangeException
865   {
866     if(position > getLength())
867       throw new OutOfRangeException(position + " > " + getLength());
868 
869     if(position < 1)
870       throw new OutOfRangeException(position + " < " + 1);
871 
872     return getSequence().charAt(position);
873   }
874 
875   /**
876    *  Return a sub sequence of the bases from this object.
877    *  @param range The range of the bases to be extracted.
878    *  @param direction The direction of the returned sequence.  If FORWARD the
879    *    sub sequence will be as expected, if REVERSE it will be reverse
880    *    complemented.
881    *  @return The extracted sequence, which will include the end bases of the
882    *    range.
883    **/
getSubSequence(final Range range, final int direction)884   public String getSubSequence (final Range range, final int direction) {
885     final Range real_range;
886 
887     if(direction == FORWARD)
888       real_range = range;
889     else
890       real_range = complementRange (range);
891 
892     // we need to make sure that we pass in-range coordinates to
893     // Sequence.getSubSequence()
894     final int sub_seq_start_index;
895     final int sub_seq_end_index;
896 
897     if(real_range.getStart () < 1)
898       sub_seq_start_index = 1;
899     else
900       sub_seq_start_index = real_range.getStart ();
901 
902     if(real_range.getEnd () > getLength ())
903       sub_seq_end_index = getLength ();
904     else
905       sub_seq_end_index = real_range.getEnd ();
906 
907     String sub_sequence =
908       getSequence().getSubSequence(sub_seq_start_index, sub_seq_end_index);
909 
910     // sanity checks - if the user asks for more bases than we
911     // have, we return the symbol "@" for the out-of-range bases.
912     if (real_range.getStart () < 1) {
913       final int dummy_base_count = 1 - real_range.getStart ();
914       final char [] dummy_bases = new char [dummy_base_count];
915 
916       for (int i = 0 ; i < dummy_base_count ; ++i) {
917         dummy_bases[i] = '@';
918       }
919 
920       sub_sequence = new String (dummy_bases) + sub_sequence;
921     }
922 
923     if (real_range.getEnd () > getLength ()) {
924       final int dummy_base_count = real_range.getEnd () - getLength ();
925       final char [] dummy_bases = new char [dummy_base_count];
926 
927       for (int i = 0 ; i < dummy_base_count ; ++i) {
928         dummy_bases[i] = '@';
929       }
930 
931       sub_sequence = sub_sequence + new String (dummy_bases);
932     }
933 
934     if (FORWARD == direction) {
935       return sub_sequence;
936     } else {
937       return reverseComplement (sub_sequence);
938     }
939   }
940 
getSubSequenceC(final Range range, final int direction)941   public char[] getSubSequenceC(final Range range, final int direction)
942   {
943     final Range real_range;
944 
945     if(direction == FORWARD)
946       real_range = range;
947     else
948       real_range = complementRange (range);
949 
950     // we need to make sure that we pass in-range coordinates to
951     // Sequence.getSubSequence()
952     final int sub_seq_start_index;
953     final int sub_seq_end_index;
954 
955     if(real_range.getStart () < 1)
956       sub_seq_start_index = 1;
957     else
958       sub_seq_start_index = real_range.getStart ();
959 
960     if(real_range.getEnd () > getLength ())
961       sub_seq_end_index = getLength ();
962     else
963       sub_seq_end_index = real_range.getEnd ();
964 
965     char[] sub_sequence =
966       getSequence().getCharSubSequence(sub_seq_start_index, sub_seq_end_index);
967 
968     if(real_range.getStart() < 1)
969     {
970       final int dummy_base_count = 1 - real_range.getStart();
971       final char[] dummy_bases = new char[dummy_base_count+sub_sequence.length];
972 
973       for(int i = 0; i < dummy_base_count; ++i)
974         dummy_bases[i] = '@';
975 
976       System.arraycopy(sub_sequence, 0, dummy_bases, dummy_base_count, sub_sequence.length);
977       sub_sequence = dummy_bases;
978     }
979 
980     if(real_range.getEnd() > getLength())
981     {
982       final int dummy_base_count = real_range.getEnd() - getLength();
983       final char[] dummy_bases = new char[dummy_base_count+sub_sequence.length];
984 
985       for(int i = sub_sequence.length; i < dummy_bases.length; ++i)
986         dummy_bases[i] = '@';
987 
988       System.arraycopy(sub_sequence, 0, dummy_bases, 0, sub_sequence.length);
989       sub_sequence = dummy_bases;
990     }
991 
992 
993     if(FORWARD == direction)
994       return sub_sequence;
995     else
996       return reverseComplement(sub_sequence);
997   }
998 
999   /**
1000    *  This method truncates the sequence use the start and end of the argument.
1001    *  @param constraint This contains the start and end base of the new
1002    *    sequence.
1003    *  @return the Bases truncated into the new coordinate system.
1004    **/
truncate(final Range constraint)1005   public Bases truncate (final Range constraint) {
1006     final String bases_string = getSubSequence (constraint, FORWARD);
1007 
1008     final Sequence new_sequence = new EmblStreamSequence (bases_string);
1009 
1010     return new Bases (new_sequence);
1011   }
1012 
1013 
1014   /**
1015   *
1016   * Reverse complement a range of the sequence.
1017   *
1018   */
reverseComplement(final Feature feature)1019   public void reverseComplement(final Feature feature)
1020               throws ReadOnlyException
1021   {
1022     stop_codon_cache = null;
1023 
1024     final Range range = feature.getMaxRawRange();
1025     final int range_start_index = range.getStart();
1026     final int range_end_index   = range.getEnd();
1027 
1028     // ensure we just get subsequence of interest
1029     ((StreamSequence)getSequence()).forceReset();
1030     // sequence to reverse complement
1031     final char[] sub_sequence = reverseComplement(getSequence().getCharSubSequence(
1032                                               range_start_index, range_end_index));
1033     final char[] new_sequence = new char[getLength()];
1034     final char[] old_sequence = ((StreamSequence)getSequence()).getCharSequence();
1035 
1036 //  System.out.println("range_start_index  "+range_start_index);
1037 //  System.out.println("range_end_index    "+range_end_index);
1038 //  System.out.println("getLength          "+getLength());
1039 //  System.out.println("sub_sequence.length "+sub_sequence.length);
1040 //  System.out.println(feature.getEntry().getEMBLEntry().toString());
1041 //  System.out.println(new String(sub_sequence));
1042 
1043     // if not first contig
1044     if(range_start_index != 1)
1045       System.arraycopy(old_sequence, 0, new_sequence, 0, range_start_index-1);
1046 
1047     // copy in new sequence fragment that has been reverse complemented
1048     System.arraycopy(sub_sequence, 0, new_sequence, range_start_index-1,
1049                                                     sub_sequence.length);
1050 
1051     // if not last contig
1052     if(range_end_index != getLength())
1053       System.arraycopy(old_sequence, range.getEnd(), new_sequence, range_end_index,
1054                                                     getLength()-range_end_index);
1055 
1056     try
1057     {
1058       embl_sequence.setFromChar(new_sequence);
1059     }
1060     catch (IllegalSymbolException e)
1061     {
1062       throw new Error ("internal error - unexpected exception: " + e);
1063     }
1064 
1065     final SequenceChangeEvent event =
1066       new SequenceChangeEvent(this, SequenceChangeEvent.CONTIG_REVERSE_COMPLEMENT,
1067                               range, sub_sequence.length);
1068 
1069     fireSequenceChangeEvent(event);
1070   }
1071 
1072 
contigRearrange(final Feature feature, final int new_base_pos)1073   public void contigRearrange(final Feature feature, final int new_base_pos)
1074               throws ReadOnlyException
1075   {
1076     stop_codon_cache = null;
1077 
1078     final Range range = feature.getMaxRawRange();
1079     final int range_start_index = range.getStart();
1080     final int range_end_index   = range.getEnd();
1081 
1082     if(new_base_pos == range_start_index)
1083       return;
1084 
1085     final char[] new_sequence = new char[getLength()];
1086     final char[] old_sequence = ((StreamSequence)getSequence()).getCharSequence();
1087 
1088     int contig_length = 0;
1089     if(new_base_pos < range_start_index)
1090     {
1091       // if not first contig
1092       if(new_base_pos != 1)
1093         System.arraycopy(old_sequence, 0, new_sequence, 0, new_base_pos-1);
1094 
1095       contig_length = range_end_index - range_start_index + 1;
1096       // copy in new sequence fragment that has been reverse complemented
1097       System.arraycopy(old_sequence, range_start_index-1,
1098                        new_sequence, new_base_pos-1, contig_length);
1099 
1100       System.arraycopy(old_sequence, new_base_pos-1,
1101                        new_sequence, new_base_pos+contig_length-1,
1102                        range_start_index-new_base_pos);
1103 
1104       // if not last contig
1105       if(new_base_pos < getLength()+1)
1106         System.arraycopy(old_sequence, range_end_index,
1107                          new_sequence, range_end_index,
1108                          getLength()-range_end_index);
1109     }
1110     else
1111     {
1112       System.arraycopy(old_sequence, 0, new_sequence, 0, range_start_index-1);
1113 
1114       System.arraycopy(old_sequence, range_end_index,
1115                        new_sequence, range_start_index-1,
1116                        new_base_pos-range_end_index-1);
1117 
1118       System.arraycopy(old_sequence, range_start_index-1,
1119                        new_sequence, (range_start_index-1)+(new_base_pos-range_end_index)-1,
1120                        range_end_index - range_start_index + 1);
1121 
1122       // if not last contig
1123       if(new_base_pos < getLength()+1)
1124         System.arraycopy(old_sequence, new_base_pos-1,
1125                          new_sequence, new_base_pos-1,
1126                          getLength()-new_base_pos+1);
1127     }
1128 
1129 
1130     try
1131     {
1132       embl_sequence.setFromChar(new_sequence);
1133     }
1134     catch (IllegalSymbolException e)
1135     {
1136       throw new Error ("internal error - unexpected exception: " + e);
1137     }
1138 
1139     final SequenceChangeEvent event =
1140       new SequenceChangeEvent(SequenceChangeEvent.CONTIG_REORDER,
1141                               new_base_pos, range);
1142 
1143     fireSequenceChangeEvent(event);
1144   }
1145 
1146 
1147   /**
1148    *  Delete the bases in the given range and send out a SequenceChange event
1149    *  to all the listeners.
1150    *  @param range The inclusive range of bases to delete.
1151    *  @return A String containing the deleted bases.
1152    *  @exception ReadOnlyException If this Bases object cannot be changed.
1153    **/
deleteRange(final Range range)1154   public String deleteRange (final Range range)
1155       throws ReadOnlyException {
1156     stop_codon_cache = null;
1157 
1158     final String removed_bases =
1159       getSequence ().getSubSequence (range.getStart (), range.getEnd ());
1160 
1161     final String new_sequence =
1162       getSequence ().getSubSequence (1, range.getStart () - 1) +
1163       getSequence ().getSubSequence (range.getEnd () + 1,
1164                                      embl_sequence.length ());
1165 
1166     try {
1167       embl_sequence.setFromChar(new_sequence.toCharArray());
1168     } catch (IllegalSymbolException e) {
1169       throw new Error ("internal error - unexpected exception: " + e);
1170     }
1171 
1172     final SequenceChangeEvent event =
1173       new SequenceChangeEvent (this,
1174                                SequenceChangeEvent.DELETION,
1175                                range.getStart (),
1176                                removed_bases);
1177 
1178     fireSequenceChangeEvent (event);
1179 
1180     return removed_bases;
1181   }
1182 
1183   /**
1184    *  Insert the given bases at the given base position and send out a
1185    *  SequenceChange event to all the listeners.
1186    *  @param position The bases are inserted just before this base position if
1187    *    direction is FORWARD or just after if direction is REVERSE.
1188    *  @param direction If this is FORWARD, then the bases is the bases String
1189    *    will be inserted just before the base given by position.  If this is
1190    *    REVERSE the bases will be reversed, complemented and inserted just
1191    *    after the position.
1192    *  @param bases The bases to add (or the reverse complement of the bases to
1193    *    add if direction is REVERSE).
1194    *  @exception ReadOnlyException If this Bases object cannot be changed.
1195    **/
addBases(final int position, final int direction, final String bases)1196   public void addBases (final int position, final int direction,
1197                         final String bases)
1198       throws ReadOnlyException, IllegalSymbolException {
1199     stop_codon_cache = null;
1200 
1201     final String new_sequence;
1202     final int real_position;
1203     final String real_bases;
1204 
1205     if (direction == FORWARD) {
1206       real_position = position;
1207       real_bases = bases.toLowerCase ();
1208     } else {
1209       real_position = position + 1;
1210       real_bases = reverseComplement (bases.toLowerCase ());
1211     }
1212 
1213     new_sequence =
1214       getSequence ().getSubSequence (1, real_position - 1) +
1215       real_bases +
1216       getSequence ().getSubSequence (real_position, getLength ());
1217 
1218     getSequence ().setFromChar(new_sequence.toCharArray());
1219 
1220     final SequenceChangeEvent event =
1221       new SequenceChangeEvent (this,
1222                                SequenceChangeEvent.INSERTION,
1223                                real_position,
1224                                real_bases);
1225 
1226     fireSequenceChangeEvent (event);
1227 
1228     return;
1229   }
1230 
1231   /**
1232    *  There is one element in this array for each possible
1233    *  SequenceChangeListener priority.  This array is changed by
1234    *  addSequenceChangeListener() and removeSequenceChangeListener().
1235    **/
1236   final private WeakHashMap listener_hash_map_array [] =
1237     new WeakHashMap [MAX_PRIORITY - MIN_PRIORITY + 1];
1238 
1239   /**
1240    *  Adds the specified event listener to the list of object that receive
1241    *  sequence change events from this object.
1242    *  @param l the event change listener.
1243    *  @param priority The listeners are stored in a priority queue using this
1244    *    value.  Larger priority means that the listener will receive the event
1245    *    sooner (than lower priority listeners).  Values less than MIN_PRIORITY
1246    *    will be treated like MIN_PRIORITY values higher than MAX_PRIORITY will
1247    *    be treated like MAX_PRIORITY.
1248    **/
addSequenceChangeListener(final SequenceChangeListener l, int priority)1249   public void addSequenceChangeListener (final SequenceChangeListener l,
1250                                          int priority) {
1251     if (priority < MIN_PRIORITY) {
1252       priority = MIN_PRIORITY;
1253     }
1254 
1255     if (priority > MAX_PRIORITY) {
1256       priority = MAX_PRIORITY;
1257     }
1258 
1259     listener_hash_map_array [priority - MIN_PRIORITY].put (l, null);
1260   }
1261 
1262   /**
1263    *  Removes the specified event listener so that it no longer receives
1264    *  sequence change events from this object.
1265    *  @param l the event change listener.
1266    **/
removeSequenceChangeListener(final SequenceChangeListener l)1267   public void removeSequenceChangeListener (final SequenceChangeListener l) {
1268     for (int i = 0 ; i < listener_hash_map_array.length ; ++i) {
1269       final WeakHashMap this_hash_map = listener_hash_map_array [i];
1270 
1271       if (this_hash_map.containsKey (l)) {
1272         this_hash_map.remove (l);
1273         return;
1274       }
1275     }
1276   }
1277 
1278   /**
1279    *  Send a SequenceChangeEvent to each object that is listening for it.
1280    **/
fireSequenceChangeEvent(final SequenceChangeEvent event)1281   private void fireSequenceChangeEvent (final SequenceChangeEvent event) {
1282     for (int i = listener_hash_map_array.length - 1 ; i >= 0 ; --i) {
1283       final WeakHashMap this_hash_map = listener_hash_map_array [i];
1284 
1285       if (this_hash_map != null) {
1286         final Iterator iter = this_hash_map.keySet ().iterator ();
1287 
1288         while (iter.hasNext())
1289         {
1290           final SequenceChangeListener this_listener =
1291             (SequenceChangeListener) iter.next();
1292           this_listener.sequenceChanged (event);
1293         }
1294       }
1295     }
1296   }
1297 
1298   /**
1299    *  Return the average gc percent for the sequence.
1300    **/
getAverageGCPercent()1301   public float getAverageGCPercent () {
1302     return ((float)(getSequence ().getCCount () +
1303                     getSequence ().getGCount ())) /
1304       getSequence ().length () * 100;
1305   }
1306 
1307   /**
1308    *  Return the average AG percent for the sequence as a percentage.
1309    **/
getAverageAGPercent()1310   public float getAverageAGPercent () {
1311     return ((float)(getSequence ().getACount () +
1312                     getSequence ().getGCount ())) /
1313       getSequence ().length () * 100;
1314   }
1315 
1316   /**
1317    *  Return the number of 'A's in this Bases object.
1318    **/
getACount()1319   public int getACount () {
1320     return getSequence ().getACount ();
1321   }
1322 
1323   /**
1324    *  Return the number of 'T's in this Bases object.
1325    **/
getTCount()1326   public int getTCount () {
1327     return getSequence ().getTCount ();
1328   }
1329 
1330   /**
1331    *  Return the number of 'G's in this Bases object.
1332    **/
getGCount()1333   public int getGCount () {
1334     return getSequence ().getGCount ();
1335   }
1336 
1337   /**
1338    *  Return the number of 'C's in this Bases object.
1339    **/
getCCount()1340   public int getCCount () {
1341     return getSequence ().getCCount ();
1342   }
1343 
1344   /**
1345    *  Return a String containing the reverse complement of the argument
1346    *  String.  For example an argument of "aatc" will result in "gatt".
1347    **/
reverseComplement(final String sequence_string)1348   public static String reverseComplement (final String sequence_string) {
1349     StringBuffer return_buffer = new StringBuffer (sequence_string.length ());
1350 
1351     for (int i = sequence_string.length () - 1 ; i >= 0 ; --i) {
1352       return_buffer.append (complement (sequence_string.charAt (i)));
1353     }
1354 
1355     return return_buffer.toString ();
1356   }
1357 
1358   /**
1359    *  Return a char[] containing the reverse complement of the argument
1360    *  String.  For example an argument of "aatc" will result in "gatt".
1361    **/
reverseComplement(final char[] sequence_char)1362   public static char[] reverseComplement (final char[] sequence_char)
1363   {
1364     final int length = sequence_char.length;
1365     final char[] return_sequence = new char[length];
1366     int j = 0;
1367 
1368     for(int i = length - 1 ; i >= 0 ; --i)
1369     {
1370       return_sequence[j] = complement(sequence_char[i]);
1371       j++;
1372     }
1373 
1374     return return_sequence;
1375   }
1376 
1377 
1378   /**
1379    *  Return a String containing the complement of the argument String.  For
1380    *  example an argument of "aatc" will result in "ttag".
1381    **/
complement(final String sequence_string)1382   public static String complement (final String sequence_string) {
1383     StringBuffer return_buffer = new StringBuffer (sequence_string.length ());
1384 
1385     for (int i = 0 ; i < sequence_string.length () ; ++i) {
1386       return_buffer.append (complement (sequence_string.charAt (i)));
1387     }
1388 
1389     return return_buffer.toString ();
1390   }
1391 
1392   /**
1393    *  Return a char array containing the complement of the argument char[].  For
1394    *  example an argument of "aatc" will result in "ttag".
1395    **/
complement(final char sequence[])1396   public static char[] complement (final char sequence[])
1397   {
1398     final char[] seq_comp = new char[sequence.length];
1399 
1400     for (int i = 0 ; i < sequence.length; ++i)
1401       seq_comp[i] = complement(sequence[i]);
1402 
1403     return seq_comp;
1404   }
1405 
1406   /**
1407    *  Returns the complement base of it's argument - c for g, a for t etc.
1408    *  The argument may be upper or lower case, but the result is always lower
1409    *  case.  This also works for IUB base codes: the complement of 'y' is 'r'
1410    *  because 'y' is 'c' or 't' and 'r' is 'a' or 'g', the complement of 'n'
1411    *  or 'x' (any base) is 'n'.
1412    **/
complement(final char base)1413   public final static char complement (final char base) {
1414 
1415     switch (base) {
1416     case 'a': case 'A': return 't';
1417     case 't': case 'T': case 'u': case 'U': return 'a';
1418     case 'g': case 'G': return 'c';
1419     case 'c': case 'C': return 'g';
1420     case 'r': case 'R': return 'y';
1421     case 'y': case 'Y': return 'r';
1422     case 'k': case 'K': return 'm';
1423     case 'm': case 'M': return 'k';
1424     case 's': case 'S': return 's';
1425     case 'w': case 'W': return 'w';
1426     case 'b': case 'B': return 'v';
1427     case 'd': case 'D': return 'h';
1428     case 'h': case 'H': return 'd';
1429     case 'v': case 'V': return 'b';
1430     case 'n': case 'N': return 'n';
1431     case 'x': case 'X': return 'x';
1432     default:
1433       return '@';
1434 //      throw new Error ("in Bases.complement - tried to complement a letter " +
1435 //                       "that isn't a base");
1436     }
1437   }
1438 
1439   /**
1440    *  Return the Sequence object that was passed to the constructor.
1441    **/
getSequence()1442   public Sequence getSequence ()
1443   {
1444     return embl_sequence;
1445   }
1446 
1447 
1448   /**
1449    *  Check a three character substring and return true if and only if the
1450    *  three bases translate to a stop codon.  If the direction is REVERSE
1451    *  then the three bases to check are at start_index, start_index - 1 and
1452    *  start_index - 2.  In that case true is returned if and only the
1453    *  complement of those three bases is a stop codon.
1454    *  Codons that contain an X are considered to be stop codons.
1455    **/
isCodon(char first_letter, char second_letter, char third_letter, final StringVector query_codons)1456   private static boolean isCodon(char first_letter, char second_letter, char third_letter,
1457                                  final StringVector query_codons)
1458   {
1459     char[] tran = {first_letter, second_letter, third_letter };
1460 
1461     if(query_codons.contains( new String(tran) ))
1462       return true;
1463 
1464     return false;
1465   }
1466 
1467   /**
1468    *  Check a three character substring and return true if and only if the
1469    *  three bases translate to a stop codon.  If the direction is REVERSE
1470    *  then the three bases to check are at start_index, start_index - 1 and
1471    *  start_index - 2.  In that case true is returned if and only the
1472    *  complement of those three bases is a stop codon.
1473    *  Codons that contain an X are considered to be stop codons.
1474    **/
isStopCodon(char first_letter, char second_letter, char third_letter)1475   private static boolean isStopCodon(char first_letter, char second_letter, char third_letter)
1476   {
1477     // codons that contain an X are considered to be stop codons.
1478     if(first_letter == 'x' || second_letter == 'x' || third_letter == 'x')
1479       return true;
1480 
1481     final char translation = AminoAcidSequence.getCodonTranslation(first_letter,
1482                                                                   second_letter,
1483                                                                   third_letter);
1484 
1485     if(translation == '+' || translation == '*' || translation == '#')
1486       return true;
1487     else
1488       return false;
1489   }
1490 
1491 
1492   /**
1493    *  Check a three character substring and return true if and only if the
1494    *  three bases are legal (see isLegalBase ()).
1495    **/
1496   /*private static boolean isLegalCodon (final String sequence_string,
1497                                        final int start_index,
1498                                        final int direction) {
1499     if (direction == FORWARD) {
1500       if (isLegalBase (sequence_string.charAt (start_index)) &&
1501           isLegalBase (sequence_string.charAt (start_index + 1)) &&
1502           isLegalBase (sequence_string.charAt (start_index + 2))) {
1503         return true;
1504       }
1505     } else {
1506       if (isLegalBase (sequence_string.charAt (start_index)) &&
1507           isLegalBase (sequence_string.charAt (start_index - 1)) &&
1508           isLegalBase (sequence_string.charAt (start_index - 2))) {
1509         return true;
1510       }
1511     }
1512 
1513     // this isn't a stop codon
1514     return false;
1515 
1516   }*/
1517 
1518   /**
1519    *  Return true if and only if the given base character is one of 'a', 't',
1520    *  'c', 'g' or 'u'.
1521    **/
isLegalBase(final char base_char)1522   public final static boolean isLegalBase (final char base_char) {
1523     switch (base_char) {
1524     case 'a': case 'A': return true;
1525     case 't': case 'T': return true;
1526     case 'u': case 'U': return true;
1527     case 'g': case 'G': return true;
1528     case 'c': case 'C': return true;
1529     default:
1530       return false;
1531     }
1532   }
1533 
  /**
   *  The underlying sequence object that holds the data for this object.
   *  This is the same object that was passed to the constructor.  It is the
   *  object returned by getSequence(), and the editing methods of this class
   *  change the bases by calling setFromChar() on it.
   **/
  private Sequence embl_sequence;

  /**
   *  The object representing the forward sequence of bases.
   **/
  private Strand forward_strand;

  /**
   *  The object representing the reverse (reverse complemented)
   *  sequence of bases.
   **/
  private Strand reverse_strand;
1550 }
1551