1 /* Bases.java 2 * 3 * created: Sun Oct 11 1998 4 * 5 * This file is part of Artemis 6 * 7 * Copyright (C) 1998-2005 Genome Research Limited 8 * 9 * This program is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU General Public License 11 * as published by the Free Software Foundation; either version 2 12 * of the License, or (at your option) any later version. 13 * 14 * This program is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 * GNU General Public License for more details. 18 * 19 * You should have received a copy of the GNU General Public License 20 * along with this program; if not, write to the Free Software 21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 22 * 23 * $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/sequence/Bases.java,v 1.26 2009-03-27 14:00:51 tjc Exp $ 24 */ 25 26 package uk.ac.sanger.artemis.sequence; 27 28 import uk.ac.sanger.artemis.Feature; 29 import uk.ac.sanger.artemis.util.*; 30 import uk.ac.sanger.artemis.io.Range; 31 import uk.ac.sanger.artemis.io.EmblStreamSequence; 32 import uk.ac.sanger.artemis.io.Sequence; 33 import uk.ac.sanger.artemis.io.StreamSequence; 34 35 import org.biojava.bio.symbol.IllegalSymbolException; 36 37 import java.util.WeakHashMap; 38 import java.util.Iterator; 39 40 /** 41 * This class is a wrapper for the uk.ac.sanger.artemis.io.Sequence class 42 * that allows us to control what is done to the sequence and to send events 43 * to interested objects when changes happen. Note: a '@' character is used 44 * as a marker when we don't have a base letter, for example complementing a 45 * non-base letter returns '@'. 46 * 47 * @author Kim Rutherford 48 * @version $Id: Bases.java,v 1.26 2009-03-27 14:00:51 tjc Exp $ */ 49 50 public class Bases 51 { 52 /** 53 * Indicates the bases should be read in the forward direction for a 54 * particular operation. 55 **/ 56 static public final int FORWARD = 1; 57 58 /** 59 * Indicates the bases should be read in the reverse direction for a 60 * particular operation. 61 **/ 62 static public final int REVERSE = 2; 63 64 /** 65 * The lowest possible value for use with addSequenceChangeListener (). 66 **/ 67 static public final int MIN_PRIORITY = -5; 68 69 /** 70 * An arbitrary value for use with addSequenceChangeListener (). 71 **/ 72 static public final int MEDIUM_PRIORITY = 0; 73 74 /** 75 * The highest possible value for use with addSequenceChangeListener (). 76 **/ 77 static public final int MAX_PRIORITY = 5; 78 79 /** 80 * A cache of the forward & reverse stop codon positions. 81 * 0 means not set/cached yet, 1 not a stop codon, 2 and 3 are a 82 * stop codon on fwd or reverse strand respectively. 83 **/ 84 private byte [] stop_codon_cache = null; 85 86 /** 87 * A cache of the forward & reverse start codon positions. 88 * 0 means not set/cached yet, 1 not a start codon, 2 and 3 are a 89 * start codon on fwd or reverse strand repectively. 90 **/ 91 private byte [] start_codon_cache = null; 92 93 /** 94 * Create a new Bases object. 95 * @param sequence The raw sequence that the new object will use. 96 **/ Bases(final Sequence sequence)97 public Bases(final Sequence sequence) 98 { 99 this.embl_sequence = sequence; 100 101 stop_codon_cache = null; 102 103 forward_strand = new Strand(this, FORWARD); 104 reverse_strand = new Strand(this, REVERSE); 105 106 for(int i = 0 ; i < listener_hash_map_array.length ; ++i) 107 listener_hash_map_array [i] = new WeakHashMap(); 108 } 109 110 /** 111 * Return the object representing the forward sequence of bases for this 112 * object. 113 **/ getForwardStrand()114 public Strand getForwardStrand() 115 { 116 return forward_strand; 117 } 118 119 /** 120 * Return the object representing the reverse complemented sequence of 121 * bases for this Bases objects. 122 **/ getReverseStrand()123 public Strand getReverseStrand() 124 { 125 return reverse_strand; 126 } 127 128 /** 129 * Returns the length of the sequence in bases. 130 **/ getLength()131 public int getLength() 132 { 133 return embl_sequence.length(); 134 } 135 136 /** 137 * Return a String representation of the sequence. 138 **/ toString()139 public String toString() 140 { 141 return embl_sequence.getSubSequence(1,getLength()); 142 } 143 144 /** 145 * Reverse and complement both of the Strand objects (by swapping them and 146 * reverse complementing the sequence). 147 * @exception ReadOnlyException If the Bases cannot be changed. 148 **/ reverseComplement()149 public void reverseComplement() 150 throws ReadOnlyException 151 { 152 stop_codon_cache = null; 153 154 final Strand temp = forward_strand; 155 forward_strand = reverse_strand; 156 reverse_strand = temp; 157 158 // final String new_sequence = 159 // reverseComplement(getSequence().getSubSequence(1, getLength())); 160 161 final char[] new_sequence = 162 reverseComplement(getSequence().getCharSubSequence(1, getLength())); 163 164 try 165 { 166 // getSequence().setFromChar(new_sequence.toCharArray()); 167 getSequence().setFromChar(new_sequence); 168 } 169 catch (IllegalSymbolException e) 170 { 171 throw new Error ("internal error - unexpected exception: " + e); 172 } 173 174 final SequenceChangeEvent event = 175 new SequenceChangeEvent (this, SequenceChangeEvent.REVERSE_COMPLEMENT); 176 177 fireSequenceChangeEvent (event); 178 } 179 180 /** 181 * This array is used to convert between bases and indices. See 182 * getIndexOfBase() 183 **/ 184 public final static char[] letter_index = 185 { 186 't', 'c', 'a', 'g', 'n' 187 }; 188 189 /** 190 * Given a base letter return its index where t = 0, c = 1, a = 2, g = 3, 4 191 * otherwise. 192 * See letter_index. 193 **/ getIndexOfBase(final char base)194 public final static int getIndexOfBase(final char base) 195 { 196 switch(base) 197 { 198 case 'c': 199 return 1; 200 case 'a': 201 return 2; 202 case 'g': 203 return 3; 204 case 't': 205 case 'u': 206 return 0; 207 } 208 209 return 4; 210 } 211 212 /** 213 * Return the complement of the given Range. eg. if the sequence length is 214 * 100 and the Range is 1..10 then the return value will be 90..100. 215 **/ complementRange(final Range range)216 private Range complementRange (final Range range) { 217 final int real_start = getComplementPosition (range.getEnd ()); 218 final int real_end = getComplementPosition (range.getStart ()); 219 220 try { 221 final Range real_range = new Range (real_start, real_end); 222 223 return real_range; 224 } catch (OutOfRangeException e) { 225 throw new Error ("internal error - unexpected exception: " + e); 226 } 227 } 228 229 /** 230 * Return the complement of the given position on the sequence. eg. if the 231 * sequence length is 100 and the position is 10 then the return value will 232 * be 90. 233 **/ getComplementPosition(final int position)234 public int getComplementPosition (final int position) { 235 return getLength () - position + 1; 236 } 237 238 /** 239 * Return the raw of a base position on this object. The raw position of a 240 * base on the forward strand is the same as the position itself. The raw 241 * position of a base on the reverse strand is position of the 242 * corresponding bases on the forward strand. 243 * @param position The position of the base. 244 * @param direction The direction (strand) that the position refers to. 245 **/ getRawPosition(final int position, final int direction)246 public int getRawPosition (final int position, final int direction) { 247 if (direction == FORWARD) { 248 return position; 249 } else { 250 return getComplementPosition (position); 251 } 252 } 253 254 /** 255 * Translate a sequence of bases into the corresponding single letter amino 256 * acid codes. 257 * @param range The range of the bases to translated. If the range.start 258 * - range.end + 1 is not a multiple of three the last codon is 259 * incomplete and will not be translated. If the range is out of range 260 * ie. it has a start or end less than one or greater than the length of 261 * the sequence, then the out of range codons will be translated as '.'. 262 * @param direction The direction of the translation. If FORWARD the 263 * translation will happen as expected, if REVERSE the translation will 264 * be done on the reverse complement. 265 * @param unknown_is_x If this parameter is true codons that contain 266 * ambiguous bases will be translated as 'x', if false they will be 267 * translated as '.' 268 * @return The translated sequence in one letter abbreviated form. 269 **/ getTranslation(final Range range, final int direction, final boolean unknown_is_x)270 public AminoAcidSequence getTranslation(final Range range, 271 final int direction, 272 final boolean unknown_is_x) 273 { 274 // getSubSequenceC() will return a sequence going in the right direction 275 // so we don't have to worry. 276 final char[] sub_sequence = getSubSequenceC(range, direction); 277 return AminoAcidSequence.getTranslation(sub_sequence, unknown_is_x); 278 } 279 280 getSpacedTranslation(final Range range, final int direction, final boolean unknown_is_x)281 public AminoAcidSequence getSpacedTranslation(final Range range, 282 final int direction, 283 final boolean unknown_is_x) 284 { 285 // getSubSequenceC() will return a sequence going in the right direction 286 // so we don't have to worry. 287 final char[] sub_sequence = getSubSequenceC(range, direction); 288 return AminoAcidSequence.getSpacedTranslation(sub_sequence, unknown_is_x); 289 } 290 291 /** 292 * Return an array containing the positions of the codons that match the 293 * strings given by the query_codons argument. Only those codons that are 294 * in the same frame as the first base of the range are returned. 295 * @param range The inclusive range of bases to get the codons from. 296 * @param direction The direction of the translation. REVERSE means 297 * translate the reverse complement bases (the positions in the range 298 * argument are complemented first.) 299 * @param query_codons The codons to search for. Each element of this 300 * vector should be a string that is 3 characters long. 301 * @return An array containing the positions of the first base of the 302 * codons. This array is padded with zeros at the end. 303 **/ getMatchingCodons(final Range range, final int direction, final StringVector query_codons)304 public int [] getMatchingCodons (final Range range, final int direction, 305 final StringVector query_codons) { 306 final Range real_range; 307 308 if(direction == FORWARD) 309 real_range = range; 310 else 311 real_range = complementRange(range); 312 313 // guess the number of codons in getCount () bases - there are 314 // query_codons.size() search codons in every 64 codons if G+C is 50% 315 // and we have getCount()/3 codons to look at. 316 317 float at_content = (100 - getAverageGCPercent()) / 100; 318 319 int array_start_size = 320 (int) (range.getCount () * 321 at_content * at_content * (2-at_content) * 322 query_codons.size () / 64); 323 324 if(array_start_size < 20) 325 array_start_size = 20; 326 327 // this array will be resized as necessary 328 int[] return_positions = new int[array_start_size]; 329 330 int current_return_array_index = 0; 331 332 final String sequence_string = 333 getSequence ().getSubSequence (1, getLength ()); 334 335 final int range_start_index = real_range.getStart () - 1; 336 final int range_end_index = real_range.getEnd () - 1; 337 338 if(direction == FORWARD) 339 { 340 for (int i = range_start_index ; i < range_end_index - 2 ; i += 3) { 341 if (i < 0 || i >= sequence_string.length () - 2) { 342 continue; 343 } 344 345 boolean is_matching_codon = 346 isMatchingCodon (sequence_string, i, direction, query_codons); 347 348 if (is_matching_codon) { 349 if (current_return_array_index == return_positions.length) { 350 // first reallocate the array 351 final int [] new_array = 352 new int [return_positions.length * 3 / 2 + 1]; 353 354 System.arraycopy (return_positions, 0, 355 new_array, 0, 356 return_positions.length); 357 return_positions = new_array; 358 } 359 360 return_positions[current_return_array_index] = i + 1; 361 362 ++current_return_array_index; 363 } 364 } 365 } else { 366 367 for (int i = range_end_index ; i > range_start_index + 2 ; i -= 3) { 368 if (i < 2 || i >= sequence_string.length ()) { 369 continue; 370 } 371 372 boolean is_matching_codon = 373 isMatchingCodon (sequence_string, i, direction, query_codons); 374 375 if (is_matching_codon) { 376 if (current_return_array_index == return_positions.length) { 377 // first reallocate the array 378 final int [] new_array = 379 new int [return_positions.length * 3 / 2 + 1]; 380 381 System.arraycopy (return_positions, 0, 382 new_array, 0, 383 return_positions.length); 384 return_positions = new_array; 385 } 386 387 // return the complemented base position 388 return_positions[current_return_array_index] = 389 sequence_string.length () - i; 390 391 ++current_return_array_index; 392 } 393 } 394 } 395 396 return return_positions; 397 398 } 399 400 /** 401 * Check a three character substring and return true if and only if the 402 * three bases match an element of the query_codons argument. If the 403 * direction is REVERSE then the three bases to check are at start_index, 404 * start_index - 1 and start_index - 2. In that case true is returned if 405 * and only the complement of those three bases matches. 406 **/ isMatchingCodon(final String sequence_string, final int start_index, final int direction, final StringVector query_codons)407 private boolean isMatchingCodon (final String sequence_string, 408 final int start_index, 409 final int direction, 410 final StringVector query_codons) { 411 for (int query_codon_index = 0 ; 412 query_codon_index < query_codons.size () ; 413 ++query_codon_index) { 414 if (isMatchingCodon (sequence_string, start_index, direction, 415 (String)query_codons.elementAt (query_codon_index))) { 416 return true; 417 } 418 } 419 420 return false; 421 } 422 423 /** 424 * Check a three character substring and return true if and only if the 425 * three bases match the query_codon argument. If the direction is 426 * REVERSE then the three bases to check are at start_index, start_index - 427 * 1 and start_index - 2. In that case true is returned if and only the 428 * complement of those three bases matches. 429 **/ isMatchingCodon(final String sequence_string, final int start_index, final int direction, final String query_codon)430 private boolean isMatchingCodon (final String sequence_string, 431 final int start_index, 432 final int direction, 433 final String query_codon) { 434 if (direction == FORWARD) { 435 if (query_codon.charAt (0) == sequence_string.charAt (start_index) && 436 query_codon.charAt (1) == sequence_string.charAt (start_index + 1) && 437 query_codon.charAt (2) == sequence_string.charAt (start_index + 2)) { 438 return true; 439 } 440 } else { 441 final char first_letter = 442 complement (sequence_string.charAt (start_index)); 443 final char second_letter = 444 complement (sequence_string.charAt (start_index - 1)); 445 final char third_letter = 446 complement (sequence_string.charAt (start_index - 2)); 447 448 if (query_codon.charAt (0) == first_letter && 449 query_codon.charAt (1) == second_letter && 450 query_codon.charAt (2) == third_letter) { 451 return true; 452 } 453 } 454 455 return false; 456 } 457 458 /** 459 * Returns stop_codon_cache after allocating it (if it is null). 460 **/ getStopCodonCache()461 private byte[] getStopCodonCache() 462 { 463 if (stop_codon_cache == null) 464 { 465 final int nbytes = getLength() >> 1 >> 1; 466 stop_codon_cache = new byte[nbytes+1]; 467 } 468 469 return stop_codon_cache; 470 } 471 472 473 /** 474 * Returns start_codon_cache after allocating it (if it is null). 475 **/ getStartCodonCache()476 private byte[] getStartCodonCache() 477 { 478 if (start_codon_cache == null) 479 { 480 final int nbytes = getLength() >> 1 >> 1; 481 start_codon_cache = new byte[nbytes+1]; 482 } 483 484 return start_codon_cache; 485 } 486 487 /** 488 * Clear stop codon cache (forward and reverse). 489 **/ clearCodonCache()490 public void clearCodonCache() 491 { 492 stop_codon_cache = null; 493 start_codon_cache = null; 494 } 495 496 497 /** 498 * Return an array containing the positions of the stop codons. Only those 499 * codons that are in the same frame as the first base of the range are 500 * returned. 501 * @param range The inclusive range of bases to get the stop codons from. 502 * @param direction The direction of the translation. REVERSE means 503 * translate the reverse complement bases (the positions in the range 504 * argument are complemented first.) 505 * @return An array containing the positions of the first base of the stop 506 * codons. This array is padded with zeros at the end. 507 **/ getStopCodons(final Range range, final int direction)508 protected int[] getStopCodons(final Range range, final int direction) 509 { 510 final Range real_range; 511 512 if(direction == FORWARD) 513 real_range = range; 514 else 515 real_range = complementRange (range); 516 517 // guess the number of stop codons in getCount() bases - there are 3 518 // stop codons in every 64 codons if G+C is 50% and we have getCount()/3 519 // codons to look at. 520 521 float at_content = (100 - getAverageGCPercent()) / 100; 522 523 int array_start_size = 524 (int)(range.getCount() * 525 at_content * at_content * (2-at_content) * 3 / 64); 526 527 if(array_start_size < 20) 528 array_start_size = 20; 529 530 // this array will be resized as necessary 531 int[] return_positions = new int[array_start_size]; 532 533 int current_return_array_index = 0; 534 int range_start_index = real_range.getStart(); 535 int range_end_index = real_range.getEnd(); 536 537 final int sequence_length = getLength(); 538 539 if(range_start_index < 1) 540 { 541 if(direction == FORWARD) 542 range_start_index = 3 + (range_start_index % 3); 543 else 544 range_start_index = 1; 545 } 546 if(range_end_index > sequence_length) 547 range_end_index = sequence_length; 548 549 final char sequence_string[] = 550 getSequence().getCharSubSequence(range_start_index, range_end_index); 551 552 range_start_index--; 553 range_end_index--; 554 555 // whether a codon is a stop codon or not is cached in 556 // 2 bit chunks (i.e. 4 per byte) 557 int ncurrent_byte; 558 int bit_position; 559 byte bitty; 560 561 final byte[] this_stop_codon_flags = getStopCodonCache(); 562 if(direction == FORWARD) 563 { 564 565 for(int i = range_start_index; i < range_end_index + 1; i += 3) 566 { 567 if(i < 0 || i >= sequence_length-1) 568 continue; 569 570 ncurrent_byte = i >> 1 >> 1; 571 bit_position = i % 4; 572 573 // determine if codon type is cached or not 574 bitty = (byte) ((this_stop_codon_flags[ncurrent_byte] 575 >> (2*bit_position) ) & 0x0003); 576 if(bitty == 0) 577 { 578 // not cached yet 579 setCache(range_start_index, range_end_index, sequence_string, i, 580 null, this_stop_codon_flags, ncurrent_byte, bit_position); 581 } 582 583 bitty = (byte) ((this_stop_codon_flags[ncurrent_byte] 584 >> (2*bit_position) ) & 0x0003); 585 if( bitty == 1 || bitty == 3 ) 586 continue; 587 588 // if we reach here this is a stop codon 589 if(current_return_array_index == return_positions.length) 590 { 591 // first reallocate the array 592 final int[] new_array = 593 new int[return_positions.length * 3 / 2 + 1]; 594 595 System.arraycopy(return_positions, 0, 596 new_array, 0, 597 return_positions.length); 598 return_positions = new_array; 599 } 600 601 return_positions[current_return_array_index] = i + 1; 602 ++current_return_array_index; 603 } 604 } 605 else 606 { 607 for (int i = range_end_index ; i > range_start_index + 2 ; i -= 3) 608 { 609 if(i < 2 || i >= sequence_length) 610 continue; 611 612 ncurrent_byte = i >> 1 >> 1; 613 bit_position = i % 4; 614 bitty = (byte) ((this_stop_codon_flags[ncurrent_byte] 615 >> (2*bit_position) ) & 0x0003); 616 617 if(bitty == 0) 618 { 619 // not cached yet 620 setCache(range_start_index, range_end_index, sequence_string, i, 621 null, this_stop_codon_flags, ncurrent_byte, bit_position); 622 } 623 624 bitty = (byte) ((this_stop_codon_flags[ncurrent_byte] 625 >> (2*bit_position) ) & 0x0003); 626 627 if( bitty == 1 || bitty != 3 ) 628 continue; 629 630 // if we reach here this is a stop codon 631 if(current_return_array_index == return_positions.length) 632 { 633 // first reallocate the array 634 final int[] new_array = 635 new int[return_positions.length * 3 / 2 + 1]; 636 637 System.arraycopy(return_positions, 0, 638 new_array, 0, 639 return_positions.length); 640 return_positions = new_array; 641 } 642 643 return_positions[current_return_array_index] = 644 sequence_length - i; 645 ++current_return_array_index; 646 } 647 } 648 649 return return_positions; 650 } 651 652 /** 653 * Return an 2D array containing the stop or start codons in a range for 654 * all 3 frames of the strand. 655 * @param range The inclusive range of bases to get the codons from. 656 * @param direction The direction of the translation. REVERSE means 657 * translate the reverse complement bases (the positions in the range 658 * argument are complemented first.) 659 * @param query_codons if this is NULL then this assumes we are looking 660 * for stop codons, otherwise this is used to look for start codons. 661 * @return An array containing the positions of the first base of the stop 662 * codons. This array is padded with zeros at the end. 663 **/ getStopOrStartCodons(final Range range, final int direction, final StringVector query_codons)664 protected int[][] getStopOrStartCodons(final Range range, 665 final int direction, 666 final StringVector query_codons) 667 { 668 final Range real_range; 669 670 if(direction == FORWARD) 671 real_range = range; 672 else 673 real_range = complementRange(range); 674 675 // guess the number of stop codons in getCount() bases - there are 3 676 // stop codons in every 64 codons if G+C is 50% and we have getCount()/3 677 // codons to look at. 678 679 float at_content = (100 - getAverageGCPercent()) / 100; 680 681 int array_start_size = 682 (int)(range.getCount() * 683 at_content * at_content * (2-at_content) * 3 / 64); 684 685 if(array_start_size < 20) 686 array_start_size = 20; 687 // this array will be resized as necessary 688 int[][] return_positions = new int[3][array_start_size]; 689 690 int[] current_return_array_index = new int[3]; 691 current_return_array_index[0] = 0; 692 current_return_array_index[1] = 0; 693 current_return_array_index[2] = 0; 694 695 int range_start_index = real_range.getStart(); 696 int range_end_index = real_range.getEnd(); 697 698 final int sequence_length = getLength(); 699 700 if(range_start_index < 1) 701 { 702 if(direction == FORWARD) 703 range_start_index = 3 + (range_start_index % 3); 704 else 705 range_start_index = 1; 706 } 707 708 if(range_end_index > sequence_length) 709 range_end_index = sequence_length; 710 711 range_start_index--; 712 range_end_index--; 713 char[] sequence_string = null; 714 715 // whether a codon is a stp codon or not is cached in 716 // 2 bit chunks (i.e. 4 per byte) 717 int ncurrent_byte; 718 int bit_position; 719 int nframe = 0; 720 byte bitty; 721 722 final byte[] this_forward_codon_flags; 723 // if this is null then searching for stop codons 724 if(query_codons == null) 725 this_forward_codon_flags = getStopCodonCache(); 726 else 727 this_forward_codon_flags = getStartCodonCache(); 728 729 for(int i = range_start_index; i < range_end_index+1; i += 1) 730 { 731 if(i < 0 || i >= sequence_length) 732 continue; 733 734 ncurrent_byte = i >> 1 >> 1; 735 bit_position = i % 4; 736 737 // determine if codon type is cached or not 738 bitty = (byte) ((this_forward_codon_flags[ncurrent_byte] 739 >> (2*bit_position) ) & 0x0003); 740 741 if(bitty == 0) // not cached yet 742 { 743 if(sequence_string == null) 744 sequence_string = getSequence().getCharSubSequence(range_start_index+1, 745 range_end_index+1); 746 747 setCache(range_start_index, range_end_index, sequence_string, i, 748 query_codons, this_forward_codon_flags, ncurrent_byte, 749 bit_position); 750 bitty = (byte) ((this_forward_codon_flags[ncurrent_byte] 751 >> (2*bit_position) ) & 0x0003); 752 } 753 754 if( bitty == 1 || // not a stop/start codon 755 (direction == FORWARD && bitty == 3) || 756 (direction != FORWARD && bitty != 3 )) 757 continue; 758 759 if(direction == FORWARD) 760 nframe = (i-range_start_index) % 3; 761 else 762 nframe = (range_end_index-i) % 3; 763 764 // if we reach here this is a stop/start codon 765 if(current_return_array_index[nframe] == return_positions[nframe].length) 766 { 767 // first reallocate the array 768 final int[][] new_array = 769 new int[3][return_positions[nframe].length * 3 / 2 + 1]; 770 771 for(int j=0; j<3; j++) 772 System.arraycopy(return_positions[j], 0, 773 new_array[j], 0, 774 return_positions[j].length); 775 return_positions = new_array; 776 } 777 778 if(direction == FORWARD) 779 { 780 if(i==0) 781 return_positions[nframe][current_return_array_index[nframe]] = i + 1; 782 else 783 return_positions[nframe][current_return_array_index[nframe]] = i; 784 } 785 else 786 return_positions[nframe][current_return_array_index[nframe]] = 787 sequence_length - i; 788 ++current_return_array_index[nframe]; 789 } 790 791 return return_positions; 792 } 793 794 /** 795 * Set the codon cache for forward and reverse strand. 796 * @param range_start_index 797 * @param range_end_index 798 * @param sequence_string 799 * @param i 800 * @param query_codons 801 * @param this_codon_flags 802 * @param ncurrent_byte 803 * @param bit_position 804 */ setCache(int range_start_index, int range_end_index, char[] sequence_string, int i, final StringVector query_codons, final byte[] this_codon_flags, int ncurrent_byte, int bit_position)805 private void setCache(int range_start_index, 806 int range_end_index, 807 char[] sequence_string, 808 int i, 809 final StringVector query_codons, 810 final byte[] this_codon_flags, 811 int ncurrent_byte, 812 int bit_position) 813 { 814 // test if stop (or start) codon 815 boolean ismatch = false; 816 817 // forward codon 818 if(i < range_end_index-1) 819 if(query_codons == null) 820 ismatch = isStopCodon(sequence_string[i-range_start_index], 821 sequence_string[i-range_start_index+1], 822 sequence_string[i-range_start_index+2]); 823 else 824 ismatch = isCodon(sequence_string[i-range_start_index], 825 sequence_string[i-range_start_index+1], 826 sequence_string[i-range_start_index+2], 827 query_codons); 828 829 if(ismatch) 830 { 831 this_codon_flags[ncurrent_byte] = // forward strand stop/start = 2 832 (byte)(this_codon_flags[ncurrent_byte] 833 | (0x0002 << 2*bit_position)); 834 } 835 else 836 { 837 this_codon_flags[ncurrent_byte] = // cached no stop/start = 1 838 (byte)(this_codon_flags[ncurrent_byte] 839 | (0x0001 << 2*bit_position)); 840 } 841 842 // reverse codon 843 ismatch = false; 844 if(i-range_start_index > 1 && i-range_start_index < sequence_string.length) 845 if(query_codons == null) 846 ismatch = isStopCodon(complement(sequence_string[i-range_start_index]), 847 complement(sequence_string[i-range_start_index-1]), 848 complement(sequence_string[i-range_start_index-2])); 849 else 850 ismatch = isCodon(complement(sequence_string[i-range_start_index]), 851 complement(sequence_string[i-range_start_index-1]), 852 complement(sequence_string[i-range_start_index-2]), 853 query_codons); 854 if(ismatch) 855 this_codon_flags[ncurrent_byte] = // reverse strand stop/start = 3 856 (byte)(this_codon_flags[ncurrent_byte] 857 | (0x0003 << 2*bit_position)); 858 } 859 860 /** 861 * Return the base at the given position. 862 **/ getBaseAt(final int position)863 public char getBaseAt (final int position) 864 throws OutOfRangeException 865 { 866 if(position > getLength()) 867 throw new OutOfRangeException(position + " > " + getLength()); 868 869 if(position < 1) 870 throw new OutOfRangeException(position + " < " + 1); 871 872 return getSequence().charAt(position); 873 } 874 875 /** 876 * Return a sub sequence of the bases from this object. 877 * @param range The range of the bases to be extracted. 878 * @param direction The direction of the returned sequence. If FORWARD the 879 * sub sequence will be as expected, if REVERSE it will be reverse 880 * complemented. 881 * @return The extracted sequence, which will include the end bases of the 882 * range. 883 **/ getSubSequence(final Range range, final int direction)884 public String getSubSequence (final Range range, final int direction) { 885 final Range real_range; 886 887 if(direction == FORWARD) 888 real_range = range; 889 else 890 real_range = complementRange (range); 891 892 // we need to make sure that we pass in-range coordinates to 893 // Sequence.getSubSequence() 894 final int sub_seq_start_index; 895 final int sub_seq_end_index; 896 897 if(real_range.getStart () < 1) 898 sub_seq_start_index = 1; 899 else 900 sub_seq_start_index = real_range.getStart (); 901 902 if(real_range.getEnd () > getLength ()) 903 sub_seq_end_index = getLength (); 904 else 905 sub_seq_end_index = real_range.getEnd (); 906 907 String sub_sequence = 908 getSequence().getSubSequence(sub_seq_start_index, sub_seq_end_index); 909 910 // sanity checks - if the user asks for more bases than we 911 // have, we return the symbol "@" for the out-of-range bases. 912 if (real_range.getStart () < 1) { 913 final int dummy_base_count = 1 - real_range.getStart (); 914 final char [] dummy_bases = new char [dummy_base_count]; 915 916 for (int i = 0 ; i < dummy_base_count ; ++i) { 917 dummy_bases[i] = '@'; 918 } 919 920 sub_sequence = new String (dummy_bases) + sub_sequence; 921 } 922 923 if (real_range.getEnd () > getLength ()) { 924 final int dummy_base_count = real_range.getEnd () - getLength (); 925 final char [] dummy_bases = new char [dummy_base_count]; 926 927 for (int i = 0 ; i < dummy_base_count ; ++i) { 928 dummy_bases[i] = '@'; 929 } 930 931 sub_sequence = sub_sequence + new String (dummy_bases); 932 } 933 934 if (FORWARD == direction) { 935 return sub_sequence; 936 } else { 937 return reverseComplement (sub_sequence); 938 } 939 } 940 getSubSequenceC(final Range range, final int direction)941 public char[] getSubSequenceC(final Range range, final int direction) 942 { 943 final Range real_range; 944 945 if(direction == FORWARD) 946 real_range = range; 947 else 948 real_range = complementRange (range); 949 950 // we need to make sure that we pass in-range coordinates to 951 // Sequence.getSubSequence() 952 final int sub_seq_start_index; 953 final int sub_seq_end_index; 954 955 if(real_range.getStart () < 1) 956 sub_seq_start_index = 1; 957 else 958 sub_seq_start_index = real_range.getStart (); 959 960 if(real_range.getEnd () > getLength ()) 961 sub_seq_end_index = getLength (); 962 else 963 sub_seq_end_index = real_range.getEnd (); 964 965 char[] sub_sequence = 966 getSequence().getCharSubSequence(sub_seq_start_index, sub_seq_end_index); 967 968 if(real_range.getStart() < 1) 969 { 970 final int dummy_base_count = 1 - real_range.getStart(); 971 final char[] dummy_bases = new char[dummy_base_count+sub_sequence.length]; 972 973 for(int i = 0; i < dummy_base_count; ++i) 974 dummy_bases[i] = '@'; 975 976 System.arraycopy(sub_sequence, 0, dummy_bases, dummy_base_count, sub_sequence.length); 977 sub_sequence = dummy_bases; 978 } 979 980 if(real_range.getEnd() > getLength()) 981 { 982 final int dummy_base_count = real_range.getEnd() - getLength(); 983 final char[] dummy_bases = new char[dummy_base_count+sub_sequence.length]; 984 985 for(int i = sub_sequence.length; i < dummy_bases.length; ++i) 986 dummy_bases[i] = '@'; 987 988 System.arraycopy(sub_sequence, 0, dummy_bases, 0, sub_sequence.length); 989 sub_sequence = dummy_bases; 990 } 991 992 993 if(FORWARD == direction) 994 return sub_sequence; 995 else 996 return reverseComplement(sub_sequence); 997 } 998 999 /** 1000 * This method truncates the sequence use the start and end of the argument. 1001 * @param constraint This contains the start and end base of the new 1002 * sequence. 1003 * @return the Bases truncated into the new coordinate system. 1004 **/ truncate(final Range constraint)1005 public Bases truncate (final Range constraint) { 1006 final String bases_string = getSubSequence (constraint, FORWARD); 1007 1008 final Sequence new_sequence = new EmblStreamSequence (bases_string); 1009 1010 return new Bases (new_sequence); 1011 } 1012 1013 1014 /** 1015 * 1016 * Reverse complement a range of the sequence. 1017 * 1018 */ reverseComplement(final Feature feature)1019 public void reverseComplement(final Feature feature) 1020 throws ReadOnlyException 1021 { 1022 stop_codon_cache = null; 1023 1024 final Range range = feature.getMaxRawRange(); 1025 final int range_start_index = range.getStart(); 1026 final int range_end_index = range.getEnd(); 1027 1028 // ensure we just get subsequence of interest 1029 ((StreamSequence)getSequence()).forceReset(); 1030 // sequence to reverse complement 1031 final char[] sub_sequence = reverseComplement(getSequence().getCharSubSequence( 1032 range_start_index, range_end_index)); 1033 final char[] new_sequence = new char[getLength()]; 1034 final char[] old_sequence = ((StreamSequence)getSequence()).getCharSequence(); 1035 1036 // System.out.println("range_start_index "+range_start_index); 1037 // System.out.println("range_end_index "+range_end_index); 1038 // System.out.println("getLength "+getLength()); 1039 // System.out.println("sub_sequence.length "+sub_sequence.length); 1040 // System.out.println(feature.getEntry().getEMBLEntry().toString()); 1041 // System.out.println(new String(sub_sequence)); 1042 1043 // if not first contig 1044 if(range_start_index != 1) 1045 System.arraycopy(old_sequence, 0, new_sequence, 0, range_start_index-1); 1046 1047 // copy in new sequence fragment that has been reverse complemented 1048 System.arraycopy(sub_sequence, 0, new_sequence, range_start_index-1, 1049 sub_sequence.length); 1050 1051 // if not last contig 1052 if(range_end_index != getLength()) 1053 System.arraycopy(old_sequence, range.getEnd(), new_sequence, range_end_index, 1054 getLength()-range_end_index); 1055 1056 try 1057 { 1058 embl_sequence.setFromChar(new_sequence); 1059 } 1060 catch (IllegalSymbolException e) 1061 { 1062 throw new Error ("internal error - unexpected exception: " + e); 1063 } 1064 1065 final SequenceChangeEvent event = 1066 new SequenceChangeEvent(this, SequenceChangeEvent.CONTIG_REVERSE_COMPLEMENT, 1067 range, sub_sequence.length); 1068 1069 fireSequenceChangeEvent(event); 1070 } 1071 1072 contigRearrange(final Feature feature, final int new_base_pos)1073 public void contigRearrange(final Feature feature, final int new_base_pos) 1074 throws ReadOnlyException 1075 { 1076 stop_codon_cache = null; 1077 1078 final Range range = feature.getMaxRawRange(); 1079 final int range_start_index = range.getStart(); 1080 final int range_end_index = range.getEnd(); 1081 1082 if(new_base_pos == range_start_index) 1083 return; 1084 1085 final char[] new_sequence = new char[getLength()]; 1086 final char[] old_sequence = ((StreamSequence)getSequence()).getCharSequence(); 1087 1088 int contig_length = 0; 1089 if(new_base_pos < range_start_index) 1090 { 1091 // if not first contig 1092 if(new_base_pos != 1) 1093 System.arraycopy(old_sequence, 0, new_sequence, 0, new_base_pos-1); 1094 1095 contig_length = range_end_index - range_start_index + 1; 1096 // copy in new sequence fragment that has been reverse complemented 1097 System.arraycopy(old_sequence, range_start_index-1, 1098 new_sequence, new_base_pos-1, contig_length); 1099 1100 System.arraycopy(old_sequence, new_base_pos-1, 1101 new_sequence, new_base_pos+contig_length-1, 1102 range_start_index-new_base_pos); 1103 1104 // if not last contig 1105 if(new_base_pos < getLength()+1) 1106 System.arraycopy(old_sequence, range_end_index, 1107 new_sequence, range_end_index, 1108 getLength()-range_end_index); 1109 } 1110 else 1111 { 1112 System.arraycopy(old_sequence, 0, new_sequence, 0, range_start_index-1); 1113 1114 System.arraycopy(old_sequence, range_end_index, 1115 new_sequence, range_start_index-1, 1116 new_base_pos-range_end_index-1); 1117 1118 System.arraycopy(old_sequence, range_start_index-1, 1119 new_sequence, (range_start_index-1)+(new_base_pos-range_end_index)-1, 1120 range_end_index - range_start_index + 1); 1121 1122 // if not last contig 1123 if(new_base_pos < getLength()+1) 1124 System.arraycopy(old_sequence, new_base_pos-1, 1125 new_sequence, new_base_pos-1, 1126 getLength()-new_base_pos+1); 1127 } 1128 1129 1130 try 1131 { 1132 embl_sequence.setFromChar(new_sequence); 1133 } 1134 catch (IllegalSymbolException e) 1135 { 1136 throw new Error ("internal error - unexpected exception: " + e); 1137 } 1138 1139 final SequenceChangeEvent event = 1140 new SequenceChangeEvent(SequenceChangeEvent.CONTIG_REORDER, 1141 new_base_pos, range); 1142 1143 fireSequenceChangeEvent(event); 1144 } 1145 1146 1147 /** 1148 * Delete the bases in the given range and send out a SequenceChange event 1149 * to all the listeners. 1150 * @param range The inclusive range of bases to delete. 1151 * @return A String containing the deleted bases. 1152 * @exception ReadOnlyException If this Bases object cannot be changed. 1153 **/ deleteRange(final Range range)1154 public String deleteRange (final Range range) 1155 throws ReadOnlyException { 1156 stop_codon_cache = null; 1157 1158 final String removed_bases = 1159 getSequence ().getSubSequence (range.getStart (), range.getEnd ()); 1160 1161 final String new_sequence = 1162 getSequence ().getSubSequence (1, range.getStart () - 1) + 1163 getSequence ().getSubSequence (range.getEnd () + 1, 1164 embl_sequence.length ()); 1165 1166 try { 1167 embl_sequence.setFromChar(new_sequence.toCharArray()); 1168 } catch (IllegalSymbolException e) { 1169 throw new Error ("internal error - unexpected exception: " + e); 1170 } 1171 1172 final SequenceChangeEvent event = 1173 new SequenceChangeEvent (this, 1174 SequenceChangeEvent.DELETION, 1175 range.getStart (), 1176 removed_bases); 1177 1178 fireSequenceChangeEvent (event); 1179 1180 return removed_bases; 1181 } 1182 1183 /** 1184 * Insert the given bases at the given base position and send out a 1185 * SequenceChange event to all the listeners. 1186 * @param position The bases are inserted just before this base position if 1187 * direction is FORWARD or just after if direction is REVERSE. 1188 * @param direction If this is FORWARD, then the bases is the bases String 1189 * will be inserted just before the base given by position. If this is 1190 * REVERSE the bases will be reversed, complemented and inserted just 1191 * after the position. 1192 * @param bases The bases to add (or the reverse complement of the bases to 1193 * add if direction is REVERSE). 1194 * @exception ReadOnlyException If this Bases object cannot be changed. 1195 **/ addBases(final int position, final int direction, final String bases)1196 public void addBases (final int position, final int direction, 1197 final String bases) 1198 throws ReadOnlyException, IllegalSymbolException { 1199 stop_codon_cache = null; 1200 1201 final String new_sequence; 1202 final int real_position; 1203 final String real_bases; 1204 1205 if (direction == FORWARD) { 1206 real_position = position; 1207 real_bases = bases.toLowerCase (); 1208 } else { 1209 real_position = position + 1; 1210 real_bases = reverseComplement (bases.toLowerCase ()); 1211 } 1212 1213 new_sequence = 1214 getSequence ().getSubSequence (1, real_position - 1) + 1215 real_bases + 1216 getSequence ().getSubSequence (real_position, getLength ()); 1217 1218 getSequence ().setFromChar(new_sequence.toCharArray()); 1219 1220 final SequenceChangeEvent event = 1221 new SequenceChangeEvent (this, 1222 SequenceChangeEvent.INSERTION, 1223 real_position, 1224 real_bases); 1225 1226 fireSequenceChangeEvent (event); 1227 1228 return; 1229 } 1230 1231 /** 1232 * There is one element in this array for each possible 1233 * SequenceChangeListener priority. This array is changed by 1234 * addSequenceChangeListener() and removeSequenceChangeListener(). 1235 **/ 1236 final private WeakHashMap listener_hash_map_array [] = 1237 new WeakHashMap [MAX_PRIORITY - MIN_PRIORITY + 1]; 1238 1239 /** 1240 * Adds the specified event listener to the list of object that receive 1241 * sequence change events from this object. 1242 * @param l the event change listener. 1243 * @param priority The listeners are stored in a priority queue using this 1244 * value. Larger priority means that the listener will receive the event 1245 * sooner (than lower priority listeners). Values less than MIN_PRIORITY 1246 * will be treated like MIN_PRIORITY values higher than MAX_PRIORITY will 1247 * be treated like MAX_PRIORITY. 1248 **/ addSequenceChangeListener(final SequenceChangeListener l, int priority)1249 public void addSequenceChangeListener (final SequenceChangeListener l, 1250 int priority) { 1251 if (priority < MIN_PRIORITY) { 1252 priority = MIN_PRIORITY; 1253 } 1254 1255 if (priority > MAX_PRIORITY) { 1256 priority = MAX_PRIORITY; 1257 } 1258 1259 listener_hash_map_array [priority - MIN_PRIORITY].put (l, null); 1260 } 1261 1262 /** 1263 * Removes the specified event listener so that it no longer receives 1264 * sequence change events from this object. 1265 * @param l the event change listener. 1266 **/ removeSequenceChangeListener(final SequenceChangeListener l)1267 public void removeSequenceChangeListener (final SequenceChangeListener l) { 1268 for (int i = 0 ; i < listener_hash_map_array.length ; ++i) { 1269 final WeakHashMap this_hash_map = listener_hash_map_array [i]; 1270 1271 if (this_hash_map.containsKey (l)) { 1272 this_hash_map.remove (l); 1273 return; 1274 } 1275 } 1276 } 1277 1278 /** 1279 * Send a SequenceChangeEvent to each object that is listening for it. 1280 **/ fireSequenceChangeEvent(final SequenceChangeEvent event)1281 private void fireSequenceChangeEvent (final SequenceChangeEvent event) { 1282 for (int i = listener_hash_map_array.length - 1 ; i >= 0 ; --i) { 1283 final WeakHashMap this_hash_map = listener_hash_map_array [i]; 1284 1285 if (this_hash_map != null) { 1286 final Iterator iter = this_hash_map.keySet ().iterator (); 1287 1288 while (iter.hasNext()) 1289 { 1290 final SequenceChangeListener this_listener = 1291 (SequenceChangeListener) iter.next(); 1292 this_listener.sequenceChanged (event); 1293 } 1294 } 1295 } 1296 } 1297 1298 /** 1299 * Return the average gc percent for the sequence. 1300 **/ getAverageGCPercent()1301 public float getAverageGCPercent () { 1302 return ((float)(getSequence ().getCCount () + 1303 getSequence ().getGCount ())) / 1304 getSequence ().length () * 100; 1305 } 1306 1307 /** 1308 * Return the average AG percent for the sequence as a percentage. 1309 **/ getAverageAGPercent()1310 public float getAverageAGPercent () { 1311 return ((float)(getSequence ().getACount () + 1312 getSequence ().getGCount ())) / 1313 getSequence ().length () * 100; 1314 } 1315 1316 /** 1317 * Return the number of 'A's in this Bases object. 1318 **/ getACount()1319 public int getACount () { 1320 return getSequence ().getACount (); 1321 } 1322 1323 /** 1324 * Return the number of 'T's in this Bases object. 1325 **/ getTCount()1326 public int getTCount () { 1327 return getSequence ().getTCount (); 1328 } 1329 1330 /** 1331 * Return the number of 'G's in this Bases object. 1332 **/ getGCount()1333 public int getGCount () { 1334 return getSequence ().getGCount (); 1335 } 1336 1337 /** 1338 * Return the number of 'C's in this Bases object. 1339 **/ getCCount()1340 public int getCCount () { 1341 return getSequence ().getCCount (); 1342 } 1343 1344 /** 1345 * Return a String containing the reverse complement of the argument 1346 * String. For example an argument of "aatc" will result in "gatt". 1347 **/ reverseComplement(final String sequence_string)1348 public static String reverseComplement (final String sequence_string) { 1349 StringBuffer return_buffer = new StringBuffer (sequence_string.length ()); 1350 1351 for (int i = sequence_string.length () - 1 ; i >= 0 ; --i) { 1352 return_buffer.append (complement (sequence_string.charAt (i))); 1353 } 1354 1355 return return_buffer.toString (); 1356 } 1357 1358 /** 1359 * Return a char[] containing the reverse complement of the argument 1360 * String. For example an argument of "aatc" will result in "gatt". 1361 **/ reverseComplement(final char[] sequence_char)1362 public static char[] reverseComplement (final char[] sequence_char) 1363 { 1364 final int length = sequence_char.length; 1365 final char[] return_sequence = new char[length]; 1366 int j = 0; 1367 1368 for(int i = length - 1 ; i >= 0 ; --i) 1369 { 1370 return_sequence[j] = complement(sequence_char[i]); 1371 j++; 1372 } 1373 1374 return return_sequence; 1375 } 1376 1377 1378 /** 1379 * Return a String containing the complement of the argument String. For 1380 * example an argument of "aatc" will result in "ttag". 1381 **/ complement(final String sequence_string)1382 public static String complement (final String sequence_string) { 1383 StringBuffer return_buffer = new StringBuffer (sequence_string.length ()); 1384 1385 for (int i = 0 ; i < sequence_string.length () ; ++i) { 1386 return_buffer.append (complement (sequence_string.charAt (i))); 1387 } 1388 1389 return return_buffer.toString (); 1390 } 1391 1392 /** 1393 * Return a char array containing the complement of the argument char[]. For 1394 * example an argument of "aatc" will result in "ttag". 1395 **/ complement(final char sequence[])1396 public static char[] complement (final char sequence[]) 1397 { 1398 final char[] seq_comp = new char[sequence.length]; 1399 1400 for (int i = 0 ; i < sequence.length; ++i) 1401 seq_comp[i] = complement(sequence[i]); 1402 1403 return seq_comp; 1404 } 1405 1406 /** 1407 * Returns the complement base of it's argument - c for g, a for t etc. 1408 * The argument may be upper or lower case, but the result is always lower 1409 * case. This also works for IUB base codes: the complement of 'y' is 'r' 1410 * because 'y' is 'c' or 't' and 'r' is 'a' or 'g', the complement of 'n' 1411 * or 'x' (any base) is 'n'. 1412 **/ complement(final char base)1413 public final static char complement (final char base) { 1414 1415 switch (base) { 1416 case 'a': case 'A': return 't'; 1417 case 't': case 'T': case 'u': case 'U': return 'a'; 1418 case 'g': case 'G': return 'c'; 1419 case 'c': case 'C': return 'g'; 1420 case 'r': case 'R': return 'y'; 1421 case 'y': case 'Y': return 'r'; 1422 case 'k': case 'K': return 'm'; 1423 case 'm': case 'M': return 'k'; 1424 case 's': case 'S': return 's'; 1425 case 'w': case 'W': return 'w'; 1426 case 'b': case 'B': return 'v'; 1427 case 'd': case 'D': return 'h'; 1428 case 'h': case 'H': return 'd'; 1429 case 'v': case 'V': return 'b'; 1430 case 'n': case 'N': return 'n'; 1431 case 'x': case 'X': return 'x'; 1432 default: 1433 return '@'; 1434 // throw new Error ("in Bases.complement - tried to complement a letter " + 1435 // "that isn't a base"); 1436 } 1437 } 1438 1439 /** 1440 * Return the Sequence object that was passed to the constructor. 1441 **/ getSequence()1442 public Sequence getSequence () 1443 { 1444 return embl_sequence; 1445 } 1446 1447 1448 /** 1449 * Check a three character substring and return true if and only if the 1450 * three bases translate to a stop codon. If the direction is REVERSE 1451 * then the three bases to check are at start_index, start_index - 1 and 1452 * start_index - 2. In that case true is returned if and only the 1453 * complement of those three bases is a stop codon. 1454 * Codons that contain an X are considered to be stop codons. 1455 **/ isCodon(char first_letter, char second_letter, char third_letter, final StringVector query_codons)1456 private static boolean isCodon(char first_letter, char second_letter, char third_letter, 1457 final StringVector query_codons) 1458 { 1459 char[] tran = {first_letter, second_letter, third_letter }; 1460 1461 if(query_codons.contains( new String(tran) )) 1462 return true; 1463 1464 return false; 1465 } 1466 1467 /** 1468 * Check a three character substring and return true if and only if the 1469 * three bases translate to a stop codon. If the direction is REVERSE 1470 * then the three bases to check are at start_index, start_index - 1 and 1471 * start_index - 2. In that case true is returned if and only the 1472 * complement of those three bases is a stop codon. 1473 * Codons that contain an X are considered to be stop codons. 1474 **/ isStopCodon(char first_letter, char second_letter, char third_letter)1475 private static boolean isStopCodon(char first_letter, char second_letter, char third_letter) 1476 { 1477 // codons that contain an X are considered to be stop codons. 1478 if(first_letter == 'x' || second_letter == 'x' || third_letter == 'x') 1479 return true; 1480 1481 final char translation = AminoAcidSequence.getCodonTranslation(first_letter, 1482 second_letter, 1483 third_letter); 1484 1485 if(translation == '+' || translation == '*' || translation == '#') 1486 return true; 1487 else 1488 return false; 1489 } 1490 1491 1492 /** 1493 * Check a three character substring and return true if and only if the 1494 * three bases are legal (see isLegalBase ()). 1495 **/ 1496 /*private static boolean isLegalCodon (final String sequence_string, 1497 final int start_index, 1498 final int direction) { 1499 if (direction == FORWARD) { 1500 if (isLegalBase (sequence_string.charAt (start_index)) && 1501 isLegalBase (sequence_string.charAt (start_index + 1)) && 1502 isLegalBase (sequence_string.charAt (start_index + 2))) { 1503 return true; 1504 } 1505 } else { 1506 if (isLegalBase (sequence_string.charAt (start_index)) && 1507 isLegalBase (sequence_string.charAt (start_index - 1)) && 1508 isLegalBase (sequence_string.charAt (start_index - 2))) { 1509 return true; 1510 } 1511 } 1512 1513 // this isn't a stop codon 1514 return false; 1515 1516 }*/ 1517 1518 /** 1519 * Return true if and only if the given base character is one of 'a', 't', 1520 * 'c', 'g' or 'u'. 1521 **/ isLegalBase(final char base_char)1522 public final static boolean isLegalBase (final char base_char) { 1523 switch (base_char) { 1524 case 'a': case 'A': return true; 1525 case 't': case 'T': return true; 1526 case 'u': case 'U': return true; 1527 case 'g': case 'G': return true; 1528 case 'c': case 'C': return true; 1529 default: 1530 return false; 1531 } 1532 } 1533 1534 /** 1535 * The underlying sequence object that holds the data for this object. 1536 * This is the same object that was passed to the constructor. 1537 **/ 1538 private Sequence embl_sequence; 1539 1540 /** 1541 * The object representing the forward sequence of bases. 1542 **/ 1543 private Strand forward_strand; 1544 1545 /** 1546 * The object representing the reverse (reverse complemented) 1547 * sequence of bases. 1548 **/ 1549 private Strand reverse_strand; 1550 } 1551