1 /* 2 * Jalview - A Sequence Alignment Editor and Viewer (2.11.1.4) 3 * Copyright (C) 2021 The Jalview Authors 4 * 5 * This file is part of Jalview. 6 * 7 * Jalview is free software: you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation, either version 3 10 * of the License, or (at your option) any later version. 11 * 12 * Jalview is distributed in the hope that it will be useful, but 13 * WITHOUT ANY WARRANTY; without even the implied warranty 14 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR 15 * PURPOSE. See the GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with Jalview. If not, see <http://www.gnu.org/licenses/>. 19 * The Jalview Authors are detailed in the 'AUTHORS' file. 20 */ 21 package jalview.analysis; 22 23 import jalview.api.AlignViewportI; 24 import jalview.datamodel.AlignedCodon; 25 import jalview.datamodel.AlignedCodonFrame; 26 import jalview.datamodel.Alignment; 27 import jalview.datamodel.AlignmentAnnotation; 28 import jalview.datamodel.AlignmentI; 29 import jalview.datamodel.Annotation; 30 import jalview.datamodel.DBRefEntry; 31 import jalview.datamodel.DBRefSource; 32 import jalview.datamodel.FeatureProperties; 33 import jalview.datamodel.GraphLine; 34 import jalview.datamodel.Mapping; 35 import jalview.datamodel.Sequence; 36 import jalview.datamodel.SequenceFeature; 37 import jalview.datamodel.SequenceI; 38 import jalview.schemes.ResidueProperties; 39 import jalview.util.Comparison; 40 import jalview.util.DBRefUtils; 41 import jalview.util.MapList; 42 import jalview.util.ShiftList; 43 44 import java.util.ArrayList; 45 import java.util.Arrays; 46 import java.util.Comparator; 47 import java.util.Iterator; 48 import java.util.List; 49 50 public class Dna 51 { 52 private static final String STOP_ASTERIX = "*"; 53 54 private static final Comparator<AlignedCodon> comparator = new CodonComparator(); 55 56 /* 57 * 'final' variables describe the inputs to the translation, which should not 58 * be modified. 59 */ 60 private final List<SequenceI> selection; 61 62 private final String[] seqstring; 63 64 private final Iterator<int[]> contigs; 65 66 private final char gapChar; 67 68 private final AlignmentAnnotation[] annotations; 69 70 private final int dnaWidth; 71 72 private final AlignmentI dataset; 73 74 private ShiftList vismapping; 75 76 private int[] startcontigs; 77 78 /* 79 * Working variables for the translation. 80 * 81 * The width of the translation-in-progress protein alignment. 82 */ 83 private int aaWidth = 0; 84 85 /* 86 * This array will be built up so that position i holds the codon positions 87 * e.g. [7, 9, 10] that match column i (base 0) in the aligned translation. 88 * Note this implies a contract that if two codons do not align exactly, their 89 * translated products must occupy different column positions. 90 */ 91 private AlignedCodon[] alignedCodons; 92 93 /** 94 * Constructor given a viewport and the visible contigs. 95 * 96 * @param viewport 97 * @param visibleContigs 98 */ Dna(AlignViewportI viewport, Iterator<int[]> visibleContigs)99 public Dna(AlignViewportI viewport, Iterator<int[]> visibleContigs) 100 { 101 this.selection = Arrays.asList(viewport.getSequenceSelection()); 102 this.seqstring = viewport.getViewAsString(true); 103 this.contigs = visibleContigs; 104 this.gapChar = viewport.getGapCharacter(); 105 this.annotations = viewport.getAlignment().getAlignmentAnnotation(); 106 this.dnaWidth = viewport.getAlignment().getWidth(); 107 this.dataset = viewport.getAlignment().getDataset(); 108 initContigs(); 109 } 110 111 /** 112 * Initialise contigs used as starting point for translateCodingRegion 113 */ initContigs()114 private void initContigs() 115 { 116 vismapping = new ShiftList(); // map from viscontigs to seqstring 117 // intervals 118 119 int npos = 0; 120 int[] lastregion = null; 121 ArrayList<Integer> tempcontigs = new ArrayList<>(); 122 while (contigs.hasNext()) 123 { 124 int[] region = contigs.next(); 125 if (lastregion == null) 126 { 127 vismapping.addShift(npos, region[0]); 128 } 129 else 130 { 131 // hidden region 132 vismapping.addShift(npos, region[0] - lastregion[1] + 1); 133 } 134 lastregion = region; 135 tempcontigs.add(region[0]); 136 tempcontigs.add(region[1]); 137 } 138 139 startcontigs = new int[tempcontigs.size()]; 140 int i = 0; 141 for (Integer val : tempcontigs) 142 { 143 startcontigs[i] = val; 144 i++; 145 } 146 tempcontigs = null; 147 } 148 149 /** 150 * Test whether codon positions cdp1 should align before, with, or after cdp2. 151 * Returns zero if all positions match (or either argument is null). Returns 152 * -1 if any position in the first codon precedes the corresponding position 153 * in the second codon. Else returns +1 (some position in the second codon 154 * precedes the corresponding position in the first). 155 * 156 * Note this is not necessarily symmetric, for example: 157 * <ul> 158 * <li>compareCodonPos([2,5,6], [3,4,5]) returns -1</li> 159 * <li>compareCodonPos([3,4,5], [2,5,6]) also returns -1</li> 160 * </ul> 161 * 162 * @param ac1 163 * @param ac2 164 * @return 165 */ compareCodonPos(AlignedCodon ac1, AlignedCodon ac2)166 public static final int compareCodonPos(AlignedCodon ac1, AlignedCodon ac2) 167 { 168 return comparator.compare(ac1, ac2); 169 // return jalview_2_8_2compare(ac1, ac2); 170 } 171 172 /** 173 * Codon comparison up to Jalview 2.8.2. This rule is sequence order dependent 174 * - see http://issues.jalview.org/browse/JAL-1635 175 * 176 * @param ac1 177 * @param ac2 178 * @return 179 */ jalview_2_8_2compare(AlignedCodon ac1, AlignedCodon ac2)180 private static int jalview_2_8_2compare(AlignedCodon ac1, 181 AlignedCodon ac2) 182 { 183 if (ac1 == null || ac2 == null || (ac1.equals(ac2))) 184 { 185 return 0; 186 } 187 if (ac1.pos1 < ac2.pos1 || ac1.pos2 < ac2.pos2 || ac1.pos3 < ac2.pos3) 188 { 189 // one base in cdp1 precedes the corresponding base in the other codon 190 return -1; 191 } 192 // one base in cdp1 appears after the corresponding base in the other codon. 193 return 1; 194 } 195 196 /** 197 * Translates cDNA using the specified code table 198 * 199 * @return 200 */ translateCdna(GeneticCodeI codeTable)201 public AlignmentI translateCdna(GeneticCodeI codeTable) 202 { 203 AlignedCodonFrame acf = new AlignedCodonFrame(); 204 205 alignedCodons = new AlignedCodon[dnaWidth]; 206 207 int s; 208 int sSize = selection.size(); 209 List<SequenceI> pepseqs = new ArrayList<>(); 210 for (s = 0; s < sSize; s++) 211 { 212 SequenceI newseq = translateCodingRegion(selection.get(s), 213 seqstring[s], acf, pepseqs, codeTable); 214 215 if (newseq != null) 216 { 217 pepseqs.add(newseq); 218 SequenceI ds = newseq; 219 if (dataset != null) 220 { 221 while (ds.getDatasetSequence() != null) 222 { 223 ds = ds.getDatasetSequence(); 224 } 225 dataset.addSequence(ds); 226 } 227 } 228 } 229 230 SequenceI[] newseqs = pepseqs.toArray(new SequenceI[pepseqs.size()]); 231 AlignmentI al = new Alignment(newseqs); 232 // ensure we look aligned. 233 al.padGaps(); 234 // link the protein translation to the DNA dataset 235 al.setDataset(dataset); 236 translateAlignedAnnotations(al, acf); 237 al.addCodonFrame(acf); 238 return al; 239 } 240 241 /** 242 * fake the collection of DbRefs with associated exon mappings to identify if 243 * a translation would generate distinct product in the currently selected 244 * region. 245 * 246 * @param selection 247 * @param viscontigs 248 * @return 249 */ canTranslate(SequenceI[] selection, int viscontigs[])250 public static boolean canTranslate(SequenceI[] selection, 251 int viscontigs[]) 252 { 253 for (int gd = 0; gd < selection.length; gd++) 254 { 255 SequenceI dna = selection[gd]; 256 DBRefEntry[] dnarefs = DBRefUtils.selectRefs(dna.getDBRefs(), 257 jalview.datamodel.DBRefSource.DNACODINGDBS); 258 if (dnarefs != null) 259 { 260 // intersect with pep 261 List<DBRefEntry> mappedrefs = new ArrayList<>(); 262 DBRefEntry[] refs = dna.getDBRefs(); 263 for (int d = 0; d < refs.length; d++) 264 { 265 if (refs[d].getMap() != null && refs[d].getMap().getMap() != null 266 && refs[d].getMap().getMap().getFromRatio() == 3 267 && refs[d].getMap().getMap().getToRatio() == 1) 268 { 269 mappedrefs.add(refs[d]); // add translated protein maps 270 } 271 } 272 dnarefs = mappedrefs.toArray(new DBRefEntry[mappedrefs.size()]); 273 for (int d = 0; d < dnarefs.length; d++) 274 { 275 Mapping mp = dnarefs[d].getMap(); 276 if (mp != null) 277 { 278 for (int vc = 0; vc < viscontigs.length; vc += 2) 279 { 280 int[] mpr = mp.locateMappedRange(viscontigs[vc], 281 viscontigs[vc + 1]); 282 if (mpr != null) 283 { 284 return true; 285 } 286 } 287 } 288 } 289 } 290 } 291 return false; 292 } 293 294 /** 295 * Translate nucleotide alignment annotations onto translated amino acid 296 * alignment using codon mapping codons 297 * 298 * @param al 299 * the translated protein alignment 300 */ translateAlignedAnnotations(AlignmentI al, AlignedCodonFrame acf)301 protected void translateAlignedAnnotations(AlignmentI al, 302 AlignedCodonFrame acf) 303 { 304 // Can only do this for columns with consecutive codons, or where 305 // annotation is sequence associated. 306 307 if (annotations != null) 308 { 309 for (AlignmentAnnotation annotation : annotations) 310 { 311 /* 312 * Skip hidden or autogenerated annotation. Also (for now), RNA 313 * secondary structure annotation. If we want to show this against 314 * protein we need a smarter way to 'translate' without generating 315 * invalid (unbalanced) structure annotation. 316 */ 317 if (annotation.autoCalculated || !annotation.visible 318 || annotation.isRNA()) 319 { 320 continue; 321 } 322 323 int aSize = aaWidth; 324 Annotation[] anots = (annotation.annotations == null) ? null 325 : new Annotation[aSize]; 326 if (anots != null) 327 { 328 for (int a = 0; a < aSize; a++) 329 { 330 // process through codon map. 331 if (a < alignedCodons.length && alignedCodons[a] != null 332 && alignedCodons[a].pos1 == (alignedCodons[a].pos3 - 2)) 333 { 334 anots[a] = getCodonAnnotation(alignedCodons[a], 335 annotation.annotations); 336 } 337 } 338 } 339 340 AlignmentAnnotation aa = new AlignmentAnnotation(annotation.label, 341 annotation.description, anots); 342 aa.graph = annotation.graph; 343 aa.graphGroup = annotation.graphGroup; 344 aa.graphHeight = annotation.graphHeight; 345 if (annotation.getThreshold() != null) 346 { 347 aa.setThreshold(new GraphLine(annotation.getThreshold())); 348 } 349 if (annotation.hasScore) 350 { 351 aa.setScore(annotation.getScore()); 352 } 353 354 final SequenceI seqRef = annotation.sequenceRef; 355 if (seqRef != null) 356 { 357 SequenceI aaSeq = acf.getAaForDnaSeq(seqRef); 358 if (aaSeq != null) 359 { 360 // aa.compactAnnotationArray(); // throw away alignment annotation 361 // positioning 362 aa.setSequenceRef(aaSeq); 363 // rebuild mapping 364 aa.createSequenceMapping(aaSeq, aaSeq.getStart(), true); 365 aa.adjustForAlignment(); 366 aaSeq.addAlignmentAnnotation(aa); 367 } 368 } 369 al.addAnnotation(aa); 370 } 371 } 372 } 373 getCodonAnnotation(AlignedCodon is, Annotation[] annotations)374 private static Annotation getCodonAnnotation(AlignedCodon is, 375 Annotation[] annotations) 376 { 377 // Have a look at all the codon positions for annotation and put the first 378 // one found into the translated annotation pos. 379 int contrib = 0; 380 Annotation annot = null; 381 for (int p = 1; p <= 3; p++) 382 { 383 int dnaCol = is.getBaseColumn(p); 384 if (annotations[dnaCol] != null) 385 { 386 if (annot == null) 387 { 388 annot = new Annotation(annotations[dnaCol]); 389 contrib = 1; 390 } 391 else 392 { 393 // merge with last 394 Annotation cpy = new Annotation(annotations[dnaCol]); 395 if (annot.colour == null) 396 { 397 annot.colour = cpy.colour; 398 } 399 if (annot.description == null || annot.description.length() == 0) 400 { 401 annot.description = cpy.description; 402 } 403 if (annot.displayCharacter == null) 404 { 405 annot.displayCharacter = cpy.displayCharacter; 406 } 407 if (annot.secondaryStructure == 0) 408 { 409 annot.secondaryStructure = cpy.secondaryStructure; 410 } 411 annot.value += cpy.value; 412 contrib++; 413 } 414 } 415 } 416 if (contrib > 1) 417 { 418 annot.value /= contrib; 419 } 420 return annot; 421 } 422 423 /** 424 * Translate a na sequence 425 * 426 * @param selection 427 * sequence displayed under viscontigs visible columns 428 * @param seqstring 429 * ORF read in some global alignment reference frame 430 * @param acf 431 * Definition of global ORF alignment reference frame 432 * @param proteinSeqs 433 * @param codeTable 434 * @return sequence ready to be added to alignment. 435 */ translateCodingRegion(SequenceI selection, String seqstring, AlignedCodonFrame acf, List<SequenceI> proteinSeqs, GeneticCodeI codeTable)436 protected SequenceI translateCodingRegion(SequenceI selection, 437 String seqstring, AlignedCodonFrame acf, 438 List<SequenceI> proteinSeqs, GeneticCodeI codeTable) 439 { 440 List<int[]> skip = new ArrayList<>(); 441 int[] skipint = null; 442 443 int npos = 0; 444 int vc = 0; 445 446 int[] scontigs = new int[startcontigs.length]; 447 System.arraycopy(startcontigs, 0, scontigs, 0, startcontigs.length); 448 449 // allocate a roughly sized buffer for the protein sequence 450 StringBuilder protein = new StringBuilder(seqstring.length() / 2); 451 String seq = seqstring.replace('U', 'T').replace('u', 'T'); 452 char codon[] = new char[3]; 453 int cdp[] = new int[3]; 454 int rf = 0; 455 int lastnpos = 0; 456 int nend; 457 int aspos = 0; 458 int resSize = 0; 459 for (npos = 0, nend = seq.length(); npos < nend; npos++) 460 { 461 if (!Comparison.isGap(seq.charAt(npos))) 462 { 463 cdp[rf] = npos; // store position 464 codon[rf++] = seq.charAt(npos); // store base 465 } 466 if (rf == 3) 467 { 468 /* 469 * Filled up a reading frame... 470 */ 471 AlignedCodon alignedCodon = new AlignedCodon(cdp[0], cdp[1], cdp[2]); 472 String aa = codeTable.translate(new String(codon)); 473 rf = 0; 474 final String gapString = String.valueOf(gapChar); 475 if (aa == null) 476 { 477 aa = gapString; 478 if (skipint == null) 479 { 480 skipint = new int[] { alignedCodon.pos1, 481 alignedCodon.pos3 /* 482 * cdp[0], 483 * cdp[2] 484 */ }; 485 } 486 skipint[1] = alignedCodon.pos3; // cdp[2]; 487 } 488 else 489 { 490 if (skipint != null) 491 { 492 // edit scontigs 493 skipint[0] = vismapping.shift(skipint[0]); 494 skipint[1] = vismapping.shift(skipint[1]); 495 for (vc = 0; vc < scontigs.length;) 496 { 497 if (scontigs[vc + 1] < skipint[0]) 498 { 499 // before skipint starts 500 vc += 2; 501 continue; 502 } 503 if (scontigs[vc] > skipint[1]) 504 { 505 // finished editing so 506 break; 507 } 508 // Edit the contig list to include the skipped region which did 509 // not translate 510 int[] t; 511 // from : s1 e1 s2 e2 s3 e3 512 // to s: s1 e1 s2 k0 k1 e2 s3 e3 513 // list increases by one unless one boundary (s2==k0 or e2==k1) 514 // matches, and decreases by one if skipint intersects whole 515 // visible contig 516 if (scontigs[vc] <= skipint[0]) 517 { 518 if (skipint[0] == scontigs[vc]) 519 { 520 // skipint at start of contig 521 // shift the start of this contig 522 if (scontigs[vc + 1] > skipint[1]) 523 { 524 scontigs[vc] = skipint[1]; 525 vc += 2; 526 } 527 else 528 { 529 if (scontigs[vc + 1] == skipint[1]) 530 { 531 // remove the contig 532 t = new int[scontigs.length - 2]; 533 if (vc > 0) 534 { 535 System.arraycopy(scontigs, 0, t, 0, vc - 1); 536 } 537 if (vc + 2 < t.length) 538 { 539 System.arraycopy(scontigs, vc + 2, t, vc, 540 t.length - vc + 2); 541 } 542 scontigs = t; 543 } 544 else 545 { 546 // truncate contig to before the skipint region 547 scontigs[vc + 1] = skipint[0] - 1; 548 vc += 2; 549 } 550 } 551 } 552 else 553 { 554 // scontig starts before start of skipint 555 if (scontigs[vc + 1] < skipint[1]) 556 { 557 // skipint truncates end of scontig 558 scontigs[vc + 1] = skipint[0] - 1; 559 vc += 2; 560 } 561 else 562 { 563 // divide region to new contigs 564 t = new int[scontigs.length + 2]; 565 System.arraycopy(scontigs, 0, t, 0, vc + 1); 566 t[vc + 1] = skipint[0]; 567 t[vc + 2] = skipint[1]; 568 System.arraycopy(scontigs, vc + 1, t, vc + 3, 569 scontigs.length - (vc + 1)); 570 scontigs = t; 571 vc += 4; 572 } 573 } 574 } 575 } 576 skip.add(skipint); 577 skipint = null; 578 } 579 if (aa.equals(ResidueProperties.STOP)) 580 { 581 aa = STOP_ASTERIX; 582 } 583 resSize++; 584 } 585 boolean findpos = true; 586 while (findpos) 587 { 588 /* 589 * Compare this codon's base positions with those currently aligned to 590 * this column in the translation. 591 */ 592 final int compareCodonPos = compareCodonPos(alignedCodon, 593 alignedCodons[aspos]); 594 switch (compareCodonPos) 595 { 596 case -1: 597 598 /* 599 * This codon should precede the mapped positions - need to insert a 600 * gap in all prior sequences. 601 */ 602 insertAAGap(aspos, proteinSeqs); 603 findpos = false; 604 break; 605 606 case +1: 607 608 /* 609 * This codon belongs after the aligned codons at aspos. Prefix it 610 * with a gap and try the next position. 611 */ 612 aa = gapString + aa; 613 aspos++; 614 break; 615 616 case 0: 617 618 /* 619 * Exact match - codon 'belongs' at this translated position. 620 */ 621 findpos = false; 622 } 623 } 624 protein.append(aa); 625 lastnpos = npos; 626 if (alignedCodons[aspos] == null) 627 { 628 // mark this column as aligning to this aligned reading frame 629 alignedCodons[aspos] = alignedCodon; 630 } 631 else if (!alignedCodons[aspos].equals(alignedCodon)) 632 { 633 throw new IllegalStateException( 634 "Tried to coalign " + alignedCodons[aspos].toString() 635 + " with " + alignedCodon.toString()); 636 } 637 if (aspos >= aaWidth) 638 { 639 // update maximum alignment width 640 aaWidth = aspos; 641 } 642 // ready for next translated reading frame alignment position (if any) 643 aspos++; 644 } 645 } 646 if (resSize > 0) 647 { 648 SequenceI newseq = new Sequence(selection.getName(), 649 protein.toString()); 650 if (rf != 0) 651 { 652 final String errMsg = "trimming contigs for incomplete terminal codon."; 653 System.err.println(errMsg); 654 // map and trim contigs to ORF region 655 vc = scontigs.length - 1; 656 lastnpos = vismapping.shift(lastnpos); // place npos in context of 657 // whole dna alignment (rather 658 // than visible contigs) 659 // incomplete ORF could be broken over one or two visible contig 660 // intervals. 661 while (vc >= 0 && scontigs[vc] > lastnpos) 662 { 663 if (vc > 0 && scontigs[vc - 1] > lastnpos) 664 { 665 vc -= 2; 666 } 667 else 668 { 669 // correct last interval in list. 670 scontigs[vc] = lastnpos; 671 } 672 } 673 674 if (vc > 0 && (vc + 1) < scontigs.length) 675 { 676 // truncate map list to just vc elements 677 int t[] = new int[vc + 1]; 678 System.arraycopy(scontigs, 0, t, 0, vc + 1); 679 scontigs = t; 680 } 681 if (vc <= 0) 682 { 683 scontigs = null; 684 } 685 } 686 if (scontigs != null) 687 { 688 npos = 0; 689 // map scontigs to actual sequence positions on selection 690 for (vc = 0; vc < scontigs.length; vc += 2) 691 { 692 scontigs[vc] = selection.findPosition(scontigs[vc]); // not from 1! 693 scontigs[vc + 1] = selection.findPosition(scontigs[vc + 1]); // exclusive 694 if (scontigs[vc + 1] == selection.getEnd()) 695 { 696 break; 697 } 698 } 699 // trim trailing empty intervals. 700 if ((vc + 2) < scontigs.length) 701 { 702 int t[] = new int[vc + 2]; 703 System.arraycopy(scontigs, 0, t, 0, vc + 2); 704 scontigs = t; 705 } 706 /* 707 * delete intervals in scontigs which are not translated. 1. map skip 708 * into sequence position intervals 2. truncate existing ranges and add 709 * new ranges to exclude untranslated regions. if (skip.size()>0) { 710 * Vector narange = new Vector(); for (vc=0; vc<scontigs.length; vc++) { 711 * narange.addElement(new int[] {scontigs[vc]}); } int sint=0,iv[]; vc = 712 * 0; while (sint<skip.size()) { skipint = (int[]) skip.elementAt(sint); 713 * do { iv = (int[]) narange.elementAt(vc); if (iv[0]>=skipint[0] && 714 * iv[0]<=skipint[1]) { if (iv[0]==skipint[0]) { // delete beginning of 715 * range } else { // truncate range and create new one if necessary iv = 716 * (int[]) narange.elementAt(vc+1); if (iv[0]<=skipint[1]) { // truncate 717 * range iv[0] = skipint[1]; } else { } } } else if (iv[0]<skipint[0]) { 718 * iv = (int[]) narange.elementAt(vc+1); } } while (iv[0]) } } 719 */ 720 MapList map = new MapList(scontigs, new int[] { 1, resSize }, 3, 1); 721 722 transferCodedFeatures(selection, newseq, map); 723 724 /* 725 * Construct a dataset sequence for our new peptide. 726 */ 727 SequenceI rseq = newseq.deriveSequence(); 728 729 /* 730 * Store a mapping (between the dataset sequences for the two 731 * sequences). 732 */ 733 // SIDE-EFFECT: acf stores the aligned sequence reseq; to remove! 734 acf.addMap(selection, rseq, map); 735 return rseq; 736 } 737 } 738 // register the mapping somehow 739 // 740 return null; 741 } 742 743 /** 744 * Insert a gap into the aligned proteins and the codon mapping array. 745 * 746 * @param pos 747 * @param proteinSeqs 748 * @return 749 */ insertAAGap(int pos, List<SequenceI> proteinSeqs)750 protected void insertAAGap(int pos, List<SequenceI> proteinSeqs) 751 { 752 aaWidth++; 753 for (SequenceI seq : proteinSeqs) 754 { 755 seq.insertCharAt(pos, gapChar); 756 } 757 758 checkCodonFrameWidth(); 759 if (pos < aaWidth) 760 { 761 aaWidth++; 762 763 /* 764 * Shift from [pos] to the end one to the right, and null out [pos] 765 */ 766 System.arraycopy(alignedCodons, pos, alignedCodons, pos + 1, 767 alignedCodons.length - pos - 1); 768 alignedCodons[pos] = null; 769 } 770 } 771 772 /** 773 * Check the codons array can accommodate a single insertion, if not resize 774 * it. 775 */ checkCodonFrameWidth()776 protected void checkCodonFrameWidth() 777 { 778 if (alignedCodons[alignedCodons.length - 1] != null) 779 { 780 /* 781 * arraycopy insertion would bump a filled slot off the end, so expand. 782 */ 783 AlignedCodon[] c = new AlignedCodon[alignedCodons.length + 10]; 784 System.arraycopy(alignedCodons, 0, c, 0, alignedCodons.length); 785 alignedCodons = c; 786 } 787 } 788 789 /** 790 * Given a peptide newly translated from a dna sequence, copy over and set any 791 * features on the peptide from the DNA. 792 * 793 * @param dna 794 * @param pep 795 * @param map 796 */ transferCodedFeatures(SequenceI dna, SequenceI pep, MapList map)797 private static void transferCodedFeatures(SequenceI dna, SequenceI pep, 798 MapList map) 799 { 800 DBRefEntry[] dnarefs = DBRefUtils.selectRefs(dna.getDBRefs(), 801 DBRefSource.DNACODINGDBS); 802 if (dnarefs != null) 803 { 804 // intersect with pep 805 for (int d = 0; d < dnarefs.length; d++) 806 { 807 Mapping mp = dnarefs[d].getMap(); 808 if (mp != null) 809 { 810 } 811 } 812 } 813 for (SequenceFeature sf : dna.getFeatures().getAllFeatures()) 814 { 815 if (FeatureProperties.isCodingFeature(null, sf.getType())) 816 { 817 // if (map.intersectsFrom(sf[f].begin, sf[f].end)) 818 { 819 820 } 821 } 822 } 823 } 824 825 /** 826 * Returns an alignment consisting of the reversed (and optionally 827 * complemented) sequences set in this object's constructor 828 * 829 * @param complement 830 * @return 831 */ reverseCdna(boolean complement)832 public AlignmentI reverseCdna(boolean complement) 833 { 834 int sSize = selection.size(); 835 List<SequenceI> reversed = new ArrayList<>(); 836 for (int s = 0; s < sSize; s++) 837 { 838 SequenceI newseq = reverseSequence(selection.get(s).getName(), 839 seqstring[s], complement); 840 841 if (newseq != null) 842 { 843 reversed.add(newseq); 844 } 845 } 846 847 SequenceI[] newseqs = reversed.toArray(new SequenceI[reversed.size()]); 848 AlignmentI al = new Alignment(newseqs); 849 ((Alignment) al).createDatasetAlignment(); 850 return al; 851 } 852 853 /** 854 * Returns a reversed, and optionally complemented, sequence. The new 855 * sequence's name is the original name with "|rev" or "|revcomp" appended. 856 * aAcCgGtT and DNA ambiguity codes are complemented, any other characters are 857 * left unchanged. 858 * 859 * @param seq 860 * @param complement 861 * @return 862 */ reverseSequence(String seqName, String sequence, boolean complement)863 public static SequenceI reverseSequence(String seqName, String sequence, 864 boolean complement) 865 { 866 String newName = seqName + "|rev" + (complement ? "comp" : ""); 867 char[] originalSequence = sequence.toCharArray(); 868 int length = originalSequence.length; 869 char[] reversedSequence = new char[length]; 870 int bases = 0; 871 for (int i = 0; i < length; i++) 872 { 873 char c = complement ? getComplement(originalSequence[i]) 874 : originalSequence[i]; 875 reversedSequence[length - i - 1] = c; 876 if (!Comparison.isGap(c)) 877 { 878 bases++; 879 } 880 } 881 SequenceI reversed = new Sequence(newName, reversedSequence, 1, bases); 882 return reversed; 883 } 884 885 /** 886 * Answers the reverse complement of the input string 887 * 888 * @see #getComplement(char) 889 * @param s 890 * @return 891 */ reverseComplement(String s)892 public static String reverseComplement(String s) 893 { 894 StringBuilder sb = new StringBuilder(s.length()); 895 for (int i = s.length() - 1; i >= 0; i--) 896 { 897 sb.append(Dna.getComplement(s.charAt(i))); 898 } 899 return sb.toString(); 900 } 901 902 /** 903 * Returns dna complement (preserving case) for aAcCgGtTuU. Ambiguity codes 904 * are treated as on http://reverse-complement.com/. Anything else is left 905 * unchanged. 906 * 907 * @param c 908 * @return 909 */ getComplement(char c)910 public static char getComplement(char c) 911 { 912 char result = c; 913 switch (c) 914 { 915 case '-': 916 case '.': 917 case ' ': 918 break; 919 case 'a': 920 result = 't'; 921 break; 922 case 'A': 923 result = 'T'; 924 break; 925 case 'c': 926 result = 'g'; 927 break; 928 case 'C': 929 result = 'G'; 930 break; 931 case 'g': 932 result = 'c'; 933 break; 934 case 'G': 935 result = 'C'; 936 break; 937 case 't': 938 result = 'a'; 939 break; 940 case 'T': 941 result = 'A'; 942 break; 943 case 'u': 944 result = 'a'; 945 break; 946 case 'U': 947 result = 'A'; 948 break; 949 case 'r': 950 result = 'y'; 951 break; 952 case 'R': 953 result = 'Y'; 954 break; 955 case 'y': 956 result = 'r'; 957 break; 958 case 'Y': 959 result = 'R'; 960 break; 961 case 'k': 962 result = 'm'; 963 break; 964 case 'K': 965 result = 'M'; 966 break; 967 case 'm': 968 result = 'k'; 969 break; 970 case 'M': 971 result = 'K'; 972 break; 973 case 'b': 974 result = 'v'; 975 break; 976 case 'B': 977 result = 'V'; 978 break; 979 case 'v': 980 result = 'b'; 981 break; 982 case 'V': 983 result = 'B'; 984 break; 985 case 'd': 986 result = 'h'; 987 break; 988 case 'D': 989 result = 'H'; 990 break; 991 case 'h': 992 result = 'd'; 993 break; 994 case 'H': 995 result = 'D'; 996 break; 997 } 998 999 return result; 1000 } 1001 } 1002