1 /* ChadoCanonicalGene.java 2 * 3 * created: 2006 4 * 5 * This file is part of Artemis 6 * 7 * Copyright (C) 2006 Genome Research Limited 8 * 9 * This program is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU General Public License 11 * as published by the Free Software Foundation; either version 2 12 * of the License, or (at your option) any later version. 13 * 14 * This program is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17 * GNU General Public License for more details. 18 * 19 * You should have received a copy of the GNU General Public License 20 * along with this program; if not, write to the Free Software 21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 22 * 23 * $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/io/ChadoCanonicalGene.java,v 1.34 2009-08-11 08:59:46 tjc Exp $ 24 */ 25 26 package uk.ac.sanger.artemis.io; 27 28 import uk.ac.sanger.artemis.components.genebuilder.GeneUtils; 29 import uk.ac.sanger.artemis.util.DatabaseDocument; 30 import uk.ac.sanger.artemis.util.ReadOnlyException; 31 import uk.ac.sanger.artemis.util.StringVector; 32 33 import java.util.Collections; 34 import java.util.Comparator; 35 import java.util.Iterator; 36 import java.util.Vector; 37 import java.util.Hashtable; 38 import java.util.Enumeration; 39 import java.util.List; 40 import java.util.Set; 41 import java.util.HashSet; 42 import java.util.regex.Matcher; 43 import java.util.regex.Pattern; 44 45 import org.gmod.schema.sequence.FeatureLoc; 46 47 /** 48 * Used by GFFStreamFeature to represent the chado canonical gene. 49 * Contains gene, transcript, exons and proteins. 50 **/ 51 public class ChadoCanonicalGene 52 { 53 private Feature gene; 54 55 // part_of gene 56 private List<Feature> transcripts = new Vector<Feature>(); 57 58 // part_of transcripts 59 private Hashtable<String, List<Feature>> splicedFeatures = 60 new Hashtable<String, List<Feature>>(); 61 62 // derives_from transript 63 private Hashtable<String, Feature> proteins = new Hashtable<String, Feature>(); 64 65 // utr features 66 private Hashtable<String, List<Feature>> three_prime_utr = 67 new Hashtable<String, List<Feature>>(); 68 private Hashtable<String, List<Feature>> five_prime_utr = 69 new Hashtable<String, List<Feature>>(); 70 71 // other child features of transcript 72 private Hashtable<String, List<Feature>> other_features = 73 new Hashtable<String, List<Feature>>(); 74 75 // srcfeature 76 private int srcfeature_id; 77 78 // srcfeature length 79 private int seqlen; 80 81 82 /** 83 * Get the gene feaure object. 84 * @return 85 */ getGene()86 public Feature getGene() 87 { 88 return gene; 89 } 90 91 /** 92 * Set the gene feature object. 93 * @param gene 94 */ setGene(Feature gene)95 public void setGene(Feature gene) 96 { 97 this.gene = gene; 98 } 99 getGeneUniqueName()100 public String getGeneUniqueName() 101 { 102 try 103 { 104 return getQualifier(getGene(), "ID"); 105 } 106 catch(InvalidRelationException e) 107 { 108 return null; 109 } 110 } 111 112 /** 113 * Add a transcript to the model 114 * @param transcript 115 */ addTranscript(Feature transcript)116 public void addTranscript(Feature transcript) 117 { 118 transcripts.add(transcript); 119 } 120 121 /** 122 * Delete a transcript and child features. 123 * @param transcript_name 124 */ deleteTranscript(String transcript_name)125 public void deleteTranscript(String transcript_name) 126 { 127 for(int i=0; i<transcripts.size(); i++) 128 { 129 try 130 { 131 Feature transcript = (Feature)transcripts.get(i); 132 133 if( transcript_name.equals(getQualifier(transcript, "ID")) ) 134 { 135 transcripts.remove(transcript); 136 splicedFeatures.remove(transcript_name); 137 three_prime_utr.remove(transcript_name); 138 five_prime_utr.remove(transcript_name); 139 other_features.remove(transcript_name); 140 proteins.remove(transcript_name); 141 } 142 } 143 catch(InvalidRelationException e) 144 { 145 e.printStackTrace(); 146 } 147 } 148 } 149 150 /** 151 * This should be called if the uniqueName of a gene model 152 * feature is changed. 153 * @param oldName 154 * @param newName 155 * @param children 156 */ updateUniqueName(final String oldName, final String newName, final Set<Feature> children)157 public void updateUniqueName(final String oldName, 158 final String newName, 159 final Set<Feature> children) 160 { 161 updateNames(splicedFeatures,oldName,newName); 162 updateNames(proteins,oldName,newName); 163 updateNames(three_prime_utr,oldName,newName); 164 updateNames(five_prime_utr,oldName,newName); 165 updateNames(other_features,oldName,newName); 166 167 if(children != null) 168 GeneUtils.fixParentQualifier(oldName, newName, children); 169 } 170 171 /** 172 * Utility for changing the key used in a Hashtable 173 * @param hash 174 * @param oldName 175 * @param newName 176 */ updateNames(final Hashtable hash, final String oldName, final String newName)177 private static void updateNames(final Hashtable hash, 178 final String oldName, 179 final String newName) 180 { 181 Object features = hash.get(oldName); 182 if(features != null) 183 { 184 hash.remove(oldName); 185 hash.put(newName, features); 186 } 187 } 188 189 /** 190 * Delete features. 191 * @param embl_feature 192 */ deleteFeature(final Feature embl_feature)193 public void deleteFeature(final Feature embl_feature) 194 { 195 try 196 { 197 final String name = getQualifier(embl_feature, "ID"); 198 Object feature = getSplicedFeatures(name); 199 200 if(feature != null) 201 { 202 String transcript_name = getQualifier((Feature) feature, "Parent"); 203 splicedFeatures.remove(transcript_name); 204 return; 205 } 206 207 final Enumeration<String> enum_protein = proteins.keys(); 208 while(enum_protein.hasMoreElements()) 209 { 210 final String transcriptName = (String)enum_protein.nextElement(); 211 Feature protein = (Feature)proteins.get(transcriptName); 212 if(getQualifier(protein, "ID").equals(name)) 213 { 214 proteins.remove(transcriptName); 215 return; 216 } 217 } 218 219 feature = getFeatureFromHash(name, three_prime_utr); 220 if(feature != null) 221 { 222 String transcript_name = getQualifier((Feature) feature, "Parent"); 223 List<Feature> utr = get3UtrOfTranscript(transcript_name); 224 utr.remove(feature); 225 return; 226 } 227 228 feature = getFeatureFromHash(name, five_prime_utr); 229 if(feature != null) 230 { 231 String transcript_name = getQualifier((Feature) feature, "Parent"); 232 List<Feature> utr = get5UtrOfTranscript(transcript_name); 233 utr.remove(feature); 234 return; 235 } 236 237 feature = getFeatureFromHash(name, other_features); 238 if(feature != null) 239 { 240 String transcript_name = getQualifier((Feature) feature, "Parent"); 241 List<Feature> others = getOtherFeaturesOfTranscript(transcript_name); 242 others.remove(feature); 243 return; 244 } 245 246 deleteTranscript(name); 247 } 248 catch(InvalidRelationException e1) 249 { 250 e1.printStackTrace(); 251 } 252 } 253 254 /** 255 * Get all child members of a feature 256 * @param embl_feature 257 * @return 258 */ getChildren(Feature embl_feature)259 public Set<Feature> getChildren(Feature embl_feature) 260 { 261 Set<Feature> children = new HashSet<Feature>(); 262 try 263 { 264 String name = getQualifier(embl_feature, "ID"); 265 266 String gene_name = getQualifier(getGene(), "ID"); 267 if(name.equals(gene_name)) 268 { 269 List<Feature> transcripts = getTranscripts(); 270 for(int i=0; i<transcripts.size(); i++) 271 { 272 Feature transcript = transcripts.get(i); 273 children.add(transcript); 274 children.addAll( getChildren(transcript) ); 275 } 276 return children; 277 } 278 279 searchForChildren(splicedFeatures, name, children); 280 searchForChildren(three_prime_utr, name, children); 281 searchForChildren(five_prime_utr, name, children); 282 searchForChildren(other_features, name, children); 283 284 // protein 285 Enumeration<Feature> pep_enum = proteins.elements(); 286 while(pep_enum.hasMoreElements()) 287 { 288 Feature child = pep_enum.nextElement(); 289 String parent = getQualifier(child, "Derives_from"); 290 if(parent != null && parent.equals(name)) 291 children.add(child); 292 } 293 return children; 294 } 295 catch(InvalidRelationException e) 296 { 297 e.printStackTrace(); 298 } 299 return null; 300 } 301 302 /** 303 * Search in a <code>Hashtable</code> for child Features with a 304 * matching parent ID. Child features are added to the <code>Set</code> 305 * that is passed into this method. 306 * @param hash Hashtable to search for children in 307 * @param parent_id uniquname to look for 308 * @param children collection to add child features to 309 * @throws InvalidRelationException 310 */ searchForChildren(Hashtable<String, List<Feature>> hash, String parent_id, Set<Feature> children)311 private void searchForChildren(Hashtable<String, List<Feature>> hash, 312 String parent_id, 313 Set<Feature> children) 314 throws InvalidRelationException 315 { 316 Enumeration<List<Feature>> feature_enum = hash.elements(); 317 String parent; 318 319 while(feature_enum.hasMoreElements()) 320 { 321 List<Feature> child_list = feature_enum.nextElement(); 322 323 for(int i=0; i<child_list.size(); i++) 324 { 325 Feature child = child_list.get(i); 326 //if(children.contains(child)) 327 // continue; 328 329 parent = getQualifier(child, "Parent"); 330 if(parent != null && parent.equals(parent_id)) 331 children.add(child); 332 else 333 { 334 parent = getQualifier(child, "Derives_from"); 335 if(parent != null && parent.equals(parent_id)) 336 children.add(child); 337 } 338 } 339 } 340 } 341 342 /** 343 * Add exon feature to the chado gene model. 344 * @param transcript_name 345 * @param exon 346 * @param reset 347 * @throws InvalidRelationException 348 */ addSplicedFeatures(final String transcript_name, final Feature exon, boolean reset)349 public void addSplicedFeatures(final String transcript_name, 350 final Feature exon, boolean reset) 351 { 352 if(reset) 353 splicedFeatures.remove(transcript_name); 354 addSplicedFeatures(transcript_name, exon); 355 } 356 357 /** 358 * Add exon feature to the chado gene model. 359 * @param transcript_name 360 * @param v_spliced 361 * @throws InvalidRelationException 362 */ addSplicedFeatures(final String transcript_name, final Feature spliced)363 public void addSplicedFeatures(final String transcript_name, 364 final Feature spliced) 365 { 366 final List<Feature> v_spliced; 367 if(splicedFeatures.containsKey(transcript_name)) 368 v_spliced = (Vector<Feature>)splicedFeatures.get(transcript_name); 369 else 370 v_spliced = new Vector<Feature>(); 371 372 v_spliced.add(spliced); 373 splicedFeatures.put(transcript_name, v_spliced); 374 } 375 correctSpliceSiteAssignments()376 public void correctSpliceSiteAssignments() 377 { 378 Enumeration<String> enumSplicedFeatures = splicedFeatures.keys(); 379 while(enumSplicedFeatures.hasMoreElements()) 380 { 381 String transcriptId = enumSplicedFeatures.nextElement(); 382 Vector<Feature> v_spliced = (Vector<Feature>)splicedFeatures.get(transcriptId); 383 Set<String> splicedTypes = getSpliceTypes(transcriptId); 384 Iterator<String> it = splicedTypes.iterator(); 385 while(it.hasNext()) 386 { 387 String type = it.next(); 388 if(!type.equals(DatabaseDocument.EXONMODEL) && 389 !type.equals("pseudogenic_exon") && 390 !type.equals("exon")) 391 { 392 List<Feature> splicedFeatures = getSpliceSitesOfTranscript(transcriptId, type); 393 if(splicedFeatures.size() == 1) 394 { 395 Feature f = (Feature)splicedFeatures.get(0); 396 addOtherFeatures(transcriptId, f); 397 v_spliced.remove(f); 398 try 399 { 400 f.removeQualifierByName("feature_relationship_rank"); 401 } 402 catch(ReadOnlyException e){} 403 catch(EntryInformationException e){} 404 } 405 } 406 } 407 splicedFeatures.put(transcriptId, v_spliced); 408 } 409 } 410 411 /** 412 * Add protein feature to the chado gene model. 413 * @param transcript_name 414 * @param protein 415 * @throws InvalidRelationException 416 */ addProtein(final String transcript_name, final Feature protein)417 public void addProtein(final String transcript_name, 418 final Feature protein) 419 { 420 proteins.put(transcript_name, protein); 421 } 422 423 /** 424 * Add 3'UTR to chado gene model. 425 * @param transcript_name 426 * @param utr 427 * @throws InvalidRelationException 428 */ add3PrimeUtr(final String transcript_name, final Feature utr)429 public void add3PrimeUtr(final String transcript_name, 430 final Feature utr) 431 { 432 final List<Feature> utr_list; 433 if(three_prime_utr.containsKey(transcript_name)) 434 utr_list = three_prime_utr.get(transcript_name); 435 else 436 utr_list = new Vector<Feature>(); 437 438 utr_list.add(utr); 439 three_prime_utr.put(transcript_name, utr_list); 440 } 441 442 /** 443 * Add 5'UTR to chado gene model. 444 * @param transcript_name 445 * @param utr 446 * @throws InvalidRelationException 447 */ add5PrimeUtr(final String transcript_name, final Feature utr)448 public void add5PrimeUtr(final String transcript_name, 449 final Feature utr) 450 { 451 final List<Feature> utr_list; 452 if(five_prime_utr.containsKey(transcript_name)) 453 utr_list = (Vector<Feature>)five_prime_utr.get(transcript_name); 454 else 455 utr_list = new Vector<Feature>(); 456 457 utr_list.add(utr); 458 five_prime_utr.put(transcript_name, utr_list); 459 } 460 461 /** 462 * Add other child features of a transcript to the chado 463 * gene model. 464 * @param transcript_name 465 * @param other_feature 466 */ addOtherFeatures(final String transcript_name, final Feature other_feature)467 public void addOtherFeatures(final String transcript_name, 468 final Feature other_feature) 469 { 470 final List<Feature> v_other_features; 471 if(other_features.containsKey(transcript_name)) 472 v_other_features = (Vector<Feature>)other_features.get(transcript_name); 473 else 474 v_other_features = new Vector<Feature>(); 475 v_other_features.add(other_feature); 476 other_features.put(transcript_name, v_other_features); 477 } 478 479 /** 480 * Check if this gene model contains a transcript with an ID equal to 481 * any of the names in the <code>StringVector</code>. If it does find 482 * it returns the transcript feature, otherwise it returns null. 483 * @param names 484 * @return 485 */ containsTranscript(final StringVector names)486 public Feature containsTranscript(final StringVector names) 487 { 488 for(int i=0; i<transcripts.size(); i++) 489 { 490 try 491 { 492 Feature transcript = (Feature)transcripts.get(i); 493 494 if( names.contains(getQualifier(transcript, "ID")) ) 495 return transcript; 496 } 497 catch(InvalidRelationException e) 498 { 499 e.printStackTrace(); 500 } 501 } 502 return null; 503 } 504 505 getSpliceSitesOfTranscript(final String transcript_name, final String type)506 public List<Feature> getSpliceSitesOfTranscript(final String transcript_name, 507 final String type) 508 { 509 if(splicedFeatures.containsKey(transcript_name)) 510 { 511 List<Feature> splicedFeaturesOfTranscript = splicedFeatures.get(transcript_name); 512 List<Feature> results = new Vector<Feature>(); 513 for(int i=0; i<splicedFeaturesOfTranscript.size(); i++) 514 { 515 Feature feature = (Feature)splicedFeaturesOfTranscript.get(i); 516 if(feature.getKey().getKeyString().equals(type)) 517 results.add(feature); 518 } 519 return results; 520 } 521 522 return null; 523 } 524 525 /** 526 * Get a list of the feature keys of the types that are splice sites 527 * @param transcript_name 528 * @return 529 */ getSpliceTypes(final String transcript_name)530 public Set<String> getSpliceTypes(final String transcript_name) 531 { 532 if(splicedFeatures.containsKey(transcript_name)) 533 { 534 List<Feature> splicedFeaturesOfTranscript = splicedFeatures.get(transcript_name); 535 Set<String> splicedTypes = new HashSet<String>(); 536 for(int i=0; i<splicedFeaturesOfTranscript.size(); i++) 537 { 538 Feature feature = (Feature)splicedFeaturesOfTranscript.get(i); 539 splicedTypes.add( feature.getKey().getKeyString() ); 540 } 541 return splicedTypes; 542 } 543 544 return null; 545 } 546 547 /** 548 * Return the exons of a given transcript as a <code>List</code>. 549 * @param transcript_name 550 * @return 551 */ getSplicedFeaturesOfTranscript(final String transcript_name)552 public List<Feature> getSplicedFeaturesOfTranscript(final String transcript_name) 553 { 554 if(splicedFeatures.containsKey(transcript_name)) 555 { 556 return splicedFeatures.get(transcript_name); 557 } 558 559 return null; 560 } 561 562 /** 563 * Return the transcript from the name of a constituent feature 564 * @param constituent feature name 565 * @return transcript 566 */ getTranscriptFeatureFromName(final String name)567 public Feature getTranscriptFeatureFromName(final String name) 568 { 569 String transcriptName = getTranscriptFromName(name); 570 if(transcriptName == null) 571 return null; 572 573 try 574 { 575 for (int i = 0; i < transcripts.size(); i++) 576 { 577 Feature feature = (Feature) transcripts.get(i); 578 if (getQualifier(feature, "ID").equals(transcriptName)) 579 return feature; 580 } 581 } 582 catch (InvalidRelationException ire){} 583 return null; 584 } 585 586 /** 587 * Return the transcript from the name of a constituent feature 588 * @param constituent feature name 589 * @return transcript name 590 */ getTranscriptFromName(final String name)591 public String getTranscriptFromName(final String name) 592 { 593 // check transcript 594 StringVector sv = new StringVector(); 595 sv.add(name); 596 Feature feature = containsTranscript(sv); 597 598 if(feature != null) 599 return name; 600 601 // check exons 602 List<String> transcriptNames = getTranscriptNames(); 603 feature = getSplicedFeatures(name); 604 605 if(feature != null) 606 { 607 for(int i=0; i<transcriptNames.size(); i++) 608 { 609 String transcriptName = (String)transcriptNames.get(i); 610 List<Feature> splicedSegments = getSplicedFeaturesOfTranscript(transcriptName); 611 612 if(splicedSegments != null) 613 { 614 for(int j=0; j<splicedSegments.size(); j++) 615 { 616 Feature segment = splicedSegments.get(j); 617 try 618 { 619 String segmentName = (String)segment.getQualifierByName("ID").getValues().get(0); 620 if(name.equals(segmentName)) 621 return transcriptName; 622 } 623 catch(InvalidRelationException e) 624 { 625 // TODO Auto-generated catch block 626 e.printStackTrace(); 627 } 628 } 629 } 630 } 631 } 632 633 feature = getProtein(name); 634 635 if(feature != null) 636 { 637 for(int i=0; i<transcriptNames.size(); i++) 638 { 639 String transcriptName = (String)transcriptNames.get(i); 640 Feature protein = getProteinOfTranscript(transcriptName); 641 try 642 { 643 String proteinsName = (String)protein.getQualifierByName("ID").getValues().get(0); 644 if(name.equals(proteinsName)) 645 return transcriptName; 646 } 647 catch(InvalidRelationException e) 648 { 649 // TODO Auto-generated catch block 650 e.printStackTrace(); 651 } 652 } 653 } 654 655 // search children of all transcripts 656 List<Feature> transcripts = getTranscripts(); 657 for(int i=0;i<transcripts.size(); i++) 658 { 659 Feature transcript = transcripts.get(i); 660 Set<Feature> children = getChildren(transcript); 661 Iterator<Feature> it = children.iterator(); 662 while(it.hasNext()) 663 { 664 Feature f = it.next(); 665 if(name.equals(GeneUtils.getUniqueName(f))) 666 return GeneUtils.getUniqueName(transcript); 667 } 668 } 669 670 return null; 671 } 672 673 /** 674 * Return the protein feature of a transcipt. 675 * @param transcript_name 676 * @return 677 */ getProteinOfTranscript(final String transcript_name)678 public Feature getProteinOfTranscript(final String transcript_name) 679 { 680 if(proteins.containsKey(transcript_name)) 681 return (Feature)proteins.get(transcript_name);; 682 683 return null; 684 } 685 686 /** 687 * Return the 3'UTR features of a transcriot as a <code>List</code>. 688 * @param transcript_name 689 * @return 690 */ get3UtrOfTranscript(final String transcript_name)691 public List<Feature> get3UtrOfTranscript(final String transcript_name) 692 { 693 if(three_prime_utr.containsKey(transcript_name)) 694 return (List<Feature>)three_prime_utr.get(transcript_name); 695 696 return null; 697 } 698 699 /** 700 * Return the 5'UTR features of a transcriot as a <code>List</code>. 701 * @param transcript_name 702 * @return 703 */ get5UtrOfTranscript(final String transcript_name)704 public List<Feature> get5UtrOfTranscript(final String transcript_name) 705 { 706 if(five_prime_utr.containsKey(transcript_name)) 707 return (List<Feature>)five_prime_utr.get(transcript_name); 708 709 return null; 710 } 711 712 /** 713 * Utility to determine if this is the first or only UTR, so that 714 * partial qualifiers can be added to the correct UTR feature. 715 * @param utrName 716 * @param isFwd 717 * @return 718 */ isFirstUtr(final String utrName, final boolean isFwd)719 public boolean isFirstUtr(final String utrName, final boolean isFwd) 720 { 721 try 722 { 723 Feature this5Utr = getFeatureFromHash(utrName, five_prime_utr); 724 if (this5Utr != null) 725 { 726 String transcript_name = getQualifier(this5Utr, "Parent"); 727 List<Feature> utrs = get5UtrOfTranscript(transcript_name); 728 if (utrs.size() == 1) 729 return true; 730 731 for (Feature utr : utrs) 732 { 733 if (isFwd && utr.getFirstBase() < this5Utr.getFirstBase()) 734 return false; 735 else if (!isFwd && utr.getLastBase() > this5Utr.getLastBase()) 736 return false; 737 } 738 return true; 739 } 740 741 742 Feature this3Utr = getFeatureFromHash(utrName, three_prime_utr); 743 if (this3Utr != null) 744 { 745 String transcript_name = getQualifier(this3Utr, "Parent"); 746 List<Feature> utrs = get3UtrOfTranscript(transcript_name); 747 if (utrs.size() == 1) 748 return true; 749 750 for (Feature utr : utrs) 751 { 752 if (!isFwd && utr.getFirstBase() < this3Utr.getFirstBase()) 753 return false; 754 else if (isFwd && utr.getLastBase() > this3Utr.getLastBase()) 755 return false; 756 } 757 return true; 758 } 759 } 760 catch(InvalidRelationException ire){} 761 return false; 762 } 763 764 /** 765 * Return the other child features of a transcriot as a <code>List</code>. 766 * @param transcript_name 767 * @return 768 */ getOtherFeaturesOfTranscript(final String transcript_name)769 public List<Feature> getOtherFeaturesOfTranscript(final String transcript_name) 770 { 771 if(other_features.containsKey(transcript_name)) 772 return other_features.get(transcript_name); 773 return null; 774 } 775 776 /** 777 * Get a list of trancripts. 778 * @return 779 */ getTranscripts()780 public List<Feature> getTranscripts() 781 { 782 return transcripts; 783 } 784 785 786 /** 787 * Get a list of trancripts. 788 * @return 789 */ getTranscriptNames()790 private List<String> getTranscriptNames() 791 { 792 List<String> names = new Vector<String>(); 793 for(int i=0; i<transcripts.size(); i++) 794 { 795 Feature f = (Feature)transcripts.get(i); 796 try 797 { 798 names.add( (String)f.getQualifierByName("ID").getValues().get(0) ); 799 } 800 catch(InvalidRelationException e) 801 { 802 // TODO Auto-generated catch block 803 e.printStackTrace(); 804 } 805 806 } 807 808 return names; 809 } 810 811 /** 812 * Test if a name is already used in this gene model 813 * @param name 814 * @return 815 */ isUniqueName(final String name)816 private boolean isUniqueName(final String name) 817 { 818 if(isTranscript(name)) 819 return false; 820 if(isSplicedFeatures(name)) 821 return false; 822 823 try 824 { 825 if(getFeatureFromHash(name, three_prime_utr) != null) 826 return false; 827 if(getFeatureFromHash(name, five_prime_utr) != null) 828 return false; 829 if(getFeatureFromHash(name, other_features) != null) 830 return false; 831 832 final Enumeration<Feature> enum_pp = proteins.elements(); 833 while(enum_pp.hasMoreElements()) 834 { 835 final Feature pp = enum_pp.nextElement(); 836 if( getQualifier(pp, "ID").equals(name) ) 837 return false; 838 } 839 840 if( getQualifier(getGene(), "ID").equals(name) ) 841 return false; 842 } 843 catch(InvalidRelationException e) 844 { 845 e.printStackTrace(); 846 } 847 848 return true; 849 } 850 851 /** 852 * Test if the name is a transcript in this gene model. 853 * @param feature_id 854 * @return true if a transcript 855 */ isTranscript(final String feature_id)856 public boolean isTranscript(final String feature_id) 857 { 858 try 859 { 860 for(int i=0; i<transcripts.size(); i++) 861 { 862 if(feature_id.equals(getQualifier((Feature)transcripts.get(i), "ID"))) 863 return true; 864 } 865 } 866 catch(InvalidRelationException e) 867 { 868 // TODO Auto-generated catch block 869 e.printStackTrace(); 870 } 871 872 return false; 873 } 874 875 /** 876 * Test if this is an exon of transcript. 877 * @param feature_id exon feature 878 * @param transcript_id transcript feature 879 * @return 880 */ isSplicedFeatures(final String feature_id)881 private boolean isSplicedFeatures(final String feature_id) 882 { 883 List<Feature> splicedFeatures = new Vector<Feature>(); 884 List<Feature> transcripts = getTranscripts(); 885 886 try 887 { 888 for(int i = 0; i < transcripts.size(); i++) 889 { 890 Feature transcript = (Feature) transcripts.get(i); 891 String transcript_id = getQualifier(transcript, "ID"); 892 List<Feature> splicedSites = getSplicedFeaturesOfTranscript(transcript_id); 893 if(splicedSites != null) 894 splicedFeatures.addAll(splicedSites); 895 } 896 897 if(splicedFeatures == null) 898 return false; 899 900 for(int i=0; i<splicedFeatures.size(); i++) 901 { 902 GFFStreamFeature feature = (GFFStreamFeature)splicedFeatures.get(i); 903 RangeVector rv = feature.getLocation().getRanges(); 904 for(int j=0; j<rv.size(); j++) 905 { 906 String this_feature_id = feature.getSegmentID((Range)rv.get(j)); 907 if(feature_id.equals(this_feature_id)) 908 return true; 909 } 910 } 911 } 912 catch(InvalidRelationException e) 913 { 914 e.printStackTrace(); 915 } 916 917 return false; 918 } 919 920 /** 921 * Method to automatically generate ID's for transcripts 922 * @param transcript_key 923 * @return 924 */ autoGenerateTanscriptName(String transcript_key)925 public String autoGenerateTanscriptName(String transcript_key) 926 { 927 try 928 { 929 String name = getQualifier(getGene(), "ID"); 930 int auto = 1; 931 while( isTranscript( name + "." + auto ) && 932 auto < 50) 933 auto++; 934 return name + "." + auto; 935 } 936 catch(InvalidRelationException e) 937 { 938 // TODO Auto-generated catch block 939 e.printStackTrace(); 940 } 941 return null; 942 } 943 944 /** 945 * Generate new names for exon features for this gene model 946 * @param transcript_id 947 * @return 948 */ autoGenerateSplicedFeatureName(final String transcript_id)949 public String autoGenerateSplicedFeatureName(final String transcript_id) 950 { 951 try 952 { 953 int index = transcript_id.lastIndexOf('.'); 954 if(index == -1) 955 index = transcript_id.lastIndexOf(':'); 956 int transcript_number = -1; 957 String name = (String)getGene().getQualifierByName("ID").getValues().get(0); 958 959 if(index > -1) 960 { 961 try 962 { 963 transcript_number = Integer.parseInt(transcript_id.substring(index+1)); 964 } 965 catch(NumberFormatException nfe) 966 { 967 transcript_number = -1; 968 } 969 } 970 971 if(transcript_number < 1) 972 { 973 for(transcript_number = 0; transcript_number <= transcripts.size(); 974 transcript_number++) 975 { 976 Feature transcript = (Feature) transcripts.get(transcript_number); 977 if(transcript_id.equals(getQualifier(transcript, "ID"))) 978 break; 979 } 980 } 981 if(transcript_number == 0) 982 name = name + ":exon:"; 983 else 984 name = name + "." + transcript_number + ":exon:"; 985 986 int auto = 1; 987 while( isSplicedFeatures(name + auto) && auto < 50) 988 auto++; 989 return name + auto; 990 } 991 catch(InvalidRelationException e) 992 { 993 e.printStackTrace(); 994 } 995 return null; 996 } 997 998 999 /** 1000 * Generate new names for peptide features for this gene model 1001 * @param transcript_id 1002 * @return 1003 */ autoGeneratePepName(final String transcript_id)1004 public String autoGeneratePepName(final String transcript_id) 1005 { 1006 try 1007 { 1008 int index = transcript_id.lastIndexOf('.'); 1009 if(index == -1) 1010 index = transcript_id.lastIndexOf(':'); 1011 int transcript_number = -1; 1012 1013 if(index > -1) 1014 { 1015 try 1016 { 1017 transcript_number = Integer.parseInt(transcript_id.substring(index+1)); 1018 } 1019 catch(NumberFormatException nfe) 1020 { 1021 transcript_number = -1; 1022 } 1023 } 1024 1025 if(transcript_number < 1) 1026 { 1027 for(transcript_number = 1; transcript_number <= transcripts.size(); 1028 transcript_number++) 1029 { 1030 Feature transcript = (Feature) transcripts.get(transcript_number - 1); 1031 if(transcript_id.equals(getQualifier(transcript, "ID"))) 1032 break; 1033 } 1034 } 1035 1036 String name = (String)getGene().getQualifierByName("ID").getValues().get(0); 1037 1038 if(isUniqueName(name+ "." + transcript_number + ":pep")) 1039 return name+ "." + transcript_number + ":pep"; 1040 else 1041 return name + "." + transcript_number + "a:pep"; 1042 } 1043 catch(InvalidRelationException e) 1044 { 1045 e.printStackTrace(); 1046 } 1047 return null; 1048 } 1049 1050 /** 1051 * Generate new names for generic region features for this gene model 1052 * @param transcript_id 1053 * @return 1054 */ autoGenerateFeatureName(final String transcript_id, final String keyName)1055 public String autoGenerateFeatureName(final String transcript_id, 1056 final String keyName) 1057 { 1058 String featureName = ""; 1059 try 1060 { 1061 featureName = 1062 (String)getGene().getQualifierByName("ID").getValues().get(0); 1063 } 1064 catch(InvalidRelationException e){} 1065 1066 final Pattern pattern = Pattern.compile("\\d+$"); 1067 final Matcher matcher = pattern.matcher(transcript_id); 1068 if(matcher.find()) 1069 featureName = featureName+"."+matcher.group()+":"+keyName; 1070 else 1071 featureName = featureName+":"+keyName; 1072 1073 if(!isUniqueName(featureName)) 1074 { 1075 int num = 1; 1076 while(!isUniqueName(featureName + ":" + num) && num < 100) 1077 num++; 1078 featureName = featureName + ":" + num; 1079 } 1080 1081 return featureName; 1082 } 1083 1084 /** 1085 * Search for the feature with a particular uniquename 1086 * @param name uniquename 1087 * @return 1088 */ getFeatureFromId(final String name)1089 public Object getFeatureFromId(final String name) 1090 { 1091 Object feature = null; 1092 1093 // check gene 1094 try 1095 { 1096 final String uniquename = getQualifier(gene, "ID"); 1097 1098 if(uniquename.equals(name)) 1099 return gene; 1100 } 1101 catch(InvalidRelationException e) 1102 { 1103 e.printStackTrace(); 1104 } 1105 1106 // check transcript 1107 StringVector sv = new StringVector(); 1108 sv.add(name); 1109 1110 feature = containsTranscript(sv); 1111 1112 if(feature != null) 1113 return feature; 1114 1115 // check exons 1116 feature = getSplicedFeatures(name); 1117 1118 if(feature != null) 1119 return feature; 1120 1121 feature = getProtein(name); 1122 1123 if(feature != null) 1124 return feature; 1125 1126 try 1127 { 1128 feature = getFeatureFromHash(name, three_prime_utr); 1129 if(feature != null) 1130 return feature; 1131 1132 feature = getFeatureFromHash(name, five_prime_utr); 1133 if(feature != null) 1134 return feature; 1135 1136 feature = getFeatureFromHash(name, other_features); 1137 } 1138 catch(InvalidRelationException e) 1139 { 1140 e.printStackTrace(); 1141 } 1142 1143 return feature; 1144 } 1145 1146 /** 1147 * Routine to look for a exon with a particular 1148 * uniquename 1149 * @param name 1150 * @return 1151 */ getSplicedFeatures(final String name)1152 private Feature getSplicedFeatures(final String name) 1153 { 1154 Enumeration<List<Feature>> enum_exons = splicedFeatures.elements(); 1155 try 1156 { 1157 while(enum_exons.hasMoreElements()) 1158 { 1159 List<Feature> exons = enum_exons.nextElement(); 1160 1161 for(int i=0; i<exons.size(); i++) 1162 { 1163 String uniquename = getQualifier((Feature)exons.get(i), "ID"); 1164 1165 if(uniquename.equals(name)) 1166 return (Feature)exons.get(i); 1167 } 1168 } 1169 } 1170 catch(InvalidRelationException e) 1171 { 1172 e.printStackTrace(); 1173 } 1174 return null; 1175 } 1176 getProtein(final String id)1177 private Feature getProtein(final String id) 1178 { 1179 Enumeration<Feature> enum_proteins = proteins.elements(); 1180 try 1181 { 1182 while(enum_proteins.hasMoreElements()) 1183 { 1184 Feature protein = enum_proteins.nextElement(); 1185 if(getQualifier(protein, "ID").equals(id)) 1186 return protein; 1187 } 1188 } 1189 catch(InvalidRelationException e) 1190 { 1191 e.printStackTrace(); 1192 } 1193 return null; 1194 } 1195 1196 /** 1197 * Search for a feature uniquename 1198 * @param id 1199 * @param UTR 1200 * @return 1201 * @throws InvalidRelationException 1202 */ getFeatureFromHash(final String id, final Hashtable<String, List<Feature>> UTR)1203 private Feature getFeatureFromHash 1204 (final String id, 1205 final Hashtable<String, List<Feature>> UTR) 1206 throws InvalidRelationException 1207 { 1208 Enumeration<List<Feature>> enum_utr = UTR.elements(); 1209 1210 while(enum_utr.hasMoreElements()) 1211 { 1212 List<Feature> utrs = enum_utr.nextElement(); 1213 1214 for(int i=0; i<utrs.size(); i++) 1215 { 1216 Feature utr = utrs.get(i); 1217 if(getQualifier(utr, "ID").equals(id)) 1218 return utr; 1219 } 1220 } 1221 1222 return null; 1223 } 1224 1225 /** 1226 * Utility for get feature ID and Parent qualifiers. 1227 * @param feature 1228 * @param name 1229 * @return 1230 * @throws InvalidRelationException 1231 */ getQualifier(final Feature feature, final String name)1232 public String getQualifier(final Feature feature, 1233 final String name) 1234 throws InvalidRelationException 1235 { 1236 Qualifier qualifier = feature.getQualifierByName(name); 1237 if(qualifier == null) 1238 return null; 1239 1240 return (String)(qualifier.getValues().get(0)); 1241 } 1242 1243 /** 1244 * Get the srcfeature residue length 1245 * @return 1246 */ getSeqlen()1247 public int getSeqlen() 1248 { 1249 return seqlen; 1250 } 1251 1252 /** 1253 * Set the srcfeature residue length 1254 * @param seqlen 1255 */ setSeqlen(int seqlen)1256 public void setSeqlen(int seqlen) 1257 { 1258 this.seqlen = seqlen; 1259 } 1260 getSrcfeature_id()1261 public int getSrcfeature_id() 1262 { 1263 return srcfeature_id; 1264 } 1265 setSrcfeature_id(int srcfeature_id)1266 public void setSrcfeature_id(int srcfeature_id) 1267 { 1268 this.srcfeature_id = srcfeature_id; 1269 } 1270 getSplicedFeatures()1271 public Hashtable<String, List<Feature>> getSplicedFeatures() 1272 { 1273 return splicedFeatures; 1274 } 1275 1276 /** 1277 * Get the nucleotide location for a featureloc in amino acid 1278 * coordinates. 1279 * @param proteinFeature 1280 * @param featureLocToProtein 1281 * @return 1282 * @throws LocationParseException 1283 */ getNucLocation(final Feature proteinFeature, final FeatureLoc featureLocToProtein)1284 public Location getNucLocation(final Feature proteinFeature, 1285 final FeatureLoc featureLocToProtein) 1286 throws LocationParseException 1287 { 1288 String transcriptName = getTranscriptFromName( 1289 GeneUtils.getUniqueName(proteinFeature)); 1290 List<Feature> spliced = getSplicedFeaturesOfTranscript(transcriptName); 1291 if(spliced == null) 1292 return null; 1293 1294 RangeVector ranges = new RangeVector(); 1295 for(int i=0; i<spliced.size(); i++) 1296 { 1297 Feature f = spliced.get(i); 1298 if(f.getKey().getKeyString().equals(DatabaseDocument.EXONMODEL)) 1299 ranges.addAll(f.getLocation().getRanges()); 1300 } 1301 1302 int start = proteinFeature.getLocation().getFirstBase(); 1303 int fmin = start+(featureLocToProtein.getFmin()*3)+1; 1304 int fmax = start+(featureLocToProtein.getFmax()*3); 1305 1306 int len = proteinFeature.getEntry().getSequence().length(); 1307 if(fmax > len) 1308 fmax = len; 1309 1310 if(ranges.size()>1) 1311 { 1312 Collections.sort(ranges, new RangeComparator()); 1313 1314 for(int i=0;i<ranges.size()-1; i++) 1315 { 1316 Range range1 = (Range) ranges.get(i); 1317 Range range2 = (Range) ranges.get(i+1); 1318 if(fmin > range1.getEnd()) 1319 fmin += range2.getStart()-range1.getEnd(); 1320 if(fmax > range1.getEnd()) 1321 fmax += range2.getStart()-range1.getEnd(); 1322 } 1323 } 1324 1325 Location location; 1326 if(proteinFeature.getLocation().isComplement()) 1327 location = new Location("complement("+fmin+".."+fmax+")"); 1328 else 1329 location = new Location(fmin+".."+fmax); 1330 return location; 1331 } 1332 1333 1334 class RangeComparator implements Comparator<Range> 1335 { compare(Range o1, Range o2)1336 public int compare(Range o1, Range o2) 1337 { 1338 int start1 = o1.getStart(); 1339 int start2 = o2.getStart(); 1340 return start1-start2; 1341 } 1342 } 1343 }