1 /* ChadoCanonicalGene.java
2  *
3  * created: 2006
4  *
5  * This file is part of Artemis
6  *
7  * Copyright (C) 2006 Genome Research Limited
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License
11  * as published by the Free Software Foundation; either version 2
12  * of the License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
22  *
23  * $Header: //tmp/pathsoft/artemis/uk/ac/sanger/artemis/io/ChadoCanonicalGene.java,v 1.34 2009-08-11 08:59:46 tjc Exp $
24  */
25 
26 package uk.ac.sanger.artemis.io;
27 
28 import uk.ac.sanger.artemis.components.genebuilder.GeneUtils;
29 import uk.ac.sanger.artemis.util.DatabaseDocument;
30 import uk.ac.sanger.artemis.util.ReadOnlyException;
31 import uk.ac.sanger.artemis.util.StringVector;
32 
33 import java.util.Collections;
34 import java.util.Comparator;
35 import java.util.Iterator;
36 import java.util.Vector;
37 import java.util.Hashtable;
38 import java.util.Enumeration;
39 import java.util.List;
40 import java.util.Set;
41 import java.util.HashSet;
42 import java.util.regex.Matcher;
43 import java.util.regex.Pattern;
44 
45 import org.gmod.schema.sequence.FeatureLoc;
46 
47 /**
48  *  Used by GFFStreamFeature to represent the chado canonical gene.
49  *  Contains gene, transcript, exons and proteins.
50  **/
51 public class ChadoCanonicalGene
52 {
  // the gene feature at the top of this gene model
  private Feature gene;

  // transcripts that are part_of the gene
  private List<Feature> transcripts = new Vector<Feature>();

  // spliced features that are part_of a transcript; keyed by the
  // transcript name they were added under
  private Hashtable<String, List<Feature>> splicedFeatures =
    new Hashtable<String, List<Feature>>();

  // protein that derives_from a transcript; keyed by the transcript name
  private Hashtable<String, Feature> proteins = new Hashtable<String, Feature>();

  // UTR features; keyed by the transcript name they were added under
  private Hashtable<String,  List<Feature>> three_prime_utr =
    new Hashtable<String,  List<Feature>>();
  private Hashtable<String,  List<Feature>> five_prime_utr  =
    new Hashtable<String,  List<Feature>>();

  // other child features of a transcript; keyed by the transcript name
  private Hashtable<String,  List<Feature>> other_features =
    new Hashtable<String,  List<Feature>>();

  // srcfeature
  private int srcfeature_id;

  // srcfeature length
  private int seqlen;
80 
81 
82   /**
83    * Get the gene feaure object.
84    * @return
85    */
getGene()86   public Feature getGene()
87   {
88     return gene;
89   }
90 
91   /**
92    * Set the gene feature object.
93    * @param gene
94    */
setGene(Feature gene)95   public void setGene(Feature gene)
96   {
97     this.gene = gene;
98   }
99 
getGeneUniqueName()100   public String getGeneUniqueName()
101   {
102     try
103     {
104       return getQualifier(getGene(), "ID");
105     }
106     catch(InvalidRelationException e)
107     {
108       return null;
109     }
110   }
111 
112   /**
113    * Add a transcript to the model
114    * @param transcript
115    */
addTranscript(Feature transcript)116   public void addTranscript(Feature transcript)
117   {
118     transcripts.add(transcript);
119   }
120 
121   /**
122    * Delete a transcript and child features.
123    * @param transcript_name
124    */
deleteTranscript(String transcript_name)125   public void deleteTranscript(String transcript_name)
126   {
127     for(int i=0; i<transcripts.size(); i++)
128     {
129       try
130       {
131         Feature transcript = (Feature)transcripts.get(i);
132 
133         if( transcript_name.equals(getQualifier(transcript, "ID")) )
134         {
135           transcripts.remove(transcript);
136           splicedFeatures.remove(transcript_name);
137           three_prime_utr.remove(transcript_name);
138           five_prime_utr.remove(transcript_name);
139           other_features.remove(transcript_name);
140           proteins.remove(transcript_name);
141         }
142       }
143       catch(InvalidRelationException e)
144       {
145         e.printStackTrace();
146       }
147     }
148   }
149 
150   /**
151    * This should be called if the uniqueName of a gene model
152    * feature is changed.
153    * @param oldName
154    * @param newName
155    * @param children
156    */
updateUniqueName(final String oldName, final String newName, final Set<Feature> children)157   public void updateUniqueName(final String oldName,
158                                final String newName,
159                                final Set<Feature> children)
160   {
161     updateNames(splicedFeatures,oldName,newName);
162     updateNames(proteins,oldName,newName);
163     updateNames(three_prime_utr,oldName,newName);
164     updateNames(five_prime_utr,oldName,newName);
165     updateNames(other_features,oldName,newName);
166 
167     if(children != null)
168       GeneUtils.fixParentQualifier(oldName, newName, children);
169   }
170 
171   /**
172    * Utility for changing the key used in a Hashtable
173    * @param hash
174    * @param oldName
175    * @param newName
176    */
updateNames(final Hashtable hash, final String oldName, final String newName)177   private static void updateNames(final Hashtable hash,
178                                   final String oldName,
179                                   final String newName)
180   {
181     Object features = hash.get(oldName);
182     if(features != null)
183     {
184       hash.remove(oldName);
185       hash.put(newName, features);
186     }
187   }
188 
189   /**
190    * Delete features.
191    * @param embl_feature
192    */
deleteFeature(final Feature embl_feature)193   public void deleteFeature(final Feature embl_feature)
194   {
195     try
196     {
197       final String name = getQualifier(embl_feature, "ID");
198       Object feature = getSplicedFeatures(name);
199 
200       if(feature != null)
201       {
202         String transcript_name = getQualifier((Feature) feature, "Parent");
203         splicedFeatures.remove(transcript_name);
204         return;
205       }
206 
207       final Enumeration<String> enum_protein = proteins.keys();
208       while(enum_protein.hasMoreElements())
209       {
210         final String transcriptName = (String)enum_protein.nextElement();
211         Feature protein = (Feature)proteins.get(transcriptName);
212         if(getQualifier(protein, "ID").equals(name))
213         {
214           proteins.remove(transcriptName);
215           return;
216         }
217       }
218 
219       feature = getFeatureFromHash(name, three_prime_utr);
220       if(feature != null)
221       {
222         String transcript_name = getQualifier((Feature) feature, "Parent");
223         List<Feature> utr = get3UtrOfTranscript(transcript_name);
224         utr.remove(feature);
225         return;
226       }
227 
228       feature = getFeatureFromHash(name, five_prime_utr);
229       if(feature != null)
230       {
231         String transcript_name = getQualifier((Feature) feature, "Parent");
232         List<Feature> utr = get5UtrOfTranscript(transcript_name);
233         utr.remove(feature);
234         return;
235       }
236 
237       feature = getFeatureFromHash(name, other_features);
238       if(feature != null)
239       {
240         String transcript_name = getQualifier((Feature) feature, "Parent");
241         List<Feature> others = getOtherFeaturesOfTranscript(transcript_name);
242         others.remove(feature);
243         return;
244       }
245 
246       deleteTranscript(name);
247     }
248     catch(InvalidRelationException e1)
249     {
250       e1.printStackTrace();
251     }
252   }
253 
254   /**
255    * Get all child members of a feature
256    * @param embl_feature
257    * @return
258    */
getChildren(Feature embl_feature)259   public Set<Feature> getChildren(Feature embl_feature)
260   {
261     Set<Feature> children = new HashSet<Feature>();
262     try
263     {
264       String name = getQualifier(embl_feature, "ID");
265 
266       String gene_name = getQualifier(getGene(), "ID");
267       if(name.equals(gene_name))
268       {
269         List<Feature> transcripts = getTranscripts();
270         for(int i=0; i<transcripts.size(); i++)
271         {
272           Feature transcript = transcripts.get(i);
273           children.add(transcript);
274           children.addAll( getChildren(transcript) );
275         }
276         return children;
277       }
278 
279       searchForChildren(splicedFeatures, name, children);
280       searchForChildren(three_prime_utr, name, children);
281       searchForChildren(five_prime_utr, name, children);
282       searchForChildren(other_features, name, children);
283 
284       // protein
285       Enumeration<Feature> pep_enum = proteins.elements();
286       while(pep_enum.hasMoreElements())
287       {
288         Feature child = pep_enum.nextElement();
289         String parent = getQualifier(child, "Derives_from");
290         if(parent != null && parent.equals(name))
291           children.add(child);
292       }
293       return children;
294     }
295     catch(InvalidRelationException e)
296     {
297       e.printStackTrace();
298     }
299     return null;
300   }
301 
302   /**
303    * Search in a <code>Hashtable</code> for child Features with a
304    * matching parent ID. Child features are added to the <code>Set</code>
305    * that is passed into this method.
306    * @param hash        Hashtable to search for children in
307    * @param parent_id   uniquname to look for
308    * @param children    collection to add child features to
309    * @throws InvalidRelationException
310    */
searchForChildren(Hashtable<String, List<Feature>> hash, String parent_id, Set<Feature> children)311   private void searchForChildren(Hashtable<String, List<Feature>> hash,
312                                  String parent_id,
313                                  Set<Feature> children)
314                throws InvalidRelationException
315   {
316     Enumeration<List<Feature>> feature_enum = hash.elements();
317     String parent;
318 
319     while(feature_enum.hasMoreElements())
320     {
321       List<Feature> child_list = feature_enum.nextElement();
322 
323       for(int i=0; i<child_list.size(); i++)
324       {
325         Feature child = child_list.get(i);
326         //if(children.contains(child))
327         //  continue;
328 
329         parent = getQualifier(child, "Parent");
330         if(parent != null && parent.equals(parent_id))
331           children.add(child);
332         else
333         {
334           parent = getQualifier(child, "Derives_from");
335           if(parent != null && parent.equals(parent_id))
336             children.add(child);
337         }
338       }
339     }
340   }
341 
342   /**
343    * Add exon feature to the chado gene model.
344    * @param transcript_name
345    * @param exon
346    * @param reset
347    * @throws InvalidRelationException
348    */
addSplicedFeatures(final String transcript_name, final Feature exon, boolean reset)349   public void addSplicedFeatures(final String transcript_name,
350                       final Feature exon, boolean reset)
351   {
352     if(reset)
353       splicedFeatures.remove(transcript_name);
354     addSplicedFeatures(transcript_name, exon);
355   }
356 
357   /**
358    * Add exon feature to the chado gene model.
359    * @param transcript_name
360    * @param v_spliced
361    * @throws InvalidRelationException
362    */
addSplicedFeatures(final String transcript_name, final Feature spliced)363   public void addSplicedFeatures(final String transcript_name,
364                                  final Feature spliced)
365   {
366     final List<Feature> v_spliced;
367     if(splicedFeatures.containsKey(transcript_name))
368       v_spliced = (Vector<Feature>)splicedFeatures.get(transcript_name);
369     else
370       v_spliced = new Vector<Feature>();
371 
372     v_spliced.add(spliced);
373     splicedFeatures.put(transcript_name, v_spliced);
374   }
375 
correctSpliceSiteAssignments()376   public void correctSpliceSiteAssignments()
377   {
378     Enumeration<String> enumSplicedFeatures = splicedFeatures.keys();
379     while(enumSplicedFeatures.hasMoreElements())
380     {
381       String transcriptId = enumSplicedFeatures.nextElement();
382       Vector<Feature> v_spliced = (Vector<Feature>)splicedFeatures.get(transcriptId);
383       Set<String> splicedTypes = getSpliceTypes(transcriptId);
384       Iterator<String> it = splicedTypes.iterator();
385       while(it.hasNext())
386       {
387         String type = it.next();
388         if(!type.equals(DatabaseDocument.EXONMODEL) &&
389            !type.equals("pseudogenic_exon") &&
390            !type.equals("exon"))
391         {
392           List<Feature> splicedFeatures = getSpliceSitesOfTranscript(transcriptId, type);
393           if(splicedFeatures.size() == 1)
394           {
395             Feature f = (Feature)splicedFeatures.get(0);
396             addOtherFeatures(transcriptId, f);
397             v_spliced.remove(f);
398             try
399             {
400               f.removeQualifierByName("feature_relationship_rank");
401             }
402             catch(ReadOnlyException e){}
403             catch(EntryInformationException e){}
404           }
405         }
406       }
407       splicedFeatures.put(transcriptId, v_spliced);
408     }
409   }
410 
411   /**
412    * Add protein feature to the chado gene model.
413    * @param transcript_name
414    * @param protein
415    * @throws InvalidRelationException
416    */
addProtein(final String transcript_name, final Feature protein)417   public void addProtein(final String transcript_name,
418                          final Feature protein)
419   {
420     proteins.put(transcript_name, protein);
421   }
422 
423   /**
424    * Add 3'UTR to chado gene model.
425    * @param transcript_name
426    * @param utr
427    * @throws InvalidRelationException
428    */
add3PrimeUtr(final String transcript_name, final Feature utr)429   public void add3PrimeUtr(final String transcript_name,
430                            final Feature utr)
431   {
432     final List<Feature> utr_list;
433     if(three_prime_utr.containsKey(transcript_name))
434       utr_list = three_prime_utr.get(transcript_name);
435     else
436       utr_list = new Vector<Feature>();
437 
438     utr_list.add(utr);
439     three_prime_utr.put(transcript_name, utr_list);
440   }
441 
442   /**
443    * Add 5'UTR to chado gene model.
444    * @param transcript_name
445    * @param utr
446    * @throws InvalidRelationException
447    */
add5PrimeUtr(final String transcript_name, final Feature utr)448   public void add5PrimeUtr(final String transcript_name,
449                            final Feature utr)
450   {
451     final List<Feature> utr_list;
452     if(five_prime_utr.containsKey(transcript_name))
453       utr_list = (Vector<Feature>)five_prime_utr.get(transcript_name);
454     else
455       utr_list = new Vector<Feature>();
456 
457     utr_list.add(utr);
458     five_prime_utr.put(transcript_name, utr_list);
459   }
460 
461   /**
462    * Add other child features of a transcript to the chado
463    * gene model.
464    * @param transcript_name
465    * @param other_feature
466    */
addOtherFeatures(final String transcript_name, final Feature other_feature)467   public void addOtherFeatures(final String transcript_name,
468                                final Feature other_feature)
469   {
470     final List<Feature> v_other_features;
471     if(other_features.containsKey(transcript_name))
472       v_other_features = (Vector<Feature>)other_features.get(transcript_name);
473     else
474       v_other_features = new Vector<Feature>();
475     v_other_features.add(other_feature);
476     other_features.put(transcript_name, v_other_features);
477   }
478 
479   /**
480    * Check if this gene model contains a transcript with an ID equal to
481    * any of the names in the <code>StringVector</code>. If it does find
482    * it returns the transcript feature, otherwise it returns null.
483    * @param names
484    * @return
485    */
containsTranscript(final StringVector names)486   public Feature containsTranscript(final StringVector names)
487   {
488     for(int i=0; i<transcripts.size(); i++)
489     {
490       try
491       {
492         Feature transcript = (Feature)transcripts.get(i);
493 
494         if( names.contains(getQualifier(transcript, "ID")) )
495           return transcript;
496       }
497       catch(InvalidRelationException e)
498       {
499         e.printStackTrace();
500       }
501     }
502     return null;
503   }
504 
505 
getSpliceSitesOfTranscript(final String transcript_name, final String type)506   public List<Feature> getSpliceSitesOfTranscript(final String transcript_name,
507                                          final String type)
508   {
509     if(splicedFeatures.containsKey(transcript_name))
510     {
511       List<Feature> splicedFeaturesOfTranscript = splicedFeatures.get(transcript_name);
512       List<Feature> results = new Vector<Feature>();
513       for(int i=0; i<splicedFeaturesOfTranscript.size(); i++)
514       {
515         Feature feature = (Feature)splicedFeaturesOfTranscript.get(i);
516         if(feature.getKey().getKeyString().equals(type))
517           results.add(feature);
518       }
519       return results;
520     }
521 
522     return null;
523   }
524 
525   /**
526    * Get a list of the feature keys of the types that are splice sites
527    * @param transcript_name
528    * @return
529    */
getSpliceTypes(final String transcript_name)530   public Set<String> getSpliceTypes(final String transcript_name)
531   {
532     if(splicedFeatures.containsKey(transcript_name))
533     {
534       List<Feature> splicedFeaturesOfTranscript = splicedFeatures.get(transcript_name);
535       Set<String> splicedTypes = new HashSet<String>();
536       for(int i=0; i<splicedFeaturesOfTranscript.size(); i++)
537       {
538         Feature feature = (Feature)splicedFeaturesOfTranscript.get(i);
539         splicedTypes.add( feature.getKey().getKeyString() );
540       }
541       return splicedTypes;
542     }
543 
544     return null;
545   }
546 
547   /**
548    * Return the exons of a given transcript as a <code>List</code>.
549    * @param transcript_name
550    * @return
551    */
getSplicedFeaturesOfTranscript(final String transcript_name)552   public List<Feature> getSplicedFeaturesOfTranscript(final String transcript_name)
553   {
554     if(splicedFeatures.containsKey(transcript_name))
555     {
556       return splicedFeatures.get(transcript_name);
557     }
558 
559     return null;
560   }
561 
562   /**
563    * Return the transcript from the name of a constituent feature
564    * @param constituent feature name
565    * @return transcript
566    */
getTranscriptFeatureFromName(final String name)567   public Feature getTranscriptFeatureFromName(final String name)
568   {
569     String transcriptName = getTranscriptFromName(name);
570     if(transcriptName == null)
571       return null;
572 
573     try
574     {
575       for (int i = 0; i < transcripts.size(); i++)
576       {
577         Feature feature = (Feature) transcripts.get(i);
578         if (getQualifier(feature, "ID").equals(transcriptName))
579           return feature;
580       }
581     }
582     catch (InvalidRelationException ire){}
583     return null;
584   }
585 
586   /**
587    * Return the transcript from the name of a constituent feature
588    * @param constituent feature name
589    * @return transcript name
590    */
getTranscriptFromName(final String name)591   public String getTranscriptFromName(final String name)
592   {
593     //  check transcript
594     StringVector sv = new StringVector();
595     sv.add(name);
596     Feature feature = containsTranscript(sv);
597 
598     if(feature != null)
599       return name;
600 
601     // check exons
602     List<String> transcriptNames = getTranscriptNames();
603     feature = getSplicedFeatures(name);
604 
605     if(feature != null)
606     {
607       for(int i=0; i<transcriptNames.size(); i++)
608       {
609         String transcriptName = (String)transcriptNames.get(i);
610         List<Feature> splicedSegments = getSplicedFeaturesOfTranscript(transcriptName);
611 
612         if(splicedSegments != null)
613         {
614           for(int j=0; j<splicedSegments.size(); j++)
615           {
616             Feature segment = splicedSegments.get(j);
617             try
618             {
619               String segmentName = (String)segment.getQualifierByName("ID").getValues().get(0);
620               if(name.equals(segmentName))
621                 return transcriptName;
622             }
623             catch(InvalidRelationException e)
624             {
625               // TODO Auto-generated catch block
626               e.printStackTrace();
627             }
628           }
629         }
630       }
631     }
632 
633     feature = getProtein(name);
634 
635     if(feature != null)
636     {
637       for(int i=0; i<transcriptNames.size(); i++)
638       {
639         String transcriptName = (String)transcriptNames.get(i);
640         Feature protein = getProteinOfTranscript(transcriptName);
641         try
642         {
643           String proteinsName = (String)protein.getQualifierByName("ID").getValues().get(0);
644           if(name.equals(proteinsName))
645             return transcriptName;
646         }
647         catch(InvalidRelationException e)
648         {
649           // TODO Auto-generated catch block
650           e.printStackTrace();
651         }
652       }
653     }
654 
655     // search children of all transcripts
656     List<Feature> transcripts = getTranscripts();
657     for(int i=0;i<transcripts.size(); i++)
658     {
659       Feature transcript = transcripts.get(i);
660       Set<Feature> children = getChildren(transcript);
661       Iterator<Feature> it = children.iterator();
662       while(it.hasNext())
663       {
664         Feature f = it.next();
665         if(name.equals(GeneUtils.getUniqueName(f)))
666           return GeneUtils.getUniqueName(transcript);
667       }
668     }
669 
670     return null;
671   }
672 
673   /**
674    * Return the protein feature of a transcipt.
675    * @param transcript_name
676    * @return
677    */
getProteinOfTranscript(final String transcript_name)678   public Feature getProteinOfTranscript(final String transcript_name)
679   {
680     if(proteins.containsKey(transcript_name))
681       return (Feature)proteins.get(transcript_name);;
682 
683     return null;
684   }
685 
686   /**
687    * Return the 3'UTR features of a transcriot as a <code>List</code>.
688    * @param transcript_name
689    * @return
690    */
get3UtrOfTranscript(final String transcript_name)691   public List<Feature> get3UtrOfTranscript(final String transcript_name)
692   {
693     if(three_prime_utr.containsKey(transcript_name))
694       return (List<Feature>)three_prime_utr.get(transcript_name);
695 
696     return null;
697   }
698 
699   /**
700    * Return the 5'UTR features of a transcriot as a <code>List</code>.
701    * @param transcript_name
702    * @return
703    */
get5UtrOfTranscript(final String transcript_name)704   public List<Feature> get5UtrOfTranscript(final String transcript_name)
705   {
706     if(five_prime_utr.containsKey(transcript_name))
707       return (List<Feature>)five_prime_utr.get(transcript_name);
708 
709     return null;
710   }
711 
712   /**
713    * Utility to determine if this is the first or only UTR, so that
714    * partial qualifiers can be added to the correct UTR feature.
715    * @param utrName
716    * @param isFwd
717    * @return
718    */
isFirstUtr(final String utrName, final boolean isFwd)719   public boolean isFirstUtr(final String utrName, final boolean isFwd)
720   {
721     try
722     {
723       Feature this5Utr = getFeatureFromHash(utrName, five_prime_utr);
724       if (this5Utr != null)
725       {
726         String transcript_name = getQualifier(this5Utr, "Parent");
727         List<Feature> utrs = get5UtrOfTranscript(transcript_name);
728         if (utrs.size() == 1)
729           return true;
730 
731         for (Feature utr : utrs)
732         {
733           if (isFwd && utr.getFirstBase() < this5Utr.getFirstBase())
734             return false;
735           else if (!isFwd && utr.getLastBase() > this5Utr.getLastBase())
736             return false;
737         }
738         return true;
739       }
740 
741 
742       Feature this3Utr = getFeatureFromHash(utrName, three_prime_utr);
743       if (this3Utr != null)
744       {
745         String transcript_name = getQualifier(this3Utr, "Parent");
746         List<Feature> utrs = get3UtrOfTranscript(transcript_name);
747         if (utrs.size() == 1)
748           return true;
749 
750         for (Feature utr : utrs)
751         {
752           if (!isFwd && utr.getFirstBase() < this3Utr.getFirstBase())
753             return false;
754           else if (isFwd && utr.getLastBase() > this3Utr.getLastBase())
755             return false;
756         }
757         return true;
758       }
759     }
760     catch(InvalidRelationException ire){}
761     return false;
762   }
763 
764   /**
765    * Return the other child features of a transcriot as a <code>List</code>.
766    * @param transcript_name
767    * @return
768    */
getOtherFeaturesOfTranscript(final String transcript_name)769   public List<Feature> getOtherFeaturesOfTranscript(final String transcript_name)
770   {
771     if(other_features.containsKey(transcript_name))
772       return other_features.get(transcript_name);
773     return null;
774   }
775 
776   /**
777    * Get a list of trancripts.
778    * @return
779    */
getTranscripts()780   public List<Feature> getTranscripts()
781   {
782     return transcripts;
783   }
784 
785 
786   /**
787    * Get a list of trancripts.
788    * @return
789    */
getTranscriptNames()790   private List<String> getTranscriptNames()
791   {
792     List<String> names = new Vector<String>();
793     for(int i=0; i<transcripts.size(); i++)
794     {
795       Feature f = (Feature)transcripts.get(i);
796       try
797       {
798         names.add( (String)f.getQualifierByName("ID").getValues().get(0) );
799       }
800       catch(InvalidRelationException e)
801       {
802         // TODO Auto-generated catch block
803         e.printStackTrace();
804       }
805 
806     }
807 
808     return names;
809   }
810 
811   /**
812    * Test if a name is already used in this gene model
813    * @param name
814    * @return
815    */
isUniqueName(final String name)816   private boolean isUniqueName(final String name)
817   {
818     if(isTranscript(name))
819       return false;
820     if(isSplicedFeatures(name))
821       return false;
822 
823     try
824     {
825       if(getFeatureFromHash(name, three_prime_utr) != null)
826         return false;
827       if(getFeatureFromHash(name, five_prime_utr) != null)
828         return false;
829       if(getFeatureFromHash(name, other_features) != null)
830         return false;
831 
832       final Enumeration<Feature> enum_pp = proteins.elements();
833       while(enum_pp.hasMoreElements())
834       {
835         final Feature pp = enum_pp.nextElement();
836         if( getQualifier(pp, "ID").equals(name) )
837           return false;
838       }
839 
840       if( getQualifier(getGene(), "ID").equals(name) )
841         return false;
842     }
843     catch(InvalidRelationException e)
844     {
845       e.printStackTrace();
846     }
847 
848     return true;
849   }
850 
851   /**
852    * Test if the name is a transcript in this gene model.
853    * @param feature_id
854    * @return true if a transcript
855    */
isTranscript(final String feature_id)856   public boolean isTranscript(final String feature_id)
857   {
858     try
859     {
860       for(int i=0; i<transcripts.size(); i++)
861       {
862         if(feature_id.equals(getQualifier((Feature)transcripts.get(i), "ID")))
863           return true;
864       }
865     }
866     catch(InvalidRelationException e)
867     {
868       // TODO Auto-generated catch block
869       e.printStackTrace();
870     }
871 
872     return false;
873   }
874 
875   /**
876    * Test if this is an exon of transcript.
877    * @param feature_id    exon feature
878    * @param transcript_id transcript feature
879    * @return
880    */
isSplicedFeatures(final String feature_id)881   private boolean isSplicedFeatures(final String feature_id)
882   {
883     List<Feature> splicedFeatures = new Vector<Feature>();
884     List<Feature> transcripts = getTranscripts();
885 
886     try
887     {
888       for(int i = 0; i < transcripts.size(); i++)
889       {
890         Feature transcript = (Feature) transcripts.get(i);
891         String transcript_id = getQualifier(transcript, "ID");
892         List<Feature> splicedSites = getSplicedFeaturesOfTranscript(transcript_id);
893         if(splicedSites != null)
894           splicedFeatures.addAll(splicedSites);
895       }
896 
897       if(splicedFeatures == null)
898         return false;
899 
900       for(int i=0; i<splicedFeatures.size(); i++)
901       {
902         GFFStreamFeature feature = (GFFStreamFeature)splicedFeatures.get(i);
903         RangeVector rv = feature.getLocation().getRanges();
904         for(int j=0; j<rv.size(); j++)
905         {
906           String this_feature_id = feature.getSegmentID((Range)rv.get(j));
907           if(feature_id.equals(this_feature_id))
908             return true;
909         }
910       }
911     }
912     catch(InvalidRelationException e)
913     {
914       e.printStackTrace();
915     }
916 
917     return false;
918   }
919 
920   /**
921    * Method to automatically generate ID's for transcripts
922    * @param transcript_key
923    * @return
924    */
autoGenerateTanscriptName(String transcript_key)925   public String autoGenerateTanscriptName(String transcript_key)
926   {
927     try
928     {
929       String name = getQualifier(getGene(), "ID");
930       int auto = 1;
931       while( isTranscript( name + "." + auto ) &&
932              auto < 50)
933         auto++;
934       return name + "." + auto;
935     }
936     catch(InvalidRelationException e)
937     {
938       // TODO Auto-generated catch block
939       e.printStackTrace();
940     }
941     return null;
942   }
943 
944   /**
945    * Generate new names for exon features for this gene model
946    * @param transcript_id
947    * @return
948    */
autoGenerateSplicedFeatureName(final String transcript_id)949   public String autoGenerateSplicedFeatureName(final String transcript_id)
950   {
951     try
952     {
953       int index = transcript_id.lastIndexOf('.');
954       if(index == -1)
955         index = transcript_id.lastIndexOf(':');
956       int transcript_number = -1;
957       String name = (String)getGene().getQualifierByName("ID").getValues().get(0);
958 
959       if(index > -1)
960       {
961         try
962         {
963           transcript_number = Integer.parseInt(transcript_id.substring(index+1));
964         }
965         catch(NumberFormatException nfe)
966         {
967           transcript_number = -1;
968         }
969       }
970 
971       if(transcript_number < 1)
972       {
973         for(transcript_number = 0; transcript_number <= transcripts.size();
974             transcript_number++)
975         {
976           Feature transcript = (Feature) transcripts.get(transcript_number);
977           if(transcript_id.equals(getQualifier(transcript, "ID")))
978             break;
979         }
980       }
981       if(transcript_number == 0)
982         name = name + ":exon:";
983       else
984         name = name + "." + transcript_number + ":exon:";
985 
986       int auto = 1;
987       while( isSplicedFeatures(name + auto) && auto < 50)
988         auto++;
989       return name + auto;
990     }
991     catch(InvalidRelationException e)
992     {
993       e.printStackTrace();
994     }
995     return null;
996   }
997 
998 
999   /**
1000    * Generate new names for peptide features for this gene model
1001    * @param transcript_id
1002    * @return
1003    */
autoGeneratePepName(final String transcript_id)1004   public String autoGeneratePepName(final String transcript_id)
1005   {
1006     try
1007     {
1008       int index = transcript_id.lastIndexOf('.');
1009       if(index == -1)
1010         index = transcript_id.lastIndexOf(':');
1011       int transcript_number = -1;
1012 
1013       if(index > -1)
1014       {
1015         try
1016         {
1017           transcript_number = Integer.parseInt(transcript_id.substring(index+1));
1018         }
1019         catch(NumberFormatException nfe)
1020         {
1021           transcript_number = -1;
1022         }
1023       }
1024 
1025       if(transcript_number < 1)
1026       {
1027         for(transcript_number = 1; transcript_number <= transcripts.size();
1028             transcript_number++)
1029         {
1030           Feature transcript = (Feature) transcripts.get(transcript_number - 1);
1031           if(transcript_id.equals(getQualifier(transcript, "ID")))
1032             break;
1033         }
1034       }
1035 
1036       String name = (String)getGene().getQualifierByName("ID").getValues().get(0);
1037 
1038       if(isUniqueName(name+ "." + transcript_number + ":pep"))
1039         return name+ "." + transcript_number + ":pep";
1040       else
1041         return name + "." + transcript_number + "a:pep";
1042     }
1043     catch(InvalidRelationException e)
1044     {
1045       e.printStackTrace();
1046     }
1047     return null;
1048   }
1049 
1050   /**
1051    * Generate new names for generic region features for this gene model
1052    * @param transcript_id
1053    * @return
1054    */
autoGenerateFeatureName(final String transcript_id, final String keyName)1055   public String autoGenerateFeatureName(final String transcript_id,
1056                                         final String keyName)
1057   {
1058     String featureName = "";
1059     try
1060     {
1061       featureName =
1062         (String)getGene().getQualifierByName("ID").getValues().get(0);
1063     }
1064     catch(InvalidRelationException e){}
1065 
1066     final Pattern pattern = Pattern.compile("\\d+$");
1067     final Matcher matcher = pattern.matcher(transcript_id);
1068     if(matcher.find())
1069       featureName = featureName+"."+matcher.group()+":"+keyName;
1070     else
1071       featureName = featureName+":"+keyName;
1072 
1073     if(!isUniqueName(featureName))
1074     {
1075       int num = 1;
1076       while(!isUniqueName(featureName + ":" + num) && num < 100)
1077         num++;
1078       featureName = featureName + ":" + num;
1079     }
1080 
1081     return featureName;
1082   }
1083 
1084   /**
1085    * Search for the feature with a particular uniquename
1086    * @param name  uniquename
1087    * @return
1088    */
getFeatureFromId(final String name)1089   public Object getFeatureFromId(final String name)
1090   {
1091     Object feature = null;
1092 
1093     // check gene
1094     try
1095     {
1096       final String uniquename = getQualifier(gene, "ID");
1097 
1098       if(uniquename.equals(name))
1099         return gene;
1100     }
1101     catch(InvalidRelationException e)
1102     {
1103       e.printStackTrace();
1104     }
1105 
1106     // check transcript
1107     StringVector sv = new StringVector();
1108     sv.add(name);
1109 
1110     feature = containsTranscript(sv);
1111 
1112     if(feature != null)
1113       return feature;
1114 
1115     // check exons
1116     feature = getSplicedFeatures(name);
1117 
1118     if(feature != null)
1119       return feature;
1120 
1121     feature = getProtein(name);
1122 
1123     if(feature != null)
1124       return feature;
1125 
1126     try
1127     {
1128       feature = getFeatureFromHash(name, three_prime_utr);
1129       if(feature != null)
1130         return feature;
1131 
1132       feature = getFeatureFromHash(name, five_prime_utr);
1133       if(feature != null)
1134         return feature;
1135 
1136       feature = getFeatureFromHash(name, other_features);
1137     }
1138     catch(InvalidRelationException e)
1139     {
1140       e.printStackTrace();
1141     }
1142 
1143     return feature;
1144   }
1145 
1146   /**
1147    * Routine to look for a exon with a particular
1148    * uniquename
1149    * @param name
1150    * @return
1151    */
getSplicedFeatures(final String name)1152   private Feature getSplicedFeatures(final String name)
1153   {
1154     Enumeration<List<Feature>> enum_exons = splicedFeatures.elements();
1155     try
1156     {
1157       while(enum_exons.hasMoreElements())
1158       {
1159         List<Feature> exons = enum_exons.nextElement();
1160 
1161         for(int i=0; i<exons.size(); i++)
1162         {
1163           String uniquename = getQualifier((Feature)exons.get(i), "ID");
1164 
1165           if(uniquename.equals(name))
1166             return (Feature)exons.get(i);
1167         }
1168       }
1169     }
1170     catch(InvalidRelationException e)
1171     {
1172       e.printStackTrace();
1173     }
1174     return null;
1175   }
1176 
getProtein(final String id)1177   private Feature getProtein(final String id)
1178   {
1179     Enumeration<Feature> enum_proteins = proteins.elements();
1180     try
1181     {
1182       while(enum_proteins.hasMoreElements())
1183       {
1184         Feature protein = enum_proteins.nextElement();
1185         if(getQualifier(protein, "ID").equals(id))
1186           return protein;
1187       }
1188     }
1189     catch(InvalidRelationException e)
1190     {
1191       e.printStackTrace();
1192     }
1193     return null;
1194   }
1195 
1196   /**
1197    * Search for a feature uniquename
1198    * @param id
1199    * @param UTR
1200    * @return
1201    * @throws InvalidRelationException
1202    */
getFeatureFromHash(final String id, final Hashtable<String, List<Feature>> UTR)1203   private Feature getFeatureFromHash
1204                          (final String id,
1205                          final Hashtable<String, List<Feature>> UTR)
1206           throws InvalidRelationException
1207   {
1208     Enumeration<List<Feature>> enum_utr = UTR.elements();
1209 
1210     while(enum_utr.hasMoreElements())
1211     {
1212       List<Feature> utrs = enum_utr.nextElement();
1213 
1214       for(int i=0; i<utrs.size(); i++)
1215       {
1216         Feature utr = utrs.get(i);
1217         if(getQualifier(utr, "ID").equals(id))
1218           return utr;
1219       }
1220     }
1221 
1222     return null;
1223   }
1224 
1225   /**
1226    * Utility for get feature ID and Parent qualifiers.
1227    * @param feature
1228    * @param name
1229    * @return
1230    * @throws InvalidRelationException
1231    */
getQualifier(final Feature feature, final String name)1232   public String getQualifier(final Feature feature,
1233                               final String name)
1234           throws InvalidRelationException
1235   {
1236     Qualifier qualifier = feature.getQualifierByName(name);
1237     if(qualifier == null)
1238       return null;
1239 
1240     return (String)(qualifier.getValues().get(0));
1241   }
1242 
1243   /**
1244    * Get the srcfeature residue length
1245    * @return
1246    */
getSeqlen()1247   public int getSeqlen()
1248   {
1249     return seqlen;
1250   }
1251 
  /**
   * Set the srcfeature residue length.
   * @param seqlen  the new value stored in the <code>seqlen</code> field
   */
  public void setSeqlen(int seqlen)
  {
    this.seqlen = seqlen;
  }
1260 
getSrcfeature_id()1261   public int getSrcfeature_id()
1262   {
1263     return srcfeature_id;
1264   }
1265 
  /**
   * Set the srcfeature_id held by this gene model.
   * @param srcfeature_id  the new value stored in the
   *                       <code>srcfeature_id</code> field
   */
  public void setSrcfeature_id(int srcfeature_id)
  {
    this.srcfeature_id = srcfeature_id;
  }
1270 
getSplicedFeatures()1271   public Hashtable<String, List<Feature>> getSplicedFeatures()
1272   {
1273     return splicedFeatures;
1274   }
1275 
1276   /**
1277    * Get the nucleotide location for a featureloc in amino acid
1278    * coordinates.
1279    * @param proteinFeature
1280    * @param featureLocToProtein
1281    * @return
1282    * @throws LocationParseException
1283    */
getNucLocation(final Feature proteinFeature, final FeatureLoc featureLocToProtein)1284   public Location getNucLocation(final Feature proteinFeature,
1285                                  final FeatureLoc featureLocToProtein)
1286          throws LocationParseException
1287   {
1288     String transcriptName = getTranscriptFromName(
1289                          GeneUtils.getUniqueName(proteinFeature));
1290     List<Feature> spliced = getSplicedFeaturesOfTranscript(transcriptName);
1291     if(spliced == null)
1292       return null;
1293 
1294     RangeVector ranges = new RangeVector();
1295     for(int i=0; i<spliced.size(); i++)
1296     {
1297       Feature f = spliced.get(i);
1298       if(f.getKey().getKeyString().equals(DatabaseDocument.EXONMODEL))
1299         ranges.addAll(f.getLocation().getRanges());
1300     }
1301 
1302     int start = proteinFeature.getLocation().getFirstBase();
1303     int fmin = start+(featureLocToProtein.getFmin()*3)+1;
1304     int fmax = start+(featureLocToProtein.getFmax()*3);
1305 
1306     int len = proteinFeature.getEntry().getSequence().length();
1307     if(fmax > len)
1308       fmax = len;
1309 
1310     if(ranges.size()>1)
1311     {
1312       Collections.sort(ranges, new RangeComparator());
1313 
1314       for(int i=0;i<ranges.size()-1; i++)
1315       {
1316         Range range1 = (Range) ranges.get(i);
1317         Range range2 = (Range) ranges.get(i+1);
1318         if(fmin > range1.getEnd())
1319           fmin += range2.getStart()-range1.getEnd();
1320         if(fmax > range1.getEnd())
1321           fmax += range2.getStart()-range1.getEnd();
1322       }
1323     }
1324 
1325     Location location;
1326     if(proteinFeature.getLocation().isComplement())
1327       location = new Location("complement("+fmin+".."+fmax+")");
1328     else
1329       location = new Location(fmin+".."+fmax);
1330     return location;
1331   }
1332 
1333 
1334   class RangeComparator implements Comparator<Range>
1335   {
compare(Range o1, Range o2)1336     public int compare(Range o1, Range o2)
1337     {
1338       int start1 = o1.getStart();
1339       int start2 = o2.getStart();
1340       return start1-start2;
1341     }
1342   }
1343 }