1 #ifndef __GNOMON__ANNOT__HPP 2 #define __GNOMON__ANNOT__HPP 3 4 /* $Id: annot.hpp 465881 2015-04-27 14:27:13Z souvorov $ 5 * =========================================================================== 6 * 7 * PUBLIC DOMAIN NOTICE 8 * National Center for Biotechnology Information 9 * 10 * This software/database is a "United States Government Work" under the 11 * terms of the United States Copyright Act. It was written as part of 12 * the author's official duties as a United States Government employee and 13 * thus cannot be copyrighted. This software/database is freely available 14 * to the public for use. The National Library of Medicine and the U.S. 15 * Government have not placed any restriction on its use or reproduction. 16 * 17 * Although all reasonable efforts have been taken to ensure the accuracy 18 * and reliability of the software and data, the NLM and the U.S. 19 * Government do not and cannot warrant the performance or results that 20 * may be obtained by using this software or data. The NLM and the U.S. 21 * Government disclaim all warranties, express or implied, including 22 * warranties of performance, merchantability or fitness for any particular 23 * purpose. 24 * 25 * Please cite the author in any work or product based on this material. 26 * 27 * =========================================================================== 28 * 29 * Authors: Vyacheslav Chetvernin 30 * 31 * File Description: 32 * 33 * Builds annotation models out of chained alignments: 34 * selects good chains as alternatively spliced genes, 35 * selects good chains inside other chains introns, 36 * other chains filtered to leave one chain per placement, 37 * gnomon is run to improve chains and predict models in regions w/o chains 38 * 39 */ 40 41 #include <algo/gnomon/gnomon_model.hpp> 42 #include <algo/gnomon/gnomon.hpp> 43 #include <algo/gnomon/chainer.hpp> 44 45 #include <objects/seqloc/Seq_loc.hpp> 46 47 BEGIN_NCBI_SCOPE 48 49 class CArgDescriptions; 50 class CArgs; 51 52 BEGIN_SCOPE(gnomon) 53 54 55 class NCBI_XALGOGNOMON_EXPORT CGeneSelector { 56 public: 57 CGeneSelector(); 58 59 /// Filters genes, excluding genes flagged to be skipped. 60 /// 61 /// Results are pushed to the back of the specifed output container. 62 /// 63 /// @param chains Input list of models, which are not modified. 64 /// @param bad_aligns Output list of skipped models. 65 /// @param dest Output list of models which were not skipped. 66 /// @return List of models which were not skipped. 67 /// @warning The models' gene IDs are renumbered. 68 /// 69 /// @see SelectGenes(TGeneModelList& chains, TGeneModelList& bad_aligns) 70 void FilterGenes(TGeneModelList& chains, TGeneModelList& bad_aligns, 71 TGeneModelList& dest); 72 73 /// Filters genes, excluding genes flagged to be skipped. 74 /// 75 /// Results are returned in a new list. 76 /// 77 /// @param chains Input list of models, which are not modified. 78 /// @param bad_aligns Output list of skipped models. 79 /// @return List of models which were not skipped. 80 /// @warning The models' gene IDs are renumbered. 81 /// 82 /// @see SelectGenes(TGeneModelList& chains, TGeneModelList& bad_aligns) 83 TGeneModelList FilterGenes(TGeneModelList& chains, TGeneModelList& bad_aligns); 84 85 private: 86 }; 87 88 class NCBI_XALGOGNOMON_EXPORT CGnomonAnnotator : public CGnomonAnnotator_Base { 89 public: 90 CGnomonAnnotator(); 91 ~CGnomonAnnotator(); 92 GnomonNeeded() const93 bool GnomonNeeded() const { return do_gnomon; } 94 95 void Predict(TGeneModelList& models, TGeneModelList& bad_aligns); 96 void Predict(TGeneModelList& models, TGeneModelList& bad_aligns, TSignedSeqPos left, TSignedSeqPos right); 97 98 public: 99 int mincontig; 100 int minCdsLen; 101 102 private: 103 void RemoveShortHolesAndRescore(TGeneModelList chains); 104 void Predict(TSignedSeqPos llimit, TSignedSeqPos rlimit, TGeneModelList::const_iterator il, TGeneModelList::const_iterator ir, 105 TGeneModelList& models, 106 bool leftmostwall, bool rightmostwall, bool leftmostanchor, bool rightmostanchor, 107 TGeneModelList& bad_aligns); 108 109 double TryWithoutObviouslyBadAlignments(TGeneModelList& aligns, TGeneModelList& suspect_aligns, TGeneModelList& bad_aligns, 110 bool leftwall, bool rightwall, bool leftanchor, bool rightanchor, 111 TSignedSeqPos left, TSignedSeqPos right, 112 TSignedSeqRange& tested_range); 113 double TryToEliminateOneAlignment(TGeneModelList& suspect_aligns, TGeneModelList& bad_aligns, 114 bool leftwall, bool rightwall, bool leftanchor, bool rightanchor); 115 double TryToEliminateAlignmentsFromTail(TGeneModelList& suspect_aligns, TGeneModelList& bad_aligns, 116 bool leftwall, bool rightwall, bool leftanchor, bool rightanchor); 117 double ExtendJustThisChain(CGeneModel& chain, TSignedSeqPos left, TSignedSeqPos right); 118 119 bool do_gnomon; 120 int window; 121 int margin; 122 bool wall; 123 double mpp; 124 double nonconsensp; 125 126 friend class CGnomonAnnotatorArgUtil; 127 }; 128 129 struct RemoveTrailingNs : public TransformFunction { 130 RemoveTrailingNs(const CResidueVec& seq); 131 132 virtual void transform_model(CGeneModel& a); 133 private: 134 const CResidueVec& seq; 135 }; 136 137 class CModelCompare { 138 public: 139 static bool CanBeConnectedIntoOne(const CGeneModel& a, const CGeneModel& b); 140 static size_t CountCommonSplices(const CGeneModel& a, const CGeneModel& b); 141 static bool AreSimilar(const CGeneModel& a, const CGeneModel& b, int tolerance); 142 static bool BadOverlapTest(const CGeneModel& a, const CGeneModel& b); 143 static bool RangeNestedInIntron(TSignedSeqRange r, const CGeneModel& algn, bool check_in_holes = true); 144 static bool HaveCommonExonOrIntron(const CGeneModel& a, const CGeneModel& b); 145 }; 146 147 class NCBI_XALGOGNOMON_EXPORT CGnomonAnnotatorArgUtil { 148 public: 149 static void SetupArgDescriptions(CArgDescriptions* arg_desc); 150 static void ReadArgs(CGnomonAnnotator* annot, const CArgs& args); 151 }; 152 153 END_SCOPE(gnomon) 154 END_NCBI_SCOPE 155 156 #endif // __GNOMON__ANNOT__HPP 157 158