1 #ifndef __GNOMON__ANNOT__HPP
2 #define __GNOMON__ANNOT__HPP
3 
/*  $Id: annot.hpp 465881 2015-04-27 14:27:13Z souvorov $
 * ===========================================================================
 *
 *                            PUBLIC DOMAIN NOTICE
 *               National Center for Biotechnology Information
 *
 *  This software/database is a "United States Government Work" under the
 *  terms of the United States Copyright Act.  It was written as part of
 *  the author's official duties as a United States Government employee and
 *  thus cannot be copyrighted.  This software/database is freely available
 *  to the public for use. The National Library of Medicine and the U.S.
 *  Government have not placed any restriction on its use or reproduction.
 *
 *  Although all reasonable efforts have been taken to ensure the accuracy
 *  and reliability of the software and data, the NLM and the U.S.
 *  Government do not and cannot warrant the performance or results that
 *  may be obtained by using this software or data. The NLM and the U.S.
 *  Government disclaim all warranties, express or implied, including
 *  warranties of performance, merchantability or fitness for any particular
 *  purpose.
 *
 *  Please cite the author in any work or product based on this material.
 *
 * ===========================================================================
 *
 * Authors:  Vyacheslav Chetvernin
 *
 * File Description:
 *
 * Builds annotation models out of chained alignments:
 * selects good chains as alternatively spliced genes,
 * selects good chains nested inside the introns of other chains,
 * filters the remaining chains to leave one chain per placement,
 * and runs gnomon to improve chains and predict models in regions
 * without chains.
 *
 */
40 
41 #include <algo/gnomon/gnomon_model.hpp>
42 #include <algo/gnomon/gnomon.hpp>
43 #include <algo/gnomon/chainer.hpp>
44 
45 #include <objects/seqloc/Seq_loc.hpp>
46 
47 BEGIN_NCBI_SCOPE
48 
// Forward declarations: this header only uses these types through a pointer
// (CArgDescriptions*) and a const reference (const CArgs&) in
// CGnomonAnnotatorArgUtil, so their full definitions are not required here.
class CArgDescriptions;
class CArgs;
51 
52 BEGIN_SCOPE(gnomon)
53 
54 
55 class NCBI_XALGOGNOMON_EXPORT CGeneSelector {
56 public:
57     CGeneSelector();
58 
59     /// Filters genes, excluding genes flagged to be skipped.
60     ///
61     /// Results are pushed to the back of the specifed output container.
62     ///
63     /// @param chains Input list of models, which are not modified.
64     /// @param bad_aligns Output list of skipped models.
65     /// @param dest Output list of models which were not skipped.
66     /// @return List of models which were not skipped.
67     /// @warning The models' gene IDs are renumbered.
68     ///
69     /// @see SelectGenes(TGeneModelList& chains, TGeneModelList& bad_aligns)
70     void FilterGenes(TGeneModelList& chains, TGeneModelList& bad_aligns,
71                      TGeneModelList& dest);
72 
73     /// Filters genes, excluding genes flagged to be skipped.
74     ///
75     /// Results are returned in a new list.
76     ///
77     /// @param chains Input list of models, which are not modified.
78     /// @param bad_aligns Output list of skipped models.
79     /// @return List of models which were not skipped.
80     /// @warning The models' gene IDs are renumbered.
81     ///
82     /// @see SelectGenes(TGeneModelList& chains, TGeneModelList& bad_aligns)
83     TGeneModelList FilterGenes(TGeneModelList& chains, TGeneModelList& bad_aligns);
84 
85 private:
86 };
87 
88 class NCBI_XALGOGNOMON_EXPORT CGnomonAnnotator : public CGnomonAnnotator_Base {
89 public:
90     CGnomonAnnotator();
91     ~CGnomonAnnotator();
92 
GnomonNeeded() const93     bool GnomonNeeded() const { return do_gnomon; }
94 
95     void Predict(TGeneModelList& models, TGeneModelList& bad_aligns);
96     void Predict(TGeneModelList& models, TGeneModelList& bad_aligns, TSignedSeqPos left, TSignedSeqPos right);
97 
98 public:
99     int mincontig;
100     int minCdsLen;
101 
102 private:
103     void RemoveShortHolesAndRescore(TGeneModelList chains);
104     void Predict(TSignedSeqPos llimit, TSignedSeqPos rlimit, TGeneModelList::const_iterator il, TGeneModelList::const_iterator ir,
105                  TGeneModelList& models,
106                  bool leftmostwall, bool rightmostwall, bool leftmostanchor, bool rightmostanchor,
107                  TGeneModelList& bad_aligns);
108 
109     double TryWithoutObviouslyBadAlignments(TGeneModelList& aligns, TGeneModelList& suspect_aligns, TGeneModelList& bad_aligns,
110                                             bool leftwall, bool rightwall, bool leftanchor, bool rightanchor,
111                                             TSignedSeqPos left, TSignedSeqPos right,
112                                             TSignedSeqRange& tested_range);
113     double TryToEliminateOneAlignment(TGeneModelList& suspect_aligns, TGeneModelList& bad_aligns,
114                                       bool leftwall, bool rightwall, bool leftanchor, bool rightanchor);
115     double TryToEliminateAlignmentsFromTail(TGeneModelList& suspect_aligns, TGeneModelList& bad_aligns,
116                                             bool leftwall, bool rightwall, bool leftanchor, bool rightanchor);
117     double ExtendJustThisChain(CGeneModel& chain, TSignedSeqPos left, TSignedSeqPos right);
118 
119     bool do_gnomon;
120     int window;
121     int margin;
122     bool wall;
123     double mpp;
124     double nonconsensp;
125 
126     friend class CGnomonAnnotatorArgUtil;
127 };
128 
129 struct RemoveTrailingNs : public TransformFunction {
130     RemoveTrailingNs(const CResidueVec& seq);
131 
132     virtual void transform_model(CGeneModel& a);
133 private:
134     const CResidueVec& seq;
135 };
136 
137 class CModelCompare {
138 public:
139     static bool CanBeConnectedIntoOne(const CGeneModel& a, const CGeneModel& b);
140     static size_t CountCommonSplices(const CGeneModel& a, const CGeneModel& b);
141     static bool AreSimilar(const CGeneModel& a, const CGeneModel& b, int tolerance);
142     static bool BadOverlapTest(const CGeneModel& a, const CGeneModel& b);
143     static bool RangeNestedInIntron(TSignedSeqRange r, const CGeneModel& algn, bool check_in_holes = true);
144     static bool HaveCommonExonOrIntron(const CGeneModel& a, const CGeneModel& b);
145 };
146 
147 class NCBI_XALGOGNOMON_EXPORT CGnomonAnnotatorArgUtil {
148 public:
149     static void SetupArgDescriptions(CArgDescriptions* arg_desc);
150     static void ReadArgs(CGnomonAnnotator* annot, const CArgs& args);
151 };
152 
153 END_SCOPE(gnomon)
154 END_NCBI_SCOPE
155 
156 #endif  // __GNOMON__ANNOT__HPP
157 
158