1 #ifndef NGALIGN_BANDED_ALIGNER__HPP
2 #define NGALIGN_BANDED_ALIGNER__HPP
3 
4 /*  $Id: banded_aligner.hpp 204783 2010-09-10 16:21:43Z dicuccio $
5  * ===========================================================================
6  *
7  *                            PUBLIC DOMAIN NOTICE
8  *               National Center for Biotechnology Information
9  *
10  *  This software/database is a "United States Government Work" under the
11  *  terms of the United States Copyright Act.  It was written as part of
12  *  the author's official duties as a United States Government employee and
13  *  thus cannot be copyrighted.  This software/database is freely available
14  *  to the public for use. The National Library of Medicine and the U.S.
15  *  Government have not placed any restriction on its use or reproduction.
16  *
17  *  Although all reasonable efforts have been taken to ensure the accuracy
18  *  and reliability of the software and data, the NLM and the U.S.
19  *  Government do not and cannot warrant the performance or results that
20  *  may be obtained by using this software or data. The NLM and the U.S.
21  *  Government disclaim all warranties, express or implied, including
22  *  warranties of performance, merchantability or fitness for any particular
23  *  purpose.
24  *
25  *  Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors:  Nathan Bouk
30  *
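 * File Description:
 *   Declares CInstance, a CObject-derived record holding a query interval,
 *   a subject interval and a set of alignments, and CInstancedAligner, an
 *   IAlignmentFactory implementation that reports the name
 *   "instanced_mm_aligner".
 *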
33  */
34 
35 #include <corelib/ncbistd.hpp>
36 #include <corelib/ncbiobj.hpp>
37 #include <objects/seqloc/Na_strand.hpp>
38 
39 #include <objects/seqloc/Seq_loc.hpp>
40 #include <objects/seqloc/Seq_id.hpp>
41 #include <objmgr/scope.hpp>
42 #include <algo/blast/api/blast_types.hpp>
43 #include <algo/blast/api/bl2seq.hpp>
44 #include <algo/blast/api/blast_options_handle.hpp>
45 #include <algo/blast/api/blast_nucl_options.hpp>
46 #include <objects/seqalign/Seq_align.hpp>
47 #include <objects/seqalign/Seq_align_set.hpp>
48 #include <objects/seqalign/Dense_seg.hpp>
49 
50 #include <algo/align/ngalign/ngalign_interface.hpp>
51 
52 
53 BEGIN_NCBI_SCOPE
54 
55 BEGIN_SCOPE(objects)
56     class CScope;
57     class CSeq_align;
58     class CSeq_align_set;
59     class CSeq_id;
60     class CDense_seg;
61 END_SCOPE(objects)
62 
63 
64 
65 
66 class CInstance : public CObject {
67 public:
68 
69     CInstance(const CRef<objects::CSeq_align> Align);
70     CInstance(const objects::CSeq_align_set& AlignSet);
71 
72     void MergeIn(CRef<objects::CSeq_align> Align);
73 
74     bool IsAlignmentContained(const objects::CSeq_align& Align) const;
75     int GapDistance(const objects::CSeq_align& Align) const;
76 
77     double SubjToQueryRatio() const;
78     TSeqPos QueryLength() const;
79 
80     objects::CSeq_interval  Query;
81     objects::CSeq_interval  Subject;
82     objects::CSeq_align_set Alignments;
83 };
84 
85 
86 class CInstancedAligner : public IAlignmentFactory
87 {
88 public:
89 
CInstancedAligner(int TimeOutSeconds,float MaxRatio,float MinPctCoverage,int Threshold)90     CInstancedAligner(int TimeOutSeconds, float MaxRatio, float MinPctCoverage, int Threshold)
91         : m_TimeOutSeconds(TimeOutSeconds), m_MaxRatio(MaxRatio),
92           m_MinPctCoverage(MinPctCoverage), m_Threshold(Threshold),
93           m_Match(2), m_Mismatch(-3), m_GapOpen(-100), m_GapExtend(-1) { ; }
94 
95     // Defaults to +2, -3, -100, -1
SetPathValues(int Match,int Mismatch,int GapOpen,int GapExtend)96     void SetPathValues(int Match, int Mismatch, int GapOpen, int GapExtend)
97     {
98         m_Match = Match;
99         m_Mismatch = Mismatch;
100         m_GapOpen = GapOpen;
101         m_GapExtend = GapExtend;
102     }
103 
GetName() const104     string GetName() const { return "instanced_mm_aligner"; }
105 
106     TAlignResultsRef GenerateAlignments(objects::CScope& Scope,
107                                         ISequenceSet* QuerySet,
108                                         ISequenceSet* SubjectSet,
109                                         TAlignResultsRef AccumResults);
110 
111 protected:
112 
113 
114 private:
115 
116     int m_TimeOutSeconds;
117     float m_MaxRatio;
118     float m_MinPctCoverage;
119     int m_Threshold;
120 
121     int m_Match, m_Mismatch, m_GapOpen, m_GapExtend;
122 
123 
124     void x_RunAligner(objects::CScope& Scope,
125                       CQuerySet& QueryAligns,
126                       TAlignResultsRef Results);
127 
128     CRef<objects::CDense_seg> x_RunMMGlobal(const objects::CSeq_id& QueryId,
129                                             const objects::CSeq_id& SubjectId,
130                                             objects::ENa_strand Strand,
131                                             TSeqPos QueryStart,
132                                             TSeqPos QueryStop,
133                                             TSeqPos SubjectStart,
134                                             TSeqPos SubjectStop,
135                                             objects::CScope& Scope);
136 
137     CRef<objects::CSeq_align_set> x_RunCleanup(const objects::CSeq_align_set& AlignSet,
138                                                objects::CScope& Scope);
139 
140     void x_GetCleanupInstances(CQuerySet& QueryAligns, objects::CScope& Scope,
141                         vector<CRef<CInstance> >& Instances);
142     void x_GetDistanceInstances(CQuerySet& QueryAligns, objects::CScope& Scope,
143                         vector<CRef<CInstance> >& Instances);
144 
145     void x_FilterInstances(vector<CRef<CInstance> >& Instances, double MaxRatio);
146 
147     bool x_MinCoverageCheck(const CQuerySet& QueryAligns);
148 };
149 
150 
151 
152 
153 //TSeqPos x_CalcQueryCoverage(TAlignSetRef Alignments, int Row, objects::CScope& Scope);
154 
155 
156 
157 
158 
159 END_NCBI_SCOPE
160 
161 #endif
162