1 #ifndef NGALIGN_BANDED_ALIGNER__HPP 2 #define NGALIGN_BANDED_ALIGNER__HPP 3 4 /* $Id: banded_aligner.hpp 204783 2010-09-10 16:21:43Z dicuccio $ 5 * =========================================================================== 6 * 7 * PUBLIC DOMAIN NOTICE 8 * National Center for Biotechnology Information 9 * 10 * This software/database is a "United States Government Work" under the 11 * terms of the United States Copyright Act. It was written as part of 12 * the author's official duties as a United States Government employee and 13 * thus cannot be copyrighted. This software/database is freely available 14 * to the public for use. The National Library of Medicine and the U.S. 15 * Government have not placed any restriction on its use or reproduction. 16 * 17 * Although all reasonable efforts have been taken to ensure the accuracy 18 * and reliability of the software and data, the NLM and the U.S. 19 * Government do not and cannot warrant the performance or results that 20 * may be obtained by using this software or data. The NLM and the U.S. 21 * Government disclaim all warranties, express or implied, including 22 * warranties of performance, merchantability or fitness for any particular 23 * purpose. 24 * 25 * Please cite the author in any work or product based on this material. 26 * 27 * =========================================================================== 28 * 29 * Authors: Nathan Bouk 30 * 31 * File Description: 32 * 33 */ 34 35 #include <corelib/ncbistd.hpp> 36 #include <corelib/ncbiobj.hpp> 37 #include <objects/seqloc/Na_strand.hpp> 38 39 #include <objects/seqloc/Seq_loc.hpp> 40 #include <objects/seqloc/Seq_id.hpp> 41 #include <objmgr/scope.hpp> 42 #include <algo/blast/api/blast_types.hpp> 43 #include <algo/blast/api/bl2seq.hpp> 44 #include <algo/blast/api/blast_options_handle.hpp> 45 #include <algo/blast/api/blast_nucl_options.hpp> 46 #include <objects/seqalign/Seq_align.hpp> 47 #include <objects/seqalign/Seq_align_set.hpp> 48 #include <objects/seqalign/Dense_seg.hpp> 49 50 #include <algo/align/ngalign/ngalign_interface.hpp> 51 52 53 BEGIN_NCBI_SCOPE 54 55 BEGIN_SCOPE(objects) 56 class CScope; 57 class CSeq_align; 58 class CSeq_align_set; 59 class CSeq_id; 60 class CDense_seg; 61 END_SCOPE(objects) 62 63 64 65 66 class CInstance : public CObject { 67 public: 68 69 CInstance(const CRef<objects::CSeq_align> Align); 70 CInstance(const objects::CSeq_align_set& AlignSet); 71 72 void MergeIn(CRef<objects::CSeq_align> Align); 73 74 bool IsAlignmentContained(const objects::CSeq_align& Align) const; 75 int GapDistance(const objects::CSeq_align& Align) const; 76 77 double SubjToQueryRatio() const; 78 TSeqPos QueryLength() const; 79 80 objects::CSeq_interval Query; 81 objects::CSeq_interval Subject; 82 objects::CSeq_align_set Alignments; 83 }; 84 85 86 class CInstancedAligner : public IAlignmentFactory 87 { 88 public: 89 CInstancedAligner(int TimeOutSeconds,float MaxRatio,float MinPctCoverage,int Threshold)90 CInstancedAligner(int TimeOutSeconds, float MaxRatio, float MinPctCoverage, int Threshold) 91 : m_TimeOutSeconds(TimeOutSeconds), m_MaxRatio(MaxRatio), 92 m_MinPctCoverage(MinPctCoverage), m_Threshold(Threshold), 93 m_Match(2), m_Mismatch(-3), m_GapOpen(-100), m_GapExtend(-1) { ; } 94 95 // Defaults to +2, -3, -100, -1 SetPathValues(int Match,int Mismatch,int GapOpen,int GapExtend)96 void SetPathValues(int Match, int Mismatch, int GapOpen, int GapExtend) 97 { 98 m_Match = Match; 99 m_Mismatch = Mismatch; 100 m_GapOpen = GapOpen; 101 m_GapExtend = GapExtend; 102 } 103 GetName() const104 string GetName() const { return "instanced_mm_aligner"; } 105 106 TAlignResultsRef GenerateAlignments(objects::CScope& Scope, 107 ISequenceSet* QuerySet, 108 ISequenceSet* SubjectSet, 109 TAlignResultsRef AccumResults); 110 111 protected: 112 113 114 private: 115 116 int m_TimeOutSeconds; 117 float m_MaxRatio; 118 float m_MinPctCoverage; 119 int m_Threshold; 120 121 int m_Match, m_Mismatch, m_GapOpen, m_GapExtend; 122 123 124 void x_RunAligner(objects::CScope& Scope, 125 CQuerySet& QueryAligns, 126 TAlignResultsRef Results); 127 128 CRef<objects::CDense_seg> x_RunMMGlobal(const objects::CSeq_id& QueryId, 129 const objects::CSeq_id& SubjectId, 130 objects::ENa_strand Strand, 131 TSeqPos QueryStart, 132 TSeqPos QueryStop, 133 TSeqPos SubjectStart, 134 TSeqPos SubjectStop, 135 objects::CScope& Scope); 136 137 CRef<objects::CSeq_align_set> x_RunCleanup(const objects::CSeq_align_set& AlignSet, 138 objects::CScope& Scope); 139 140 void x_GetCleanupInstances(CQuerySet& QueryAligns, objects::CScope& Scope, 141 vector<CRef<CInstance> >& Instances); 142 void x_GetDistanceInstances(CQuerySet& QueryAligns, objects::CScope& Scope, 143 vector<CRef<CInstance> >& Instances); 144 145 void x_FilterInstances(vector<CRef<CInstance> >& Instances, double MaxRatio); 146 147 bool x_MinCoverageCheck(const CQuerySet& QueryAligns); 148 }; 149 150 151 152 153 //TSeqPos x_CalcQueryCoverage(TAlignSetRef Alignments, int Row, objects::CScope& Scope); 154 155 156 157 158 159 END_NCBI_SCOPE 160 161 #endif 162