1 #ifndef OBJECTS_ALNMGR___ALNSEQ__HPP 2 #define OBJECTS_ALNMGR___ALNSEQ__HPP 3 4 /* $Id: alnseq.hpp 310697 2011-07-05 14:21:21Z grichenk $ 5 * =========================================================================== 6 * 7 * PUBLIC DOMAIN NOTICE 8 * National Center for Biotechnology Information 9 * 10 * This software/database is a "United States Government Work" under the 11 * terms of the United States Copyright Act. It was written as part of 12 * the author's official duties as a United States Government employee and 13 * thus cannot be copyrighted. This software/database is freely available 14 * to the public for use. The National Library of Medicine and the U.S. 15 * Government have not placed any restriction on its use or reproduction. 16 * 17 * Although all reasonable efforts have been taken to ensure the accuracy 18 * and reliability of the software and data, the NLM and the U.S. 19 * Government do not and cannot warrant the performance or results that 20 * may be obtained by using this software or data. The NLM and the U.S. 21 * Government disclaim all warranties, express or implied, including 22 * warranties of performance, merchantability or fitness for any particular 23 * purpose. 24 * 25 * Please cite the author in any work or product based on this material. 26 * 27 * =========================================================================== 28 * 29 * Author: Kamen Todorov, NCBI 30 * 31 * File Description: 32 * Alignment sequences 33 * 34 */ 35 36 37 #include <objects/seqloc/Seq_id.hpp> 38 #include <objmgr/seq_vector.hpp> 39 #include <objtools/alnmgr/alnexception.hpp> 40 41 42 BEGIN_NCBI_SCOPE 43 44 BEGIN_objects_SCOPE // namespace ncbi::objects:: 45 46 47 class CAlnMixSeq; 48 class CAlnMixStarts; 49 class CAlnMixSegment; 50 class CAlnMixMatch; 51 class CAlnMixMerger; 52 class CBioseq_Handle; 53 class CScope; 54 class CDense_seg; 55 56 57 class NCBI_XALNMGR_EXPORT CAlnMixSequences : public CObject 58 { 59 public: 60 61 // Constructors 62 CAlnMixSequences(void); 63 CAlnMixSequences(CScope& scope); 64 65 typedef vector<CRef<CAlnMixSeq> > TSeqs; 66 Get() const67 const TSeqs& Get () const { return m_Seqs; }; Set()68 TSeqs& Set () { return m_Seqs; }; 69 70 enum EAddFlags { 71 // Determine score of each aligned segment in the process of mixing 72 // (only makes sense if scope was provided at construction time) 73 fCalcScore = 0x01, 74 75 // Force translation of nucleotide rows 76 // This will result in an output Dense-seg that has Widths, 77 // no matter if the whole alignment consists of nucleotides only. 78 fForceTranslation = 0x02, 79 80 // Used for mapping sequence to itself 81 fPreserveRows = 0x04 82 }; 83 typedef int TAddFlags; // binary OR of EMergeFlags 84 85 void Add (const CDense_seg& ds, TAddFlags flags = 0); 86 87 88 // Sorting algirithms 89 void SortByScore(); 90 void SortByChainScore(); 91 92 93 // Rows-related methods 94 void BuildRows(); 95 void InitRowsStartIts(); 96 void InitExtraRowsStartIts(); 97 void RowsStartItsContsistencyCheck(size_t match_idx); 98 99 private: 100 friend class CAlnMix; 101 friend class CAlnMixMatches; 102 friend class CAlnMixSegments; 103 friend class CAlnMixMerger; 104 105 typedef map<CBioseq_Handle, CRef<CAlnMixSeq> > TBioseqHandleMap; 106 107 // CRef<Seq-id> comparison predicate 108 struct SSeqIds { 109 bool operator ()CAlnMixSequences::SSeqIds110 operator() (const CRef<CSeq_id>& id1, const CRef<CSeq_id>& id2) const { 111 return (*id1 < *id2); 112 } 113 }; 114 typedef map<CRef<CSeq_id>, CRef<CAlnMixSeq>, SSeqIds> TSeqIdMap; 115 116 static bool x_CompareScores (const CRef<CAlnMixSeq>& seq1, 117 const CRef<CAlnMixSeq>& seq2); 118 static bool x_CompareChainScores(const CRef<CAlnMixSeq>& seq1, 119 const CRef<CAlnMixSeq>& seq2); 120 121 void x_IdentifyAlnMixSeq (CRef<CAlnMixSeq>& aln_seq, 122 const CSeq_id& seq_id); 123 124 size_t m_DsCnt; 125 map<const CDense_seg*, 126 vector<CRef<CAlnMixSeq> > > m_DsSeq; 127 CRef<CScope> m_Scope; 128 TSeqs m_Seqs; 129 TSeqIdMap m_SeqIds; 130 TBioseqHandleMap m_BioseqHandles; 131 bool m_ContainsAA; 132 bool m_ContainsNA; 133 vector<CRef<CAlnMixSeq> > m_Rows; 134 list<CRef<CAlnMixSeq> > m_ExtraRows; 135 }; 136 137 138 139 class NCBI_XALNMGR_EXPORT CAlnMixSeq : public CObject 140 { 141 public: 142 CAlnMixSeq(void); 143 ~CAlnMixSeq(); 144 145 typedef list<CAlnMixMatch *> TMatchList; 146 147 int m_DsCnt; 148 const CBioseq_Handle* m_BioseqHandle; 149 CRef<CSeq_id> m_SeqId; 150 int m_Score; 151 int m_ChainScore; 152 int m_StrandScore; 153 bool m_IsAA; 154 unsigned m_Width; 155 int m_Frame; 156 bool m_PositiveStrand; 157 CAlnMixSeq * m_RefBy; 158 CAlnMixSeq * m_ExtraRow; 159 int m_ExtraRowIdx; 160 CAlnMixSeq * m_AnotherRow; 161 int m_DsIdx; 162 int m_SeqIdx; 163 int m_ChildIdx; 164 int m_RowIdx; 165 TMatchList m_MatchList; 166 GetStarts() const167 const CAlnMixStarts& GetStarts() const { return *m_Starts; } SetStarts()168 CAlnMixStarts& SetStarts() { return *m_Starts; } 169 GetPlusStrandSeqVector(void)170 CSeqVector& GetPlusStrandSeqVector(void) 171 { 172 if ( !m_PlusStrandSeqVector ) { 173 m_PlusStrandSeqVector = new CSeqVector 174 (m_BioseqHandle->GetSeqVector(CBioseq_Handle::eCoding_Iupac, 175 CBioseq_Handle::eStrand_Plus)); 176 } 177 return *m_PlusStrandSeqVector; 178 } 179 GetMinusStrandSeqVector(void)180 CSeqVector& GetMinusStrandSeqVector(void) 181 { 182 if ( !m_MinusStrandSeqVector ) { 183 m_MinusStrandSeqVector = new CSeqVector 184 (m_BioseqHandle->GetSeqVector(CBioseq_Handle::eCoding_Iupac, 185 CBioseq_Handle::eStrand_Minus)); 186 } 187 return *m_MinusStrandSeqVector; 188 } 189 GetSeqString(string & s,TSeqPos start,TSeqPos len,bool positive_strand=true)190 void GetSeqString(string& s, 191 TSeqPos start, 192 TSeqPos len, 193 bool positive_strand = true) 194 { 195 if (positive_strand) { 196 GetPlusStrandSeqVector().GetSeqData(start, start + len, s); 197 } else { 198 TSeqPos size = GetMinusStrandSeqVector().size(); 199 GetMinusStrandSeqVector().GetSeqData(size - (start + len), 200 size - start, 201 s); 202 } 203 if (s.length() != len) { 204 string errstr = "Unable to load data for seq-id=\"" + 205 m_SeqId->AsFastaString() + "\" " 206 "start=" + NStr::UIntToString(start) + " " 207 "length=" + NStr::UIntToString(len) + "."; 208 NCBI_THROW(CAlnException, eInvalidSeqId, 209 errstr); 210 } 211 } 212 213 private: 214 CRef<CSeqVector> m_PlusStrandSeqVector; 215 CRef<CSeqVector> m_MinusStrandSeqVector; 216 auto_ptr<CAlnMixStarts> m_Starts; 217 218 /// forbidden 219 CAlnMixSeq(const CAlnMixSeq&); 220 CAlnMixSeq& operator=(const CAlnMixSeq&); 221 }; 222 223 224 225 END_objects_SCOPE // namespace ncbi::objects:: 226 227 END_NCBI_SCOPE 228 229 #endif // OBJECTS_ALNMGR___ALNSEQ__HPP 230