1 #ifndef OBJECTS_ALNMGR___ALNSEQ__HPP
2 #define OBJECTS_ALNMGR___ALNSEQ__HPP
3 
4 /*  $Id: alnseq.hpp 310697 2011-07-05 14:21:21Z grichenk $
5 * ===========================================================================
6 *
7 *                            PUBLIC DOMAIN NOTICE
8 *               National Center for Biotechnology Information
9 *
10 *  This software/database is a "United States Government Work" under the
11 *  terms of the United States Copyright Act.  It was written as part of
12 *  the author's official duties as a United States Government employee and
13 *  thus cannot be copyrighted.  This software/database is freely available
14 *  to the public for use. The National Library of Medicine and the U.S.
15 *  Government have not placed any restriction on its use or reproduction.
16 *
17 *  Although all reasonable efforts have been taken to ensure the accuracy
18 *  and reliability of the software and data, the NLM and the U.S.
19 *  Government do not and cannot warrant the performance or results that
20 *  may be obtained by using this software or data. The NLM and the U.S.
21 *  Government disclaim all warranties, express or implied, including
22 *  warranties of performance, merchantability or fitness for any particular
23 *  purpose.
24 *
25 *  Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author:  Kamen Todorov, NCBI
30 *
31 * File Description:
32 *   Alignment sequences
33 *
34 */
35 
36 
37 #include <objects/seqloc/Seq_id.hpp>
38 #include <objmgr/seq_vector.hpp>
39 #include <objtools/alnmgr/alnexception.hpp>
40 
41 
42 BEGIN_NCBI_SCOPE
43 
44 BEGIN_objects_SCOPE // namespace ncbi::objects::
45 
46 
// Forward declarations.
//
// Alignment-mix classes. CAlnMixSeq is defined further down in this header;
// the remaining ones are only named here and are not defined in this header.
class CAlnMixSeq;
class CAlnMixStarts;
class CAlnMixSegment;
class CAlnMixMatch;
class CAlnMixMerger;

// Types this header refers to only by pointer, reference, CRef<> or as a
// container key in declarations.
class CBioseq_Handle;
class CScope;
class CDense_seg;
55 
56 
57 class NCBI_XALNMGR_EXPORT CAlnMixSequences : public CObject
58 {
59 public:
60 
61     // Constructors
62     CAlnMixSequences(void);
63     CAlnMixSequences(CScope& scope);
64 
65     typedef vector<CRef<CAlnMixSeq> > TSeqs;
66 
Get() const67     const TSeqs& Get        () const { return m_Seqs; };
Set()68     TSeqs&       Set        () { return m_Seqs; };
69 
70     enum EAddFlags {
71         // Determine score of each aligned segment in the process of mixing
72         // (only makes sense if scope was provided at construction time)
73         fCalcScore            = 0x01,
74 
75         // Force translation of nucleotide rows
76         // This will result in an output Dense-seg that has Widths,
77         // no matter if the whole alignment consists of nucleotides only.
78         fForceTranslation     = 0x02,
79 
80         // Used for mapping sequence to itself
81         fPreserveRows         = 0x04
82     };
83     typedef int TAddFlags; // binary OR of EMergeFlags
84 
85     void         Add        (const CDense_seg& ds, TAddFlags flags = 0);
86 
87 
88     // Sorting algirithms
89     void         SortByScore();
90     void         SortByChainScore();
91 
92 
93     // Rows-related methods
94     void         BuildRows();
95     void         InitRowsStartIts();
96     void         InitExtraRowsStartIts();
97     void         RowsStartItsContsistencyCheck(size_t match_idx);
98 
99 private:
100     friend class CAlnMix;
101     friend class CAlnMixMatches;
102     friend class CAlnMixSegments;
103     friend class CAlnMixMerger;
104 
105     typedef map<CBioseq_Handle, CRef<CAlnMixSeq> >        TBioseqHandleMap;
106 
107     // CRef<Seq-id> comparison predicate
108     struct SSeqIds {
109         bool
operator ()CAlnMixSequences::SSeqIds110         operator() (const CRef<CSeq_id>& id1, const CRef<CSeq_id>& id2) const {
111             return (*id1 < *id2);
112         }
113     };
114     typedef map<CRef<CSeq_id>, CRef<CAlnMixSeq>, SSeqIds> TSeqIdMap;
115 
116     static bool x_CompareScores     (const CRef<CAlnMixSeq>& seq1,
117                                      const CRef<CAlnMixSeq>& seq2);
118     static bool x_CompareChainScores(const CRef<CAlnMixSeq>& seq1,
119                                      const CRef<CAlnMixSeq>& seq2);
120 
121     void x_IdentifyAlnMixSeq        (CRef<CAlnMixSeq>& aln_seq,
122                                      const CSeq_id& seq_id);
123 
124     size_t                          m_DsCnt;
125     map<const CDense_seg*,
126         vector<CRef<CAlnMixSeq> > > m_DsSeq;
127     CRef<CScope>                    m_Scope;
128     TSeqs                           m_Seqs;
129     TSeqIdMap                       m_SeqIds;
130     TBioseqHandleMap                m_BioseqHandles;
131     bool                            m_ContainsAA;
132     bool                            m_ContainsNA;
133     vector<CRef<CAlnMixSeq> >       m_Rows;
134     list<CRef<CAlnMixSeq> >         m_ExtraRows;
135 };
136 
137 
138 
139 class NCBI_XALNMGR_EXPORT CAlnMixSeq : public CObject
140 {
141 public:
142     CAlnMixSeq(void);
143     ~CAlnMixSeq();
144 
145     typedef list<CAlnMixMatch *>          TMatchList;
146 
147     int                   m_DsCnt;
148     const CBioseq_Handle* m_BioseqHandle;
149     CRef<CSeq_id>         m_SeqId;
150     int                   m_Score;
151     int                   m_ChainScore;
152     int                   m_StrandScore;
153     bool                  m_IsAA;
154     unsigned              m_Width;
155     int                   m_Frame;
156     bool                  m_PositiveStrand;
157     CAlnMixSeq *          m_RefBy;
158     CAlnMixSeq *          m_ExtraRow;
159     int                   m_ExtraRowIdx;
160     CAlnMixSeq *          m_AnotherRow;
161     int                   m_DsIdx;
162     int                   m_SeqIdx;
163     int                   m_ChildIdx;
164     int                   m_RowIdx;
165     TMatchList            m_MatchList;
166 
GetStarts() const167     const CAlnMixStarts& GetStarts() const { return *m_Starts; }
SetStarts()168     CAlnMixStarts& SetStarts() { return *m_Starts; }
169 
GetPlusStrandSeqVector(void)170     CSeqVector& GetPlusStrandSeqVector(void)
171     {
172         if ( !m_PlusStrandSeqVector ) {
173             m_PlusStrandSeqVector = new CSeqVector
174                 (m_BioseqHandle->GetSeqVector(CBioseq_Handle::eCoding_Iupac,
175                                               CBioseq_Handle::eStrand_Plus));
176         }
177         return *m_PlusStrandSeqVector;
178     }
179 
GetMinusStrandSeqVector(void)180     CSeqVector& GetMinusStrandSeqVector(void)
181     {
182         if ( !m_MinusStrandSeqVector ) {
183             m_MinusStrandSeqVector = new CSeqVector
184                 (m_BioseqHandle->GetSeqVector(CBioseq_Handle::eCoding_Iupac,
185                                               CBioseq_Handle::eStrand_Minus));
186         }
187         return *m_MinusStrandSeqVector;
188     }
189 
GetSeqString(string & s,TSeqPos start,TSeqPos len,bool positive_strand=true)190     void GetSeqString(string& s,
191                       TSeqPos start,
192                       TSeqPos len,
193                       bool    positive_strand = true)
194     {
195         if (positive_strand) {
196             GetPlusStrandSeqVector().GetSeqData(start, start + len, s);
197         } else {
198             TSeqPos size = GetMinusStrandSeqVector().size();
199             GetMinusStrandSeqVector().GetSeqData(size - (start + len),
200                                                  size - start,
201                                                  s);
202         }
203         if (s.length() != len) {
204             string errstr = "Unable to load data for seq-id=\"" +
205                 m_SeqId->AsFastaString() + "\" "
206                 "start=" + NStr::UIntToString(start) + " "
207                 "length=" + NStr::UIntToString(len) + ".";
208             NCBI_THROW(CAlnException, eInvalidSeqId,
209                        errstr);
210         }
211     }
212 
213 private:
214     CRef<CSeqVector> m_PlusStrandSeqVector;
215     CRef<CSeqVector> m_MinusStrandSeqVector;
216     auto_ptr<CAlnMixStarts> m_Starts;
217 
218     /// forbidden
219     CAlnMixSeq(const CAlnMixSeq&);
220     CAlnMixSeq& operator=(const CAlnMixSeq&);
221 };
222 
223 
224 
225 END_objects_SCOPE // namespace ncbi::objects::
226 
227 END_NCBI_SCOPE
228 
229 #endif // OBJECTS_ALNMGR___ALNSEQ__HPP
230