1 /*  $Id: pairwise_aln.cpp 489594 2016-01-14 14:58:22Z grichenk $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author:  Kamen Todorov, NCBI
27 *
28 * File Description:
29 *   Pairwise and query-anchored alignments
30 *
31 * ===========================================================================
32 */
33 
34 
35 #include <ncbi_pch.hpp>
36 
37 #include <objtools/alnmgr/pairwise_aln.hpp>
38 
39 
40 BEGIN_NCBI_SCOPE
41 USING_SCOPE(objects);
42 
43 
operator ++(void)44 CPairwise_CI& CPairwise_CI::operator++(void)
45 {
46     if ( m_Direct ) {
47         if (m_It == m_GapIt) {
48             // Advance to the next gap.
49             ++m_It;
50         }
51         // Advance to the next non-gap segment if there's no unaligned range
52         // to show.
53         else if ( !m_Unaligned ) {
54             // Advance to the next segment.
55             ++m_GapIt;
56             _ASSERT(m_It == m_GapIt);
57         }
58     }
59     else {
60         if (m_It == m_GapIt) {
61             if (m_It == m_Aln->begin()) {
62                 m_It = m_Aln->end();
63                 m_GapIt = m_Aln->end();
64             }
65             else {
66                 --m_It;
67             }
68         }
69         else if ( !m_Unaligned ) {
70             _ASSERT(m_GapIt != m_Aln->begin());
71             --m_GapIt;
72             _ASSERT(m_It == m_GapIt);
73         }
74     }
75     x_InitSegment();
76     return *this;
77 }
78 
79 
x_Init(bool force_direct)80 void CPairwise_CI::x_Init(bool force_direct)
81 {
82     // Mixed direction and empty alignments are iterated in direct order.
83     m_Direct = force_direct ||
84         ((m_Aln->GetFlags() & CPairwiseAln::fMixedDir) == CPairwiseAln::fMixedDir)  ||
85         m_Aln->empty()  ||
86         m_Aln->begin()->IsFirstDirect();
87     if ( m_Direct ) {
88         TCheckedIterator it = m_Aln->find_2(m_Range.GetFrom());
89         m_It = it.first;
90         m_GapIt = it.first;
91         if ( !it.second ) {
92             if (m_GapIt != m_Aln->begin()) {
93                 --m_GapIt;
94             }
95         }
96     }
97     else {
98         CPairwiseAln::const_iterator last = m_Aln->end();
99         if ( !m_Aln->empty() ) {
100             --last;
101         }
102         TCheckedIterator it = m_Range.IsWhole() ?
103             TCheckedIterator(last, true)
104             : m_Aln->find_2(m_Range.GetTo());
105         if (it.first == m_Aln->end()) {
106             it.first = last;
107         }
108         m_It = it.first;
109         m_GapIt = it.first;
110         if ( !it.second ) {
111             if (m_GapIt != m_Aln->end()  &&  m_GapIt != last) {
112                 ++m_GapIt;
113             }
114         }
115     }
116     x_InitSegment();
117 }
118 
119 
x_InitSegment(void)120 void CPairwise_CI::x_InitSegment(void)
121 {
122     if ( !*this ) {
123         m_FirstRg = TSignedRange::GetEmpty();
124         m_SecondRg = TSignedRange::GetEmpty();
125         return;
126     }
127     _ASSERT(m_It != m_Aln->end()  &&  m_GapIt != m_Aln->end());
128     if (m_It == m_GapIt) {
129         // Normal segment
130         m_FirstRg = m_It->GetFirstRange();
131         m_SecondRg = m_It->GetSecondRange();
132     }
133     else {
134         // Gap
135         TSignedSeqPos it_second_from = m_It->GetSecondFrom();
136         TSignedSeqPos it_second_to = m_It->GetSecondToOpen();
137         TSignedSeqPos gap_second_from = m_GapIt->GetSecondFrom();
138         TSignedSeqPos gap_second_to = m_GapIt->GetSecondToOpen();
139         if ( m_Direct ) {
140             m_FirstRg.SetOpen(m_GapIt->GetFirstToOpen(), m_It->GetFirstFrom());
141             if ( m_It->IsDirect() ) {
142                 if ( m_GapIt->IsDirect() ) {
143                     m_SecondRg.SetOpen(gap_second_to, it_second_from);
144                 }
145                 else {
146                     m_SecondRg.SetOpen(min(gap_second_from, it_second_from),
147                         max(gap_second_from, it_second_from));
148                 }
149             }
150             else {
151                 if ( !m_GapIt->IsDirect() ) {
152                     m_SecondRg.SetOpen(it_second_to, gap_second_from);
153                 }
154                 else {
155                     m_SecondRg.SetOpen(min(gap_second_to, it_second_to),
156                         max(gap_second_to, it_second_to));
157                 }
158             }
159             if ( !m_Unaligned ) {
160                 if (!m_FirstRg.Empty()  &&  !m_SecondRg.Empty()) {
161                     // Show gap first, then unaligned segment.
162                     m_SecondRg.SetToOpen(m_SecondRg.GetFrom());
163                     m_Unaligned = true;
164                 }
165             }
166             else {
167                 // Show unaligned segment after gap.
168                 m_FirstRg.SetFrom(m_FirstRg.GetToOpen());
169                 m_Unaligned = false;
170                 // Don't clip unaligned segments.
171                 return;
172             }
173         }
174         else {
175             m_FirstRg.SetOpen(m_It->GetFirstToOpen(), m_GapIt->GetFirstFrom());
176             if ( m_It->IsDirect() ) {
177                 if ( m_GapIt->IsDirect() ) {
178                     m_SecondRg.SetOpen(it_second_to, gap_second_from);
179                 }
180                 else {
181                     m_SecondRg.SetOpen(min(it_second_to, gap_second_to),
182                         max(it_second_to, gap_second_to));
183                 }
184             }
185             else {
186                 if ( !m_GapIt->IsDirect() ) {
187                     m_SecondRg.SetOpen(gap_second_to, it_second_from);
188                 }
189                 else {
190                     m_SecondRg.SetOpen(min(it_second_from, gap_second_from),
191                         max(it_second_from, gap_second_from));
192                 }
193             }
194             if ( !m_Unaligned ) {
195                 if ( !m_FirstRg.Empty()  &&  !m_SecondRg.Empty()) {
196                     m_SecondRg.SetFrom(m_SecondRg.GetToOpen());
197                     m_Unaligned = true;
198                 }
199             }
200             else {
201                 m_FirstRg.SetToOpen(m_FirstRg.GetFrom());
202                 m_Unaligned = false;
203                 return;
204             }
205         }
206     }
207     if ( m_Range.IsWhole() ) {
208         return;
209     }
210     // Take both direction into account, adjust ranges if clipped.
211     TSignedSeqPos left_shift = 0;
212     TSignedSeqPos right_shift = 0;
213     if (m_FirstRg.GetFrom() < m_Range.GetFrom()) {
214         left_shift = m_Range.GetFrom() - m_FirstRg.GetFrom();
215     }
216     if (m_FirstRg.GetToOpen() > m_Range.GetToOpen()) {
217         right_shift = m_FirstRg.GetToOpen() - m_Range.GetToOpen();
218     }
219     m_FirstRg.IntersectWith(m_Range);
220     if (left_shift != 0  ||  right_shift != 0) {
221         if ( m_It->IsReversed() ) {
222             swap(left_shift, right_shift);
223         }
224         m_SecondRg.SetOpen(m_SecondRg.GetFrom() + left_shift,
225             m_SecondRg.GetToOpen() - right_shift);
226         if (m_SecondRg.GetToOpen() < m_SecondRg.GetFrom()) {
227             m_SecondRg.SetToOpen(m_SecondRg.GetFrom());
228         }
229     }
230 }
231 
232 
233 /// Split rows with mixed dir into separate rows
234 /// returns true if the operation was performed
SplitStrands()235 bool CAnchoredAln::SplitStrands()
236 {
237     TDim dim = GetDim();
238     TDim new_dim = dim;
239     TDim row;
240     TDim new_row;
241 
242     for (row = 0;  row < dim;  ++row) {
243         if (m_PairwiseAlns[row]->IsSet(CPairwiseAln::fMixedDir)) {
244             ++new_dim;
245         }
246     }
247     _ASSERT(dim <= new_dim);
248     if (new_dim > dim) {
249         m_PairwiseAlns.resize(new_dim);
250         row = dim - 1;
251         new_row = new_dim - 1;
252         while (row < new_row) {
253             _ASSERT(row >= 0);
254             _ASSERT(new_row > 0);
255             if (row == m_AnchorRow) {
256                 m_AnchorRow = new_row;
257             }
258             const CPairwiseAln& aln = *m_PairwiseAlns[row];
259             if (aln.IsSet(CPairwiseAln::fMixedDir)) {
260                 m_PairwiseAlns[new_row].Reset
261                     (new CPairwiseAln(aln.GetFirstId(),
262                                       aln.GetSecondId(),
263                                       aln.GetPolicyFlags()));
264                 CPairwiseAln& reverse_aln = *m_PairwiseAlns[new_row--];
265                 m_PairwiseAlns[new_row].Reset
266                     (new CPairwiseAln(aln.GetFirstId(),
267                                       aln.GetSecondId(),
268                                       aln.GetPolicyFlags()));
269                 CPairwiseAln& direct_aln = *m_PairwiseAlns[new_row--];
270                 ITERATE (CPairwiseAln, aln_rng_it, aln) {
271                     if (aln_rng_it->IsDirect()) {
272                         direct_aln.push_back(*aln_rng_it);
273                     } else {
274                         reverse_aln.push_back(*aln_rng_it);
275                     }
276                 }
277             } else {
278                 m_PairwiseAlns[new_row--].Reset
279                     (new CPairwiseAln(aln));
280             }
281             --row;
282             _ASSERT(row <= new_row);
283         }
284         return true;
285     } else {
286         _ASSERT(dim == new_dim);
287         return false;
288     }
289 }
290 
291 
292 END_NCBI_SCOPE
293