1 /* $Id: pairwise_aln.cpp 489594 2016-01-14 14:58:22Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Kamen Todorov, NCBI
27 *
28 * File Description:
29 * Pairwise and query-anchored alignments
30 *
31 * ===========================================================================
32 */
33
34
35 #include <ncbi_pch.hpp>
36
37 #include <objtools/alnmgr/pairwise_aln.hpp>
38
39
40 BEGIN_NCBI_SCOPE
41 USING_SCOPE(objects);
42
43
operator ++(void)44 CPairwise_CI& CPairwise_CI::operator++(void)
45 {
46 if ( m_Direct ) {
47 if (m_It == m_GapIt) {
48 // Advance to the next gap.
49 ++m_It;
50 }
51 // Advance to the next non-gap segment if there's no unaligned range
52 // to show.
53 else if ( !m_Unaligned ) {
54 // Advance to the next segment.
55 ++m_GapIt;
56 _ASSERT(m_It == m_GapIt);
57 }
58 }
59 else {
60 if (m_It == m_GapIt) {
61 if (m_It == m_Aln->begin()) {
62 m_It = m_Aln->end();
63 m_GapIt = m_Aln->end();
64 }
65 else {
66 --m_It;
67 }
68 }
69 else if ( !m_Unaligned ) {
70 _ASSERT(m_GapIt != m_Aln->begin());
71 --m_GapIt;
72 _ASSERT(m_It == m_GapIt);
73 }
74 }
75 x_InitSegment();
76 return *this;
77 }
78
79
x_Init(bool force_direct)80 void CPairwise_CI::x_Init(bool force_direct)
81 {
82 // Mixed direction and empty alignments are iterated in direct order.
83 m_Direct = force_direct ||
84 ((m_Aln->GetFlags() & CPairwiseAln::fMixedDir) == CPairwiseAln::fMixedDir) ||
85 m_Aln->empty() ||
86 m_Aln->begin()->IsFirstDirect();
87 if ( m_Direct ) {
88 TCheckedIterator it = m_Aln->find_2(m_Range.GetFrom());
89 m_It = it.first;
90 m_GapIt = it.first;
91 if ( !it.second ) {
92 if (m_GapIt != m_Aln->begin()) {
93 --m_GapIt;
94 }
95 }
96 }
97 else {
98 CPairwiseAln::const_iterator last = m_Aln->end();
99 if ( !m_Aln->empty() ) {
100 --last;
101 }
102 TCheckedIterator it = m_Range.IsWhole() ?
103 TCheckedIterator(last, true)
104 : m_Aln->find_2(m_Range.GetTo());
105 if (it.first == m_Aln->end()) {
106 it.first = last;
107 }
108 m_It = it.first;
109 m_GapIt = it.first;
110 if ( !it.second ) {
111 if (m_GapIt != m_Aln->end() && m_GapIt != last) {
112 ++m_GapIt;
113 }
114 }
115 }
116 x_InitSegment();
117 }
118
119
x_InitSegment(void)120 void CPairwise_CI::x_InitSegment(void)
121 {
122 if ( !*this ) {
123 m_FirstRg = TSignedRange::GetEmpty();
124 m_SecondRg = TSignedRange::GetEmpty();
125 return;
126 }
127 _ASSERT(m_It != m_Aln->end() && m_GapIt != m_Aln->end());
128 if (m_It == m_GapIt) {
129 // Normal segment
130 m_FirstRg = m_It->GetFirstRange();
131 m_SecondRg = m_It->GetSecondRange();
132 }
133 else {
134 // Gap
135 TSignedSeqPos it_second_from = m_It->GetSecondFrom();
136 TSignedSeqPos it_second_to = m_It->GetSecondToOpen();
137 TSignedSeqPos gap_second_from = m_GapIt->GetSecondFrom();
138 TSignedSeqPos gap_second_to = m_GapIt->GetSecondToOpen();
139 if ( m_Direct ) {
140 m_FirstRg.SetOpen(m_GapIt->GetFirstToOpen(), m_It->GetFirstFrom());
141 if ( m_It->IsDirect() ) {
142 if ( m_GapIt->IsDirect() ) {
143 m_SecondRg.SetOpen(gap_second_to, it_second_from);
144 }
145 else {
146 m_SecondRg.SetOpen(min(gap_second_from, it_second_from),
147 max(gap_second_from, it_second_from));
148 }
149 }
150 else {
151 if ( !m_GapIt->IsDirect() ) {
152 m_SecondRg.SetOpen(it_second_to, gap_second_from);
153 }
154 else {
155 m_SecondRg.SetOpen(min(gap_second_to, it_second_to),
156 max(gap_second_to, it_second_to));
157 }
158 }
159 if ( !m_Unaligned ) {
160 if (!m_FirstRg.Empty() && !m_SecondRg.Empty()) {
161 // Show gap first, then unaligned segment.
162 m_SecondRg.SetToOpen(m_SecondRg.GetFrom());
163 m_Unaligned = true;
164 }
165 }
166 else {
167 // Show unaligned segment after gap.
168 m_FirstRg.SetFrom(m_FirstRg.GetToOpen());
169 m_Unaligned = false;
170 // Don't clip unaligned segments.
171 return;
172 }
173 }
174 else {
175 m_FirstRg.SetOpen(m_It->GetFirstToOpen(), m_GapIt->GetFirstFrom());
176 if ( m_It->IsDirect() ) {
177 if ( m_GapIt->IsDirect() ) {
178 m_SecondRg.SetOpen(it_second_to, gap_second_from);
179 }
180 else {
181 m_SecondRg.SetOpen(min(it_second_to, gap_second_to),
182 max(it_second_to, gap_second_to));
183 }
184 }
185 else {
186 if ( !m_GapIt->IsDirect() ) {
187 m_SecondRg.SetOpen(gap_second_to, it_second_from);
188 }
189 else {
190 m_SecondRg.SetOpen(min(it_second_from, gap_second_from),
191 max(it_second_from, gap_second_from));
192 }
193 }
194 if ( !m_Unaligned ) {
195 if ( !m_FirstRg.Empty() && !m_SecondRg.Empty()) {
196 m_SecondRg.SetFrom(m_SecondRg.GetToOpen());
197 m_Unaligned = true;
198 }
199 }
200 else {
201 m_FirstRg.SetToOpen(m_FirstRg.GetFrom());
202 m_Unaligned = false;
203 return;
204 }
205 }
206 }
207 if ( m_Range.IsWhole() ) {
208 return;
209 }
210 // Take both direction into account, adjust ranges if clipped.
211 TSignedSeqPos left_shift = 0;
212 TSignedSeqPos right_shift = 0;
213 if (m_FirstRg.GetFrom() < m_Range.GetFrom()) {
214 left_shift = m_Range.GetFrom() - m_FirstRg.GetFrom();
215 }
216 if (m_FirstRg.GetToOpen() > m_Range.GetToOpen()) {
217 right_shift = m_FirstRg.GetToOpen() - m_Range.GetToOpen();
218 }
219 m_FirstRg.IntersectWith(m_Range);
220 if (left_shift != 0 || right_shift != 0) {
221 if ( m_It->IsReversed() ) {
222 swap(left_shift, right_shift);
223 }
224 m_SecondRg.SetOpen(m_SecondRg.GetFrom() + left_shift,
225 m_SecondRg.GetToOpen() - right_shift);
226 if (m_SecondRg.GetToOpen() < m_SecondRg.GetFrom()) {
227 m_SecondRg.SetToOpen(m_SecondRg.GetFrom());
228 }
229 }
230 }
231
232
233 /// Split rows with mixed dir into separate rows
234 /// returns true if the operation was performed
SplitStrands()235 bool CAnchoredAln::SplitStrands()
236 {
237 TDim dim = GetDim();
238 TDim new_dim = dim;
239 TDim row;
240 TDim new_row;
241
242 for (row = 0; row < dim; ++row) {
243 if (m_PairwiseAlns[row]->IsSet(CPairwiseAln::fMixedDir)) {
244 ++new_dim;
245 }
246 }
247 _ASSERT(dim <= new_dim);
248 if (new_dim > dim) {
249 m_PairwiseAlns.resize(new_dim);
250 row = dim - 1;
251 new_row = new_dim - 1;
252 while (row < new_row) {
253 _ASSERT(row >= 0);
254 _ASSERT(new_row > 0);
255 if (row == m_AnchorRow) {
256 m_AnchorRow = new_row;
257 }
258 const CPairwiseAln& aln = *m_PairwiseAlns[row];
259 if (aln.IsSet(CPairwiseAln::fMixedDir)) {
260 m_PairwiseAlns[new_row].Reset
261 (new CPairwiseAln(aln.GetFirstId(),
262 aln.GetSecondId(),
263 aln.GetPolicyFlags()));
264 CPairwiseAln& reverse_aln = *m_PairwiseAlns[new_row--];
265 m_PairwiseAlns[new_row].Reset
266 (new CPairwiseAln(aln.GetFirstId(),
267 aln.GetSecondId(),
268 aln.GetPolicyFlags()));
269 CPairwiseAln& direct_aln = *m_PairwiseAlns[new_row--];
270 ITERATE (CPairwiseAln, aln_rng_it, aln) {
271 if (aln_rng_it->IsDirect()) {
272 direct_aln.push_back(*aln_rng_it);
273 } else {
274 reverse_aln.push_back(*aln_rng_it);
275 }
276 }
277 } else {
278 m_PairwiseAlns[new_row--].Reset
279 (new CPairwiseAln(aln));
280 }
281 --row;
282 _ASSERT(row <= new_row);
283 }
284 return true;
285 } else {
286 _ASSERT(dim == new_dim);
287 return false;
288 }
289 }
290
291
292 END_NCBI_SCOPE
293