1 /*  $Id: bioseq_ci.cpp 444539 2014-08-25 18:43:02Z vasilche $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aleksey Grichenko, Eugene Vasilchenko
27 *
28 * File Description:
29 *   Bioseq iterator
30 *
31 */
32 
33 #include <ncbi_pch.hpp>
34 #include <objmgr/bioseq_ci.hpp>
35 #include <objmgr/scope.hpp>
36 #include <objmgr/bioseq_handle.hpp>
37 #include <objmgr/impl/scope_impl.hpp>
38 #include <objmgr/impl/bioseq_set_info.hpp>
39 
40 BEGIN_NCBI_SCOPE
BEGIN_SCOPE(objects)41 BEGIN_SCOPE(objects)
42 
43 
44 inline
45 bool CBioseq_CI::x_IsValidMolType(const CBioseq_Info& seq) const
46 {
47     switch (m_Filter) {
48     case CSeq_inst::eMol_not_set:
49         return true;
50     case CSeq_inst::eMol_na:
51         return seq.IsNa();
52     default:
53         break;
54     }
55     return seq.GetInst_Mol() == m_Filter;
56 }
57 
58 
x_PushEntry(const CSeq_entry_Handle & entry)59 void CBioseq_CI::x_PushEntry(const CSeq_entry_Handle& entry)
60 {
61     if ( !entry || entry.IsSeq() ) {
62         m_CurrentEntry = entry;
63     }
64     else {
65         if ( entry.x_GetInfo().GetSet().GetClass() ==
66              CBioseq_set::eClass_parts ) {
67             if ( m_Level == eLevel_Mains ) {
68                 x_NextEntry();
69                 return;
70             }
71             ++m_InParts;
72         }
73         m_EntryStack.push_back(CSeq_entry_CI(entry));
74         _ASSERT(m_EntryStack.back().GetParentBioseq_set()==entry.GetSet());
75         if ( m_EntryStack.back() ) {
76             m_CurrentEntry = *m_EntryStack.back();
77         }
78         else {
79             m_CurrentEntry.Reset();
80         }
81     }
82 }
83 
84 
x_NextEntry(void)85 void CBioseq_CI::x_NextEntry(void)
86 {
87     if ( !m_EntryStack.empty() &&
88          m_EntryStack.back() &&
89          ++m_EntryStack.back() ) {
90         m_CurrentEntry = *m_EntryStack.back();
91     }
92     else {
93         m_CurrentEntry.Reset();
94     }
95 }
96 
97 
x_PopEntry(bool next)98 void CBioseq_CI::x_PopEntry(bool next)
99 {
100     if ( m_EntryStack.back().GetParentBioseq_set().GetClass() ==
101          CBioseq_set::eClass_parts ) {
102         --m_InParts;
103     }
104     m_EntryStack.pop_back();
105     if ( next ) {
106         x_NextEntry();
107     }
108     else {
109         m_CurrentEntry.Reset();
110     }
111 }
112 
113 
114 inline
sx_IsNa(CSeq_inst::EMol mol)115 bool sx_IsNa(CSeq_inst::EMol mol)
116 {
117     return mol == CSeq_inst::eMol_dna  ||
118         mol == CSeq_inst::eMol_rna  ||
119         mol == CSeq_inst::eMol_na;
120 }
121 
122 
123 inline
sx_IsProt(CSeq_inst::EMol mol)124 bool sx_IsProt(CSeq_inst::EMol mol)
125 {
126     return mol == CSeq_inst::eMol_aa;
127 }
128 
129 
x_SkipClass(CBioseq_set::TClass set_class)130 bool CBioseq_CI::x_SkipClass(CBioseq_set::TClass set_class)
131 {
132     size_t pos = m_EntryStack.size();
133     do {
134         if ( pos == 0 ) { // no Bioseq-set with requested class is found
135             return false;
136         }
137     } while ( m_EntryStack[--pos].GetParentBioseq_set().GetClass() != set_class );
138     while ( m_EntryStack.size() > pos+1 ) {
139         x_PopEntry(false);
140     }
141     x_PopEntry();
142     return true;
143 }
144 
145 
x_Settle(void)146 void CBioseq_CI::x_Settle(void)
147 {
148     bool found_na = m_CurrentBioseq  &&  sx_IsNa(m_Filter);
149     m_CurrentBioseq.Reset();
150     for ( ;; ) {
151         if ( !m_CurrentEntry ) {
152             if ( m_EntryStack.empty() ) {
153                 // no more entries
154                 return;
155             }
156             x_PopEntry();
157         }
158         else if ( m_CurrentEntry.IsSeq() ) {
159             // Single bioseq
160             if ( m_Level != eLevel_Parts  ||  m_InParts > 0 ) {
161                 if ( x_IsValidMolType(m_CurrentEntry.x_GetInfo().GetSeq()) ) {
162                     m_CurrentBioseq = m_CurrentEntry.GetSeq();
163                     return; // valid bioseq found
164                 }
165                 else if ( m_Level != eLevel_IgnoreClass  &&
166                           !m_EntryStack.empty() ) {
167                     if ( found_na &&
168                          m_EntryStack.back().GetParentBioseq_set().GetClass()
169                          == CBioseq_set::eClass_nuc_prot ) {
170                         // Skip only the same level nuc-prot set
171                         found_na = false; // no more skipping
172                         if ( x_SkipClass(CBioseq_set::eClass_nuc_prot) ) {
173                             continue;
174                         }
175                     }
176                     else if ( sx_IsProt(m_Filter) ) {
177                         // Skip the whole nuc segset when collecting prots
178                         // Also skip conset
179                         if ( x_SkipClass(CBioseq_set::eClass_segset) ||
180                              x_SkipClass(CBioseq_set::eClass_conset) ) {
181                             continue;
182                         }
183                     }
184                 }
185             }
186             x_NextEntry();
187         }
188         else {
189             found_na = false; // no more skipping
190             x_PushEntry(m_CurrentEntry);
191         }
192     }
193 }
194 
195 
x_Initialize(const CSeq_entry_Handle & entry)196 void CBioseq_CI::x_Initialize(const CSeq_entry_Handle& entry)
197 {
198     if ( !entry ) {
199         NCBI_THROW(CObjMgrException, eOtherError,
200                    "Can not find seq-entry to initialize bioseq iterator");
201     }
202     x_PushEntry(entry);
203     x_Settle();
204 }
205 
206 
operator ++(void)207 CBioseq_CI& CBioseq_CI::operator++ (void)
208 {
209     x_NextEntry();
210     x_Settle();
211     return *this;
212 }
213 
214 
CBioseq_CI(void)215 CBioseq_CI::CBioseq_CI(void)
216     : m_Filter(CSeq_inst::eMol_not_set),
217       m_Level(eLevel_All),
218       m_InParts(0)
219 {
220 }
221 
222 
// Copy constructor: implemented through the assignment operator,
// which copies the complete iteration state.
CBioseq_CI::CBioseq_CI(const CBioseq_CI& bioseq_ci)
{
    operator=(bioseq_ci);
}
227 
228 
~CBioseq_CI(void)229 CBioseq_CI::~CBioseq_CI(void)
230 {
231 }
232 
233 
CBioseq_CI(const CSeq_entry_Handle & entry,CSeq_inst::EMol filter,EBioseqLevelFlag level)234 CBioseq_CI::CBioseq_CI(const CSeq_entry_Handle& entry,
235                        CSeq_inst::EMol filter,
236                        EBioseqLevelFlag level)
237     : m_Scope(&entry.GetScope()),
238       m_Filter(filter),
239       m_Level(level),
240       m_InParts(0)
241 {
242     x_Initialize(entry);
243 }
244 
245 
CBioseq_CI(const CBioseq_set_Handle & bioseq_set,CSeq_inst::EMol filter,EBioseqLevelFlag level)246 CBioseq_CI::CBioseq_CI(const CBioseq_set_Handle& bioseq_set,
247                        CSeq_inst::EMol filter,
248                        EBioseqLevelFlag level)
249     : m_Scope(&bioseq_set.GetScope()),
250       m_Filter(filter),
251       m_Level(level),
252       m_InParts(0)
253 {
254     x_Initialize(bioseq_set.GetParentEntry());
255 }
256 
257 
CBioseq_CI(CScope & scope,const CSeq_entry & entry,CSeq_inst::EMol filter,EBioseqLevelFlag level)258 CBioseq_CI::CBioseq_CI(CScope& scope, const CSeq_entry& entry,
259                        CSeq_inst::EMol filter,
260                        EBioseqLevelFlag level)
261     : m_Scope(&scope),
262       m_Filter(filter),
263       m_Level(level),
264       m_InParts(0)
265 {
266     x_Initialize(scope.GetSeq_entryHandle(entry));
267 }
268 
269 
// Assignment: copy the whole iteration state (scope, filters, 'parts'
// nesting counter, entry stack, current entry and current bioseq) from
// another iterator. Self-assignment is a no-op. Returns this iterator.
CBioseq_CI& CBioseq_CI::operator= (const CBioseq_CI& bioseq_ci)
{
    if ( this == &bioseq_ci ) {
        return *this;
    }

    // Settings
    m_Scope = bioseq_ci.m_Scope;
    m_Filter = bioseq_ci.m_Filter;
    m_Level = bioseq_ci.m_Level;

    // Current position
    m_InParts = bioseq_ci.m_InParts;
    m_EntryStack = bioseq_ci.m_EntryStack;
    m_CurrentEntry = bioseq_ci.m_CurrentEntry;
    m_CurrentBioseq = bioseq_ci.m_CurrentBioseq;

    return *this;
}
283 
284 
285 END_SCOPE(objects)
286 END_NCBI_SCOPE
287