1 /* $Id: annot_object.cpp 618511 2020-10-21 16:04:06Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aleksey Grichenko, Eugene Vasilchenko
27 *
28 * File Description:
29 *
30 */
31
32 #include <ncbi_pch.hpp>
33 #include <objmgr/impl/annot_object.hpp>
34 #include <objmgr/impl/handle_range_map.hpp>
35 #include <objmgr/impl/seq_entry_info.hpp>
36 #include <objmgr/impl/bioseq_base_info.hpp>
37 #include <objmgr/impl/seq_annot_info.hpp>
38 #include <objmgr/impl/tse_chunk_info.hpp>
39 #include <objmgr/impl/annot_type_index.hpp>
40 #include <objmgr/objmgr_exception.hpp>
41 #include <objmgr/error_codes.hpp>
42
43 #include <objects/seqset/Seq_entry.hpp>
44 #include <objects/seq/Seq_annot.hpp>
45 #include <objects/seq/Annotdesc.hpp>
46 #include <objects/seq/Annot_descr.hpp>
47
48 #include <objects/seqloc/Seq_interval.hpp>
49 #include <objects/seqloc/Seq_loc.hpp>
50
51 #include <objects/seqalign/Dense_diag.hpp>
52 #include <objects/seqalign/Dense_seg.hpp>
53 #include <objects/seqalign/Std_seg.hpp>
54 #include <objects/seqalign/Packed_seg.hpp>
55 #include <objects/seqalign/Seq_align_set.hpp>
56 #include <objects/seqalign/Spliced_seg.hpp>
57 #include <objects/seqalign/Spliced_exon.hpp>
58 #include <objects/seqalign/Product_pos.hpp>
59 #include <objects/seqalign/Prot_pos.hpp>
60 #include <objects/seqalign/Sparse_seg.hpp>
61 #include <objects/seqalign/Sparse_align.hpp>
62 #include <objects/seqalign/Seq_align.hpp>
63
64 #include <objects/seqfeat/Seq_feat.hpp>
65
66 #include <objects/seqres/Seq_graph.hpp>
67
68 #include <objects/general/User_object.hpp>
69 #include <objects/general/User_field.hpp>
70 #include <objects/general/Object_id.hpp>
71
72
73 #define NCBI_USE_ERRCODE_X ObjMgr_AnnotObject
74
75 BEGIN_NCBI_SCOPE
76
77 NCBI_DEFINE_ERR_SUBCODE_X(12);
78
BEGIN_SCOPE(objects)79 BEGIN_SCOPE(objects)
80
81 ////////////////////////////////////////////////////////////////////
82 //
83 // CAnnotObject_Info::
84 //
85
86
87 CAnnotObject_Info::CAnnotObject_Info(CSeq_annot_Info& annot,
88 TIndex index,
89 const SAnnotTypeSelector& type)
90 : m_Seq_annot_Info(&annot),
91 m_ObjectIndex(index),
92 m_Type(type)
93 {
94 m_Iter.m_RawPtr = 0;
95 }
96
97
CAnnotObject_Info(CSeq_annot_Info & annot,TIndex index,TFtable::iterator iter)98 CAnnotObject_Info::CAnnotObject_Info(CSeq_annot_Info& annot,
99 TIndex index,
100 TFtable::iterator iter)
101 : m_Seq_annot_Info(&annot),
102 m_ObjectIndex(index),
103 m_Type((*iter)->GetData().GetSubtype())
104 {
105 #ifdef NCBI_NON_POD_TYPE_STL_ITERATORS
106 m_Iter.m_Feat.Construct();
107 #endif
108 *m_Iter.m_Feat = iter;
109 _ASSERT(IsRegular());
110 _ASSERT(m_Iter.m_RawPtr != 0);
111 }
112
113
CAnnotObject_Info(CSeq_annot_Info & annot,TIndex index,TAlign::iterator iter)114 CAnnotObject_Info::CAnnotObject_Info(CSeq_annot_Info& annot,
115 TIndex index,
116 TAlign::iterator iter)
117 : m_Seq_annot_Info(&annot),
118 m_ObjectIndex(index),
119 m_Type(C_Data::e_Align)
120 {
121 #ifdef NCBI_NON_POD_TYPE_STL_ITERATORS
122 m_Iter.m_Align.Construct();
123 #endif
124 *m_Iter.m_Align = iter;
125 _ASSERT(IsRegular());
126 _ASSERT(m_Iter.m_RawPtr != 0);
127 }
128
129
CAnnotObject_Info(CSeq_annot_Info & annot,TIndex index,TGraph::iterator iter)130 CAnnotObject_Info::CAnnotObject_Info(CSeq_annot_Info& annot,
131 TIndex index,
132 TGraph::iterator iter)
133 : m_Seq_annot_Info(&annot),
134 m_ObjectIndex(index),
135 m_Type(C_Data::e_Graph)
136 {
137 #ifdef NCBI_NON_POD_TYPE_STL_ITERATORS
138 m_Iter.m_Graph.Construct();
139 #endif
140 *m_Iter.m_Graph = iter;
141 _ASSERT(IsRegular());
142 _ASSERT(m_Iter.m_RawPtr != 0);
143 }
144
145
CAnnotObject_Info(CSeq_annot_Info & annot,TIndex index,TLocs::iterator iter)146 CAnnotObject_Info::CAnnotObject_Info(CSeq_annot_Info& annot,
147 TIndex index,
148 TLocs::iterator iter)
149 : m_Seq_annot_Info(&annot),
150 m_ObjectIndex(index),
151 m_Type(C_Data::e_Locs)
152 {
153 #ifdef NCBI_NON_POD_TYPE_STL_ITERATORS
154 m_Iter.m_Locs.Construct();
155 #endif
156 *m_Iter.m_Locs = iter;
157 _ASSERT(IsRegular());
158 _ASSERT(m_Iter.m_RawPtr != 0);
159 }
160
161
CAnnotObject_Info(CSeq_annot_Info & annot,TIndex index,TFtable & cont,const CSeq_feat & obj)162 CAnnotObject_Info::CAnnotObject_Info(CSeq_annot_Info& annot,
163 TIndex index,
164 TFtable& cont,
165 const CSeq_feat& obj)
166 : m_Seq_annot_Info(&annot),
167 m_ObjectIndex(index),
168 m_Type(obj.GetData().GetSubtype())
169 {
170 #ifdef NCBI_NON_POD_TYPE_STL_ITERATORS
171 m_Iter.m_Feat.Construct();
172 #endif
173 *m_Iter.m_Feat = cont.insert(cont.end(),
174 Ref(const_cast<CSeq_feat*>(&obj)));
175 _ASSERT(IsRegular());
176 _ASSERT(m_Iter.m_RawPtr != 0);
177 }
178
179
CAnnotObject_Info(CSeq_annot_Info & annot,TIndex index,TAlign & cont,const CSeq_align & obj)180 CAnnotObject_Info::CAnnotObject_Info(CSeq_annot_Info& annot,
181 TIndex index,
182 TAlign& cont,
183 const CSeq_align& obj)
184 : m_Seq_annot_Info(&annot),
185 m_ObjectIndex(index),
186 m_Type(C_Data::e_Align)
187 {
188 #ifdef NCBI_NON_POD_TYPE_STL_ITERATORS
189 m_Iter.m_Align.Construct();
190 #endif
191 *m_Iter.m_Align = cont.insert(cont.end(),
192 Ref(const_cast<CSeq_align*>(&obj)));
193 _ASSERT(IsRegular());
194 _ASSERT(m_Iter.m_RawPtr != 0);
195 }
196
197
CAnnotObject_Info(CSeq_annot_Info & annot,TIndex index,TGraph & cont,const CSeq_graph & obj)198 CAnnotObject_Info::CAnnotObject_Info(CSeq_annot_Info& annot,
199 TIndex index,
200 TGraph& cont,
201 const CSeq_graph& obj)
202 : m_Seq_annot_Info(&annot),
203 m_ObjectIndex(index),
204 m_Type(C_Data::e_Graph)
205 {
206 #ifdef NCBI_NON_POD_TYPE_STL_ITERATORS
207 m_Iter.m_Graph.Construct();
208 #endif
209 *m_Iter.m_Graph = cont.insert(cont.end(),
210 Ref(const_cast<CSeq_graph*>(&obj)));
211 _ASSERT(IsRegular());
212 _ASSERT(m_Iter.m_RawPtr != 0);
213 }
214
215
CAnnotObject_Info(CSeq_annot_Info & annot,TIndex index,TLocs & cont,const CSeq_loc & obj)216 CAnnotObject_Info::CAnnotObject_Info(CSeq_annot_Info& annot,
217 TIndex index,
218 TLocs& cont,
219 const CSeq_loc& obj)
220 : m_Seq_annot_Info(&annot),
221 m_ObjectIndex(index),
222 m_Type(C_Data::e_Locs)
223 {
224 #ifdef NCBI_NON_POD_TYPE_STL_ITERATORS
225 m_Iter.m_Locs.Construct();
226 #endif
227 *m_Iter.m_Locs = cont.insert(cont.end(),
228 Ref(const_cast<CSeq_loc*>(&obj)));
229 _ASSERT(IsRegular());
230 _ASSERT(m_Iter.m_RawPtr != 0);
231 }
232
233
CAnnotObject_Info(CTSE_Chunk_Info & chunk_info,const SAnnotTypeSelector & sel)234 CAnnotObject_Info::CAnnotObject_Info(CTSE_Chunk_Info& chunk_info,
235 const SAnnotTypeSelector& sel)
236 : m_Seq_annot_Info(0),
237 m_ObjectIndex(eChunkStub),
238 m_Type(sel)
239 {
240 m_Iter.m_Chunk = &chunk_info;
241 _ASSERT(IsChunkStub());
242 _ASSERT(m_Iter.m_RawPtr != 0);
243 }
244
245
~CAnnotObject_Info()246 CAnnotObject_Info::~CAnnotObject_Info()
247 {
248 Reset();
249 }
250
251 #ifdef NCBI_NON_POD_TYPE_STL_ITERATORS
252
// Copy constructor.
// For a regular source the iterator slot matching the source's kind is
// constructed and then assigned; otherwise only the raw pointer is copied.
CAnnotObject_Info::CAnnotObject_Info(const CAnnotObject_Info& info)
    : m_Seq_annot_Info(info.m_Seq_annot_Info),
      m_ObjectIndex(info.m_ObjectIndex),
      m_Type(info.m_Type)
{
    if ( !info.IsRegular() ) {
        // Nothing to construct, the raw pointer is all there is
        m_Iter.m_RawPtr = info.m_Iter.m_RawPtr;
        _ASSERT(!IsRegular());
        return;
    }

    if ( info.IsFeat() ) {
        m_Iter.m_Feat.Construct();
        *m_Iter.m_Feat = *info.m_Iter.m_Feat;
        _ASSERT(IsFeat());
    }
    else if ( info.IsAlign() ) {
        m_Iter.m_Align.Construct();
        *m_Iter.m_Align = *info.m_Iter.m_Align;
        _ASSERT(IsAlign());
    }
    else if ( info.IsGraph() ) {
        m_Iter.m_Graph.Construct();
        *m_Iter.m_Graph = *info.m_Iter.m_Graph;
        _ASSERT(IsGraph());
    }
    else if ( info.IsLocs() ) {
        m_Iter.m_Locs.Construct();
        *m_Iter.m_Locs = *info.m_Iter.m_Locs;
        _ASSERT(IsLocs());
    }
    _ASSERT(IsRegular());
}
286
287
// Copy assignment.
// Self-assignment is a no-op.  Otherwise the current content is released
// with Reset(), the scalar members are copied, and the iterator slot is
// rebuilt the same way the copy constructor does it.
CAnnotObject_Info& CAnnotObject_Info::operator=(const CAnnotObject_Info& info)
{
    if ( this == &info ) {
        return *this;
    }

    Reset();
    m_Seq_annot_Info = info.m_Seq_annot_Info;
    m_ObjectIndex = info.m_ObjectIndex;
    m_Type = info.m_Type;

    if ( !info.IsRegular() ) {
        // Nothing to construct, the raw pointer is all there is
        m_Iter.m_RawPtr = info.m_Iter.m_RawPtr;
        _ASSERT(!IsRegular());
        return *this;
    }

    if ( info.IsFeat() ) {
        m_Iter.m_Feat.Construct();
        *m_Iter.m_Feat = *info.m_Iter.m_Feat;
        _ASSERT(IsFeat());
    }
    else if ( info.IsAlign() ) {
        m_Iter.m_Align.Construct();
        *m_Iter.m_Align = *info.m_Iter.m_Align;
        _ASSERT(IsAlign());
    }
    else if ( info.IsGraph() ) {
        m_Iter.m_Graph.Construct();
        *m_Iter.m_Graph = *info.m_Iter.m_Graph;
        _ASSERT(IsGraph());
    }
    else if ( info.IsLocs() ) {
        m_Iter.m_Locs.Construct();
        *m_Iter.m_Locs = *info.m_Iter.m_Locs;
        _ASSERT(IsLocs());
    }
    _ASSERT(IsRegular());
    return *this;
}
325
326 #endif
327
328
Reset(void)329 void CAnnotObject_Info::Reset(void)
330 {
331 #ifdef NCBI_NON_POD_TYPE_STL_ITERATORS
332 if ( IsRegular() ) {
333 if ( IsFeat() ) {
334 m_Iter.m_Feat.Destruct();
335 }
336 else if ( IsAlign() ) {
337 m_Iter.m_Align.Destruct();
338 }
339 else if ( IsGraph() ) {
340 m_Iter.m_Graph.Destruct();
341 }
342 else if ( IsLocs() ) {
343 m_Iter.m_Locs.Destruct();
344 }
345 }
346 #endif
347 m_Type.SetAnnotType(C_Data::e_not_set);
348 m_Iter.m_RawPtr = 0;
349 m_ObjectIndex = eEmpty;
350 m_Seq_annot_Info = 0;
351 }
352
353
GetObject(void) const354 CConstRef<CObject> CAnnotObject_Info::GetObject(void) const
355 {
356 return ConstRef(GetObjectPointer());
357 }
358
359
GetObjectPointer(void) const360 const CObject* CAnnotObject_Info::GetObjectPointer(void) const
361 {
362 switch ( Which() ) {
363 case C_Data::e_Ftable:
364 return GetFeatFast();
365 case C_Data::e_Graph:
366 return GetGraphFast();
367 case C_Data::e_Align:
368 return &GetAlign();
369 case C_Data::e_Locs:
370 return &GetLocs();
371 default:
372 return 0;
373 }
374 }
375
376
GetMaps(vector<CHandleRangeMap> & hrmaps,const CMasterSeqSegments * master) const377 void CAnnotObject_Info::GetMaps(vector<CHandleRangeMap>& hrmaps,
378 const CMasterSeqSegments* master) const
379 {
380 _ASSERT(IsRegular());
381 switch ( Which() ) {
382 case C_Data::e_Ftable:
383 x_ProcessFeat(hrmaps, *GetFeatFast(), master);
384 break;
385 case C_Data::e_Graph:
386 x_ProcessGraph(hrmaps, *GetGraphFast(), master);
387 break;
388 case C_Data::e_Align:
389 {
390 const CSeq_align& align = GetAlign();
391 // TODO: separate alignment locations
392 hrmaps.clear();
393 x_ProcessAlign(hrmaps, align, master);
394 break;
395 }
396 case C_Data::e_Locs:
397 {
398 _ASSERT(!IsRemoved());
399 // Index by location in region descriptor, not by referenced one
400 const CSeq_annot& annot = *GetSeq_annot_Info().GetCompleteSeq_annot();
401 if ( !annot.IsSetDesc() ) {
402 break;
403 }
404 CConstRef<CSeq_loc> region;
405 ITERATE(CSeq_annot::TDesc::Tdata, desc_it, annot.GetDesc().Get()) {
406 if ( (*desc_it)->IsRegion() ) {
407 region.Reset(&(*desc_it)->GetRegion());
408 break;
409 }
410 }
411 if ( region ) {
412 hrmaps.resize(1);
413 hrmaps[0].clear();
414 hrmaps[0].SetMasterSeq(master);
415 hrmaps[0].AddLocation(*region);
416 }
417 break;
418 }
419 default:
420 break;
421 }
422 }
423
424 /* static */
x_ProcessFeat(vector<CHandleRangeMap> & hrmaps,const CSeq_feat & feat,const CMasterSeqSegments * master)425 void CAnnotObject_Info::x_ProcessFeat(vector<CHandleRangeMap>& hrmaps,
426 const CSeq_feat& feat,
427 const CMasterSeqSegments* master)
428 {
429 hrmaps.resize(feat.IsSetProduct()? 2: 1);
430 hrmaps[0].clear();
431 hrmaps[0].SetMasterSeq(master);
432 CHandleRangeMap::ETransSplicing mode = CHandleRangeMap::eNoTransSplicing;
433 if ( feat.IsSetExcept_text() &&
434 feat.GetExcept_text().find("trans-splicing") != NPOS ) {
435 mode = CHandleRangeMap::eTransSplicing;
436 }
437 hrmaps[0].AddLocation(feat.GetLocation(), mode);
438 if ( feat.IsSetProduct() ) {
439 hrmaps[1].clear();
440 hrmaps[1].SetMasterSeq(master);
441 hrmaps[1].AddLocation(feat.GetProduct(), mode);
442 }
443 }
444 /* static */
x_ProcessGraph(vector<CHandleRangeMap> & hrmaps,const CSeq_graph & graph,const CMasterSeqSegments * master)445 void CAnnotObject_Info::x_ProcessGraph(vector<CHandleRangeMap>& hrmaps,
446 const CSeq_graph& graph,
447 const CMasterSeqSegments* master)
448 {
449 hrmaps.resize(1);
450 hrmaps[0].clear();
451 hrmaps[0].SetMasterSeq(master);
452 hrmaps[0].AddLocation(graph.GetLoc());
453 }
454
GetSeq_entry_Info(void) const455 const CSeq_entry_Info& CAnnotObject_Info::GetSeq_entry_Info(void) const
456 {
457 return GetSeq_annot_Info().GetParentSeq_entry_Info();
458 }
459
460
GetTSE_Info(void) const461 const CTSE_Info& CAnnotObject_Info::GetTSE_Info(void) const
462 {
463 return GetSeq_annot_Info().GetTSE_Info();
464 }
465
466
GetTSE_Info(void)467 CTSE_Info& CAnnotObject_Info::GetTSE_Info(void)
468 {
469 return GetSeq_annot_Info().GetTSE_Info();
470 }
471
472
GetDataSource(void) const473 CDataSource& CAnnotObject_Info::GetDataSource(void) const
474 {
475 return GetSeq_annot_Info().GetDataSource();
476 }
477
478
479 const CTempString kAnnotTypePrefix = "Seq-annot.data.";
480
GetLocsTypes(TTypeIndexSet & idx_set) const481 void CAnnotObject_Info::GetLocsTypes(TTypeIndexSet& idx_set) const
482 {
483 const CSeq_annot& annot = *GetSeq_annot_Info().GetCompleteSeq_annot();
484 _ASSERT(annot.IsSetDesc());
485 ITERATE(CSeq_annot::TDesc::Tdata, desc_it, annot.GetDesc().Get()) {
486 if ( !(*desc_it)->IsUser() ) {
487 continue;
488 }
489 const CUser_object& obj = (*desc_it)->GetUser();
490 if ( !obj.GetType().IsStr() ) {
491 continue;
492 }
493 CTempString type = obj.GetType().GetStr();
494 if (type.substr(0, kAnnotTypePrefix.size()) != kAnnotTypePrefix) {
495 continue;
496 }
497 type = type.substr(kAnnotTypePrefix.size());
498 if (type == "align") {
499 idx_set.push_back(CAnnotType_Index::GetAnnotTypeRange(
500 C_Data::e_Align));
501 }
502 else if (type == "graph") {
503 idx_set.push_back(CAnnotType_Index::GetAnnotTypeRange(
504 C_Data::e_Graph));
505 }
506 else if (type == "ftable") {
507 if ( obj.GetData().size() == 0 ) {
508 // Feature type/subtype not set
509 idx_set.push_back(CAnnotType_Index::GetAnnotTypeRange(
510 C_Data::e_Ftable));
511 continue;
512 }
513 // Parse feature types and subtypes
514 ITERATE(CUser_object::TData, data_it, obj.GetData()) {
515 const CUser_field& field = **data_it;
516 if ( !field.GetLabel().IsId() ) {
517 continue;
518 }
519 int ftype = field.GetLabel().GetId();
520 switch (field.GetData().Which()) {
521 case CUser_field::C_Data::e_Int:
522 x_Locs_AddFeatSubtype(ftype,
523 field.GetData().GetInt(), idx_set);
524 break;
525 case CUser_field::C_Data::e_Ints:
526 {
527 ITERATE(CUser_field::C_Data::TInts, it,
528 field.GetData().GetInts()) {
529 x_Locs_AddFeatSubtype(ftype, *it, idx_set);
530 }
531 break;
532 }
533 default:
534 break;
535 }
536 }
537 }
538 }
539 }
540
541
x_Locs_AddFeatSubtype(int ftype,int subtype,TTypeIndexSet & idx_set) const542 void CAnnotObject_Info::x_Locs_AddFeatSubtype(int ftype,
543 int subtype,
544 TTypeIndexSet& idx_set) const
545 {
546 if (subtype != CSeqFeatData::eSubtype_any) {
547 size_t idx =
548 CAnnotType_Index::GetSubtypeIndex(CSeqFeatData::ESubtype(subtype));
549 idx_set.push_back(TIndexRange(idx, idx+1));
550 }
551 else {
552 idx_set.push_back(
553 CAnnotType_Index::GetFeatTypeRange(CSeqFeatData::E_Choice(ftype)));
554 }
555 }
556
557
558 /* static */
x_ProcessAlign(vector<CHandleRangeMap> & hrmaps,const CSeq_align & align,const CMasterSeqSegments * master)559 void CAnnotObject_Info::x_ProcessAlign(vector<CHandleRangeMap>& hrmaps,
560 const CSeq_align& align,
561 const CMasterSeqSegments* master)
562 {
563 //### Check the implementation.
564 switch ( align.GetSegs().Which() ) {
565 case CSeq_align::C_Segs::e_not_set:
566 {
567 break;
568 }
569 case CSeq_align::C_Segs::e_Dendiag:
570 {
571 const CSeq_align::C_Segs::TDendiag& dendiag =
572 align.GetSegs().GetDendiag();
573 ITERATE ( CSeq_align::C_Segs::TDendiag, it, dendiag ) {
574 const CDense_diag& diag = **it;
575 int dim = diag.GetDim();
576 if (dim != (int)diag.GetIds().size()) {
577 ERR_POST_X(1, Warning << "Invalid 'ids' size in dendiag");
578 dim = min(dim, (int)diag.GetIds().size());
579 }
580 if (dim != (int)diag.GetStarts().size()) {
581 ERR_POST_X(2, Warning << "Invalid 'starts' size in dendiag");
582 dim = min(dim, (int)diag.GetStarts().size());
583 }
584 if (diag.IsSetStrands()
585 && dim != (int)diag.GetStrands().size()) {
586 ERR_POST_X(3, Warning << "Invalid 'strands' size in dendiag");
587 dim = min(dim, (int)diag.GetStrands().size());
588 }
589 if ((int)hrmaps.size() < dim) {
590 hrmaps.resize(dim);
591 }
592 TSeqPos len = (*it)->GetLen();
593 for (int row = 0; row < dim; ++row) {
594 const CSeq_id& id = *(*it)->GetIds()[row];
595 TSeqPos from = (*it)->GetStarts()[row];
596 TSeqPos to = from + len - 1;
597 ENa_strand strand = eNa_strand_unknown;
598 if ( (*it)->IsSetStrands() ) {
599 strand = (*it)->GetStrands()[row];
600 }
601 hrmaps[row].SetMasterSeq(master);
602 hrmaps[row].AddRange(id, from, to, strand);
603 }
604 }
605 break;
606 }
607 case CSeq_align::C_Segs::e_Denseg:
608 {
609 const CSeq_align::C_Segs::TDenseg& denseg =
610 align.GetSegs().GetDenseg();
611 size_t dim = size_t(denseg.GetDim());
612 size_t numseg = size_t(denseg.GetNumseg());
613 // claimed dimension may not be accurate :-/
614 if (numseg != denseg.GetLens().size()) {
615 ERR_POST_X(4, Warning << "Invalid 'lens' size in denseg");
616 numseg = min(numseg, denseg.GetLens().size());
617 }
618 if (dim != denseg.GetIds().size()) {
619 ERR_POST_X(5, Warning << "Invalid 'ids' size in denseg");
620 dim = min(dim, denseg.GetIds().size());
621 }
622 if (dim*numseg != denseg.GetStarts().size()) {
623 ERR_POST_X(6, Warning << "Invalid 'starts' size in denseg");
624 dim = min(dim*numseg, denseg.GetStarts().size()) / numseg;
625 }
626 if (denseg.IsSetStrands()
627 && dim*numseg != denseg.GetStrands().size()) {
628 ERR_POST_X(7, Warning << "Invalid 'strands' size in denseg");
629 dim = min(dim*numseg, denseg.GetStrands().size()) / numseg;
630 }
631 if (hrmaps.size() < dim) {
632 hrmaps.resize(dim);
633 }
634 for (size_t seg = 0; seg < numseg; seg++) {
635 for (size_t row = 0; row < dim; row++) {
636 if (denseg.GetStarts()[seg*dim + row] < 0 ) {
637 continue;
638 }
639 const CSeq_id& id = *denseg.GetIds()[row];
640 TSeqPos from = denseg.GetStarts()[seg*dim + row];
641 TSeqPos to = from + denseg.GetLens()[seg] - 1;
642 ENa_strand strand = eNa_strand_unknown;
643 if ( denseg.IsSetStrands() ) {
644 strand = denseg.GetStrands()[seg*dim + row];
645 }
646 hrmaps[row].SetMasterSeq(master);
647 hrmaps[row].AddRange(id, from, to, strand);
648 }
649 }
650 break;
651 }
652 case CSeq_align::C_Segs::e_Std:
653 {
654 const CSeq_align::C_Segs::TStd& std =
655 align.GetSegs().GetStd();
656 ITERATE ( CSeq_align::C_Segs::TStd, it, std ) {
657 size_t dim = size_t((*it)->GetDim());
658 if (hrmaps.size() < dim) {
659 hrmaps.resize((*it)->GetDim());
660 }
661 ITERATE ( CStd_seg::TLoc, it_loc, (*it)->GetLoc() ) {
662 CSeq_loc_CI row_it(**it_loc);
663 for (size_t row = 0; row_it; ++row_it, ++row) {
664 if (row >= hrmaps.size()) {
665 hrmaps.resize(row + 1);
666 }
667 const CSeq_id& id = row_it.GetSeq_id();
668 TSeqPos from = row_it.GetRange().GetFrom();
669 TSeqPos to = row_it.GetRange().GetTo();
670 ENa_strand strand = row_it.GetStrand();
671 hrmaps[row].SetMasterSeq(master);
672 hrmaps[row].AddRange(id, from, to, strand);
673 }
674 }
675 }
676 break;
677 }
678 case CSeq_align::C_Segs::e_Packed:
679 {
680 const CSeq_align::C_Segs::TPacked& packed =
681 align.GetSegs().GetPacked();
682 size_t dim = size_t(packed.GetDim());
683 size_t numseg = size_t(packed.GetNumseg());
684 // claimed dimension may not be accurate :-/
685 if (dim * numseg > packed.GetStarts().size()) {
686 dim = packed.GetStarts().size() / numseg;
687 }
688 if (dim * numseg > packed.GetPresent().size()) {
689 dim = packed.GetPresent().size() / numseg;
690 }
691 if (dim > packed.GetLens().size()) {
692 dim = packed.GetLens().size();
693 }
694 if (hrmaps.size() < dim) {
695 hrmaps.resize(dim);
696 }
697 for (size_t seg = 0; seg < numseg; seg++) {
698 for (size_t row = 0; row < dim; row++) {
699 if ( packed.GetPresent()[seg*dim + row] ) {
700 hrmaps[row].SetMasterSeq(master);
701 const CSeq_id& id = *packed.GetIds()[row];
702 TSeqPos from = packed.GetStarts()[seg*dim + row];
703 TSeqPos to = from + packed.GetLens()[seg] - 1;
704 ENa_strand strand = eNa_strand_unknown;
705 if ( packed.IsSetStrands() ) {
706 strand = packed.GetStrands()[seg*dim + row];
707 }
708 hrmaps[row].AddRange(id, from, to, strand);
709 }
710 }
711 }
712 break;
713 }
714 case CSeq_align::C_Segs::e_Disc:
715 {
716 const CSeq_align::C_Segs::TDisc& disc =
717 align.GetSegs().GetDisc();
718 ITERATE ( CSeq_align_set::Tdata, it, disc.Get() ) {
719 x_ProcessAlign(hrmaps, **it, master);
720 }
721 break;
722 }
723 case CSeq_align::C_Segs::e_Spliced:
724 {
725 const CSeq_align::C_Segs::TSpliced& spliced =
726 align.GetSegs().GetSpliced();
727 const CSeq_id* gen_id = spliced.IsSetGenomic_id() ?
728 &spliced.GetGenomic_id() : 0;
729 const CSeq_id* prod_id = spliced.IsSetProduct_id() ?
730 &spliced.GetProduct_id() : 0;
731 hrmaps.resize(2);
732 ITERATE ( CSpliced_seg::TExons, it, spliced.GetExons() ) {
733 const CSpliced_exon& ex = **it;
734 const CSeq_id* ex_gen_id = ex.IsSetGenomic_id() ?
735 &ex.GetGenomic_id() : gen_id;
736 if ( ex_gen_id ) {
737 const CSeq_id& id = *ex_gen_id;
738 TSeqPos from = ex.GetGenomic_start();
739 TSeqPos to = ex.GetGenomic_end();
740 ENa_strand strand = eNa_strand_unknown;
741 if ( ex.IsSetGenomic_strand() ) {
742 strand = ex.GetGenomic_strand();
743 }
744 else if ( spliced.IsSetGenomic_strand() ) {
745 strand = spliced.GetGenomic_strand();
746 }
747 hrmaps[1].SetMasterSeq(master);
748 hrmaps[1].AddRange(id, from, to, strand);
749 }
750 const CSeq_id* ex_prod_id = ex.IsSetProduct_id() ?
751 &ex.GetProduct_id() : prod_id;
752 if ( ex_prod_id ) {
753 const CSeq_id& id = *ex_prod_id;
754 TSeqPos from =
755 (ex.GetProduct_start().IsNucpos() ?
756 ex.GetProduct_start().GetNucpos() :
757 ex.GetProduct_start().GetProtpos().GetAmin());
758 TSeqPos to =
759 (ex.GetProduct_end().IsNucpos() ?
760 ex.GetProduct_end().GetNucpos() :
761 ex.GetProduct_end().GetProtpos().GetAmin());
762 ENa_strand strand = eNa_strand_unknown;
763 if ( ex.IsSetProduct_strand() ) {
764 strand = ex.GetProduct_strand();
765 }
766 else if ( spliced.IsSetProduct_strand() ) {
767 strand = spliced.GetProduct_strand();
768 }
769 hrmaps[0].SetMasterSeq(master);
770 hrmaps[0].AddRange(id, from, to, strand);
771 }
772 }
773 break;
774 }
775 case CSeq_align::C_Segs::e_Sparse:
776 {
777 const CSeq_align::C_Segs::TSparse& sparse =
778 align.GetSegs().GetSparse();
779 // consensus sequence row + one row for each sub-alignment
780 size_t dim = sparse.GetRows().size() + 1;
781 if (hrmaps.size() < dim) {
782 hrmaps.resize(dim);
783 }
784 size_t row = 0;
785 hrmaps[0].SetMasterSeq(master);
786 ITERATE ( CSparse_seg::TRows, it, sparse.GetRows() ) {
787 const CSparse_align& aln_row = **it;
788 size_t numseg = aln_row.GetNumseg();
789 if (numseg != aln_row.GetFirst_starts().size()) {
790 ERR_POST_X(9, Warning <<
791 "Invalid size of 'first-starts' in sparse-align");
792 numseg = min(numseg, aln_row.GetFirst_starts().size());
793 }
794 if (numseg != aln_row.GetSecond_starts().size()) {
795 ERR_POST_X(10, Warning <<
796 "Invalid size of 'second-starts' in sparse-align");
797 numseg = min(numseg, aln_row.GetSecond_starts().size());
798 }
799 if (numseg != aln_row.GetLens().size()) {
800 ERR_POST_X(11, Warning <<
801 "Invalid size of 'lens' in sparse-align");
802 numseg = min(numseg, aln_row.GetLens().size());
803 }
804 if (aln_row.IsSetSecond_strands() &&
805 numseg != aln_row.GetSecond_strands().size()) {
806 ERR_POST_X(12, Warning <<
807 "Invalid size of 'second-strands' in sparse-align");
808 numseg = min(numseg, aln_row.GetSecond_strands().size());
809 }
810
811 hrmaps[row+1].SetMasterSeq(master);
812 for (size_t seg = 0; seg < numseg; ++seg) {
813 TSeqPos len = aln_row.GetLens()[seg];
814 {
815 const CSeq_id& id = aln_row.GetFirst_id();
816 TSeqPos from = aln_row.GetFirst_starts()[seg];
817 TSeqPos to = from + len - 1;
818 // consensus sequence goes to the first row
819 hrmaps[0].AddRange(id, from, to);
820 }
821 {
822 const CSeq_id& id = aln_row.GetSecond_id();
823 TSeqPos from = aln_row.GetSecond_starts()[seg];
824 TSeqPos to = from + len - 1;
825 ENa_strand strand = eNa_strand_unknown;
826 if ( aln_row.IsSetSecond_strands() ) {
827 strand = aln_row.GetSecond_strands()[row];
828 }
829 hrmaps[row+1].AddRange(id, from, to, strand);
830 }
831 }
832 row++;
833 }
834 break;
835 }
836 default:
837 {
838 ERR_POST_X(8, Warning << "Unknown type of Seq-align: "<<
839 align.GetSegs().Which());
840 break;
841 }
842 }
843 }
844
845
x_SetObject(const CSeq_feat & new_obj)846 void CAnnotObject_Info::x_SetObject(const CSeq_feat& new_obj)
847 {
848 x_GetFeatIter()->Reset(&const_cast<CSeq_feat&>(new_obj));
849 m_Type.SetFeatSubtype(new_obj.GetData().GetSubtype());
850 }
851
852
x_SetObject(const CSeq_align & new_obj)853 void CAnnotObject_Info::x_SetObject(const CSeq_align& new_obj)
854 {
855 x_GetAlignIter()->Reset(&const_cast<CSeq_align&>(new_obj));
856 m_Type.SetAnnotType(C_Data::e_Align);
857 }
858
859
x_SetObject(const CSeq_graph & new_obj)860 void CAnnotObject_Info::x_SetObject(const CSeq_graph& new_obj)
861 {
862 x_GetGraphIter()->Reset(&const_cast<CSeq_graph&>(new_obj));
863 m_Type.SetAnnotType(C_Data::e_Graph);
864 }
865
866
x_MoveToBack(TFtable & cont)867 void CAnnotObject_Info::x_MoveToBack(TFtable& cont)
868 {
869 _ASSERT(IsFeat() && IsRegular() && m_Iter.m_RawPtr);
870 TFtable::iterator old_iter = *m_Iter.m_Feat;
871 *m_Iter.m_Feat = cont.insert(cont.end(), *old_iter);
872 cont.erase(old_iter);
873 }
874
875
876 END_SCOPE(objects)
877 END_NCBI_SCOPE
878