1 /* $Id: annot_collector.cpp 608523 2020-05-18 16:26:33Z vasilche $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aleksey Grichenko, Eugene Vasilchenko
27 *
28 * File Description:
29 * Annotation collector for annot iterators
30 *
31 */
32
33 #include <ncbi_pch.hpp>
34 #include <objmgr/impl/annot_collector.hpp>
35
36 #include <objmgr/scope.hpp>
37 #include <objmgr/bioseq_handle.hpp>
38 #include <objmgr/seq_entry_handle.hpp>
39 #include <objmgr/seq_annot_handle.hpp>
40 #include <objmgr/seq_feat_handle.hpp>
41 #include <objmgr/seq_map_ci.hpp>
42 #include <objmgr/impl/annot_object.hpp>
43 #include <objmgr/impl/tse_info.hpp>
44 #include <objmgr/impl/annot_type_index.hpp>
45 #include <objmgr/impl/tse_chunk_info.hpp>
46 #include <objmgr/impl/data_source.hpp>
47 #include <objmgr/impl/seq_annot_info.hpp>
48 #include <objmgr/impl/bioseq_set_info.hpp>
49 #include <objmgr/impl/handle_range_map.hpp>
50 #include <objmgr/impl/synonyms.hpp>
51 #include <objmgr/impl/seq_loc_cvt.hpp>
52 #include <objmgr/impl/seq_align_mapper.hpp>
53 #include <objmgr/impl/snp_annot_info.hpp>
54 #include <objmgr/impl/seq_table_info.hpp>
55 #include <objmgr/impl/bioseq_info.hpp>
56 #include <objmgr/impl/scope_impl.hpp>
57 #include <objmgr/mapped_feat.hpp>
58 #include <objmgr/graph_ci.hpp>
59 #include <objmgr/objmgr_exception.hpp>
60 #include <objmgr/impl/tse_split_info.hpp>
61 #include <objmgr/error_codes.hpp>
62
63 #include <objects/seq/Bioseq.hpp>
64 #include <objects/seqloc/Seq_loc.hpp>
65 #include <objects/seqset/Seq_entry.hpp>
66 #include <objects/seqalign/Seq_align.hpp>
67 #include <objects/seqres/Seq_graph.hpp>
68 #include <objects/seqloc/Seq_loc_equiv.hpp>
69 #include <objects/seqloc/Seq_bond.hpp>
70 #include <objects/seqfeat/seqfeat__.hpp>
71 #include <objects/general/User_object.hpp>
72
73 #include <serial/typeinfo.hpp>
74 #include <serial/objostr.hpp>
75 #include <serial/objostrasn.hpp>
76 #include <serial/serial.hpp>
77 #include <serial/serialutil.hpp>
78
79 #include <util/timsort.hpp>
80 #include <algorithm>
81 #include <typeinfo>
82
83
84 #define NCBI_USE_ERRCODE_X ObjMgr_AnnotCollect
85
86 BEGIN_NCBI_SCOPE
87
88 NCBI_DEFINE_ERR_SUBCODE_X(2);
89
BEGIN_SCOPE(objects)90 BEGIN_SCOPE(objects)
91
92
93 /////////////////////////////////////////////////////////////////////////////
94 // CAnnotMapping_Info
95 /////////////////////////////////////////////////////////////////////////////
96
97
98 void CAnnotMapping_Info::Reset(void)
99 {
100 m_TotalRange = TRange::GetEmpty();
101 m_MappedObject.Reset();
102 m_MappedObjectType = eMappedObjType_not_set;
103 m_MappedStrand = eNa_strand_unknown;
104 m_MappedFlags = 0;
105 }
106
107
GetMappedSeq_loc_Conv(void) const108 CSeq_loc_Conversion& CAnnotMapping_Info::GetMappedSeq_loc_Conv(void) const
109 {
110 _ASSERT(GetMappedObjectType() == eMappedObjType_Seq_loc_Conv);
111 return static_cast<CSeq_loc_Conversion&>(m_MappedObject.GetNCObject());
112 }
113
114
SetMappedConverstion(CSeq_loc_Conversion & cvt)115 void CAnnotMapping_Info::SetMappedConverstion(CSeq_loc_Conversion& cvt)
116 {
117 _ASSERT(!IsMapped());
118 m_MappedObject.Reset(&cvt);
119 m_MappedObjectType = eMappedObjType_Seq_loc_Conv;
120 }
121
122
SetMappedSeq_align(CSeq_align * align)123 void CAnnotMapping_Info::SetMappedSeq_align(CSeq_align* align)
124 {
125 _ASSERT(m_MappedObjectType == eMappedObjType_Seq_loc_Conv_Set);
126 m_MappedObject.Reset(align);
127 m_MappedObjectType =
128 align? eMappedObjType_Seq_align: eMappedObjType_not_set;
129 }
130
131
SetMappedSeq_align_Cvts(CSeq_loc_Conversion_Set & cvts)132 void CAnnotMapping_Info::SetMappedSeq_align_Cvts(CSeq_loc_Conversion_Set& cvts)
133 {
134 _ASSERT(!IsMapped());
135 m_MappedObject.Reset(&cvts);
136 m_MappedObjectType = eMappedObjType_Seq_loc_Conv_Set;
137 }
138
139
SetGraphRanges(CGraphRanges * ranges)140 void CAnnotMapping_Info::SetGraphRanges(CGraphRanges* ranges)
141 {
142 m_GraphRanges.Reset(ranges);
143 }
144
145
GetGraphRanges(void) const146 const CGraphRanges* CAnnotMapping_Info::GetGraphRanges(void) const
147 {
148 return m_GraphRanges.GetPointerOrNull();
149 }
150
151
// Return the mapped alignment for 'orig'.
//
// If the info still holds a conversion set, the alignment is converted
// now, the total range is recomputed from the converted alignment, and
// the conversion set is replaced by the result (via const_cast, so that
// later calls take the fast path).
const CSeq_align&
CAnnotMapping_Info::GetMappedSeq_align(const CSeq_align& orig) const
{
    if (m_MappedObjectType == eMappedObjType_Seq_loc_Conv_Set) {
        // Map the alignment, replace conv-set with the mapped align
        CSeq_loc_Conversion_Set& cvts =
            const_cast<CSeq_loc_Conversion_Set&>(
            *CTypeConverter<CSeq_loc_Conversion_Set>::
            SafeCast(m_MappedObject.GetPointer()));

        CRef<CSeq_align> dst;
        cvts.Convert(orig, dst);

        // Recompute the total range, starting from an empty one.
        CRange<TSeqPos>& range = const_cast<CRange<TSeqPos>&>(m_TotalRange);
        range = range.GetEmpty();
        vector<CHandleRangeMap> hrmaps;
        // NOTE(review): 'dst' is dereferenced here although the call to
        // SetMappedSeq_align() below allows for a null result - confirm
        // that Convert() always produces an alignment.
        CAnnotObject_Info::x_ProcessAlign(hrmaps, *dst, 0);
        const CSeq_loc_Conversion_Set::TSeq_id_Handles& dst_ids =
            cvts.GetDst_id_Handles();
        // Only ids that are destinations of the conversion set
        // contribute to the total range.
        ITERATE ( vector<CHandleRangeMap>, rowit, hrmaps ) {
            ITERATE ( CHandleRangeMap, idit, *rowit ) {
                if ( dst_ids.find(idit->first) != dst_ids.end() ) {
                    range.CombineWith(idit->second.GetOverlappingRange());
                }
            }
        }

        // Cache the result in place of the conversion set.
        const_cast<CAnnotMapping_Info&>(*this).
            SetMappedSeq_align(dst.GetPointerOrNull());
    }
    _ASSERT(m_MappedObjectType == eMappedObjType_Seq_align);
    return *CTypeConverter<CSeq_align>::
        SafeCast(m_MappedObject.GetPointer());
}
186
187
UpdateMappedSeq_loc(CRef<CSeq_loc> & loc,CRef<CSeq_point> & pnt_ref,CRef<CSeq_interval> & int_ref,const CSeq_feat * orig_feat) const188 void CAnnotMapping_Info::UpdateMappedSeq_loc(CRef<CSeq_loc>& loc,
189 CRef<CSeq_point>& pnt_ref,
190 CRef<CSeq_interval>& int_ref,
191 const CSeq_feat* orig_feat) const
192 {
193 _ASSERT(MappedSeq_locNeedsUpdate());
194 if ( !loc || !loc->ReferencedOnlyOnce() ) {
195 loc.Reset(new CSeq_loc);
196 }
197 else {
198 loc->Reset();
199 loc->InvalidateTotalRangeCache();
200 }
201 if ( GetMappedObjectType() == eMappedObjType_Seq_id ) {
202 CSeq_id& id = const_cast<CSeq_id&>(GetMappedSeq_id());
203 if ( IsMappedPoint() ) {
204 if ( !pnt_ref || !pnt_ref->ReferencedOnlyOnce() ) {
205 pnt_ref.Reset(new CSeq_point);
206 }
207 CSeq_point& point = *pnt_ref;
208 loc->SetPnt(point);
209 point.SetId(id);
210 point.SetPoint(m_TotalRange.GetFrom());
211 if ( GetMappedStrand() != eNa_strand_unknown )
212 point.SetStrand(GetMappedStrand());
213 else
214 point.ResetStrand();
215 if ( m_MappedFlags & fMapped_Partial_from ) {
216 point.SetFuzz().SetLim(CInt_fuzz::eLim_lt);
217 }
218 else {
219 point.ResetFuzz();
220 }
221 }
222 else {
223 if ( !int_ref || !int_ref->ReferencedOnlyOnce() ) {
224 int_ref.Reset(new CSeq_interval);
225 }
226 CSeq_interval& interval = *int_ref;
227 loc->SetInt(interval);
228 interval.SetId(id);
229 interval.SetFrom(m_TotalRange.GetFrom());
230 interval.SetTo(m_TotalRange.GetTo());
231 if ( GetMappedStrand() != eNa_strand_unknown )
232 interval.SetStrand(GetMappedStrand());
233 else
234 interval.ResetStrand();
235 if ( m_MappedFlags & fMapped_Partial_from ) {
236 interval.SetFuzz_from().SetLim(CInt_fuzz::eLim_lt);
237 }
238 else {
239 interval.ResetFuzz_from();
240 }
241 if ( m_MappedFlags & fMapped_Partial_to ) {
242 interval.SetFuzz_to().SetLim(CInt_fuzz::eLim_gt);
243 }
244 else {
245 interval.ResetFuzz_to();
246 }
247 }
248 }
249 else {
250 CSeq_loc_Conversion& cvt = GetMappedSeq_loc_Conv();
251 const CSeq_loc& orig_loc = m_MappedFlags & fMapped_Product?
252 orig_feat->GetProduct(): orig_feat->GetLocation();
253 cvt.MakeDstMix(loc->SetMix(), orig_loc.GetMix());
254 }
255 }
256
257
SetMappedSeq_feat(CSeq_feat & feat)258 void CAnnotMapping_Info::SetMappedSeq_feat(CSeq_feat& feat)
259 {
260 _ASSERT( IsMapped() );
261 _ASSERT(GetMappedObjectType() != eMappedObjType_Seq_feat);
262
263 // Fill mapped location and product in the mapped feature
264 CRef<CSeq_loc> mapped_loc;
265 if ( MappedSeq_locNeedsUpdate() ) {
266 mapped_loc.Reset(new CSeq_loc);
267 CRef<CSeq_point> mapped_pnt;
268 CRef<CSeq_interval> mapped_int;
269 UpdateMappedSeq_loc(mapped_loc, mapped_pnt, mapped_int, &feat);
270 }
271 else {
272 mapped_loc.Reset(&const_cast<CSeq_loc&>(GetMappedSeq_loc()));
273 }
274 if ( IsMappedLocation() ) {
275 feat.SetLocation(*mapped_loc);
276 }
277 else if ( IsMappedProduct() ) {
278 feat.SetProduct(*mapped_loc);
279 }
280 if ( IsPartial() ) {
281 feat.SetPartial(true);
282 }
283 else {
284 feat.ResetPartial();
285 }
286
287 m_MappedObject.Reset(&feat);
288 m_MappedObjectType = eMappedObjType_Seq_feat;
289 }
290
291
InitializeMappedSeq_feat(const CSeq_feat & src,CSeq_feat & dst) const292 void CAnnotMapping_Info::InitializeMappedSeq_feat(const CSeq_feat& src,
293 CSeq_feat& dst) const
294 {
295 CSeq_feat& src_nc = const_cast<CSeq_feat&>(src);
296 if ( src_nc.IsSetId() )
297 dst.SetId(src_nc.SetId());
298 else
299 dst.ResetId();
300
301 dst.SetData(src_nc.SetData());
302
303 if ( src_nc.IsSetExcept() )
304 dst.SetExcept(src_nc.GetExcept());
305 else
306 dst.ResetExcept();
307
308 if ( src_nc.IsSetComment() )
309 dst.SetComment(src_nc.GetComment());
310 else
311 dst.ResetComment();
312
313 if ( src_nc.IsSetQual() )
314 dst.SetQual() = src_nc.GetQual();
315 else
316 dst.ResetQual();
317
318 if ( src_nc.IsSetTitle() )
319 dst.SetTitle(src_nc.GetTitle());
320 else
321 dst.ResetTitle();
322
323 if ( src_nc.IsSetExt() )
324 dst.SetExt(src_nc.SetExt());
325 else
326 dst.ResetExt();
327
328 if ( src_nc.IsSetCit() )
329 dst.SetCit(src_nc.SetCit());
330 else
331 dst.ResetCit();
332
333 if ( src_nc.IsSetExp_ev() )
334 dst.SetExp_ev(src_nc.GetExp_ev());
335 else
336 dst.ResetExp_ev();
337
338 if ( src_nc.IsSetXref() )
339 dst.SetXref() = src_nc.SetXref();
340 else
341 dst.ResetXref();
342
343 if ( src_nc.IsSetDbxref() )
344 dst.SetDbxref() = src_nc.SetDbxref();
345 else
346 dst.ResetDbxref();
347
348 if ( src_nc.IsSetPseudo() )
349 dst.SetPseudo(src_nc.GetPseudo());
350 else
351 dst.ResetPseudo();
352
353 if ( src_nc.IsSetExcept_text() )
354 dst.SetExcept_text(src_nc.GetExcept_text());
355 else
356 dst.ResetExcept_text();
357
358 if ( src_nc.IsSetIds() )
359 dst.SetIds() = src_nc.GetIds();
360 else
361 dst.ResetIds();
362
363 if ( src_nc.IsSetExts() )
364 dst.SetExts() = src_nc.GetExts();
365 else
366 dst.ResetExts();
367
368 dst.SetLocation(src_nc.SetLocation());
369 if ( src_nc.IsSetProduct() )
370 dst.SetProduct(src_nc.SetProduct());
371 else
372 dst.ResetProduct();
373 }
374
375
GetLocationId(void) const376 const CSeq_id* CAnnotMapping_Info::GetLocationId(void) const
377 {
378 switch ( GetMappedObjectType() ) {
379 case eMappedObjType_Seq_id:
380 return &GetMappedSeq_id();
381 case eMappedObjType_Seq_loc:
382 return GetMappedSeq_loc().GetId();
383 case eMappedObjType_Seq_feat:
384 return GetMappedSeq_feat().GetLocation().GetId();
385 case eMappedObjType_Seq_loc_Conv:
386 return &GetMappedSeq_loc_Conv().GetId();
387 default:
388 break;
389 }
390 return 0;
391 }
392
393
GetProductId(void) const394 const CSeq_id* CAnnotMapping_Info::GetProductId(void) const
395 {
396 switch ( GetMappedObjectType() ) {
397 case eMappedObjType_Seq_id:
398 return &GetMappedSeq_id();
399 case eMappedObjType_Seq_loc:
400 return GetMappedSeq_loc().GetId();
401 case eMappedObjType_Seq_feat:
402 return GetMappedSeq_feat().GetProduct().GetId();
403 default:
404 break;
405 }
406 return 0;
407 }
408
409
410 // Maps each seq-id to the total range for faster sorting.
411 class CIdRangeMap : public CObject
412 {
413 public:
414 CIdRangeMap(const CAnnotObject_Ref& annot_ref, const SAnnotSelector& sel);
~CIdRangeMap(void)415 virtual ~CIdRangeMap(void) {}
416
417 struct SExtremes {
418 TSeqPos from = kInvalidSeqPos;
419 TSeqPos to = kInvalidSeqPos;
420
EmptyCIdRangeMap::SExtremes421 bool Empty(void) const { return from == kInvalidSeqPos && to == kInvalidSeqPos; }
422 };
423 typedef map<CSeq_id_Handle, SExtremes> TIdRangeMap;
424 typedef CRange<TSeqPos> TRange;
425
CanSort(void) const426 bool CanSort(void) const { return m_Map.get() != nullptr; }
427
GetMap(void) const428 const TIdRangeMap& GetMap(void) const { return *m_Map; }
429
430 private:
431 unique_ptr<TIdRangeMap> m_Map;
432 };
433
434
CIdRangeMap(const CAnnotObject_Ref & annot_ref,const SAnnotSelector & sel)435 CIdRangeMap::CIdRangeMap(const CAnnotObject_Ref& annot_ref,
436 const SAnnotSelector& sel)
437 {
438 if (!annot_ref.IsPlainFeat()) {
439 return;
440 }
441 const CAnnotObject_Info& info = annot_ref.GetAnnotObject_Info();
442 _ASSERT(info.IsRegular());
443 m_Map.reset(new TIdRangeMap);
444 const CSeq_loc& loc = sel.GetFeatProduct() ?
445 info.GetFeatFast()->GetProduct() : info.GetFeatFast()->GetLocation();
446 const CSeq_id* id = loc.GetId();
447 if ( id ) {
448 SExtremes& ext = (*m_Map)[CSeq_id_Handle::GetHandle(*id)];
449 ext.from = loc.GetStart(eExtreme_Positional);
450 ext.to = loc.GetStop(eExtreme_Positional);
451 }
452 else {
453 for (CSeq_loc_CI it(loc); it; ++it) {
454 TRange rg = it.GetRange();
455 SExtremes& ext = (*m_Map)[it.GetSeq_id_Handle()];
456 if ( !ext.Empty() ) {
457 rg.CombineWith(TRange(ext.from, ext.to));
458 }
459 ext.from = rg.GetFrom();
460 ext.to = rg.GetToOpen();
461 }
462 }
463 }
464
465
SetIdRangeMap(CIdRangeMap & id_range_map)466 void CAnnotMapping_Info::SetIdRangeMap(CIdRangeMap& id_range_map)
467 {
468 if ( IsMapped() ) return;
469 _ASSERT(!IsMapped());
470 m_MappedObject.Reset(&id_range_map);
471 m_MappedObjectType = eMappedObjType_IdRangeMap;
472 }
473
474
GetIdRangeMap(void) const475 const CIdRangeMap& CAnnotMapping_Info::GetIdRangeMap(void) const
476 {
477 _ASSERT(GetMappedObjectType() == eMappedObjType_IdRangeMap);
478 return static_cast<const CIdRangeMap&>(*m_MappedObject);
479 }
480
481
482 /////////////////////////////////////////////////////////////////////////////
483 // CAnnotObject_Ref
484 /////////////////////////////////////////////////////////////////////////////
485
486
CAnnotObject_Ref(const CAnnotObject_Info & object,const CSeq_annot_Handle & annot_handle)487 CAnnotObject_Ref::CAnnotObject_Ref(const CAnnotObject_Info& object,
488 const CSeq_annot_Handle& annot_handle)
489 : m_Seq_annot(annot_handle),
490 m_AnnotIndex(object.GetAnnotIndex()),
491 m_AnnotType(eAnnot_Regular)
492 {
493 if ( object.IsFeat() ) {
494 if ( object.IsRegular() ) {
495 const CSeq_feat& feat = *object.GetFeatFast();
496 if ( feat.IsSetPartial() ) {
497 m_MappingInfo.SetPartial(feat.GetPartial());
498 }
499 }
500 else {
501 m_AnnotType = eAnnot_SeqTable;
502 m_MappingInfo.SetPartial(GetSeq_annot_Info().IsTableFeatPartial(object));
503 }
504 }
505 if ( object.HasSingleKey() ) {
506 m_MappingInfo.SetTotalRange(object.GetKey().m_Range);
507 }
508 else {
509 size_t beg = object.GetKeysBegin();
510 size_t end = object.GetKeysEnd();
511 if ( beg < end ) {
512 const SAnnotObject_Key& key =
513 GetSeq_annot_Info().GetAnnotObjectKey(beg);
514 m_MappingInfo.SetTotalRange(key.m_Range);
515 }
516 }
517 }
518
519
CAnnotObject_Ref(const CSeq_annot_SNP_Info & snp_annot,const CSeq_annot_Handle & annot_handle,const SSNP_Info & snp,CSeq_loc_Conversion * cvt)520 CAnnotObject_Ref::CAnnotObject_Ref(const CSeq_annot_SNP_Info& snp_annot,
521 const CSeq_annot_Handle& annot_handle,
522 const SSNP_Info& snp,
523 CSeq_loc_Conversion* cvt)
524 : m_Seq_annot(annot_handle),
525 m_AnnotIndex(TAnnotIndex(snp_annot.GetIndex(snp))),
526 m_AnnotType(eAnnot_SNPTable)
527 {
528 _ASSERT(IsSNPTableFeat());
529 TSeqPos src_from = snp.GetFrom(), src_to = snp.GetTo();
530 ENa_strand src_strand = eNa_strand_unknown;
531 if ( snp.MinusStrand() ) {
532 src_strand = eNa_strand_minus;
533 }
534 else if ( snp.PlusStrand() ) {
535 src_strand = eNa_strand_plus;
536 }
537 if ( !cvt ) {
538 m_MappingInfo.SetTotalRange(TRange(src_from, src_to));
539 m_MappingInfo.SetMappedSeq_id(
540 const_cast<CSeq_id&>(snp_annot.GetSeq_id()),
541 src_from == src_to);
542 m_MappingInfo.SetMappedStrand(src_strand);
543 return;
544 }
545
546 cvt->Reset();
547 if ( src_from == src_to ) {
548 // point
549 _VERIFY(cvt->ConvertPoint(src_from, src_strand));
550 }
551 else {
552 // interval
553 _VERIFY(cvt->ConvertInterval(src_from, src_to, src_strand));
554 }
555 cvt->SetMappedLocation(*this, CSeq_loc_Conversion::eLocation);
556 }
557
558
CAnnotObject_Ref(const CSeq_annot_Handle & annot_handle,const CSeq_annot_SortedIter & iter,CSeq_loc_Conversion * cvt)559 CAnnotObject_Ref::CAnnotObject_Ref(const CSeq_annot_Handle& annot_handle,
560 const CSeq_annot_SortedIter& iter,
561 CSeq_loc_Conversion* cvt)
562 : m_Seq_annot(annot_handle),
563 m_AnnotIndex(TAnnotIndex(iter.GetRow())),
564 m_AnnotType(eAnnot_SortedSeqTable)
565 {
566 _ASSERT(IsSortedSeqTableFeat());
567 const CSeqTableInfo& annot_table = GetSeqTableInfo();
568 TRange src_range = iter.GetRange();
569 ENa_strand src_strand = annot_table.GetLocationStrand(m_AnnotIndex);
570 if ( !cvt ) {
571 m_MappingInfo.SetTotalRange(src_range);
572 m_MappingInfo.SetMappedSeq_id(
573 const_cast<CSeq_id&>(*annot_table.GetLocationId(m_AnnotIndex)),
574 src_range.GetLength() == 1);
575 m_MappingInfo.SetMappedStrand(src_strand);
576 return;
577 }
578
579 cvt->Reset();
580 if ( src_range.GetLength() == 1 ) {
581 // point
582 _VERIFY(cvt->ConvertPoint(src_range.GetFrom(),
583 src_strand));
584 }
585 else {
586 // interval
587 _VERIFY(cvt->ConvertInterval(src_range.GetFrom(),
588 src_range.GetTo(),
589 src_strand));
590 }
591 cvt->SetMappedLocation(*this, CSeq_loc_Conversion::eLocation);
592 }
593
594
ResetLocation(void)595 void CAnnotObject_Ref::ResetLocation(void)
596 {
597 m_MappingInfo.Reset();
598 if ( HasAnnotObject_Info() ) {
599 const CAnnotObject_Info& object = GetAnnotObject_Info();
600 if ( object.IsFeat() ) {
601 const CSeq_feat& feat = *object.GetFeatFast();
602 if ( feat.IsSetPartial() ) {
603 m_MappingInfo.SetPartial(feat.GetPartial());
604 }
605 }
606 }
607 }
608
609
GetSeq_annot_SNP_Info(void) const610 const CSeq_annot_SNP_Info& CAnnotObject_Ref::GetSeq_annot_SNP_Info(void) const
611 {
612 _ASSERT(IsSNPTableFeat());
613 return GetSeq_annot_Info().x_GetSNP_annot_Info();
614 }
615
616
GetSeqTableInfo(void) const617 const CSeqTableInfo& CAnnotObject_Ref::GetSeqTableInfo(void) const
618 {
619 _ASSERT(IsAnySeqTableFeat());
620 return GetSeq_annot_Info().GetTableInfo();
621 }
622
623
GetAnnotObject_Info(void) const624 const CAnnotObject_Info& CAnnotObject_Ref::GetAnnotObject_Info(void) const
625 {
626 _ASSERT(HasAnnotObject_Info());
627 return GetSeq_annot_Info().GetInfo(GetAnnotIndex());
628 }
629
630
GetSNP_Info(void) const631 const SSNP_Info& CAnnotObject_Ref::GetSNP_Info(void) const
632 {
633 _ASSERT(IsSNPTableFeat());
634 return GetSeq_annot_SNP_Info().GetInfo(GetAnnotIndex());
635 }
636
637
IsFeat(void) const638 bool CAnnotObject_Ref::IsFeat(void) const
639 {
640 return !HasAnnotObject_Info() || GetAnnotObject_Info().IsFeat();
641 }
642
643
IsGraph(void) const644 bool CAnnotObject_Ref::IsGraph(void) const
645 {
646 return HasAnnotObject_Info() && GetAnnotObject_Info().IsGraph();
647 }
648
649
IsAlign(void) const650 bool CAnnotObject_Ref::IsAlign(void) const
651 {
652 return HasAnnotObject_Info() && GetAnnotObject_Info().IsAlign();
653 }
654
655
GetFeat(void) const656 const CSeq_feat& CAnnotObject_Ref::GetFeat(void) const
657 {
658 return GetAnnotObject_Info().GetFeat();
659 }
660
661
GetGraph(void) const662 const CSeq_graph& CAnnotObject_Ref::GetGraph(void) const
663 {
664 return GetAnnotObject_Info().GetGraph();
665 }
666
667
GetAlign(void) const668 const CSeq_align& CAnnotObject_Ref::GetAlign(void) const
669 {
670 return GetAnnotObject_Info().GetAlign();
671 }
672
673
674 BEGIN_LOCAL_NAMESPACE;
675
676 /////////////////////////////////////////////////////////////////////////////
// CAnnotObject_Ref comparison
678 /////////////////////////////////////////////////////////////////////////////
679
680 struct CAnnotObjectType_Less
681 {
682 bool m_ByProduct;
683 IFeatComparator* m_FeatComparator;
684 CScope* m_Scope;
685 bool m_DoWeIgnoreFarLocationsForSorting;
686
687 class CNearnessTester : public CSeq_loc::ISubLocFilter {
688 public:
CNearnessTester(const CBioseq_Handle & handle)689 CNearnessTester( const CBioseq_Handle &handle )
690 : m_BioseqHandle(handle)
691 {
692
693 }
694
695 DECLARE_OPERATOR_BOOL(m_BioseqHandle);
696
operator ()(const CSeq_id * id) const697 bool operator()( const CSeq_id *id ) const {
698 return id && m_BioseqHandle.IsSynonym(*id);
699 }
700 private:
701 CBioseq_Handle m_BioseqHandle;
702 };
703
704 CNearnessTester m_TesterForIgnoreFarLocationsForSorting;
CAnnotObjectType_LessCAnnotObjectType_Less705 explicit CAnnotObjectType_Less(const SAnnotSelector* sel,
706 CScope* scope = 0)
707 : m_ByProduct(sel->GetFeatProduct()),
708 m_FeatComparator(sel->GetFeatComparator()),
709 m_Scope(scope),
710 m_TesterForIgnoreFarLocationsForSorting(sel->GetIgnoreFarLocationsForSorting())
711 {
712 }
713
714 bool operator()(const CAnnotObject_Ref& x,
715 const CAnnotObject_Ref& y) const;
716
717 // smaller first
GetTypeOrderCAnnotObjectType_Less718 static int GetTypeOrder(CSeqFeatData::E_Choice type,
719 CSeqFeatData::ESubtype subtype)
720 {
721 if ( subtype == CSeqFeatData::eSubtype_operon ) {
722 // operon first
723 return -1;
724 }
725 else {
726 return CSeq_feat::GetTypeSortingOrder(type);
727 }
728 }
729 };
730
731 class CCreateFeat
732 {
733 public:
CCreateFeat(const CAnnotObject_Ref & ref,const CAnnotObject_Info * info)734 CCreateFeat(const CAnnotObject_Ref& ref,
735 const CAnnotObject_Info* info)
736 : m_Ref(ref), m_Info(info)
737 {
738 }
739
740 const CSeq_feat& GetOriginalFeat(void);
741 const CSeq_feat& GetMappedFeat(void);
742 int GetCdregionOrder(void);
743 const char* GetImpKey(void);
744
GetLoc(const CSeq_feat & feat,bool by_product)745 static const CSeq_loc& GetLoc(const CSeq_feat& feat, bool by_product) {
746 return by_product? feat.GetProduct(): feat.GetLocation();
747 }
748
749 ENa_strand GetStrand(bool by_product);
750
751 const CSeq_loc* GetComplexLoc(bool by_product);
752
753 bool IsSetProduct(void);
754 CConstRef<CSeq_id> GetProductId(void);
755
756 bool HasFeatLabel(void);
757 string GetFeatLabel(void);
758
759 private:
760 CRef<CSeq_feat> m_CreatedOriginalFeat;
761 const CAnnotObject_Ref& m_Ref;
762 const CAnnotObject_Info* m_Info;
763 };
764
765
GetOriginalFeat(void)766 const CSeq_feat& CCreateFeat::GetOriginalFeat(void)
767 {
768 if ( m_Ref.IsPlainFeat() ) {
769 // real Seq-feat exists
770 return *m_Info->GetFeatFast();
771 }
772 else {
773 // table feature
774 if ( !m_CreatedOriginalFeat ) {
775 CRef<CSeq_point> seq_pnt;
776 CRef<CSeq_interval> seq_int;
777 if ( m_Ref.IsSNPTableFeat() ) {
778 // SNP table feature
779 const CSeq_annot_SNP_Info& snp_info =
780 m_Ref.GetSeq_annot_SNP_Info();
781 snp_info.GetInfo(m_Ref.GetAnnotIndex())
782 .UpdateSeq_feat(m_CreatedOriginalFeat,
783 seq_pnt, seq_int,
784 snp_info);
785 }
786 else {
787 _ASSERT(m_Ref.IsAnySeqTableFeat());
788 const CSeqTableInfo& table_info =
789 m_Ref.GetSeqTableInfo();
790 table_info
791 .UpdateSeq_feat(m_Ref.GetAnnotIndex(),
792 m_CreatedOriginalFeat,
793 seq_pnt, seq_int);
794 }
795 _ASSERT(m_CreatedOriginalFeat);
796 }
797 return *m_CreatedOriginalFeat;
798 }
799 }
800
801
GetMappedFeat(void)802 const CSeq_feat& CCreateFeat::GetMappedFeat(void)
803 {
804 CAnnotMapping_Info& map = m_Ref.GetMappingInfo();
805 if ( !map.IsMapped() ) {
806 return GetOriginalFeat();
807 }
808 if ( map.GetMappedObjectType() == map.eMappedObjType_Seq_feat ) {
809 // mapped Seq-feat is created already
810 return map.GetMappedSeq_feat();
811 }
812
813 CRef<CSeq_feat> mapped_feat(new CSeq_feat);
814 map.InitializeMappedSeq_feat(GetOriginalFeat(), *mapped_feat);
815 map.SetMappedSeq_feat(*mapped_feat);
816 return map.GetMappedSeq_feat();
817 }
818
819
GetCdregionOrder(void)820 int CCreateFeat::GetCdregionOrder(void)
821 {
822 CCdregion::EFrame frame =
823 GetMappedFeat().GetData().GetCdregion().GetFrame();
824 if ( frame == CCdregion::eFrame_not_set ) {
825 frame = CCdregion::eFrame_one;
826 }
827 return frame;
828 }
829
830
GetImpKey(void)831 const char* CCreateFeat::GetImpKey(void)
832 {
833 static const char* const variation_key = "variation";
834 if ( !m_Info ) {
835 return variation_key;
836 }
837 return GetOriginalFeat().GetData().GetImp().GetKey().c_str();
838 }
839
840
GetStrand(bool by_product)841 ENa_strand CCreateFeat::GetStrand(bool by_product)
842 {
843 try {
844 CAnnotMapping_Info& map = m_Ref.GetMappingInfo();
845 if ( map.IsMappedLocation() ) {
846 // location is mapped
847 if ( map.GetMappedObjectType() == map.eMappedObjType_Seq_feat ) {
848 // mapped Seq-feat is created already
849 return GetLoc(map.GetMappedSeq_feat(), by_product).GetStrand();
850 }
851 else if ( map.GetMappedObjectType() == map.eMappedObjType_Seq_loc ) {
852 // mapped Seq-loc is created already
853 return map.GetMappedSeq_loc().GetStrand();
854 }
855 else {
856 // whole, interval, point, or mix
857 return map.GetMappedStrand();
858 }
859 }
860 else {
861 // location is not mapped - use original
862 if ( !m_Info ) {
863 // table SNP or sorted table features have strand in mapping
864 return map.GetMappedStrand();
865 }
866 else {
867 // get location from the Seq-feat
868 return GetLoc(GetOriginalFeat(), by_product).GetStrand();
869 }
870 }
871 }
872 catch ( CException& /*ignored*/ ) {
873 // assume unknown strand for sorting
874 return eNa_strand_unknown;
875 }
876 }
877
878
GetComplexLoc(bool by_product)879 const CSeq_loc* CCreateFeat::GetComplexLoc(bool by_product)
880 {
881 if ( !m_Info ) {
882 // table SNP, or sorted feature table -> no mix
883 return 0;
884 }
885 CAnnotMapping_Info& map = m_Ref.GetMappingInfo();
886 if ( map.IsMappedLocation() ) {
887 // location is mapped
888 if ( map.GetMappedObjectType() == map.eMappedObjType_Seq_loc ) {
889 // mapped Seq-loc is created already
890 const CSeq_loc& loc = map.GetMappedSeq_loc();
891 return &loc;
892 }
893 else if ( map.GetMappedObjectType() == map.eMappedObjType_Seq_id ) {
894 // whole, interval, or point
895 return 0;
896 }
897 // get location from the Seq-feat
898 const CSeq_loc& loc = GetLoc(GetMappedFeat(), by_product);
899 return &loc;
900 }
901 else {
902 // get location from the Seq-feat
903 const CSeq_loc& loc = GetLoc(GetOriginalFeat(), by_product);
904 return &loc;
905 }
906 }
907
908
IsSetProduct(void)909 bool CCreateFeat::IsSetProduct(void)
910 {
911 if ( !m_Info ) {
912 // table SNP or sorted table features -> no product
913 return false;
914 }
915 return GetOriginalFeat().IsSetProduct();
916 }
917
918
GetProductId(void)919 CConstRef<CSeq_id> CCreateFeat::GetProductId(void)
920 {
921 _ASSERT(IsSetProduct());
922 return ConstRef(GetOriginalFeat().GetProduct().GetId());
923 }
924
925
HasFeatLabel(void)926 bool CCreateFeat::HasFeatLabel(void)
927 {
928 if ( !m_Info ) {
929 return m_Ref.GetSeq_annot_Info()
930 .TableFeat_HasLabel(m_Ref.GetAnnotIndex());
931 }
932 const CSeq_feat& feat = GetOriginalFeat();
933 return (feat.IsSetQual() && !feat.GetQual().empty()) ||
934 (feat.IsSetComment() && !feat.GetComment().empty());
935 }
936
937
GetFeatLabel(void)938 string CCreateFeat::GetFeatLabel(void)
939 {
940 if ( !m_Info ) {
941 return m_Ref.GetSeq_annot_Info()
942 .TableFeat_GetLabel(m_Ref.GetAnnotIndex());
943 }
944
945 string label;
946
947 const CSeq_feat& feat = GetOriginalFeat();
948
949 // Put Seq-feat qual into label
950 if ( feat.IsSetQual() ) {
951 ITERATE( CSeq_feat::TQual, it, feat.GetQual() ) {
952 label += label.empty()? '/': ' ';
953 label += (**it).GetQual();
954 if (!(**it).GetVal().empty()) {
955 label += '=';
956 label += (**it).GetVal();
957 }
958 }
959 }
960
961 // Put Seq-feat comment into label
962 if ( feat.IsSetComment() ) {
963 if ( !label.empty()) {
964 label += "; ";
965 }
966 label += feat.GetComment();
967 }
968
969 return label;
970 }
971
972
operator ()(const CAnnotObject_Ref & x,const CAnnotObject_Ref & y) const973 bool CAnnotObjectType_Less::operator()(const CAnnotObject_Ref& x,
974 const CAnnotObject_Ref& y) const
975 {
976 // gather x annotation type
977 const CAnnotObject_Info* x_info;
978 CSeq_annot::C_Data::E_Choice x_annot_type;
979 if ( x.HasAnnotObject_Info() ) {
980 x_info = &x.GetAnnotObject_Info();
981 x_annot_type = x_info->GetAnnotType();
982 }
983 else {
984 x_info = 0;
985 x_annot_type = CSeq_annot::C_Data::e_Ftable;
986 }
987
988 // gather y annotation type
989 const CAnnotObject_Info* y_info;
990 CSeq_annot::C_Data::E_Choice y_annot_type;
991 if ( y.HasAnnotObject_Info() ) {
992 y_info = &y.GetAnnotObject_Info();
993 y_annot_type = y_info->GetAnnotType();
994 }
995 else {
996 y_info = 0;
997 y_annot_type = CSeq_annot::C_Data::e_Ftable;
998 }
999
1000 // compare by annotation type (feature, align, graph)
1001 if ( x_annot_type != y_annot_type ) {
1002 return x_annot_type < y_annot_type;
1003 }
1004
1005 if ( x_annot_type == CSeq_annot::C_Data::e_Ftable ) {
1006 // compare features
1007
1008 // get x feature type
1009 CSeqFeatData::E_Choice x_feat_type;
1010 CSeqFeatData::ESubtype x_feat_subtype;
1011 if ( x_info ) {
1012 x_feat_type = x_info->GetFeatType();
1013 x_feat_subtype = x_info->GetFeatSubtype();
1014 }
1015 else if ( x.IsSNPTableFeat() ) {
1016 x_feat_type = CSeqFeatData::e_Imp;
1017 x_feat_subtype = CSeqFeatData::eSubtype_variation;
1018 }
1019 else {
1020 SAnnotTypeSelector type = x.GetSeqTableInfo().GetType();
1021 x_feat_type = type.GetFeatType();
1022 x_feat_subtype = type.GetFeatSubtype();
1023 }
1024
1025 // get y feature type
1026 CSeqFeatData::E_Choice y_feat_type;
1027 CSeqFeatData::ESubtype y_feat_subtype;
1028 if ( y_info ) {
1029 y_feat_type = y_info->GetFeatType();
1030 y_feat_subtype = y_info->GetFeatSubtype();
1031 }
1032 else if ( y.IsSNPTableFeat() ) {
1033 y_feat_type = CSeqFeatData::e_Imp;
1034 y_feat_subtype = CSeqFeatData::eSubtype_variation;
1035 }
1036 else {
1037 SAnnotTypeSelector type = y.GetSeqTableInfo().GetType();
1038 y_feat_type = type.GetFeatType();
1039 y_feat_subtype = type.GetFeatSubtype();
1040 }
1041
1042 // order by feature type
1043 if ( x_feat_subtype != y_feat_subtype ) {
1044 int x_order = GetTypeOrder(x_feat_type, x_feat_subtype);
1045 int y_order = GetTypeOrder(y_feat_type, y_feat_subtype);
1046 if ( x_order != y_order ) {
1047 return x_order < y_order;
1048 }
1049 }
1050
1051 CCreateFeat x_create(x, x_info);
1052 CCreateFeat y_create(y, y_info);
1053
1054 // compare strands
1055 ENa_strand x_strand = x_create.GetStrand(m_ByProduct);
1056 ENa_strand y_strand = y_create.GetStrand(m_ByProduct);
1057 bool x_minus = IsReverse(x_strand);
1058 bool y_minus = IsReverse(y_strand);
1059 if ( x_minus != y_minus ) {
1060 // minus strand last
1061 return y_minus;
1062 }
1063
1064 // compare complex locations (mix or packed intervals)
1065 const CSeq_loc* x_loc = x_create.GetComplexLoc(m_ByProduct);
1066 const CSeq_loc* y_loc = y_create.GetComplexLoc(m_ByProduct);
1067
1068 bool x_complex = x_loc && (x_loc->IsMix() || x_loc->IsPacked_int());
1069 bool y_complex = y_loc && (y_loc->IsMix() || y_loc->IsPacked_int());
1070 if ( x_complex != y_complex ) {
1071 // simple loc before complex on plus strand, after on minus strand
1072 return x_minus ^ y_complex;
1073 }
1074
1075 if ( x_complex ) {
1076 int diff = 0;
1077 if( m_TesterForIgnoreFarLocationsForSorting ) {
1078 diff = x_loc->CompareSubLoc(*y_loc, x_strand, &m_TesterForIgnoreFarLocationsForSorting);
1079 } else {
1080 diff = x_loc->CompareSubLoc(*y_loc, x_strand);
1081 }
1082 if ( diff != 0 ) {
1083 return diff < 0;
1084 }
1085 }
1086
1087 // compare subtypes
1088 if ( x_feat_subtype != y_feat_subtype ) {
1089 return x_feat_subtype < y_feat_subtype;
1090 }
1091
1092 _ASSERT(x_feat_type == y_feat_type);
1093 // type dependent comparison
1094 if ( x_feat_type == CSeqFeatData::e_Cdregion ) {
1095 // compare frames of identical CDS ranges
1096 int x_frame = x_create.GetCdregionOrder();
1097 int y_frame = y_create.GetCdregionOrder();
1098 if ( x_frame != y_frame ) {
1099 return x_frame < y_frame;
1100 }
1101 }
1102 else if ( x_feat_subtype == CSeqFeatData::eSubtype_imp ) {
1103 // all non-standard imported features have the same subtype
1104 const char* x_key = x_create.GetImpKey();
1105 const char* y_key = y_create.GetImpKey();
1106
1107 // compare labels of imp features
1108 if ( x_key != y_key ) {
1109 int diff = NStr::CompareNocase(x_key, y_key);
1110 if ( diff != 0 ) {
1111 return diff < 0;
1112 }
1113 }
1114 }
1115 else if ( x_feat_type == CSeqFeatData::e_Gene ) {
1116 const CGene_ref& x_gene = x_info->GetFeatFast()->GetData().GetGene();
1117 const CGene_ref& y_gene = y_info->GetFeatFast()->GetData().GetGene();
1118 const string& x_locus = x_gene.IsSetLocus()? x_gene.GetLocus(): kEmptyStr;
1119 const string& y_locus = y_gene.IsSetLocus()? y_gene.GetLocus(): kEmptyStr;
1120 if ( int diff = NStr::CompareNocase(x_locus, y_locus) ) {
1121 return diff < 0;
1122 }
1123 const string& x_desc = x_gene.IsSetDesc()? x_gene.GetDesc(): kEmptyStr;
1124 const string& y_desc = y_gene.IsSetDesc()? y_gene.GetDesc(): kEmptyStr;
1125 if ( int diff = NStr::CompareNocase(x_desc, y_desc) ) {
1126 return diff < 0;
1127 }
1128 }
1129
1130 if ( !m_ByProduct ) {
1131 // order by product id
1132 bool x_has_product = x_create.IsSetProduct();
1133 bool y_has_product = y_create.IsSetProduct();
1134 if ( x_has_product != y_has_product ) {
1135 return !x_has_product; // without product first
1136 }
1137 if ( x_has_product ) {
1138 CConstRef<CSeq_id> x_id = x_create.GetProductId();
1139 CConstRef<CSeq_id> y_id = y_create.GetProductId();
1140 if ( x_id.IsNull() != y_id.IsNull() ) {
1141 return x_id.IsNull(); // no product id first
1142 }
1143 if ( x_id ) {
1144 string x_id_str = x_id->AsFastaString();
1145 string y_id_str = y_id->AsFastaString();
1146 if ( int diff = NStr::CompareNocase(x_id_str, y_id_str) ) {
1147 return diff < 0;
1148 }
1149 }
1150 }
1151 }
1152
1153 bool x_has_label = x_create.HasFeatLabel();
1154 bool y_has_label = y_create.HasFeatLabel();
1155 if ( x_has_label != y_has_label ) {
1156 return !x_has_label; // no-label first
1157 }
1158 if ( x_has_label ) {
1159 string x_label = x_create.GetFeatLabel();
1160 string y_label = y_create.GetFeatLabel();
1161 if ( int diff = NStr::CompareNocase(x_label, y_label) ) {
1162 return diff < 0;
1163 }
1164 }
1165
1166 if ( m_FeatComparator ) {
1167 const CSeq_feat& x_feat = x_create.GetMappedFeat();
1168 const CSeq_feat& y_feat = y_create.GetMappedFeat();
1169 if ( m_FeatComparator->Less(x_feat, y_feat, m_Scope) ) {
1170 return true;
1171 }
1172 if ( m_FeatComparator->Less(y_feat, x_feat, m_Scope) ) {
1173 return false;
1174 }
1175 }
1176 }
1177 if ( x.IsFromOtherTSE() != y.IsFromOtherTSE() ) {
1178 // non-sequence TSE annotations should come later
1179 return y.IsFromOtherTSE();
1180 }
1181
1182 return x < y;
1183 }
1184
1185
1186 struct CAnnotObject_Less
1187 {
CAnnotObject_LessCAnnotObject_Less1188 explicit CAnnotObject_Less(const SAnnotSelector* sel,
1189 CScope* scope = 0)
1190 : type_less(sel, scope),
1191 ignore_far_handle(sel->GetIgnoreFarLocationsForSorting())
1192 {
1193 }
1194
x_GetExtremesCAnnotObject_Less1195 void x_GetExtremes( TSeqPos &out_from, TSeqPos &out_to,
1196 const CAnnotObject_Ref& obj_ref ) const
1197 {
1198 out_from = kInvalidSeqPos;
1199 out_to = kInvalidSeqPos;
1200
1201 bool is_circular = ( ignore_far_handle.CanGetInst_Topology() &&
1202 ignore_far_handle.GetInst_Topology() == CSeq_inst::eTopology_circular );
1203
1204 bool all_minus = true;
1205 bool all_non_minus = true;
1206
1207 const CSeq_loc & loc = obj_ref.GetAnnotObject_Info().GetFeatFast()->GetLocation();
1208
1209 CSeq_loc_CI first_piece;
1210 CSeq_loc_CI last_piece;
1211
1212 TSeqPos lowest = kInvalidSeqPos;
1213 TSeqPos highest = kInvalidSeqPos;
1214
1215 CSeq_loc_CI loc_ci( loc, CSeq_loc_CI::eEmpty_Skip, CSeq_loc_CI::eOrder_Biological );
1216 for( ; loc_ci; ++loc_ci ) {
1217 if( ! ignore_far_handle.IsSynonym(loc_ci.GetSeq_id_Handle()) ) {
1218 continue;
1219 }
1220 if( ! first_piece ) {
1221 first_piece = loc_ci;
1222 }
1223 last_piece = loc_ci;
1224
1225 TSeqPos piece_start = kInvalidSeqPos;
1226 TSeqPos piece_stop = kInvalidSeqPos;
1227
1228 if( loc_ci.IsSetStrand() && loc_ci.GetStrand() == eNa_strand_minus ) {
1229 all_non_minus = false;
1230 } else {
1231 all_minus = false;
1232 }
1233
1234 piece_start = loc_ci.GetRange().GetFrom();
1235 piece_stop = loc_ci.GetRange().GetToOpen();
1236
1237 if( lowest == kInvalidSeqPos ) {
1238 lowest = piece_start;
1239 } else {
1240 lowest = min( lowest, piece_start );
1241 }
1242
1243 if( highest == kInvalidSeqPos ) {
1244 highest = piece_stop;
1245 } else {
1246 highest = max( highest, piece_stop );
1247 }
1248 }
1249
1250 // ignore circularity if strandedness is mixed
1251 if( ! all_minus && ! all_non_minus ) {
1252 is_circular = false;
1253 }
1254
1255 // out_from
1256 if (is_circular) {
1257 if (all_minus) {
1258 if( last_piece ) {
1259 out_from = last_piece.GetRange().GetFrom();
1260 }
1261 } else {
1262 if( first_piece ) {
1263 out_from = first_piece.GetRange().GetFrom();
1264 }
1265 }
1266 } else {
1267 out_from = lowest;
1268 }
1269
1270 // out_to
1271 if (is_circular) {
1272 if (all_minus) {
1273 if( first_piece ) {
1274 out_to = first_piece.GetRange().GetToOpen();
1275 }
1276 } else {
1277 if( last_piece ) {
1278 out_to = last_piece.GetRange().GetToOpen();
1279 }
1280 }
1281 } else {
1282 out_to = highest;
1283 }
1284 }
1285
1286 static
GetRangeOpenCAnnotObject_Less1287 void GetRangeOpen(TSeqPos &out_from, TSeqPos &out_to,
1288 const CAnnotObject_Ref& obj_ref)
1289 {
1290 out_from = obj_ref.GetMappingInfo().GetFrom();
1291 out_to = obj_ref.GetMappingInfo().GetToOpen();
1292 if ( out_from != kInvalidSeqPos ||
1293 out_to != kInvalidSeqPos ||
1294 !obj_ref.IsAlign() ||
1295 (obj_ref.GetMappingInfo().GetMappedObjectType() !=
1296 CAnnotMapping_Info::eMappedObjType_Seq_loc_Conv_Set) ) {
1297 return;
1298 }
1299 // mapped align may have uninitialized total range
1300 // force mapping
1301 obj_ref.GetMappingInfo().GetMappedSeq_align(obj_ref.GetAlign());
1302 // re-get updated range
1303 out_from = obj_ref.GetMappingInfo().GetFrom();
1304 out_to = obj_ref.GetMappingInfo().GetToOpen();
1305 }
1306
CompareRangesCAnnotObject_Less1307 static int CompareRanges(TSeqPos x_from, TSeqPos x_to, TSeqPos y_from, TSeqPos y_to)
1308 {
1309 // (from >= to) means circular location.
1310 // Any circular location is less than (before) non-circular one.
1311 // If both are circular, compare them regular way.
1312 bool x_circular = x_from >= x_to;
1313 bool y_circular = y_from >= y_to;
1314 if ( x_circular != y_circular ) {
1315 return x_circular ? -1 : 1;
1316 }
1317 // smallest left extreme first
1318 if ( x_from != y_from ) {
1319 return x_from < y_from ? -1 : 1;
1320 }
1321 // longest feature first
1322 if ( x_to != y_to ) {
1323 return x_to > y_to ? -1 : 1;
1324 }
1325 return 0;
1326 }
1327
1328 // Compare CRef-s: both must be features
operator ()CAnnotObject_Less1329 bool operator()(const CAnnotObject_Ref& x,
1330 const CAnnotObject_Ref& y) const
1331 {
1332 if (x == y) { // small speedup
1333 return false;
1334 }
1335
1336 if (x.GetMappingInfo().GetMappedObjectType() == CAnnotMapping_Info::eMappedObjType_IdRangeMap &&
1337 y.GetMappingInfo().GetMappedObjectType() == CAnnotMapping_Info::eMappedObjType_IdRangeMap &&
1338 x.GetMappingInfo().GetIdRangeMap().CanSort() &&
1339 y.GetMappingInfo().GetIdRangeMap().CanSort()) {
1340 // Perform full location comparison instead of using total range shortcut.
1341 const CIdRangeMap::TIdRangeMap& x_idmap = x.GetMappingInfo().GetIdRangeMap().GetMap();
1342 const CIdRangeMap::TIdRangeMap& y_idmap = y.GetMappingInfo().GetIdRangeMap().GetMap();
1343 CIdRangeMap::TIdRangeMap::const_iterator x_it = x_idmap.begin();
1344 CIdRangeMap::TIdRangeMap::const_iterator y_it = y_idmap.begin();
1345 for (; x_it != x_idmap.end() && y_it != y_idmap.end(); ++x_it, ++y_it) {
1346 if (x_it->first != y_it->first) return x_it->first < y_it->first;
1347 int cmp = CompareRanges(x_it->second.from, x_it->second.to, y_it->second.from, y_it->second.to);
1348 if (cmp != 0) return cmp < 0;
1349 }
1350 if (y_it != y_idmap.end()) return true;
1351 if (x_it != x_idmap.end()) return false;
1352 }
1353 else {
1354 TSeqPos x_from = kInvalidSeqPos;
1355 TSeqPos y_from = kInvalidSeqPos;
1356 TSeqPos x_to = kInvalidSeqPos;
1357 TSeqPos y_to = kInvalidSeqPos;
1358
1359 if( ignore_far_handle ) {
1360 x_GetExtremes( x_from, x_to, x );
1361 x_GetExtremes( y_from, y_to, y );
1362 } else {
1363 GetRangeOpen(x_from, x_to, x);
1364 GetRangeOpen(y_from, y_to, y);
1365 }
1366
1367 // (from >= to) means circular location.
1368 // Any circular location is less than (before) non-circular one.
1369 // If both are circular, compare them regular way.
1370 bool x_circular = x_from >= x_to;
1371 bool y_circular = y_from >= y_to;
1372 if ( x_circular != y_circular ) {
1373 return x_circular;
1374 }
1375 // smallest left extreme first
1376 if ( x_from != y_from ) {
1377 return x_from < y_from;
1378 }
1379 // longest feature first
1380 if ( x_to != y_to ) {
1381 return x_to > y_to;
1382 }
1383 }
1384
1385 return type_less(x, y);
1386 }
1387 CAnnotObjectType_Less type_less;
1388 CBioseq_Handle ignore_far_handle;
1389 };
1390
1391
1392 struct CAnnotObject_LessReverse
1393 {
CAnnotObject_LessReverseCAnnotObject_LessReverse1394 explicit CAnnotObject_LessReverse(const SAnnotSelector* sel,
1395 CScope* scope = 0)
1396 : type_less(sel, scope)
1397 {
1398 }
1399 // Compare CRef-s: both must be features
operator ()CAnnotObject_LessReverse1400 bool operator()(const CAnnotObject_Ref& x,
1401 const CAnnotObject_Ref& y) const
1402 {
1403 if ( x == y ) { // small speedup
1404 return false;
1405 }
1406
1407 if (x.GetMappingInfo().GetMappedObjectType() == CAnnotMapping_Info::eMappedObjType_IdRangeMap &&
1408 y.GetMappingInfo().GetMappedObjectType() == CAnnotMapping_Info::eMappedObjType_IdRangeMap &&
1409 x.GetMappingInfo().GetIdRangeMap().CanSort() &&
1410 y.GetMappingInfo().GetIdRangeMap().CanSort()) {
1411 // Perform full location comparison instead of using total range shortcut.
1412 const CIdRangeMap::TIdRangeMap& x_idmap = x.GetMappingInfo().GetIdRangeMap().GetMap();
1413 const CIdRangeMap::TIdRangeMap& y_idmap = y.GetMappingInfo().GetIdRangeMap().GetMap();
1414 CIdRangeMap::TIdRangeMap::const_iterator x_it = x_idmap.begin();
1415 CIdRangeMap::TIdRangeMap::const_iterator y_it = y_idmap.begin();
1416 for (; x_it != x_idmap.end() && y_it != y_idmap.end(); ++x_it, ++y_it) {
1417 if (x_it->first != y_it->first) return y_it->first < x_it->first;
1418 int cmp = CAnnotObject_Less::CompareRanges(
1419 x_it->second.from, x_it->second.to, y_it->second.from, y_it->second.to);
1420 if (cmp != 0) return cmp > 0;
1421 }
1422 if (x_it != x_idmap.end()) return true;
1423 if (y_it != y_idmap.end()) return false;
1424 }
1425 else {
1426 TSeqPos x_from = kInvalidSeqPos;
1427 TSeqPos x_to = kInvalidSeqPos;
1428 TSeqPos y_from = kInvalidSeqPos;
1429 TSeqPos y_to = kInvalidSeqPos;
1430
1431 CAnnotObject_Less::GetRangeOpen(x_from, x_to, x);
1432 CAnnotObject_Less::GetRangeOpen(y_from, y_to, y);
1433
1434 // (from >= to) means circular location.
1435 // Any circular location is less than (before) non-circular one.
1436 // If both are circular, compare them regular way.
1437 bool x_circular = x_from >= x_to;
1438 bool y_circular = y_from >= y_to;
1439 if ( x_circular != y_circular ) {
1440 return x_circular;
1441 }
1442 // largest right extreme first
1443 if ( x_to != y_to ) {
1444 return x_to > y_to;
1445 }
1446 // longest feature first
1447 if ( x_from != y_from ) {
1448 return x_from < y_from;
1449 }
1450 }
1451
1452 return type_less(x, y);
1453 }
1454 CAnnotObjectType_Less type_less;
1455 };
1456
1457
1458 END_LOCAL_NAMESPACE;
1459
1460
1461 /////////////////////////////////////////////////////////////////////////////
1462 // CCreatedFeat_Ref
1463 /////////////////////////////////////////////////////////////////////////////
1464
1465
CCreatedFeat_Ref(void)1466 CCreatedFeat_Ref::CCreatedFeat_Ref(void)
1467 {
1468 }
1469
1470
~CCreatedFeat_Ref(void)1471 CCreatedFeat_Ref::~CCreatedFeat_Ref(void)
1472 {
1473 }
1474
1475
ResetRefs(void)1476 void CCreatedFeat_Ref::ResetRefs(void)
1477 {
1478 m_CreatedSeq_feat.Reset();
1479 m_CreatedSeq_loc.Reset();
1480 m_CreatedSeq_point.Reset();
1481 m_CreatedSeq_interval.Reset();
1482 }
1483
1484
ReleaseRefsTo(CRef<CSeq_feat> * feat,CRef<CSeq_loc> * loc,CRef<CSeq_point> * point,CRef<CSeq_interval> * interval)1485 void CCreatedFeat_Ref::ReleaseRefsTo(CRef<CSeq_feat>* feat,
1486 CRef<CSeq_loc>* loc,
1487 CRef<CSeq_point>* point,
1488 CRef<CSeq_interval>* interval)
1489 {
1490 if (feat) {
1491 m_CreatedSeq_feat.AtomicReleaseTo(*feat);
1492 }
1493 if (loc) {
1494 m_CreatedSeq_loc.AtomicReleaseTo(*loc);
1495 }
1496 if (point) {
1497 m_CreatedSeq_point.AtomicReleaseTo(*point);
1498 }
1499 if (interval) {
1500 m_CreatedSeq_interval.AtomicReleaseTo(*interval);
1501 }
1502 }
1503
1504
ResetRefsFrom(CRef<CSeq_feat> * feat,CRef<CSeq_loc> * loc,CRef<CSeq_point> * point,CRef<CSeq_interval> * interval)1505 void CCreatedFeat_Ref::ResetRefsFrom(CRef<CSeq_feat>* feat,
1506 CRef<CSeq_loc>* loc,
1507 CRef<CSeq_point>* point,
1508 CRef<CSeq_interval>* interval)
1509 {
1510 if (feat) {
1511 m_CreatedSeq_feat.AtomicResetFrom(*feat);
1512 }
1513 if (loc) {
1514 m_CreatedSeq_loc.AtomicResetFrom(*loc);
1515 }
1516 if (point) {
1517 m_CreatedSeq_point.AtomicResetFrom(*point);
1518 }
1519 if (interval) {
1520 m_CreatedSeq_interval.AtomicResetFrom(*interval);
1521 }
1522 }
1523
1524
1525 CConstRef<CSeq_feat>
GetOriginalFeature(const CSeq_feat_Handle & feat_h)1526 CCreatedFeat_Ref::GetOriginalFeature(const CSeq_feat_Handle& feat_h)
1527 {
1528 CConstRef<CSeq_feat> ret;
1529 if ( feat_h.IsTableSNP() ) {
1530 const CSeq_annot_SNP_Info& snp_annot = feat_h.x_GetSNP_annot_Info();
1531 const SSNP_Info& snp_info = feat_h.x_GetSNP_Info();
1532 CRef<CSeq_feat> orig_feat;
1533 CRef<CSeq_point> created_point;
1534 CRef<CSeq_interval> created_interval;
1535 ReleaseRefsTo(&orig_feat, 0, &created_point, &created_interval);
1536 snp_info.UpdateSeq_feat(orig_feat,
1537 created_point,
1538 created_interval,
1539 snp_annot);
1540 ret = orig_feat;
1541 ResetRefsFrom(&orig_feat, 0, &created_point, &created_interval);
1542 }
1543 else if ( feat_h.IsTableFeat() ) {
1544 if ( feat_h.m_CreatedOriginalFeat ) {
1545 ret = feat_h.m_CreatedOriginalFeat;
1546 }
1547 else {
1548 const CSeq_annot_Info& annot = feat_h.x_GetSeq_annot_Info();
1549 CRef<CSeq_feat> orig_feat;
1550 CRef<CSeq_point> created_point;
1551 CRef<CSeq_interval> created_interval;
1552 //ReleaseRefsTo(&orig_feat, 0, &created_point, &created_interval);
1553 annot.GetTableInfo().UpdateSeq_feat(feat_h.x_GetFeatIndex(),
1554 orig_feat,
1555 created_point,
1556 created_interval);
1557 ret = orig_feat;
1558 //ResetRefsFrom(&orig_feat, 0, &created_point, &created_interval);
1559 feat_h.m_CreatedOriginalFeat = ret;
1560 }
1561 }
1562 else {
1563 ret = feat_h.GetPlainSeq_feat();
1564 }
1565 return ret;
1566 }
1567
1568
1569 CRef<CSeq_loc>
GetMappedLocation(const CAnnotMapping_Info & map,const CSeq_feat & orig_feat)1570 CCreatedFeat_Ref::GetMappedLocation(const CAnnotMapping_Info& map,
1571 const CSeq_feat& orig_feat)
1572 {
1573 CRef<CSeq_loc> ret;
1574 if ( map.MappedSeq_locNeedsUpdate() ) {
1575 // need to convert Seq_id to Seq_loc
1576 // clear references to mapped location from mapped feature
1577 // Can not use m_MappedSeq_feat since it's a const-ref
1578 CRef<CSeq_feat> mapped_feat;
1579 m_CreatedSeq_feat.AtomicReleaseTo(mapped_feat);
1580 if ( mapped_feat ) {
1581 if ( !mapped_feat->ReferencedOnlyOnce() ) {
1582 mapped_feat.Reset();
1583 }
1584 else {
1585 CRef<CSeq_loc> null_loc(new CSeq_loc);
1586 null_loc->SetNull();
1587 // ResetLocation doesn't do what we'd like because
1588 // Seq-feat.location isn't optional.
1589 mapped_feat->SetLocation(*null_loc);
1590 mapped_feat->ResetProduct();
1591 }
1592 }
1593 m_CreatedSeq_feat.AtomicResetFrom(mapped_feat);
1594
1595 CRef<CSeq_loc> mapped_loc;
1596 CRef<CSeq_point> created_point;
1597 CRef<CSeq_interval> created_interval;
1598 ReleaseRefsTo(0, &mapped_loc, &created_point, &created_interval);
1599 map.UpdateMappedSeq_loc(mapped_loc,
1600 created_point,
1601 created_interval,
1602 &orig_feat);
1603 ret = mapped_loc;
1604 ResetRefsFrom(0, &mapped_loc, &created_point, &created_interval);
1605 }
1606 else if ( map.IsMapped() ) {
1607 ret = const_cast<CSeq_loc*>(&map.GetMappedSeq_loc());
1608 }
1609 return ret;
1610 }
1611
1612
1613 CRef<CSeq_loc>
GetMappedLocation(const CAnnotMapping_Info & map,const CMappedFeat & feat)1614 CCreatedFeat_Ref::GetMappedLocation(const CAnnotMapping_Info& map,
1615 const CMappedFeat& feat)
1616 {
1617 if ( !map.IsMapped() ) {
1618 return null;
1619 }
1620 else if ( !map.MappedSeq_locNeedsUpdate() ) {
1621 return Ref(const_cast<CSeq_loc*>(&map.GetMappedSeq_loc()));
1622 }
1623 else {
1624 return GetMappedLocation(map, *feat.GetOriginalSeq_feat());
1625 }
1626 }
1627
1628
1629 CConstRef<CSeq_feat>
GetMappedFeature(const CAnnotMapping_Info & map,const CMappedFeat & feat)1630 CCreatedFeat_Ref::GetMappedFeature(const CAnnotMapping_Info& map,
1631 const CMappedFeat& feat)
1632 {
1633 if ( map.GetMappedObjectType() == map.eMappedObjType_Seq_feat) {
1634 return ConstRef(&map.GetMappedSeq_feat());
1635 }
1636 else {
1637 return GetMappedFeature(map, *feat.GetOriginalSeq_feat());
1638 }
1639 }
1640
1641
1642 CConstRef<CSeq_feat>
GetMappedFeature(const CAnnotMapping_Info & map,const CSeq_feat & orig_feat)1643 CCreatedFeat_Ref::GetMappedFeature(const CAnnotMapping_Info& map,
1644 const CSeq_feat& orig_feat)
1645 {
1646 CConstRef<CSeq_feat> ret;
1647 if ( map.GetMappedObjectType() == map.eMappedObjType_Seq_feat) {
1648 ret = &map.GetMappedSeq_feat();
1649 }
1650 else if ( !map.IsMapped() ) {
1651 ret = &orig_feat;
1652 }
1653 else {
1654 CRef<CSeq_loc> loc = GetMappedLocation(map, orig_feat);
1655
1656 // some Seq-loc object is mapped
1657 CRef<CSeq_feat> mapped_feat;
1658 m_CreatedSeq_feat.AtomicReleaseTo(mapped_feat);
1659 if ( !mapped_feat || !mapped_feat->ReferencedOnlyOnce() ) {
1660 mapped_feat.Reset(new CSeq_feat);
1661 // copy all fields from original feature
1662 map.InitializeMappedSeq_feat(orig_feat, *mapped_feat);
1663 }
1664 else {
1665 // copy only unmapped location/product fields from original feature
1666 CSeq_feat& src_nc = const_cast<CSeq_feat&>(orig_feat);
1667 if ( !map.IsMappedLocation() ) {
1668 mapped_feat->SetLocation(src_nc.SetLocation());
1669 }
1670 if ( !map.IsMappedProduct() ) {
1671 if ( orig_feat.IsSetProduct() )
1672 mapped_feat->SetProduct(src_nc.SetProduct());
1673 else
1674 mapped_feat->ResetProduct();
1675 }
1676 }
1677
1678 // set mapped location/product field
1679 if ( map.IsMappedLocation() ) {
1680 mapped_feat->SetLocation(*loc);
1681 }
1682 else if ( map.IsMappedProduct() ) {
1683 mapped_feat->SetProduct(*loc);
1684 }
1685 // set mapped partial field
1686 if ( map.IsPartial() ) {
1687 mapped_feat->SetPartial(true);
1688 }
1689 else {
1690 mapped_feat->ResetPartial();
1691 }
1692
1693 ret = mapped_feat;
1694 m_CreatedSeq_feat.AtomicResetFrom(mapped_feat);
1695 }
1696 return ret;
1697 }
1698
1699
1700 /////////////////////////////////////////////////////////////////////////////
1701 // CAnnot_Collector, CAnnotMappingCollector
1702 /////////////////////////////////////////////////////////////////////////////
1703
1704
1705 class CAnnotMappingCollector
1706 {
1707 public:
1708 typedef map<CAnnotObject_Ref,
1709 CRef<CSeq_loc_Conversion_Set> > TAnnotMappingSet;
1710 // Set of annotations for complex remapping
1711 TAnnotMappingSet m_AnnotMappingSet;
1712 };
1713
1714
CAnnot_Collector(CScope & scope)1715 CAnnot_Collector::CAnnot_Collector(CScope& scope)
1716 : m_Selector(0),
1717 m_Scope(scope),
1718 m_LoadBytes(0),
1719 m_LoadSeconds(0),
1720 m_FromOtherTSE(false)
1721 {
1722 }
1723
1724
~CAnnot_Collector(void)1725 CAnnot_Collector::~CAnnot_Collector(void)
1726 {
1727 }
1728
1729
x_NoMoreObjects(void) const1730 bool CAnnot_Collector::x_NoMoreObjects(void) const
1731 {
1732 if ( x_MaxSearchSegmentsLimitIsReached() ) {
1733 // search segment limit reached
1734 return true;
1735 }
1736 typedef SAnnotSelector::TMaxSize TMaxSize;
1737 TMaxSize limit = m_Selector->GetMaxSize();
1738 if ( limit >= numeric_limits<TMaxSize>::max() ) {
1739 return false;
1740 }
1741 size_t size = m_AnnotSet.size();
1742 if ( m_MappingCollector.get() ) {
1743 size += m_MappingCollector->m_AnnotMappingSet.size();
1744 }
1745 return size >= limit;
1746 }
1747
1748
CanResolveId(const CSeq_id_Handle & idh,const CBioseq_Handle & bh)1749 bool CAnnot_Collector::CanResolveId(const CSeq_id_Handle& idh,
1750 const CBioseq_Handle& bh)
1751 {
1752 switch ( m_Selector->GetResolveMethod() ) {
1753 case SAnnotSelector::eResolve_All:
1754 return true;
1755 case SAnnotSelector::eResolve_TSE:
1756 return m_Scope->GetBioseqHandleFromTSE(idh, bh.GetTSE_Handle());
1757 default:
1758 return false;
1759 }
1760 }
1761
1762 static CSeqFeatData::ESubtype s_DefaultAdaptiveTriggers[] = {
1763 CSeqFeatData::eSubtype_gene,
1764 CSeqFeatData::eSubtype_cdregion,
1765 CSeqFeatData::eSubtype_mRNA
1766 };
1767
x_Initialize0(const SAnnotSelector & selector)1768 void CAnnot_Collector::x_Initialize0(const SAnnotSelector& selector)
1769 {
1770 m_Selector = &selector;
1771 m_TriggerTypes.reset();
1772 SAnnotSelector::TAdaptiveDepthFlags adaptive_flags = 0;
1773 if ( !selector.GetExactDepth() ||
1774 selector.GetResolveDepth() == kMax_Int ) {
1775 adaptive_flags = selector.GetAdaptiveDepthFlags();
1776 }
1777 if ( adaptive_flags & selector.fAdaptive_ByTriggers ) {
1778 if ( selector.m_AdaptiveTriggers.empty() ) {
1779 const size_t count =
1780 sizeof(s_DefaultAdaptiveTriggers)/
1781 sizeof(s_DefaultAdaptiveTriggers[0]);
1782 for ( int i = count - 1; i >= 0; --i ) {
1783 CSeqFeatData::ESubtype subtype = s_DefaultAdaptiveTriggers[i];
1784 size_t index = CAnnotType_Index::GetSubtypeIndex(subtype);
1785 if ( index ) {
1786 m_TriggerTypes.set(index);
1787 }
1788 }
1789 }
1790 else {
1791 ITERATE ( SAnnotSelector::TAdaptiveTriggers, it,
1792 selector.m_AdaptiveTriggers ) {
1793 pair<size_t, size_t> idxs =
1794 CAnnotType_Index::GetIndexRange(*it);
1795 for ( size_t i = idxs.first; i < idxs.second; ++i ) {
1796 m_TriggerTypes.set(i);
1797 }
1798 }
1799 }
1800 }
1801 m_UnseenAnnotTypes.set();
1802 m_CollectAnnotTypes = selector.m_AnnotTypesBitset;
1803 if ( !m_CollectAnnotTypes.any() ) {
1804 pair<size_t, size_t> range =
1805 CAnnotType_Index::GetIndexRange(selector);
1806 for ( size_t index = range.first; index < range.second; ++index ) {
1807 m_CollectAnnotTypes.set(index);
1808 }
1809 }
1810 if ( selector.m_CollectNames ) {
1811 m_AnnotNames.reset(new TAnnotNames());
1812 }
1813 selector.CheckLimitObjectType();
1814 if ( selector.m_LimitObjectType != SAnnotSelector::eLimit_None ) {
1815 x_GetTSE_Info();
1816 }
1817 m_SearchSegments = selector.GetMaxSearchSegments();
1818 m_SearchSegmentsAction = selector.GetMaxSearchSegmentsAction();
1819 double max_time = selector.GetMaxSearchTime();
1820 if ( max_time <= 86400 ) { // 24 hours
1821 m_SearchTime.Start();
1822 }
1823 }
1824
1825
x_StopSearchLimits(void)1826 void CAnnot_Collector::x_StopSearchLimits(void)
1827 {
1828 if ( m_SearchSegments != numeric_limits<TMaxSearchSegments>::max() ) {
1829 m_SearchSegments = numeric_limits<TMaxSearchSegments>::max();
1830 }
1831 m_SearchTime.Stop();
1832 }
1833
1834
x_FoundAllNamedAnnotAccessions(unique_ptr<SAnnotSelector> & local_sel)1835 bool CAnnot_Collector::x_FoundAllNamedAnnotAccessions(unique_ptr<SAnnotSelector>& local_sel)
1836 {
1837 if ( !m_AnnotNames.get() ) {
1838 return false;
1839 }
1840 set<string> found_accs;
1841 for ( auto& n : *m_AnnotNames ) {
1842 if ( !n.IsNamed() ) {
1843 continue;
1844 }
1845 string acc;
1846 ExtractZoomLevel(n.GetName(), &acc, 0);
1847 if ( m_Selector->GetNamedAnnotAccessions().find(acc) !=
1848 m_Selector->GetNamedAnnotAccessions().end() ) {
1849 found_accs.insert(acc);
1850 }
1851 }
1852 if ( !found_accs.empty() ) {
1853 if ( !local_sel ) {
1854 local_sel.reset(new SAnnotSelector(*m_Selector));
1855 m_Selector = local_sel.get();
1856 }
1857 for ( auto& acc : found_accs ) {
1858 local_sel->ExcludeNamedAnnotAccession(acc);
1859 }
1860 }
1861 return !m_Selector->IsIncludedAnyNamedAnnotAccession();
1862 }
1863
1864
1865 static const bool kTraceFullCvt = false;
1866
x_Initialize(const SAnnotSelector & selector,const CBioseq_Handle & bh,const CRange<TSeqPos> & range,ENa_strand strand)1867 void CAnnot_Collector::x_Initialize(const SAnnotSelector& selector,
1868 const CBioseq_Handle& bh,
1869 const CRange<TSeqPos>& range,
1870 ENa_strand strand)
1871 {
1872 if ( !bh ) {
1873 NCBI_THROW(CAnnotException, eBadLocation,
1874 "Bioseq handle is null");
1875 }
1876 CScope_Impl::TConfReadLockGuard guard(m_Scope->m_ConfLock);
1877 x_Initialize0(selector);
1878
1879 CSeq_id_Handle master_id = bh.GetAccessSeq_id_Handle();
1880 CHandleRange master_range;
1881 master_range.AddRange(range, strand);
1882
1883 int depth = selector.GetResolveDepth();
1884 bool depth_is_set = depth >= 0 && depth < kMax_Int;
1885 bool exact_depth = selector.GetExactDepth() && depth_is_set;
1886 int adaptive_flags = exact_depth? 0: selector.GetAdaptiveDepthFlags();
1887 int by_policy = adaptive_flags & SAnnotSelector::fAdaptive_ByPolicy;
1888 adaptive_flags &=
1889 SAnnotSelector::fAdaptive_ByTriggers |
1890 SAnnotSelector::fAdaptive_BySubtypes |
1891 SAnnotSelector::fAdaptive_ByNamedAcc;
1892
1893 // main sequence
1894 bool deeper = true;
1895 if ( adaptive_flags || !exact_depth || depth == 0 ) {
1896 x_SearchMaster(bh, master_id, master_range);
1897 deeper = !x_NoMoreObjects();
1898 }
1899 if ( deeper ) {
1900 deeper = depth > 0 &&
1901 selector.GetResolveMethod() != selector.eResolve_None;
1902 }
1903 if ( deeper && by_policy ) {
1904 deeper =
1905 bh.GetFeatureFetchPolicy() != bh.eFeatureFetchPolicy_only_near;
1906 }
1907 bool only_named_annot_accs = false;
1908 unique_ptr<SAnnotSelector> local_sel;
1909 if ( deeper && adaptive_flags ) {
1910 m_CollectAnnotTypes &= m_UnseenAnnotTypes;
1911 deeper = m_CollectAnnotTypes.any();
1912 if ( deeper && (adaptive_flags & SAnnotSelector::fAdaptive_ByNamedAcc)) {
1913 only_named_annot_accs = selector.HasIncludedOnlyNamedAnnotAccessions();
1914 }
1915 if ( deeper && only_named_annot_accs && x_FoundAllNamedAnnotAccessions(local_sel) ) {
1916 deeper = false;
1917 }
1918 }
1919 if ( deeper ) {
1920 deeper = bh.GetSeqMap().HasSegmentOfType(CSeqMap::eSeqRef);
1921 }
1922
1923 int last_depth = 0;
1924 if ( deeper ) {
1925 CRef<CSeq_loc> master_loc_empty(new CSeq_loc);
1926 master_loc_empty->
1927 SetEmpty(const_cast<CSeq_id&>(*master_id.GetSeqId()));
1928 for ( int level = 1; level <= depth && deeper; ++level ) {
1929 last_depth = level;
1930 // segments
1931 if ( adaptive_flags || !exact_depth || depth == level ) {
1932 deeper = x_SearchSegments(bh, master_id, master_range,
1933 *master_loc_empty, level);
1934 if ( deeper ) {
1935 deeper = !x_NoMoreObjects();
1936 }
1937 }
1938 if ( deeper ) {
1939 deeper = depth > level;
1940 }
1941 if ( deeper && adaptive_flags ) {
1942 m_CollectAnnotTypes &= m_UnseenAnnotTypes;
1943 deeper = m_CollectAnnotTypes.any();
1944 if ( deeper && only_named_annot_accs && x_FoundAllNamedAnnotAccessions(local_sel) ) {
1945 deeper = false;
1946 }
1947 }
1948 }
1949 }
1950
1951 x_AddPostMappings();
1952 if ( m_MappingCollector.get() ) {
1953 // need full conversion set
1954 if ( kTraceFullCvt ) {
1955 LOG_POST("Need full conversion set for "<<
1956 m_MappingCollector->m_AnnotMappingSet.size()<<" annots");
1957 }
1958 CSeq_loc_Conversion_Set cvt_set(m_Scope);
1959 CRef<CSeq_loc> master_loc_empty(new CSeq_loc);
1960 master_loc_empty->
1961 SetEmpty(const_cast<CSeq_id&>(*master_id.GetSeqId()));
1962 for ( int level = 1; level <= last_depth; ++level ) {
1963 // segments
1964 if ( adaptive_flags || !exact_depth || depth == level ) {
1965 x_CollectSegments(bh, master_id, master_range,
1966 *master_loc_empty, level, cvt_set);
1967 }
1968 }
1969 x_AddPostMappingsCvt(cvt_set);
1970 }
1971 x_Sort();
1972 }
1973
1974
x_Initialize(const SAnnotSelector & selector,const CHandleRangeMap & master_loc)1975 void CAnnot_Collector::x_Initialize(const SAnnotSelector& selector,
1976 const CHandleRangeMap& master_loc)
1977 {
1978 CScope_Impl::TConfReadLockGuard guard(m_Scope->m_ConfLock);
1979 x_Initialize0(selector);
1980
1981 int depth = selector.GetResolveDepth();
1982 bool depth_is_set = depth >= 0 && depth < kMax_Int;
1983 bool exact_depth = selector.GetExactDepth() && depth_is_set;
1984 int adaptive_flags = exact_depth? 0: selector.GetAdaptiveDepthFlags();
1985 adaptive_flags &=
1986 SAnnotSelector::fAdaptive_ByTriggers |
1987 SAnnotSelector::fAdaptive_BySubtypes;
1988
1989 // main sequence
1990 bool deeper = true;
1991 if ( adaptive_flags || !exact_depth || depth == 0 ) {
1992 x_SearchLoc(master_loc, 0, 0, true);
1993 deeper = !x_NoMoreObjects();
1994 }
1995 if ( deeper ) {
1996 deeper = depth > 0 &&
1997 selector.GetResolveMethod() != selector.eResolve_None;
1998 }
1999 if ( deeper && adaptive_flags ) {
2000 m_CollectAnnotTypes &= m_UnseenAnnotTypes;
2001 deeper = m_CollectAnnotTypes.any();
2002 }
2003
2004 int last_depth = 0;
2005 if ( deeper ) {
2006 for ( int level = 1; level <= depth && deeper; ++level ) {
2007 last_depth = level;
2008 // segments
2009 if ( adaptive_flags || !exact_depth || depth == level ) {
2010 deeper = x_SearchSegments(master_loc, level);
2011 if ( deeper ) {
2012 deeper = !x_NoMoreObjects();
2013 }
2014 }
2015 if ( deeper ) {
2016 deeper = depth > level;
2017 }
2018 if ( deeper && adaptive_flags ) {
2019 m_CollectAnnotTypes &= m_UnseenAnnotTypes;
2020 deeper = m_CollectAnnotTypes.any();
2021 }
2022 }
2023 }
2024
2025 x_AddPostMappings();
2026 if ( m_MappingCollector.get() ) {
2027 // need full conversion set
2028 if ( kTraceFullCvt ) {
2029 LOG_POST("Need full conversion set for "<<
2030 m_MappingCollector->m_AnnotMappingSet.size()<<" annots");
2031 }
2032 CSeq_loc_Conversion_Set cvt_set(m_Scope);
2033 for ( int level = 1; level <= last_depth; ++level ) {
2034 // segments
2035 if ( adaptive_flags || !exact_depth || depth == level ) {
2036 x_CollectSegments(master_loc, level, cvt_set);
2037 }
2038 }
2039 x_AddPostMappingsCvt(cvt_set);
2040 }
2041 x_Sort();
2042 }
2043
2044
x_CheckAdaptive(const CBioseq_Handle & bh) const2045 bool CAnnot_Collector::x_CheckAdaptive(const CBioseq_Handle& bh) const
2046 {
2047 int adaptive_flags = GetSelector().GetAdaptiveDepthFlags();
2048 if ( !(adaptive_flags & (SAnnotSelector::fAdaptive_ByTriggers |
2049 SAnnotSelector::fAdaptive_BySubtypes)) ) {
2050 // no heuristics
2051 return false;
2052 }
2053 if ( !(adaptive_flags & SAnnotSelector::fAdaptive_ByPolicy) ) {
2054 // heuristics only
2055 return true;
2056 }
2057 // both policy and heuristics are active
2058 // use heuristics only if there is no policy information on sequence
2059 return bh && bh.GetFeatureFetchPolicy() == bh.eFeatureFetchPolicy_default;
2060 }
2061
2062
x_CheckAdaptive(const CSeq_id_Handle & id) const2063 bool CAnnot_Collector::x_CheckAdaptive(const CSeq_id_Handle& id) const
2064 {
2065 int adaptive_flags = GetSelector().GetAdaptiveDepthFlags();
2066 if ( !(adaptive_flags & (SAnnotSelector::fAdaptive_ByTriggers |
2067 SAnnotSelector::fAdaptive_BySubtypes)) ) {
2068 // no heuristics
2069 return false;
2070 }
2071 if ( !(adaptive_flags & SAnnotSelector::fAdaptive_ByPolicy) ) {
2072 // heuristics only
2073 return true;
2074 }
2075 // both policy and heuristics are active
2076 // use heuristics only if there is no policy information on sequence
2077 CBioseq_Handle bh = x_GetBioseqHandle(id);
2078 return bh && bh.GetFeatureFetchPolicy() == bh.eFeatureFetchPolicy_default;
2079 }
2080
2081
x_SearchMaster(const CBioseq_Handle & bh,const CSeq_id_Handle & master_id,const CHandleRange & master_range)2082 void CAnnot_Collector::x_SearchMaster(const CBioseq_Handle& bh,
2083 const CSeq_id_Handle& master_id,
2084 const CHandleRange& master_range)
2085 {
2086 bool check_adaptive = x_CheckAdaptive(bh);
2087 if ( m_Selector->m_LimitObjectType == SAnnotSelector::eLimit_None ) {
2088 // any data source
2089 const CTSE_Handle& tse = bh.GetTSE_Handle();
2090 m_FromOtherTSE = false;
2091 if ( m_Selector->m_ExcludeExternal ) {
2092 const CTSE_Info& tse_info = tse.x_GetTSE_Info();
2093 tse_info.UpdateAnnotIndex();
2094 if ( tse_info.HasMatchingAnnotIds() ) {
2095 CConstRef<CSynonymsSet> syns = m_Scope->GetSynonyms(bh);
2096 ITERATE(CSynonymsSet, syn_it, *syns) {
2097 x_SearchTSE(tse, syns->GetSeq_id_Handle(syn_it),
2098 master_range, 0, check_adaptive);
2099 if ( x_NoMoreObjects() ) {
2100 break;
2101 }
2102 }
2103 }
2104 else {
2105 const CBioseq_Handle::TId& syns = bh.GetId();
2106 bool only_gi = tse_info.OnlyGiAnnotIds();
2107 ITERATE ( CBioseq_Handle::TId, syn_it, syns ) {
2108 if ( !only_gi || syn_it->IsGi() ) {
2109 x_SearchTSE(tse, *syn_it,
2110 master_range, 0, check_adaptive);
2111 if ( x_NoMoreObjects() ) {
2112 break;
2113 }
2114 }
2115 }
2116 }
2117 }
2118 else {
2119 CScope_Impl::TTSE_LockMatchSet tse_map;
2120 if ( m_Selector->IsIncludedAnyNamedAnnotAccession() ) {
2121 m_Scope->GetTSESetWithAnnots(bh, tse_map, *m_Selector);
2122 }
2123 else {
2124 m_Scope->GetTSESetWithAnnots(bh, tse_map);
2125 }
2126 ITERATE (CScope_Impl::TTSE_LockMatchSet, tse_it, tse_map) {
2127 m_FromOtherTSE = tse_it->first != bh.GetTSE_Handle();
2128 tse.AddUsedTSE(tse_it->first);
2129 x_SearchTSE(tse_it->first, tse_it->second,
2130 master_range, 0, check_adaptive);
2131 if ( x_NoMoreObjects() ) {
2132 break;
2133 }
2134 }
2135 }
2136 }
2137 else {
2138 // Search in the limit objects
2139 CConstRef<CSynonymsSet> syns;
2140 bool syns_initialized = false;
2141 ITERATE ( TTSE_LockMap, tse_it, m_TSE_LockMap ) {
2142 const CTSE_Info& tse_info = *tse_it->first;
2143 m_FromOtherTSE = tse_it->second != bh.GetTSE_Handle();
2144 tse_info.UpdateAnnotIndex();
2145 if ( tse_info.HasMatchingAnnotIds() ) {
2146 if ( !syns_initialized ) {
2147 syns = m_Scope->GetSynonyms(bh);
2148 syns_initialized = true;
2149 }
2150 if ( !syns ) {
2151 x_SearchTSE(tse_it->second, master_id,
2152 master_range, 0, check_adaptive);
2153 }
2154 else {
2155 ITERATE(CSynonymsSet, syn_it, *syns) {
2156 x_SearchTSE(tse_it->second,
2157 syns->GetSeq_id_Handle(syn_it),
2158 master_range, 0, check_adaptive);
2159 if ( x_NoMoreObjects() ) {
2160 break;
2161 }
2162 }
2163 }
2164 }
2165 else {
2166 const CBioseq_Handle::TId& syns_id = bh.GetId();
2167 bool only_gi = tse_info.OnlyGiAnnotIds();
2168 ITERATE ( CBioseq_Handle::TId, syn_it, syns_id ) {
2169 if ( !only_gi || syn_it->IsGi() ) {
2170 x_SearchTSE(tse_it->second, *syn_it,
2171 master_range, 0, check_adaptive);
2172 if ( x_NoMoreObjects() ) {
2173 break;
2174 }
2175 }
2176 }
2177 }
2178 if ( x_NoMoreObjects() ) {
2179 break;
2180 }
2181 }
2182 }
2183 }
2184
2185
x_CollectSegments(const CBioseq_Handle & bh,const CSeq_id_Handle & master_id,const CHandleRange & master_range,CSeq_loc & master_loc_empty,int level,CSeq_loc_Conversion_Set & cvt_set)2186 void CAnnot_Collector::x_CollectSegments(const CBioseq_Handle& bh,
2187 const CSeq_id_Handle& master_id,
2188 const CHandleRange& master_range,
2189 CSeq_loc& master_loc_empty,
2190 int level,
2191 CSeq_loc_Conversion_Set& cvt_set)
2192 {
2193 // CSeqMap_CI must be the same as in x_SearchSegments
2194 _ASSERT(m_Selector->m_ResolveMethod != m_Selector->eResolve_None);
2195 CSeqMap::TFlags flags = CSeqMap::fFindRef | CSeqMap::fFindExactLevel;
2196 if ( m_Selector->m_UnresolvedFlag != SAnnotSelector::eFailUnresolved ) {
2197 flags |= CSeqMap::fIgnoreUnresolved;
2198 }
2199 SSeqMapSelector sel(flags, level-1);
2200 if ( m_Selector->m_ResolveMethod == SAnnotSelector::eResolve_TSE ) {
2201 sel.SetLimitTSE(bh.GetTSE_Handle());
2202 }
2203
2204 int depth = m_Selector->GetResolveDepth();
2205 bool depth_is_set = depth >= 0 && depth < kMax_Int;
2206 bool exact_depth = m_Selector->GetExactDepth() && depth_is_set;
2207 int adaptive_flags = exact_depth? 0: m_Selector->GetAdaptiveDepthFlags();
2208 if ( adaptive_flags & SAnnotSelector::fAdaptive_ByPolicy ) {
2209 sel.SetByFeaturePolicy();
2210 }
2211 if ( adaptive_flags & SAnnotSelector::fAdaptive_BySeqClass) {
2212 sel.SetBySequenceClass();
2213 }
2214
2215 const CRange<TSeqPos>& range = master_range.begin()->first;
2216 for ( CSeqMap_CI smit(bh, sel, range);
2217 smit && smit.GetPosition() < range.GetToOpen();
2218 ++smit ) {
2219 _ASSERT(smit.GetType() == CSeqMap::eSeqRef);
2220 if ( !CanResolveId(smit.GetRefSeqid(), bh) ) {
2221 // External bioseq, try to search if limit is set
2222 if ( m_Selector->m_UnresolvedFlag !=
2223 SAnnotSelector::eSearchUnresolved ||
2224 !m_Selector->m_LimitObject ) {
2225 // Do not try to search on external segments
2226 continue;
2227 }
2228 }
2229
2230 x_CollectMapped(smit, master_loc_empty, master_id, master_range,
2231 cvt_set);
2232 }
2233 }
2234
2235
x_SearchSegments(const CBioseq_Handle & bh,const CSeq_id_Handle & master_id,const CHandleRange & master_range,CSeq_loc & master_loc_empty,int level)2236 bool CAnnot_Collector::x_SearchSegments(const CBioseq_Handle& bh,
2237 const CSeq_id_Handle& master_id,
2238 const CHandleRange& master_range,
2239 CSeq_loc& master_loc_empty,
2240 int level)
2241 {
2242 _ASSERT(m_Selector->m_ResolveMethod != m_Selector->eResolve_None);
2243 CSeqMap::TFlags flags = CSeqMap::fFindRef | CSeqMap::fFindExactLevel;
2244 if ( m_Selector->m_UnresolvedFlag != SAnnotSelector::eFailUnresolved ) {
2245 flags |= CSeqMap::fIgnoreUnresolved;
2246 }
2247 SSeqMapSelector sel(flags, level-1);
2248 if ( m_Selector->m_ResolveMethod == SAnnotSelector::eResolve_TSE ) {
2249 sel.SetLimitTSE(bh.GetTSE_Handle());
2250 }
2251
2252 int depth = m_Selector->GetResolveDepth();
2253 bool depth_is_set = depth >= 0 && depth < kMax_Int;
2254 bool exact_depth = m_Selector->GetExactDepth() && depth_is_set;
2255 int adaptive_flags = exact_depth? 0: m_Selector->GetAdaptiveDepthFlags();
2256 if ( adaptive_flags & SAnnotSelector::fAdaptive_ByPolicy ) {
2257 sel.SetByFeaturePolicy();
2258 }
2259 if ( adaptive_flags & SAnnotSelector::fAdaptive_BySeqClass) {
2260 sel.SetBySequenceClass();
2261 }
2262
2263 bool has_more = false;
2264 const CRange<TSeqPos>& range = master_range.begin()->first;
2265 for ( CSeqMap_CI smit(bh, sel, range);
2266 smit && smit.GetPosition() < range.GetToOpen();
2267 ++smit ) {
2268 _ASSERT(smit.GetType() == CSeqMap::eSeqRef);
2269 if ( !CanResolveId(smit.GetRefSeqid(), bh) ) {
2270 // External bioseq, try to search if limit is set
2271 if ( m_Selector->m_UnresolvedFlag !=
2272 SAnnotSelector::eSearchUnresolved ||
2273 !m_Selector->m_LimitObject ) {
2274 // Do not try to search on external segments
2275 continue;
2276 }
2277 }
2278
2279 has_more = true;
2280 x_SearchMapped(smit, master_loc_empty, master_id, master_range);
2281
2282 if ( x_NoMoreObjects() ) {
2283 return has_more;
2284 }
2285 }
2286 return has_more;
2287 }
2288
2289
2290 static
sx_GetFlag(const SAnnotSelector & selector)2291 CScope::EGetBioseqFlag sx_GetFlag(const SAnnotSelector& selector)
2292 {
2293 switch (selector.GetResolveMethod()) {
2294 case SAnnotSelector::eResolve_All:
2295 return CScope::eGetBioseq_All;
2296 default:
2297 // Do not load new TSEs
2298 return CScope::eGetBioseq_Loaded;
2299 }
2300 }
2301
2302
x_GetBioseqHandle(const CSeq_id_Handle & id,bool top_level) const2303 CBioseq_Handle CAnnot_Collector::x_GetBioseqHandle(const CSeq_id_Handle& id,
2304 bool top_level) const
2305 {
2306 CScope::EGetBioseqFlag flag =
2307 top_level? CScope::eGetBioseq_All: sx_GetFlag(GetSelector());
2308 return m_Scope->GetBioseqHandle(id, flag);
2309 }
2310
2311
x_CollectSegments(const CHandleRangeMap & master_loc,int level,CSeq_loc_Conversion_Set & cvt_set)2312 void CAnnot_Collector::x_CollectSegments(const CHandleRangeMap& master_loc,
2313 int level,
2314 CSeq_loc_Conversion_Set& cvt_set)
2315 {
2316 ITERATE ( CHandleRangeMap::TLocMap, idit, master_loc.GetMap() ) {
2317 CBioseq_Handle bh = x_GetBioseqHandle(idit->first);
2318 if ( !bh ) {
2319 if (m_Selector->m_UnresolvedFlag == SAnnotSelector::eFailUnresolved) {
2320 // resolve by Seq-id only
2321 NCBI_THROW(CAnnotException, eFindFailed,
2322 "Cannot resolve master id");
2323 }
2324 // skip unresolvable IDs
2325 continue;
2326 }
2327
2328 if ( !bh.GetSeqMap().HasSegmentOfType(CSeqMap::eSeqRef) ) {
2329 continue;
2330 }
2331
2332 CRef<CSeq_loc> master_loc_empty(new CSeq_loc);
2333 master_loc_empty->SetEmpty(
2334 const_cast<CSeq_id&>(*idit->first.GetSeqId()));
2335
2336 CSeqMap::TFlags flags = CSeqMap::fFindRef | CSeqMap::fFindExactLevel;
2337 if ( m_Selector->m_UnresolvedFlag != m_Selector->eFailUnresolved ) {
2338 flags |= CSeqMap::fIgnoreUnresolved;
2339 }
2340
2341 SSeqMapSelector sel(flags, level-1);
2342 if ( m_Selector->m_ResolveMethod == SAnnotSelector::eResolve_TSE ) {
2343 sel.SetLimitTSE(bh.GetTSE_Handle());
2344 }
2345
2346 int depth = m_Selector->GetResolveDepth();
2347 bool depth_is_set = depth >= 0 && depth < kMax_Int;
2348 bool exact_depth = m_Selector->GetExactDepth() && depth_is_set;
2349 int adaptive_flags = exact_depth?0:m_Selector->GetAdaptiveDepthFlags();
2350 if ( adaptive_flags & SAnnotSelector::fAdaptive_ByPolicy ) {
2351 sel.SetByFeaturePolicy();
2352 }
2353 if ( adaptive_flags & SAnnotSelector::fAdaptive_BySeqClass) {
2354 sel.SetBySequenceClass();
2355 }
2356
2357 CHandleRange::TRange range = idit->second.GetOverlappingRange();
2358 for ( CSeqMap_CI smit(bh, sel, range);
2359 smit && smit.GetPosition() < range.GetToOpen();
2360 ++smit ) {
2361 _ASSERT(smit.GetType() == CSeqMap::eSeqRef);
2362 if ( !CanResolveId(smit.GetRefSeqid(), bh) ) {
2363 // External bioseq, try to search if limit is set
2364 if ( m_Selector->m_UnresolvedFlag !=
2365 SAnnotSelector::eSearchUnresolved ||
2366 !m_Selector->m_LimitObject ) {
2367 // Do not try to search on external segments
2368 continue;
2369 }
2370 }
2371
2372 x_CollectMapped(smit, *master_loc_empty, idit->first, idit->second,
2373 cvt_set);
2374 }
2375 }
2376 }
2377
x_SearchSegments(const CHandleRangeMap & master_loc,int level)2378 bool CAnnot_Collector::x_SearchSegments(const CHandleRangeMap& master_loc,
2379 int level)
2380 {
2381 bool has_more = false;
2382 ITERATE ( CHandleRangeMap::TLocMap, idit, master_loc.GetMap() ) {
2383 CBioseq_Handle bh = x_GetBioseqHandle(idit->first);
2384 if ( !bh ) {
2385 if (m_Selector->m_UnresolvedFlag == SAnnotSelector::eFailUnresolved) {
2386 // resolve by Seq-id only
2387 NCBI_THROW(CAnnotException, eFindFailed,
2388 "Cannot resolve master id");
2389 }
2390 // skip unresolvable IDs
2391 continue;
2392 }
2393 else if ( m_Selector->GetAdaptiveDepthFlags() & SAnnotSelector::fAdaptive_ByPolicy &&
2394 bh.GetFeatureFetchPolicy() == bh.eFeatureFetchPolicy_only_near ) {
2395 // skip going deeper because of top-level interval policy
2396 continue;
2397 }
2398
2399 if ( !bh.GetSeqMap().HasSegmentOfType(CSeqMap::eSeqRef) ) {
2400 continue;
2401 }
2402
2403 CRef<CSeq_loc> master_loc_empty(new CSeq_loc);
2404 master_loc_empty->SetEmpty(
2405 const_cast<CSeq_id&>(*idit->first.GetSeqId()));
2406
2407 CSeqMap::TFlags flags = CSeqMap::fFindRef | CSeqMap::fFindExactLevel;
2408 if ( m_Selector->m_UnresolvedFlag != m_Selector->eFailUnresolved ) {
2409 flags |= CSeqMap::fIgnoreUnresolved;
2410 }
2411
2412 SSeqMapSelector sel(flags, level-1);
2413 if ( m_Selector->m_ResolveMethod == SAnnotSelector::eResolve_TSE ) {
2414 sel.SetLimitTSE(bh.GetTSE_Handle());
2415 }
2416
2417 int depth = m_Selector->GetResolveDepth();
2418 bool depth_is_set = depth >= 0 && depth < kMax_Int;
2419 bool exact_depth = m_Selector->GetExactDepth() && depth_is_set;
2420 int adaptive_flags = exact_depth?0:m_Selector->GetAdaptiveDepthFlags();
2421 if ( adaptive_flags & SAnnotSelector::fAdaptive_ByPolicy ) {
2422 sel.SetByFeaturePolicy();
2423 }
2424 if ( adaptive_flags & SAnnotSelector::fAdaptive_BySeqClass) {
2425 sel.SetBySequenceClass();
2426 }
2427
2428 CHandleRange::TRange range = idit->second.GetOverlappingRange();
2429 for ( CSeqMap_CI smit(bh, sel, range);
2430 smit && smit.GetPosition() < range.GetToOpen();
2431 ++smit ) {
2432 _ASSERT(smit.GetType() == CSeqMap::eSeqRef);
2433 if ( !CanResolveId(smit.GetRefSeqid(), bh) ) {
2434 // External bioseq, try to search if limit is set
2435 if ( m_Selector->m_UnresolvedFlag !=
2436 SAnnotSelector::eSearchUnresolved ||
2437 !m_Selector->m_LimitObject ) {
2438 // Do not try to search on external segments
2439 continue;
2440 }
2441 }
2442
2443 has_more = true;
2444 x_SearchMapped(smit, *master_loc_empty, idit->first, idit->second);
2445
2446 if ( x_NoMoreObjects() ) {
2447 return has_more;
2448 }
2449 }
2450 }
2451 return has_more;
2452 }
2453
x_AddTSE(const CTSE_Handle & tse)2454 void CAnnot_Collector::x_AddTSE(const CTSE_Handle& tse)
2455 {
2456 const CTSE_Info* key = &tse.x_GetTSE_Info();
2457 _ASSERT(key);
2458 TTSE_LockMap::iterator iter = m_TSE_LockMap.lower_bound(key);
2459 if ( iter == m_TSE_LockMap.end() || iter->first != key ) {
2460 iter = m_TSE_LockMap.insert(iter, TTSE_LockMap::value_type(key, tse));
2461 }
2462 _ASSERT(iter != m_TSE_LockMap.end());
2463 _ASSERT(iter->first == key);
2464 _ASSERT(iter->second == tse);
2465 }
2466
2467
2468
2469 struct SLessByInfo
2470 {
operator ()SLessByInfo2471 bool operator()(const CSeq_annot_Handle& a,
2472 const CSeq_annot_Handle& b) const
2473 {
2474 return &a.x_GetInfo() < &b.x_GetInfo();
2475 }
operator ()SLessByInfo2476 bool operator()(const CSeq_annot_Handle& a,
2477 const CSeq_annot_Info* b) const
2478 {
2479 return &a.x_GetInfo() < b;
2480 }
operator ()SLessByInfo2481 bool operator()(const CSeq_annot_Info* a,
2482 const CSeq_annot_Handle& b) const
2483 {
2484 return a < &b.x_GetInfo();
2485 }
operator ()SLessByInfo2486 bool operator()(const CSeq_annot_Info* a,
2487 const CSeq_annot_Info* b) const
2488 {
2489 return a < b;
2490 }
2491 };
2492
2493
x_AddObject(CAnnotObject_Ref & ref)2494 void CAnnot_Collector::x_AddObject(CAnnotObject_Ref& ref)
2495 {
2496 ref.SetFromOtherTSE(m_FromOtherTSE);
2497 m_AnnotSet.push_back(ref);
2498 }
2499
2500
x_AddObject(CAnnotObject_Ref & object_ref,CSeq_loc_Conversion * cvt,unsigned int loc_index)2501 void CAnnot_Collector::x_AddObject(CAnnotObject_Ref& object_ref,
2502 CSeq_loc_Conversion* cvt,
2503 unsigned int loc_index)
2504 {
2505 // Always map aligns through conv. set
2506 if ( (cvt && cvt->IsPartial()) || object_ref.IsAlign() ) {
2507 x_AddObjectMapping(object_ref, cvt, loc_index);
2508 }
2509 else {
2510 x_AddObject(object_ref);
2511 }
2512 }
2513
2514
x_AddPostMappings(void)2515 void CAnnot_Collector::x_AddPostMappings(void)
2516 {
2517 if ( !m_MappingCollector.get() ) {
2518 return;
2519 }
2520 CSeq_loc_Conversion::ELocationType loctype =
2521 (m_Selector->m_FeatProduct ?
2522 CSeq_loc_Conversion::eProduct :
2523 CSeq_loc_Conversion::eLocation);
2524 vector<CAnnotObject_Ref> partial_refs;
2525 ERASE_ITERATE ( CAnnotMappingCollector::TAnnotMappingSet, amit,
2526 m_MappingCollector->m_AnnotMappingSet ) {
2527 CAnnotObject_Ref annot_ref = amit->first;
2528 if ( !amit->second ) {
2529 // no actual mapping, just filtering duplicates
2530 x_AddObject(annot_ref);
2531 }
2532 else {
2533 amit->second->Convert(annot_ref, loctype);
2534 if ( amit->second->IsPartial() &&
2535 amit->second->HasUnconvertedId() ) {
2536 // conversion is not complete
2537 // keep the annotation for further conversion
2538 continue;
2539 }
2540 if ( annot_ref.IsAlign() ||
2541 !annot_ref.GetMappingInfo().GetTotalRange().Empty() ) {
2542 x_AddObject(annot_ref);
2543 }
2544 }
2545 m_MappingCollector->m_AnnotMappingSet.erase(amit);
2546 }
2547 if ( m_MappingCollector->m_AnnotMappingSet.empty() ) {
2548 m_MappingCollector.reset();
2549 }
2550 }
2551
2552
2553 CConstRef<CSerialObject>
x_GetMappedObject(const CAnnotObject_Ref & obj)2554 CAnnot_Collector::x_GetMappedObject(const CAnnotObject_Ref& obj)
2555 {
2556 CConstRef<CSerialObject> ret;
2557 if ( obj.IsFeat() ) {
2558 CMappedFeat feat;
2559 feat.Set(*this, obj);
2560 ret = feat.GetSeq_feat();
2561 }
2562 else if ( obj.IsGraph() ) {
2563 CMappedGraph graph;
2564 graph.Set(*this, obj);
2565 ret = &graph.GetMappedGraph();
2566 }
2567 else if ( obj.IsAlign() ) {
2568 }
2569 return ret;
2570 }
2571
2572
x_AddPostMappingsCvt(CSeq_loc_Conversion_Set & cvt)2573 void CAnnot_Collector::x_AddPostMappingsCvt(CSeq_loc_Conversion_Set& cvt)
2574 {
2575 if ( !m_MappingCollector.get() ) {
2576 return;
2577 }
2578 CSeq_loc_Conversion::ELocationType loctype =
2579 (m_Selector->m_FeatProduct ?
2580 CSeq_loc_Conversion::eProduct :
2581 CSeq_loc_Conversion::eLocation);
2582 ITERATE ( CAnnotMappingCollector::TAnnotMappingSet, amit,
2583 m_MappingCollector->m_AnnotMappingSet ) {
2584 CAnnotObject_Ref annot_ref = amit->first;
2585 if ( kTraceFullCvt ) {
2586 amit->second.GetNCObject().Convert(annot_ref, loctype);
2587 LOG_POST("Full conversion, was: "<<
2588 MSerial_AsnText<<*x_GetMappedObject(annot_ref));
2589 }
2590 cvt.Convert(annot_ref, loctype);
2591 if ( kTraceFullCvt ) {
2592 LOG_POST("Full conversion, now: "<<
2593 MSerial_AsnText<<*x_GetMappedObject(annot_ref));
2594 }
2595 if ( annot_ref.IsAlign() ||
2596 !annot_ref.GetMappingInfo().GetTotalRange().Empty() ) {
2597 x_AddObject(annot_ref);
2598 }
2599 }
2600 m_MappingCollector.reset();
2601 }
2602
2603
x_Initialize(const SAnnotSelector & selector)2604 void CAnnot_Collector::x_Initialize(const SAnnotSelector& selector)
2605 {
2606 CScope_Impl::TConfReadLockGuard guard(m_Scope->m_ConfLock);
2607 x_Initialize0(selector);
2608 // Limit must be set, resolving is obsolete
2609 _ASSERT(m_Selector->m_LimitObjectType != SAnnotSelector::eLimit_None);
2610 _ASSERT(m_Selector->m_LimitObject);
2611 _ASSERT(m_Selector->m_ResolveMethod == SAnnotSelector::eResolve_None);
2612 x_SearchAll();
2613 x_Sort();
2614 }
2615
2616
x_Sort(void)2617 void CAnnot_Collector::x_Sort(void)
2618 {
2619 //CStopWatch sw(CStopWatch::eStart);
2620 _ASSERT(!m_MappingCollector.get());
2621
2622 // Prepare id/range information for sorting.
2623 if (m_Selector->GetAnnotType() == CSeq_annot::C_Data::e_Ftable &&
2624 m_Selector->m_LimitObjectType == SAnnotSelector::eLimit_Seq_annot_Info) {
2625 ITERATE(TAnnotSet, it, m_AnnotSet) {
2626 CRef<CIdRangeMap> id_rg_map(new CIdRangeMap(*it, *m_Selector));
2627 it->GetMappingInfo().SetIdRangeMap(*id_rg_map);
2628 }
2629 }
2630
2631 switch ( m_Selector->m_SortOrder ) {
2632 case SAnnotSelector::eSortOrder_Normal:
2633 gfx::timsort(m_AnnotSet.begin(), m_AnnotSet.end(),
2634 CAnnotObject_Less(m_Selector, m_Scope));
2635 break;
2636 case SAnnotSelector::eSortOrder_Reverse:
2637 gfx::timsort(m_AnnotSet.begin(), m_AnnotSet.end(),
2638 CAnnotObject_LessReverse(m_Selector, m_Scope));
2639 break;
2640 default:
2641 // do nothing
2642 break;
2643 }
2644 //LOG_POST(Info<<"Sorted in "<<sw.Elapsed());
2645 }
2646
2647
2648 bool
x_MatchLimitObject(const CAnnotObject_Info & object) const2649 CAnnot_Collector::x_MatchLimitObject(const CAnnotObject_Info& object) const
2650 {
2651 if ( m_Selector->m_LimitObjectType != SAnnotSelector::eLimit_None ) {
2652 const CObject* limit = &*m_Selector->m_LimitObject;
2653 switch ( m_Selector->m_LimitObjectType ) {
2654 case SAnnotSelector::eLimit_TSE_Info:
2655 {{
2656 const CTSE_Info* info = &object.GetTSE_Info();
2657 _ASSERT(info);
2658 return info == limit;
2659 }}
2660 case SAnnotSelector::eLimit_Seq_entry_Info:
2661 {{
2662 const CSeq_entry_Info* info = &object.GetSeq_entry_Info();
2663 _ASSERT(info);
2664 for ( ;; ) {
2665 if ( info == limit ) {
2666 return true;
2667 }
2668 if ( !info->HasParent_Info() ) {
2669 return false;
2670 }
2671 info = &info->GetParentSeq_entry_Info();
2672 }
2673 }}
2674 case SAnnotSelector::eLimit_Seq_annot_Info:
2675 {{
2676 const CSeq_annot_Info* info = &object.GetSeq_annot_Info();
2677 _ASSERT(info);
2678 return info == limit;
2679 }}
2680 default:
2681 NCBI_THROW(CAnnotException, eLimitError,
2682 "CAnnot_Collector::x_MatchLimitObject: invalid mode");
2683 }
2684 }
2685 return true;
2686 }
2687
2688
x_MatchLocIndex(const SAnnotObject_Index & index) const2689 bool CAnnot_Collector::x_MatchLocIndex(const SAnnotObject_Index& index) const
2690 {
2691 return index.m_AnnotObject_Info->IsAlign() ||
2692 m_Selector->m_FeatProduct == (index.m_AnnotLocationIndex == 1);
2693 }
2694
2695
x_MatchRange(const CHandleRange & hr,const CRange<TSeqPos> & range,const SAnnotObject_Index & index) const2696 bool CAnnot_Collector::x_MatchRange(const CHandleRange& hr,
2697 const CRange<TSeqPos>& range,
2698 const SAnnotObject_Index& index) const
2699 {
2700 if ( m_Selector->m_OverlapType == SAnnotSelector::eOverlap_Intervals ) {
2701 if ( index.m_HandleRange ) {
2702 if (m_Selector->m_IgnoreStrand) {
2703 if ( !hr.IntersectingWith_NoStrand(*index.m_HandleRange) ) {
2704 return false;
2705 }
2706 }
2707 else {
2708 if ( !hr.IntersectingWith(*index.m_HandleRange) ) {
2709 return false;
2710 }
2711 }
2712 }
2713 else {
2714 ENa_strand strand;
2715 if (m_Selector->m_IgnoreStrand) {
2716 strand = eNa_strand_unknown;
2717 }
2718 else {
2719 switch ( index.m_Flags & SAnnotObject_Index::fStrand_both ) {
2720 case SAnnotObject_Index::fStrand_plus:
2721 strand = eNa_strand_plus;
2722 break;
2723 case SAnnotObject_Index::fStrand_minus:
2724 strand = eNa_strand_minus;
2725 break;
2726 default:
2727 strand = eNa_strand_unknown;
2728 break;
2729 }
2730 }
2731 if ( !hr.IntersectingWith(range, strand) ) {
2732 return false;
2733 }
2734 }
2735 }
2736 else {
2737 if ( !m_Selector->m_IgnoreStrand &&
2738 (hr.GetStrandsFlag() & index.m_Flags) == 0 ) {
2739 return false; // different strands
2740 }
2741 }
2742 if ( !x_MatchLocIndex(index) ) {
2743 return false;
2744 }
2745 return true;
2746 }
2747
2748
x_GetTSE_Info(void)2749 void CAnnot_Collector::x_GetTSE_Info(void)
2750 {
2751 // only one TSE is needed
2752 _ASSERT(m_TSE_LockMap.empty());
2753 _ASSERT(m_Selector->m_LimitObjectType != SAnnotSelector::eLimit_None);
2754 _ASSERT(m_Selector->m_LimitObject);
2755
2756 switch ( m_Selector->m_LimitObjectType ) {
2757 case SAnnotSelector::eLimit_TSE_Info:
2758 {
2759 _ASSERT(m_Selector->m_LimitTSE);
2760 _ASSERT(CTypeConverter<CTSE_Info>::
2761 SafeCast(&*m_Selector->m_LimitObject));
2762 break;
2763 }
2764 case SAnnotSelector::eLimit_Seq_entry_Info:
2765 {
2766 _ASSERT(m_Selector->m_LimitTSE);
2767 _ASSERT(CTypeConverter<CSeq_entry_Info>::
2768 SafeCast(&*m_Selector->m_LimitObject));
2769 break;
2770 }
2771 case SAnnotSelector::eLimit_Seq_annot_Info:
2772 {
2773 _ASSERT(m_Selector->m_LimitTSE);
2774 _ASSERT(CTypeConverter<CSeq_annot_Info>::
2775 SafeCast(&*m_Selector->m_LimitObject));
2776 break;
2777 }
2778 default:
2779 NCBI_THROW(CAnnotException, eLimitError,
2780 "CAnnot_Collector::x_GetTSE_Info: invalid mode");
2781 }
2782 _ASSERT(m_Selector->m_LimitObject);
2783 _ASSERT(m_Selector->m_LimitTSE);
2784 x_AddTSE(m_Selector->m_LimitTSE);
2785 }
2786
2787
x_SearchTSE(const CTSE_Handle & tseh,const CSeq_id_Handle & id,const CHandleRange & hr,CSeq_loc_Conversion * cvt,bool check_adaptive)2788 bool CAnnot_Collector::x_SearchTSE(const CTSE_Handle& tseh,
2789 const CSeq_id_Handle& id,
2790 const CHandleRange& hr,
2791 CSeq_loc_Conversion* cvt,
2792 bool check_adaptive)
2793 {
2794 if ( !m_Selector->m_SourceLoc ) {
2795 return x_SearchTSE2(tseh, id, hr, cvt, check_adaptive);
2796 }
2797 const CHandleRangeMap& src_hrm = *m_Selector->m_SourceLoc;
2798 CHandleRangeMap::const_iterator it = src_hrm.find(id);
2799 if ( it == src_hrm.end() || !hr.IntersectingWithTotalRange(it->second) ) {
2800 // non-overlapping loc
2801 return false;
2802 }
2803 CHandleRange hr2(hr, it->second.GetOverlappingRange());
2804 return !hr2.Empty() && x_SearchTSE2(tseh, id, hr2, cvt, check_adaptive);
2805 }
2806
2807
x_SearchTSE2(const CTSE_Handle & tseh,const CSeq_id_Handle & id,const CHandleRange & hr,CSeq_loc_Conversion * cvt,bool check_adaptive)2808 bool CAnnot_Collector::x_SearchTSE2(const CTSE_Handle& tseh,
2809 const CSeq_id_Handle& id,
2810 const CHandleRange& hr,
2811 CSeq_loc_Conversion* cvt,
2812 bool check_adaptive)
2813 {
2814 const CTSE_Info& tse = tseh.x_GetTSE_Info();
2815 bool found = false;
2816
2817 tse.UpdateAnnotIndex(id);
2818 CTSE_Info::TAnnotLockReadGuard guard(tse.GetAnnotLock());
2819
2820 //CStopWatch sw(CStopWatch::eStart);
2821
2822 if (cvt) {
2823 cvt->SetSrcId(id);
2824 }
2825 // Skip excluded TSEs
2826 //if ( ExcludedTSE(tse) ) {
2827 //continue;
2828 //}
2829
2830 SAnnotSelector::TAdaptiveDepthFlags adaptive_flags = 0;
2831 if ( check_adaptive &&
2832 (!m_Selector->GetExactDepth() ||
2833 m_Selector->GetResolveDepth() == kMax_Int) ) {
2834 adaptive_flags = m_Selector->GetAdaptiveDepthFlags();
2835 }
2836 if ( (adaptive_flags & SAnnotSelector::fAdaptive_ByTriggers) &&
2837 m_TriggerTypes.any() &&
2838 tse.ContainsMatchingBioseq(id) ) {
2839 // first check triggers
2840 const SIdAnnotObjs* objs = tse.x_GetUnnamedIdObjects(id);
2841 if ( objs ) {
2842 for ( size_t index = 0, count = objs->x_GetRangeMapCount();
2843 index < count; ++index ) {
2844 if ( objs->x_RangeMapIsEmpty(index) ) {
2845 continue;
2846 }
2847 if ( m_TriggerTypes.test(index) ) {
2848 m_UnseenAnnotTypes.reset();
2849 found = true;
2850 // If we have found adaptive depth trigger features
2851 // it means that sequence is annotated and
2852 // time/segments limits are no longer active.
2853 x_StopSearchLimits();
2854 break;
2855 }
2856 }
2857 }
2858 }
2859 if ( (adaptive_flags & SAnnotSelector::fAdaptive_BySubtypes) &&
2860 m_UnseenAnnotTypes.any() ) {
2861 ITERATE (CTSE_Info::TNamedAnnotObjs, iter, tse.m_NamedAnnotObjs) {
2862 const SIdAnnotObjs* objs =
2863 tse.x_GetIdObjects(iter->second, id);
2864 if ( objs ) {
2865 for ( size_t index = 0, count = objs->x_GetRangeMapCount();
2866 index < count; ++index ) {
2867 if ( !objs->x_RangeMapIsEmpty(index) ) {
2868 m_UnseenAnnotTypes.reset(index);
2869 }
2870 }
2871 }
2872 }
2873 }
2874
2875 if ( m_Selector->HasExplicitAnnotsNames() ) {
2876 // only 'included' annots
2877 ITERATE ( SAnnotSelector::TAnnotsNames, iter, m_Selector->GetIncludedAnnotsNames() ) {
2878 if ( m_Selector->ExcludedAnnotName(*iter) ) {
2879 // it may happen e.g. when another zoom level is selected
2880 continue;
2881 }
2882 const SIdAnnotObjs* objs = tse.x_GetIdObjects(*iter, id);
2883 if ( objs ) {
2884 x_SearchObjects(tseh, objs, guard, *iter, id, hr, cvt);
2885 if ( x_NoMoreObjects() ) {
2886 return found;
2887 }
2888 }
2889 }
2890 }
2891 else {
2892 // all annots, skipping 'excluded'
2893 ITERATE (CTSE_Info::TNamedAnnotObjs, iter, tse.m_NamedAnnotObjs) {
2894 if ( m_Selector->ExcludedAnnotName(iter->first) ) {
2895 continue;
2896 }
2897 const SIdAnnotObjs* objs = tse.x_GetIdObjects(iter->second, id);
2898 if ( objs ) {
2899 x_SearchObjects(tseh, objs, guard, iter->first, id, hr, cvt);
2900 if ( x_NoMoreObjects() ) {
2901 return found;
2902 }
2903 }
2904 }
2905 }
2906
2907 //LOG_POST(Info<<"Collected annots in "<<sw.Elapsed());
2908 return found;
2909 }
2910
2911
x_AddObjectMapping(CAnnotObject_Ref & object_ref,CSeq_loc_Conversion * cvt,unsigned int loc_index)2912 void CAnnot_Collector::x_AddObjectMapping(CAnnotObject_Ref& object_ref,
2913 CSeq_loc_Conversion* cvt,
2914 unsigned int loc_index)
2915 {
2916 if ( cvt ) {
2917 // reset current mapping info, it will be updated by conversion set
2918 object_ref.ResetLocation();
2919 }
2920 if ( !m_MappingCollector.get() ) {
2921 m_MappingCollector.reset(new CAnnotMappingCollector);
2922 }
2923 object_ref.SetFromOtherTSE(m_FromOtherTSE);
2924 CRef<CSeq_loc_Conversion_Set>& mapping_set =
2925 m_MappingCollector->m_AnnotMappingSet[object_ref];
2926 if ( cvt ) {
2927 if ( !mapping_set ) {
2928 mapping_set.Reset(new CSeq_loc_Conversion_Set(m_Scope));
2929 }
2930 _ASSERT(cvt->IsPartial() || object_ref.IsAlign());
2931 CRef<CSeq_loc_Conversion> cvt_copy(new CSeq_loc_Conversion(*cvt));
2932 mapping_set->Add(*cvt_copy, loc_index);
2933 }
2934 }
2935
2936
sx_IsEmpty(const SAnnotSelector & sel)2937 static bool sx_IsEmpty(const SAnnotSelector& sel)
2938 {
2939 if ( sel.GetAnnotType() != CSeq_annot::C_Data::e_not_set ) {
2940 return false;
2941 }
2942 return true;
2943 }
2944
2945
x_SearchObjects(const CTSE_Handle & tseh,const SIdAnnotObjs * objs,CTSE_Info::TAnnotLockReadGuard & guard,const CAnnotName & annot_name,const CSeq_id_Handle & id,const CHandleRange & hr,CSeq_loc_Conversion * cvt)2946 void CAnnot_Collector::x_SearchObjects(const CTSE_Handle& tseh,
2947 const SIdAnnotObjs* objs,
2948 CTSE_Info::TAnnotLockReadGuard& guard,
2949 const CAnnotName& annot_name,
2950 const CSeq_id_Handle& id,
2951 const CHandleRange& hr,
2952 CSeq_loc_Conversion* cvt)
2953 {
2954 if ( m_Selector->m_CollectNames ) {
2955 if ( m_AnnotNames->find(annot_name) != m_AnnotNames->end() ) {
2956 // already found
2957 return;
2958 }
2959 if ( sx_IsEmpty(*m_Selector) ) {
2960 // no search for individual annotations
2961 // just remember the name and leave
2962 m_AnnotNames->insert(annot_name);
2963 return;
2964 }
2965 }
2966
2967 if ( m_CollectAnnotTypes.any() ) {
2968 x_SearchRange(tseh, objs, guard, annot_name, id, hr, cvt);
2969 if ( x_NoMoreObjects() ) {
2970 return;
2971 }
2972 }
2973 if ( m_Selector->m_CollectCostOfLoading ) {
2974 return;
2975 }
2976
2977 static const size_t kAnnotTypeIndex_SNP =
2978 CAnnotType_Index::GetSubtypeIndex(CSeqFeatData::eSubtype_variation);
2979
2980 if ( m_CollectAnnotTypes.test(kAnnotTypeIndex_SNP) ) {
2981 if ( m_Selector->m_CollectTypes &&
2982 m_AnnotTypes.test(kAnnotTypeIndex_SNP) ) {
2983 return;
2984 }
2985 CSeq_annot_Handle sah;
2986 CHandleRange::TRange range = hr.GetOverlappingRange();
2987 ITERATE ( CTSE_Info::TSNPSet, snp_annot_it, objs->m_SNPSet ) {
2988 const CSeq_annot_SNP_Info& snp_annot = **snp_annot_it;
2989 CSeq_annot_SNP_Info::const_iterator snp_it =
2990 snp_annot.FirstIn(range);
2991 if ( snp_it != snp_annot.end() ) {
2992 x_AddTSE(tseh);
2993 const CSeq_annot_Info& annot_info =
2994 snp_annot.GetParentSeq_annot_Info();
2995 if ( !sah || &sah.x_GetInfo() != &annot_info ) {
2996 sah.x_Set(annot_info, tseh);
2997 }
2998
2999 do {
3000 const SSNP_Info& snp = *snp_it;
3001 if ( snp.NoMore(range) ) {
3002 break;
3003 }
3004 if ( snp.NotThis(range) ) {
3005 continue;
3006 }
3007
3008 if (m_Selector->m_CollectTypes) {
3009 m_AnnotTypes.set(kAnnotTypeIndex_SNP);
3010 break;
3011 }
3012 if (m_Selector->m_CollectNames) {
3013 m_AnnotNames->insert(annot_name);
3014 break;
3015 }
3016
3017 CAnnotObject_Ref annot_ref(snp_annot, sah, snp, cvt);
3018 x_AddObject(annot_ref);
3019 if ( x_NoMoreObjects() ) {
3020 return;
3021 }
3022 if ( m_Selector->m_CollectSeq_annots ) {
3023 // Ignore multiple SNPs from the same seq-annot
3024 break;
3025 }
3026 } while ( ++snp_it != snp_annot.end() );
3027 }
3028 }
3029 }
3030 }
3031
3032
3033 static inline
sx_GeneIsSuppressed(const CSeq_feat & feat)3034 bool sx_GeneIsSuppressed(const CSeq_feat& feat)
3035 {
3036 if ( feat.IsSetXref() ) {
3037 const CSeq_feat::TXref& xrefs = feat.GetXref();
3038 if ( xrefs.size() == 1 ) {
3039 const CSeqFeatXref& xref = *xrefs[0];
3040 if ( xref.IsSetData() ) {
3041 const CSeqFeatData& data = xref.GetData();
3042 if ( data.IsGene() ) {
3043 const CGene_ref& gene = data.GetGene();
3044 if ( !gene.IsSetLocus() && !gene.IsSetLocus_tag() ) {
3045 // feature has single empty gene xref
3046 return true;
3047 }
3048 }
3049 }
3050 }
3051 }
3052 return false;
3053 }
3054
3055
x_SearchRange(const CTSE_Handle & tseh,const SIdAnnotObjs * objs,CTSE_Info::TAnnotLockReadGuard & guard,const CAnnotName & annot_name,const CSeq_id_Handle & id,const CHandleRange & hr,CSeq_loc_Conversion * cvt)3056 void CAnnot_Collector::x_SearchRange(const CTSE_Handle& tseh,
3057 const SIdAnnotObjs* objs,
3058 CTSE_Info::TAnnotLockReadGuard& guard,
3059 const CAnnotName& annot_name,
3060 const CSeq_id_Handle& id,
3061 const CHandleRange& hr,
3062 CSeq_loc_Conversion* cvt)
3063 {
3064 const CTSE_Info& tse = tseh.x_GetTSE_Info();
3065 _ASSERT(objs);
3066
3067 // CHandleRange::TRange range = hr.GetOverlappingRange();
3068
3069 x_AddTSE(tseh);
3070 CSeq_annot_Handle sah;
3071
3072 size_t from_idx = 0;
3073 bool enough = false;
3074
3075 typedef vector<const CTSE_Chunk_Info*> TStubs;
3076 typedef map<const CTSE_Split_Info*, CTSE_Split_Info::TChunkIds> TStubMap;
3077 TStubs stubs;
3078 bool restart = false;
3079 do {
3080 if ( restart ) {
3081 _ASSERT(!enough);
3082
3083 TStubMap stubmap;
3084 ITERATE ( TStubs, it, stubs ) {
3085 const CTSE_Chunk_Info& chunk = **it;
3086 stubmap[&chunk.GetSplitInfo()].
3087 push_back(chunk.GetChunkId());
3088 }
3089 stubs.clear();
3090 restart = false;
3091
3092 // Release lock for tse update:
3093 guard.Release();
3094 for ( auto& it : stubmap) {
3095 if ( m_Selector->GetMaxSize() < numeric_limits<TMaxSize>::max() ) {
3096 it.first->LoadChunk(it.second.front());
3097 break;
3098 }
3099 gfx::timsort(it.second.begin(), it.second.end());
3100 it.second.erase(unique(it.second.begin(), it.second.end()), it.second.end());
3101 it.first->LoadChunks(it.second);
3102 }
3103 tse.UpdateAnnotIndex(id);
3104
3105 // Acquire the lock again:
3106 guard.Guard(tse.GetAnnotLock());
3107
3108 // Reget range map pointer as it may change:
3109 objs = tse.x_GetIdObjects(annot_name, id);
3110 _ASSERT(objs);
3111 }
3112 for ( size_t index = from_idx, count = objs->x_GetRangeMapCount();
3113 index < count; ++index ) {
3114 if ( m_Selector->m_CollectTypes && m_AnnotTypes.test(index)) {
3115 continue;
3116 }
3117 if ( !m_CollectAnnotTypes.test(index) ) {
3118 continue;
3119 }
3120
3121 if ( objs->x_RangeMapIsEmpty(index) ) {
3122 continue;
3123 }
3124 const CTSE_Info::TRangeMap& rmap = objs->x_GetRangeMap(index);
3125
3126 size_t start_size = m_AnnotSet.size(); // for rollback
3127
3128 // Same annotations may appear more than once if circular.
3129 // In this case duplicated annotation entries need to be removed.
3130 bool need_unique = false;
3131
3132 ITERATE(CHandleRange, rg_it, hr) {
3133 CHandleRange::TRange range = rg_it->first;
3134
3135 for ( CTSE_Info::TRangeMap::const_iterator
3136 aoit(rmap.begin(range));
3137 aoit; ++aoit ) {
3138 const CAnnotObject_Info& annot_info =
3139 *aoit->second.m_AnnotObject_Info;
3140
3141 // special filtering
3142 if ( m_Selector->GetExcludeIfGeneIsSuppressed() &&
3143 annot_info.IsFeat() ) {
3144 if ( annot_info.IsRegular() ) {
3145 if ( sx_GeneIsSuppressed(annot_info.GetFeat()) ) {
3146 continue;
3147 }
3148 }
3149 }
3150
3151 // Collect types
3152 if (m_Selector->m_CollectTypes) {
3153 if (x_MatchLimitObject(annot_info) &&
3154 x_MatchRange(hr, aoit->first, aoit->second) ) {
3155 m_AnnotTypes.set(index);
3156 break;
3157 }
3158 }
3159 if (m_Selector->m_CollectNames) {
3160 if (x_MatchLimitObject(annot_info) &&
3161 x_MatchRange(hr, aoit->first, aoit->second) ) {
3162 m_AnnotNames->insert(annot_name);
3163 return;
3164 }
3165 }
3166
3167 if ( annot_info.IsChunkStub() ) {
3168 const CTSE_Chunk_Info& chunk = annot_info.GetChunk_Info();
3169 if ( !chunk.NotLoaded() && !tse.x_DirtyAnnotIndex() ) {
3170 // Skip chunk stub
3171 continue;
3172 }
3173 if ( chunk.NotLoaded() &&
3174 m_Selector->m_CollectCostOfLoading &&
3175 chunk.GetChunkId() != CTSE_Chunk_Info::kDelayedMain_ChunkId ) {
3176 // accumulate cost of chunks to be loaded
3177 auto cost = chunk.GetLoadCost();
3178 m_LoadBytes += cost.first;
3179 m_LoadSeconds += cost.second;
3180 continue;
3181 }
3182 if ( !restart ) {
3183 restart = true;
3184 // New annot objects are to be loaded,
3185 // so we'll need to restart scan of current range.
3186 // Forget already found objects
3187 // as they will be found again:
3188 m_AnnotSet.resize(start_size);
3189 // Update start index for the new search
3190 from_idx = index;
3191 }
3192 if ( chunk.NotLoaded() ) {
3193 stubs.push_back(&chunk);
3194 }
3195 }
3196 if ( restart ) {
3197 _ASSERT(!enough);
3198 continue;
3199 }
3200 if ( m_Selector->m_CollectCostOfLoading ) {
3201 continue;
3202 }
3203
3204 if ( annot_info.IsLocs() ) {
3205 const CSeq_loc& ref_loc = annot_info.GetLocs();
3206
3207 // Check if the stub has been already processed
3208 if ( m_AnnotLocsSet.get() ) {
3209 CConstRef<CSeq_loc> ploc(&ref_loc);
3210 TAnnotLocsSet::const_iterator found =
3211 m_AnnotLocsSet->find(ploc);
3212 if (found != m_AnnotLocsSet->end()) {
3213 continue;
3214 }
3215 }
3216 else {
3217 m_AnnotLocsSet.reset(new TAnnotLocsSet);
3218 }
3219 m_AnnotLocsSet->insert(ConstRef(&ref_loc));
3220
3221 // Search annotations on the referenced location
3222 if ( !ref_loc.IsInt() ) {
3223 ERR_POST_X(1, "CAnnot_Collector: "
3224 "Seq-annot.locs is not Seq-interval");
3225 continue;
3226 }
3227 const CSeq_interval& ref_int = ref_loc.GetInt();
3228 const CSeq_id& ref_id = ref_int.GetId();
3229 CSeq_id_Handle ref_idh = CSeq_id_Handle::GetHandle(ref_id);
3230 // check ResolveTSE limit
3231 if ( m_Selector->m_ResolveMethod == SAnnotSelector::eResolve_TSE ) {
3232 if ( !tseh.GetBioseqHandle(ref_idh) ) {
3233 continue;
3234 }
3235 }
3236
3237 // calculate ranges
3238 TSeqPos ref_from = ref_int.GetFrom();
3239 TSeqPos ref_to = ref_int.GetTo();
3240 bool ref_minus = ref_int.IsSetStrand()?
3241 IsReverse(ref_int.GetStrand()) : false;
3242 TSeqPos loc_from = aoit->first.GetFrom();
3243 TSeqPos loc_to = aoit->first.GetTo();
3244 TSeqPos loc_view_from = max(range.GetFrom(), loc_from);
3245 TSeqPos loc_view_to = min(range.GetTo(), loc_to);
3246
3247 CHandleRangeMap ref_rmap;
3248 CHandleRange::TRange ref_search_range;
3249 if ( !ref_minus ) {
3250 ref_search_range.Set(ref_from + (loc_view_from - loc_from),
3251 ref_to + (loc_view_to - loc_to));
3252 }
3253 else {
3254 ref_search_range.Set(ref_from - (loc_view_to - loc_to),
3255 ref_to - (loc_view_from - loc_from));
3256 }
3257 ref_rmap.AddRanges(ref_idh).AddRange(ref_search_range,
3258 eNa_strand_unknown);
3259
3260 if (m_Selector->m_NoMapping) {
3261 x_SearchLoc(ref_rmap, 0, &tseh);
3262 }
3263 else {
3264 CRef<CSeq_loc> master_loc_empty(new CSeq_loc);
3265 master_loc_empty->SetEmpty(
3266 const_cast<CSeq_id&>(*id.GetSeqId()));
3267 CRef<CSeq_loc_Conversion> locs_cvt(new CSeq_loc_Conversion(
3268 *master_loc_empty,
3269 id,
3270 aoit->first,
3271 ref_idh,
3272 ref_from,
3273 ref_minus,
3274 m_Scope));
3275 if ( cvt ) {
3276 locs_cvt->CombineWith(*cvt);
3277 }
3278 x_SearchLoc(ref_rmap, &*locs_cvt, &tseh);
3279 }
3280 if ( x_NoMoreObjects() ) {
3281 _ASSERT(!restart);
3282 enough = true;
3283 break;
3284 }
3285 continue;
3286 }
3287
3288 _ASSERT(m_Selector->MatchType(annot_info));
3289
3290 if ( !x_MatchLimitObject(annot_info) ) {
3291 continue;
3292 }
3293
3294 if ( !x_MatchRange(hr, aoit->first, aoit->second) ) {
3295 continue;
3296 }
3297
3298 if ( annot_info.GetAnnotIndex() == CSeq_annot_Info::kWholeAnnotIndex ) {
3299 const CSeq_annot_Info& seq_annot = annot_info.GetSeq_annot_Info();
3300 if ( seq_annot.IsSortedTable() ) {
3301 sah.x_Set(seq_annot, tseh);
3302 CHandleRange::TRange hrange = hr.GetOverlappingRange();
3303 for ( CSeq_annot_SortedIter iter =
3304 seq_annot.StartSortedIterator(hrange);
3305 iter; ++iter ) {
3306
3307 if (m_Selector->HasBitFilter() &&
3308 !seq_annot.MatchBitFilter(*m_Selector,
3309 iter) ) {
3310 continue;
3311 }
3312
3313 if (m_Selector->m_CollectTypes) {
3314 m_AnnotTypes.set(index);
3315 break;
3316 }
3317
3318 if (m_Selector->m_CollectNames) {
3319 m_AnnotNames->insert(annot_name);
3320 break;
3321 }
3322
3323 CAnnotObject_Ref annot_ref(sah, iter, cvt);
3324 x_AddObject(annot_ref);
3325 if ( x_NoMoreObjects() ) {
3326 _ASSERT(!restart);
3327 enough = true;
3328 break;
3329 }
3330
3331 if ( m_Selector->m_CollectSeq_annots ) {
3332 // Ignore multiple feats from the same seq-annot
3333 break;
3334 }
3335 }
3336 }
3337 if ( enough ) {
3338 _ASSERT(!restart);
3339 break;
3340 }
3341 continue;
3342 }
3343
3344 bool is_circular = aoit->second.m_HandleRange &&
3345 aoit->second.m_HandleRange->GetData().IsCircular();
3346 need_unique |= is_circular;
3347 const CSeq_annot_Info& sa_info =
3348 annot_info.GetSeq_annot_Info();
3349 if ( !sah || &sah.x_GetInfo() != &sa_info ){
3350 sah.x_Set(sa_info, tseh);
3351 }
3352
3353 CAnnotObject_Ref annot_ref(annot_info, sah);
3354 if ( !cvt && aoit->second.GetMultiIdFlag() ) {
3355 // Create self-conversion, add to conversion set
3356 CHandleRange::TRange ref_rg = aoit->first;
3357 if (is_circular ) {
3358 TSeqPos from = aoit->second.m_HandleRange->
3359 GetData().GetLeft();
3360 TSeqPos to =aoit->second.m_HandleRange->
3361 GetData().GetRight();
3362 ref_rg = CHandleRange::TRange(from, to);
3363 }
3364 annot_ref.GetMappingInfo().SetAnnotObjectRange(ref_rg,
3365 m_Selector->m_FeatProduct);
3366 x_AddObjectMapping(annot_ref, 0,
3367 aoit->second.m_AnnotLocationIndex);
3368 }
3369 else {
3370 if (cvt && !annot_ref.IsAlign() ) {
3371 cvt->Convert(annot_ref,
3372 m_Selector->m_FeatProduct ?
3373 CSeq_loc_Conversion::eProduct :
3374 CSeq_loc_Conversion::eLocation,
3375 id,
3376 aoit->first,
3377 aoit->second);
3378 }
3379 else {
3380 CHandleRange::TRange ref_rg = aoit->first;
3381 if ( is_circular ) {
3382 TSeqPos from = aoit->second.m_HandleRange->
3383 GetData().GetLeft();
3384 TSeqPos to = aoit->second.m_HandleRange->
3385 GetData().GetRight();
3386 ref_rg = CHandleRange::TRange(from, to);
3387 }
3388 annot_ref.GetMappingInfo().SetAnnotObjectRange(ref_rg,
3389 m_Selector->m_FeatProduct);
3390 }
3391 x_AddObject(annot_ref, cvt,
3392 aoit->second.m_AnnotLocationIndex);
3393 }
3394 if ( x_NoMoreObjects() ) {
3395 _ASSERT(!restart);
3396 enough = true;
3397 break;
3398 }
3399 }
3400 if ( enough ) {
3401 _ASSERT(!restart);
3402 break;
3403 }
3404 if ( restart ) {
3405 _ASSERT(!enough);
3406 continue;
3407 }
3408 }
3409 if ( restart ) {
3410 _ASSERT(!enough);
3411 continue;
3412 }
3413 if ( need_unique || hr.end() - hr.begin() > 1 ) {
3414 TAnnotSet::iterator first_added = m_AnnotSet.begin() + start_size;
3415 stable_sort(first_added, m_AnnotSet.end());
3416 m_AnnotSet.erase(unique(first_added, m_AnnotSet.end()),
3417 m_AnnotSet.end());
3418 }
3419 if ( enough ) {
3420 _ASSERT(!restart);
3421 break;
3422 }
3423 }
3424 if ( enough ) {
3425 _ASSERT(!restart);
3426 break;
3427 }
3428 } while ( restart );
3429 }
3430
3431
x_SearchLoc(const CHandleRangeMap & loc,CSeq_loc_Conversion * cvt,const CTSE_Handle * using_tse,bool top_level)3432 bool CAnnot_Collector::x_SearchLoc(const CHandleRangeMap& loc,
3433 CSeq_loc_Conversion* cvt,
3434 const CTSE_Handle* using_tse,
3435 bool top_level)
3436 {
3437 bool found = false;
3438 ITERATE ( CHandleRangeMap, idit, loc ) {
3439 if ( idit->second.Empty() ) {
3440 continue;
3441 }
3442 if ( m_Selector->m_LimitObjectType == SAnnotSelector::eLimit_None ) {
3443 // any data source
3444 const CTSE_Handle* tse = 0;
3445 CBioseq_Handle bh = x_GetBioseqHandle(idit->first, top_level);
3446 if ( !bh ) {
3447 if ( m_Selector->m_UnresolvedFlag ==
3448 SAnnotSelector::eFailUnresolved ) {
3449 NCBI_THROW(CAnnotException, eFindFailed,
3450 "Cannot find id synonyms");
3451 }
3452 if ( m_Selector->m_UnresolvedFlag ==
3453 SAnnotSelector::eIgnoreUnresolved ) {
3454 continue; // skip unresolvable IDs
3455 }
3456 tse = using_tse;
3457 }
3458 else {
3459 tse = &bh.GetTSE_Handle();
3460 if ( using_tse ) {
3461 using_tse->AddUsedTSE(*tse);
3462 }
3463 }
3464 bool check_adaptive = x_CheckAdaptive(bh);
3465 if ( m_Selector->m_ExcludeExternal ) {
3466 if ( !bh ) {
3467 // no sequence tse
3468 continue;
3469 }
3470 _ASSERT(tse);
3471 m_FromOtherTSE = false;
3472 const CTSE_Info& tse_info = tse->x_GetTSE_Info();
3473 tse_info.UpdateAnnotIndex();
3474 if ( tse_info.HasMatchingAnnotIds() ) {
3475 CConstRef<CSynonymsSet> syns = m_Scope->GetSynonyms(bh);
3476 ITERATE(CSynonymsSet, syn_it, *syns) {
3477 found |= x_SearchTSE(*tse,
3478 syns->GetSeq_id_Handle(syn_it),
3479 idit->second, cvt, check_adaptive);
3480 if ( x_NoMoreObjects() ) {
3481 break;
3482 }
3483 }
3484 }
3485 else {
3486 const CBioseq_Handle::TId& syns = bh.GetId();
3487 bool only_gi = tse_info.OnlyGiAnnotIds();
3488 ITERATE ( CBioseq_Handle::TId, syn_it, syns ) {
3489 if ( !only_gi || syn_it->IsGi() ) {
3490 found |= x_SearchTSE(*tse, *syn_it,
3491 idit->second, cvt, check_adaptive);
3492 if ( x_NoMoreObjects() ) {
3493 break;
3494 }
3495 }
3496 }
3497 }
3498 }
3499 else {
3500 CScope_Impl::TTSE_LockMatchSet tse_map;
3501 if ( m_Selector->IsIncludedAnyNamedAnnotAccession() ) {
3502 m_Scope->GetTSESetWithAnnots(idit->first, tse_map,
3503 *m_Selector);
3504 }
3505 else {
3506 m_Scope->GetTSESetWithAnnots(idit->first, tse_map);
3507 }
3508 ITERATE ( CScope_Impl::TTSE_LockMatchSet, tse_it, tse_map ) {
3509 if ( tse ) {
3510 tse->AddUsedTSE(tse_it->first);
3511 }
3512 m_FromOtherTSE = !bh || tse_it->first != bh.GetTSE_Handle();
3513 found |= x_SearchTSE(tse_it->first, tse_it->second,
3514 idit->second, cvt, check_adaptive);
3515 if ( x_NoMoreObjects() ) {
3516 break;
3517 }
3518 }
3519 }
3520 }
3521 else if ( m_Selector->m_UnresolvedFlag == SAnnotSelector::eSearchUnresolved &&
3522 m_Selector->m_ResolveMethod == SAnnotSelector::eResolve_TSE &&
3523 m_Selector->m_LimitObjectType != SAnnotSelector::eLimit_None &&
3524 m_Selector->m_LimitObject ) {
3525 // external annotations only
3526 m_FromOtherTSE = true;
3527 bool check_adaptive = x_CheckAdaptive(idit->first);
3528 ITERATE ( TTSE_LockMap, tse_it, m_TSE_LockMap ) {
3529 const CTSE_Info& tse_info = *tse_it->first;
3530 tse_info.UpdateAnnotIndex();
3531 found |= x_SearchTSE(tse_it->second, idit->first,
3532 idit->second, cvt, check_adaptive);
3533 }
3534 }
3535 else {
3536 // Search in the limit objects
3537 bool check_adaptive = x_CheckAdaptive(idit->first);
3538 CConstRef<CSynonymsSet> syns;
3539 bool syns_initialized = false;
3540 ITERATE ( TTSE_LockMap, tse_it, m_TSE_LockMap ) {
3541 const CTSE_Info& tse_info = *tse_it->first;
3542 tse_info.UpdateAnnotIndex();
3543 if ( tse_info.HasMatchingAnnotIds() ) {
3544 if ( !syns_initialized ) {
3545 syns = m_Scope->GetSynonyms(idit->first,
3546 sx_GetFlag(GetSelector()));
3547 syns_initialized = true;
3548 }
3549 if ( !syns ) {
3550 found |= x_SearchTSE(tse_it->second, idit->first,
3551 idit->second, cvt, check_adaptive);
3552 }
3553 else {
3554 ITERATE(CSynonymsSet, syn_it, *syns) {
3555 found |= x_SearchTSE(tse_it->second,
3556 syns->GetSeq_id_Handle(syn_it),
3557 idit->second, cvt, check_adaptive);
3558 if ( x_NoMoreObjects() ) {
3559 break;
3560 }
3561 }
3562 }
3563 }
3564 else {
3565 const CBioseq_Handle::TId& ids = m_Scope->GetIds(idit->first);
3566 bool only_gi = tse_info.OnlyGiAnnotIds();
3567 ITERATE ( CBioseq_Handle::TId, syn_it, ids ) {
3568 if ( !only_gi || syn_it->IsGi() ) {
3569 found |= x_SearchTSE(tse_it->second, *syn_it,
3570 idit->second, cvt, check_adaptive);
3571 if ( x_NoMoreObjects() ) {
3572 break;
3573 }
3574 }
3575 }
3576 }
3577 if ( x_NoMoreObjects() ) {
3578 break;
3579 }
3580 }
3581 }
3582 if ( x_NoMoreObjects() ) {
3583 break;
3584 }
3585 }
3586 return found;
3587 }
3588
3589
x_SearchAll(void)3590 void CAnnot_Collector::x_SearchAll(void)
3591 {
3592 _ASSERT(m_Selector->m_LimitObjectType != SAnnotSelector::eLimit_None);
3593 _ASSERT(m_Selector->m_LimitObject);
3594 if ( m_TSE_LockMap.empty() ) {
3595 // data source name not matched
3596 return;
3597 }
3598 switch ( m_Selector->m_LimitObjectType ) {
3599 case SAnnotSelector::eLimit_TSE_Info:
3600 x_SearchAll(*CTypeConverter<CTSE_Info>::
3601 SafeCast(&*m_Selector->m_LimitObject));
3602 break;
3603 case SAnnotSelector::eLimit_Seq_entry_Info:
3604 x_SearchAll(*CTypeConverter<CSeq_entry_Info>::
3605 SafeCast(&*m_Selector->m_LimitObject));
3606 break;
3607 case SAnnotSelector::eLimit_Seq_annot_Info:
3608 x_SearchAll(*CTypeConverter<CSeq_annot_Info>::
3609 SafeCast(&*m_Selector->m_LimitObject));
3610 break;
3611 default:
3612 NCBI_THROW(CAnnotException, eLimitError,
3613 "CAnnot_Collector::x_SearchAll: invalid mode");
3614 }
3615 }
3616
3617
x_SearchAll(const CSeq_entry_Info & entry_info)3618 void CAnnot_Collector::x_SearchAll(const CSeq_entry_Info& entry_info)
3619 {
3620 {{
3621 entry_info.UpdateAnnotIndex();
3622 const CBioseq_Base_Info& base = entry_info.x_GetBaseInfo();
3623 // Collect all annotations from the entry
3624 ITERATE( CBioseq_Base_Info::TAnnot, ait, base.GetAnnot() ) {
3625 x_SearchAll(**ait);
3626 if ( x_NoMoreObjects() )
3627 return;
3628 }
3629 }}
3630
3631 if ( entry_info.IsSet() ) {
3632 CConstRef<CBioseq_set_Info> set(&entry_info.GetSet());
3633 // Collect annotations from all children
3634 ITERATE( CBioseq_set_Info::TSeq_set, cit, set->GetSeq_set() ) {
3635 x_SearchAll(**cit);
3636 if ( x_NoMoreObjects() )
3637 return;
3638 }
3639 }
3640 }
3641
3642
x_SearchAll(const CSeq_annot_Info & annot_info)3643 void CAnnot_Collector::x_SearchAll(const CSeq_annot_Info& annot_info)
3644 {
3645 if ( m_Selector->ExcludedAnnotName(annot_info.GetName()) ) {
3646 return;
3647 }
3648
3649 _ASSERT(m_Selector->m_LimitTSE);
3650 annot_info.UpdateAnnotIndex();
3651 CSeq_annot_Handle sah(annot_info, m_Selector->m_LimitTSE);
3652 // Collect all annotations from the annot
3653 ITERATE ( CSeq_annot_Info::TAnnotObjectInfos, aoit,
3654 annot_info.GetAnnotObjectInfos() ) {
3655 const CAnnotObject_Info& annot_info = *aoit;
3656 if ( annot_info.IsRemoved() ) {
3657 continue;
3658 }
3659 if ( !m_Selector->MatchType(annot_info) ) {
3660 continue;
3661 }
3662
3663 if ( annot_info.GetAnnotIndex() == CSeq_annot_Info::kWholeAnnotIndex ) {
3664 const CSeq_annot_Info& seq_annot = annot_info.GetSeq_annot_Info();
3665 if ( seq_annot.IsSortedTable() ) {
3666 // sorted Seq-table has only one CAnnotObject_Info
3667 // but we need to add all individual features
3668 auto whole = CRange<TSeqPos>::GetWhole();
3669 for ( CSeq_annot_SortedIter it = seq_annot.StartSortedIterator(whole); it; ++it ) {
3670 CAnnotObject_Ref annot_ref(sah, it, 0);
3671 x_AddObject(annot_ref);
3672 if ( m_Selector->m_CollectSeq_annots || x_NoMoreObjects() ) {
3673 return;
3674 }
3675 }
3676 }
3677 continue;
3678 }
3679
3680 CAnnotObject_Ref annot_ref(annot_info, sah);
3681 x_AddObject(annot_ref);
3682 if ( m_Selector->m_CollectSeq_annots || x_NoMoreObjects() ) {
3683 return;
3684 }
3685 }
3686
3687 static const size_t kAnnotTypeIndex_SNP =
3688 CAnnotType_Index::GetSubtypeIndex(CSeqFeatData::eSubtype_variation);
3689
3690 if ( m_CollectAnnotTypes.test(kAnnotTypeIndex_SNP) &&
3691 annot_info.x_HasSNP_annot_Info() ) {
3692 const CSeq_annot_SNP_Info& snp_annot =
3693 annot_info.x_GetSNP_annot_Info();
3694 TSeqPos index = 0;
3695 ITERATE ( CSeq_annot_SNP_Info, snp_it, snp_annot ) {
3696 const SSNP_Info& snp = *snp_it;
3697 CAnnotObject_Ref annot_ref(snp_annot, sah, snp, 0);
3698 x_AddObject(annot_ref);
3699 if ( m_Selector->m_CollectSeq_annots || x_NoMoreObjects() ) {
3700 return;
3701 }
3702 ++index;
3703 }
3704 }
3705 }
3706
3707
x_CollectMapped(const CSeqMap_CI & seg,CSeq_loc & master_loc_empty,const CSeq_id_Handle & master_id,const CHandleRange & master_hr,CSeq_loc_Conversion_Set & cvt_set)3708 void CAnnot_Collector::x_CollectMapped(const CSeqMap_CI& seg,
3709 CSeq_loc& master_loc_empty,
3710 const CSeq_id_Handle& master_id,
3711 const CHandleRange& master_hr,
3712 CSeq_loc_Conversion_Set& cvt_set)
3713 {
3714 CHandleRange::TOpenRange master_seg_range(
3715 seg.GetPosition(),
3716 seg.GetEndPosition());
3717 CHandleRange::TOpenRange ref_seg_range(seg.GetRefPosition(),
3718 seg.GetRefEndPosition());
3719 bool reversed = seg.GetRefMinusStrand();
3720 TSignedSeqPos shift;
3721 if ( !reversed ) {
3722 shift = ref_seg_range.GetFrom() - master_seg_range.GetFrom();
3723 }
3724 else {
3725 shift = ref_seg_range.GetTo() + master_seg_range.GetFrom();
3726 }
3727 CSeq_id_Handle ref_id = seg.GetRefSeqid();
3728 CHandleRangeMap ref_loc;
3729 {{ // translate master_loc to ref_loc
3730 CHandleRange& hr = ref_loc.AddRanges(ref_id);
3731 ITERATE ( CHandleRange, mlit, master_hr ) {
3732 CHandleRange::TOpenRange range = master_seg_range & mlit->first;
3733 if ( !range.Empty() ) {
3734 ENa_strand strand = mlit->second;
3735 if ( !reversed ) {
3736 range.SetOpen(range.GetFrom() + shift,
3737 range.GetToOpen() + shift);
3738 }
3739 else {
3740 if ( strand != eNa_strand_unknown ) {
3741 strand = Reverse(strand);
3742 }
3743 range.Set(shift - range.GetTo(), shift - range.GetFrom());
3744 }
3745 hr.AddRange(range, strand);
3746 }
3747 }
3748 if ( hr.Empty() )
3749 return;
3750 }}
3751
3752 CRef<CSeq_loc_Conversion> cvt(new CSeq_loc_Conversion(master_loc_empty,
3753 master_id,
3754 seg,
3755 ref_id,
3756 m_Scope));
3757 cvt_set.Add(*cvt, cvt_set.kAllIndexes);
3758 }
3759
3760
x_SearchMapped(const CSeqMap_CI & seg,CSeq_loc & master_loc_empty,const CSeq_id_Handle & master_id,const CHandleRange & master_hr)3761 bool CAnnot_Collector::x_SearchMapped(const CSeqMap_CI& seg,
3762 CSeq_loc& master_loc_empty,
3763 const CSeq_id_Handle& master_id,
3764 const CHandleRange& master_hr)
3765 {
3766 if ( seg.FeaturePolicyWasApplied() ) {
3767 // If we have found explict feature policy object
3768 // it means that time/segments limits are no longer active.
3769 x_StopSearchLimits();
3770 }
3771 if ( !m_AnnotSet.empty() || m_MappingCollector.get() ) {
3772 // If we have found matching annotations it means the sequence
3773 // is annotated and time/segments limits are no longer active.
3774 x_StopSearchLimits();
3775 }
3776 if ( m_SearchTime.IsRunning() &&
3777 m_SearchTime.Elapsed() > m_Selector->GetMaxSearchTime() ) {
3778 NCBI_THROW(CAnnotSearchLimitException, eTimeLimitExceded,
3779 "CAnnot_Collector: "
3780 "search time limit exceeded, no annotations found");
3781 }
3782 if ( m_SearchSegments != numeric_limits<TMaxSearchSegments>::max() &&
3783 (x_MaxSearchSegmentsLimitIsReached() || --m_SearchSegments == 0) ) {
3784 if ( m_SearchSegmentsAction == SAnnotSelector::eMaxSearchSegmentsThrow ) {
3785 NCBI_THROW(CAnnotSearchLimitException, eSegmentsLimitExceded,
3786 "CAnnot_Collector: "
3787 "search segments limit exceeded, no annotations found");
3788 }
3789 if ( m_SearchSegmentsAction == SAnnotSelector::eMaxSearchSegmentsLog ) {
3790 ERR_POST_X(2, Warning << "CAnnot_Collector: "
3791 "search segments limit exceeded, no annotations found");
3792 }
3793 // stop searching
3794 return false;
3795 }
3796 CHandleRange::TOpenRange master_seg_range(
3797 seg.GetPosition(),
3798 seg.GetEndPosition());
3799 CHandleRange::TOpenRange ref_seg_range(seg.GetRefPosition(),
3800 seg.GetRefEndPosition());
3801 bool reversed = seg.GetRefMinusStrand();
3802 TSignedSeqPos shift;
3803 if ( !reversed ) {
3804 shift = ref_seg_range.GetFrom() - master_seg_range.GetFrom();
3805 }
3806 else {
3807 shift = ref_seg_range.GetTo() + master_seg_range.GetFrom();
3808 }
3809 CSeq_id_Handle ref_id = seg.GetRefSeqid();
3810 CHandleRangeMap ref_loc;
3811 {{ // translate master_loc to ref_loc
3812 CHandleRange& hr = ref_loc.AddRanges(ref_id);
3813 ITERATE ( CHandleRange, mlit, master_hr ) {
3814 CHandleRange::TOpenRange range = master_seg_range & mlit->first;
3815 if ( !range.Empty() ) {
3816 ENa_strand strand = mlit->second;
3817 if ( !reversed ) {
3818 range.SetOpen(range.GetFrom() + shift,
3819 range.GetToOpen() + shift);
3820 }
3821 else {
3822 if ( strand != eNa_strand_unknown ) {
3823 strand = Reverse(strand);
3824 }
3825 range.Set(shift - range.GetTo(), shift - range.GetFrom());
3826 }
3827 hr.AddRange(range, strand);
3828 }
3829 }
3830 if ( hr.Empty() )
3831 return false;
3832 }}
3833
3834 if (m_Selector->m_NoMapping) {
3835 return x_SearchLoc(ref_loc, 0, &seg.GetUsingTSE());
3836 }
3837 else {
3838 CRef<CSeq_loc_Conversion> cvt(new CSeq_loc_Conversion(master_loc_empty,
3839 master_id,
3840 seg,
3841 ref_id,
3842 m_Scope));
3843 return x_SearchLoc(ref_loc, &*cvt, &seg.GetUsingTSE());
3844 }
3845 }
3846
3847
3848 const CAnnot_Collector::TAnnotTypes&
x_GetAnnotTypes(void) const3849 CAnnot_Collector::x_GetAnnotTypes(void) const
3850 {
3851 if (m_AnnotTypes2.empty() && m_AnnotTypes.any()) {
3852 for (size_t i = 0; i < m_AnnotTypes.size(); ++i) {
3853 if ( m_AnnotTypes.test(i) ) {
3854 m_AnnotTypes2.push_back(CAnnotType_Index::GetTypeSelector(i));
3855 }
3856 }
3857 }
3858 return m_AnnotTypes2;
3859 }
3860
3861
3862 const CAnnot_Collector::TAnnotNames&
x_GetAnnotNames(void) const3863 CAnnot_Collector::x_GetAnnotNames(void) const
3864 {
3865 if ( !m_AnnotNames.get() ) {
3866 TAnnotNames* names = new TAnnotNames;
3867 m_AnnotNames.reset(names);
3868 ITERATE ( TAnnotSet, it, m_AnnotSet ) {
3869 names->insert(it->GetSeq_annot_Info().GetName());
3870 }
3871 }
3872 return *m_AnnotNames;
3873 }
3874
3875
x_GetCostOfLoadingInBytes(void) const3876 Uint8 CAnnot_Collector::x_GetCostOfLoadingInBytes(void) const
3877 {
3878 return m_LoadBytes;
3879 }
3880
3881
x_GetCostOfLoadingInSeconds(void) const3882 double CAnnot_Collector::x_GetCostOfLoadingInSeconds(void) const
3883 {
3884 return m_LoadSeconds;
3885 }
3886
3887
3888 END_SCOPE(objects)
3889 END_NCBI_SCOPE
3890