1 /*  $Id: annot_collector.cpp 608523 2020-05-18 16:26:33Z vasilche $
2 * ===========================================================================
3 *
4 *                            PUBLIC DOMAIN NOTICE
5 *               National Center for Biotechnology Information
6 *
7 *  This software/database is a "United States Government Work" under the
8 *  terms of the United States Copyright Act.  It was written as part of
9 *  the author's official duties as a United States Government employee and
10 *  thus cannot be copyrighted.  This software/database is freely available
11 *  to the public for use. The National Library of Medicine and the U.S.
12 *  Government have not placed any restriction on its use or reproduction.
13 *
14 *  Although all reasonable efforts have been taken to ensure the accuracy
15 *  and reliability of the software and data, the NLM and the U.S.
16 *  Government do not and cannot warrant the performance or results that
17 *  may be obtained by using this software or data. The NLM and the U.S.
18 *  Government disclaim all warranties, express or implied, including
19 *  warranties of performance, merchantability or fitness for any particular
20 *  purpose.
21 *
22 *  Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aleksey Grichenko, Eugene Vasilchenko
27 *
28 * File Description:
29 *   Annotation collector for annot iterators
30 *
31 */
32 
33 #include <ncbi_pch.hpp>
34 #include <objmgr/impl/annot_collector.hpp>
35 
36 #include <objmgr/scope.hpp>
37 #include <objmgr/bioseq_handle.hpp>
38 #include <objmgr/seq_entry_handle.hpp>
39 #include <objmgr/seq_annot_handle.hpp>
40 #include <objmgr/seq_feat_handle.hpp>
41 #include <objmgr/seq_map_ci.hpp>
42 #include <objmgr/impl/annot_object.hpp>
43 #include <objmgr/impl/tse_info.hpp>
44 #include <objmgr/impl/annot_type_index.hpp>
45 #include <objmgr/impl/tse_chunk_info.hpp>
46 #include <objmgr/impl/data_source.hpp>
47 #include <objmgr/impl/seq_annot_info.hpp>
48 #include <objmgr/impl/bioseq_set_info.hpp>
49 #include <objmgr/impl/handle_range_map.hpp>
50 #include <objmgr/impl/synonyms.hpp>
51 #include <objmgr/impl/seq_loc_cvt.hpp>
52 #include <objmgr/impl/seq_align_mapper.hpp>
53 #include <objmgr/impl/snp_annot_info.hpp>
54 #include <objmgr/impl/seq_table_info.hpp>
55 #include <objmgr/impl/bioseq_info.hpp>
56 #include <objmgr/impl/scope_impl.hpp>
57 #include <objmgr/mapped_feat.hpp>
58 #include <objmgr/graph_ci.hpp>
59 #include <objmgr/objmgr_exception.hpp>
60 #include <objmgr/impl/tse_split_info.hpp>
61 #include <objmgr/error_codes.hpp>
62 
63 #include <objects/seq/Bioseq.hpp>
64 #include <objects/seqloc/Seq_loc.hpp>
65 #include <objects/seqset/Seq_entry.hpp>
66 #include <objects/seqalign/Seq_align.hpp>
67 #include <objects/seqres/Seq_graph.hpp>
68 #include <objects/seqloc/Seq_loc_equiv.hpp>
69 #include <objects/seqloc/Seq_bond.hpp>
70 #include <objects/seqfeat/seqfeat__.hpp>
71 #include <objects/general/User_object.hpp>
72 
73 #include <serial/typeinfo.hpp>
74 #include <serial/objostr.hpp>
75 #include <serial/objostrasn.hpp>
76 #include <serial/serial.hpp>
77 #include <serial/serialutil.hpp>
78 
79 #include <util/timsort.hpp>
80 #include <algorithm>
81 #include <typeinfo>
82 
83 
84 #define NCBI_USE_ERRCODE_X   ObjMgr_AnnotCollect
85 
86 BEGIN_NCBI_SCOPE
87 
88 NCBI_DEFINE_ERR_SUBCODE_X(2);
89 
BEGIN_SCOPE(objects)90 BEGIN_SCOPE(objects)
91 
92 
93 /////////////////////////////////////////////////////////////////////////////
94 // CAnnotMapping_Info
95 /////////////////////////////////////////////////////////////////////////////
96 
97 
98 void CAnnotMapping_Info::Reset(void)
99 {
100     m_TotalRange = TRange::GetEmpty();
101     m_MappedObject.Reset();
102     m_MappedObjectType = eMappedObjType_not_set;
103     m_MappedStrand = eNa_strand_unknown;
104     m_MappedFlags = 0;
105 }
106 
107 
GetMappedSeq_loc_Conv(void) const108 CSeq_loc_Conversion& CAnnotMapping_Info::GetMappedSeq_loc_Conv(void) const
109 {
110     _ASSERT(GetMappedObjectType() == eMappedObjType_Seq_loc_Conv);
111     return static_cast<CSeq_loc_Conversion&>(m_MappedObject.GetNCObject());
112 }
113 
114 
SetMappedConverstion(CSeq_loc_Conversion & cvt)115 void CAnnotMapping_Info::SetMappedConverstion(CSeq_loc_Conversion& cvt)
116 {
117     _ASSERT(!IsMapped());
118     m_MappedObject.Reset(&cvt);
119     m_MappedObjectType = eMappedObjType_Seq_loc_Conv;
120 }
121 
122 
SetMappedSeq_align(CSeq_align * align)123 void CAnnotMapping_Info::SetMappedSeq_align(CSeq_align* align)
124 {
125     _ASSERT(m_MappedObjectType == eMappedObjType_Seq_loc_Conv_Set);
126     m_MappedObject.Reset(align);
127     m_MappedObjectType =
128         align? eMappedObjType_Seq_align: eMappedObjType_not_set;
129 }
130 
131 
SetMappedSeq_align_Cvts(CSeq_loc_Conversion_Set & cvts)132 void CAnnotMapping_Info::SetMappedSeq_align_Cvts(CSeq_loc_Conversion_Set& cvts)
133 {
134     _ASSERT(!IsMapped());
135     m_MappedObject.Reset(&cvts);
136     m_MappedObjectType = eMappedObjType_Seq_loc_Conv_Set;
137 }
138 
139 
SetGraphRanges(CGraphRanges * ranges)140 void CAnnotMapping_Info::SetGraphRanges(CGraphRanges* ranges)
141 {
142     m_GraphRanges.Reset(ranges);
143 }
144 
145 
GetGraphRanges(void) const146 const CGraphRanges* CAnnotMapping_Info::GetGraphRanges(void) const
147 {
148     return m_GraphRanges.GetPointerOrNull();
149 }
150 
151 
152 const CSeq_align&
GetMappedSeq_align(const CSeq_align & orig) const153 CAnnotMapping_Info::GetMappedSeq_align(const CSeq_align& orig) const
154 {
155     if (m_MappedObjectType == eMappedObjType_Seq_loc_Conv_Set) {
156         // Map the alignment, replace conv-set with the mapped align
157         CSeq_loc_Conversion_Set& cvts =
158             const_cast<CSeq_loc_Conversion_Set&>(
159             *CTypeConverter<CSeq_loc_Conversion_Set>::
160             SafeCast(m_MappedObject.GetPointer()));
161 
162         CRef<CSeq_align> dst;
163         cvts.Convert(orig, dst);
164 
165         CRange<TSeqPos>& range = const_cast<CRange<TSeqPos>&>(m_TotalRange);
166         range = range.GetEmpty();
167         vector<CHandleRangeMap> hrmaps;
168         CAnnotObject_Info::x_ProcessAlign(hrmaps, *dst, 0);
169         const CSeq_loc_Conversion_Set::TSeq_id_Handles& dst_ids =
170             cvts.GetDst_id_Handles();
171         ITERATE ( vector<CHandleRangeMap>, rowit, hrmaps ) {
172             ITERATE ( CHandleRangeMap, idit, *rowit ) {
173                 if ( dst_ids.find(idit->first) != dst_ids.end() ) {
174                     range.CombineWith(idit->second.GetOverlappingRange());
175                 }
176             }
177         }
178 
179         const_cast<CAnnotMapping_Info&>(*this).
180             SetMappedSeq_align(dst.GetPointerOrNull());
181     }
182     _ASSERT(m_MappedObjectType == eMappedObjType_Seq_align);
183     return *CTypeConverter<CSeq_align>::
184         SafeCast(m_MappedObject.GetPointer());
185 }
186 
187 
UpdateMappedSeq_loc(CRef<CSeq_loc> & loc,CRef<CSeq_point> & pnt_ref,CRef<CSeq_interval> & int_ref,const CSeq_feat * orig_feat) const188 void CAnnotMapping_Info::UpdateMappedSeq_loc(CRef<CSeq_loc>& loc,
189                                              CRef<CSeq_point>& pnt_ref,
190                                              CRef<CSeq_interval>& int_ref,
191                                              const CSeq_feat* orig_feat) const
192 {
193     _ASSERT(MappedSeq_locNeedsUpdate());
194     if ( !loc || !loc->ReferencedOnlyOnce() ) {
195         loc.Reset(new CSeq_loc);
196     }
197     else {
198         loc->Reset();
199         loc->InvalidateTotalRangeCache();
200     }
201     if ( GetMappedObjectType() == eMappedObjType_Seq_id ) {
202         CSeq_id& id = const_cast<CSeq_id&>(GetMappedSeq_id());
203         if ( IsMappedPoint() ) {
204             if ( !pnt_ref || !pnt_ref->ReferencedOnlyOnce() ) {
205                 pnt_ref.Reset(new CSeq_point);
206             }
207             CSeq_point& point = *pnt_ref;
208             loc->SetPnt(point);
209             point.SetId(id);
210             point.SetPoint(m_TotalRange.GetFrom());
211             if ( GetMappedStrand() != eNa_strand_unknown )
212                 point.SetStrand(GetMappedStrand());
213             else
214                 point.ResetStrand();
215             if ( m_MappedFlags & fMapped_Partial_from ) {
216                 point.SetFuzz().SetLim(CInt_fuzz::eLim_lt);
217             }
218             else {
219                 point.ResetFuzz();
220             }
221         }
222         else {
223             if ( !int_ref || !int_ref->ReferencedOnlyOnce() ) {
224                 int_ref.Reset(new CSeq_interval);
225             }
226             CSeq_interval& interval = *int_ref;
227             loc->SetInt(interval);
228             interval.SetId(id);
229             interval.SetFrom(m_TotalRange.GetFrom());
230             interval.SetTo(m_TotalRange.GetTo());
231             if ( GetMappedStrand() != eNa_strand_unknown )
232                 interval.SetStrand(GetMappedStrand());
233             else
234                 interval.ResetStrand();
235             if ( m_MappedFlags & fMapped_Partial_from ) {
236                 interval.SetFuzz_from().SetLim(CInt_fuzz::eLim_lt);
237             }
238             else {
239                 interval.ResetFuzz_from();
240             }
241             if ( m_MappedFlags & fMapped_Partial_to ) {
242                 interval.SetFuzz_to().SetLim(CInt_fuzz::eLim_gt);
243             }
244             else {
245                 interval.ResetFuzz_to();
246             }
247         }
248     }
249     else {
250         CSeq_loc_Conversion& cvt = GetMappedSeq_loc_Conv();
251         const CSeq_loc& orig_loc = m_MappedFlags & fMapped_Product?
252             orig_feat->GetProduct(): orig_feat->GetLocation();
253         cvt.MakeDstMix(loc->SetMix(), orig_loc.GetMix());
254     }
255 }
256 
257 
SetMappedSeq_feat(CSeq_feat & feat)258 void CAnnotMapping_Info::SetMappedSeq_feat(CSeq_feat& feat)
259 {
260     _ASSERT( IsMapped() );
261     _ASSERT(GetMappedObjectType() != eMappedObjType_Seq_feat);
262 
263     // Fill mapped location and product in the mapped feature
264     CRef<CSeq_loc> mapped_loc;
265     if ( MappedSeq_locNeedsUpdate() ) {
266         mapped_loc.Reset(new CSeq_loc);
267         CRef<CSeq_point> mapped_pnt;
268         CRef<CSeq_interval> mapped_int;
269         UpdateMappedSeq_loc(mapped_loc, mapped_pnt, mapped_int, &feat);
270     }
271     else {
272         mapped_loc.Reset(&const_cast<CSeq_loc&>(GetMappedSeq_loc()));
273     }
274     if ( IsMappedLocation() ) {
275         feat.SetLocation(*mapped_loc);
276     }
277     else if ( IsMappedProduct() ) {
278         feat.SetProduct(*mapped_loc);
279     }
280     if ( IsPartial() ) {
281         feat.SetPartial(true);
282     }
283     else {
284         feat.ResetPartial();
285     }
286 
287     m_MappedObject.Reset(&feat);
288     m_MappedObjectType = eMappedObjType_Seq_feat;
289 }
290 
291 
InitializeMappedSeq_feat(const CSeq_feat & src,CSeq_feat & dst) const292 void CAnnotMapping_Info::InitializeMappedSeq_feat(const CSeq_feat& src,
293                                                   CSeq_feat& dst) const
294 {
295     CSeq_feat& src_nc = const_cast<CSeq_feat&>(src);
296     if ( src_nc.IsSetId() )
297         dst.SetId(src_nc.SetId());
298     else
299         dst.ResetId();
300 
301     dst.SetData(src_nc.SetData());
302 
303     if ( src_nc.IsSetExcept() )
304         dst.SetExcept(src_nc.GetExcept());
305     else
306         dst.ResetExcept();
307 
308     if ( src_nc.IsSetComment() )
309         dst.SetComment(src_nc.GetComment());
310     else
311         dst.ResetComment();
312 
313     if ( src_nc.IsSetQual() )
314         dst.SetQual() = src_nc.GetQual();
315     else
316         dst.ResetQual();
317 
318     if ( src_nc.IsSetTitle() )
319         dst.SetTitle(src_nc.GetTitle());
320     else
321         dst.ResetTitle();
322 
323     if ( src_nc.IsSetExt() )
324         dst.SetExt(src_nc.SetExt());
325     else
326         dst.ResetExt();
327 
328     if ( src_nc.IsSetCit() )
329         dst.SetCit(src_nc.SetCit());
330     else
331         dst.ResetCit();
332 
333     if ( src_nc.IsSetExp_ev() )
334         dst.SetExp_ev(src_nc.GetExp_ev());
335     else
336         dst.ResetExp_ev();
337 
338     if ( src_nc.IsSetXref() )
339         dst.SetXref() = src_nc.SetXref();
340     else
341         dst.ResetXref();
342 
343     if ( src_nc.IsSetDbxref() )
344         dst.SetDbxref() = src_nc.SetDbxref();
345     else
346         dst.ResetDbxref();
347 
348     if ( src_nc.IsSetPseudo() )
349         dst.SetPseudo(src_nc.GetPseudo());
350     else
351         dst.ResetPseudo();
352 
353     if ( src_nc.IsSetExcept_text() )
354         dst.SetExcept_text(src_nc.GetExcept_text());
355     else
356         dst.ResetExcept_text();
357 
358     if ( src_nc.IsSetIds() )
359         dst.SetIds() = src_nc.GetIds();
360     else
361         dst.ResetIds();
362 
363     if ( src_nc.IsSetExts() )
364         dst.SetExts() = src_nc.GetExts();
365     else
366         dst.ResetExts();
367 
368     dst.SetLocation(src_nc.SetLocation());
369     if ( src_nc.IsSetProduct() )
370         dst.SetProduct(src_nc.SetProduct());
371     else
372         dst.ResetProduct();
373 }
374 
375 
GetLocationId(void) const376 const CSeq_id* CAnnotMapping_Info::GetLocationId(void) const
377 {
378     switch ( GetMappedObjectType() ) {
379     case eMappedObjType_Seq_id:
380         return &GetMappedSeq_id();
381     case eMappedObjType_Seq_loc:
382         return GetMappedSeq_loc().GetId();
383     case eMappedObjType_Seq_feat:
384         return GetMappedSeq_feat().GetLocation().GetId();
385     case eMappedObjType_Seq_loc_Conv:
386         return &GetMappedSeq_loc_Conv().GetId();
387     default:
388         break;
389     }
390     return 0;
391 }
392 
393 
GetProductId(void) const394 const CSeq_id* CAnnotMapping_Info::GetProductId(void) const
395 {
396     switch ( GetMappedObjectType() ) {
397     case eMappedObjType_Seq_id:
398         return &GetMappedSeq_id();
399     case eMappedObjType_Seq_loc:
400         return GetMappedSeq_loc().GetId();
401     case eMappedObjType_Seq_feat:
402         return GetMappedSeq_feat().GetProduct().GetId();
403     default:
404         break;
405     }
406     return 0;
407 }
408 
409 
410 // Maps each seq-id to the total range for faster sorting.
411 class CIdRangeMap : public CObject
412 {
413 public:
414     CIdRangeMap(const CAnnotObject_Ref& annot_ref, const SAnnotSelector& sel);
~CIdRangeMap(void)415     virtual ~CIdRangeMap(void) {}
416 
417     struct SExtremes {
418         TSeqPos from = kInvalidSeqPos;
419         TSeqPos to = kInvalidSeqPos;
420 
EmptyCIdRangeMap::SExtremes421         bool Empty(void) const { return from == kInvalidSeqPos && to == kInvalidSeqPos; }
422     };
423     typedef map<CSeq_id_Handle, SExtremes> TIdRangeMap;
424     typedef CRange<TSeqPos> TRange;
425 
CanSort(void) const426     bool CanSort(void) const { return m_Map.get() != nullptr; }
427 
GetMap(void) const428     const TIdRangeMap& GetMap(void) const { return *m_Map; }
429 
430 private:
431     unique_ptr<TIdRangeMap> m_Map;
432 };
433 
434 
CIdRangeMap(const CAnnotObject_Ref & annot_ref,const SAnnotSelector & sel)435 CIdRangeMap::CIdRangeMap(const CAnnotObject_Ref& annot_ref,
436                          const SAnnotSelector& sel)
437 {
438     if (!annot_ref.IsPlainFeat()) {
439         return;
440     }
441     const CAnnotObject_Info& info = annot_ref.GetAnnotObject_Info();
442     _ASSERT(info.IsRegular());
443     m_Map.reset(new TIdRangeMap);
444     const CSeq_loc& loc = sel.GetFeatProduct() ?
445         info.GetFeatFast()->GetProduct() : info.GetFeatFast()->GetLocation();
446     const CSeq_id* id = loc.GetId();
447     if ( id ) {
448         SExtremes& ext = (*m_Map)[CSeq_id_Handle::GetHandle(*id)];
449         ext.from = loc.GetStart(eExtreme_Positional);
450         ext.to = loc.GetStop(eExtreme_Positional);
451     }
452     else {
453         for (CSeq_loc_CI it(loc); it; ++it) {
454             TRange rg = it.GetRange();
455             SExtremes& ext = (*m_Map)[it.GetSeq_id_Handle()];
456             if ( !ext.Empty() ) {
457                 rg.CombineWith(TRange(ext.from, ext.to));
458             }
459             ext.from = rg.GetFrom();
460             ext.to = rg.GetToOpen();
461         }
462     }
463 }
464 
465 
SetIdRangeMap(CIdRangeMap & id_range_map)466 void CAnnotMapping_Info::SetIdRangeMap(CIdRangeMap& id_range_map)
467 {
468     if ( IsMapped() ) return;
469     _ASSERT(!IsMapped());
470     m_MappedObject.Reset(&id_range_map);
471     m_MappedObjectType = eMappedObjType_IdRangeMap;
472 }
473 
474 
GetIdRangeMap(void) const475 const CIdRangeMap& CAnnotMapping_Info::GetIdRangeMap(void) const
476 {
477     _ASSERT(GetMappedObjectType() == eMappedObjType_IdRangeMap);
478     return static_cast<const CIdRangeMap&>(*m_MappedObject);
479 }
480 
481 
482 /////////////////////////////////////////////////////////////////////////////
483 // CAnnotObject_Ref
484 /////////////////////////////////////////////////////////////////////////////
485 
486 
CAnnotObject_Ref(const CAnnotObject_Info & object,const CSeq_annot_Handle & annot_handle)487 CAnnotObject_Ref::CAnnotObject_Ref(const CAnnotObject_Info& object,
488                                    const CSeq_annot_Handle& annot_handle)
489     : m_Seq_annot(annot_handle),
490       m_AnnotIndex(object.GetAnnotIndex()),
491       m_AnnotType(eAnnot_Regular)
492 {
493     if ( object.IsFeat() ) {
494         if ( object.IsRegular() ) {
495             const CSeq_feat& feat = *object.GetFeatFast();
496             if ( feat.IsSetPartial() ) {
497                 m_MappingInfo.SetPartial(feat.GetPartial());
498             }
499         }
500         else {
501             m_AnnotType = eAnnot_SeqTable;
502             m_MappingInfo.SetPartial(GetSeq_annot_Info().IsTableFeatPartial(object));
503         }
504     }
505     if ( object.HasSingleKey() ) {
506         m_MappingInfo.SetTotalRange(object.GetKey().m_Range);
507     }
508     else {
509         size_t beg = object.GetKeysBegin();
510         size_t end = object.GetKeysEnd();
511         if ( beg < end ) {
512             const SAnnotObject_Key& key =
513                 GetSeq_annot_Info().GetAnnotObjectKey(beg);
514             m_MappingInfo.SetTotalRange(key.m_Range);
515         }
516     }
517 }
518 
519 
CAnnotObject_Ref(const CSeq_annot_SNP_Info & snp_annot,const CSeq_annot_Handle & annot_handle,const SSNP_Info & snp,CSeq_loc_Conversion * cvt)520 CAnnotObject_Ref::CAnnotObject_Ref(const CSeq_annot_SNP_Info& snp_annot,
521                                    const CSeq_annot_Handle& annot_handle,
522                                    const SSNP_Info& snp,
523                                    CSeq_loc_Conversion* cvt)
524     : m_Seq_annot(annot_handle),
525       m_AnnotIndex(TAnnotIndex(snp_annot.GetIndex(snp))),
526       m_AnnotType(eAnnot_SNPTable)
527 {
528     _ASSERT(IsSNPTableFeat());
529     TSeqPos src_from = snp.GetFrom(), src_to = snp.GetTo();
530     ENa_strand src_strand = eNa_strand_unknown;
531     if ( snp.MinusStrand() ) {
532         src_strand = eNa_strand_minus;
533     }
534     else if ( snp.PlusStrand() ) {
535         src_strand = eNa_strand_plus;
536     }
537     if ( !cvt ) {
538         m_MappingInfo.SetTotalRange(TRange(src_from, src_to));
539         m_MappingInfo.SetMappedSeq_id(
540             const_cast<CSeq_id&>(snp_annot.GetSeq_id()),
541             src_from == src_to);
542         m_MappingInfo.SetMappedStrand(src_strand);
543         return;
544     }
545 
546     cvt->Reset();
547     if ( src_from == src_to ) {
548         // point
549         _VERIFY(cvt->ConvertPoint(src_from, src_strand));
550     }
551     else {
552         // interval
553         _VERIFY(cvt->ConvertInterval(src_from, src_to, src_strand));
554     }
555     cvt->SetMappedLocation(*this, CSeq_loc_Conversion::eLocation);
556 }
557 
558 
CAnnotObject_Ref(const CSeq_annot_Handle & annot_handle,const CSeq_annot_SortedIter & iter,CSeq_loc_Conversion * cvt)559 CAnnotObject_Ref::CAnnotObject_Ref(const CSeq_annot_Handle& annot_handle,
560                                    const CSeq_annot_SortedIter& iter,
561                                    CSeq_loc_Conversion* cvt)
562     : m_Seq_annot(annot_handle),
563       m_AnnotIndex(TAnnotIndex(iter.GetRow())),
564       m_AnnotType(eAnnot_SortedSeqTable)
565 {
566     _ASSERT(IsSortedSeqTableFeat());
567     const CSeqTableInfo& annot_table = GetSeqTableInfo();
568     TRange src_range = iter.GetRange();
569     ENa_strand src_strand = annot_table.GetLocationStrand(m_AnnotIndex);
570     if ( !cvt ) {
571         m_MappingInfo.SetTotalRange(src_range);
572         m_MappingInfo.SetMappedSeq_id(
573             const_cast<CSeq_id&>(*annot_table.GetLocationId(m_AnnotIndex)),
574             src_range.GetLength() == 1);
575         m_MappingInfo.SetMappedStrand(src_strand);
576         return;
577     }
578 
579     cvt->Reset();
580     if ( src_range.GetLength() == 1 ) {
581         // point
582         _VERIFY(cvt->ConvertPoint(src_range.GetFrom(),
583                                   src_strand));
584     }
585     else {
586         // interval
587         _VERIFY(cvt->ConvertInterval(src_range.GetFrom(),
588                                      src_range.GetTo(),
589                                      src_strand));
590     }
591     cvt->SetMappedLocation(*this, CSeq_loc_Conversion::eLocation);
592 }
593 
594 
ResetLocation(void)595 void CAnnotObject_Ref::ResetLocation(void)
596 {
597     m_MappingInfo.Reset();
598     if ( HasAnnotObject_Info() ) {
599         const CAnnotObject_Info& object = GetAnnotObject_Info();
600         if ( object.IsFeat() ) {
601             const CSeq_feat& feat = *object.GetFeatFast();
602             if ( feat.IsSetPartial() ) {
603                 m_MappingInfo.SetPartial(feat.GetPartial());
604             }
605         }
606     }
607 }
608 
609 
GetSeq_annot_SNP_Info(void) const610 const CSeq_annot_SNP_Info& CAnnotObject_Ref::GetSeq_annot_SNP_Info(void) const
611 {
612     _ASSERT(IsSNPTableFeat());
613     return GetSeq_annot_Info().x_GetSNP_annot_Info();
614 }
615 
616 
GetSeqTableInfo(void) const617 const CSeqTableInfo& CAnnotObject_Ref::GetSeqTableInfo(void) const
618 {
619     _ASSERT(IsAnySeqTableFeat());
620     return GetSeq_annot_Info().GetTableInfo();
621 }
622 
623 
GetAnnotObject_Info(void) const624 const CAnnotObject_Info& CAnnotObject_Ref::GetAnnotObject_Info(void) const
625 {
626     _ASSERT(HasAnnotObject_Info());
627     return GetSeq_annot_Info().GetInfo(GetAnnotIndex());
628 }
629 
630 
GetSNP_Info(void) const631 const SSNP_Info& CAnnotObject_Ref::GetSNP_Info(void) const
632 {
633     _ASSERT(IsSNPTableFeat());
634     return GetSeq_annot_SNP_Info().GetInfo(GetAnnotIndex());
635 }
636 
637 
IsFeat(void) const638 bool CAnnotObject_Ref::IsFeat(void) const
639 {
640     return !HasAnnotObject_Info() || GetAnnotObject_Info().IsFeat();
641 }
642 
643 
IsGraph(void) const644 bool CAnnotObject_Ref::IsGraph(void) const
645 {
646     return HasAnnotObject_Info()  &&  GetAnnotObject_Info().IsGraph();
647 }
648 
649 
IsAlign(void) const650 bool CAnnotObject_Ref::IsAlign(void) const
651 {
652     return HasAnnotObject_Info()  &&  GetAnnotObject_Info().IsAlign();
653 }
654 
655 
GetFeat(void) const656 const CSeq_feat& CAnnotObject_Ref::GetFeat(void) const
657 {
658     return GetAnnotObject_Info().GetFeat();
659 }
660 
661 
GetGraph(void) const662 const CSeq_graph& CAnnotObject_Ref::GetGraph(void) const
663 {
664     return GetAnnotObject_Info().GetGraph();
665 }
666 
667 
GetAlign(void) const668 const CSeq_align& CAnnotObject_Ref::GetAlign(void) const
669 {
670     return GetAnnotObject_Info().GetAlign();
671 }
672 
673 
674 BEGIN_LOCAL_NAMESPACE;
675 
676 /////////////////////////////////////////////////////////////////////////////
// CAnnotObject_Ref comparison
678 /////////////////////////////////////////////////////////////////////////////
679 
680 struct CAnnotObjectType_Less
681 {
682     bool m_ByProduct;
683     IFeatComparator* m_FeatComparator;
684     CScope* m_Scope;
685     bool m_DoWeIgnoreFarLocationsForSorting;
686 
687     class CNearnessTester : public CSeq_loc::ISubLocFilter {
688     public:
CNearnessTester(const CBioseq_Handle & handle)689         CNearnessTester( const CBioseq_Handle &handle )
690             : m_BioseqHandle(handle)
691         {
692 
693         }
694 
695         DECLARE_OPERATOR_BOOL(m_BioseqHandle);
696 
operator ()(const CSeq_id * id) const697         bool operator()( const CSeq_id *id ) const {
698             return id && m_BioseqHandle.IsSynonym(*id);
699         }
700     private:
701         CBioseq_Handle m_BioseqHandle;
702     };
703 
704     CNearnessTester m_TesterForIgnoreFarLocationsForSorting;
CAnnotObjectType_LessCAnnotObjectType_Less705     explicit CAnnotObjectType_Less(const SAnnotSelector* sel,
706                                    CScope* scope = 0)
707         : m_ByProduct(sel->GetFeatProduct()),
708           m_FeatComparator(sel->GetFeatComparator()),
709           m_Scope(scope),
710           m_TesterForIgnoreFarLocationsForSorting(sel->GetIgnoreFarLocationsForSorting())
711         {
712         }
713 
714     bool operator()(const CAnnotObject_Ref& x,
715                     const CAnnotObject_Ref& y) const;
716 
717     // smaller first
GetTypeOrderCAnnotObjectType_Less718     static int GetTypeOrder(CSeqFeatData::E_Choice type,
719                             CSeqFeatData::ESubtype subtype)
720         {
721             if ( subtype == CSeqFeatData::eSubtype_operon ) {
722                 // operon first
723                 return -1;
724             }
725             else {
726                 return CSeq_feat::GetTypeSortingOrder(type);
727             }
728         }
729 };
730 
731 class CCreateFeat
732 {
733 public:
CCreateFeat(const CAnnotObject_Ref & ref,const CAnnotObject_Info * info)734     CCreateFeat(const CAnnotObject_Ref& ref,
735                 const CAnnotObject_Info* info)
736         : m_Ref(ref), m_Info(info)
737         {
738         }
739 
740     const CSeq_feat& GetOriginalFeat(void);
741     const CSeq_feat& GetMappedFeat(void);
742     int GetCdregionOrder(void);
743     const char* GetImpKey(void);
744 
GetLoc(const CSeq_feat & feat,bool by_product)745     static const CSeq_loc& GetLoc(const CSeq_feat& feat, bool by_product) {
746         return by_product? feat.GetProduct(): feat.GetLocation();
747     }
748 
749     ENa_strand GetStrand(bool by_product);
750 
751     const CSeq_loc* GetComplexLoc(bool by_product);
752 
753     bool IsSetProduct(void);
754     CConstRef<CSeq_id> GetProductId(void);
755 
756     bool HasFeatLabel(void);
757     string GetFeatLabel(void);
758 
759 private:
760     CRef<CSeq_feat> m_CreatedOriginalFeat;
761     const CAnnotObject_Ref& m_Ref;
762     const CAnnotObject_Info* m_Info;
763 };
764 
765 
GetOriginalFeat(void)766 const CSeq_feat& CCreateFeat::GetOriginalFeat(void)
767 {
768     if ( m_Ref.IsPlainFeat() ) {
769         // real Seq-feat exists
770         return *m_Info->GetFeatFast();
771     }
772     else {
773         // table feature
774         if ( !m_CreatedOriginalFeat ) {
775             CRef<CSeq_point> seq_pnt;
776             CRef<CSeq_interval> seq_int;
777             if ( m_Ref.IsSNPTableFeat() ) {
778                 // SNP table feature
779                 const CSeq_annot_SNP_Info& snp_info =
780                     m_Ref.GetSeq_annot_SNP_Info();
781                 snp_info.GetInfo(m_Ref.GetAnnotIndex())
782                     .UpdateSeq_feat(m_CreatedOriginalFeat,
783                                     seq_pnt, seq_int,
784                                     snp_info);
785             }
786             else {
787                 _ASSERT(m_Ref.IsAnySeqTableFeat());
788                 const CSeqTableInfo& table_info =
789                     m_Ref.GetSeqTableInfo();
790                 table_info
791                     .UpdateSeq_feat(m_Ref.GetAnnotIndex(),
792                                     m_CreatedOriginalFeat,
793                                     seq_pnt, seq_int);
794             }
795             _ASSERT(m_CreatedOriginalFeat);
796         }
797         return *m_CreatedOriginalFeat;
798     }
799 }
800 
801 
GetMappedFeat(void)802 const CSeq_feat& CCreateFeat::GetMappedFeat(void)
803 {
804     CAnnotMapping_Info& map = m_Ref.GetMappingInfo();
805     if ( !map.IsMapped() ) {
806         return GetOriginalFeat();
807     }
808     if ( map.GetMappedObjectType() == map.eMappedObjType_Seq_feat ) {
809         // mapped Seq-feat is created already
810         return map.GetMappedSeq_feat();
811     }
812 
813     CRef<CSeq_feat> mapped_feat(new CSeq_feat);
814     map.InitializeMappedSeq_feat(GetOriginalFeat(), *mapped_feat);
815     map.SetMappedSeq_feat(*mapped_feat);
816     return map.GetMappedSeq_feat();
817 }
818 
819 
GetCdregionOrder(void)820 int CCreateFeat::GetCdregionOrder(void)
821 {
822     CCdregion::EFrame frame =
823         GetMappedFeat().GetData().GetCdregion().GetFrame();
824     if ( frame == CCdregion::eFrame_not_set ) {
825         frame = CCdregion::eFrame_one;
826     }
827     return frame;
828 }
829 
830 
GetImpKey(void)831 const char* CCreateFeat::GetImpKey(void)
832 {
833     static const char* const variation_key = "variation";
834     if ( !m_Info ) {
835         return variation_key;
836     }
837     return GetOriginalFeat().GetData().GetImp().GetKey().c_str();
838 }
839 
840 
GetStrand(bool by_product)841 ENa_strand CCreateFeat::GetStrand(bool by_product)
842 {
843     try {
844         CAnnotMapping_Info& map = m_Ref.GetMappingInfo();
845         if ( map.IsMappedLocation() ) {
846             // location is mapped
847             if ( map.GetMappedObjectType() == map.eMappedObjType_Seq_feat ) {
848                 // mapped Seq-feat is created already
849                 return GetLoc(map.GetMappedSeq_feat(), by_product).GetStrand();
850             }
851             else if ( map.GetMappedObjectType() == map.eMappedObjType_Seq_loc ) {
852                 // mapped Seq-loc is created already
853                 return map.GetMappedSeq_loc().GetStrand();
854             }
855             else {
856                 // whole, interval, point, or mix
857                 return map.GetMappedStrand();
858             }
859         }
860         else {
861             // location is not mapped - use original
862             if ( !m_Info ) {
863                 // table SNP or sorted table features have strand in mapping
864                 return map.GetMappedStrand();
865             }
866             else {
867                 // get location from the Seq-feat
868                 return GetLoc(GetOriginalFeat(), by_product).GetStrand();
869             }
870         }
871     }
872     catch ( CException& /*ignored*/ ) {
873         // assume unknown strand for sorting
874         return eNa_strand_unknown;
875     }
876 }
877 
878 
GetComplexLoc(bool by_product)879 const CSeq_loc* CCreateFeat::GetComplexLoc(bool by_product)
880 {
881     if ( !m_Info ) {
882         // table SNP, or sorted feature table -> no mix
883         return 0;
884     }
885     CAnnotMapping_Info& map = m_Ref.GetMappingInfo();
886     if ( map.IsMappedLocation() ) {
887         // location is mapped
888         if ( map.GetMappedObjectType() == map.eMappedObjType_Seq_loc ) {
889             // mapped Seq-loc is created already
890             const CSeq_loc& loc = map.GetMappedSeq_loc();
891             return &loc;
892         }
893         else if ( map.GetMappedObjectType() == map.eMappedObjType_Seq_id ) {
894             // whole, interval, or point
895             return 0;
896         }
897         // get location from the Seq-feat
898         const CSeq_loc& loc = GetLoc(GetMappedFeat(), by_product);
899         return &loc;
900     }
901     else {
902         // get location from the Seq-feat
903         const CSeq_loc& loc = GetLoc(GetOriginalFeat(), by_product);
904         return &loc;
905     }
906 }
907 
908 
IsSetProduct(void)909 bool CCreateFeat::IsSetProduct(void)
910 {
911     if ( !m_Info ) {
912         // table SNP or sorted table features -> no product
913         return false;
914     }
915     return GetOriginalFeat().IsSetProduct();
916 }
917 
918 
GetProductId(void)919 CConstRef<CSeq_id> CCreateFeat::GetProductId(void)
920 {
921     _ASSERT(IsSetProduct());
922     return ConstRef(GetOriginalFeat().GetProduct().GetId());
923 }
924 
925 
HasFeatLabel(void)926 bool CCreateFeat::HasFeatLabel(void)
927 {
928     if ( !m_Info ) {
929         return m_Ref.GetSeq_annot_Info()
930             .TableFeat_HasLabel(m_Ref.GetAnnotIndex());
931     }
932     const CSeq_feat& feat = GetOriginalFeat();
933     return (feat.IsSetQual() && !feat.GetQual().empty()) ||
934         (feat.IsSetComment() && !feat.GetComment().empty());
935 }
936 
937 
GetFeatLabel(void)938 string CCreateFeat::GetFeatLabel(void)
939 {
940     if ( !m_Info ) {
941         return m_Ref.GetSeq_annot_Info()
942             .TableFeat_GetLabel(m_Ref.GetAnnotIndex());
943     }
944 
945     string label;
946 
947     const CSeq_feat& feat = GetOriginalFeat();
948 
949     // Put Seq-feat qual into label
950     if ( feat.IsSetQual() ) {
951         ITERATE( CSeq_feat::TQual, it, feat.GetQual() ) {
952             label += label.empty()? '/': ' ';
953             label += (**it).GetQual();
954             if (!(**it).GetVal().empty()) {
955                 label += '=';
956                 label += (**it).GetVal();
957             }
958         }
959     }
960 
961     // Put Seq-feat comment into label
962     if ( feat.IsSetComment() ) {
963         if ( !label.empty()) {
964             label += "; ";
965         }
966         label += feat.GetComment();
967     }
968 
969     return label;
970 }
971 
972 
operator ()(const CAnnotObject_Ref & x,const CAnnotObject_Ref & y) const973 bool CAnnotObjectType_Less::operator()(const CAnnotObject_Ref& x,
974                                        const CAnnotObject_Ref& y) const
975 {
976     // gather x annotation type
977     const CAnnotObject_Info* x_info;
978     CSeq_annot::C_Data::E_Choice x_annot_type;
979     if ( x.HasAnnotObject_Info() ) {
980         x_info = &x.GetAnnotObject_Info();
981         x_annot_type = x_info->GetAnnotType();
982     }
983     else {
984         x_info = 0;
985         x_annot_type = CSeq_annot::C_Data::e_Ftable;
986     }
987 
988     // gather y annotation type
989     const CAnnotObject_Info* y_info;
990     CSeq_annot::C_Data::E_Choice y_annot_type;
991     if ( y.HasAnnotObject_Info() ) {
992         y_info = &y.GetAnnotObject_Info();
993         y_annot_type = y_info->GetAnnotType();
994     }
995     else {
996         y_info = 0;
997         y_annot_type = CSeq_annot::C_Data::e_Ftable;
998     }
999 
1000     // compare by annotation type (feature, align, graph)
1001     if ( x_annot_type != y_annot_type ) {
1002         return x_annot_type < y_annot_type;
1003     }
1004 
1005     if ( x_annot_type == CSeq_annot::C_Data::e_Ftable ) {
1006         // compare features
1007 
1008         // get x feature type
1009         CSeqFeatData::E_Choice x_feat_type;
1010         CSeqFeatData::ESubtype x_feat_subtype;
1011         if ( x_info ) {
1012             x_feat_type = x_info->GetFeatType();
1013             x_feat_subtype = x_info->GetFeatSubtype();
1014         }
1015         else if ( x.IsSNPTableFeat() ) {
1016             x_feat_type = CSeqFeatData::e_Imp;
1017             x_feat_subtype = CSeqFeatData::eSubtype_variation;
1018         }
1019         else {
1020             SAnnotTypeSelector type = x.GetSeqTableInfo().GetType();
1021             x_feat_type = type.GetFeatType();
1022             x_feat_subtype = type.GetFeatSubtype();
1023         }
1024 
1025         // get y feature type
1026         CSeqFeatData::E_Choice y_feat_type;
1027         CSeqFeatData::ESubtype y_feat_subtype;
1028         if ( y_info ) {
1029             y_feat_type = y_info->GetFeatType();
1030             y_feat_subtype = y_info->GetFeatSubtype();
1031         }
1032         else if ( y.IsSNPTableFeat() ) {
1033             y_feat_type = CSeqFeatData::e_Imp;
1034             y_feat_subtype = CSeqFeatData::eSubtype_variation;
1035         }
1036         else {
1037             SAnnotTypeSelector type = y.GetSeqTableInfo().GetType();
1038             y_feat_type = type.GetFeatType();
1039             y_feat_subtype = type.GetFeatSubtype();
1040         }
1041 
1042         // order by feature type
1043         if ( x_feat_subtype != y_feat_subtype ) {
1044             int x_order = GetTypeOrder(x_feat_type, x_feat_subtype);
1045             int y_order = GetTypeOrder(y_feat_type, y_feat_subtype);
1046             if ( x_order != y_order ) {
1047                 return x_order < y_order;
1048             }
1049         }
1050 
1051         CCreateFeat x_create(x, x_info);
1052         CCreateFeat y_create(y, y_info);
1053 
1054         // compare strands
1055         ENa_strand x_strand = x_create.GetStrand(m_ByProduct);
1056         ENa_strand y_strand = y_create.GetStrand(m_ByProduct);
1057         bool x_minus = IsReverse(x_strand);
1058         bool y_minus = IsReverse(y_strand);
1059         if ( x_minus != y_minus ) {
1060             // minus strand last
1061             return y_minus;
1062         }
1063 
1064         // compare complex locations (mix or packed intervals)
1065         const CSeq_loc* x_loc = x_create.GetComplexLoc(m_ByProduct);
1066         const CSeq_loc* y_loc = y_create.GetComplexLoc(m_ByProduct);
1067 
1068         bool x_complex = x_loc && (x_loc->IsMix() || x_loc->IsPacked_int());
1069         bool y_complex = y_loc && (y_loc->IsMix() || y_loc->IsPacked_int());
1070         if ( x_complex != y_complex ) {
1071             // simple loc before complex on plus strand, after on minus strand
1072             return x_minus ^ y_complex;
1073         }
1074 
1075         if ( x_complex ) {
1076             int diff = 0;
1077             if( m_TesterForIgnoreFarLocationsForSorting ) {
1078                 diff = x_loc->CompareSubLoc(*y_loc, x_strand, &m_TesterForIgnoreFarLocationsForSorting);
1079             } else {
1080                 diff = x_loc->CompareSubLoc(*y_loc, x_strand);
1081             }
1082             if ( diff != 0 ) {
1083                 return diff < 0;
1084             }
1085         }
1086 
1087         // compare subtypes
1088         if ( x_feat_subtype != y_feat_subtype ) {
1089             return x_feat_subtype < y_feat_subtype;
1090         }
1091 
1092         _ASSERT(x_feat_type == y_feat_type);
1093         // type dependent comparison
1094         if ( x_feat_type == CSeqFeatData::e_Cdregion ) {
1095             // compare frames of identical CDS ranges
1096             int x_frame = x_create.GetCdregionOrder();
1097             int y_frame = y_create.GetCdregionOrder();
1098             if ( x_frame != y_frame ) {
1099                 return x_frame < y_frame;
1100             }
1101         }
1102         else if ( x_feat_subtype == CSeqFeatData::eSubtype_imp ) {
1103             // all non-standard imported features have the same subtype
1104             const char* x_key = x_create.GetImpKey();
1105             const char* y_key = y_create.GetImpKey();
1106 
1107             // compare labels of imp features
1108             if ( x_key != y_key ) {
1109                 int diff = NStr::CompareNocase(x_key, y_key);
1110                 if ( diff != 0 ) {
1111                     return diff < 0;
1112                 }
1113             }
1114         }
1115         else if ( x_feat_type == CSeqFeatData::e_Gene ) {
1116             const CGene_ref& x_gene = x_info->GetFeatFast()->GetData().GetGene();
1117             const CGene_ref& y_gene = y_info->GetFeatFast()->GetData().GetGene();
1118             const string& x_locus = x_gene.IsSetLocus()? x_gene.GetLocus(): kEmptyStr;
1119             const string& y_locus = y_gene.IsSetLocus()? y_gene.GetLocus(): kEmptyStr;
1120             if ( int diff = NStr::CompareNocase(x_locus, y_locus) ) {
1121                 return diff < 0;
1122             }
1123             const string& x_desc = x_gene.IsSetDesc()? x_gene.GetDesc(): kEmptyStr;
1124             const string& y_desc = y_gene.IsSetDesc()? y_gene.GetDesc(): kEmptyStr;
1125             if ( int diff = NStr::CompareNocase(x_desc, y_desc) ) {
1126                 return diff < 0;
1127             }
1128         }
1129 
1130         if ( !m_ByProduct ) {
1131             // order by product id
1132             bool x_has_product = x_create.IsSetProduct();
1133             bool y_has_product = y_create.IsSetProduct();
1134             if ( x_has_product != y_has_product ) {
1135                 return !x_has_product; // without product first
1136             }
1137             if ( x_has_product ) {
1138                 CConstRef<CSeq_id> x_id = x_create.GetProductId();
1139                 CConstRef<CSeq_id> y_id = y_create.GetProductId();
1140                 if ( x_id.IsNull() != y_id.IsNull() ) {
1141                     return x_id.IsNull(); // no product id first
1142                 }
1143                 if ( x_id ) {
1144                     string x_id_str = x_id->AsFastaString();
1145                     string y_id_str = y_id->AsFastaString();
1146                     if ( int diff = NStr::CompareNocase(x_id_str, y_id_str) ) {
1147                         return diff < 0;
1148                     }
1149                 }
1150             }
1151         }
1152 
1153         bool x_has_label = x_create.HasFeatLabel();
1154         bool y_has_label = y_create.HasFeatLabel();
1155         if ( x_has_label != y_has_label ) {
1156             return !x_has_label; // no-label first
1157         }
1158         if ( x_has_label ) {
1159             string x_label = x_create.GetFeatLabel();
1160             string y_label = y_create.GetFeatLabel();
1161             if ( int diff = NStr::CompareNocase(x_label, y_label) ) {
1162                 return diff < 0;
1163             }
1164         }
1165 
1166         if ( m_FeatComparator ) {
1167             const CSeq_feat& x_feat = x_create.GetMappedFeat();
1168             const CSeq_feat& y_feat = y_create.GetMappedFeat();
1169             if ( m_FeatComparator->Less(x_feat, y_feat, m_Scope) ) {
1170                 return true;
1171             }
1172             if ( m_FeatComparator->Less(y_feat, x_feat, m_Scope) ) {
1173                 return false;
1174             }
1175         }
1176     }
1177     if ( x.IsFromOtherTSE() != y.IsFromOtherTSE() ) {
1178         // non-sequence TSE annotations should come later
1179         return y.IsFromOtherTSE();
1180     }
1181 
1182     return x < y;
1183 }
1184 
1185 
1186 struct CAnnotObject_Less
1187 {
CAnnotObject_LessCAnnotObject_Less1188     explicit CAnnotObject_Less(const SAnnotSelector* sel,
1189                                CScope* scope = 0)
1190         : type_less(sel, scope),
1191           ignore_far_handle(sel->GetIgnoreFarLocationsForSorting())
1192         {
1193         }
1194 
x_GetExtremesCAnnotObject_Less1195     void x_GetExtremes( TSeqPos &out_from, TSeqPos &out_to,
1196                         const CAnnotObject_Ref& obj_ref ) const
1197     {
1198         out_from = kInvalidSeqPos;
1199         out_to = kInvalidSeqPos;
1200 
1201         bool is_circular = ( ignore_far_handle.CanGetInst_Topology() &&
1202             ignore_far_handle.GetInst_Topology() == CSeq_inst::eTopology_circular );
1203 
1204         bool all_minus = true;
1205         bool all_non_minus = true;
1206 
1207         const CSeq_loc & loc = obj_ref.GetAnnotObject_Info().GetFeatFast()->GetLocation();
1208 
1209         CSeq_loc_CI first_piece;
1210         CSeq_loc_CI last_piece;
1211 
1212         TSeqPos lowest = kInvalidSeqPos;
1213         TSeqPos highest = kInvalidSeqPos;
1214 
1215         CSeq_loc_CI loc_ci( loc, CSeq_loc_CI::eEmpty_Skip, CSeq_loc_CI::eOrder_Biological );
1216         for( ; loc_ci; ++loc_ci ) {
1217             if( ! ignore_far_handle.IsSynonym(loc_ci.GetSeq_id_Handle()) ) {
1218                 continue;
1219             }
1220             if( ! first_piece ) {
1221                 first_piece = loc_ci;
1222             }
1223             last_piece = loc_ci;
1224 
1225             TSeqPos piece_start = kInvalidSeqPos;
1226             TSeqPos piece_stop  = kInvalidSeqPos;
1227 
1228             if( loc_ci.IsSetStrand() && loc_ci.GetStrand() == eNa_strand_minus ) {
1229                 all_non_minus = false;
1230             } else {
1231                 all_minus = false;
1232             }
1233 
1234             piece_start = loc_ci.GetRange().GetFrom();
1235             piece_stop  = loc_ci.GetRange().GetToOpen();
1236 
1237             if( lowest == kInvalidSeqPos ) {
1238                 lowest = piece_start;
1239             } else {
1240                 lowest = min( lowest, piece_start );
1241             }
1242 
1243             if( highest == kInvalidSeqPos ) {
1244                 highest = piece_stop;
1245             } else {
1246                 highest = max( highest, piece_stop );
1247             }
1248         }
1249 
1250         // ignore circularity if strandedness is mixed
1251         if( ! all_minus && ! all_non_minus ) {
1252             is_circular = false;
1253         }
1254 
1255         // out_from
1256         if (is_circular) {
1257             if (all_minus) {
1258                 if( last_piece ) {
1259                     out_from = last_piece.GetRange().GetFrom();
1260                 }
1261             } else {
1262                 if( first_piece ) {
1263                     out_from = first_piece.GetRange().GetFrom();
1264                 }
1265             }
1266         } else {
1267             out_from = lowest;
1268         }
1269 
1270         // out_to
1271         if (is_circular) {
1272             if (all_minus) {
1273                 if( first_piece ) {
1274                     out_to = first_piece.GetRange().GetToOpen();
1275                 }
1276             } else {
1277                 if( last_piece ) {
1278                     out_to = last_piece.GetRange().GetToOpen();
1279                 }
1280             }
1281         } else {
1282             out_to = highest;
1283         }
1284     }
1285 
1286     static
GetRangeOpenCAnnotObject_Less1287     void GetRangeOpen(TSeqPos &out_from, TSeqPos &out_to,
1288                       const CAnnotObject_Ref& obj_ref)
1289     {
1290         out_from = obj_ref.GetMappingInfo().GetFrom();
1291         out_to = obj_ref.GetMappingInfo().GetToOpen();
1292         if ( out_from != kInvalidSeqPos ||
1293              out_to != kInvalidSeqPos ||
1294              !obj_ref.IsAlign() ||
1295              (obj_ref.GetMappingInfo().GetMappedObjectType() !=
1296               CAnnotMapping_Info::eMappedObjType_Seq_loc_Conv_Set) ) {
1297             return;
1298         }
1299         // mapped align may have uninitialized total range
1300         // force mapping
1301         obj_ref.GetMappingInfo().GetMappedSeq_align(obj_ref.GetAlign());
1302         // re-get updated range
1303         out_from = obj_ref.GetMappingInfo().GetFrom();
1304         out_to = obj_ref.GetMappingInfo().GetToOpen();
1305     }
1306 
CompareRangesCAnnotObject_Less1307     static int CompareRanges(TSeqPos x_from, TSeqPos x_to, TSeqPos y_from, TSeqPos y_to)
1308     {
1309         // (from >= to) means circular location.
1310         // Any circular location is less than (before) non-circular one.
1311         // If both are circular, compare them regular way.
1312         bool x_circular = x_from >= x_to;
1313         bool y_circular = y_from >= y_to;
1314         if ( x_circular != y_circular ) {
1315             return x_circular ? -1 : 1;
1316         }
1317         // smallest left extreme first
1318         if ( x_from != y_from ) {
1319             return x_from < y_from ? -1 : 1;
1320         }
1321         // longest feature first
1322         if ( x_to != y_to ) {
1323             return x_to > y_to ? -1 : 1;
1324         }
1325         return 0;
1326     }
1327 
1328     // Compare CRef-s: both must be features
operator ()CAnnotObject_Less1329     bool operator()(const CAnnotObject_Ref& x,
1330                     const CAnnotObject_Ref& y) const
1331     {
1332         if (x == y) { // small speedup
1333             return false;
1334         }
1335 
1336         if (x.GetMappingInfo().GetMappedObjectType() == CAnnotMapping_Info::eMappedObjType_IdRangeMap  &&
1337             y.GetMappingInfo().GetMappedObjectType() == CAnnotMapping_Info::eMappedObjType_IdRangeMap  &&
1338             x.GetMappingInfo().GetIdRangeMap().CanSort()  &&
1339             y.GetMappingInfo().GetIdRangeMap().CanSort()) {
1340             // Perform full location comparison instead of using total range shortcut.
1341             const CIdRangeMap::TIdRangeMap& x_idmap = x.GetMappingInfo().GetIdRangeMap().GetMap();
1342             const CIdRangeMap::TIdRangeMap& y_idmap = y.GetMappingInfo().GetIdRangeMap().GetMap();
1343             CIdRangeMap::TIdRangeMap::const_iterator x_it = x_idmap.begin();
1344             CIdRangeMap::TIdRangeMap::const_iterator y_it = y_idmap.begin();
1345             for (; x_it != x_idmap.end() && y_it != y_idmap.end(); ++x_it, ++y_it) {
1346                 if (x_it->first != y_it->first) return x_it->first < y_it->first;
1347                 int cmp = CompareRanges(x_it->second.from, x_it->second.to, y_it->second.from, y_it->second.to);
1348                 if (cmp != 0) return cmp < 0;
1349             }
1350             if (y_it != y_idmap.end()) return true;
1351             if (x_it != x_idmap.end()) return false;
1352         }
1353         else {
1354             TSeqPos x_from = kInvalidSeqPos;
1355             TSeqPos y_from = kInvalidSeqPos;
1356             TSeqPos x_to = kInvalidSeqPos;
1357             TSeqPos y_to = kInvalidSeqPos;
1358 
1359             if( ignore_far_handle ) {
1360                 x_GetExtremes( x_from, x_to, x );
1361                 x_GetExtremes( y_from, y_to, y );
1362             } else {
1363                 GetRangeOpen(x_from, x_to, x);
1364                 GetRangeOpen(y_from, y_to, y);
1365             }
1366 
1367             // (from >= to) means circular location.
1368             // Any circular location is less than (before) non-circular one.
1369             // If both are circular, compare them regular way.
1370             bool x_circular = x_from >= x_to;
1371             bool y_circular = y_from >= y_to;
1372             if ( x_circular != y_circular ) {
1373                 return x_circular;
1374             }
1375             // smallest left extreme first
1376             if ( x_from != y_from ) {
1377                 return x_from < y_from;
1378             }
1379             // longest feature first
1380             if ( x_to != y_to ) {
1381                 return x_to > y_to;
1382             }
1383         }
1384 
1385         return type_less(x, y);
1386     }
1387     CAnnotObjectType_Less type_less;
1388     CBioseq_Handle ignore_far_handle;
1389 };
1390 
1391 
1392 struct CAnnotObject_LessReverse
1393 {
CAnnotObject_LessReverseCAnnotObject_LessReverse1394     explicit CAnnotObject_LessReverse(const SAnnotSelector* sel,
1395                                       CScope* scope = 0)
1396         : type_less(sel, scope)
1397         {
1398         }
1399     // Compare CRef-s: both must be features
operator ()CAnnotObject_LessReverse1400     bool operator()(const CAnnotObject_Ref& x,
1401                     const CAnnotObject_Ref& y) const
1402     {
1403         if ( x == y ) { // small speedup
1404             return false;
1405         }
1406 
1407         if (x.GetMappingInfo().GetMappedObjectType() == CAnnotMapping_Info::eMappedObjType_IdRangeMap  &&
1408             y.GetMappingInfo().GetMappedObjectType() == CAnnotMapping_Info::eMappedObjType_IdRangeMap &&
1409             x.GetMappingInfo().GetIdRangeMap().CanSort() &&
1410             y.GetMappingInfo().GetIdRangeMap().CanSort()) {
1411             // Perform full location comparison instead of using total range shortcut.
1412             const CIdRangeMap::TIdRangeMap& x_idmap = x.GetMappingInfo().GetIdRangeMap().GetMap();
1413             const CIdRangeMap::TIdRangeMap& y_idmap = y.GetMappingInfo().GetIdRangeMap().GetMap();
1414             CIdRangeMap::TIdRangeMap::const_iterator x_it = x_idmap.begin();
1415             CIdRangeMap::TIdRangeMap::const_iterator y_it = y_idmap.begin();
1416             for (; x_it != x_idmap.end() && y_it != y_idmap.end(); ++x_it, ++y_it) {
1417                 if (x_it->first != y_it->first) return y_it->first < x_it->first;
1418                 int cmp = CAnnotObject_Less::CompareRanges(
1419                     x_it->second.from, x_it->second.to, y_it->second.from, y_it->second.to);
1420                 if (cmp != 0) return cmp > 0;
1421             }
1422             if (x_it != x_idmap.end()) return true;
1423             if (y_it != y_idmap.end()) return false;
1424         }
1425         else {
1426             TSeqPos x_from = kInvalidSeqPos;
1427             TSeqPos x_to = kInvalidSeqPos;
1428             TSeqPos y_from = kInvalidSeqPos;
1429             TSeqPos y_to = kInvalidSeqPos;
1430 
1431             CAnnotObject_Less::GetRangeOpen(x_from, x_to, x);
1432             CAnnotObject_Less::GetRangeOpen(y_from, y_to, y);
1433 
1434             // (from >= to) means circular location.
1435             // Any circular location is less than (before) non-circular one.
1436             // If both are circular, compare them regular way.
1437             bool x_circular = x_from >= x_to;
1438             bool y_circular = y_from >= y_to;
1439             if ( x_circular != y_circular ) {
1440                 return x_circular;
1441             }
1442             // largest right extreme first
1443             if ( x_to != y_to ) {
1444                 return x_to > y_to;
1445             }
1446             // longest feature first
1447             if ( x_from != y_from ) {
1448                 return x_from < y_from;
1449             }
1450         }
1451 
1452         return type_less(x, y);
1453     }
1454     CAnnotObjectType_Less type_less;
1455 };
1456 
1457 
1458 END_LOCAL_NAMESPACE;
1459 
1460 
1461 /////////////////////////////////////////////////////////////////////////////
1462 // CCreatedFeat_Ref
1463 /////////////////////////////////////////////////////////////////////////////
1464 
1465 
CCreatedFeat_Ref(void)1466 CCreatedFeat_Ref::CCreatedFeat_Ref(void)
1467 {
1468 }
1469 
1470 
~CCreatedFeat_Ref(void)1471 CCreatedFeat_Ref::~CCreatedFeat_Ref(void)
1472 {
1473 }
1474 
1475 
ResetRefs(void)1476 void CCreatedFeat_Ref::ResetRefs(void)
1477 {
1478     m_CreatedSeq_feat.Reset();
1479     m_CreatedSeq_loc.Reset();
1480     m_CreatedSeq_point.Reset();
1481     m_CreatedSeq_interval.Reset();
1482 }
1483 
1484 
ReleaseRefsTo(CRef<CSeq_feat> * feat,CRef<CSeq_loc> * loc,CRef<CSeq_point> * point,CRef<CSeq_interval> * interval)1485 void CCreatedFeat_Ref::ReleaseRefsTo(CRef<CSeq_feat>*     feat,
1486                                      CRef<CSeq_loc>*      loc,
1487                                      CRef<CSeq_point>*    point,
1488                                      CRef<CSeq_interval>* interval)
1489 {
1490     if (feat) {
1491         m_CreatedSeq_feat.AtomicReleaseTo(*feat);
1492     }
1493     if (loc) {
1494         m_CreatedSeq_loc.AtomicReleaseTo(*loc);
1495     }
1496     if (point) {
1497         m_CreatedSeq_point.AtomicReleaseTo(*point);
1498     }
1499     if (interval) {
1500         m_CreatedSeq_interval.AtomicReleaseTo(*interval);
1501     }
1502 }
1503 
1504 
ResetRefsFrom(CRef<CSeq_feat> * feat,CRef<CSeq_loc> * loc,CRef<CSeq_point> * point,CRef<CSeq_interval> * interval)1505 void CCreatedFeat_Ref::ResetRefsFrom(CRef<CSeq_feat>*     feat,
1506                                      CRef<CSeq_loc>*      loc,
1507                                      CRef<CSeq_point>*    point,
1508                                      CRef<CSeq_interval>* interval)
1509 {
1510     if (feat) {
1511         m_CreatedSeq_feat.AtomicResetFrom(*feat);
1512     }
1513     if (loc) {
1514         m_CreatedSeq_loc.AtomicResetFrom(*loc);
1515     }
1516     if (point) {
1517         m_CreatedSeq_point.AtomicResetFrom(*point);
1518     }
1519     if (interval) {
1520         m_CreatedSeq_interval.AtomicResetFrom(*interval);
1521     }
1522 }
1523 
1524 
1525 CConstRef<CSeq_feat>
GetOriginalFeature(const CSeq_feat_Handle & feat_h)1526 CCreatedFeat_Ref::GetOriginalFeature(const CSeq_feat_Handle& feat_h)
1527 {
1528     CConstRef<CSeq_feat> ret;
1529     if ( feat_h.IsTableSNP() ) {
1530         const CSeq_annot_SNP_Info& snp_annot = feat_h.x_GetSNP_annot_Info();
1531         const SSNP_Info& snp_info = feat_h.x_GetSNP_Info();
1532         CRef<CSeq_feat> orig_feat;
1533         CRef<CSeq_point> created_point;
1534         CRef<CSeq_interval> created_interval;
1535         ReleaseRefsTo(&orig_feat, 0, &created_point, &created_interval);
1536         snp_info.UpdateSeq_feat(orig_feat,
1537                                 created_point,
1538                                 created_interval,
1539                                 snp_annot);
1540         ret = orig_feat;
1541         ResetRefsFrom(&orig_feat, 0, &created_point, &created_interval);
1542     }
1543     else if ( feat_h.IsTableFeat() ) {
1544         if ( feat_h.m_CreatedOriginalFeat ) {
1545             ret = feat_h.m_CreatedOriginalFeat;
1546         }
1547         else {
1548             const CSeq_annot_Info& annot = feat_h.x_GetSeq_annot_Info();
1549             CRef<CSeq_feat> orig_feat;
1550             CRef<CSeq_point> created_point;
1551             CRef<CSeq_interval> created_interval;
1552             //ReleaseRefsTo(&orig_feat, 0, &created_point, &created_interval);
1553             annot.GetTableInfo().UpdateSeq_feat(feat_h.x_GetFeatIndex(),
1554                                                 orig_feat,
1555                                                 created_point,
1556                                                 created_interval);
1557             ret = orig_feat;
1558             //ResetRefsFrom(&orig_feat, 0, &created_point, &created_interval);
1559             feat_h.m_CreatedOriginalFeat = ret;
1560         }
1561     }
1562     else {
1563         ret = feat_h.GetPlainSeq_feat();
1564     }
1565     return ret;
1566 }
1567 
1568 
1569 CRef<CSeq_loc>
GetMappedLocation(const CAnnotMapping_Info & map,const CSeq_feat & orig_feat)1570 CCreatedFeat_Ref::GetMappedLocation(const CAnnotMapping_Info& map,
1571                                     const CSeq_feat& orig_feat)
1572 {
1573     CRef<CSeq_loc> ret;
1574     if ( map.MappedSeq_locNeedsUpdate() ) {
1575         // need to convert Seq_id to Seq_loc
1576         // clear references to mapped location from mapped feature
1577         // Can not use m_MappedSeq_feat since it's a const-ref
1578         CRef<CSeq_feat> mapped_feat;
1579         m_CreatedSeq_feat.AtomicReleaseTo(mapped_feat);
1580         if ( mapped_feat ) {
1581             if ( !mapped_feat->ReferencedOnlyOnce() ) {
1582                 mapped_feat.Reset();
1583             }
1584             else {
1585                 CRef<CSeq_loc> null_loc(new CSeq_loc);
1586                 null_loc->SetNull();
1587                 // ResetLocation doesn't do what we'd like because
1588                 // Seq-feat.location isn't optional.
1589                 mapped_feat->SetLocation(*null_loc);
1590                 mapped_feat->ResetProduct();
1591             }
1592         }
1593         m_CreatedSeq_feat.AtomicResetFrom(mapped_feat);
1594 
1595         CRef<CSeq_loc> mapped_loc;
1596         CRef<CSeq_point> created_point;
1597         CRef<CSeq_interval> created_interval;
1598         ReleaseRefsTo(0, &mapped_loc, &created_point, &created_interval);
1599         map.UpdateMappedSeq_loc(mapped_loc,
1600                                 created_point,
1601                                 created_interval,
1602                                 &orig_feat);
1603         ret = mapped_loc;
1604         ResetRefsFrom(0, &mapped_loc, &created_point, &created_interval);
1605     }
1606     else if ( map.IsMapped() ) {
1607         ret = const_cast<CSeq_loc*>(&map.GetMappedSeq_loc());
1608     }
1609     return ret;
1610 }
1611 
1612 
1613 CRef<CSeq_loc>
GetMappedLocation(const CAnnotMapping_Info & map,const CMappedFeat & feat)1614 CCreatedFeat_Ref::GetMappedLocation(const CAnnotMapping_Info& map,
1615                                     const CMappedFeat& feat)
1616 {
1617     if ( !map.IsMapped() ) {
1618         return null;
1619     }
1620     else if ( !map.MappedSeq_locNeedsUpdate() ) {
1621         return Ref(const_cast<CSeq_loc*>(&map.GetMappedSeq_loc()));
1622     }
1623     else {
1624         return GetMappedLocation(map, *feat.GetOriginalSeq_feat());
1625     }
1626 }
1627 
1628 
1629 CConstRef<CSeq_feat>
GetMappedFeature(const CAnnotMapping_Info & map,const CMappedFeat & feat)1630 CCreatedFeat_Ref::GetMappedFeature(const CAnnotMapping_Info& map,
1631                                    const CMappedFeat& feat)
1632 {
1633     if ( map.GetMappedObjectType() == map.eMappedObjType_Seq_feat) {
1634         return ConstRef(&map.GetMappedSeq_feat());
1635     }
1636     else {
1637         return GetMappedFeature(map, *feat.GetOriginalSeq_feat());
1638     }
1639 }
1640 
1641 
1642 CConstRef<CSeq_feat>
GetMappedFeature(const CAnnotMapping_Info & map,const CSeq_feat & orig_feat)1643 CCreatedFeat_Ref::GetMappedFeature(const CAnnotMapping_Info& map,
1644                                    const CSeq_feat& orig_feat)
1645 {
1646     CConstRef<CSeq_feat> ret;
1647     if ( map.GetMappedObjectType() == map.eMappedObjType_Seq_feat) {
1648         ret = &map.GetMappedSeq_feat();
1649     }
1650     else if ( !map.IsMapped() ) {
1651         ret = &orig_feat;
1652     }
1653     else {
1654         CRef<CSeq_loc> loc = GetMappedLocation(map, orig_feat);
1655 
1656         // some Seq-loc object is mapped
1657         CRef<CSeq_feat> mapped_feat;
1658         m_CreatedSeq_feat.AtomicReleaseTo(mapped_feat);
1659         if ( !mapped_feat || !mapped_feat->ReferencedOnlyOnce() ) {
1660             mapped_feat.Reset(new CSeq_feat);
1661             // copy all fields from original feature
1662             map.InitializeMappedSeq_feat(orig_feat, *mapped_feat);
1663         }
1664         else {
1665             // copy only unmapped location/product fields from original feature
1666             CSeq_feat& src_nc = const_cast<CSeq_feat&>(orig_feat);
1667             if ( !map.IsMappedLocation() ) {
1668                 mapped_feat->SetLocation(src_nc.SetLocation());
1669             }
1670             if ( !map.IsMappedProduct() ) {
1671                 if ( orig_feat.IsSetProduct() )
1672                     mapped_feat->SetProduct(src_nc.SetProduct());
1673                 else
1674                     mapped_feat->ResetProduct();
1675             }
1676         }
1677 
1678         // set mapped location/product field
1679         if ( map.IsMappedLocation() ) {
1680             mapped_feat->SetLocation(*loc);
1681         }
1682         else if ( map.IsMappedProduct() ) {
1683             mapped_feat->SetProduct(*loc);
1684         }
1685         // set mapped partial field
1686         if ( map.IsPartial() ) {
1687             mapped_feat->SetPartial(true);
1688         }
1689         else {
1690             mapped_feat->ResetPartial();
1691         }
1692 
1693         ret = mapped_feat;
1694         m_CreatedSeq_feat.AtomicResetFrom(mapped_feat);
1695     }
1696     return ret;
1697 }
1698 
1699 
1700 /////////////////////////////////////////////////////////////////////////////
1701 // CAnnot_Collector, CAnnotMappingCollector
1702 /////////////////////////////////////////////////////////////////////////////
1703 
1704 
1705 class CAnnotMappingCollector
1706 {
1707 public:
1708     typedef map<CAnnotObject_Ref,
1709                 CRef<CSeq_loc_Conversion_Set> > TAnnotMappingSet;
1710     // Set of annotations for complex remapping
1711     TAnnotMappingSet              m_AnnotMappingSet;
1712 };
1713 
1714 
CAnnot_Collector(CScope & scope)1715 CAnnot_Collector::CAnnot_Collector(CScope& scope)
1716     : m_Selector(0),
1717       m_Scope(scope),
1718       m_LoadBytes(0),
1719       m_LoadSeconds(0),
1720       m_FromOtherTSE(false)
1721 {
1722 }
1723 
1724 
~CAnnot_Collector(void)1725 CAnnot_Collector::~CAnnot_Collector(void)
1726 {
1727 }
1728 
1729 
x_NoMoreObjects(void) const1730 bool CAnnot_Collector::x_NoMoreObjects(void) const
1731 {
1732     if ( x_MaxSearchSegmentsLimitIsReached() ) {
1733         // search segment limit reached
1734         return true;
1735     }
1736     typedef SAnnotSelector::TMaxSize TMaxSize;
1737     TMaxSize limit = m_Selector->GetMaxSize();
1738     if ( limit >= numeric_limits<TMaxSize>::max() ) {
1739         return false;
1740     }
1741     size_t size = m_AnnotSet.size();
1742     if ( m_MappingCollector.get() ) {
1743         size += m_MappingCollector->m_AnnotMappingSet.size();
1744     }
1745     return size >= limit;
1746 }
1747 
1748 
CanResolveId(const CSeq_id_Handle & idh,const CBioseq_Handle & bh)1749 bool CAnnot_Collector::CanResolveId(const CSeq_id_Handle& idh,
1750                                     const CBioseq_Handle& bh)
1751 {
1752     switch ( m_Selector->GetResolveMethod() ) {
1753     case SAnnotSelector::eResolve_All:
1754         return true;
1755     case SAnnotSelector::eResolve_TSE:
1756         return m_Scope->GetBioseqHandleFromTSE(idh, bh.GetTSE_Handle());
1757     default:
1758         return false;
1759     }
1760 }
1761 
1762 static CSeqFeatData::ESubtype s_DefaultAdaptiveTriggers[] = {
1763     CSeqFeatData::eSubtype_gene,
1764     CSeqFeatData::eSubtype_cdregion,
1765     CSeqFeatData::eSubtype_mRNA
1766 };
1767 
x_Initialize0(const SAnnotSelector & selector)1768 void CAnnot_Collector::x_Initialize0(const SAnnotSelector& selector)
1769 {
1770     m_Selector = &selector;
1771     m_TriggerTypes.reset();
1772     SAnnotSelector::TAdaptiveDepthFlags adaptive_flags = 0;
1773     if ( !selector.GetExactDepth() ||
1774          selector.GetResolveDepth() == kMax_Int ) {
1775         adaptive_flags = selector.GetAdaptiveDepthFlags();
1776     }
1777     if ( adaptive_flags & selector.fAdaptive_ByTriggers ) {
1778         if ( selector.m_AdaptiveTriggers.empty() ) {
1779             const size_t count =
1780                 sizeof(s_DefaultAdaptiveTriggers)/
1781                 sizeof(s_DefaultAdaptiveTriggers[0]);
1782             for ( int i = count - 1; i >= 0; --i ) {
1783                 CSeqFeatData::ESubtype subtype = s_DefaultAdaptiveTriggers[i];
1784                 size_t index = CAnnotType_Index::GetSubtypeIndex(subtype);
1785                 if ( index ) {
1786                     m_TriggerTypes.set(index);
1787                 }
1788             }
1789         }
1790         else {
1791             ITERATE ( SAnnotSelector::TAdaptiveTriggers, it,
1792                       selector.m_AdaptiveTriggers ) {
1793                 pair<size_t, size_t> idxs =
1794                     CAnnotType_Index::GetIndexRange(*it);
1795                 for ( size_t i = idxs.first; i < idxs.second; ++i ) {
1796                     m_TriggerTypes.set(i);
1797                 }
1798             }
1799         }
1800     }
1801     m_UnseenAnnotTypes.set();
1802     m_CollectAnnotTypes = selector.m_AnnotTypesBitset;
1803     if ( !m_CollectAnnotTypes.any() ) {
1804         pair<size_t, size_t> range =
1805             CAnnotType_Index::GetIndexRange(selector);
1806         for ( size_t index = range.first; index < range.second; ++index ) {
1807             m_CollectAnnotTypes.set(index);
1808         }
1809     }
1810     if ( selector.m_CollectNames ) {
1811         m_AnnotNames.reset(new TAnnotNames());
1812     }
1813     selector.CheckLimitObjectType();
1814     if ( selector.m_LimitObjectType != SAnnotSelector::eLimit_None ) {
1815         x_GetTSE_Info();
1816     }
1817     m_SearchSegments = selector.GetMaxSearchSegments();
1818     m_SearchSegmentsAction = selector.GetMaxSearchSegmentsAction();
1819     double max_time = selector.GetMaxSearchTime();
1820     if ( max_time <= 86400 ) { // 24 hours
1821         m_SearchTime.Start();
1822     }
1823 }
1824 
1825 
x_StopSearchLimits(void)1826 void CAnnot_Collector::x_StopSearchLimits(void)
1827 {
1828     if ( m_SearchSegments != numeric_limits<TMaxSearchSegments>::max() ) {
1829         m_SearchSegments = numeric_limits<TMaxSearchSegments>::max();
1830     }
1831     m_SearchTime.Stop();
1832 }
1833 
1834 
x_FoundAllNamedAnnotAccessions(unique_ptr<SAnnotSelector> & local_sel)1835 bool CAnnot_Collector::x_FoundAllNamedAnnotAccessions(unique_ptr<SAnnotSelector>& local_sel)
1836 {
1837     if ( !m_AnnotNames.get() ) {
1838         return false;
1839     }
1840     set<string> found_accs;
1841     for ( auto& n : *m_AnnotNames ) {
1842         if ( !n.IsNamed() ) {
1843             continue;
1844         }
1845         string acc;
1846         ExtractZoomLevel(n.GetName(), &acc, 0);
1847         if ( m_Selector->GetNamedAnnotAccessions().find(acc) !=
1848              m_Selector->GetNamedAnnotAccessions().end() ) {
1849             found_accs.insert(acc);
1850         }
1851     }
1852     if ( !found_accs.empty() ) {
1853         if ( !local_sel ) {
1854             local_sel.reset(new SAnnotSelector(*m_Selector));
1855             m_Selector = local_sel.get();
1856         }
1857         for ( auto& acc : found_accs ) {
1858             local_sel->ExcludeNamedAnnotAccession(acc);
1859         }
1860     }
1861     return !m_Selector->IsIncludedAnyNamedAnnotAccession();
1862 }
1863 
1864 
// Debug switch: when true, x_Initialize() posts a log message with the
// number of annotations that need the full conversion set.
static const bool kTraceFullCvt = false;
1866 
x_Initialize(const SAnnotSelector & selector,const CBioseq_Handle & bh,const CRange<TSeqPos> & range,ENa_strand strand)1867 void CAnnot_Collector::x_Initialize(const SAnnotSelector& selector,
1868                                     const CBioseq_Handle& bh,
1869                                     const CRange<TSeqPos>& range,
1870                                     ENa_strand strand)
1871 {
1872     if ( !bh ) {
1873         NCBI_THROW(CAnnotException, eBadLocation,
1874                    "Bioseq handle is null");
1875     }
1876     CScope_Impl::TConfReadLockGuard guard(m_Scope->m_ConfLock);
1877     x_Initialize0(selector);
1878 
1879     CSeq_id_Handle master_id = bh.GetAccessSeq_id_Handle();
1880     CHandleRange master_range;
1881     master_range.AddRange(range, strand);
1882 
1883     int depth = selector.GetResolveDepth();
1884     bool depth_is_set = depth >= 0 && depth < kMax_Int;
1885     bool exact_depth = selector.GetExactDepth() && depth_is_set;
1886     int adaptive_flags = exact_depth? 0: selector.GetAdaptiveDepthFlags();
1887     int by_policy = adaptive_flags & SAnnotSelector::fAdaptive_ByPolicy;
1888     adaptive_flags &=
1889         SAnnotSelector::fAdaptive_ByTriggers |
1890         SAnnotSelector::fAdaptive_BySubtypes |
1891         SAnnotSelector::fAdaptive_ByNamedAcc;
1892 
1893     // main sequence
1894     bool deeper = true;
1895     if ( adaptive_flags || !exact_depth || depth == 0 ) {
1896         x_SearchMaster(bh, master_id, master_range);
1897         deeper = !x_NoMoreObjects();
1898     }
1899     if ( deeper ) {
1900         deeper = depth > 0 &&
1901             selector.GetResolveMethod() != selector.eResolve_None;
1902     }
1903     if ( deeper && by_policy ) {
1904         deeper =
1905             bh.GetFeatureFetchPolicy() != bh.eFeatureFetchPolicy_only_near;
1906     }
1907     bool only_named_annot_accs = false;
1908     unique_ptr<SAnnotSelector> local_sel;
1909     if ( deeper && adaptive_flags ) {
1910         m_CollectAnnotTypes &= m_UnseenAnnotTypes;
1911         deeper = m_CollectAnnotTypes.any();
1912         if ( deeper && (adaptive_flags & SAnnotSelector::fAdaptive_ByNamedAcc)) {
1913             only_named_annot_accs = selector.HasIncludedOnlyNamedAnnotAccessions();
1914         }
1915         if ( deeper && only_named_annot_accs && x_FoundAllNamedAnnotAccessions(local_sel) ) {
1916             deeper = false;
1917         }
1918     }
1919     if ( deeper ) {
1920         deeper = bh.GetSeqMap().HasSegmentOfType(CSeqMap::eSeqRef);
1921     }
1922 
1923     int last_depth = 0;
1924     if ( deeper ) {
1925         CRef<CSeq_loc> master_loc_empty(new CSeq_loc);
1926         master_loc_empty->
1927             SetEmpty(const_cast<CSeq_id&>(*master_id.GetSeqId()));
1928         for ( int level = 1; level <= depth && deeper; ++level ) {
1929             last_depth = level;
1930             // segments
1931             if ( adaptive_flags || !exact_depth || depth == level ) {
1932                 deeper = x_SearchSegments(bh, master_id, master_range,
1933                                           *master_loc_empty, level);
1934                 if ( deeper ) {
1935                     deeper = !x_NoMoreObjects();
1936                 }
1937             }
1938             if ( deeper ) {
1939                 deeper = depth > level;
1940             }
1941             if ( deeper && adaptive_flags ) {
1942                 m_CollectAnnotTypes &= m_UnseenAnnotTypes;
1943                 deeper = m_CollectAnnotTypes.any();
1944                 if ( deeper && only_named_annot_accs && x_FoundAllNamedAnnotAccessions(local_sel) ) {
1945                     deeper = false;
1946                 }
1947             }
1948         }
1949     }
1950 
1951     x_AddPostMappings();
1952     if ( m_MappingCollector.get() ) {
1953         // need full conversion set
1954         if ( kTraceFullCvt ) {
1955             LOG_POST("Need full conversion set for "<<
1956                      m_MappingCollector->m_AnnotMappingSet.size()<<" annots");
1957         }
1958         CSeq_loc_Conversion_Set cvt_set(m_Scope);
1959         CRef<CSeq_loc> master_loc_empty(new CSeq_loc);
1960         master_loc_empty->
1961             SetEmpty(const_cast<CSeq_id&>(*master_id.GetSeqId()));
1962         for ( int level = 1; level <= last_depth; ++level ) {
1963             // segments
1964             if ( adaptive_flags || !exact_depth || depth == level ) {
1965                 x_CollectSegments(bh, master_id, master_range,
1966                                   *master_loc_empty, level, cvt_set);
1967             }
1968         }
1969         x_AddPostMappingsCvt(cvt_set);
1970     }
1971     x_Sort();
1972 }
1973 
1974 
x_Initialize(const SAnnotSelector & selector,const CHandleRangeMap & master_loc)1975 void CAnnot_Collector::x_Initialize(const SAnnotSelector& selector,
1976                                     const CHandleRangeMap& master_loc)
1977 {
1978     CScope_Impl::TConfReadLockGuard guard(m_Scope->m_ConfLock);
1979     x_Initialize0(selector);
1980 
1981     int depth = selector.GetResolveDepth();
1982     bool depth_is_set = depth >= 0 && depth < kMax_Int;
1983     bool exact_depth = selector.GetExactDepth() && depth_is_set;
1984     int adaptive_flags = exact_depth? 0: selector.GetAdaptiveDepthFlags();
1985     adaptive_flags &=
1986         SAnnotSelector::fAdaptive_ByTriggers |
1987         SAnnotSelector::fAdaptive_BySubtypes;
1988 
1989     // main sequence
1990     bool deeper = true;
1991     if ( adaptive_flags || !exact_depth || depth == 0 ) {
1992         x_SearchLoc(master_loc, 0, 0, true);
1993         deeper = !x_NoMoreObjects();
1994     }
1995     if ( deeper ) {
1996         deeper = depth > 0 &&
1997             selector.GetResolveMethod() != selector.eResolve_None;
1998     }
1999     if ( deeper && adaptive_flags ) {
2000         m_CollectAnnotTypes &= m_UnseenAnnotTypes;
2001         deeper = m_CollectAnnotTypes.any();
2002     }
2003 
2004     int last_depth = 0;
2005     if ( deeper ) {
2006         for ( int level = 1; level <= depth && deeper; ++level ) {
2007             last_depth = level;
2008             // segments
2009             if ( adaptive_flags || !exact_depth || depth == level ) {
2010                 deeper = x_SearchSegments(master_loc, level);
2011                 if ( deeper ) {
2012                     deeper = !x_NoMoreObjects();
2013                 }
2014             }
2015             if ( deeper ) {
2016                 deeper = depth > level;
2017             }
2018             if ( deeper && adaptive_flags ) {
2019                 m_CollectAnnotTypes &= m_UnseenAnnotTypes;
2020                 deeper = m_CollectAnnotTypes.any();
2021             }
2022         }
2023     }
2024 
2025     x_AddPostMappings();
2026     if ( m_MappingCollector.get() ) {
2027         // need full conversion set
2028         if ( kTraceFullCvt ) {
2029             LOG_POST("Need full conversion set for "<<
2030                      m_MappingCollector->m_AnnotMappingSet.size()<<" annots");
2031         }
2032         CSeq_loc_Conversion_Set cvt_set(m_Scope);
2033         for ( int level = 1; level <= last_depth; ++level ) {
2034             // segments
2035             if ( adaptive_flags || !exact_depth || depth == level ) {
2036                 x_CollectSegments(master_loc, level, cvt_set);
2037             }
2038         }
2039         x_AddPostMappingsCvt(cvt_set);
2040     }
2041     x_Sort();
2042 }
2043 
2044 
x_CheckAdaptive(const CBioseq_Handle & bh) const2045 bool CAnnot_Collector::x_CheckAdaptive(const CBioseq_Handle& bh) const
2046 {
2047     int adaptive_flags = GetSelector().GetAdaptiveDepthFlags();
2048     if ( !(adaptive_flags & (SAnnotSelector::fAdaptive_ByTriggers |
2049                              SAnnotSelector::fAdaptive_BySubtypes)) ) {
2050         // no heuristics
2051         return false;
2052     }
2053     if ( !(adaptive_flags & SAnnotSelector::fAdaptive_ByPolicy) ) {
2054         // heuristics only
2055         return true;
2056     }
2057     // both policy and heuristics are active
2058     // use heuristics only if there is no policy information on sequence
2059     return bh && bh.GetFeatureFetchPolicy() == bh.eFeatureFetchPolicy_default;
2060 }
2061 
2062 
x_CheckAdaptive(const CSeq_id_Handle & id) const2063 bool CAnnot_Collector::x_CheckAdaptive(const CSeq_id_Handle& id) const
2064 {
2065     int adaptive_flags = GetSelector().GetAdaptiveDepthFlags();
2066     if ( !(adaptive_flags & (SAnnotSelector::fAdaptive_ByTriggers |
2067                              SAnnotSelector::fAdaptive_BySubtypes)) ) {
2068         // no heuristics
2069         return false;
2070     }
2071     if ( !(adaptive_flags & SAnnotSelector::fAdaptive_ByPolicy) ) {
2072         // heuristics only
2073         return true;
2074     }
2075     // both policy and heuristics are active
2076     // use heuristics only if there is no policy information on sequence
2077     CBioseq_Handle bh = x_GetBioseqHandle(id);
2078     return bh && bh.GetFeatureFetchPolicy() == bh.eFeatureFetchPolicy_default;
2079 }
2080 
2081 
x_SearchMaster(const CBioseq_Handle & bh,const CSeq_id_Handle & master_id,const CHandleRange & master_range)2082 void CAnnot_Collector::x_SearchMaster(const CBioseq_Handle& bh,
2083                                       const CSeq_id_Handle& master_id,
2084                                       const CHandleRange& master_range)
2085 {
2086     bool check_adaptive = x_CheckAdaptive(bh);
2087     if ( m_Selector->m_LimitObjectType == SAnnotSelector::eLimit_None ) {
2088         // any data source
2089         const CTSE_Handle& tse = bh.GetTSE_Handle();
2090         m_FromOtherTSE = false;
2091         if ( m_Selector->m_ExcludeExternal ) {
2092             const CTSE_Info& tse_info = tse.x_GetTSE_Info();
2093             tse_info.UpdateAnnotIndex();
2094             if ( tse_info.HasMatchingAnnotIds() ) {
2095                 CConstRef<CSynonymsSet> syns = m_Scope->GetSynonyms(bh);
2096                 ITERATE(CSynonymsSet, syn_it, *syns) {
2097                     x_SearchTSE(tse, syns->GetSeq_id_Handle(syn_it),
2098                                 master_range, 0, check_adaptive);
2099                     if ( x_NoMoreObjects() ) {
2100                         break;
2101                     }
2102                 }
2103             }
2104             else {
2105                 const CBioseq_Handle::TId& syns = bh.GetId();
2106                 bool only_gi = tse_info.OnlyGiAnnotIds();
2107                 ITERATE ( CBioseq_Handle::TId, syn_it, syns ) {
2108                     if ( !only_gi || syn_it->IsGi() ) {
2109                         x_SearchTSE(tse, *syn_it,
2110                                     master_range, 0, check_adaptive);
2111                         if ( x_NoMoreObjects() ) {
2112                             break;
2113                         }
2114                     }
2115                 }
2116             }
2117         }
2118         else {
2119             CScope_Impl::TTSE_LockMatchSet tse_map;
2120             if ( m_Selector->IsIncludedAnyNamedAnnotAccession() ) {
2121                 m_Scope->GetTSESetWithAnnots(bh, tse_map, *m_Selector);
2122             }
2123             else {
2124                 m_Scope->GetTSESetWithAnnots(bh, tse_map);
2125             }
2126             ITERATE (CScope_Impl::TTSE_LockMatchSet, tse_it, tse_map) {
2127                 m_FromOtherTSE = tse_it->first != bh.GetTSE_Handle();
2128                 tse.AddUsedTSE(tse_it->first);
2129                 x_SearchTSE(tse_it->first, tse_it->second,
2130                             master_range, 0, check_adaptive);
2131                 if ( x_NoMoreObjects() ) {
2132                     break;
2133                 }
2134             }
2135         }
2136     }
2137     else {
2138         // Search in the limit objects
2139         CConstRef<CSynonymsSet> syns;
2140         bool syns_initialized = false;
2141         ITERATE ( TTSE_LockMap, tse_it, m_TSE_LockMap ) {
2142             const CTSE_Info& tse_info = *tse_it->first;
2143             m_FromOtherTSE = tse_it->second != bh.GetTSE_Handle();
2144             tse_info.UpdateAnnotIndex();
2145             if ( tse_info.HasMatchingAnnotIds() ) {
2146                 if ( !syns_initialized ) {
2147                     syns = m_Scope->GetSynonyms(bh);
2148                     syns_initialized = true;
2149                 }
2150                 if ( !syns ) {
2151                     x_SearchTSE(tse_it->second, master_id,
2152                                 master_range, 0, check_adaptive);
2153                 }
2154                 else {
2155                     ITERATE(CSynonymsSet, syn_it, *syns) {
2156                         x_SearchTSE(tse_it->second,
2157                                     syns->GetSeq_id_Handle(syn_it),
2158                                     master_range, 0, check_adaptive);
2159                         if ( x_NoMoreObjects() ) {
2160                             break;
2161                         }
2162                     }
2163                 }
2164             }
2165             else {
2166                 const CBioseq_Handle::TId& syns_id = bh.GetId();
2167                 bool only_gi = tse_info.OnlyGiAnnotIds();
2168                 ITERATE ( CBioseq_Handle::TId, syn_it, syns_id ) {
2169                     if ( !only_gi || syn_it->IsGi() ) {
2170                         x_SearchTSE(tse_it->second, *syn_it,
2171                                     master_range, 0, check_adaptive);
2172                         if ( x_NoMoreObjects() ) {
2173                             break;
2174                         }
2175                     }
2176                 }
2177             }
2178             if ( x_NoMoreObjects() ) {
2179                 break;
2180             }
2181         }
2182     }
2183 }
2184 
2185 
x_CollectSegments(const CBioseq_Handle & bh,const CSeq_id_Handle & master_id,const CHandleRange & master_range,CSeq_loc & master_loc_empty,int level,CSeq_loc_Conversion_Set & cvt_set)2186 void CAnnot_Collector::x_CollectSegments(const CBioseq_Handle& bh,
2187                                          const CSeq_id_Handle& master_id,
2188                                          const CHandleRange& master_range,
2189                                          CSeq_loc& master_loc_empty,
2190                                          int level,
2191                                          CSeq_loc_Conversion_Set& cvt_set)
2192 {
2193     // CSeqMap_CI must be the same as in x_SearchSegments
2194     _ASSERT(m_Selector->m_ResolveMethod != m_Selector->eResolve_None);
2195     CSeqMap::TFlags flags = CSeqMap::fFindRef | CSeqMap::fFindExactLevel;
2196     if ( m_Selector->m_UnresolvedFlag != SAnnotSelector::eFailUnresolved ) {
2197         flags |= CSeqMap::fIgnoreUnresolved;
2198     }
2199     SSeqMapSelector sel(flags, level-1);
2200     if ( m_Selector->m_ResolveMethod == SAnnotSelector::eResolve_TSE ) {
2201         sel.SetLimitTSE(bh.GetTSE_Handle());
2202     }
2203 
2204     int depth = m_Selector->GetResolveDepth();
2205     bool depth_is_set = depth >= 0 && depth < kMax_Int;
2206     bool exact_depth = m_Selector->GetExactDepth() && depth_is_set;
2207     int adaptive_flags = exact_depth? 0: m_Selector->GetAdaptiveDepthFlags();
2208     if ( adaptive_flags & SAnnotSelector::fAdaptive_ByPolicy ) {
2209         sel.SetByFeaturePolicy();
2210     }
2211     if ( adaptive_flags & SAnnotSelector::fAdaptive_BySeqClass) {
2212         sel.SetBySequenceClass();
2213     }
2214 
2215     const CRange<TSeqPos>& range = master_range.begin()->first;
2216     for ( CSeqMap_CI smit(bh, sel, range);
2217           smit && smit.GetPosition() < range.GetToOpen();
2218           ++smit ) {
2219         _ASSERT(smit.GetType() == CSeqMap::eSeqRef);
2220         if ( !CanResolveId(smit.GetRefSeqid(), bh) ) {
2221             // External bioseq, try to search if limit is set
2222             if ( m_Selector->m_UnresolvedFlag !=
2223                  SAnnotSelector::eSearchUnresolved  ||
2224                  !m_Selector->m_LimitObject ) {
2225                 // Do not try to search on external segments
2226                 continue;
2227             }
2228         }
2229 
2230         x_CollectMapped(smit, master_loc_empty, master_id, master_range,
2231                         cvt_set);
2232     }
2233 }
2234 
2235 
x_SearchSegments(const CBioseq_Handle & bh,const CSeq_id_Handle & master_id,const CHandleRange & master_range,CSeq_loc & master_loc_empty,int level)2236 bool CAnnot_Collector::x_SearchSegments(const CBioseq_Handle& bh,
2237                                         const CSeq_id_Handle& master_id,
2238                                         const CHandleRange& master_range,
2239                                         CSeq_loc& master_loc_empty,
2240                                         int level)
2241 {
2242     _ASSERT(m_Selector->m_ResolveMethod != m_Selector->eResolve_None);
2243     CSeqMap::TFlags flags = CSeqMap::fFindRef | CSeqMap::fFindExactLevel;
2244     if ( m_Selector->m_UnresolvedFlag != SAnnotSelector::eFailUnresolved ) {
2245         flags |= CSeqMap::fIgnoreUnresolved;
2246     }
2247     SSeqMapSelector sel(flags, level-1);
2248     if ( m_Selector->m_ResolveMethod == SAnnotSelector::eResolve_TSE ) {
2249         sel.SetLimitTSE(bh.GetTSE_Handle());
2250     }
2251 
2252     int depth = m_Selector->GetResolveDepth();
2253     bool depth_is_set = depth >= 0 && depth < kMax_Int;
2254     bool exact_depth = m_Selector->GetExactDepth() && depth_is_set;
2255     int adaptive_flags = exact_depth? 0: m_Selector->GetAdaptiveDepthFlags();
2256     if ( adaptive_flags & SAnnotSelector::fAdaptive_ByPolicy ) {
2257         sel.SetByFeaturePolicy();
2258     }
2259     if ( adaptive_flags & SAnnotSelector::fAdaptive_BySeqClass) {
2260         sel.SetBySequenceClass();
2261     }
2262 
2263     bool has_more = false;
2264     const CRange<TSeqPos>& range = master_range.begin()->first;
2265     for ( CSeqMap_CI smit(bh, sel, range);
2266           smit && smit.GetPosition() < range.GetToOpen();
2267           ++smit ) {
2268         _ASSERT(smit.GetType() == CSeqMap::eSeqRef);
2269         if ( !CanResolveId(smit.GetRefSeqid(), bh) ) {
2270             // External bioseq, try to search if limit is set
2271             if ( m_Selector->m_UnresolvedFlag !=
2272                  SAnnotSelector::eSearchUnresolved  ||
2273                  !m_Selector->m_LimitObject ) {
2274                 // Do not try to search on external segments
2275                 continue;
2276             }
2277         }
2278 
2279         has_more = true;
2280         x_SearchMapped(smit, master_loc_empty, master_id, master_range);
2281 
2282         if ( x_NoMoreObjects() ) {
2283             return has_more;
2284         }
2285     }
2286     return has_more;
2287 }
2288 
2289 
2290 static
sx_GetFlag(const SAnnotSelector & selector)2291 CScope::EGetBioseqFlag sx_GetFlag(const SAnnotSelector& selector)
2292 {
2293     switch (selector.GetResolveMethod()) {
2294     case SAnnotSelector::eResolve_All:
2295         return CScope::eGetBioseq_All;
2296     default:
2297         // Do not load new TSEs
2298         return CScope::eGetBioseq_Loaded;
2299     }
2300 }
2301 
2302 
x_GetBioseqHandle(const CSeq_id_Handle & id,bool top_level) const2303 CBioseq_Handle CAnnot_Collector::x_GetBioseqHandle(const CSeq_id_Handle& id,
2304                                                    bool top_level) const
2305 {
2306     CScope::EGetBioseqFlag flag =
2307         top_level? CScope::eGetBioseq_All: sx_GetFlag(GetSelector());
2308     return m_Scope->GetBioseqHandle(id, flag);
2309 }
2310 
2311 
x_CollectSegments(const CHandleRangeMap & master_loc,int level,CSeq_loc_Conversion_Set & cvt_set)2312 void CAnnot_Collector::x_CollectSegments(const CHandleRangeMap& master_loc,
2313                                          int level,
2314                                          CSeq_loc_Conversion_Set& cvt_set)
2315 {
2316     ITERATE ( CHandleRangeMap::TLocMap, idit, master_loc.GetMap() ) {
2317         CBioseq_Handle bh = x_GetBioseqHandle(idit->first);
2318         if ( !bh ) {
2319             if (m_Selector->m_UnresolvedFlag == SAnnotSelector::eFailUnresolved) {
2320                 // resolve by Seq-id only
2321                 NCBI_THROW(CAnnotException, eFindFailed,
2322                            "Cannot resolve master id");
2323             }
2324             // skip unresolvable IDs
2325             continue;
2326         }
2327 
2328         if ( !bh.GetSeqMap().HasSegmentOfType(CSeqMap::eSeqRef) ) {
2329             continue;
2330         }
2331 
2332         CRef<CSeq_loc> master_loc_empty(new CSeq_loc);
2333         master_loc_empty->SetEmpty(
2334             const_cast<CSeq_id&>(*idit->first.GetSeqId()));
2335 
2336         CSeqMap::TFlags flags = CSeqMap::fFindRef | CSeqMap::fFindExactLevel;
2337         if ( m_Selector->m_UnresolvedFlag != m_Selector->eFailUnresolved ) {
2338             flags |= CSeqMap::fIgnoreUnresolved;
2339         }
2340 
2341         SSeqMapSelector sel(flags, level-1);
2342         if ( m_Selector->m_ResolveMethod == SAnnotSelector::eResolve_TSE ) {
2343             sel.SetLimitTSE(bh.GetTSE_Handle());
2344         }
2345 
2346         int depth = m_Selector->GetResolveDepth();
2347         bool depth_is_set = depth >= 0 && depth < kMax_Int;
2348         bool exact_depth = m_Selector->GetExactDepth() && depth_is_set;
2349         int adaptive_flags = exact_depth?0:m_Selector->GetAdaptiveDepthFlags();
2350         if ( adaptive_flags & SAnnotSelector::fAdaptive_ByPolicy ) {
2351             sel.SetByFeaturePolicy();
2352         }
2353         if ( adaptive_flags & SAnnotSelector::fAdaptive_BySeqClass) {
2354             sel.SetBySequenceClass();
2355         }
2356 
2357         CHandleRange::TRange range = idit->second.GetOverlappingRange();
2358         for ( CSeqMap_CI smit(bh, sel, range);
2359               smit && smit.GetPosition() < range.GetToOpen();
2360               ++smit ) {
2361             _ASSERT(smit.GetType() == CSeqMap::eSeqRef);
2362             if ( !CanResolveId(smit.GetRefSeqid(), bh) ) {
2363                 // External bioseq, try to search if limit is set
2364                 if ( m_Selector->m_UnresolvedFlag !=
2365                      SAnnotSelector::eSearchUnresolved  ||
2366                      !m_Selector->m_LimitObject ) {
2367                     // Do not try to search on external segments
2368                     continue;
2369                 }
2370             }
2371 
2372             x_CollectMapped(smit, *master_loc_empty, idit->first, idit->second,
2373                             cvt_set);
2374         }
2375     }
2376 }
2377 
x_SearchSegments(const CHandleRangeMap & master_loc,int level)2378 bool CAnnot_Collector::x_SearchSegments(const CHandleRangeMap& master_loc,
2379                                         int level)
2380 {
2381     bool has_more = false;
2382     ITERATE ( CHandleRangeMap::TLocMap, idit, master_loc.GetMap() ) {
2383         CBioseq_Handle bh = x_GetBioseqHandle(idit->first);
2384         if ( !bh ) {
2385             if (m_Selector->m_UnresolvedFlag == SAnnotSelector::eFailUnresolved) {
2386                 // resolve by Seq-id only
2387                 NCBI_THROW(CAnnotException, eFindFailed,
2388                            "Cannot resolve master id");
2389             }
2390             // skip unresolvable IDs
2391             continue;
2392         }
2393         else if ( m_Selector->GetAdaptiveDepthFlags() & SAnnotSelector::fAdaptive_ByPolicy &&
2394                   bh.GetFeatureFetchPolicy() == bh.eFeatureFetchPolicy_only_near ) {
2395             // skip going deeper because of top-level interval policy
2396             continue;
2397         }
2398 
2399         if ( !bh.GetSeqMap().HasSegmentOfType(CSeqMap::eSeqRef) ) {
2400             continue;
2401         }
2402 
2403         CRef<CSeq_loc> master_loc_empty(new CSeq_loc);
2404         master_loc_empty->SetEmpty(
2405             const_cast<CSeq_id&>(*idit->first.GetSeqId()));
2406 
2407         CSeqMap::TFlags flags = CSeqMap::fFindRef | CSeqMap::fFindExactLevel;
2408         if ( m_Selector->m_UnresolvedFlag != m_Selector->eFailUnresolved ) {
2409             flags |= CSeqMap::fIgnoreUnresolved;
2410         }
2411 
2412         SSeqMapSelector sel(flags, level-1);
2413         if ( m_Selector->m_ResolveMethod == SAnnotSelector::eResolve_TSE ) {
2414             sel.SetLimitTSE(bh.GetTSE_Handle());
2415         }
2416 
2417         int depth = m_Selector->GetResolveDepth();
2418         bool depth_is_set = depth >= 0 && depth < kMax_Int;
2419         bool exact_depth = m_Selector->GetExactDepth() && depth_is_set;
2420         int adaptive_flags = exact_depth?0:m_Selector->GetAdaptiveDepthFlags();
2421         if ( adaptive_flags & SAnnotSelector::fAdaptive_ByPolicy ) {
2422             sel.SetByFeaturePolicy();
2423         }
2424         if ( adaptive_flags & SAnnotSelector::fAdaptive_BySeqClass) {
2425             sel.SetBySequenceClass();
2426         }
2427 
2428         CHandleRange::TRange range = idit->second.GetOverlappingRange();
2429         for ( CSeqMap_CI smit(bh, sel, range);
2430               smit && smit.GetPosition() < range.GetToOpen();
2431               ++smit ) {
2432             _ASSERT(smit.GetType() == CSeqMap::eSeqRef);
2433             if ( !CanResolveId(smit.GetRefSeqid(), bh) ) {
2434                 // External bioseq, try to search if limit is set
2435                 if ( m_Selector->m_UnresolvedFlag !=
2436                      SAnnotSelector::eSearchUnresolved  ||
2437                      !m_Selector->m_LimitObject ) {
2438                     // Do not try to search on external segments
2439                     continue;
2440                 }
2441             }
2442 
2443             has_more = true;
2444             x_SearchMapped(smit, *master_loc_empty, idit->first, idit->second);
2445 
2446             if ( x_NoMoreObjects() ) {
2447                 return has_more;
2448             }
2449         }
2450     }
2451     return has_more;
2452 }
2453 
x_AddTSE(const CTSE_Handle & tse)2454 void CAnnot_Collector::x_AddTSE(const CTSE_Handle& tse)
2455 {
2456     const CTSE_Info* key = &tse.x_GetTSE_Info();
2457     _ASSERT(key);
2458     TTSE_LockMap::iterator iter = m_TSE_LockMap.lower_bound(key);
2459     if ( iter == m_TSE_LockMap.end() || iter->first != key ) {
2460         iter = m_TSE_LockMap.insert(iter, TTSE_LockMap::value_type(key, tse));
2461     }
2462     _ASSERT(iter != m_TSE_LockMap.end());
2463     _ASSERT(iter->first == key);
2464     _ASSERT(iter->second == tse);
2465 }
2466 
2467 
2468 
2469 struct SLessByInfo
2470 {
operator ()SLessByInfo2471     bool operator()(const CSeq_annot_Handle& a,
2472                     const CSeq_annot_Handle& b) const
2473         {
2474             return &a.x_GetInfo() < &b.x_GetInfo();
2475         }
operator ()SLessByInfo2476     bool operator()(const CSeq_annot_Handle& a,
2477                     const CSeq_annot_Info* b) const
2478         {
2479             return &a.x_GetInfo() < b;
2480         }
operator ()SLessByInfo2481     bool operator()(const CSeq_annot_Info* a,
2482                     const CSeq_annot_Handle& b) const
2483         {
2484             return a < &b.x_GetInfo();
2485         }
operator ()SLessByInfo2486     bool operator()(const CSeq_annot_Info* a,
2487                     const CSeq_annot_Info* b) const
2488         {
2489             return a < b;
2490         }
2491 };
2492 
2493 
x_AddObject(CAnnotObject_Ref & ref)2494 void CAnnot_Collector::x_AddObject(CAnnotObject_Ref& ref)
2495 {
2496     ref.SetFromOtherTSE(m_FromOtherTSE);
2497     m_AnnotSet.push_back(ref);
2498 }
2499 
2500 
x_AddObject(CAnnotObject_Ref & object_ref,CSeq_loc_Conversion * cvt,unsigned int loc_index)2501 void CAnnot_Collector::x_AddObject(CAnnotObject_Ref&    object_ref,
2502                                    CSeq_loc_Conversion* cvt,
2503                                    unsigned int         loc_index)
2504 {
2505     // Always map aligns through conv. set
2506     if ( (cvt && cvt->IsPartial()) || object_ref.IsAlign() ) {
2507         x_AddObjectMapping(object_ref, cvt, loc_index);
2508     }
2509     else {
2510         x_AddObject(object_ref);
2511     }
2512 }
2513 
2514 
x_AddPostMappings(void)2515 void CAnnot_Collector::x_AddPostMappings(void)
2516 {
2517     if ( !m_MappingCollector.get() ) {
2518         return;
2519     }
2520     CSeq_loc_Conversion::ELocationType loctype =
2521         (m_Selector->m_FeatProduct ?
2522          CSeq_loc_Conversion::eProduct :
2523          CSeq_loc_Conversion::eLocation);
2524     vector<CAnnotObject_Ref> partial_refs;
2525     ERASE_ITERATE ( CAnnotMappingCollector::TAnnotMappingSet, amit,
2526                     m_MappingCollector->m_AnnotMappingSet ) {
2527         CAnnotObject_Ref annot_ref = amit->first;
2528         if ( !amit->second ) {
2529             // no actual mapping, just filtering duplicates
2530             x_AddObject(annot_ref);
2531         }
2532         else {
2533             amit->second->Convert(annot_ref, loctype);
2534             if ( amit->second->IsPartial() &&
2535                  amit->second->HasUnconvertedId() ) {
2536                 // conversion is not complete
2537                 // keep the annotation for further conversion
2538                 continue;
2539             }
2540             if ( annot_ref.IsAlign() ||
2541                  !annot_ref.GetMappingInfo().GetTotalRange().Empty() ) {
2542                 x_AddObject(annot_ref);
2543             }
2544         }
2545         m_MappingCollector->m_AnnotMappingSet.erase(amit);
2546     }
2547     if ( m_MappingCollector->m_AnnotMappingSet.empty() ) {
2548         m_MappingCollector.reset();
2549     }
2550 }
2551 
2552 
2553 CConstRef<CSerialObject>
x_GetMappedObject(const CAnnotObject_Ref & obj)2554 CAnnot_Collector::x_GetMappedObject(const CAnnotObject_Ref& obj)
2555 {
2556     CConstRef<CSerialObject> ret;
2557     if ( obj.IsFeat() ) {
2558         CMappedFeat feat;
2559         feat.Set(*this, obj);
2560         ret = feat.GetSeq_feat();
2561     }
2562     else if ( obj.IsGraph() ) {
2563         CMappedGraph graph;
2564         graph.Set(*this, obj);
2565         ret = &graph.GetMappedGraph();
2566     }
2567     else if ( obj.IsAlign() ) {
2568     }
2569     return ret;
2570 }
2571 
2572 
x_AddPostMappingsCvt(CSeq_loc_Conversion_Set & cvt)2573 void CAnnot_Collector::x_AddPostMappingsCvt(CSeq_loc_Conversion_Set& cvt)
2574 {
2575     if ( !m_MappingCollector.get() ) {
2576         return;
2577     }
2578     CSeq_loc_Conversion::ELocationType loctype =
2579         (m_Selector->m_FeatProduct ?
2580          CSeq_loc_Conversion::eProduct :
2581          CSeq_loc_Conversion::eLocation);
2582     ITERATE ( CAnnotMappingCollector::TAnnotMappingSet, amit,
2583               m_MappingCollector->m_AnnotMappingSet ) {
2584         CAnnotObject_Ref annot_ref = amit->first;
2585         if ( kTraceFullCvt ) {
2586             amit->second.GetNCObject().Convert(annot_ref, loctype);
2587             LOG_POST("Full conversion, was: "<<
2588                      MSerial_AsnText<<*x_GetMappedObject(annot_ref));
2589         }
2590         cvt.Convert(annot_ref, loctype);
2591         if ( kTraceFullCvt ) {
2592             LOG_POST("Full conversion, now: "<<
2593                      MSerial_AsnText<<*x_GetMappedObject(annot_ref));
2594         }
2595         if ( annot_ref.IsAlign() ||
2596              !annot_ref.GetMappingInfo().GetTotalRange().Empty() ) {
2597             x_AddObject(annot_ref);
2598         }
2599     }
2600     m_MappingCollector.reset();
2601 }
2602 
2603 
x_Initialize(const SAnnotSelector & selector)2604 void CAnnot_Collector::x_Initialize(const SAnnotSelector& selector)
2605 {
2606     CScope_Impl::TConfReadLockGuard guard(m_Scope->m_ConfLock);
2607     x_Initialize0(selector);
2608     // Limit must be set, resolving is obsolete
2609     _ASSERT(m_Selector->m_LimitObjectType != SAnnotSelector::eLimit_None);
2610     _ASSERT(m_Selector->m_LimitObject);
2611     _ASSERT(m_Selector->m_ResolveMethod == SAnnotSelector::eResolve_None);
2612     x_SearchAll();
2613     x_Sort();
2614 }
2615 
2616 
x_Sort(void)2617 void CAnnot_Collector::x_Sort(void)
2618 {
2619     //CStopWatch sw(CStopWatch::eStart);
2620     _ASSERT(!m_MappingCollector.get());
2621 
2622     // Prepare id/range information for sorting.
2623     if (m_Selector->GetAnnotType() == CSeq_annot::C_Data::e_Ftable  &&
2624         m_Selector->m_LimitObjectType == SAnnotSelector::eLimit_Seq_annot_Info) {
2625         ITERATE(TAnnotSet, it, m_AnnotSet) {
2626             CRef<CIdRangeMap> id_rg_map(new CIdRangeMap(*it, *m_Selector));
2627             it->GetMappingInfo().SetIdRangeMap(*id_rg_map);
2628         }
2629     }
2630 
2631     switch ( m_Selector->m_SortOrder ) {
2632     case SAnnotSelector::eSortOrder_Normal:
2633         gfx::timsort(m_AnnotSet.begin(), m_AnnotSet.end(),
2634                      CAnnotObject_Less(m_Selector, m_Scope));
2635         break;
2636     case SAnnotSelector::eSortOrder_Reverse:
2637         gfx::timsort(m_AnnotSet.begin(), m_AnnotSet.end(),
2638                      CAnnotObject_LessReverse(m_Selector, m_Scope));
2639         break;
2640     default:
2641         // do nothing
2642         break;
2643     }
2644     //LOG_POST(Info<<"Sorted in "<<sw.Elapsed());
2645 }
2646 
2647 
2648 bool
x_MatchLimitObject(const CAnnotObject_Info & object) const2649 CAnnot_Collector::x_MatchLimitObject(const CAnnotObject_Info& object) const
2650 {
2651     if ( m_Selector->m_LimitObjectType != SAnnotSelector::eLimit_None ) {
2652         const CObject* limit = &*m_Selector->m_LimitObject;
2653         switch ( m_Selector->m_LimitObjectType ) {
2654         case SAnnotSelector::eLimit_TSE_Info:
2655         {{
2656             const CTSE_Info* info = &object.GetTSE_Info();
2657             _ASSERT(info);
2658             return info == limit;
2659         }}
2660         case SAnnotSelector::eLimit_Seq_entry_Info:
2661         {{
2662             const CSeq_entry_Info* info = &object.GetSeq_entry_Info();
2663             _ASSERT(info);
2664             for ( ;; ) {
2665                 if ( info == limit ) {
2666                     return true;
2667                 }
2668                 if ( !info->HasParent_Info() ) {
2669                     return false;
2670                 }
2671                 info = &info->GetParentSeq_entry_Info();
2672             }
2673         }}
2674         case SAnnotSelector::eLimit_Seq_annot_Info:
2675         {{
2676             const CSeq_annot_Info* info = &object.GetSeq_annot_Info();
2677             _ASSERT(info);
2678             return info == limit;
2679         }}
2680         default:
2681             NCBI_THROW(CAnnotException, eLimitError,
2682                        "CAnnot_Collector::x_MatchLimitObject: invalid mode");
2683         }
2684     }
2685     return true;
2686 }
2687 
2688 
x_MatchLocIndex(const SAnnotObject_Index & index) const2689 bool CAnnot_Collector::x_MatchLocIndex(const SAnnotObject_Index& index) const
2690 {
2691     return index.m_AnnotObject_Info->IsAlign()  ||
2692         m_Selector->m_FeatProduct == (index.m_AnnotLocationIndex == 1);
2693 }
2694 
2695 
x_MatchRange(const CHandleRange & hr,const CRange<TSeqPos> & range,const SAnnotObject_Index & index) const2696 bool CAnnot_Collector::x_MatchRange(const CHandleRange&       hr,
2697                                     const CRange<TSeqPos>&    range,
2698                                     const SAnnotObject_Index& index) const
2699 {
2700     if ( m_Selector->m_OverlapType == SAnnotSelector::eOverlap_Intervals ) {
2701         if ( index.m_HandleRange ) {
2702             if (m_Selector->m_IgnoreStrand) {
2703                 if ( !hr.IntersectingWith_NoStrand(*index.m_HandleRange) ) {
2704                     return false;
2705                 }
2706             }
2707             else {
2708                 if ( !hr.IntersectingWith(*index.m_HandleRange) ) {
2709                     return false;
2710                 }
2711             }
2712         }
2713         else {
2714             ENa_strand strand;
2715             if (m_Selector->m_IgnoreStrand) {
2716                 strand = eNa_strand_unknown;
2717             }
2718             else {
2719                 switch ( index.m_Flags & SAnnotObject_Index::fStrand_both ) {
2720                 case SAnnotObject_Index::fStrand_plus:
2721                     strand = eNa_strand_plus;
2722                     break;
2723                 case SAnnotObject_Index::fStrand_minus:
2724                     strand = eNa_strand_minus;
2725                     break;
2726                 default:
2727                     strand = eNa_strand_unknown;
2728                     break;
2729                 }
2730             }
2731             if ( !hr.IntersectingWith(range, strand) ) {
2732                 return false;
2733             }
2734         }
2735     }
2736     else {
2737         if ( !m_Selector->m_IgnoreStrand  &&
2738             (hr.GetStrandsFlag() & index.m_Flags) == 0 ) {
2739             return false; // different strands
2740         }
2741     }
2742     if ( !x_MatchLocIndex(index) ) {
2743         return false;
2744     }
2745     return true;
2746 }
2747 
2748 
x_GetTSE_Info(void)2749 void CAnnot_Collector::x_GetTSE_Info(void)
2750 {
2751     // only one TSE is needed
2752     _ASSERT(m_TSE_LockMap.empty());
2753     _ASSERT(m_Selector->m_LimitObjectType != SAnnotSelector::eLimit_None);
2754     _ASSERT(m_Selector->m_LimitObject);
2755 
2756     switch ( m_Selector->m_LimitObjectType ) {
2757     case SAnnotSelector::eLimit_TSE_Info:
2758     {
2759         _ASSERT(m_Selector->m_LimitTSE);
2760         _ASSERT(CTypeConverter<CTSE_Info>::
2761                 SafeCast(&*m_Selector->m_LimitObject));
2762         break;
2763     }
2764     case SAnnotSelector::eLimit_Seq_entry_Info:
2765     {
2766         _ASSERT(m_Selector->m_LimitTSE);
2767         _ASSERT(CTypeConverter<CSeq_entry_Info>::
2768                 SafeCast(&*m_Selector->m_LimitObject));
2769         break;
2770     }
2771     case SAnnotSelector::eLimit_Seq_annot_Info:
2772     {
2773         _ASSERT(m_Selector->m_LimitTSE);
2774         _ASSERT(CTypeConverter<CSeq_annot_Info>::
2775                 SafeCast(&*m_Selector->m_LimitObject));
2776         break;
2777     }
2778     default:
2779         NCBI_THROW(CAnnotException, eLimitError,
2780                    "CAnnot_Collector::x_GetTSE_Info: invalid mode");
2781     }
2782     _ASSERT(m_Selector->m_LimitObject);
2783     _ASSERT(m_Selector->m_LimitTSE);
2784     x_AddTSE(m_Selector->m_LimitTSE);
2785 }
2786 
2787 
x_SearchTSE(const CTSE_Handle & tseh,const CSeq_id_Handle & id,const CHandleRange & hr,CSeq_loc_Conversion * cvt,bool check_adaptive)2788 bool CAnnot_Collector::x_SearchTSE(const CTSE_Handle&    tseh,
2789                                    const CSeq_id_Handle& id,
2790                                    const CHandleRange&   hr,
2791                                    CSeq_loc_Conversion*  cvt,
2792                                    bool check_adaptive)
2793 {
2794     if ( !m_Selector->m_SourceLoc ) {
2795         return x_SearchTSE2(tseh, id, hr, cvt, check_adaptive);
2796     }
2797     const CHandleRangeMap& src_hrm = *m_Selector->m_SourceLoc;
2798     CHandleRangeMap::const_iterator it = src_hrm.find(id);
2799     if ( it == src_hrm.end() || !hr.IntersectingWithTotalRange(it->second) ) {
2800         // non-overlapping loc
2801         return false;
2802     }
2803     CHandleRange hr2(hr, it->second.GetOverlappingRange());
2804     return !hr2.Empty() && x_SearchTSE2(tseh, id, hr2, cvt, check_adaptive);
2805 }
2806 
2807 
x_SearchTSE2(const CTSE_Handle & tseh,const CSeq_id_Handle & id,const CHandleRange & hr,CSeq_loc_Conversion * cvt,bool check_adaptive)2808 bool CAnnot_Collector::x_SearchTSE2(const CTSE_Handle&    tseh,
2809                                     const CSeq_id_Handle& id,
2810                                     const CHandleRange&   hr,
2811                                     CSeq_loc_Conversion*  cvt,
2812                                     bool check_adaptive)
2813 {
2814     const CTSE_Info& tse = tseh.x_GetTSE_Info();
2815     bool found = false;
2816 
2817     tse.UpdateAnnotIndex(id);
2818     CTSE_Info::TAnnotLockReadGuard guard(tse.GetAnnotLock());
2819 
2820     //CStopWatch sw(CStopWatch::eStart);
2821 
2822     if (cvt) {
2823         cvt->SetSrcId(id);
2824     }
2825     // Skip excluded TSEs
2826     //if ( ExcludedTSE(tse) ) {
2827     //continue;
2828     //}
2829 
2830     SAnnotSelector::TAdaptiveDepthFlags adaptive_flags = 0;
2831     if ( check_adaptive &&
2832          (!m_Selector->GetExactDepth() ||
2833           m_Selector->GetResolveDepth() == kMax_Int) ) {
2834         adaptive_flags = m_Selector->GetAdaptiveDepthFlags();
2835     }
2836     if ( (adaptive_flags & SAnnotSelector::fAdaptive_ByTriggers) &&
2837          m_TriggerTypes.any() &&
2838          tse.ContainsMatchingBioseq(id) ) {
2839         // first check triggers
2840         const SIdAnnotObjs* objs = tse.x_GetUnnamedIdObjects(id);
2841         if ( objs ) {
2842             for ( size_t index = 0, count = objs->x_GetRangeMapCount();
2843                   index < count; ++index ) {
2844                 if ( objs->x_RangeMapIsEmpty(index) ) {
2845                     continue;
2846                 }
2847                 if ( m_TriggerTypes.test(index) ) {
2848                     m_UnseenAnnotTypes.reset();
2849                     found = true;
2850                     // If we have found adaptive depth trigger features
2851                     // it means that sequence is annotated and
2852                     // time/segments limits are no longer active.
2853                     x_StopSearchLimits();
2854                     break;
2855                 }
2856             }
2857         }
2858     }
2859     if ( (adaptive_flags & SAnnotSelector::fAdaptive_BySubtypes) &&
2860          m_UnseenAnnotTypes.any() ) {
2861         ITERATE (CTSE_Info::TNamedAnnotObjs, iter, tse.m_NamedAnnotObjs) {
2862             const SIdAnnotObjs* objs =
2863                 tse.x_GetIdObjects(iter->second, id);
2864             if ( objs ) {
2865                 for ( size_t index = 0, count = objs->x_GetRangeMapCount();
2866                       index < count; ++index ) {
2867                     if ( !objs->x_RangeMapIsEmpty(index) ) {
2868                         m_UnseenAnnotTypes.reset(index);
2869                     }
2870                 }
2871             }
2872         }
2873     }
2874 
2875     if ( m_Selector->HasExplicitAnnotsNames() ) {
2876         // only 'included' annots
2877         ITERATE ( SAnnotSelector::TAnnotsNames, iter, m_Selector->GetIncludedAnnotsNames() ) {
2878             if ( m_Selector->ExcludedAnnotName(*iter) ) {
2879                 // it may happen e.g. when another zoom level is selected
2880                 continue;
2881             }
2882             const SIdAnnotObjs* objs = tse.x_GetIdObjects(*iter, id);
2883             if ( objs ) {
2884                 x_SearchObjects(tseh, objs, guard, *iter, id, hr, cvt);
2885                 if ( x_NoMoreObjects() ) {
2886                     return found;
2887                 }
2888             }
2889         }
2890     }
2891     else {
2892         // all annots, skipping 'excluded'
2893         ITERATE (CTSE_Info::TNamedAnnotObjs, iter, tse.m_NamedAnnotObjs) {
2894             if ( m_Selector->ExcludedAnnotName(iter->first) ) {
2895                 continue;
2896             }
2897             const SIdAnnotObjs* objs = tse.x_GetIdObjects(iter->second, id);
2898             if ( objs ) {
2899                 x_SearchObjects(tseh, objs, guard, iter->first, id, hr, cvt);
2900                 if ( x_NoMoreObjects() ) {
2901                     return found;
2902                 }
2903             }
2904         }
2905     }
2906 
2907     //LOG_POST(Info<<"Collected annots in "<<sw.Elapsed());
2908     return found;
2909 }
2910 
2911 
x_AddObjectMapping(CAnnotObject_Ref & object_ref,CSeq_loc_Conversion * cvt,unsigned int loc_index)2912 void CAnnot_Collector::x_AddObjectMapping(CAnnotObject_Ref&    object_ref,
2913                                           CSeq_loc_Conversion* cvt,
2914                                           unsigned int         loc_index)
2915 {
2916     if ( cvt ) {
2917         // reset current mapping info, it will be updated by conversion set
2918         object_ref.ResetLocation();
2919     }
2920     if ( !m_MappingCollector.get() ) {
2921         m_MappingCollector.reset(new CAnnotMappingCollector);
2922     }
2923     object_ref.SetFromOtherTSE(m_FromOtherTSE);
2924     CRef<CSeq_loc_Conversion_Set>& mapping_set =
2925         m_MappingCollector->m_AnnotMappingSet[object_ref];
2926     if ( cvt ) {
2927         if ( !mapping_set ) {
2928             mapping_set.Reset(new CSeq_loc_Conversion_Set(m_Scope));
2929         }
2930         _ASSERT(cvt->IsPartial() || object_ref.IsAlign());
2931         CRef<CSeq_loc_Conversion> cvt_copy(new CSeq_loc_Conversion(*cvt));
2932         mapping_set->Add(*cvt_copy, loc_index);
2933     }
2934 }
2935 
2936 
sx_IsEmpty(const SAnnotSelector & sel)2937 static bool sx_IsEmpty(const SAnnotSelector& sel)
2938 {
2939     if ( sel.GetAnnotType() != CSeq_annot::C_Data::e_not_set ) {
2940         return false;
2941     }
2942     return true;
2943 }
2944 
2945 
x_SearchObjects(const CTSE_Handle & tseh,const SIdAnnotObjs * objs,CTSE_Info::TAnnotLockReadGuard & guard,const CAnnotName & annot_name,const CSeq_id_Handle & id,const CHandleRange & hr,CSeq_loc_Conversion * cvt)2946 void CAnnot_Collector::x_SearchObjects(const CTSE_Handle&    tseh,
2947                                        const SIdAnnotObjs*   objs,
2948                                        CTSE_Info::TAnnotLockReadGuard& guard,
2949                                        const CAnnotName&     annot_name,
2950                                        const CSeq_id_Handle& id,
2951                                        const CHandleRange&   hr,
2952                                        CSeq_loc_Conversion*  cvt)
2953 {
2954     if ( m_Selector->m_CollectNames ) {
2955         if ( m_AnnotNames->find(annot_name) != m_AnnotNames->end() ) {
2956             // already found
2957             return;
2958         }
2959         if ( sx_IsEmpty(*m_Selector) ) {
2960             // no search for individual annotations
2961             // just remember the name and leave
2962             m_AnnotNames->insert(annot_name);
2963             return;
2964         }
2965     }
2966 
2967     if ( m_CollectAnnotTypes.any() ) {
2968         x_SearchRange(tseh, objs, guard, annot_name, id, hr, cvt);
2969         if ( x_NoMoreObjects() ) {
2970             return;
2971         }
2972     }
2973     if ( m_Selector->m_CollectCostOfLoading ) {
2974         return;
2975     }
2976 
2977     static const size_t kAnnotTypeIndex_SNP =
2978         CAnnotType_Index::GetSubtypeIndex(CSeqFeatData::eSubtype_variation);
2979 
2980     if ( m_CollectAnnotTypes.test(kAnnotTypeIndex_SNP) ) {
2981         if ( m_Selector->m_CollectTypes &&
2982              m_AnnotTypes.test(kAnnotTypeIndex_SNP) ) {
2983             return;
2984         }
2985         CSeq_annot_Handle sah;
2986         CHandleRange::TRange range = hr.GetOverlappingRange();
2987         ITERATE ( CTSE_Info::TSNPSet, snp_annot_it, objs->m_SNPSet ) {
2988             const CSeq_annot_SNP_Info& snp_annot = **snp_annot_it;
2989             CSeq_annot_SNP_Info::const_iterator snp_it =
2990                 snp_annot.FirstIn(range);
2991             if ( snp_it != snp_annot.end() ) {
2992                 x_AddTSE(tseh);
2993                 const CSeq_annot_Info& annot_info =
2994                     snp_annot.GetParentSeq_annot_Info();
2995                 if ( !sah || &sah.x_GetInfo() != &annot_info ) {
2996                     sah.x_Set(annot_info, tseh);
2997                 }
2998 
2999                 do {
3000                     const SSNP_Info& snp = *snp_it;
3001                     if ( snp.NoMore(range) ) {
3002                         break;
3003                     }
3004                     if ( snp.NotThis(range) ) {
3005                         continue;
3006                     }
3007 
3008                     if (m_Selector->m_CollectTypes) {
3009                         m_AnnotTypes.set(kAnnotTypeIndex_SNP);
3010                         break;
3011                     }
3012                     if (m_Selector->m_CollectNames) {
3013                         m_AnnotNames->insert(annot_name);
3014                         break;
3015                     }
3016 
3017                     CAnnotObject_Ref annot_ref(snp_annot, sah, snp, cvt);
3018                     x_AddObject(annot_ref);
3019                     if ( x_NoMoreObjects() ) {
3020                         return;
3021                     }
3022                     if ( m_Selector->m_CollectSeq_annots ) {
3023                         // Ignore multiple SNPs from the same seq-annot
3024                         break;
3025                     }
3026                 } while ( ++snp_it != snp_annot.end() );
3027             }
3028         }
3029     }
3030 }
3031 
3032 
3033 static inline
sx_GeneIsSuppressed(const CSeq_feat & feat)3034 bool sx_GeneIsSuppressed(const CSeq_feat& feat)
3035 {
3036     if ( feat.IsSetXref() ) {
3037         const CSeq_feat::TXref& xrefs = feat.GetXref();
3038         if ( xrefs.size() == 1 ) {
3039             const CSeqFeatXref& xref = *xrefs[0];
3040             if ( xref.IsSetData() ) {
3041                 const CSeqFeatData& data = xref.GetData();
3042                 if ( data.IsGene() ) {
3043                     const CGene_ref& gene = data.GetGene();
3044                     if ( !gene.IsSetLocus() && !gene.IsSetLocus_tag() ) {
3045                         // feature has single empty gene xref
3046                         return true;
3047                     }
3048                 }
3049             }
3050         }
3051     }
3052     return false;
3053 }
3054 
3055 
x_SearchRange(const CTSE_Handle & tseh,const SIdAnnotObjs * objs,CTSE_Info::TAnnotLockReadGuard & guard,const CAnnotName & annot_name,const CSeq_id_Handle & id,const CHandleRange & hr,CSeq_loc_Conversion * cvt)3056 void CAnnot_Collector::x_SearchRange(const CTSE_Handle&    tseh,
3057                                      const SIdAnnotObjs*   objs,
3058                                      CTSE_Info::TAnnotLockReadGuard& guard,
3059                                      const CAnnotName&     annot_name,
3060                                      const CSeq_id_Handle& id,
3061                                      const CHandleRange&   hr,
3062                                      CSeq_loc_Conversion*  cvt)
3063 {
3064     const CTSE_Info& tse = tseh.x_GetTSE_Info();
3065     _ASSERT(objs);
3066 
3067     // CHandleRange::TRange range = hr.GetOverlappingRange();
3068 
3069     x_AddTSE(tseh);
3070     CSeq_annot_Handle sah;
3071 
3072     size_t from_idx = 0;
3073     bool enough = false;
3074 
3075     typedef vector<const CTSE_Chunk_Info*> TStubs;
3076     typedef map<const CTSE_Split_Info*, CTSE_Split_Info::TChunkIds> TStubMap;
3077     TStubs stubs;
3078     bool restart = false;
3079     do {
3080         if ( restart ) {
3081             _ASSERT(!enough);
3082 
3083             TStubMap stubmap;
3084             ITERATE ( TStubs, it, stubs ) {
3085                 const CTSE_Chunk_Info& chunk = **it;
3086                 stubmap[&chunk.GetSplitInfo()].
3087                     push_back(chunk.GetChunkId());
3088             }
3089             stubs.clear();
3090             restart = false;
3091 
3092             // Release lock for tse update:
3093             guard.Release();
3094             for ( auto& it : stubmap) {
3095                 if ( m_Selector->GetMaxSize() < numeric_limits<TMaxSize>::max() ) {
3096                     it.first->LoadChunk(it.second.front());
3097                     break;
3098                 }
3099                 gfx::timsort(it.second.begin(), it.second.end());
3100                 it.second.erase(unique(it.second.begin(), it.second.end()), it.second.end());
3101                 it.first->LoadChunks(it.second);
3102             }
3103             tse.UpdateAnnotIndex(id);
3104 
3105             // Acquire the lock again:
3106             guard.Guard(tse.GetAnnotLock());
3107 
3108             // Reget range map pointer as it may change:
3109             objs = tse.x_GetIdObjects(annot_name, id);
3110             _ASSERT(objs);
3111         }
3112         for ( size_t index = from_idx, count = objs->x_GetRangeMapCount();
3113               index < count; ++index ) {
3114             if ( m_Selector->m_CollectTypes && m_AnnotTypes.test(index)) {
3115                 continue;
3116             }
3117             if ( !m_CollectAnnotTypes.test(index) ) {
3118                 continue;
3119             }
3120 
3121             if ( objs->x_RangeMapIsEmpty(index) ) {
3122                 continue;
3123             }
3124             const CTSE_Info::TRangeMap& rmap = objs->x_GetRangeMap(index);
3125 
3126             size_t start_size = m_AnnotSet.size(); // for rollback
3127 
3128             // Same annotations may appear more than once if circular.
3129             // In this case duplicated annotation entries need to be removed.
3130             bool need_unique = false;
3131 
3132             ITERATE(CHandleRange, rg_it, hr) {
3133                 CHandleRange::TRange range = rg_it->first;
3134 
3135                 for ( CTSE_Info::TRangeMap::const_iterator
3136                           aoit(rmap.begin(range));
3137                       aoit; ++aoit ) {
3138                     const CAnnotObject_Info& annot_info =
3139                         *aoit->second.m_AnnotObject_Info;
3140 
3141                     // special filtering
3142                     if ( m_Selector->GetExcludeIfGeneIsSuppressed() &&
3143                          annot_info.IsFeat() ) {
3144                         if ( annot_info.IsRegular() ) {
3145                             if ( sx_GeneIsSuppressed(annot_info.GetFeat()) ) {
3146                                 continue;
3147                             }
3148                         }
3149                     }
3150 
3151                     // Collect types
3152                     if (m_Selector->m_CollectTypes) {
3153                         if (x_MatchLimitObject(annot_info)  &&
3154                             x_MatchRange(hr, aoit->first, aoit->second) ) {
3155                             m_AnnotTypes.set(index);
3156                             break;
3157                         }
3158                     }
3159                     if (m_Selector->m_CollectNames) {
3160                         if (x_MatchLimitObject(annot_info)  &&
3161                             x_MatchRange(hr, aoit->first, aoit->second) ) {
3162                             m_AnnotNames->insert(annot_name);
3163                             return;
3164                         }
3165                     }
3166 
3167                     if ( annot_info.IsChunkStub() ) {
3168                         const CTSE_Chunk_Info& chunk = annot_info.GetChunk_Info();
3169                         if ( !chunk.NotLoaded() && !tse.x_DirtyAnnotIndex() ) {
3170                             // Skip chunk stub
3171                             continue;
3172                         }
3173                         if ( chunk.NotLoaded() &&
3174                              m_Selector->m_CollectCostOfLoading &&
3175                              chunk.GetChunkId() != CTSE_Chunk_Info::kDelayedMain_ChunkId ) {
3176                             // accumulate cost of chunks to be loaded
3177                             auto cost = chunk.GetLoadCost();
3178                             m_LoadBytes += cost.first;
3179                             m_LoadSeconds += cost.second;
3180                             continue;
3181                         }
3182                         if ( !restart ) {
3183                             restart = true;
3184                             // New annot objects are to be loaded,
3185                             // so we'll need to restart scan of current range.
3186                             // Forget already found objects
3187                             // as they will be found again:
3188                             m_AnnotSet.resize(start_size);
3189                             // Update start index for the new search
3190                             from_idx = index;
3191                         }
3192                         if ( chunk.NotLoaded() ) {
3193                             stubs.push_back(&chunk);
3194                         }
3195                     }
3196                     if ( restart ) {
3197                         _ASSERT(!enough);
3198                         continue;
3199                     }
3200                     if ( m_Selector->m_CollectCostOfLoading ) {
3201                         continue;
3202                     }
3203 
3204                     if ( annot_info.IsLocs() ) {
3205                         const CSeq_loc& ref_loc = annot_info.GetLocs();
3206 
3207                         // Check if the stub has been already processed
3208                         if ( m_AnnotLocsSet.get() ) {
3209                             CConstRef<CSeq_loc> ploc(&ref_loc);
3210                             TAnnotLocsSet::const_iterator found =
3211                                 m_AnnotLocsSet->find(ploc);
3212                             if (found != m_AnnotLocsSet->end()) {
3213                                 continue;
3214                             }
3215                         }
3216                         else {
3217                             m_AnnotLocsSet.reset(new TAnnotLocsSet);
3218                         }
3219                         m_AnnotLocsSet->insert(ConstRef(&ref_loc));
3220 
3221                         // Search annotations on the referenced location
3222                         if ( !ref_loc.IsInt() ) {
3223                             ERR_POST_X(1, "CAnnot_Collector: "
3224                                        "Seq-annot.locs is not Seq-interval");
3225                             continue;
3226                         }
3227                         const CSeq_interval& ref_int = ref_loc.GetInt();
3228                         const CSeq_id& ref_id = ref_int.GetId();
3229                         CSeq_id_Handle ref_idh = CSeq_id_Handle::GetHandle(ref_id);
3230                         // check ResolveTSE limit
3231                         if ( m_Selector->m_ResolveMethod == SAnnotSelector::eResolve_TSE ) {
3232                             if ( !tseh.GetBioseqHandle(ref_idh) ) {
3233                                 continue;
3234                             }
3235                         }
3236 
3237                         // calculate ranges
3238                         TSeqPos ref_from = ref_int.GetFrom();
3239                         TSeqPos ref_to = ref_int.GetTo();
3240                         bool ref_minus = ref_int.IsSetStrand()?
3241                             IsReverse(ref_int.GetStrand()) : false;
3242                         TSeqPos loc_from = aoit->first.GetFrom();
3243                         TSeqPos loc_to = aoit->first.GetTo();
3244                         TSeqPos loc_view_from = max(range.GetFrom(), loc_from);
3245                         TSeqPos loc_view_to = min(range.GetTo(), loc_to);
3246 
3247                         CHandleRangeMap ref_rmap;
3248                         CHandleRange::TRange ref_search_range;
3249                         if ( !ref_minus ) {
3250                             ref_search_range.Set(ref_from + (loc_view_from - loc_from),
3251                                                  ref_to + (loc_view_to - loc_to));
3252                         }
3253                         else {
3254                             ref_search_range.Set(ref_from - (loc_view_to - loc_to),
3255                                                  ref_to - (loc_view_from - loc_from));
3256                         }
3257                         ref_rmap.AddRanges(ref_idh).AddRange(ref_search_range,
3258                                                              eNa_strand_unknown);
3259 
3260                         if (m_Selector->m_NoMapping) {
3261                             x_SearchLoc(ref_rmap, 0, &tseh);
3262                         }
3263                         else {
3264                             CRef<CSeq_loc> master_loc_empty(new CSeq_loc);
3265                             master_loc_empty->SetEmpty(
3266                                 const_cast<CSeq_id&>(*id.GetSeqId()));
3267                             CRef<CSeq_loc_Conversion> locs_cvt(new CSeq_loc_Conversion(
3268                                                                    *master_loc_empty,
3269                                                                    id,
3270                                                                    aoit->first,
3271                                                                    ref_idh,
3272                                                                    ref_from,
3273                                                                    ref_minus,
3274                                                                    m_Scope));
3275                             if ( cvt ) {
3276                                 locs_cvt->CombineWith(*cvt);
3277                             }
3278                             x_SearchLoc(ref_rmap, &*locs_cvt, &tseh);
3279                         }
3280                         if ( x_NoMoreObjects() ) {
3281                             _ASSERT(!restart);
3282                             enough = true;
3283                             break;
3284                         }
3285                         continue;
3286                     }
3287 
3288                     _ASSERT(m_Selector->MatchType(annot_info));
3289 
3290                     if ( !x_MatchLimitObject(annot_info) ) {
3291                         continue;
3292                     }
3293 
3294                     if ( !x_MatchRange(hr, aoit->first, aoit->second) ) {
3295                         continue;
3296                     }
3297 
3298                     if ( annot_info.GetAnnotIndex() == CSeq_annot_Info::kWholeAnnotIndex ) {
3299                         const CSeq_annot_Info& seq_annot = annot_info.GetSeq_annot_Info();
3300                         if ( seq_annot.IsSortedTable() ) {
3301                             sah.x_Set(seq_annot, tseh);
3302                             CHandleRange::TRange hrange = hr.GetOverlappingRange();
3303                             for ( CSeq_annot_SortedIter iter =
3304                                       seq_annot.StartSortedIterator(hrange);
3305                                   iter; ++iter ) {
3306 
3307                                 if (m_Selector->HasBitFilter() &&
3308                                     !seq_annot.MatchBitFilter(*m_Selector,
3309                                                               iter) ) {
3310                                     continue;
3311                                 }
3312 
3313                                 if (m_Selector->m_CollectTypes) {
3314                                     m_AnnotTypes.set(index);
3315                                     break;
3316                                 }
3317 
3318                                 if (m_Selector->m_CollectNames) {
3319                                     m_AnnotNames->insert(annot_name);
3320                                     break;
3321                                 }
3322 
3323                                 CAnnotObject_Ref annot_ref(sah, iter, cvt);
3324                                 x_AddObject(annot_ref);
3325                                 if ( x_NoMoreObjects() ) {
3326                                     _ASSERT(!restart);
3327                                     enough = true;
3328                                     break;
3329                                 }
3330 
3331                                 if ( m_Selector->m_CollectSeq_annots ) {
3332                                     // Ignore multiple feats from the same seq-annot
3333                                     break;
3334                                 }
3335                             }
3336                         }
3337                         if ( enough ) {
3338                             _ASSERT(!restart);
3339                             break;
3340                         }
3341                         continue;
3342                     }
3343 
3344                     bool is_circular = aoit->second.m_HandleRange  &&
3345                         aoit->second.m_HandleRange->GetData().IsCircular();
3346                     need_unique |= is_circular;
3347                     const CSeq_annot_Info& sa_info =
3348                         annot_info.GetSeq_annot_Info();
3349                     if ( !sah || &sah.x_GetInfo() != &sa_info ){
3350                         sah.x_Set(sa_info, tseh);
3351                     }
3352 
3353                     CAnnotObject_Ref annot_ref(annot_info, sah);
3354                     if ( !cvt  &&  aoit->second.GetMultiIdFlag() ) {
3355                         // Create self-conversion, add to conversion set
3356                         CHandleRange::TRange ref_rg = aoit->first;
3357                         if (is_circular ) {
3358                             TSeqPos from = aoit->second.m_HandleRange->
3359                                 GetData().GetLeft();
3360                             TSeqPos to =aoit->second.m_HandleRange->
3361                                 GetData().GetRight();
3362                             ref_rg = CHandleRange::TRange(from, to);
3363                         }
3364                         annot_ref.GetMappingInfo().SetAnnotObjectRange(ref_rg,
3365                                                                        m_Selector->m_FeatProduct);
3366                         x_AddObjectMapping(annot_ref, 0,
3367                                            aoit->second.m_AnnotLocationIndex);
3368                     }
3369                     else {
3370                         if (cvt  &&  !annot_ref.IsAlign() ) {
3371                             cvt->Convert(annot_ref,
3372                                          m_Selector->m_FeatProduct ?
3373                                          CSeq_loc_Conversion::eProduct :
3374                                          CSeq_loc_Conversion::eLocation,
3375                                          id,
3376                                          aoit->first,
3377                                          aoit->second);
3378                         }
3379                         else {
3380                             CHandleRange::TRange ref_rg = aoit->first;
3381                             if ( is_circular ) {
3382                                 TSeqPos from = aoit->second.m_HandleRange->
3383                                     GetData().GetLeft();
3384                                 TSeqPos to = aoit->second.m_HandleRange->
3385                                     GetData().GetRight();
3386                                 ref_rg = CHandleRange::TRange(from, to);
3387                             }
3388                             annot_ref.GetMappingInfo().SetAnnotObjectRange(ref_rg,
3389                                                                            m_Selector->m_FeatProduct);
3390                         }
3391                         x_AddObject(annot_ref, cvt,
3392                                     aoit->second.m_AnnotLocationIndex);
3393                     }
3394                     if ( x_NoMoreObjects() ) {
3395                         _ASSERT(!restart);
3396                         enough = true;
3397                         break;
3398                     }
3399                 }
3400                 if ( enough ) {
3401                     _ASSERT(!restart);
3402                     break;
3403                 }
3404                 if ( restart ) {
3405                     _ASSERT(!enough);
3406                     continue;
3407                 }
3408             }
3409             if ( restart ) {
3410                 _ASSERT(!enough);
3411                 continue;
3412             }
3413             if ( need_unique  ||  hr.end() - hr.begin() > 1 ) {
3414                 TAnnotSet::iterator first_added = m_AnnotSet.begin() + start_size;
3415                 stable_sort(first_added, m_AnnotSet.end());
3416                 m_AnnotSet.erase(unique(first_added, m_AnnotSet.end()),
3417                                  m_AnnotSet.end());
3418             }
3419             if ( enough ) {
3420                 _ASSERT(!restart);
3421                 break;
3422             }
3423         }
3424         if ( enough ) {
3425             _ASSERT(!restart);
3426             break;
3427         }
3428     } while ( restart );
3429 }
3430 
3431 
x_SearchLoc(const CHandleRangeMap & loc,CSeq_loc_Conversion * cvt,const CTSE_Handle * using_tse,bool top_level)3432 bool CAnnot_Collector::x_SearchLoc(const CHandleRangeMap& loc,
3433                                    CSeq_loc_Conversion*   cvt,
3434                                    const CTSE_Handle*     using_tse,
3435                                    bool top_level)
3436 {
3437     bool found = false;
3438     ITERATE ( CHandleRangeMap, idit, loc ) {
3439         if ( idit->second.Empty() ) {
3440             continue;
3441         }
3442         if ( m_Selector->m_LimitObjectType == SAnnotSelector::eLimit_None ) {
3443             // any data source
3444             const CTSE_Handle* tse = 0;
3445             CBioseq_Handle bh = x_GetBioseqHandle(idit->first, top_level);
3446             if ( !bh ) {
3447                 if ( m_Selector->m_UnresolvedFlag ==
3448                     SAnnotSelector::eFailUnresolved ) {
3449                     NCBI_THROW(CAnnotException, eFindFailed,
3450                                "Cannot find id synonyms");
3451                 }
3452                 if ( m_Selector->m_UnresolvedFlag ==
3453                     SAnnotSelector::eIgnoreUnresolved ) {
3454                     continue; // skip unresolvable IDs
3455                 }
3456                 tse = using_tse;
3457             }
3458             else {
3459                 tse = &bh.GetTSE_Handle();
3460                 if ( using_tse ) {
3461                     using_tse->AddUsedTSE(*tse);
3462                 }
3463             }
3464             bool check_adaptive = x_CheckAdaptive(bh);
3465             if ( m_Selector->m_ExcludeExternal ) {
3466                 if ( !bh ) {
3467                     // no sequence tse
3468                     continue;
3469                 }
3470                 _ASSERT(tse);
3471                 m_FromOtherTSE = false;
3472                 const CTSE_Info& tse_info = tse->x_GetTSE_Info();
3473                 tse_info.UpdateAnnotIndex();
3474                 if ( tse_info.HasMatchingAnnotIds() ) {
3475                     CConstRef<CSynonymsSet> syns = m_Scope->GetSynonyms(bh);
3476                     ITERATE(CSynonymsSet, syn_it, *syns) {
3477                         found |= x_SearchTSE(*tse,
3478                                              syns->GetSeq_id_Handle(syn_it),
3479                                              idit->second, cvt, check_adaptive);
3480                         if ( x_NoMoreObjects() ) {
3481                             break;
3482                         }
3483                     }
3484                 }
3485                 else {
3486                     const CBioseq_Handle::TId& syns = bh.GetId();
3487                     bool only_gi = tse_info.OnlyGiAnnotIds();
3488                     ITERATE ( CBioseq_Handle::TId, syn_it, syns ) {
3489                         if ( !only_gi || syn_it->IsGi() ) {
3490                             found |= x_SearchTSE(*tse, *syn_it,
3491                                                  idit->second, cvt, check_adaptive);
3492                             if ( x_NoMoreObjects() ) {
3493                                 break;
3494                             }
3495                         }
3496                     }
3497                 }
3498             }
3499             else {
3500                 CScope_Impl::TTSE_LockMatchSet tse_map;
3501                 if ( m_Selector->IsIncludedAnyNamedAnnotAccession() ) {
3502                     m_Scope->GetTSESetWithAnnots(idit->first, tse_map,
3503                                                  *m_Selector);
3504                 }
3505                 else {
3506                     m_Scope->GetTSESetWithAnnots(idit->first, tse_map);
3507                 }
3508                 ITERATE ( CScope_Impl::TTSE_LockMatchSet, tse_it, tse_map ) {
3509                     if ( tse ) {
3510                         tse->AddUsedTSE(tse_it->first);
3511                     }
3512                     m_FromOtherTSE = !bh || tse_it->first != bh.GetTSE_Handle();
3513                     found |= x_SearchTSE(tse_it->first, tse_it->second,
3514                                          idit->second, cvt, check_adaptive);
3515                     if ( x_NoMoreObjects() ) {
3516                         break;
3517                     }
3518                 }
3519             }
3520         }
3521         else if ( m_Selector->m_UnresolvedFlag == SAnnotSelector::eSearchUnresolved &&
3522                   m_Selector->m_ResolveMethod == SAnnotSelector::eResolve_TSE &&
3523                   m_Selector->m_LimitObjectType != SAnnotSelector::eLimit_None &&
3524                   m_Selector->m_LimitObject ) {
3525             // external annotations only
3526             m_FromOtherTSE = true;
3527             bool check_adaptive = x_CheckAdaptive(idit->first);
3528             ITERATE ( TTSE_LockMap, tse_it, m_TSE_LockMap ) {
3529                 const CTSE_Info& tse_info = *tse_it->first;
3530                 tse_info.UpdateAnnotIndex();
3531                 found |= x_SearchTSE(tse_it->second, idit->first,
3532                                      idit->second, cvt, check_adaptive);
3533             }
3534         }
3535         else {
3536             // Search in the limit objects
3537             bool check_adaptive = x_CheckAdaptive(idit->first);
3538             CConstRef<CSynonymsSet> syns;
3539             bool syns_initialized = false;
3540             ITERATE ( TTSE_LockMap, tse_it, m_TSE_LockMap ) {
3541                 const CTSE_Info& tse_info = *tse_it->first;
3542                 tse_info.UpdateAnnotIndex();
3543                 if ( tse_info.HasMatchingAnnotIds() ) {
3544                     if ( !syns_initialized ) {
3545                         syns = m_Scope->GetSynonyms(idit->first,
3546                                                     sx_GetFlag(GetSelector()));
3547                         syns_initialized = true;
3548                     }
3549                     if ( !syns ) {
3550                         found |= x_SearchTSE(tse_it->second, idit->first,
3551                                              idit->second, cvt, check_adaptive);
3552                     }
3553                     else {
3554                         ITERATE(CSynonymsSet, syn_it, *syns) {
3555                             found |= x_SearchTSE(tse_it->second,
3556                                                  syns->GetSeq_id_Handle(syn_it),
3557                                                  idit->second, cvt, check_adaptive);
3558                             if ( x_NoMoreObjects() ) {
3559                                 break;
3560                             }
3561                         }
3562                     }
3563                 }
3564                 else {
3565                     const CBioseq_Handle::TId& ids = m_Scope->GetIds(idit->first);
3566                     bool only_gi = tse_info.OnlyGiAnnotIds();
3567                     ITERATE ( CBioseq_Handle::TId, syn_it, ids ) {
3568                         if ( !only_gi || syn_it->IsGi() ) {
3569                             found |= x_SearchTSE(tse_it->second, *syn_it,
3570                                                  idit->second, cvt, check_adaptive);
3571                             if ( x_NoMoreObjects() ) {
3572                                 break;
3573                             }
3574                         }
3575                     }
3576                 }
3577                 if ( x_NoMoreObjects() ) {
3578                     break;
3579                 }
3580             }
3581         }
3582         if ( x_NoMoreObjects() ) {
3583             break;
3584         }
3585     }
3586     return found;
3587 }
3588 
3589 
x_SearchAll(void)3590 void CAnnot_Collector::x_SearchAll(void)
3591 {
3592     _ASSERT(m_Selector->m_LimitObjectType != SAnnotSelector::eLimit_None);
3593     _ASSERT(m_Selector->m_LimitObject);
3594     if ( m_TSE_LockMap.empty() ) {
3595         // data source name not matched
3596         return;
3597     }
3598     switch ( m_Selector->m_LimitObjectType ) {
3599     case SAnnotSelector::eLimit_TSE_Info:
3600         x_SearchAll(*CTypeConverter<CTSE_Info>::
3601                     SafeCast(&*m_Selector->m_LimitObject));
3602         break;
3603     case SAnnotSelector::eLimit_Seq_entry_Info:
3604         x_SearchAll(*CTypeConverter<CSeq_entry_Info>::
3605                     SafeCast(&*m_Selector->m_LimitObject));
3606         break;
3607     case SAnnotSelector::eLimit_Seq_annot_Info:
3608         x_SearchAll(*CTypeConverter<CSeq_annot_Info>::
3609                     SafeCast(&*m_Selector->m_LimitObject));
3610         break;
3611     default:
3612         NCBI_THROW(CAnnotException, eLimitError,
3613                    "CAnnot_Collector::x_SearchAll: invalid mode");
3614     }
3615 }
3616 
3617 
x_SearchAll(const CSeq_entry_Info & entry_info)3618 void CAnnot_Collector::x_SearchAll(const CSeq_entry_Info& entry_info)
3619 {
3620     {{
3621         entry_info.UpdateAnnotIndex();
3622         const CBioseq_Base_Info& base = entry_info.x_GetBaseInfo();
3623         // Collect all annotations from the entry
3624         ITERATE( CBioseq_Base_Info::TAnnot, ait, base.GetAnnot() ) {
3625             x_SearchAll(**ait);
3626             if ( x_NoMoreObjects() )
3627                 return;
3628         }
3629     }}
3630 
3631     if ( entry_info.IsSet() ) {
3632         CConstRef<CBioseq_set_Info> set(&entry_info.GetSet());
3633         // Collect annotations from all children
3634         ITERATE( CBioseq_set_Info::TSeq_set, cit, set->GetSeq_set() ) {
3635             x_SearchAll(**cit);
3636             if ( x_NoMoreObjects() )
3637                 return;
3638         }
3639     }
3640 }
3641 
3642 
x_SearchAll(const CSeq_annot_Info & annot_info)3643 void CAnnot_Collector::x_SearchAll(const CSeq_annot_Info& annot_info)
3644 {
3645     if ( m_Selector->ExcludedAnnotName(annot_info.GetName()) ) {
3646         return;
3647     }
3648 
3649     _ASSERT(m_Selector->m_LimitTSE);
3650     annot_info.UpdateAnnotIndex();
3651     CSeq_annot_Handle sah(annot_info, m_Selector->m_LimitTSE);
3652     // Collect all annotations from the annot
3653     ITERATE ( CSeq_annot_Info::TAnnotObjectInfos, aoit,
3654               annot_info.GetAnnotObjectInfos() ) {
3655         const CAnnotObject_Info& annot_info = *aoit;
3656         if ( annot_info.IsRemoved() ) {
3657             continue;
3658         }
3659         if ( !m_Selector->MatchType(annot_info) ) {
3660             continue;
3661         }
3662 
3663         if ( annot_info.GetAnnotIndex() == CSeq_annot_Info::kWholeAnnotIndex ) {
3664             const CSeq_annot_Info& seq_annot = annot_info.GetSeq_annot_Info();
3665             if ( seq_annot.IsSortedTable() ) {
3666                 // sorted Seq-table has only one CAnnotObject_Info
3667                 // but we need to add all individual features
3668                 auto whole = CRange<TSeqPos>::GetWhole();
3669                 for ( CSeq_annot_SortedIter it = seq_annot.StartSortedIterator(whole); it; ++it ) {
3670                     CAnnotObject_Ref annot_ref(sah, it, 0);
3671                     x_AddObject(annot_ref);
3672                     if ( m_Selector->m_CollectSeq_annots || x_NoMoreObjects() ) {
3673                         return;
3674                     }
3675                 }
3676             }
3677             continue;
3678         }
3679 
3680         CAnnotObject_Ref annot_ref(annot_info, sah);
3681         x_AddObject(annot_ref);
3682         if ( m_Selector->m_CollectSeq_annots || x_NoMoreObjects() ) {
3683             return;
3684         }
3685     }
3686 
3687     static const size_t kAnnotTypeIndex_SNP =
3688         CAnnotType_Index::GetSubtypeIndex(CSeqFeatData::eSubtype_variation);
3689 
3690     if ( m_CollectAnnotTypes.test(kAnnotTypeIndex_SNP) &&
3691          annot_info.x_HasSNP_annot_Info() ) {
3692         const CSeq_annot_SNP_Info& snp_annot =
3693             annot_info.x_GetSNP_annot_Info();
3694         TSeqPos index = 0;
3695         ITERATE ( CSeq_annot_SNP_Info, snp_it, snp_annot ) {
3696             const SSNP_Info& snp = *snp_it;
3697             CAnnotObject_Ref annot_ref(snp_annot, sah, snp, 0);
3698             x_AddObject(annot_ref);
3699             if ( m_Selector->m_CollectSeq_annots || x_NoMoreObjects() ) {
3700                 return;
3701             }
3702             ++index;
3703         }
3704     }
3705 }
3706 
3707 
x_CollectMapped(const CSeqMap_CI & seg,CSeq_loc & master_loc_empty,const CSeq_id_Handle & master_id,const CHandleRange & master_hr,CSeq_loc_Conversion_Set & cvt_set)3708 void CAnnot_Collector::x_CollectMapped(const CSeqMap_CI&     seg,
3709                                        CSeq_loc&             master_loc_empty,
3710                                        const CSeq_id_Handle& master_id,
3711                                        const CHandleRange&   master_hr,
3712                                        CSeq_loc_Conversion_Set& cvt_set)
3713 {
3714     CHandleRange::TOpenRange master_seg_range(
3715         seg.GetPosition(),
3716         seg.GetEndPosition());
3717     CHandleRange::TOpenRange ref_seg_range(seg.GetRefPosition(),
3718                                            seg.GetRefEndPosition());
3719     bool reversed = seg.GetRefMinusStrand();
3720     TSignedSeqPos shift;
3721     if ( !reversed ) {
3722         shift = ref_seg_range.GetFrom() - master_seg_range.GetFrom();
3723     }
3724     else {
3725         shift = ref_seg_range.GetTo() + master_seg_range.GetFrom();
3726     }
3727     CSeq_id_Handle ref_id = seg.GetRefSeqid();
3728     CHandleRangeMap ref_loc;
3729     {{ // translate master_loc to ref_loc
3730         CHandleRange& hr = ref_loc.AddRanges(ref_id);
3731         ITERATE ( CHandleRange, mlit, master_hr ) {
3732             CHandleRange::TOpenRange range = master_seg_range & mlit->first;
3733             if ( !range.Empty() ) {
3734                 ENa_strand strand = mlit->second;
3735                 if ( !reversed ) {
3736                     range.SetOpen(range.GetFrom() + shift,
3737                                   range.GetToOpen() + shift);
3738                 }
3739                 else {
3740                     if ( strand != eNa_strand_unknown ) {
3741                         strand = Reverse(strand);
3742                     }
3743                     range.Set(shift - range.GetTo(), shift - range.GetFrom());
3744                 }
3745                 hr.AddRange(range, strand);
3746             }
3747         }
3748         if ( hr.Empty() )
3749             return;
3750     }}
3751 
3752     CRef<CSeq_loc_Conversion> cvt(new CSeq_loc_Conversion(master_loc_empty,
3753                                                           master_id,
3754                                                           seg,
3755                                                           ref_id,
3756                                                           m_Scope));
3757     cvt_set.Add(*cvt, cvt_set.kAllIndexes);
3758 }
3759 
3760 
x_SearchMapped(const CSeqMap_CI & seg,CSeq_loc & master_loc_empty,const CSeq_id_Handle & master_id,const CHandleRange & master_hr)3761 bool CAnnot_Collector::x_SearchMapped(const CSeqMap_CI&     seg,
3762                                       CSeq_loc&             master_loc_empty,
3763                                       const CSeq_id_Handle& master_id,
3764                                       const CHandleRange&   master_hr)
3765 {
3766     if ( seg.FeaturePolicyWasApplied() ) {
3767         // If we have found explict feature policy object
3768         // it means that time/segments limits are no longer active.
3769         x_StopSearchLimits();
3770     }
3771     if ( !m_AnnotSet.empty() || m_MappingCollector.get() ) {
3772         // If we have found matching annotations it means the sequence
3773         // is annotated and time/segments limits are no longer active.
3774         x_StopSearchLimits();
3775     }
3776     if ( m_SearchTime.IsRunning() &&
3777          m_SearchTime.Elapsed() > m_Selector->GetMaxSearchTime() ) {
3778         NCBI_THROW(CAnnotSearchLimitException, eTimeLimitExceded,
3779                    "CAnnot_Collector: "
3780                    "search time limit exceeded, no annotations found");
3781     }
3782     if ( m_SearchSegments != numeric_limits<TMaxSearchSegments>::max() &&
3783          (x_MaxSearchSegmentsLimitIsReached() || --m_SearchSegments == 0) ) {
3784         if ( m_SearchSegmentsAction == SAnnotSelector::eMaxSearchSegmentsThrow ) {
3785             NCBI_THROW(CAnnotSearchLimitException, eSegmentsLimitExceded,
3786                        "CAnnot_Collector: "
3787                        "search segments limit exceeded, no annotations found");
3788         }
3789         if ( m_SearchSegmentsAction == SAnnotSelector::eMaxSearchSegmentsLog ) {
3790             ERR_POST_X(2, Warning << "CAnnot_Collector: "
3791                        "search segments limit exceeded, no annotations found");
3792         }
3793         // stop searching
3794         return false;
3795     }
3796     CHandleRange::TOpenRange master_seg_range(
3797         seg.GetPosition(),
3798         seg.GetEndPosition());
3799     CHandleRange::TOpenRange ref_seg_range(seg.GetRefPosition(),
3800                                            seg.GetRefEndPosition());
3801     bool reversed = seg.GetRefMinusStrand();
3802     TSignedSeqPos shift;
3803     if ( !reversed ) {
3804         shift = ref_seg_range.GetFrom() - master_seg_range.GetFrom();
3805     }
3806     else {
3807         shift = ref_seg_range.GetTo() + master_seg_range.GetFrom();
3808     }
3809     CSeq_id_Handle ref_id = seg.GetRefSeqid();
3810     CHandleRangeMap ref_loc;
3811     {{ // translate master_loc to ref_loc
3812         CHandleRange& hr = ref_loc.AddRanges(ref_id);
3813         ITERATE ( CHandleRange, mlit, master_hr ) {
3814             CHandleRange::TOpenRange range = master_seg_range & mlit->first;
3815             if ( !range.Empty() ) {
3816                 ENa_strand strand = mlit->second;
3817                 if ( !reversed ) {
3818                     range.SetOpen(range.GetFrom() + shift,
3819                                   range.GetToOpen() + shift);
3820                 }
3821                 else {
3822                     if ( strand != eNa_strand_unknown ) {
3823                         strand = Reverse(strand);
3824                     }
3825                     range.Set(shift - range.GetTo(), shift - range.GetFrom());
3826                 }
3827                 hr.AddRange(range, strand);
3828             }
3829         }
3830         if ( hr.Empty() )
3831             return false;
3832     }}
3833 
3834     if (m_Selector->m_NoMapping) {
3835         return x_SearchLoc(ref_loc, 0, &seg.GetUsingTSE());
3836     }
3837     else {
3838         CRef<CSeq_loc_Conversion> cvt(new CSeq_loc_Conversion(master_loc_empty,
3839                                                               master_id,
3840                                                               seg,
3841                                                               ref_id,
3842                                                               m_Scope));
3843         return x_SearchLoc(ref_loc, &*cvt, &seg.GetUsingTSE());
3844     }
3845 }
3846 
3847 
3848 const CAnnot_Collector::TAnnotTypes&
x_GetAnnotTypes(void) const3849 CAnnot_Collector::x_GetAnnotTypes(void) const
3850 {
3851     if (m_AnnotTypes2.empty() && m_AnnotTypes.any()) {
3852         for (size_t i = 0; i < m_AnnotTypes.size(); ++i) {
3853             if ( m_AnnotTypes.test(i) ) {
3854                 m_AnnotTypes2.push_back(CAnnotType_Index::GetTypeSelector(i));
3855             }
3856         }
3857     }
3858     return m_AnnotTypes2;
3859 }
3860 
3861 
3862 const CAnnot_Collector::TAnnotNames&
x_GetAnnotNames(void) const3863 CAnnot_Collector::x_GetAnnotNames(void) const
3864 {
3865     if ( !m_AnnotNames.get() ) {
3866         TAnnotNames* names = new TAnnotNames;
3867         m_AnnotNames.reset(names);
3868         ITERATE ( TAnnotSet, it, m_AnnotSet ) {
3869             names->insert(it->GetSeq_annot_Info().GetName());
3870         }
3871     }
3872     return *m_AnnotNames;
3873 }
3874 
3875 
x_GetCostOfLoadingInBytes(void) const3876 Uint8 CAnnot_Collector::x_GetCostOfLoadingInBytes(void) const
3877 {
3878     return m_LoadBytes;
3879 }
3880 
3881 
x_GetCostOfLoadingInSeconds(void) const3882 double CAnnot_Collector::x_GetCostOfLoadingInSeconds(void) const
3883 {
3884     return m_LoadSeconds;
3885 }
3886 
3887 
3888 END_SCOPE(objects)
3889 END_NCBI_SCOPE
3890