1 #ifndef ANNOT_SELECTOR__HPP
2 #define ANNOT_SELECTOR__HPP
3 
4 /*  $Id: annot_selector.hpp 603744 2020-03-16 17:42:16Z vasilche $
5 * ===========================================================================
6 *
7 *                            PUBLIC DOMAIN NOTICE
8 *               National Center for Biotechnology Information
9 *
10 *  This software/database is a "United States Government Work" under the
11 *  terms of the United States Copyright Act.  It was written as part of
12 *  the author's official duties as a United States Government employee and
13 *  thus cannot be copyrighted.  This software/database is freely available
14 *  to the public for use. The National Library of Medicine and the U.S.
15 *  Government have not placed any restriction on its use or reproduction.
16 *
17 *  Although all reasonable efforts have been taken to ensure the accuracy
18 *  and reliability of the software and data, the NLM and the U.S.
19 *  Government do not and cannot warrant the performance or results that
20 *  may be obtained by using this software or data. The NLM and the U.S.
21 *  Government disclaim all warranties, express or implied, including
22 *  warranties of performance, merchantability or fitness for any particular
23 *  purpose.
24 *
25 *  Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Aleksey Grichenko, Michael Kimelman, Eugene Vasilchenko
30 *
31 * File Description:
32 *   Annotations selector structure.
33 *
34 */
35 
36 #include <corelib/ncbi_limits.h>
37 #include <objmgr/annot_name.hpp>
38 #include <objmgr/annot_type_selector.hpp>
39 #include <objmgr/tse_handle.hpp>
40 #include <objmgr/bioseq_handle.hpp>
41 
42 #include <bitset>
43 #include <vector>
44 
45 BEGIN_NCBI_SCOPE
46 BEGIN_SCOPE(objects)
47 
48 
49 /** @addtogroup ObjectManagerIterators
50  *
51  * @{
52  */
53 
54 
55 class CSeq_entry;
56 class CSeq_annot;
57 
58 class CTSE_Handle;
59 class CBioseq_Handle;
60 class CSeq_entry_Handle;
61 class CSeq_annot_Handle;
62 class CAnnotObject_Info;
63 class CHandleRangeMap;
64 
65 
66 /////////////////////////////////////////////////////////////////////////////
67 ///
68 ///  IFeatComparator
69 ///
70 ///  Interface for user-defined ordering of features,
71 ///  should be inherited from CObject
72 ///
73 
74 struct NCBI_XOBJMGR_EXPORT IFeatComparator
75 {
76     virtual ~IFeatComparator();
77     virtual bool Less(const CSeq_feat& f1,
78                       const CSeq_feat& f2,
79                       CScope* scope) = 0;
80 };
81 
82 
83 /////////////////////////////////////////////////////////////////////////////
84 ///
85 ///  SAnnotSelector --
86 ///
87 ///  Structure to control retrieval of Seq-annots
88 
89 struct NCBI_XOBJMGR_EXPORT SAnnotSelector : public SAnnotTypeSelector
90 {
91     /// Flag to indicate location overlapping method
enum EOverlapType {
    eOverlap_Intervals  = 0, ///< default - overlapping of individual intervals
    eOverlap_TotalRange = 1  ///< overlapping of total ranges only
};
96     /// Flag to indicate references resolution method
enum EResolveMethod {
    eResolve_None = 0, ///< Do not search annotations on segments
    eResolve_TSE  = 1, ///< default - search only on segments in the same TSE
    eResolve_All  = 2  ///< Search annotations for all referenced sequences
};
102     /// Flag to indicate sorting method
enum ESortOrder {
    eSortOrder_None    = 0, ///< do not sort annotations for faster retrieval
    eSortOrder_Normal  = 1, ///< default - increasing start, decreasing length
    eSortOrder_Reverse = 2  ///< decreasing end, decreasing length
};
108     /// Flag to indicate handling of unresolved seq-ids
enum EUnresolvedFlag {
    eIgnoreUnresolved = 0, ///< Ignore unresolved ids (default)
    eSearchUnresolved = 1, ///< Search annotations for unresolvable IDs
    eFailUnresolved   = 2  ///< Throw exception for unresolved ids
};
114 
115     SAnnotSelector(TAnnotType annot = CSeq_annot::C_Data::e_not_set,
116                    TFeatType  feat  = CSeqFeatData::e_not_set,
117                    bool       feat_product = false);
118     SAnnotSelector(TFeatType  feat,
119                    bool       feat_product = false);
120     SAnnotSelector(TFeatSubtype feat_subtype);
121 
122     SAnnotSelector(const SAnnotSelector& sel);
123     SAnnotSelector& operator=(const SAnnotSelector& sel);
124     ~SAnnotSelector(void);
125 
126     /// Set annotation type (feat, align, graph)
SetAnnotTypeSAnnotSelector127     SAnnotSelector& SetAnnotType(TAnnotType type)
128         {
129             if ( GetAnnotType() != type ) {
130                 x_ClearAnnotTypesSet();
131                 SAnnotTypeSelector::SetAnnotType(type);
132             }
133             return *this;
134         }
135 
136     /// Set feature type (also set annotation type to feat)
SetFeatTypeSAnnotSelector137     SAnnotSelector& SetFeatType(TFeatType type)
138         {
139             x_ClearAnnotTypesSet();
140             SAnnotTypeSelector::SetFeatType(type);
141             return *this;
142         }
143 
144     /// Set feature subtype (also set annotation and feat type)
SetFeatSubtypeSAnnotSelector145     SAnnotSelector& SetFeatSubtype(TFeatSubtype subtype)
146         {
147             x_ClearAnnotTypesSet();
148             SAnnotTypeSelector::SetFeatSubtype(subtype);
149             return *this;
150         }
151 
152     /// Include annotation type in the search
153     SAnnotSelector& IncludeAnnotType(TAnnotType type);
154     /// Exclude annotation type from the search
155     SAnnotSelector& ExcludeAnnotType(TAnnotType type);
156     /// Include feature type in the search
157     SAnnotSelector& IncludeFeatType(TFeatType type);
158     /// Exclude feature type from the search
159     SAnnotSelector& ExcludeFeatType(TFeatType type);
160     /// Include feature subtype in the search
161     SAnnotSelector& IncludeFeatSubtype(TFeatSubtype subtype);
162     /// Exclude feature subtype from the search
163     SAnnotSelector& ExcludeFeatSubtype(TFeatSubtype subtype);
164 
165     /// Check annot type (ignore subtypes).
CheckAnnotTypeSAnnotSelector166     bool CheckAnnotType(TAnnotType type) const
167         {
168             return GetAnnotType() == type;
169         }
170 
171     /// Set annot type, include all subtypes.
172     SAnnotSelector& ForceAnnotType(TAnnotType type);
173 
174     /// Return true if at least one subtype of the type is included
175     /// or selected type is not set (any).
176     bool IncludedAnnotType(TAnnotType type) const;
177     bool IncludedFeatType(TFeatType type) const;
178     bool IncludedFeatSubtype(TFeatSubtype subtype) const;
179 
180     /// Check if type of the annotation matches the selector
181     bool MatchType(const CAnnotObject_Info& annot_info) const;
182 
183     /// Return true if the features should be searched using their
184     /// product rather than location.
GetFeatProductSAnnotSelector185     bool GetFeatProduct(void) const
186         {
187             return m_FeatProduct;
188         }
189     /// Set flag indicating if the features should be searched by
190     /// their product rather than location.
SetByProductSAnnotSelector191     SAnnotSelector& SetByProduct(bool byProduct = true)
192         {
193             m_FeatProduct = byProduct;
194             return *this;
195         }
196 
197     /// Get the selected overlap type
GetOverlapTypeSAnnotSelector198     EOverlapType GetOverlapType(void) const
199         {
200             return m_OverlapType;
201         }
202     /// Set overlap type.
203     ///   eOverlap_Intervals - default, overlapping of locations should
204     ///       be checked using individual intervals.
205     ///   eOverlap_TotalRange indicates that overlapping of locations
206     ///       should be checked only by their total range.
SetOverlapTypeSAnnotSelector207     SAnnotSelector& SetOverlapType(EOverlapType overlap_type)
208         {
209             m_OverlapType = overlap_type;
210             return *this;
211         }
212     /// Check overlapping of individual intervals
SetOverlapIntervalsSAnnotSelector213     SAnnotSelector& SetOverlapIntervals(void)
214         {
215             return SetOverlapType(eOverlap_Intervals);
216         }
217     /// Check overlapping only of total ranges
SetOverlapTotalRangeSAnnotSelector218     SAnnotSelector& SetOverlapTotalRange(void)
219         {
220             return SetOverlapType(eOverlap_TotalRange);
221         }
222 
223     /// Get the selected sort order
GetSortOrderSAnnotSelector224     ESortOrder GetSortOrder(void) const
225         {
226             return m_SortOrder;
227         }
228     /// Set sort order of annotations.
229     ///   eSortOrder_None - do not sort annotations for faster retrieval.
230     ///   eSortOrder_Normal - default. Sort by start (increasing), then by
231     ///       length (decreasing).
232     ///   eSortOrder_Reverse - sort by end (decreasing), then length
233     ///       (decreasing).
SetSortOrderSAnnotSelector234     SAnnotSelector& SetSortOrder(ESortOrder sort_order)
235         {
236             m_SortOrder = sort_order;
237             return *this;
238         }
239 
SetFeatComparatorSAnnotSelector240     SAnnotSelector& SetFeatComparator(IFeatComparator* comparator)
241         {
242             m_FeatComparator = comparator;
243             return *this;
244         }
ResetFeatComparatorSAnnotSelector245     SAnnotSelector& ResetFeatComparator(void)
246         {
247             m_FeatComparator.Reset();
248             return *this;
249         }
IsSetFeatComparatorSAnnotSelector250     bool IsSetFeatComparator(void) const
251         {
252             return (0 != m_FeatComparator);
253         }
GetFeatComparatorSAnnotSelector254     IFeatComparator* GetFeatComparator(void) const
255         {
256             return m_FeatComparator.GetNCPointerOrNull();
257         }
258 
259     /// GetResolveMethod() returns current value of resolve_method.
260     ///
261     ///  @sa
262     ///    SetResolveMethod(), SetResolveNone(), SetResolveTSE(),
263     ///    SetResolveAll()
GetResolveMethodSAnnotSelector264     EResolveMethod GetResolveMethod(void) const
265         {
266             return m_ResolveMethod;
267         }
268     /// SetResolveMethod() controls visibility of subsegments depending
269     /// on whether it's packaged together with master sequence.
270     ///   eResolve_None means to skip all subsegments completely.
271     ///       It has the same effect as calling SetResolveDepth(0).
272     ///   eResolve_TSE means to look on 'near' segments, packaged in the
273     ///       Same TSE (top level Seq-entry).
274     ///   eResolve_All lifts any restriction of segments by packaging.
275     /// This option works in addition to 'resolve depth', 'adaptive depth',
276     /// and 'exact depth'.
277     ///
278     ///  @sa
279     ///    SetResolveNone(), SetResolveTSE(), SetResolveAll(),
280     ///    SetResolveDepth(), SetExactDepth(), SetAdaptiveDepth(),
281     ///    SetUnresolvedFlag().
SetResolveMethodSAnnotSelector282     SAnnotSelector& SetResolveMethod(EResolveMethod resolve_method)
283         {
284             m_ResolveMethod = resolve_method;
285             return *this;
286         }
287     /// SetResolveNone() is equivalent to SetResolveMethod(eResolve_None).
288     ///  @sa
289     ///    SetResolveMethod()
SetResolveNoneSAnnotSelector290     SAnnotSelector& SetResolveNone(void)
291         {
292             return SetResolveMethod(eResolve_None);
293         }
294     /// SetResolveTSE() is equivalent to SetResolveMethod(eResolve_TSE).
295     ///  @sa
296     ///    SetResolveMethod()
SetResolveTSESAnnotSelector297     SAnnotSelector& SetResolveTSE(void)
298         {
299             return SetResolveMethod(eResolve_TSE);
300         }
301     /// SetResolveAll() is equivalent to SetResolveMethod(eResolve_All).
302     ///  @sa
303     ///    SetResolveMethod()
SetResolveAllSAnnotSelector304     SAnnotSelector& SetResolveAll(void)
305         {
306             return SetResolveMethod(eResolve_All);
307         }
308 
309     /// GetResolveDepth() returns current limit of subsegment resolution
310     /// in searching annotations.
311     ///
312     ///  @sa
313     ///    SetResolveDepth()
GetResolveDepthSAnnotSelector314     int GetResolveDepth(void) const
315         {
316             return m_ResolveDepth;
317         }
318     /// SetResolveDepth sets the limit of subsegment resolution
319     /// in searching annotations.
320     /// Zero means look for annotations directly pointing
321     /// to the sequence. One means to look on direct segments
322     /// of the sequence too.
323     /// By default the limit is set to kIntMax, meaning no restriction.
324     ///
325     ///  @sa
326     ///    SetExactDepth(), SetAdaptiveDepth(), GetResolveDepth()
SetResolveDepthSAnnotSelector327     SAnnotSelector& SetResolveDepth(int depth)
328         {
329             m_ResolveDepth = depth;
330             return *this;
331         }
332 
333     typedef vector<SAnnotTypeSelector> TAdaptiveTriggers;
enum EAdaptiveDepthFlags {
    kAdaptive_None        = 0,        ///< no flag bits set
    fAdaptive_Default     = 1 << 0,
    kAdaptive_Default     = fAdaptive_Default,
    fAdaptive_ByTriggers  = 1 << 1,
    fAdaptive_BySubtypes  = 1 << 2,
    fAdaptive_ByPolicy    = 1 << 3,
    fAdaptive_BySeqClass  = 1 << 4,
    fAdaptive_ByNamedAcc  = 1 << 5,
    /// Union of all fAdaptive_By* bits.
    kAdaptive_All         = (fAdaptive_ByTriggers | fAdaptive_BySubtypes |
                             fAdaptive_ByPolicy   | fAdaptive_BySeqClass |
                             fAdaptive_ByNamedAcc),
    /// All fAdaptive_By* bits except fAdaptive_BySubtypes.
    kAdaptive_DefaultBits = (kAdaptive_All & ~fAdaptive_BySubtypes)
};
349     typedef Uint1 TAdaptiveDepthFlags;
350     /// GetAdaptiveDepth() returns current value of 'adaptive depth' flag.
351     ///
352     ///  @sa
353     ///    SetAdaptiveDepth()
GetAdaptiveDepthSAnnotSelector354     bool GetAdaptiveDepth(void) const
355         {
356             return m_AdaptiveDepthFlags != 0;
357         }
358     /// SetAdaptiveDepth() requests to restrict subsegment resolution
359     /// depending on annotations found on lower level of segments.
360     /// It's meaningful in cases when features on segments are also
361     /// annotated on master sequence. Setting this flag will avoid
362     /// duplicates, and speed up loading.
363     /// Annotation iterator will look for annotations of special types,
364     /// by default it's gene, mrna, and cds. If any annotation from those
365     /// types is found the iterator will treat this as annotated sequence,
366     /// and not look for more annotations on subsegments.
367     /// This option works in addition to SetResolveDepth(), so subsegment
368     /// resolution stops when either specified depth is reached, or if
369     /// any adaptive trigger annotation is found.
370     /// Adaptive depth flag has no effect if 'exact depth' is set.
371     ///
372     /// Note that trigger annotations on one segment have no effect on
373     /// adaptive resolution of another segment.
374     /// So, for example, if
375     /// Master sequence A has segments B1 and B2, while B1 has segments
376     /// C11 and C12, and B2 has segments C21 and C22:    <br>
377     ///  |--------------- A ----------------|            <br>
378     ///  |------ B1 ------||------ B2 ------|            <br>
379     ///  |- C11 -||- C12 -||- C21 -||- C22 -|            <br>
380     /// Also, there are genes only on sequences B1, C11, C12, C21, and C22.
381     /// For simplicity, there are no other adaptive trigger annotations.
382     /// In this case annotation iterator in 'adaptive' mode will return
383     /// genes and other annotations from sequences A, B1, B2, C21, and C22.
384     /// It will skip searching on C11, and C12 because trigger feature will
385     /// be found on B1.
386     ///
387     ///  @sa
388     ///    SetResolveDepth(), SetExactDepth(), SetAdaptiveTrigger(), GetAdaptiveDepth()
389     SAnnotSelector& SetAdaptiveDepth(bool value = true);
390 
391     /// SetAdaptiveDepthFlags() sets flags for adaptive depth heuristics
392     ///
393     ///  @sa
394     ///    SetAdaptiveDepth(), SetAdaptiveTrigger(), GetAdaptiveDepthFlags()
395     SAnnotSelector& SetAdaptiveDepthFlags(TAdaptiveDepthFlags flags);
396 
397     /// GetAdaptiveDepthFlags() returns current set of adaptive depth
398     /// heuristics flags
399     ///
400     ///  @sa
401     ///    SetAdaptiveDepthFlags()
GetAdaptiveDepthFlagsSAnnotSelector402     TAdaptiveDepthFlags GetAdaptiveDepthFlags(void) const
403         {
404             return m_AdaptiveDepthFlags;
405         }
406 
407     /// Get default set of adaptive depth flags
408     static TAdaptiveDepthFlags GetDefaultAdaptiveDepthFlags();
409 
410     /// Set default set of adaptive depth flags
411     static void SetDefaultAdaptiveDepthFlags(TAdaptiveDepthFlags flags);
412 
413     /// SetAdaptiveTrigger() allows changing the default set of adaptive
414     /// trigger annotations.
415     /// Default set is: gene, mrna, cds.
416     SAnnotSelector& SetAdaptiveTrigger(const SAnnotTypeSelector& sel);
417 
418     /// SetExactDepth() specifies that annotations will be searched
419     /// on the segment level specified by SetResolveDepth() only.
420     /// By default this flag is not set, and annotations iterators
421     /// will also return annotations from above levels.
422     /// This flag, when set, overrides 'adaptive depth' flag.
423     ///
424     /// Examples:
425     ///   SetResolveDepth(0)
426     ///       - only direct annotations on the sequence will be found.
427     ///   SetResolveDepth(1), SetExactDepth(false) (default)
428     ///       - find annotations on the sequence, and its direct segments.
429     ///   SetResolveDepth(1), SetExactDepth(true)
430     ///       - find annotations on the direct segments.
431     ///
432     ///  @sa
433     ///    SetResolveDepth(), SetAdaptiveDepth(), SetAdaptiveTrigger()
SetExactDepthSAnnotSelector434     SAnnotSelector& SetExactDepth(bool value = true)
435         {
436             m_ExactDepth = value;
437             return *this;
438         }
439     /// GetExactDepth() returns current value of 'exact depth' flag.
440     ///
441     ///  @sa
442     ///    SetExactDepth()
GetExactDepthSAnnotSelector443     bool GetExactDepth(void) const
444         {
445             return m_ExactDepth;
446         }
447 
448     typedef size_t TMaxSize;
449     /// Get maximum allowed number of annotations to find.
GetMaxSizeSAnnotSelector450     TMaxSize GetMaxSize(void) const
451         {
452             return m_MaxSize;
453         }
454     /// Set maximum number of annotations to find.
455     /// Set to 0 or numeric_limits<TMaxSize>::max() for no limit (default).
SetMaxSizeSAnnotSelector456     SAnnotSelector& SetMaxSize(TMaxSize max_size)
457         {
458             m_MaxSize = max_size>0? max_size: numeric_limits<TMaxSize>::max();
459             return *this;
460         }
461 
462     typedef unsigned TMaxSearchSegments;
463     /// Get maximum number of empty segments to search before giving up.
GetMaxSearchSegmentsSAnnotSelector464     TMaxSearchSegments GetMaxSearchSegments(void) const
465         {
466             return m_MaxSearchSegments;
467         }
468     /// Set maximum number of empty segments to search before giving up.
469     /// The limit is effective only if no annotation was found.
470     /// Set to 0 for no limit (default).
SetMaxSearchSegmentsSAnnotSelector471     SAnnotSelector& SetMaxSearchSegments(TMaxSearchSegments max_segments)
472         {
473             m_MaxSearchSegments = max_segments? max_segments: numeric_limits<TMaxSearchSegments>::max();
474             return *this;
475         }
476 
enum EMaxSearchSegmentsAction {
    eMaxSearchSegmentsThrow  = 0, ///< default
    eMaxSearchSegmentsLog    = 1, ///< log error message, and stop searching
    eMaxSearchSegmentsSilent = 2  ///< silently stop searching
};
GetMaxSearchSegmentsActionSAnnotSelector482     EMaxSearchSegmentsAction GetMaxSearchSegmentsAction(void) const
483         {
484             return m_MaxSearchSegmentsAction;
485         }
SetMaxSearchSegmentsActionSAnnotSelector486     SAnnotSelector& SetMaxSearchSegmentsAction(EMaxSearchSegmentsAction action)
487         {
488             m_MaxSearchSegmentsAction = action;
489             return *this;
490         }
491 
492     typedef float TMaxSearchTime;
493     /// Get maximum time (in seconds) to search before giving up.
GetMaxSearchTimeSAnnotSelector494     TMaxSearchTime GetMaxSearchTime(void) const
495         {
496             return m_MaxSearchTime;
497         }
498     /// Set maximum time (in seconds) to search before giving up.
499     /// The limit is effective only if no annotation was found.
500     /// Set to 0 for no limit (default).
SetMaxSearchTimeSAnnotSelector501     SAnnotSelector& SetMaxSearchTime(TMaxSearchTime max_time)
502         {
503             m_MaxSearchTime = max_time>0? max_time: numeric_limits<TMaxSearchTime>::max();
504             return *this;
505         }
506 
507     /// Check if the parent object of annotations is set. If set,
508     /// only the annotations from the object (TSE, seq-entry or seq-annot)
509     /// will be found.
HasLimitSAnnotSelector510     bool HasLimit(void)
511         {
512             return m_LimitObject.NotEmpty();
513         }
514     /// Remove restrictions on the parent object of annotations.
515     SAnnotSelector& SetLimitNone(void);
516     /// Limit annotations to those from the TSE only.
517     SAnnotSelector& SetLimitTSE(const CTSE_Handle& limit);
518     SAnnotSelector& SetLimitTSE(const CSeq_entry_Handle& limit);
519     /// Limit annotations to those from the seq-entry only.
520     SAnnotSelector& SetLimitSeqEntry(const CSeq_entry_Handle& limit);
521     /// Limit annotations to those from the seq-annot only.
522     SAnnotSelector& SetLimitSeqAnnot(const CSeq_annot_Handle& limit);
523 
524     /// Get current method of handling unresolved seq-ids
GetUnresolvedFlagSAnnotSelector525     EUnresolvedFlag GetUnresolvedFlag(void) const
526         {
527             return m_UnresolvedFlag;
528         }
529     /// Set method of handling unresolved seq-ids. A seq-id may be
530     /// unresolvable due to EResolveMethod restrictions. E.g. a
531     /// seq-id may be a far reference, while the annotations for this
532     /// seq-id are stored in the master sequence TSE.
533     ///   eIgnoreUnresolved - default, do not search for annotations
534     ///       on unresolved seq-ids.
535     ///   eSearchUnresolved - search for annotations on unresolved
536     ///       seq-ids.
537     ///   eFailUnresolved - throw CAnnotException exception if a seq-id
538     ///       can not be resolved.
539     ///  @sa
540     ///    SetSearchExternal()
SetUnresolvedFlagSAnnotSelector541     SAnnotSelector& SetUnresolvedFlag(EUnresolvedFlag flag)
542         {
543             m_UnresolvedFlag = flag;
544             return *this;
545         }
SetIgnoreUnresolvedSAnnotSelector546     SAnnotSelector& SetIgnoreUnresolved(void)
547         {
548             m_UnresolvedFlag = eIgnoreUnresolved;
549             return *this;
550         }
SetSearchUnresolvedSAnnotSelector551     SAnnotSelector& SetSearchUnresolved(void)
552         {
553             m_UnresolvedFlag = eSearchUnresolved;
554             return *this;
555         }
SetFailUnresolvedSAnnotSelector556     SAnnotSelector& SetFailUnresolved(void)
557         {
558             m_UnresolvedFlag = eFailUnresolved;
559             return *this;
560         }
561 
562     /// External annotations for the Object Manager are annotations located in
563     /// top level Seq-entry different from TSE with the sequence they annotate.
564     /// They can be excluded from search by SetExcludeExternal() option.
565     ///
566     /// Exclude all external annotations from the search.
567     /// Effective only when no Seq-entry/Bioseq/Seq-annot limit is set.
SetExcludeExternalSAnnotSelector568     SAnnotSelector& SetExcludeExternal(bool exclude = true)
569         {
570             m_ExcludeExternal = exclude;
571             return *this;
572         }
573 
574     /// Set all flags for searching standard GenBank external annotations.
575     /// 'seq' or 'tse' should contain the virtual segmented bioseq as provided
576     /// by GenBank in its external annotations blobs (SNP, CDD, etc.)
577     /// The GenBank external annotations are presented as a virtual delta
578     /// sequence referencing the annotated GI.
579     /// So it's possible to lookup for external annotations without retrieving
580     /// the GI itself.
581     /// To make it possible the following flags are set by SetSearchExternal():
582     ///   SetResolveTSE() - prevents loading the GI sequence bioseqs
583     ///   SetLimitTSE()   - search only external annotations in the given TSE
584     ///   SetSearchUnresolved() - search on unresolved IDs (GI) too.
585     SAnnotSelector& SetSearchExternal(const CTSE_Handle& tse);
586     SAnnotSelector& SetSearchExternal(const CSeq_entry_Handle& tse);
587     SAnnotSelector& SetSearchExternal(const CBioseq_Handle& seq);
588 
589     /// Object manager recognizes several fields of Seq-annot as annot name.
590     /// The fields are:
591     ///   1. Seq-annot.id.other.accession with optional version.
592     ///   2. Seq-annot.desc.name
593     /// This annot name applies to all contained annotations, and it's
594     /// possible to filter annotations by their name.
595     /// By default, or after ResetAnnotsNames() is called, no filter is set.
596     /// You can add annot names to the filter, so that only annotations with
597     /// added names are visible.
598     /// Alternatively, you can exclude annot names, so that matching annot
599     /// names are excluded from the search.
600     /// Relevant methods are:
601     ///   ResetAnnotsNames()
602     ///   ResetUnnamedAnnots()
603     ///   ResetNamedAnnots()
604     ///   AddUnnamedAnnots()
605     ///   AddNamedAnnots()
606     ///   ExcludeUnnamedAnnots()
607     ///   ExcludeNamedAnnots()
608     ///   SetAllNamedAnnots()
609     typedef vector<CAnnotName> TAnnotsNames;
610     typedef map<string, int> TNamedAnnotAccessions;
611     /// Select annotations from all Seq-annots
612     SAnnotSelector& ResetAnnotsNames(void);
613     /// Reset special processing of unnamed annots (added or excluded)
614     SAnnotSelector& ResetUnnamedAnnots(void);
615     /// Reset special processing of named annots (added or excluded)
616     SAnnotSelector& ResetNamedAnnots(const CAnnotName& name);
617     SAnnotSelector& ResetNamedAnnots(const char* name);
618     /// Add unnamed annots to set of annots names to look for
619     SAnnotSelector& AddUnnamedAnnots(void);
620     /// Add named annot to set of annots names to look for
621     SAnnotSelector& AddNamedAnnots(const CAnnotName& name);
622     SAnnotSelector& AddNamedAnnots(const char* name);
623     /// Add unnamed annots to set of annots names to exclude
624     SAnnotSelector& ExcludeUnnamedAnnots(void);
625     /// Add named annot to set of annots names to exclude
626     SAnnotSelector& ExcludeNamedAnnots(const CAnnotName& name);
627     SAnnotSelector& ExcludeNamedAnnots(const char* name);
628     /// Look for all named Seq-annots
629     /// Resets the filter, and then excludes unnamed annots.
630     SAnnotSelector& SetAllNamedAnnots(void);
631     // Compatibility:
632     /// Look for named annot.
633     /// If name is empty ("") look for unnamed annots too.
634     SAnnotSelector& SetDataSource(const string& name);
635 
636     // The following methods can be used to inspect annot filter:
637     //   GetIncludedAnnotsNames()
638     //   GetExcludedAnnotsNames()
639     //   IsSetAnnotsNames()
640     //   IsSetIncludedAnnotsNames()
641     //   IncludedAnnotName()
642     //   ExcludedAnnotName()
643     // Access methods for iterator
GetIncludedAnnotsNamesSAnnotSelector644     const TAnnotsNames& GetIncludedAnnotsNames(void) const
645         {
646             return m_IncludeAnnotsNames;
647         }
GetExcludedAnnotsNamesSAnnotSelector648     const TAnnotsNames& GetExcludedAnnotsNames(void) const
649         {
650             return m_ExcludeAnnotsNames;
651         }
IsSetAnnotsNamesSAnnotSelector652     bool IsSetAnnotsNames(void) const
653         {
654             return
655                 !m_IncludeAnnotsNames.empty() ||
656                 !m_ExcludeAnnotsNames.empty();
657         }
IsSetIncludedAnnotsNamesSAnnotSelector658     bool IsSetIncludedAnnotsNames(void) const
659         {
660             return !m_IncludeAnnotsNames.empty();
661         }
662     // There is a wildcard in some of included or excluded annot names,
663     // so direct string comparison is not enough
HasWildcardInAnnotsNamesSAnnotSelector664     bool HasWildcardInAnnotsNames() const
665         {
666             return m_HasWildcardInAnnotsNames;
667         }
668     // Included annot names list is unambiguous and can be used for lookup
HasExplicitAnnotsNamesSAnnotSelector669     bool HasExplicitAnnotsNames() const
670         {
671             return !HasWildcardInAnnotsNames() && IsSetIncludedAnnotsNames();
672         }
673     bool IncludedAnnotName(const CAnnotName& name) const;
674     bool ExcludedAnnotName(const CAnnotName& name) const;
675     // check if only named annot accessions are included in annot names
676     bool HasIncludedOnlyNamedAnnotAccessions() const;
677 
678     /// Add named annot accession (NA*) in the search.
679     SAnnotSelector& ResetNamedAnnotAccessions(void);
680     SAnnotSelector& IncludeNamedAnnotAccession(const string& acc,
681                                                int zoom_level = 0);
682     SAnnotSelector& ExcludeNamedAnnotAccession(const string& acc);
683     ///
GetNamedAnnotAccessionsSAnnotSelector684     const TNamedAnnotAccessions& GetNamedAnnotAccessions(void) const
685         {
686             return *m_NamedAnnotAccessions;
687         }
688     /// check if any named annot accession is included in the search
IsIncludedAnyNamedAnnotAccessionSAnnotSelector689     bool IsIncludedAnyNamedAnnotAccession(void) const
690         {
691             return m_NamedAnnotAccessions;
692         }
693     /// check if named annot accession is included in the search
694     bool IsIncludedNamedAnnotAccession(const string& acc) const;
695 
696     // Limit search with a set of TSEs
697     SAnnotSelector& ExcludeTSE(const CTSE_Handle& tse);
698     SAnnotSelector& ExcludeTSE(const CSeq_entry_Handle& tse);
699     SAnnotSelector& ResetExcludedTSE(void);
700     bool ExcludedTSE(const CTSE_Handle& tse) const;
701     bool ExcludedTSE(const CSeq_entry_Handle& tse) const;
702 
703     // No locations mapping flag. Set to true by CAnnot_CI.
GetNoMappingSAnnotSelector704     bool GetNoMapping(void) const
705         {
706             return m_NoMapping;
707         }
SetNoMappingSAnnotSelector708     SAnnotSelector& SetNoMapping(bool value = true)
709         {
710             m_NoMapping = value;
711             return *this;
712         }
713 
714     /// Try to avoid collecting multiple objects from the same seq-annot.
715     /// Speeds up collecting seq-annots with SNP features.
SetCollectSeq_annotsSAnnotSelector716     SAnnotSelector& SetCollectSeq_annots(bool value = true)
717         {
718             m_CollectSeq_annots = value;
719             return *this;
720         }
721 
722     /// Collect available annot types rather than annots.
723     ///  @sa
724     ///    CAnnotTypes_CI::GetAnnotTypes()
SetCollectTypesSAnnotSelector725     SAnnotSelector& SetCollectTypes(bool value = true)
726         {
727             m_CollectTypes = value;
728             return *this;
729         }
730 
731     /// Collect available annot names rather than annots.
732     ///  @sa
733     ///    CAnnotTypes_CI::GetAnnotNames()
SetCollectNamesSAnnotSelector734     SAnnotSelector& SetCollectNames(bool value = true)
735         {
736             m_CollectNames = value;
737             return *this;
738         }
739 
740     /// Collect cost of loading requested data.
741     /// If cost collection is requested no actual annotations are returned
742     /// by iterator, only the cost estimation.
743     /// The resulting cost can be queried via methods GetCostOfLoadingInBytes()
744     /// and GetCostOfLoadingInSeconds() of actual annotation iterator (CFeat_CI,
745     /// CAlign_CI, etc).
746     /// The cost of loading is estimated in two units: number of bytes to load
747     /// and expected time for loading them in seconds.
748     /// The estimation is dependent on data source of requested annotations.
749     /// In some cases it can be quite rough if the data source doesn't provide
750     /// necessary information.
751     /// The cost includes only data not-yet-loaded. Any annotations already
752     /// loaded into memory are excluded from this cost.
753     ///  @sa CAnnotTypes_CI::GetCostOfLoadingInBytes()
754     ///  @sa CAnnotTypes_CI::GetCostOfLoadingInSeconds()
SetCollectCostOfLoadingSAnnotSelector755     SAnnotSelector& SetCollectCostOfLoading(bool value = true)
756         {
757             m_CollectCostOfLoading = value;
758             return *this;
759         }
760 
761     /// Ignore strand when testing for range overlap
SetIgnoreStrandSAnnotSelector762     SAnnotSelector& SetIgnoreStrand(bool value = true)
763         {
764             m_IgnoreStrand = value;
765             return *this;
766         }
767 
768     /// Set filter for source location of annotations
    /// @param loc  location used as the filter
    /// NOTE(review): presumably stored in m_SourceLoc - the implementation
    /// is not visible in this header section, confirm.
769     SAnnotSelector& SetSourceLoc(const CSeq_loc& loc);
770 
771     /// Reset filter for source location of annotations
    /// (counterpart of SetSourceLoc()).
772     SAnnotSelector& ResetSourceLoc(void);
773 
774     /// Set handle used for determining what locations are "near".
775     /// You can set it to an "unset" or "blank" CBioseq_Handle to effectively unset this.
SetIgnoreFarLocationsForSortingSAnnotSelector776     SAnnotSelector& SetIgnoreFarLocationsForSorting( const CBioseq_Handle &handle )
777     {
778         m_IgnoreFarLocationsForSorting = handle;
779         return *this;
780     }
781 
GetIgnoreFarLocationsForSortingSAnnotSelector782     const CBioseq_Handle &GetIgnoreFarLocationsForSorting( void ) const
783     {
784         return m_IgnoreFarLocationsForSorting;
785     }
786 
787     /// Set bit filter for annotations that support it (SNP)
788     /// If filter is set then collect only annotations
789     /// that have masked (filter_mask) bits equal to the filter_bits
790     typedef Uint8 TBitFilter;
SetBitFilterSAnnotSelector791     SAnnotSelector& SetBitFilter(TBitFilter filter_bits,
792                                  TBitFilter filter_mask = TBitFilter(-1))
793     {
794         m_FilterMask = filter_mask;
795         m_FilterBits = filter_bits & filter_mask;
796         return *this;
797     }
ResetBitFilterSAnnotSelector798     SAnnotSelector& ResetBitFilter(void)
799     {
800         m_FilterMask = m_FilterBits = 0;
801         return *this;
802     }
HasBitFilterSAnnotSelector803     bool HasBitFilter(void) const
804     {
805         return m_FilterMask != 0;
806     }
GetFilterMaskSAnnotSelector807     TBitFilter GetFilterMask(void) const
808     {
809         return m_FilterMask;
810     }
GetFilterBitsSAnnotSelector811     TBitFilter GetFilterBits(void) const
812     {
813         return m_FilterBits;
814     }
815 
816     /// Exclude features with empty gene xref:
817     /// xref { { data gene { } } }
SetExcludeIfGeneIsSuppressedSAnnotSelector818     SAnnotSelector& SetExcludeIfGeneIsSuppressed(bool exclude = true)
819     {
820         m_ExcludeIfGeneIsSuppressed = exclude;
821         return *this;
822     }
GetExcludeIfGeneIsSuppressedSAnnotSelector823     bool GetExcludeIfGeneIsSuppressed(void) const
824     {
825         return m_ExcludeIfGeneIsSuppressed;
826     }
827 
828 protected:
    // CAnnot_Collector is a friend, so it can use the protected members below.
829     friend class CAnnot_Collector;
830 
    // Declared const while m_LimitObjectType is mutable.
    // NOTE(review): presumably it may adjust m_LimitObjectType - confirm in
    // the implementation, which is not visible in this header section.
831     void CheckLimitObjectType(void) const;
832 
    // Helpers for m_AnnotTypesBitset (implementations not visible here).
833     void x_InitializeAnnotTypesSet(bool default_value);
834     void x_ClearAnnotTypesSet(void);
835 
    // Type of m_AnnotTypesBitset. The size is CSeqFeatData::eSubtype_max+3.
    // NOTE(review): presumably the extra slots are for non-feature annot
    // types - confirm.
836     typedef bitset<CSeqFeatData::eSubtype_max+3> TAnnotTypesBitset;
    // Type of m_ExcludedTSE.
837     typedef vector<CTSE_Handle> TTSE_Limits;
838 
    // Search settings. The accessors for this group are declared earlier in
    // the struct, outside of this header section.
839     bool                  m_FeatProduct;  // "true" for searching products
840     int                   m_ResolveDepth;
841     EOverlapType          m_OverlapType;
842     EResolveMethod        m_ResolveMethod;
843     ESortOrder            m_SortOrder;
    // NOTE(review): presumably a user-supplied feature ordering - confirm.
844     CIRef<IFeatComparator>m_FeatComparator;
845 
    // Type of m_LimitObjectType. The per-value comments name the object
    // class that goes together with m_LimitTSE.
    // NOTE(review): presumably that object is the one held in m_LimitObject
    // - confirm.
846     enum ELimitObject {
847         eLimit_None,
848         eLimit_TSE_Info,        // CTSE_Info + m_LimitTSE
849         eLimit_Seq_entry_Info,  // CSeq_entry_Info + m_LimitTSE
850         eLimit_Seq_annot_Info   // CSeq_annot_Info + m_LimitTSE
851     };
    // mutable: can be modified from const member functions
852     mutable ELimitObject  m_LimitObjectType;
853     EUnresolvedFlag       m_UnresolvedFlag;
854     CConstRef<CObject>    m_LimitObject;
855     CTSE_Handle           m_LimitTSE;
856     TMaxSize              m_MaxSize; // maximum number of annotations to find
857     TMaxSearchSegments    m_MaxSearchSegments; // max number of empty segments
858     TMaxSearchTime        m_MaxSearchTime; // max time in seconds to search
859     TAnnotsNames          m_IncludeAnnotsNames;
860     TAnnotsNames          m_ExcludeAnnotsNames;
    // Tested by IsIncludedAnyNamedAnnotAccession() and dereferenced
    // (without a check) by GetNamedAnnotAccessions().
861     AutoPtr<TNamedAnnotAccessions> m_NamedAnnotAccessions;
862     EMaxSearchSegmentsAction m_MaxSearchSegmentsAction;
863     bool                  m_NoMapping; // see GetNoMapping()/SetNoMapping()
864     TAdaptiveDepthFlags   m_AdaptiveDepthFlags;
865     bool                  m_ExactDepth;
866     bool                  m_ExcludeExternal;
867     bool                  m_CollectSeq_annots; // see SetCollectSeq_annots()
868     bool                  m_CollectTypes; // see SetCollectTypes()
869     bool                  m_CollectNames; // see SetCollectNames()
870     bool                  m_CollectCostOfLoading; // see SetCollectCostOfLoading()
871     bool                  m_IgnoreStrand; // see SetIgnoreStrand()
872     bool                  m_HasWildcardInAnnotsNames;
873     TAdaptiveTriggers     m_AdaptiveTriggers;
    // NOTE(review): presumably maintained by ExcludeTSE()/ResetExcludedTSE()
    // - confirm.
874     TTSE_Limits           m_ExcludedTSE;
875     TAnnotTypesBitset     m_AnnotTypesBitset;
    // NOTE(review): presumably maintained by SetSourceLoc()/ResetSourceLoc()
    // - confirm.
876     AutoPtr<CHandleRangeMap> m_SourceLoc;
    // see SetIgnoreFarLocationsForSorting()/GetIgnoreFarLocationsForSorting()
877     CBioseq_Handle        m_IgnoreFarLocationsForSorting;
878     TBitFilter            m_FilterMask; // 0 means no bit filter, see HasBitFilter()
879     TBitFilter            m_FilterBits; // stored masked, see SetBitFilter()
880     bool                  m_ExcludeIfGeneIsSuppressed; // see SetExcludeIfGeneIsSuppressed()
881 };
882 
883 
884 /// Named annotations zoom level can be encoded in the accession string
885 /// with @@ suffix, for example: NA000000001.1@@1000
886 /// Zoom level is the number of bases covered by a single value in an
887 /// annotation density graph.
888 
/// Separator between the accession and the zoom level in such strings.
889 #define NCBI_ANNOT_TRACK_ZOOM_LEVEL_SUFFIX "@@"
890 
891 
892 /// Extract optional zoom level suffix from named annotation string.
893 /// Returns true if zoom level is explicitly defined in the full_name argument.
894 /// The accession string without zoom level will be written
895 /// by acc_ptr pointer if it's not null.
896 /// Zoom level will be written by zoom_level_ptr pointer if it's not null.
897 /// Absent zoom level will be represented by value 0.
898 /// Wildcard zoom level will be represented by value -1.
/// @param full_name       named annotation string, optionally with zoom level
/// @param acc_ptr         optional output: accession without zoom level
/// @param zoom_level_ptr  optional output: zoom level (0 - absent, -1 - wildcard)
/// @return true if full_name explicitly defines a zoom level
899 NCBI_XOBJMGR_EXPORT
900 bool ExtractZoomLevel(const string& full_name,
901                       string* acc_ptr, int* zoom_level_ptr);
902 
903 /// Combine accession string and zoom level into a string with separator.
904 /// If the argument string already contains zoom level verify it's the same
905 /// as the zoom_level argument.
906 /// Zoom level value of -1 can be used to add wildcard @@*.
/// @param acc         accession string, optionally already with zoom level
/// @param zoom_level  zoom level to encode; -1 adds the wildcard
/// @return the combined string
907 NCBI_XOBJMGR_EXPORT
908 string CombineWithZoomLevel(const string& acc, int zoom_level);
/// NOTE(review): presumably the same as CombineWithZoomLevel(), but the
/// result is written into acc instead of being returned - the
/// implementation is not visible here, confirm.
909 NCBI_XOBJMGR_EXPORT
910 void AddZoomLevel(string& acc, int zoom_level);
911 
912 /* @} */
913 
914 
915 END_SCOPE(objects)
916 END_NCBI_SCOPE
917 
918 #endif  // ANNOT_SELECTOR__HPP
919