1 /* $Id: Seq_loc.hpp 623038 2021-01-07 14:08:09Z ivanov $
2  * ===========================================================================
3  *
4  *                            PUBLIC DOMAIN NOTICE
5  *               National Center for Biotechnology Information
6  *
7  *  This software/database is a "United States Government Work" under the
8  *  terms of the United States Copyright Act.  It was written as part of
9  *  the author's official duties as a United States Government employee and
10  *  thus cannot be copyrighted.  This software/database is freely available
11  *  to the public for use. The National Library of Medicine and the U.S.
12  *  Government have not placed any restriction on its use or reproduction.
13  *
14  *  Although all reasonable efforts have been taken to ensure the accuracy
15  *  and reliability of the software and data, the NLM and the U.S.
16  *  Government do not and cannot warrant the performance or results that
17  *  may be obtained by using this software or data. The NLM and the U.S.
18  *  Government disclaim all warranties, express or implied, including
19  *  warranties of performance, merchantability or fitness for any particular
20  *  purpose.
21  *
22  *  Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author:  Cliff Clausen, Eugene Vasilchenko, Mati Shomrat
27  *
28  * File Description:
29  *   .......
30  *
31  * Remark:
32  *   This code was originally generated by application DATATOOL
33  *   using specifications from the ASN data definition file
34  *   'seqloc.asn'.
35  *
36  * ===========================================================================
37  */
38 
39 #ifndef OBJECTS_SEQLOC_SEQ_LOC_HPP
40 #define OBJECTS_SEQLOC_SEQ_LOC_HPP
41 
42 
43 // generated includes
44 #include <objects/seqloc/Seq_loc_.hpp>
45 #include <objects/seqloc/Seq_loc_mix.hpp>
46 #include <objects/seqloc/Packed_seqpnt.hpp>
47 #include <objects/seqloc/Packed_seqint.hpp>
48 #include <objects/seqloc/Seq_id.hpp>
49 #include <objects/seq/seq_id_handle.hpp>
50 #include <objects/general/Int_fuzz.hpp>
51 //
52 ////
53 //#include <corelib/ncbiexpt.hpp>
54 //#include <util/range.hpp>
55 //#include <vector>
56 
57 BEGIN_NCBI_SCOPE
58 BEGIN_objects_SCOPE // namespace ncbi::objects::
59 
60 /** @addtogroup OBJECTS_Seqloc
61  *
62  * @{
63  */
64 
65 
66 class CSeq_id_Handle;
67 class ISynonymMapper;
68 class ILengthGetter;
69 class CSeq_loc_CI;
70 class CSeq_loc_I;
71 
72 /// Seq-loc exceptions
/// Exception class for Seq-loc related errors. Derives from CException and
/// declares the error codes listed in EErrCode.
/// NOTE(review): this class is exported with NCBI_SEQ_EXPORT while the other
/// exported classes visible in this header use NCBI_SEQLOC_EXPORT -- confirm
/// that this is intended.
73 class NCBI_SEQ_EXPORT CSeqLocException : public CException
74 {
75 public:
    /// Error codes carried by this exception.
76     enum EErrCode {
77         eNotSet,         ///< Seq-loc is not set
78         eMultipleId,     ///< Seq-loc on multiple ids when one id is required
79         eUnsupported,    ///< Seq-loc has data that is not supported yet
80         eBadLocation,    ///< Seq-loc is incorrectly formed
81         eBadIterator,    ///< Seq-loc iterator is in bad state
82         eIncomatible,    ///< Seq-loc type is incompatible with operation (enumerator name is spelled this way in the API)
83         eOutOfRange,     ///< parameter is out of valid range
84 
85         eOtherError
86     };
87 
    /// Overrides the base-class virtual and returns a C string.
    /// NOTE(review): presumably the textual name of the current error code;
    /// the definition is not visible in this header.
88     virtual const char* GetErrCodeString(void) const override;
    // Macro invoked with this class and its parent; what it expands to is
    // not visible in this header.
89     NCBI_EXCEPTION_DEFAULT(CSeqLocException, CException);
90 };
91 
92 
/// Seq-loc class built on top of CSeq_loc_Base (the base comes from the
/// generated header Seq_loc_.hpp). Declares strand/range/id queries,
/// partial/truncated flags, comparison, set-like operations on locations,
/// and cached total-range / id values that the overridden setters below
/// are documented to invalidate.
93 class NCBI_SEQLOC_EXPORT CSeq_loc : public CSeq_loc_Base
94 {
95 public:
    // Convenience aliases for the parent class and for the element types
    // used by the constructors and operations below.
96     typedef CSeq_loc_Base Tparent;
97     typedef CPacked_seqpnt_Base::TPoints TPoints;
98     typedef CPacked_seqint_Base::Tdata   TIntervals;
99     typedef CSeq_loc_mix_Base::Tdata     TLocations;
100     typedef CSeq_id                      TId;
101     typedef ENa_strand                   TStrand;
102     typedef TSeqPos                      TPoint;
103     typedef CPacked_seqint::TRanges      TRanges;
104 
105     /// constructors
    /// NOTE(review): judging by the parameter types, the overloads taking an
    /// id build a location from a single point, a set of points, a from/to
    /// interval, or a set of ranges respectively -- definitions are not
    /// visible in this header.
106     CSeq_loc(void);
107     CSeq_loc(E_Choice index);
108     CSeq_loc(TId& id, TPoint point, TStrand strand = eNa_strand_unknown);
109     CSeq_loc(TId& id, const TPoints& points, TStrand strand = eNa_strand_unknown);
110     CSeq_loc(TId& id, TPoint from, TPoint to, TStrand strand = eNa_strand_unknown);
111     CSeq_loc(TId& id, TRanges ivals, TStrand strand = eNa_strand_unknown);
112 
113     // destructor
114     virtual ~CSeq_loc(void);
115 
116     /// See related functions in objmgr/util/seq_loc_util.hpp:
117     ///
118     ///   TSeqPos GetLength(const CSeq_loc&, CScope*)
119     ///   bool IsOneBioseq(const CSeq_loc&, CScope*)
120     ///   const CSeq_id& GetId(const CSeq_loc&, CScope*)
121     ///   TSeqPos GetStart(const CSeq_loc&, CScope*)
122     ///   sequence::ECompare Compare(const CSeq_loc&, CSeq_loc&, CScope*)
123     ///   sequence::SeqLocMerge(...)
124     ///
125 
    /// Range type returned by GetTotalRange().
126     typedef CRange<TSeqPos> TRange;
127 
    /// Get the total range of the location.
    /// NOTE(review): presumably backed by the m_TotalRangeCache* members
    /// declared below -- confirm in the implementation.
128     TRange GetTotalRange(void) const;
    /// Invalidate the cached total range (const: the cache is mutable).
129     void InvalidateTotalRangeCache(void) const;
130 
131     /// Check if strand is set for any/all part(s) of the seq-loc
132     /// depending on the flag.
133     bool IsSetStrand(EIsSetStrand flag = eIsSetStrand_Any) const;
134     /// Get the location's strand. If no strand is set, returns
135     /// eNa_strand_unknown. If different strands are set in different
136     /// parts, returns eNa_strand_other. Explicitly set unknown strand
137     /// is ignored when combined with plus or minus strand.
138     /// Examples:
139     ///   not-set = unknown
140     ///   not-set + plus = plus
141     ///   unknown + plus = plus
142     ///   unknown + both = other
143     ///   plus + minus = other
144     ENa_strand GetStrand(void) const;
145     /// Return true if all ranges have reverse strand
146     bool IsReverseStrand(void) const;
147     /// Flip the strand (e.g. plus to minus)
148     void FlipStrand(void);
149     /// Set the strand for all of the location's ranges.
150     void SetStrand(ENa_strand strand);
151     /// Reset the strand on this location
152     void ResetStrand(void);
153 
154     /// Return start and stop positions of the seq-loc.
155     /// Start position is the start of the first range in the seq-loc,
156     /// stop is the end of the last range. If the eExtreme_Biological flag
157     /// is used, the effective order of ranges and range direction depends
158     /// on the strand.
159     /// NOTE: The returned values are not necessarily the same as
160     /// the boundaries returned by GetTotalRange(). It's also not
161     /// guaranteed that the value returned by GetStart() is less or
162     /// equal to the one returned by GetStop().
163     TSeqPos GetStart(ESeqLocExtremes ext) const;
164     TSeqPos GetStop (ESeqLocExtremes ext) const;
165 
166     /// Special case for circular sequences. No ID is checked for
167     /// circular locations. If the sequence is not circular
168     /// (seq_len == kInvalidSeqPos) the function works like GetTotalRange()
169     TSeqPos GetCircularLength(TSeqPos seq_len) const;
170 
171     /// Appends a label suitable for display (e.g., error messages).
172     /// label must point to an existing string object.
173     /// Method just returns if label is null. Note this label is NOT
174     /// GenBank-style.
175     void GetLabel(string* label) const;
176 
177     /// check start or stop of location for e_Lim fuzz
178     bool IsPartialStart(ESeqLocExtremes ext) const;
179     bool IsPartialStop(ESeqLocExtremes ext) const;
180 
181     /// set / remove e_Lim fuzz on start or stop
182     /// (lt/gt - indicating partial interval)
183     void SetPartialStart(bool val, ESeqLocExtremes ext);
184     void SetPartialStop (bool val, ESeqLocExtremes ext);
185 
186     /// check if parts of the seq-loc are missing
187     bool IsTruncatedStart(ESeqLocExtremes ext) const;
188     bool IsTruncatedStop (ESeqLocExtremes ext) const;
189 
190     /// set / remove e_Lim fuzz on start or stop
191     /// (tl/tr - indicating removed parts of the seq-loc)
192     void SetTruncatedStart(bool val, ESeqLocExtremes ext);
193     void SetTruncatedStop (bool val, ESeqLocExtremes ext);
194 
195     /// Get the id of the location.
196     /// Returns NULL if the location has multiple ids or no id at all.
197     const CSeq_id* GetId(void) const;
198 
199     /// check that the 'id' field in all parts of the location is the same
200     /// as the specified id.
201     /// if the id parameter is NULL will return the location's id (if unique)
202     /// @return true on success
203     bool CheckId(const CSeq_id*& id, bool may_throw = true) const;
    /// Invalidate the cached id (const: the cache is mutable).
204     void InvalidateIdCache(void) const;
205 
206     /// set the 'id' field in all parts of this location
207     void SetId(CSeq_id& id); // stores id
208     void SetId(const CSeq_id& id); // stores a new copy of id
209 
210     /// Combine invalidation of all cached values
211     void InvalidateCache(void) const;
212 
213     /// Override Assign() to incorporate cache invalidation.
214     virtual void Assign(const CSerialObject& source,
215                         ESerialRecursionMode how = eRecursive);
216 
217     /// Override all setters to incorporate cache invalidation.
    /// Each choice variant has a setter taking a value and, except for
    /// SetNull(), a no-argument setter returning a reference to the variant.
218     void         SetNull(void);
219     void         SetEmpty(TEmpty& v);
220     TEmpty&      SetEmpty(void);
221     void         SetWhole(TWhole& v);
222     TWhole&      SetWhole(void);
223     void         SetInt(TInt& v);
224     TInt&        SetInt(void);
225     void         SetPacked_int(TPacked_int& v);
226     TPacked_int& SetPacked_int(void);
227     void         SetPnt(TPnt& v);
228     TPnt&        SetPnt(void);
229     void         SetPacked_pnt(TPacked_pnt& v);
230     TPacked_pnt& SetPacked_pnt(void);
231     void         SetMix(TMix& v);
232     TMix&        SetMix(void);
233     void         SetEquiv(TEquiv& v);
234     TEquiv&      SetEquiv(void);
235     void         SetBond(TBond& v);
236     TBond&       SetBond(void);
237     void         SetFeat(TFeat& v);
238     TFeat&       SetFeat(void);
239 
240     /// Invalidate id/range cache after deserialization.
241     void PostRead(void) const;
242 
243     /// Flags for location comparison.
244     enum ECompareFlags {
245         fCompare_Default = 0,
246         fCompare_Strand =  1
247     };
    /// Integer type holding a combination of ECompareFlags values.
248     typedef int TCompareFlags;
249 
250     /// @deprecated Use the 2-argument version taking flags.
251     NCBI_DEPRECATED int Compare(const CSeq_loc& loc) const;
252 
253     /// Compare locations by total range for each seq-id.
254     /// Compares seq-ids (@sa CSeq_loc::CompareOrdered()), then compares
255     /// total range starts (left to right) and lengths (longest first).
256     /// Optionally compares strands; location without strand goes first.
257     int Compare(const CSeq_loc& loc, TCompareFlags flags) const;
258 
259     /// Used as a helper for determining which pieces of a
260     /// CSeq_loc to compare.
261     class ISubLocFilter {
262     public:
~ISubLocFilter()263         virtual ~ISubLocFilter() {}
264         // Returns true for pieces we should use.
265         // Must be able to handle "NULL" input
266         virtual bool operator()( const CSeq_id *id ) const = 0;
267     };
268 
269     /// Compare first-level sub-locations sequentially to order them
270     /// by biological "complexity". More "complex" location will come last.
271     /// Sub-locations are checked in Seq-loc-mix and Packed-seqint.
272     /// Minus strand locations' order is reversed.
273     /// Seq-ids are not checked in this method, unless you set
274     /// filter, which will allow the user to pick which parts to skip.
275     int CompareSubLoc(const CSeq_loc& loc, ENa_strand strand,
276         const ISubLocFilter *filter = NULL) const;
277 
278     /// Simple adding of seq-locs.
279     void Add(const CSeq_loc& other);
280 
    /// Convert this location to a mix.
    /// NOTE(review): exact conversion rules are not visible in this header.
281     void ChangeToMix(void);
282     /// Works only if location is currently an interval, point,
283     /// packed-int (handled trivially), or a mix built recursively from these.
284     void ChangeToPackedInt(void);
285 
286     /// CSeq_loc operations
287     ///
288     /// Flags for operations:
289     /// fStrand_Ignore - if set, strands will be ignored and any ranges
290     /// may be merged/sorted. If not set, ranges on plus and minus strands
291     /// are treated as different sub-sets. In some operations strand may
292     /// still be checked (see fMerge_Abutting and order of ranges).
293     ///
294     /// NOTE: merge flags do not sort ranges, so only overlaps between
295     /// neighbor ranges can be detected. To merge all overlapping ranges
296     /// add fSort flag. The only exception is fSortAndMerge_All which
297     /// already includes fSort.
298     ///
299     /// fMerge_Contained - merges (removes) any range which is completely
300     /// contained in another range.
301     /// fMerge_Abutting - merge abutting ranges. Also forces merging of
302     /// contained ranges. Even if fStrand_Ignore is set, only the ranges
303     /// with the correct order are merged (e.g. loc2.to == loc1.from must be
304     /// true if loc1.strand is minus).
305     /// fMerge_Overlapping - merge overlapping ranges. Also forces merging of
306     /// contained ranges.
307     /// fMerge_All - merge any ranges if possible (contained, overlapping,
308     /// abutting). The flag does not force sorting, so only neighbor ranges
309     /// can be merged. To sort ranges before merging add fSort flag or use
310     /// fSortAndMerge_All.
311     /// fSortAndMerge_All - combination of fSort and fMerge_All.
312     /// fMerge_SingleRange - creates a single range, covering all original ranges.
313     /// Strand is set to the first strand in the original seq-loc, regardless of the
314     /// strand flag.
315     ///
316     /// fSort - forces sorting of the resulting ranges. All ranges on the
317     /// same ID are grouped together, but the order of IDs is undefined. Strand
318     /// is preserved if all values having the same direction are equal. Otherwise
319     /// strand is reset to plus and minus (in strand-preserve mode) or unknown (in
320     /// strand-ignore mode). NULLs are always merged to a single NULL. The order
321     /// of locations for each ID is: NULL, whole, empty, plus strand intervals,
322     /// minus strand intervals.
323 
    // Bit flags; the composite values below are ORs of the single-bit ones.
324     enum EOpFlags {
325         fStrand_Ignore         = 1<<0,
326         fMerge_Contained       = 1<<1,
327         fMerge_AbuttingOnly    = 1<<2,
328         fMerge_Abutting        = fMerge_AbuttingOnly | fMerge_Contained,
329         fMerge_OverlappingOnly = 1<<3,
330         fMerge_Overlapping     = fMerge_OverlappingOnly | fMerge_Contained,
331         fMerge_All             = fMerge_Abutting | fMerge_Overlapping,
332         fMerge_SingleRange     = 1<<4,
333         fSort                  = 1<<5,
334         fSortAndMerge_All      = fSort | fMerge_All
335     };
    /// Integer type holding a combination of EOpFlags values.
336     typedef int TOpFlags;
337 
338     /// All functions create and return a new seq-loc object.
339     /// Optional synonym mapper may be provided to detect and convert
340     /// synonyms of a bioseq. Length getter is used by Subtract() to
341     /// calculate real sequence length.
342 
343     /// Merge ranges depending on flags, return a new seq-loc object.
344     CRef<CSeq_loc> Merge(TOpFlags        flags,
345                          ISynonymMapper* syn_mapper) const;
346 
347     /// Add seq-loc, merge/sort resulting ranges depending on flags.
348     /// Return a new seq-loc object.
349     CRef<CSeq_loc> Add(const CSeq_loc& other,
350                        TOpFlags        flags,
351                        ISynonymMapper* syn_mapper) const;
352 
353     /// Subtract seq-loc from this, merge/sort resulting ranges depending on
354     /// flags. Return a new seq-loc object.
355     CRef<CSeq_loc> Subtract(const CSeq_loc& other,
356                             TOpFlags        flags,
357                             ISynonymMapper* syn_mapper,
358                             ILengthGetter*  len_getter) const;
359 
360     /// Find the intersection with the seq-loc, merge/sort resulting
361     /// ranges depending on flags. Return a new seq-loc object.
362     CRef<CSeq_loc> Intersect(const CSeq_loc& other,
363                              TOpFlags        flags,
364                              ISynonymMapper* syn_mapper) const;
365 
366     /// Make CSeq_loc look like an STL container
    /// (const iteration only; the iterator type is CSeq_loc_CI).
367     typedef CSeq_loc_CI                  const_iterator;
368     const_iterator begin(void) const;
369     const_iterator end(void) const;
370 
371 private:
372     // Prohibit copy constructor & assignment operator
373     CSeq_loc(const CSeq_loc&);
374     CSeq_loc& operator= (const CSeq_loc&);
375 
    // Internal helpers behind the public range/id/conversion methods.
    // NOTE(review): their exact contracts are defined in the implementation
    // file, which is not visible here.
376     TRange x_UpdateTotalRange(void) const;
377     TRange x_CalculateTotalRangeCheckId(const CSeq_id*& id) const;
378     bool x_CheckId(const CSeq_id*& id, bool may_throw = true) const;
379     bool x_UpdateId(const CSeq_id*& total_id, const CSeq_id* id,
380                     bool may_throw = true) const;
381     void x_ChangeToMix(const CSeq_loc& other);
382     void x_ChangeToPackedInt(const CSeq_interval& other);
383     void x_ChangeToPackedInt(const CSeq_loc& other);
384     void x_ChangeToPackedPnt(const CSeq_loc& other);
385 
386     /// Compare single-id locations, or throw an exception if any location
387     /// is multi-id.
388     int x_CompareSingleId(const CSeq_loc& loc, const CSeq_id* id1,
389                           const CSeq_id* id2,
390                           TCompareFlags flags) const;
391 
    // NOTE(review): by their names these look like sentinel values for the
    // cache members below ("not computed yet" / "more than one id") --
    // confirm against the implementation.
392     enum {
393         kDirtyCache = -2,
394         kSeveralIds = -3
395     };
396 
    // Cached values; mutable so that const methods can update/invalidate them.
397     mutable volatile TSeqPos m_TotalRangeCacheFrom;
398     mutable volatile TSeqPos m_TotalRangeCacheToOpen;
399     // Seq-id for the whole seq-loc or null if multiple IDs were found
400     mutable const CSeq_id* volatile m_IdCache;
401 };
402 
403 
404 /// Interface for mapping IDs to the best synonym. Should provide
405 /// GetBestSynonym() method which returns the ID which should replace
406 /// the original one in the destination seq-loc.
407 class ISynonymMapper
408 {
409 public:
ISynonymMapper(void)410     ISynonymMapper(void) {}
~ISynonymMapper(void)411     virtual ~ISynonymMapper(void) {}
412 
413     virtual CSeq_id_Handle GetBestSynonym(const CSeq_id& id) = 0;
414 };
415 
416 
417 /// Interface for getting bioseq length. Should provide GetLength()
418 /// method.
419 class ILengthGetter
420 {
421 public:
ILengthGetter(void)422     ILengthGetter(void) {}
~ILengthGetter(void)423     virtual ~ILengthGetter(void) {}
424 
425     virtual TSeqPos GetLength(const CSeq_id& id) = 0;
426 };
427 
428 
429 // Simple location structure: id/from/to
// Returned by reference from CSeq_loc_CI::x_GetRangeInfo() (declared in
// this header).
// NOTE(review): the per-field notes below are inferred from names and types
// only -- confirm against the implementation.
430 struct NCBI_SEQLOC_EXPORT SSeq_loc_CI_RangeInfo {
431     SSeq_loc_CI_RangeInfo(void);
432     ~SSeq_loc_CI_RangeInfo(void);
433 
    // Set the strand value. NOTE(review): presumably also marks the strand
    // as set (m_IsSetStrand) -- definition not visible here.
434     void SetStrand(ENa_strand strand);
435 
436     typedef CSeq_loc::TRange    TRange;
437 
438     CSeq_id_Handle      m_IdHandle;     // seq-id as a handle
439     CConstRef<CSeq_id>  m_Id;           // seq-id object
440     TRange              m_Range;        // range of this element
441     bool                m_IsSetStrand;  // whether m_Strand holds a set value (presumably)
442     ENa_strand          m_Strand;       // strand of this element
443     CConstRef<CSeq_loc> m_Loc;          // associated seq-loc (presumably the source/embedding one)
444     pair<CConstRef<CInt_fuzz>, CConstRef<CInt_fuzz> > m_Fuzz; // fuzz pair; presumably (from, to)
445 };
446 
447 
448 class CSeq_loc_CI_Impl;
449 
450 /// Seq-loc iterator class -- iterates all intervals from a seq-loc
451 /// in the correct order.
/// This is the read-only iterator; CSeq_loc::const_iterator is a typedef
/// for it.
452 class NCBI_SEQLOC_EXPORT CSeq_loc_CI
453 {
454 public:
455     /// Options for empty locations processing
456     enum EEmptyFlag {
457         eEmpty_Skip,    ///< ignore empty locations
458         eEmpty_Allow    ///< treat empty locations as usual
459     };
    /// Order in which sub-locations are visited
460     enum ESeqLocOrder {
461         eOrder_Positional,    ///< Iterate sub-locations in positional order
462         eOrder_Biological     ///< Iterate sub-locations in biological order
463     };
464     typedef CSeq_loc::TRange TRange;
465 
466     /// constructors
467     CSeq_loc_CI(void);
    /// Iterate over @a loc; by default empty locations are skipped and
    /// sub-locations are visited in biological order.
468     CSeq_loc_CI(const CSeq_loc& loc,
469                 EEmptyFlag empty_flag = eEmpty_Skip,
470                 ESeqLocOrder order = eOrder_Biological);
471     /// construct iterator at a different position in the same location
472     /// @sa GetPos()
473     CSeq_loc_CI(const CSeq_loc_CI& iter, size_t pos);
474     /// destructor
475     virtual ~CSeq_loc_CI(void);
476 
    /// Copy construction and assignment are supported.
477     CSeq_loc_CI(const CSeq_loc_CI& iter);
478     CSeq_loc_CI& operator= (const CSeq_loc_CI& iter);
479 
    /// Advance to the next element.
480     CSeq_loc_CI& operator++ (void);
    // Macro invoked with x_IsValid(); by its name it declares the
    // conversion used to test the iterator in boolean context.
481     DECLARE_OPERATOR_BOOL(x_IsValid());
482 
483     bool operator== (const CSeq_loc_CI& iter) const;
484     bool operator!= (const CSeq_loc_CI& iter) const;
485 
486     /// Location of type equiv define set of equivalent locations.
487     /// Each equiv set consist of several equivalent parts.
488     /// Each equiv part can contain equiv location too,
489     /// so equiv sets are recursive.
490 
491     /// Return true if current position is part of a bond
492     bool IsInBond(void) const;
493     /// Return true if current position is A part of a bond
494     bool IsBondA(void) const;
495     /// Return true if current position is B part of a bond
496     bool IsBondB(void) const;
497     /// Return iterators that cover bond of current position
498     /// result.first is the first segment in the bond
499     /// result.second is the first segment after the bond
500     pair<CSeq_loc_CI, CSeq_loc_CI> GetBondRange(void) const;
501 
502     /// Return true if location has equiv parts
503     bool HasEquivSets(void) const;
504     /// Return true if current position is in some equiv part
505     bool IsInEquivSet(void) const;
506     /// Return number of recursive equiv parts current position in
507     size_t GetEquivSetsCount(void) const;
508     /// Return iterators that cover equiv set of current position
509     /// result.first is the first segment in the equiv set
510     /// result.second is the first segment after the equiv set
511     /// level specify equiv set if there are more than one of them
512     /// level = 0 is the smallest equiv set (innermost)
513     pair<CSeq_loc_CI, CSeq_loc_CI> GetEquivSetRange(size_t level = 0) const;
514     /// Return iterators that cover equiv part of current position
515     /// result.first is the first segment in the equiv part
516     /// result.second is the first segment after the equiv part
517     /// level specify equiv set if there are more than one of them
518     /// level = 0 is the smallest equiv set (innermost)
519     pair<CSeq_loc_CI, CSeq_loc_CI> GetEquivPartRange(size_t level = 0) const;
520 
521     /// Get seq_id of the current location
522     const CSeq_id& GetSeq_id(void) const;
523     CSeq_id_Handle GetSeq_id_Handle(void) const;
524 
525     /// Get the range
526     TRange         GetRange(void) const;
527     /// Get strand
528     bool IsSetStrand(void) const;
529     ENa_strand GetStrand(void) const;
530 
531     /// Get seq-loc for the current iterator position. New CSeq_loc object may
532     /// be created if the current range is a part of a packed/mixed seq-loc.
533     /// The resulting seq-loc will always include only one range (which may
534     /// be whole or empty).
535     /// @sa GetEmbeddingSeq_loc
536     CConstRef<CSeq_loc> GetRangeAsSeq_loc(void) const;
537 
538     /// Get the nearest seq-loc containing the current range.
539     /// For packed/mixed locations the embedding seq-loc may
540     /// include other ranges.
541     /// @note Don't ever confuse it with GetRangeAsSeq_loc!
542     const CSeq_loc& GetEmbeddingSeq_loc(void) const;
543 
544     /// @deprecated You probably actually wanted to use GetRangeAsSeq_loc
545     /// or GetEmbeddingSeq_loc instead.
546     NCBI_DEPRECATED const CSeq_loc& GetSeq_loc(void) const;
547 
548     // Return null if non-fuzzy
549     const CInt_fuzz* GetFuzzFrom(void) const;
550     const CInt_fuzz* GetFuzzTo  (void) const;
551 
552     /// True if the current location is a whole sequence
553     bool           IsWhole(void) const;
554     /// True if the current location is empty
555     bool           IsEmpty(void) const;
556     /// True if the current location is a single point
557     bool           IsPoint(void) const;
558 
559     /// Reset the iterator to the initial state
560     void Rewind(void);
561 
562     /// Get number of ranges.
563     size_t GetSize(void) const;
564 
565     /// Get iterator's position.
566     size_t GetPos(void) const;
567 
568     /// Set iterator's position.
569     void SetPos(size_t pos);
570 
571 protected:
    // Range info for the current element (see SSeq_loc_CI_RangeInfo).
572     const SSeq_loc_CI_RangeInfo& x_GetRangeInfo(void) const;
573 
    // Reference-counted implementation object (type only forward-declared
    // in this header).
574     CRef<CSeq_loc_CI_Impl> m_Impl;
575 
576     // Check the iterator position
577     bool x_IsValid(void) const;
578     // Check the position, throw exception if not valid
    // NOTE(review): the comment above appears to describe x_CheckValid()
    // rather than x_GetIteratorType(); the latter is virtual and returns a
    // C string, presumably the iterator's type name used in messages.
579     virtual const char* x_GetIteratorType(void) const;
580     void x_CheckValid(const char* where) const;
581     void x_ThrowNotValid(const char* where) const;
582 
    // Current position of the iterator (see GetPos()/SetPos()).
583     size_t m_Index;
584 };
585 
586 
587 /// Seq-loc iterator class -- iterates all intervals from a seq-loc
588 /// in the correct order.
589 class NCBI_SEQLOC_EXPORT CSeq_loc_I : public CSeq_loc_CI
590 {
591 public:
592     /// Options for creation modified locations
593     /// Bond and equiv types are preserved if possible
594     enum EMakeType {
595         eMake_CompactType, /// use most compact Seq-loc type (default)
596         eMake_PreserveType /// keep original Seq-loc type if possible
597     };
598 
599     /// constructors
600     CSeq_loc_I(void);
601     CSeq_loc_I(CSeq_loc& loc);
602     /// construct iterator at a different position in the same location
603     /// @sa GetPos()
604     CSeq_loc_I(const CSeq_loc_I& iter, size_t pos);
605     /// destructor
606     virtual ~CSeq_loc_I(void);
607 
608     /// return true if any part was changed since initialization
609     bool HasChanges(void) const;
610 
611     /// return constructed CSeq_loc with all changes
612     CRef<CSeq_loc> MakeSeq_loc(EMakeType make_type = eMake_CompactType) const;
613 
614     /// Delete current element, and make iterator to point to the next element.
615     /// All other iterators of the same CSeq_loc object will become invalid.
616     /// If the deleted element is contained in any equiv set then the equiv set
617     /// and its corresponding part will be reduced in size appropriately,
618     /// and if the part and/or the set become empty after the deletion
619     /// they will be removed completely.
620     void Delete(void);
621 
622     /// Set of Insert*() methods.
623     /// All of them insert new element before the one the iterator points to.
624     /// If the iterator is at the end of CSeq_loc then the new element is
625     /// inserted after the last element of the CSeq_loc.
626     /// After the insertion this iterator will point to the element
627     /// it was pointing before (or end), and the result iterator will point
628     /// to the inserted element.
629     /// All other iterators of the same CSeq_loc object will become invalid.
630     /// If the insertion point is completely within an equiv set (excluding
631     /// the equiv set boundary) the equiv set will be expanded appropriately.
632     /// If the insertion point is completely within an equiv part (excluding
633     /// the equiv part boundary) the new element is added to that equiv part.
634     /// If the insertion point is between two equiv parts of the same set
635     /// then the new element is appended to the part just before
636     /// the insertion point.
637     /// The effect of the insertion on equiv sets can be modified
638     /// by calling SetEquivMode(EEquivMode).
639     /// @sa SetEquivMode()
640     CSeq_loc_I InsertNull(void);
641 
642     /// Insert new element before the current one (@sa InsertNull()).
643     CSeq_loc_I InsertEmpty(const CSeq_id_Handle& id);
644     /// Insert new element before the current one (@sa InsertNull()).
InsertEmpty(const CSeq_id & id)645     CSeq_loc_I InsertEmpty(const CSeq_id& id)
646         {
647             return InsertEmpty(CSeq_id_Handle::GetHandle(id));
648         }
649     /// Insert new element before the current one (@sa InsertNull()).
650     CSeq_loc_I InsertWhole(const CSeq_id_Handle& id);
651     /// Insert new element before the current one (@sa InsertNull()).
InsertWhole(const CSeq_id & id)652     CSeq_loc_I InsertWhole(const CSeq_id& id)
653         {
654             return InsertWhole(CSeq_id_Handle::GetHandle(id));
655         }
656 
657     /// Insert new element before the current one (@sa InsertNull()).
658     /// The strand value eNa_strand_unknown produces strand field not set,
659     /// If eNa_strand_unknown is explicitly required, call SetStrand().
660     CSeq_loc_I InsertInterval(const CSeq_id_Handle& id,
661                               const TRange& range,
662                               ENa_strand strand = eNa_strand_unknown);
663     /// Insert new element before the current one (@sa InsertNull()).
664     /// The strand value eNa_strand_unknown produces strand field not set,
665     /// If eNa_strand_unknown is explicitly required, call SetStrand().
InsertInterval(const CSeq_id & id,const TRange & range,ENa_strand strand=eNa_strand_unknown)666     CSeq_loc_I InsertInterval(const CSeq_id& id,
667                               const TRange& range,
668                               ENa_strand strand = eNa_strand_unknown)
669         {
670             return InsertInterval(CSeq_id_Handle::GetHandle(id), range, strand);
671         }
672     /// Insert new element before the current one (@sa InsertNull()).
673     /// The strand value eNa_strand_unknown produces strand field not set,
674     /// If eNa_strand_unknown is explicitly required, call SetStrand().
InsertInterval(const CSeq_id & id,TSeqPos from,TSeqPos to,ENa_strand strand=eNa_strand_unknown)675     CSeq_loc_I InsertInterval(const CSeq_id& id,
676                               TSeqPos from, TSeqPos to,
677                               ENa_strand strand = eNa_strand_unknown)
678         {
679             return InsertInterval(id, TRange(from, to), strand);
680         }
681     /// Insert new element before the current one (@sa InsertNull()).
682     /// The strand value eNa_strand_unknown produces strand field not set,
683     /// If eNa_strand_unknown is explicitly required, call SetStrand().
InsertInterval(const CSeq_id_Handle & id,TSeqPos from,TSeqPos to,ENa_strand strand=eNa_strand_unknown)684     CSeq_loc_I InsertInterval(const CSeq_id_Handle& id,
685                               TSeqPos from, TSeqPos to,
686                               ENa_strand strand = eNa_strand_unknown)
687         {
688             return InsertInterval(id, TRange(from, to), strand);
689         }
690 
691     /// Insert new element before the current one (@sa InsertNull()).
692     /// The strand value eNa_strand_unknown produces strand field not set,
693     /// If eNa_strand_unknown is explicitly required, call SetStrand().
694     CSeq_loc_I InsertPoint(const CSeq_id_Handle& id,
695                            TSeqPos pos,
696                            ENa_strand strand = eNa_strand_unknown);
697     /// Insert new element before the current one (@sa InsertNull()).
698     /// The strand value eNa_strand_unknown produces strand field not set,
699     /// If eNa_strand_unknown is explicitly required, call SetStrand().
InsertPoint(const CSeq_id & id,TSeqPos pos,ENa_strand strand=eNa_strand_unknown)700     CSeq_loc_I InsertPoint(const CSeq_id& id,
701                            TSeqPos pos,
702                            ENa_strand strand = eNa_strand_unknown)
703         {
704             return InsertPoint(CSeq_id_Handle::GetHandle(id), pos, strand);
705         }
706 
707     /// Set seq_id of the current location
708     void SetSeq_id_Handle(const CSeq_id_Handle& id);
709     /// Set seq_id of the current location
SetSeq_id(const CSeq_id & id)710     void SetSeq_id(const CSeq_id& id)
711         {
712             SetSeq_id_Handle(CSeq_id_Handle::GetHandle(id));
713         }
714 
715     /// Set the whole range at once.
716     void SetRange(const TRange& range);
717     /// Set only the 'from' position of the range.
718     void SetFrom(TSeqPos from);
719     /// Set only the 'to' position of the range.
720     void SetTo(TSeqPos to);
721     /// Set both the 'from' and the 'to' positions of the range from pos.
722     void SetPoint(TSeqPos pos);
723 
724     /// Reset the strand of the range.
725     void ResetStrand(void);
726     /// Set the strand of the range. The Insert*() comments point here for
    /// the case where eNa_strand_unknown has to be stored explicitly.
727     void SetStrand(ENa_strand strand);
728 
729     /// Reset the fuzz of the 'from' position.
730     void ResetFuzzFrom(void);
731     /// Change the fuzz of the 'from' position.
732     void SetFuzzFrom(CInt_fuzz& fuzz);
733     /// Reset the fuzz of the 'to' position.
734     void ResetFuzzTo(void);
735     /// Change the fuzz of the 'to' position.
736     void SetFuzzTo(CInt_fuzz& fuzz);
737     /// Reset the fuzz of a point.
738     void ResetFuzz(void);
739     /// Change the fuzz of a point.
740     void SetFuzz(CInt_fuzz& fuzz);
741 
742     /// Return iterators that cover the equiv set of the current position.
743     /// result.first is the first segment in the equiv set,
744     /// result.second is the first segment after the equiv set.
745     /// level selects the equiv set if there is more than one of them;
746     /// level = 0 is the smallest (innermost) equiv set.
747     pair<CSeq_loc_I, CSeq_loc_I> GetEquivSetRange(size_t level = 0) const;
748     /// Return iterators that cover the equiv part of the current position.
749     /// result.first is the first segment in the equiv part,
750     /// result.second is the first segment after the equiv part.
751     /// level selects the equiv set if there is more than one of them;
752     /// level = 0 is the smallest (innermost) equiv set.
753     pair<CSeq_loc_I, CSeq_loc_I> GetEquivPartRange(size_t level = 0) const;
754 
755     /// This enum defines the way equiv sets are expanded or created
756     /// when one of the Insert*() methods is called.
757     enum EEquivMode {
758         /// By default no equiv sets are created or expanded except
759         /// if the insertion point is completely inside of an equiv or its part.
760         eEquiv_none,
761 
762         /// A new equiv set will be created, even if the insertion point
763         /// is already inside of an existing equiv, so that the new equiv set
764         /// may become a sub-unit of an existing equiv set.
765         /// The new equiv set will contain one part with the inserted element
766         /// as its content.
767         /// The mode will switch to eEquiv_append after the insertion.
768         eEquiv_new_equiv,
769 
770         /// A new equiv part will be started with the inserted element.
771         /// If the insertion point is not in or near any existing equiv part
772         /// then an exception is thrown.
773         /// If...
774         /// A. the insertion point is exactly between two equiv sets
775         /// then a new equiv part will be created at the end of the first set
776         /// with the inserted element as its content.
777         /// B. the insertion point is at a boundary of any equiv part
778         /// then a new equiv part will be created at this point
779         /// with the inserted element as its content.
780         /// C. the insertion point is in the middle of an existing part
781         /// then the existing part will be split at the insertion point
782         /// and the new element will be added to the second part after splitting.
783         ///
784         /// The mode will switch to eEquiv_append after the insertion.
785         eEquiv_new_part,
786 
787         /// If the insertion point is just after any equiv part, including
788         /// the last one in an equiv, then the inserted element is appended
789         /// to the part, and the equiv mode will remain eEquiv_append.
790         /// Otherwise the equiv mode will switch to eEquiv_none.
791         /// Change the mode to eEquiv_none explicitly if you want to stop
792         /// expanding an existing equiv.
793         eEquiv_append,
794 
795         /// If the insertion point is just before any equiv part, including
796         /// the first one in an equiv, then the inserted element is prepended
797         /// to the part, and the equiv mode will remain eEquiv_prepend.
798         /// Otherwise the equiv mode will switch to eEquiv_none.
799         /// Change the mode to eEquiv_none explicitly if you want to stop
800         /// expanding an existing equiv.
801         eEquiv_prepend
802     };
803     /// Change the equiv modification mode, i.e. the way equiv sets are
    /// created or expanded by the Insert*() methods (see EEquivMode).
804     /// @sa InsertNull()
805     void SetEquivMode(EEquivMode mode);
806     /// Change equiv modification mode to add new equiv set for the next
807     /// insert operation.
808     /// @sa SetEquivMode()
StartNewEquiv(void)809     void StartNewEquiv(void)
810         {
811             SetEquivMode(eEquiv_new_equiv);
812         }
813     /// Change equiv modification mode to add new equiv part for the next
814     /// insert operation.
815     /// @sa SetEquivMode()
StartNewEquivPart(void)816     void StartNewEquivPart(void)
817         {
818             SetEquivMode(eEquiv_new_part);
819         }
820     /// Change equiv modification mode to normal behavior that will only
821     /// update existing equiv sets.
822     /// @sa SetEquivMode()
StopEquiv(void)823     void StopEquiv(void)
824         {
825             SetEquivMode(eEquiv_none);
826         }
827     /// Get the current equiv modification mode (see EEquivMode).
828     /// @sa SetEquivMode()
829     EEquivMode GetEquivMode(void) const;
830 
831     /// Remove an equiv set; all pieces that are part of the set will be
832     /// preserved as independent pieces.
833     void RemoveEquiv(size_t level = 0);
834     /// Create an equiv set with one part from the current position to
835     /// the position pointed to by the end_it argument, exclusive.
836     /// The end_it position must be after the current position.
837     /// If there are any conflicts with existing equiv sets or bond pairs
838     /// no new equiv set will be made and an exception will be thrown.
839     /// The new set's level will be assigned depending on its size in relation
840     /// to existing overlapping equiv sets.
841     void MakeEquiv(const CSeq_loc_I& end_it);
842     /// Create equiv set with one part from current position to
843     /// the position pointed by end_it argument exclusive.
844     /// The end_it position must be after current position.
845     /// If there are any conflicts with existing equiv sets or bond pairs
846     /// no new equiv set will be made and an exception will be thrown.
    /// NOTE(review): the text above duplicates the MakeEquiv() description
    /// and mentions an end_it argument that this method does not take.
    /// Judging by the name and signature it presumably breaks an equiv part
    /// at the current position in the equiv set selected by level --
    /// confirm against the implementation and rewrite this comment.
847     void MakeEquivPartBreak(size_t level = 0);
848 
849     /// Remove the bond at the current position - it may be either the A or B part.
850     void RemoveBond(void);
851     /// Make a bond at the current position (only A).
852     /// The current part must be a point.
853     /// If the current position is already a bond A part, it will be updated.
854     void MakeBondA(void);
855     /// Make a bond at the current position with the next position (A and B).
856     /// The current and next parts must be points.
857     /// If the current position is already a bond A part, it will be updated.
858     void MakeBondAB(void);
859     /// Make a bond at the previous position with the current position (A and B).
860     /// The current and previous parts must be points.
861     /// If the previous position is already a bond A part, it will be updated.
862     void MakeBondB(void);
863 
864 protected:
    // Keep the x_GetRangeInfo() overloads inherited from CSeq_loc_CI visible;
    // without this using-declaration the overload declared below would hide
    // them.
865     using CSeq_loc_CI::x_GetRangeInfo;
    // Non-const accessor returning a modifiable range-info record.
866     SSeq_loc_CI_RangeInfo& x_GetRangeInfo(void);
867 
    // Store the seq-id handle id into the given range-info record.
868     void x_SetSeq_id_Handle(SSeq_loc_CI_RangeInfo& info,
869                             const CSeq_id_Handle& id);
870 
    // Predicate consulted by x_CheckValidForInsert().
871     bool x_IsValidForInsert(void) const;
    // Calls x_ThrowNotValid(where) when x_IsValidForInsert() returns false.
872     void x_CheckValidForInsert(const char* where) const;
    // NOTE(review): presumably supplies the iterator's name for diagnostic
    // messages -- confirm against the implementation.
873     virtual const char* x_GetIteratorType(void) const;
874 };
875 
876 
877 /////////////////// CSeq_loc inline methods
878 
879 inline
InvalidateTotalRangeCache(void) const880 void CSeq_loc::InvalidateTotalRangeCache(void) const
881 {
882     m_TotalRangeCacheFrom = TSeqPos(kDirtyCache);
883 }
884 
885 
886 inline
InvalidateIdCache(void) const887 void CSeq_loc::InvalidateIdCache(void) const
888 {
889     m_IdCache = NULL;
890 }
891 
892 
893 inline
InvalidateCache(void) const894 void CSeq_loc::InvalidateCache(void) const
895 {
896     InvalidateTotalRangeCache();
897     InvalidateIdCache();
898 }
899 
900 
901 // constructor
902 inline
CSeq_loc(void)903 CSeq_loc::CSeq_loc(void)
904 {
905     InvalidateCache();
906 }
907 
908 
909 inline
GetTotalRange(void) const910 CSeq_loc::TRange CSeq_loc::GetTotalRange(void) const
911 {
912     TSeqPos range_from  = m_TotalRangeCacheFrom;
913     if ( range_from == TSeqPos(kDirtyCache) ) {
914         return x_UpdateTotalRange();
915     }
916     else {
917         TSeqPos range_to_open  = m_TotalRangeCacheToOpen;
918         return COpenRange<TSeqPos>(range_from, range_to_open);
919     }
920 }
921 
922 
923 inline
CheckId(const CSeq_id * & id,bool may_throw) const924 bool CSeq_loc::CheckId(const CSeq_id*& id, bool may_throw) const
925 {
926     const CSeq_id* my_id = m_IdCache;
927     if ( my_id == NULL ) {
928         if ( !x_CheckId(my_id, may_throw) ) {
929             return false;
930         }
931         m_IdCache = my_id;
932     }
933     return x_UpdateId(id, my_id, may_throw);
934 }
935 
936 
937 inline
GetId(void) const938 const CSeq_id* CSeq_loc::GetId(void) const
939 {
940     const CSeq_id* sip = NULL;
941     return CheckId(sip, false) ? sip : NULL;
942 }
943 
944 
945 inline
SetId(const CSeq_id & id)946 void CSeq_loc::SetId(const CSeq_id& id)
947 {
948     InvalidateIdCache();
949     CRef<CSeq_id> nc_id(new CSeq_id);
950     nc_id->Assign(id);
951     SetId(*nc_id);
952     m_IdCache = nc_id.GetPointer();
953 }
954 
955 
956 inline
SetNull(void)957 void CSeq_loc::SetNull(void)
958 {
959     InvalidateIdCache();
960     Tparent::SetNull();
961 }
962 
963 #define DEFINE_NCBI_SEQ_LOC_SETTERS(x) \
964 inline                                 \
965 void CSeq_loc::Set##x(T##x& v)         \
966 {                                      \
967     InvalidateCache();                 \
968     Tparent::Set##x(v);                \
969 }                                      \
970                                        \
971 inline                                 \
972 CSeq_loc::T##x& CSeq_loc::Set##x(void) \
973 {                                      \
974     InvalidateCache();                 \
975     return Tparent::Set##x();          \
976 }
977 
978 DEFINE_NCBI_SEQ_LOC_SETTERS(Empty)
DEFINE_NCBI_SEQ_LOC_SETTERS(Whole)979 DEFINE_NCBI_SEQ_LOC_SETTERS(Whole)
980 DEFINE_NCBI_SEQ_LOC_SETTERS(Int)
981 DEFINE_NCBI_SEQ_LOC_SETTERS(Packed_int)
982 DEFINE_NCBI_SEQ_LOC_SETTERS(Pnt)
983 DEFINE_NCBI_SEQ_LOC_SETTERS(Packed_pnt)
984 DEFINE_NCBI_SEQ_LOC_SETTERS(Mix)
985 DEFINE_NCBI_SEQ_LOC_SETTERS(Equiv)
986 DEFINE_NCBI_SEQ_LOC_SETTERS(Bond)
987 DEFINE_NCBI_SEQ_LOC_SETTERS(Feat)
988 
989 #undef DEFINE_NCBI_SEQ_LOC_SETTERS
990 
991 inline
992 bool CSeq_loc::IsReverseStrand(void) const
993 {
994     return IsReverse(GetStrand());
995 }
996 
997 
998 /////////////////// end of CSeq_loc inline methods
999 
1000 /////////////////// CSeq_loc_CI inline methods
1001 
1002 inline
SetStrand(ENa_strand strand)1003 void SSeq_loc_CI_RangeInfo::SetStrand(ENa_strand strand)
1004 {
1005     m_IsSetStrand = true;
1006     m_Strand = strand;
1007 }
1008 
1009 inline
x_CheckValid(const char * where) const1010 void CSeq_loc_CI::x_CheckValid(const char* where) const
1011 {
1012     if ( !x_IsValid() )
1013         x_ThrowNotValid(where);
1014 }
1015 
1016 inline
operator ++(void)1017 CSeq_loc_CI& CSeq_loc_CI::operator++ (void)
1018 {
1019     x_CheckValid("operator++");
1020     ++m_Index;
1021     return *this;
1022 }
1023 
1024 inline
GetSeq_id(void) const1025 const CSeq_id& CSeq_loc_CI::GetSeq_id(void) const
1026 {
1027     x_CheckValid("GetSeq_id()");
1028     return *x_GetRangeInfo().m_Id;
1029 }
1030 
1031 inline
GetSeq_id_Handle(void) const1032 CSeq_id_Handle CSeq_loc_CI::GetSeq_id_Handle(void) const
1033 {
1034     x_CheckValid("GetSeq_id_Handle()");
1035     return x_GetRangeInfo().m_IdHandle;
1036 }
1037 
1038 inline
GetRange(void) const1039 CSeq_loc_CI::TRange CSeq_loc_CI::GetRange(void) const
1040 {
1041     x_CheckValid("GetRange()");
1042     return x_GetRangeInfo().m_Range;
1043 }
1044 
1045 inline
IsSetStrand(void) const1046 bool CSeq_loc_CI::IsSetStrand(void) const
1047 {
1048     x_CheckValid("IsSetStrand()");
1049     return x_GetRangeInfo().m_IsSetStrand;
1050 }
1051 
1052 inline
GetStrand(void) const1053 ENa_strand CSeq_loc_CI::GetStrand(void) const
1054 {
1055     x_CheckValid("GetStrand()");
1056     return x_GetRangeInfo().m_Strand;
1057 }
1058 
1059 inline
GetFuzzFrom(void) const1060 const CInt_fuzz* CSeq_loc_CI::GetFuzzFrom(void) const
1061 {
1062     x_CheckValid("GetFuzzFrom()");
1063     return x_GetRangeInfo().m_Fuzz.first;
1064 }
1065 
1066 inline
GetFuzzTo(void) const1067 const CInt_fuzz* CSeq_loc_CI::GetFuzzTo(void) const
1068 {
1069     x_CheckValid("GetFuzzTo()");
1070     return x_GetRangeInfo().m_Fuzz.second;
1071 }
1072 
1073 inline
IsWhole(void) const1074 bool CSeq_loc_CI::IsWhole(void) const
1075 {
1076     x_CheckValid("IsWhole()");
1077     return x_GetRangeInfo().m_Range.IsWhole();
1078 }
1079 
1080 inline
IsEmpty(void) const1081 bool CSeq_loc_CI::IsEmpty(void) const
1082 {
1083     x_CheckValid("IsEmpty()");
1084     return x_GetRangeInfo().m_Range.Empty();
1085 }
1086 
1087 inline
IsPoint(void) const1088 bool CSeq_loc_CI::IsPoint(void) const
1089 {
1090     x_CheckValid("IsPoint()");
1091     return x_GetRangeInfo().m_Range.GetLength() == 1;
1092 }
1093 
1094 inline
GetPos(void) const1095 size_t CSeq_loc_CI::GetPos(void) const
1096 {
1097     return m_Index;
1098 }
1099 
1100 inline
Rewind(void)1101 void CSeq_loc_CI::Rewind(void)
1102 {
1103     m_Index = 0;
1104 }
1105 
1106 /////////////////// CSeq_loc_I inline methods
1107 
1108 inline
x_CheckValidForInsert(const char * where) const1109 void CSeq_loc_I::x_CheckValidForInsert(const char* where) const
1110 {
1111     if ( !x_IsValidForInsert() )
1112         x_ThrowNotValid(where);
1113 }
1114 
// NOTE(review): presumably tells the serialization code that CSeq_loc
// provides a post-read hook -- confirm against the definition of
// NCBISER_HAVE_POST_READ, which is not visible in this part of the file.
1115 NCBISER_HAVE_POST_READ(CSeq_loc)
1116 
1117 /////////////////// end of CSeq_loc_CI and CSeq_loc_I inline methods
1118 
1119 
1120 /* @} */
1121 
1122 
1123 END_objects_SCOPE // namespace ncbi::objects::
1124 END_NCBI_SCOPE
1125 
1126 #endif // OBJECTS_SEQLOC_SEQ_LOC_HPP
1127