1 /* $Id: Seq_loc.hpp 623038 2021-01-07 14:08:09Z ivanov $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Cliff Clausen, Eugene Vasilchenko, Mati Shomrat
27 *
28 * File Description:
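29 * Declares CSeq_loc, CSeqLocException, the ISynonymMapper / ILengthGetter interfaces and the Seq-loc iterators CSeq_loc_CI and CSeq_loc_I.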
30 *
31 * Remark:
32 * This code was originally generated by application DATATOOL
33 * using specifications from the ASN data definition file
34 * 'seqloc.asn'.
35 *
36 * ===========================================================================
37 */
38
39 #ifndef OBJECTS_SEQLOC_SEQ_LOC_HPP
40 #define OBJECTS_SEQLOC_SEQ_LOC_HPP
41
42
43 // generated includes
44 #include <objects/seqloc/Seq_loc_.hpp>
45 #include <objects/seqloc/Seq_loc_mix.hpp>
46 #include <objects/seqloc/Packed_seqpnt.hpp>
47 #include <objects/seqloc/Packed_seqint.hpp>
48 #include <objects/seqloc/Seq_id.hpp>
49 #include <objects/seq/seq_id_handle.hpp>
50 #include <objects/general/Int_fuzz.hpp>
51 //
52 ////
53 //#include <corelib/ncbiexpt.hpp>
54 //#include <util/range.hpp>
55 //#include <vector>
56
57 BEGIN_NCBI_SCOPE
58 BEGIN_objects_SCOPE // namespace ncbi::objects::
59
60 /** @addtogroup OBJECTS_Seqloc
61 *
62 * @{
63 */
64
65
/// Forward declarations of types referenced by CSeq_loc.
/// ISynonymMapper, ILengthGetter, CSeq_loc_CI and CSeq_loc_I are declared
/// later in this header.
/// NOTE(review): CSeq_id_Handle presumably comes from the included
/// seq_id_handle.hpp -- confirm.
66 class CSeq_id_Handle;
67 class ISynonymMapper;
68 class ILengthGetter;
69 class CSeq_loc_CI;
70 class CSeq_loc_I;
71
72 /// Seq-loc exceptions.
/// Derived from CException; the specific failure is identified by EErrCode.
73 class NCBI_SEQ_EXPORT CSeqLocException : public CException
74 {
75 public:
/// Error codes used by this exception class.
76 enum EErrCode {
77 eNotSet, ///< Seq-loc is not set
78 eMultipleId, ///< Seq-loc on multiple ids when one id is required
79 eUnsupported, ///< Seq-loc has data that is not supported yet
80 eBadLocation, ///< Seq-loc is incorrectly formed
81 eBadIterator, ///< Seq-loc iterator is in bad state
82 eIncomatible, ///< Seq-loc type is incompatible with operation (identifier kept exactly as declared)
83 eOutOfRange, ///< Parameter is out of valid range
84
85 eOtherError ///< NOTE(review): undocumented here; presumably a catch-all code -- confirm
86 };
87
/// Overrides the base-class virtual of the same name.
/// NOTE(review): presumably returns a textual name for the current error
/// code; the definition is not in this header -- confirm.
88 virtual const char* GetErrCodeString(void) const override;
/// NOTE(review): NCBI_EXCEPTION_DEFAULT is a macro defined outside this file;
/// presumably it supplies the standard exception boilerplate -- confirm.
89 NCBI_EXCEPTION_DEFAULT(CSeqLocException, CException);
90 };
91
92
/// Seq-loc class.
/// Extends CSeq_loc_Base (per the file header, code originally generated by
/// DATATOOL from 'seqloc.asn') with range, strand and id queries, setters
/// that take cache invalidation into account, comparison, and
/// merge / add / subtract / intersect operations.
/// Copy construction and copy assignment are declared private.
93 class NCBI_SEQLOC_EXPORT CSeq_loc : public CSeq_loc_Base
94 {
95 public:
96 typedef CSeq_loc_Base Tparent; ///< Base class
97 typedef CPacked_seqpnt_Base::TPoints TPoints; ///< Points container of CPacked_seqpnt_Base
98 typedef CPacked_seqint_Base::Tdata TIntervals; ///< Data container of CPacked_seqint_Base
99 typedef CSeq_loc_mix_Base::Tdata TLocations; ///< Data container of CSeq_loc_mix_Base
100 typedef CSeq_id TId; ///< Seq-id type
101 typedef ENa_strand TStrand; ///< Strand type
102 typedef TSeqPos TPoint; ///< Single sequence position
103 typedef CPacked_seqint::TRanges TRanges; ///< Ranges container of CPacked_seqint
104
105 /// constructors
/// NOTE(review): the parameter lists suggest construction of a point,
/// packed-point, interval and packed-interval location on 'id'; the
/// definitions are not in this header -- confirm.
106 CSeq_loc(void);
107 CSeq_loc(E_Choice index);
108 CSeq_loc(TId& id, TPoint point, TStrand strand = eNa_strand_unknown);
109 CSeq_loc(TId& id, const TPoints& points, TStrand strand = eNa_strand_unknown);
110 CSeq_loc(TId& id, TPoint from, TPoint to, TStrand strand = eNa_strand_unknown);
111 CSeq_loc(TId& id, TRanges ivals, TStrand strand = eNa_strand_unknown);
112
113 // destructor
114 virtual ~CSeq_loc(void);
115
116 /// See related functions in objmgr/util/seq_loc_util.hpp:
117 ///
118 /// TSeqPos GetLength(const CSeq_loc&, CScope*)
119 /// bool IsOneBioseq(const CSeq_loc&, CScope*)
120 /// const CSeq_id& GetId(const CSeq_loc&, CScope*)
121 /// TSeqPos GetStart(const CSeq_loc&, CScope*)
122 /// sequence::ECompare Compare(const CSeq_loc&, CSeq_loc&, CScope*)
123 /// sequence::SeqLocMerge(...)
124 ///
125
126 typedef CRange<TSeqPos> TRange; ///< Range of sequence positions
127
/// Get the total range of the location.
/// NOTE(review): the result is presumably kept in the m_TotalRangeCache*
/// members declared below -- confirm.
128 TRange GetTotalRange(void) const;
/// Invalidate the cached total range. Declared const; the cache members
/// are mutable.
129 void InvalidateTotalRangeCache(void) const;
130
131 /// Check if strand is set for any/all part(s) of the seq-loc
132 /// depending on the flag.
133 bool IsSetStrand(EIsSetStrand flag = eIsSetStrand_Any) const;
134 /// Get the location's strand. If no strand is set, returns
135 /// eNa_strand_unknown. If different strands are set in different
136 /// parts, returns eNa_strand_other. Explicitly set unknown strand
137 /// is ignored when combined with plus or minus strand.
138 /// Examples:
139 /// not-set = unknown
140 /// not-set + plus = plus
141 /// unknown + plus = plus
142 /// unknown + both = other
143 /// plus + minus = other
144 ENa_strand GetStrand(void) const;
145 /// Return true if all ranges have reverse strand
146 bool IsReverseStrand(void) const;
147 /// Flip the strand (e.g. plus to minus)
148 void FlipStrand(void);
149 /// Set the strand for all of the location's ranges.
150 void SetStrand(ENa_strand strand);
151 /// Reset the strand on this location
152 void ResetStrand(void);
153
154 /// Return start and stop positions of the seq-loc.
155 /// Start position is the start of the first range in the seq-loc,
156 /// stop is the end of the last range. If the eExtreme_Biological flag
157 /// is used, the effective order of ranges and range direction depends
158 /// on the strand.
159 /// NOTE: The returned values are not necessarily the same as
160 /// the boundaries returned by GetTotalRange(). It's also not
161 /// guaranteed that the value returned by GetStart() is less or
162 /// equal to the one returned by GetStop().
163 TSeqPos GetStart(ESeqLocExtremes ext) const;
164 TSeqPos GetStop (ESeqLocExtremes ext) const;
165
166 /// Special case for circular sequences. No ID is checked for
167 /// circular locations. If the sequence is not circular
168 /// (seq_len == kInvalidSeqPos) the function works like GetTotalRange()
169 TSeqPos GetCircularLength(TSeqPos seq_len) const;
170
171 /// Appends a label suitable for display (e.g., error messages)
172 /// label must point to an existing string object
173 /// Method just returns if label is null. Note this label is NOT
174 /// GenBank-style.
175 void GetLabel(string* label) const;
176
177 /// check start or stop of location for e_Lim fuzz
178 bool IsPartialStart(ESeqLocExtremes ext) const;
179 bool IsPartialStop(ESeqLocExtremes ext) const;
180
181 /// set / remove e_Lim fuzz on start or stop
182 /// (lt/gt - indicating partial interval)
183 void SetPartialStart(bool val, ESeqLocExtremes ext);
184 void SetPartialStop (bool val, ESeqLocExtremes ext);
185
186 /// check if parts of the seq-loc are missing
187 bool IsTruncatedStart(ESeqLocExtremes ext) const;
188 bool IsTruncatedStop (ESeqLocExtremes ext) const;
189
190 /// set / remove e_Lim fuzz on start or stop
191 /// (tl/tr - indicating removed parts of the seq-loc)
192 void SetTruncatedStart(bool val, ESeqLocExtremes ext);
193 void SetTruncatedStop (bool val, ESeqLocExtremes ext);
194
195 /// Get the id of the location
196 /// return NULL if has multiple ids or no id at all.
197 const CSeq_id* GetId(void) const;
198
199 /// check that the 'id' field in all parts of the location is the same
200 /// as the specified id.
201 /// if the id parameter is NULL will return the location's id (if unique)
202 /// @return true on success
203 bool CheckId(const CSeq_id*& id, bool may_throw = true) const;
/// Invalidate the cached id. Declared const; m_IdCache is mutable.
204 void InvalidateIdCache(void) const;
205
206 /// set the 'id' field in all parts of this location
207 void SetId(CSeq_id& id); // stores id
208 void SetId(const CSeq_id& id); // stores a new copy of id
209
210 /// Combine invalidation of all cached values
211 void InvalidateCache(void) const;
212
213 /// Override Assign() to incorporate cache invalidation.
214 virtual void Assign(const CSerialObject& source,
215 ESerialRecursionMode how = eRecursive);
216
217 /// Override all setters to incorporate cache invalidation.
218 void SetNull(void);
219 void SetEmpty(TEmpty& v);
220 TEmpty& SetEmpty(void);
221 void SetWhole(TWhole& v);
222 TWhole& SetWhole(void);
223 void SetInt(TInt& v);
224 TInt& SetInt(void);
225 void SetPacked_int(TPacked_int& v);
226 TPacked_int& SetPacked_int(void);
227 void SetPnt(TPnt& v);
228 TPnt& SetPnt(void);
229 void SetPacked_pnt(TPacked_pnt& v);
230 TPacked_pnt& SetPacked_pnt(void);
231 void SetMix(TMix& v);
232 TMix& SetMix(void);
233 void SetEquiv(TEquiv& v);
234 TEquiv& SetEquiv(void);
235 void SetBond(TBond& v);
236 TBond& SetBond(void);
237 void SetFeat(TFeat& v);
238 TFeat& SetFeat(void);
239
240 /// Invalidate id/range cache after deserialization.
241 void PostRead(void) const;
242
243 /// Flags for location comparison.
244 enum ECompareFlags {
245 fCompare_Default = 0,
246 fCompare_Strand = 1
247 };
248 typedef int TCompareFlags; ///< Integer type of the 'flags' argument of Compare(); presumably holds ECompareFlags value(s) -- confirm
249
250 /// @deprecated Use the 2-argument version taking flags.
251 NCBI_DEPRECATED int Compare(const CSeq_loc& loc) const;
252
253 /// Compare locations by total range for each seq-id.
254 /// Compares seq-ids (@sa CSeq_loc::CompareOrdered()), then compares
255 /// total range starts (left to right) and lengths (longest first).
256 /// Optionally compares strands; location without strand goes first.
257 int Compare(const CSeq_loc& loc, TCompareFlags flags) const;
258
259 /// Used as a helper for determining which pieces of a
260 /// CSeq_loc to compare.
261 class ISubLocFilter {
262 public:
~ISubLocFilter()263 virtual ~ISubLocFilter() {}
264 // Returns true for pieces we should use.
265 // Must be able to handle "NULL" input
266 virtual bool operator()( const CSeq_id *id ) const = 0;
267 };
268
269 /// Compare first-level sub-locations sequentially to order them
270 /// by biological "complexity". More "complex" location will come last.
271 /// Sub-locations are checked in Seq-loc-mix and Packed-seqint.
272 /// Minus strand locations' order is reversed.
273 /// Seq-ids are not checked in this method, unless you set
274 /// filter, which will allow the user to pick which parts to skip.
275 int CompareSubLoc(const CSeq_loc& loc, ENa_strand strand,
276 const ISubLocFilter *filter = NULL) const;
277
278 /// Simple adding of seq-locs.
279 void Add(const CSeq_loc& other);
280
/// NOTE(review): presumably converts this location to a Seq-loc-mix in
/// place; the definition is not in this header -- confirm.
281 void ChangeToMix(void);
282 /// Works only if location is currently an interval, point,
283 /// packed-int (handled trivially), or a mix built recursively from these.
284 void ChangeToPackedInt(void);
285
286 /// CSeq_loc operations
287 ///
288 /// Flags for operations:
289 /// fStrand_Ignore - if set, strands will be ignored and any ranges
290 /// may be merged/sorted. If not set, ranges on plus and minus strands
291 /// are treated as different sub-sets. In some operations strand may
292 /// still be checked (see fMerge_Abutting and order of ranges).
293 ///
294 /// NOTE: merge flags do not sort ranges, so only overlaps between
295 /// neighbor ranges can be detected. To merge all overlapping ranges
296 /// add fSort flag. The only exception is fSortAndMerge_All which
297 /// already includes fSort.
298 ///
299 /// fMerge_Contained - merges (removes) any range which is completely
300 /// contained in another range.
301 /// fMerge_Abutting - merge abutting ranges. Also forces merging of
302 /// contained ranges. Even if fStrand_Ignore is set, only the ranges
303 /// with the correct order are merged (e.g. loc2.to == loc1.from must be
304 /// true if loc1.strand is minus).
305 /// fMerge_Overlapping - merge overlapping ranges. Also forces merging of
306 /// contained ranges.
307 /// fMerge_All - merge any ranges if possible (contained, overlapping,
308 /// abutting). The flag does not force sorting, so only neighbor ranges
309 /// can be merged. To sort ranges before merging add fSort flag or use
310 /// fSortAndMerge_All.
311 /// fSortAndMerge_All - combination of fSort and fMerge_All.
312 /// fMerge_SingleRange - creates a single range, covering all original ranges.
313 /// Strand is set to the first strand in the original seq-loc, regardless of the
314 /// strand flag.
315 ///
316 /// fSort - forces sorting of the resulting ranges. All ranges on the
317 /// same ID are grouped together, but the order of IDs is undefined. Strand
318 /// is preserved if all values having the same direction are equal. Otherwise
319 /// strand is reset to plus and minus (in strand-preserve mode) or unknown (in
320 /// strand-ignore mode). NULLs are always merged to a single NULL. The order
321 /// of locations for each ID is: NULL, whole, empty, plus strand intervals,
322 /// minus strand intervals.
323
324 enum EOpFlags {
325 fStrand_Ignore = 1<<0,
326 fMerge_Contained = 1<<1,
327 fMerge_AbuttingOnly = 1<<2,
328 fMerge_Abutting = fMerge_AbuttingOnly | fMerge_Contained,
329 fMerge_OverlappingOnly = 1<<3,
330 fMerge_Overlapping = fMerge_OverlappingOnly | fMerge_Contained,
331 fMerge_All = fMerge_Abutting | fMerge_Overlapping,
332 fMerge_SingleRange = 1<<4,
333 fSort = 1<<5,
334 fSortAndMerge_All = fSort | fMerge_All
335 };
336 typedef int TOpFlags; ///< Integer type of the 'flags' argument of the operations below; presumably a bitwise combination of EOpFlags -- confirm
337
338 /// All functions create and return a new seq-loc object.
339 /// Optional synonym mapper may be provided to detect and convert
340 /// synonyms of a bioseq. Length getter is used by Subtract() to
341 /// calculate real sequence length.
342
343 /// Merge ranges depending on flags, return a new seq-loc object.
344 CRef<CSeq_loc> Merge(TOpFlags flags,
345 ISynonymMapper* syn_mapper) const;
346
347 /// Add seq-loc, merge/sort resulting ranges depending on flags.
348 /// Return a new seq-loc object.
349 CRef<CSeq_loc> Add(const CSeq_loc& other,
350 TOpFlags flags,
351 ISynonymMapper* syn_mapper) const;
352
353 /// Subtract seq-loc from this, merge/sort resulting ranges depending on
354 /// flags. Return a new seq-loc object.
355 CRef<CSeq_loc> Subtract(const CSeq_loc& other,
356 TOpFlags flags,
357 ISynonymMapper* syn_mapper,
358 ILengthGetter* len_getter) const;
359
360 /// Find the intersection with the seq-loc, merge/sort resulting
361 /// ranges depending on flags. Return a new seq-loc object.
362 CRef<CSeq_loc> Intersect(const CSeq_loc& other,
363 TOpFlags flags,
364 ISynonymMapper* syn_mapper) const;
365
366 /// Make CSeq_loc look like an STL container
367 typedef CSeq_loc_CI const_iterator;
368 const_iterator begin(void) const;
369 const_iterator end(void) const;
370
371 private:
372 // Prohibit copy constructor & assignment operator
373 CSeq_loc(const CSeq_loc&);
374 CSeq_loc& operator= (const CSeq_loc&);
375
// Internal helpers; their definitions are not visible in this header.
376 TRange x_UpdateTotalRange(void) const;
377 TRange x_CalculateTotalRangeCheckId(const CSeq_id*& id) const;
378 bool x_CheckId(const CSeq_id*& id, bool may_throw = true) const;
379 bool x_UpdateId(const CSeq_id*& total_id, const CSeq_id* id,
380 bool may_throw = true) const;
381 void x_ChangeToMix(const CSeq_loc& other);
382 void x_ChangeToPackedInt(const CSeq_interval& other);
383 void x_ChangeToPackedInt(const CSeq_loc& other);
384 void x_ChangeToPackedPnt(const CSeq_loc& other);
385
386 /// Compare single-id locations, or throw an exception if any location
387 /// is multi-id.
388 int x_CompareSingleId(const CSeq_loc& loc, const CSeq_id* id1,
389 const CSeq_id* id2,
390 TCompareFlags flags) const;
391
// NOTE(review): negative sentinel constants; presumably special states of
// the cached values below -- their use is not visible in this header.
392 enum {
393 kDirtyCache = -2,
394 kSeveralIds = -3
395 };
396
// Cached total range; mutable so that const methods can update it.
// NOTE(review): "ToOpen" presumably denotes an open (exclusive) end -- confirm.
397 mutable volatile TSeqPos m_TotalRangeCacheFrom;
398 mutable volatile TSeqPos m_TotalRangeCacheToOpen;
399 // Seq-id for the whole seq-loc or null if multiple IDs were found
400 mutable const CSeq_id* volatile m_IdCache;
401 };
402
403
404 /// Interface for mapping IDs to the best synonym. Should provide
405 /// GetBestSynonym() method which returns the ID which should replace
406 /// the original one in the destination seq-loc.
/// Passed as the 'syn_mapper' argument of CSeq_loc::Merge(), Add(),
/// Subtract() and Intersect().
407 class ISynonymMapper
408 {
409 public:
ISynonymMapper(void)410 ISynonymMapper(void) {}
~ISynonymMapper(void)411 virtual ~ISynonymMapper(void) {}
412
/// Return the ID that should replace 'id' in the destination seq-loc.
/// Pure virtual: implemented by the caller-supplied mapper.
/// @param id  The original Seq-id.
/// @return    Handle of the replacement ID.
413 virtual CSeq_id_Handle GetBestSynonym(const CSeq_id& id) = 0;
414 };
415
416
417 /// Interface for getting bioseq length. Should provide GetLength()
418 /// method.
/// Passed as the 'len_getter' argument of CSeq_loc::Subtract().
419 class ILengthGetter
420 {
421 public:
ILengthGetter(void)422 ILengthGetter(void) {}
~ILengthGetter(void)423 virtual ~ILengthGetter(void) {}
424
/// Return the length of the bioseq identified by 'id'.
/// Pure virtual: implemented by the caller-supplied getter.
/// @param id  Seq-id of the bioseq.
/// @return    Bioseq length.
425 virtual TSeqPos GetLength(const CSeq_id& id) = 0;
426 };
427
428
429 // Simple location structure: id/from/to
// Returned by reference from CSeq_loc_CI::x_GetRangeInfo().
430 struct NCBI_SEQLOC_EXPORT SSeq_loc_CI_RangeInfo {
431 SSeq_loc_CI_RangeInfo(void);
432 ~SSeq_loc_CI_RangeInfo(void);
433
// NOTE(review): presumably updates m_IsSetStrand and m_Strand; the
// definition is not in this header -- confirm.
434 void SetStrand(ENa_strand strand);
435
436 typedef CSeq_loc::TRange TRange; // Same range type as CSeq_loc
437
438 CSeq_id_Handle m_IdHandle; // Seq-id as a handle
439 CConstRef<CSeq_id> m_Id; // Seq-id object
440 TRange m_Range; // Range (from/to)
441 bool m_IsSetStrand; // NOTE(review): presumably tells whether m_Strand is meaningful -- confirm
442 ENa_strand m_Strand; // Strand value
443 CConstRef<CSeq_loc> m_Loc; // NOTE(review): presumably the seq-loc this range comes from -- confirm
444 pair<CConstRef<CInt_fuzz>, CConstRef<CInt_fuzz> > m_Fuzz; // NOTE(review): presumably (from, to) fuzz; null when not fuzzy -- confirm
445 };
446
447
/// Forward declaration of the iterator implementation class; CSeq_loc_CI
/// holds it through its CRef<CSeq_loc_CI_Impl> m_Impl member.
448 class CSeq_loc_CI_Impl;
449
450 /// Seq-loc iterator class -- iterates all intervals from a seq-loc
451 /// in the correct order.
/// Read-only iterator: the public interface below only exposes getters and
/// position control. It is also CSeq_loc::const_iterator and the base class
/// of CSeq_loc_I.
452 class NCBI_SEQLOC_EXPORT CSeq_loc_CI
453 {
454 public:
455 /// Options for empty locations processing
456 enum EEmptyFlag {
457 eEmpty_Skip, ///< ignore empty locations
458 eEmpty_Allow ///< treat empty locations as usual
459 };
/// Order in which sub-locations are iterated
460 enum ESeqLocOrder {
461 eOrder_Positional, ///< Iterate sub-locations in positional order
462 eOrder_Biological ///< Iterate sub-locations in biological order
463 };
464 typedef CSeq_loc::TRange TRange; ///< Same range type as CSeq_loc
465
466 /// constructors
467 CSeq_loc_CI(void);
/// Construct an iterator over 'loc'. By default empty locations are
/// skipped and sub-locations are visited in biological order.
468 CSeq_loc_CI(const CSeq_loc& loc,
469 EEmptyFlag empty_flag = eEmpty_Skip,
470 ESeqLocOrder order = eOrder_Biological);
471 /// construct iterator at a different position in the same location
472 /// @sa GetPos()
473 CSeq_loc_CI(const CSeq_loc_CI& iter, size_t pos);
474 /// destructor
475 virtual ~CSeq_loc_CI(void);
476
/// Copy constructor and copy assignment
477 CSeq_loc_CI(const CSeq_loc_CI& iter);
478 CSeq_loc_CI& operator= (const CSeq_loc_CI& iter);
479
/// Pre-increment operator
480 CSeq_loc_CI& operator++ (void);
/// NOTE(review): DECLARE_OPERATOR_BOOL is a macro defined outside this file;
/// presumably it declares a boolean conversion based on x_IsValid() -- confirm.
481 DECLARE_OPERATOR_BOOL(x_IsValid());
482
/// Iterator comparison
483 bool operator== (const CSeq_loc_CI& iter) const;
484 bool operator!= (const CSeq_loc_CI& iter) const;
485
486 /// Location of type equiv define set of equivalent locations.
487 /// Each equiv set consist of several equivalent parts.
488 /// Each equiv part can contain equiv location too,
489 /// so equiv sets are recursive.
490
491 /// Return true if current position is part of a bond
492 bool IsInBond(void) const;
493 /// Return true if current position is A part of a bond
494 bool IsBondA(void) const;
495 /// Return true if current position is B part of a bond
496 bool IsBondB(void) const;
497 /// Return iterators that cover bond of current position
498 /// result.first is the first segment in the equiv set
499 /// result.second is the first segment after the equiv set
/// NOTE(review): the two lines above say "equiv set"; presumably the bond
/// is meant -- confirm.
500 pair<CSeq_loc_CI, CSeq_loc_CI> GetBondRange(void) const;
501
502 /// Return true if location has equiv parts
503 bool HasEquivSets(void) const;
504 /// Return true if current position is in some equiv part
505 bool IsInEquivSet(void) const;
506 /// Return number of recursive equiv parts the current position is in
507 size_t GetEquivSetsCount(void) const;
508 /// Return iterators that cover equiv set of current position
509 /// result.first is the first segment in the equiv set
510 /// result.second is the first segment after the equiv set
511 /// level specify equiv set if there are more than one of them
512 /// level = 0 is the smallest equiv set (innermost)
513 pair<CSeq_loc_CI, CSeq_loc_CI> GetEquivSetRange(size_t level = 0) const;
514 /// Return iterators that cover equiv part of current position
515 /// result.first is the first segment in the equiv part
516 /// result.second is the first segment after the equiv part
517 /// level specify equiv set if there are more than one of them
518 /// level = 0 is the smallest equiv set (innermost)
519 pair<CSeq_loc_CI, CSeq_loc_CI> GetEquivPartRange(size_t level = 0) const;
520
521 /// Get seq_id of the current location
522 const CSeq_id& GetSeq_id(void) const;
523 CSeq_id_Handle GetSeq_id_Handle(void) const;
524
525 /// Get the range
526 TRange GetRange(void) const;
527 /// Get strand
528 bool IsSetStrand(void) const;
529 ENa_strand GetStrand(void) const;
530
531 /// Get seq-loc for the current iterator position. New CSeq_loc object may
532 /// be created if the current range is a part of a packed/mixed seq-loc.
533 /// The resulting seq-loc will always include only one range (which may
534 /// be whole or empty).
535 /// @sa GetEmbeddingSeq_loc
536 CConstRef<CSeq_loc> GetRangeAsSeq_loc(void) const;
537
538 /// Get the nearest seq-loc containing the current range.
539 /// For packed/mixed locations the embedding seq-loc may
540 /// include other ranges.
541 /// @note Don't ever confuse it with GetRangeAsSeq_loc!
542 const CSeq_loc& GetEmbeddingSeq_loc(void) const;
543
544 /// @deprecated You probably actually wanted to use GetRangeAsSeq_loc
545 /// or GetEmbeddingSeq_loc instead.
546 NCBI_DEPRECATED const CSeq_loc& GetSeq_loc(void) const;
547
548 // Return null if non-fuzzy
549 const CInt_fuzz* GetFuzzFrom(void) const;
550 const CInt_fuzz* GetFuzzTo (void) const;
551
552 /// True if the current location is a whole sequence
553 bool IsWhole(void) const;
554 /// True if the current location is empty
555 bool IsEmpty(void) const;
556 /// True if the current location is a single point
557 bool IsPoint(void) const;
558
559 /// Reset the iterator to the initial state
560 void Rewind(void);
561
562 /// Get number of ranges.
563 size_t GetSize(void) const;
564
565 /// Get iterator's position.
566 size_t GetPos(void) const;
567
568 /// Set iterator's position.
569 void SetPos(size_t pos);
570
571 protected:
// NOTE(review): presumably returns the range info for the current
// position -- confirm.
572 const SSeq_loc_CI_RangeInfo& x_GetRangeInfo(void) const;
573
// Reference-counted implementation object
574 CRef<CSeq_loc_CI_Impl> m_Impl;
575
576 // Check the iterator position
577 bool x_IsValid(void) const;
578 // Check the position, throw exception if not valid
// NOTE(review): the comment above appears to describe x_CheckValid() /
// x_ThrowNotValid() rather than x_GetIteratorType(); the latter is virtual
// and presumably returns an iterator type name for messages -- confirm.
579 virtual const char* x_GetIteratorType(void) const;
580 void x_CheckValid(const char* where) const;
581 void x_ThrowNotValid(const char* where) const;
582
// NOTE(review): presumably the current position as reported by GetPos() -- confirm.
583 size_t m_Index;
584 };
585
586
587 /// Seq-loc iterator class -- iterates all intervals from a seq-loc
588 /// in the correct order.
589 class NCBI_SEQLOC_EXPORT CSeq_loc_I : public CSeq_loc_CI
590 {
591 public:
592 /// Options for creation modified locations
593 /// Bond and equiv types are preserved if possible
594 enum EMakeType {
595 eMake_CompactType, /// use most compact Seq-loc type (default)
596 eMake_PreserveType /// keep original Seq-loc type if possible
597 };
598
599 /// constructors
600 CSeq_loc_I(void);
601 CSeq_loc_I(CSeq_loc& loc);
602 /// construct iterator at a different position in the same location
603 /// @sa GetPos()
604 CSeq_loc_I(const CSeq_loc_I& iter, size_t pos);
605 /// destructor
606 virtual ~CSeq_loc_I(void);
607
608 /// return true of any part was changed since initialization
609 bool HasChanges(void) const;
610
611 /// return constructed CSeq_loc with all changes
612 CRef<CSeq_loc> MakeSeq_loc(EMakeType make_type = eMake_CompactType) const;
613
614 /// Delete current element, and make iterator to point to the next element.
615 /// All other iterators of the same CSeq_loc object will become invalid.
616 /// If the deleted element is contained in any equiv set then the equiv set
617 /// and its corresponding part will be reduced in size appropriately,
618 /// and if the part and/or the set become empty after the deletion
619 /// they will be removed completely.
620 void Delete(void);
621
622 /// Set of Insert*() methods.
623 /// All of them insert new element before the one the iterator points to.
624 /// If the iterator is at the end of CSeq_loc then the new element is
625 /// inserted after the last element of the CSeq_loc.
626 /// After the insertion this iterator will point to the element
627 /// it was pointing before (or end), and the result iterator will point
628 /// to the inserted element.
629 /// All other iterators of the same CSeq_loc object will become invalid.
630 /// If the insertion point is completely within an equiv set (excluding
631 /// the equiv set boundary) the equiv set will be expanded appropriately.
632 /// If the insertion point is completely within an equiv part (excluding
633 /// the equiv part boundary) the new element is added to that equiv part.
634 /// If the insertion point is between two equiv parts of the same set
635 /// then the new element is appended to the part just before
636 /// the insertion point.
637 /// The effect of the insertion on equiv sets can be modified
638 /// by calling SetEquivMode(EEquivMode).
639 /// @sa SetEquivMode()
640 CSeq_loc_I InsertNull(void);
641
642 /// Insert new element before the current one (@sa InsertNull()).
643 CSeq_loc_I InsertEmpty(const CSeq_id_Handle& id);
644 /// Insert new element before the current one (@sa InsertNull()).
InsertEmpty(const CSeq_id & id)645 CSeq_loc_I InsertEmpty(const CSeq_id& id)
646 {
647 return InsertEmpty(CSeq_id_Handle::GetHandle(id));
648 }
649 /// Insert new element before the current one (@sa InsertNull()).
650 CSeq_loc_I InsertWhole(const CSeq_id_Handle& id);
651 /// Insert new element before the current one (@sa InsertNull()).
InsertWhole(const CSeq_id & id)652 CSeq_loc_I InsertWhole(const CSeq_id& id)
653 {
654 return InsertWhole(CSeq_id_Handle::GetHandle(id));
655 }
656
657 /// Insert new element before the current one (@sa InsertNull()).
658 /// The strand value eNa_strand_unknown produces strand field not set,
659 /// If eNa_strand_unknown is expicitly required, call SetStrand().
660 CSeq_loc_I InsertInterval(const CSeq_id_Handle& id,
661 const TRange& range,
662 ENa_strand strand = eNa_strand_unknown);
663 /// Insert new element before the current one (@sa InsertNull()).
664 /// The strand value eNa_strand_unknown produces strand field not set,
665 /// If eNa_strand_unknown is expicitly required, call SetStrand().
InsertInterval(const CSeq_id & id,const TRange & range,ENa_strand strand=eNa_strand_unknown)666 CSeq_loc_I InsertInterval(const CSeq_id& id,
667 const TRange& range,
668 ENa_strand strand = eNa_strand_unknown)
669 {
670 return InsertInterval(CSeq_id_Handle::GetHandle(id), range, strand);
671 }
672 /// Insert new element before the current one (@sa InsertNull()).
673 /// The strand value eNa_strand_unknown produces strand field not set,
674 /// If eNa_strand_unknown is expicitly required, call SetStrand().
InsertInterval(const CSeq_id & id,TSeqPos from,TSeqPos to,ENa_strand strand=eNa_strand_unknown)675 CSeq_loc_I InsertInterval(const CSeq_id& id,
676 TSeqPos from, TSeqPos to,
677 ENa_strand strand = eNa_strand_unknown)
678 {
679 return InsertInterval(id, TRange(from, to), strand);
680 }
681 /// Insert new element before the current one (@sa InsertNull()).
682 /// The strand value eNa_strand_unknown produces strand field not set,
683 /// If eNa_strand_unknown is expicitly required, call SetStrand().
InsertInterval(const CSeq_id_Handle & id,TSeqPos from,TSeqPos to,ENa_strand strand=eNa_strand_unknown)684 CSeq_loc_I InsertInterval(const CSeq_id_Handle& id,
685 TSeqPos from, TSeqPos to,
686 ENa_strand strand = eNa_strand_unknown)
687 {
688 return InsertInterval(id, TRange(from, to), strand);
689 }
690
691 /// Insert new element before the current one (@sa InsertNull()).
692 /// The strand value eNa_strand_unknown produces strand field not set,
693 /// If eNa_strand_unknown is expicitly required, call SetStrand().
694 CSeq_loc_I InsertPoint(const CSeq_id_Handle& id,
695 TSeqPos pos,
696 ENa_strand strand = eNa_strand_unknown);
697 /// Insert new element before the current one (@sa InsertNull()).
698 /// The strand value eNa_strand_unknown produces strand field not set,
699 /// If eNa_strand_unknown is expicitly required, call SetStrand().
InsertPoint(const CSeq_id & id,TSeqPos pos,ENa_strand strand=eNa_strand_unknown)700 CSeq_loc_I InsertPoint(const CSeq_id& id,
701 TSeqPos pos,
702 ENa_strand strand = eNa_strand_unknown)
703 {
704 return InsertPoint(CSeq_id_Handle::GetHandle(id), pos, strand);
705 }
706
707 /// Set seq_id of the current location
708 void SetSeq_id_Handle(const CSeq_id_Handle& id);
709 /// Set seq_id of the current location
SetSeq_id(const CSeq_id & id)710 void SetSeq_id(const CSeq_id& id)
711 {
712 SetSeq_id_Handle(CSeq_id_Handle::GetHandle(id));
713 }
714
715 /// Set the range
716 void SetRange(const TRange& range);
717 /// Set the range from position
718 void SetFrom(TSeqPos from);
719 /// Set the range to position
720 void SetTo(TSeqPos to);
721 /// Set the range from and to positions
722 void SetPoint(TSeqPos pos);
723
724 /// Reset the range strand
725 void ResetStrand(void);
726 /// Set the range strand
727 void SetStrand(ENa_strand strand);
728
729 /// Reset fuzz from
730 void ResetFuzzFrom(void);
731 /// Change fuzz from
732 void SetFuzzFrom(CInt_fuzz& fuzz);
733 /// Reset fuzz to
734 void ResetFuzzTo(void);
735 /// Change fuzz to values
736 void SetFuzzTo(CInt_fuzz& fuzz);
737 /// Reset fuzz of a point
738 void ResetFuzz(void);
739 /// Change fuzz of a point
740 void SetFuzz(CInt_fuzz& fuzz);
741
742 /// Return iterators that cover equiv set of current position
743 /// result.first is the first segment in the equiv set
744 /// result.second is the first segment after the equiv set
745 /// level specify equiv set if there are more than one of them
746 /// level = 0 is the smallest equiv set (innermost)
747 pair<CSeq_loc_I, CSeq_loc_I> GetEquivSetRange(size_t level = 0) const;
748 /// Return iterators that cover equiv part of current position
749 /// result.first is the first segment in the equiv part
750 /// result.second is the first segment after the equiv part
751 /// level specify equiv set if there are more than one of them
752 /// level = 0 is the smallest equiv set (innermost)
753 pair<CSeq_loc_I, CSeq_loc_I> GetEquivPartRange(size_t level = 0) const;
754
/// This enum defines the way equiv sets are expanded or created
/// when one of the Insert*() methods is called.
enum EEquivMode {
    /// By default no equiv sets are created or expanded except
    /// if the insertion point is completely inside of an equiv or its part.
    eEquiv_none,

    /// A new equiv set will be created, even if the insertion point
    /// is already inside of an existing equiv, so that the new equiv set
    /// may become a sub-unit of an existing equiv set.
    /// The new equiv set will contain one part with the inserted element
    /// as its content.
    /// The mode will switch to eEquiv_append after the insertion.
    eEquiv_new_equiv,

    /// A new equiv part will be started with the inserted element.
    /// If the insertion point is not in or near any existing equiv part
    /// then an exception is thrown.
    /// If...
    /// A. the insertion point is exactly between two equiv sets
    ///    then a new equiv part will be created at the end of the first set
    ///    with the inserted element as its content.
    /// B. the insertion point is at a boundary of any equiv part
    ///    then a new equiv part will be created at this point
    ///    with the inserted element as its content.
    /// C. the insertion point is in the middle of an existing part
    ///    then the existing part will be split at the insertion point
    ///    and the new element will be added to the second part after
    ///    splitting.
    ///
    /// The mode will switch to eEquiv_append after the insertion.
    eEquiv_new_part,

    /// If the insertion point is just after any equiv part, including
    /// the last one in an equiv, then the inserted element is appended
    /// to the part, and the equiv mode will remain eEquiv_append.
    /// Otherwise the equiv mode will switch to eEquiv_none.
    /// Change the mode to eEquiv_none explicitly if you want to stop
    /// expanding an existing equiv.
    eEquiv_append,

    /// If the insertion point is just before any equiv part, including
    /// the first one in an equiv, then the inserted element is prepended
    /// to the part, and the equiv mode will remain eEquiv_prepend.
    /// Otherwise the equiv mode will switch to eEquiv_none.
    /// Change the mode to eEquiv_none explicitly if you want to stop
    /// expanding an existing equiv.
    eEquiv_prepend
};
/// Change equiv modification mode.
/// @param mode
///   New mode; see EEquivMode for the effect of each value on the
///   Insert*() methods.
/// @sa InsertNull()
void SetEquivMode(EEquivMode mode);
806 /// Change equiv modification mode to add new equiv set for the next
807 /// insert operation.
808 /// @sa SetEquivMode()
StartNewEquiv(void)809 void StartNewEquiv(void)
810 {
811 SetEquivMode(eEquiv_new_equiv);
812 }
813 /// Change equiv modification mode to add new equiv part for the next
814 /// insert operation.
815 /// @sa SetEquivMode()
StartNewEquivPart(void)816 void StartNewEquivPart(void)
817 {
818 SetEquivMode(eEquiv_new_part);
819 }
820 /// Change equiv modification mode to normal behavior that will only
821 /// update existing equiv sets.
822 /// @sa SetEquivMode()
StopEquiv(void)823 void StopEquiv(void)
824 {
825 SetEquivMode(eEquiv_none);
826 }
/// Get the current equiv modification mode.
/// @return
///   One of the EEquivMode values.
/// @sa SetEquivMode()
EEquivMode GetEquivMode(void) const;
830
/// Remove equiv set, all pieces that are part of the set will be
/// preserved as independent pieces.
/// @param level
///   NOTE(review): presumably selects which of several nested equiv sets
///   at the current position is removed -- confirm with the implementation.
void RemoveEquiv(size_t level = 0);
/// Create an equiv set with one part spanning from the current position
/// up to, but not including, the position pointed to by end_it.
/// The end_it position must be after the current position.
/// If there are any conflicts with existing equiv sets or bond pairs
/// no new equiv set will be made and an exception will be thrown.
/// The new set's level will be assigned depending on its size in relation
/// to existing overlapping equiv sets.
/// @param end_it
///   Exclusive end of the new equiv part.
void MakeEquiv(const CSeq_loc_I& end_it);
/// NOTE(review): the description previously attached to this method was a
/// copy of the MakeEquiv() comment and referred to an end_it argument,
/// which this method does not take.
/// Presumably this starts a new equiv part at the current position inside
/// an existing equiv set, with 'level' selecting among nested equiv sets
/// -- TODO confirm against the implementation.
/// As originally documented: if there are any conflicts with existing
/// equiv sets or bond pairs, nothing is created and an exception will be
/// thrown.
void MakeEquivPartBreak(size_t level = 0);
848
/// Remove the bond at the current position - the current position may be
/// either the A or the B part of the bond.
void RemoveBond(void);
/// Make a bond at the current position (only A).
/// The current part must be a point.
/// If the current position is already a bond A part, it will be updated.
void MakeBondA(void);
/// Make a bond at the current position with the next position (A and B).
/// The current and next parts must be points.
/// If the current position is already a bond A part, it will be updated.
void MakeBondAB(void);
/// Make a bond at the previous position with the current position
/// (A and B).
/// The current and previous parts must be points.
/// If the previous position is already a bond A part, it will be updated.
void MakeBondB(void);
863
protected:
// Keep the base-class x_GetRangeInfo overload(s) visible; the declaration
// below would otherwise hide them in this class.
using CSeq_loc_CI::x_GetRangeInfo;
// Non-const overload giving write access to a SSeq_loc_CI_RangeInfo.
// NOTE(review): presumably the info of the current range -- confirm.
SSeq_loc_CI_RangeInfo& x_GetRangeInfo(void);
867
// NOTE(review): presumably stores the Seq-id handle 'id' into the given
// range info -- confirm against the implementation.
void x_SetSeq_id_Handle(SSeq_loc_CI_RangeInfo& info,
                        const CSeq_id_Handle& id);
870
// Predicate consulted by x_CheckValidForInsert().
// NOTE(review): presumably true when the iterator is positioned where an
// Insert*() call is allowed -- confirm against the implementation.
bool x_IsValidForInsert(void) const;
// Calls x_ThrowNotValid(where) when x_IsValidForInsert() is false.
void x_CheckValidForInsert(const char* where) const;
// NOTE(review): presumably returns the iterator type name used in error
// messages -- confirm against the implementation.
virtual const char* x_GetIteratorType(void) const;
874 };
875
876
877 /////////////////// CSeq_loc inline methods
878
879 inline
InvalidateTotalRangeCache(void) const880 void CSeq_loc::InvalidateTotalRangeCache(void) const
881 {
882 m_TotalRangeCacheFrom = TSeqPos(kDirtyCache);
883 }
884
885
886 inline
InvalidateIdCache(void) const887 void CSeq_loc::InvalidateIdCache(void) const
888 {
889 m_IdCache = NULL;
890 }
891
892
893 inline
InvalidateCache(void) const894 void CSeq_loc::InvalidateCache(void) const
895 {
896 InvalidateTotalRangeCache();
897 InvalidateIdCache();
898 }
899
900
901 // constructor
902 inline
CSeq_loc(void)903 CSeq_loc::CSeq_loc(void)
904 {
905 InvalidateCache();
906 }
907
908
909 inline
GetTotalRange(void) const910 CSeq_loc::TRange CSeq_loc::GetTotalRange(void) const
911 {
912 TSeqPos range_from = m_TotalRangeCacheFrom;
913 if ( range_from == TSeqPos(kDirtyCache) ) {
914 return x_UpdateTotalRange();
915 }
916 else {
917 TSeqPos range_to_open = m_TotalRangeCacheToOpen;
918 return COpenRange<TSeqPos>(range_from, range_to_open);
919 }
920 }
921
922
923 inline
CheckId(const CSeq_id * & id,bool may_throw) const924 bool CSeq_loc::CheckId(const CSeq_id*& id, bool may_throw) const
925 {
926 const CSeq_id* my_id = m_IdCache;
927 if ( my_id == NULL ) {
928 if ( !x_CheckId(my_id, may_throw) ) {
929 return false;
930 }
931 m_IdCache = my_id;
932 }
933 return x_UpdateId(id, my_id, may_throw);
934 }
935
936
937 inline
GetId(void) const938 const CSeq_id* CSeq_loc::GetId(void) const
939 {
940 const CSeq_id* sip = NULL;
941 return CheckId(sip, false) ? sip : NULL;
942 }
943
944
945 inline
SetId(const CSeq_id & id)946 void CSeq_loc::SetId(const CSeq_id& id)
947 {
948 InvalidateIdCache();
949 CRef<CSeq_id> nc_id(new CSeq_id);
950 nc_id->Assign(id);
951 SetId(*nc_id);
952 m_IdCache = nc_id.GetPointer();
953 }
954
955
956 inline
SetNull(void)957 void CSeq_loc::SetNull(void)
958 {
959 InvalidateIdCache();
960 Tparent::SetNull();
961 }
962
963 #define DEFINE_NCBI_SEQ_LOC_SETTERS(x) \
964 inline \
965 void CSeq_loc::Set##x(T##x& v) \
966 { \
967 InvalidateCache(); \
968 Tparent::Set##x(v); \
969 } \
970 \
971 inline \
972 CSeq_loc::T##x& CSeq_loc::Set##x(void) \
973 { \
974 InvalidateCache(); \
975 return Tparent::Set##x(); \
976 }
977
978 DEFINE_NCBI_SEQ_LOC_SETTERS(Empty)
DEFINE_NCBI_SEQ_LOC_SETTERS(Whole)979 DEFINE_NCBI_SEQ_LOC_SETTERS(Whole)
980 DEFINE_NCBI_SEQ_LOC_SETTERS(Int)
981 DEFINE_NCBI_SEQ_LOC_SETTERS(Packed_int)
982 DEFINE_NCBI_SEQ_LOC_SETTERS(Pnt)
983 DEFINE_NCBI_SEQ_LOC_SETTERS(Packed_pnt)
984 DEFINE_NCBI_SEQ_LOC_SETTERS(Mix)
985 DEFINE_NCBI_SEQ_LOC_SETTERS(Equiv)
986 DEFINE_NCBI_SEQ_LOC_SETTERS(Bond)
987 DEFINE_NCBI_SEQ_LOC_SETTERS(Feat)
988
989 #undef DEFINE_NCBI_SEQ_LOC_SETTERS
990
991 inline
992 bool CSeq_loc::IsReverseStrand(void) const
993 {
994 return IsReverse(GetStrand());
995 }
996
997
998 /////////////////// end of CSeq_loc inline methods
999
1000 /////////////////// CSeq_loc_CI inline methods
1001
1002 inline
SetStrand(ENa_strand strand)1003 void SSeq_loc_CI_RangeInfo::SetStrand(ENa_strand strand)
1004 {
1005 m_IsSetStrand = true;
1006 m_Strand = strand;
1007 }
1008
1009 inline
x_CheckValid(const char * where) const1010 void CSeq_loc_CI::x_CheckValid(const char* where) const
1011 {
1012 if ( !x_IsValid() )
1013 x_ThrowNotValid(where);
1014 }
1015
1016 inline
operator ++(void)1017 CSeq_loc_CI& CSeq_loc_CI::operator++ (void)
1018 {
1019 x_CheckValid("operator++");
1020 ++m_Index;
1021 return *this;
1022 }
1023
1024 inline
GetSeq_id(void) const1025 const CSeq_id& CSeq_loc_CI::GetSeq_id(void) const
1026 {
1027 x_CheckValid("GetSeq_id()");
1028 return *x_GetRangeInfo().m_Id;
1029 }
1030
1031 inline
GetSeq_id_Handle(void) const1032 CSeq_id_Handle CSeq_loc_CI::GetSeq_id_Handle(void) const
1033 {
1034 x_CheckValid("GetSeq_id_Handle()");
1035 return x_GetRangeInfo().m_IdHandle;
1036 }
1037
1038 inline
GetRange(void) const1039 CSeq_loc_CI::TRange CSeq_loc_CI::GetRange(void) const
1040 {
1041 x_CheckValid("GetRange()");
1042 return x_GetRangeInfo().m_Range;
1043 }
1044
1045 inline
IsSetStrand(void) const1046 bool CSeq_loc_CI::IsSetStrand(void) const
1047 {
1048 x_CheckValid("IsSetStrand()");
1049 return x_GetRangeInfo().m_IsSetStrand;
1050 }
1051
1052 inline
GetStrand(void) const1053 ENa_strand CSeq_loc_CI::GetStrand(void) const
1054 {
1055 x_CheckValid("GetStrand()");
1056 return x_GetRangeInfo().m_Strand;
1057 }
1058
1059 inline
GetFuzzFrom(void) const1060 const CInt_fuzz* CSeq_loc_CI::GetFuzzFrom(void) const
1061 {
1062 x_CheckValid("GetFuzzFrom()");
1063 return x_GetRangeInfo().m_Fuzz.first;
1064 }
1065
1066 inline
GetFuzzTo(void) const1067 const CInt_fuzz* CSeq_loc_CI::GetFuzzTo(void) const
1068 {
1069 x_CheckValid("GetFuzzTo()");
1070 return x_GetRangeInfo().m_Fuzz.second;
1071 }
1072
1073 inline
IsWhole(void) const1074 bool CSeq_loc_CI::IsWhole(void) const
1075 {
1076 x_CheckValid("IsWhole()");
1077 return x_GetRangeInfo().m_Range.IsWhole();
1078 }
1079
1080 inline
IsEmpty(void) const1081 bool CSeq_loc_CI::IsEmpty(void) const
1082 {
1083 x_CheckValid("IsEmpty()");
1084 return x_GetRangeInfo().m_Range.Empty();
1085 }
1086
1087 inline
IsPoint(void) const1088 bool CSeq_loc_CI::IsPoint(void) const
1089 {
1090 x_CheckValid("IsPoint()");
1091 return x_GetRangeInfo().m_Range.GetLength() == 1;
1092 }
1093
1094 inline
GetPos(void) const1095 size_t CSeq_loc_CI::GetPos(void) const
1096 {
1097 return m_Index;
1098 }
1099
1100 inline
Rewind(void)1101 void CSeq_loc_CI::Rewind(void)
1102 {
1103 m_Index = 0;
1104 }
1105
1106 /////////////////// CSeq_loc_I inline methods
1107
1108 inline
x_CheckValidForInsert(const char * where) const1109 void CSeq_loc_I::x_CheckValidForInsert(const char* where) const
1110 {
1111 if ( !x_IsValidForInsert() )
1112 x_ThrowNotValid(where);
1113 }
1114
// NOTE(review): presumably tells the serialization library that CSeq_loc
// provides a post-read hook to be run after an object has been read
// -- confirm against the serial library documentation.
NCBISER_HAVE_POST_READ(CSeq_loc)
1116
/////////////////// end of CSeq_loc_CI and CSeq_loc_I inline methods
1118
1119
1120 /* @} */
1121
1122
1123 END_objects_SCOPE // namespace ncbi::objects::
1124 END_NCBI_SCOPE
1125
1126 #endif // OBJECTS_SEQLOC_SEQ_LOC_HPP
1127