1 #ifndef SEQ_LOC_MAPPER_BASE__HPP
2 #define SEQ_LOC_MAPPER_BASE__HPP
3 
4 /*  $Id: seq_loc_mapper_base.hpp 572476 2018-10-15 15:17:03Z grichenk $
5 * ===========================================================================
6 *
7 *                            PUBLIC DOMAIN NOTICE
8 *               National Center for Biotechnology Information
9 *
10 *  This software/database is a "United States Government Work" under the
11 *  terms of the United States Copyright Act.  It was written as part of
12 *  the author's official duties as a United States Government employee and
13 *  thus cannot be copyrighted.  This software/database is freely available
14 *  to the public for use. The National Library of Medicine and the U.S.
15 *  Government have not placed any restriction on its use or reproduction.
16 *
17 *  Although all reasonable efforts have been taken to ensure the accuracy
18 *  and reliability of the software and data, the NLM and the U.S.
19 *  Government do not and cannot warrant the performance or results that
20 *  may be obtained by using this software or data. The NLM and the U.S.
21 *  Government disclaim all warranties, express or implied, including
22 *  warranties of performance, merchantability or fitness for any particular
23 *  purpose.
24 *
25 *  Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Aleksey Grichenko
30 *
31 * File Description:
32 *   Seq-loc mapper base
33 *
34 */
35 
36 #include <corelib/ncbistd.hpp>
37 #include <corelib/ncbiobj.hpp>
38 #include <corelib/ncbi_message.hpp>
39 #include <util/range.hpp>
40 #include <util/rangemap.hpp>
41 #include <objects/seqloc/Na_strand.hpp>
42 #include <objects/seqalign/Seq_align.hpp>
43 #include <objects/seqalign/Spliced_exon.hpp>
44 #include <objects/seq/seq_id_handle.hpp>
45 #include <objects/general/Int_fuzz.hpp>
46 #include <objects/seq/annot_mapper_exception.hpp>
47 
48 
49 BEGIN_NCBI_SCOPE
50 BEGIN_SCOPE(objects)
51 
52 
53 /** @addtogroup ObjectManagerCore
54  *
55  * @{
56  */
57 
58 
59 class CSeq_id;
60 class CSeq_loc;
61 class CSeq_interval;
62 class CPacked_seqpnt;
63 class CSeq_loc_CI;
64 class CSeq_feat;
65 class CSeq_align;
66 class CSeq_align_Mapper_Base;
67 class CSeq_graph;
68 class IMapper_Sequence_Info;
69 
70 
71 /// CMappingRange - describes a single interval to interval
72 /// mapping.
73 class NCBI_SEQ_EXPORT CMappingRange : public CObject
74 {
75 public:
76     CMappingRange(CSeq_id_Handle    src_id,
77                   TSeqPos           src_from,
78                   TSeqPos           src_length,
79                   ENa_strand        src_strand,
80                   CSeq_id_Handle    dst_id,
81                   TSeqPos           dst_from,
82                   ENa_strand        dst_strand,
83                   bool              ext_to = false,
84                   int               frame = 0,
85                   TSeqPos           src_bioseq_len = kInvalidSeqPos,
86                   TSeqPos           dst_len = kInvalidSeqPos);
87 
88     /// Check if the id is on the source sequence.
89     bool GoodSrcId(const CSeq_id& id) const;
90     CRef<CSeq_id> GetDstId(void) const;
GetDstIdHandle(void) const91     const CSeq_id_Handle& GetDstIdHandle(void) const
92         { return m_Dst_id_Handle; }
93 
94     typedef CRange<TSeqPos>    TRange;
95     typedef CRef<CInt_fuzz>    TFuzz;
96     typedef pair<TFuzz, TFuzz> TRangeFuzz;
97 
98     /// Check if the interval can be mapped through this mapping range.
99     /// Strand direction is checked only if is_set_strand is true.
100     bool CanMap(TSeqPos    from,
101                 TSeqPos    to,
102                 bool       is_set_strand,
103                 ENa_strand strand) const;
104     /// Map a single point
105     TSeqPos Map_Pos(TSeqPos pos) const;
106     /// Map an interval, set fuzz when the mapping truncates the original
107     /// range.
108     TRange Map_Range(TSeqPos           from,
109                      TSeqPos           to,
110                      const TRangeFuzz* fuzz = 0) const;
111     /// Map the strand, return true if the destination strand should be
112     /// set (even if it's eNa_strand_unknown -- this may happen if the
113     /// source strand is set to unknown).
114     bool Map_Strand(bool is_set_strand,
115                     ENa_strand src,
116                     ENa_strand* dst) const;
117     /// Map fuzz if one is set in the original location.
118     TRangeFuzz Map_Fuzz(const TRangeFuzz& fuzz) const;
119 
120 private:
121     // Get new fuzz value when reversing location's strand.
122     CInt_fuzz::ELim x_ReverseFuzzLim(CInt_fuzz::ELim lim) const;
123     void x_Map_Fuzz(TFuzz& fuzz) const;
124 
125     CSeq_id_Handle      m_Src_id_Handle;
126     TSeqPos             m_Src_from;
127     TSeqPos             m_Src_to;
128     ENa_strand          m_Src_strand;
129     CSeq_id_Handle      m_Dst_id_Handle;
130     TSeqPos             m_Dst_from;
131     ENa_strand          m_Dst_strand;
132     // Whether the mapping reverses the strand or not.
133     // This value can be calculated from source and destination
134     // strands, but is cached for better performance.
135     bool                m_Reverse;
136     // Whether to extend the mapped location to the end of
137     // destination range. Used when mapping from a prot to a nuc.
138     // ExtTo is set when both conditions are met:
139     // - the mapping is from a protein to a nucleotide
140     // - the destination interval has partial 'to' (set as fuzz)
141     // ExtTo is used only when the interval to be mapped has
142     // partial 'to' set through the fuzz and the mapped range is
143     // just 1 or 2 bases shorter than the mapping destination.
144     bool                m_ExtTo;
145     // Holds the frame shift (0 if none) of the underlying CDS (if any).
146     int                 m_Frame;
147     // This holds the complete length of the original source bioseq.
148     // Needed to detect whether or not fuzzy edges should be extended to the end.
149     TSeqPos             m_Src_bioseq_len;
150     // For example, if the end of a source maps to just before the end of the
151     // dest, then we sometimes extend to the end of the dest, so we do need
152     // to store this, even though it's not needed for the mapping itself.
153     TSeqPos             m_Dst_len;
154     // Group of mapping ranges - used with alignments, e.g. to group
155     // mapped ranges by exon.
156     int                 m_Group;
157 
158     friend class CSeq_loc_Mapper_Base;
159     //friend class CSeq_loc_Mapper;
160     friend class CMappingRanges;
161     friend class CSeq_align_Mapper_Base;
162     //friend class CSeq_align_Mapper;
163     friend struct CMappingRangeRef_Less;
164     friend struct CMappingRangeRef_LessRev;
165 
166 public:
167     // Interface for CPairwiseAln converter
GetSrc_from(void) const168     TSeqPos GetSrc_from(void) const { return m_Src_from; }
GetDst_from(void) const169     TSeqPos GetDst_from(void) const { return m_Dst_from; }
GetLength(void) const170     TSeqPos GetLength(void)   const { return m_Src_to - m_Src_from; }
GetReverse(void) const171     bool    GetReverse(void)  const { return m_Reverse; }
GetGroup(void) const172     int     GetGroup(void)    const { return m_Group; }
SetGroup(int grp)173     void    SetGroup(int grp) { m_Group = grp; }
174 };
175 
176 
177 /// Storage for multiple mapping ranges. Stores mappings grouped
178 /// by the source seq-id, then sorted by start coordinate.
179 class NCBI_SEQ_EXPORT CMappingRanges : public CObject
180 {
181 public:
182     CMappingRanges(void);
183 
184     // Conversions
185     typedef CMappingRange::TRange                        TRange;
186     typedef CRangeMultimap<CRef<CMappingRange>, TSeqPos> TRangeMap;
187     typedef TRangeMap::const_iterator                    TRangeIterator;
188     typedef map<CSeq_id_Handle, TRangeMap>               TIdMap;
189     typedef TIdMap::const_iterator                       TIdIterator;
190     typedef vector< CRef<CMappingRange> >                TSortedMappings;
191 
GetIdMap() const192     const TIdMap& GetIdMap() const { return m_IdMap; }
GetIdMap(void)193     TIdMap& GetIdMap(void) { return m_IdMap; }
194 
195     /// Add new mapping range to the proper place.
196     void AddConversion(CRef<CMappingRange> cvt);
197     CRef<CMappingRange> AddConversion(CSeq_id_Handle    src_id,
198                                       TSeqPos           src_from,
199                                       TSeqPos           src_length,
200                                       ENa_strand        src_strand,
201                                       CSeq_id_Handle    dst_id,
202                                       TSeqPos           dst_from,
203                                       ENa_strand        dst_strand,
204                                       bool              ext_to = false,
205                                       int               frame = 0,
206                                       TSeqPos           dst_total_len = kInvalidSeqPos,
207                                       TSeqPos           src_bioseq_len = kInvalidSeqPos,
208                                       TSeqPos           dst_len = kInvalidSeqPos );
209 
210     /// Get mapping ranges iterator for the given seq-id and range.
211     TRangeIterator BeginMappingRanges(CSeq_id_Handle id,
212                                       TSeqPos        from,
213                                       TSeqPos        to) const;
214 
215     // Overall source and destination orientation. The order of mapped ranges
216     // is reversed if ReverseSrc != ReverseDst (except in some merging modes).
SetReverseSrc(bool value=true)217     void SetReverseSrc(bool value = true) { m_ReverseSrc = value; };
GetReverseSrc(void) const218     bool GetReverseSrc(void) const { return m_ReverseSrc; }
SetReverseDst(bool value=true)219     void SetReverseDst(bool value = true) { m_ReverseDst = value; };
GetReverseDst(void) const220     bool GetReverseDst(void) const { return m_ReverseDst; }
221 
222 private:
223     TIdMap m_IdMap;
224 
225     // Mapping source and destination orientations
226     bool   m_ReverseSrc;
227     bool   m_ReverseDst;
228 };
229 
230 
231 /// Helper class for mapping graphs. Used to collect ranges
232 /// relative to the graph location and adjust mapped graph data
233 /// accordingly.
234 class NCBI_SEQ_EXPORT CGraphRanges : public CObject
235 {
236 public:
CGraphRanges(void)237     CGraphRanges(void) : m_Offset(0) {}
238 
239     typedef CRange<TSeqPos> TRange;
240     typedef vector<TRange>  TGraphRanges;
241 
242     // Offset is relative to the original graph location, indicates
243     // the part of the original location which has been already
244     // mapped (or truncated).
GetOffset(void) const245     TSeqPos GetOffset(void) const { return m_Offset; }
SetOffset(TSeqPos offset)246     void SetOffset(TSeqPos offset) { m_Offset = offset; }
IncOffset(TSeqPos inc)247     void IncOffset(TSeqPos inc) { m_Offset += inc; }
248 
GetRanges(void) const249     const TGraphRanges& GetRanges(void) const { return m_Ranges; }
250 
251     // Add new mapped range. The range is relative to the not yet mapped
252     // part of the original location. See:
253     //   CSeq_loc_Mapper_Base::x_MapNextRange()
254     //   CSeq_loc_Mapper_Base::x_MapInterval()
AddRange(const TRange & rg)255     void AddRange(const TRange& rg)
256     {
257         if ( rg.Empty() ) {
258             return;
259         }
260         TRange offset_rg = rg.IsWhole() ? rg :
261             TRange(rg.GetFrom() + m_Offset, rg.GetTo() + m_Offset);
262         m_Ranges.push_back(offset_rg);
263         m_TotalRange.CombineWith(offset_rg);
264     }
265 
GetTotalRange(void) const266     const TRange& GetTotalRange(void) const { return m_TotalRange; }
267 
268 private:
269     TSeqPos      m_Offset;
270     TGraphRanges m_Ranges;
271     TRange       m_TotalRange;
272 };
273 
274 
275 /////////////////////////////////////////////////////////////////////////////
276 ///
277 ///  CSeq_loc_Mapper_Options --
278 ///
279 ///  Options passed to CSeq_loc_Mapper[_Base] constructor.
280 ///
281 
282 class NCBI_SEQ_EXPORT CSeq_loc_Mapper_Options
283 {
284 public:
285     typedef int TMapOptions;
286 
287     CSeq_loc_Mapper_Options(void);
288     CSeq_loc_Mapper_Options(IMapper_Sequence_Info* seq_info,
289                             TMapOptions opts = 0);
290     CSeq_loc_Mapper_Options(TMapOptions opts);
291 
292     ///  Sequence type, length etc. provider. If any ids from the mapping
293     ///  ranges are not available through this object, they should be
294     ///  registered using CSeq_loc_Mapper_Base::SetSeqTypeById().
295     IMapper_Sequence_Info* GetMapperSequenceInfo(void) const;
296     CSeq_loc_Mapper_Options& SetMapperSequenceInfo(IMapper_Sequence_Info* seq_info);
297 
298     /// Dense-seg mapping option.
299     /// @sa CSeq_loc_Mapper_Base::fAlign_Dense_seg_TotalRange
300     bool GetAlign_Dense_seg_TotalRange(void) const;
301     CSeq_loc_Mapper_Options& SetAlign_Dense_seg_TotalRange(bool value = true);
302 
303     /// Mapping direction when mapping through a sparse-seg.
304     /// @sa CSeq_loc_Mapper_Base::fAlign_Sparse_ToFirst
305     /// @sa CSeq_loc_Mapper_Base::fAlign_Sparse_ToSecond
306     bool GetAlign_Sparse_ToFirst(void) const;
307     bool GetAlign_Sparse_ToSecond(void) const;
308     CSeq_loc_Mapper_Options& SetAlign_Sparse_ToFirst(bool value = true);
309     CSeq_loc_Mapper_Options& SetAlign_Sparse_ToSecond(bool value = true);
310 
311     /// Mapping depth when using a seq-map, a bioseq or a GC-assembly.
312     /// @sa CSeq_loc_Mapper_Base::fMapSingleLevel
313     bool GetMapSingleLevel(void) const;
314     CSeq_loc_Mapper_Options& SetMapSingleLevel(bool value = true);
315 
316     /// Mapped location trimming at sequence end. Off by default.
317     /// @sa CSeq_loc_Mapper_Base::fTrimMappedLocation
318     bool GetTrimMappedLocation(void) const;
319     CSeq_loc_Mapper_Options& SetTrimMappedLocation(bool value = true);
320 
321 private:
322     friend class CSeq_loc_Mapper_Base;
323 
324     IMapper_Sequence_Info& GetSeqInfo(void) const;
325 
326     bool x_IsSetOption(int opt) const;
327     void x_SetOption(int opt, bool enable);
328 
329     mutable CRef<IMapper_Sequence_Info> m_SeqInfo;
330     TMapOptions m_Options;
331 };
332 
333 
334 /////////////////////////////////////////////////////////////////////////////
335 ///
336 ///  CSeq_loc_Mapper_Base --
337 ///
338 ///  Mapping locations and alignments between bioseqs through seq-locs,
339 ///  features, alignments or between parts of segmented bioseqs.
340 
341 class NCBI_SEQ_EXPORT CSeq_loc_Mapper_Base : public CObject
342 {
343 public:
344     /// Mapping direction used when initializing the mapper with a feature.
345     enum EFeatMapDirection {
346         eLocationToProduct, ///< Map from the feature's location to product
347         eProductToLocation  ///< Map from the feature's product to location
348     };
349 
350     /// Options for interpretations of locations
351     enum EMapOptions {
352         /// Ignore internal dense-seg structure - map each
353         /// dense-seg according to the total ranges involved
354         fAlign_Dense_seg_TotalRange = 1 << 0,
355 
356         /// Flags used to indicate mapping direction when mapping
357         /// through a sparse-seg.
358         fAlign_Sparse_ToFirst       = 0,      ///< Map to first-id
359         fAlign_Sparse_ToSecond      = 1 << 1, ///< Map to second-id
360 
361         /// Flag used when mapping through a seq-map (this includes
362         /// mapping through a bioseq or a GC-assembly). If set, each
363         /// call to Map() goes only one level up or down, unlike normal
364         /// mode which maps from any level as far up/down as possible.
365         /// The result of mapping can be mapped further by making another
366         /// call to Map().
367         fMapSingleLevel             = 1 << 2,
368 
369         /// Enable trimming of source/destination ranges at sequence end.
370         /// By default locations can stretch beyond sequence end. With trimming
371         /// enabled the mapper will truncate ranges to fit sequence lengths.
372         fTrimMappedLocation         = 1 << 3
373     };
374     typedef int TMapOptions;
375 
376     /// Spliced-seg row indexing constants.
377     enum ESplicedRow {
378         eSplicedRow_Prod = 0,
379         eSplicedRow_Gen  = 1
380     };
381 
382     enum FFuzzOption {
383         // used for backwards compatibility with C toolkit's output.
384         // TODO: we should remove this one day since the
385         // normal output is superior.
386         fFuzzOption_CStyle      = 1 << 0,
387         // Don't set eLim_tl or eLim_tr and instead set greater than or less
388         // than if appropriate.
389         fFuzzOption_RemoveLimTlOrTr = 1 << 1
390     };
391     typedef int TFuzzOption;
392 
393     /// Mapping through a pre-filled CMappingRanges.
394     /// @param mapping_ranges
395     ///  CMappingRanges filled with the desired source and destination
396     ///  ranges. Must be a heap object (will be stored in a CRef<>).
397     ///  NOTE: If the mapper is used with mixed sequence types, the
398     ///  ranges must use genomic coordinates (for ranges on proteins
399     ///  multiply all coordinates by 3).
400     /// @param options
401     ///  Mapping options which need to be set during mapper initialization.
402     /// @sa CSeq_loc_Mapper_Options
403     CSeq_loc_Mapper_Base(CMappingRanges*         mapping_ranges,
404                          CSeq_loc_Mapper_Options options = CSeq_loc_Mapper_Options());
405 
406     /// Mapping through a feature, both location and product must be set.
407     CSeq_loc_Mapper_Base(const CSeq_feat&        map_feat,
408                          EFeatMapDirection       dir,
409                          CSeq_loc_Mapper_Options options = CSeq_loc_Mapper_Options());
410 
411     /// Mapping between two seq_locs.
412     CSeq_loc_Mapper_Base(const CSeq_loc&         source,
413                          const CSeq_loc&         target,
414                          CSeq_loc_Mapper_Options options = CSeq_loc_Mapper_Options());
415 
416     /// Mapping through an alignment. Need to specify target ID or
417     /// target row of the alignment. Any other ID is mapped to the
418     /// target one. Only the first row matching target ID is used,
419     /// all other rows are considered source.
420     CSeq_loc_Mapper_Base(const CSeq_align&       map_align,
421                          const CSeq_id&          to_id,
422                          CSeq_loc_Mapper_Options options = CSeq_loc_Mapper_Options());
423     /// Mapping through an alignment using specific source and target ids.
424     /// If the alignment is not one of dense-seg, dense-diag or packed-seg, the source
425     /// id is ignored.
426     CSeq_loc_Mapper_Base(const CSeq_id&          from_id,
427                          const CSeq_id&          to_id,
428                          const CSeq_align&       map_align,
429                          CSeq_loc_Mapper_Options options = CSeq_loc_Mapper_Options());
430     /// @deprecated Use the version with CSeq_loc_Mapper_Options instead.
431     NCBI_DEPRECATED
432     CSeq_loc_Mapper_Base(const CSeq_align&      map_align,
433                          const CSeq_id&         to_id,
434                          TMapOptions            opts,
435                          IMapper_Sequence_Info* seq_info);
436 
437     /// Sparse alignments require special row indexing since each
438     /// row contains two seq-ids. Use options to specify mapping
439     /// direction.
440     CSeq_loc_Mapper_Base(const CSeq_align&       map_align,
441                          size_t                  to_row,
442                          CSeq_loc_Mapper_Options options = CSeq_loc_Mapper_Options());
443     /// Mapping through an alignment using specific source and target row numbers.
444     /// If the alignment is not one of dense-seg, dense-diag or packed-seg, the source
445     /// row is ignored.
446     CSeq_loc_Mapper_Base(size_t                  from_row,
447                          size_t                  to_row,
448                          const CSeq_align&       map_align,
449                          CSeq_loc_Mapper_Options options = CSeq_loc_Mapper_Options());
450     /// @deprecated Use the version with CSeq_loc_Mapper_Options instead.
451     NCBI_DEPRECATED
452     CSeq_loc_Mapper_Base(const CSeq_align&      map_align,
453                          size_t                 to_row,
454                          TMapOptions            opts,
455                          IMapper_Sequence_Info* seq_info);
456 
457     ~CSeq_loc_Mapper_Base(void);
458 
459     void SetFuzzOption( TFuzzOption newOption );
460 
461     /// Intervals' merging mode
462     /// MergeNone and MergeAbutting do not change the order of ranges
463     /// in the destination seq-loc. No ranges will be merged if they
464     /// are separated by any other sub-range.
465     /// MergeContained and MergeAll sort ranges before merging, so that
466     /// any overlapping ranges can be merged. The sorting takes the
467     /// mapped location strand into account.
468     /// NOTE: any merging (except None) is incompatible with collecting
469     /// source ranges.
470     /// @sa IncludeSourceLocs
471 
472     /// No merging
473     CSeq_loc_Mapper_Base& SetMergeNone(void);
474     /// Merge only abutting intervals, keep overlapping
475     CSeq_loc_Mapper_Base& SetMergeAbutting(void);
476     /// Merge only intervals from the same group. Group is created
477     /// for each exon, dense-diag, std-seg and disc sub-alignment.
478     CSeq_loc_Mapper_Base& SetMergeBySeg(void);
479     /// Merge intervals only if one is completely covered by another
480     CSeq_loc_Mapper_Base& SetMergeContained(void);
481     /// Merge any abutting or overlapping intervals
482     CSeq_loc_Mapper_Base& SetMergeAll(void);
483 
484     /// Whether to preserve or remove NULL sub-locations (usually
485     /// indicating gaps) from the result. By default gaps are preserved.
486     CSeq_loc_Mapper_Base& SetGapPreserve(void);
487     CSeq_loc_Mapper_Base& SetGapRemove(void);
488 
489     /// For mapping spliced-segs only: preserve or trim starting/ending
490     /// indels. By default indels are trimmed (only those at the whole
491     /// alignment start and end).
492     CSeq_loc_Mapper_Base& SetTrimSplicedSeg(bool trim);
493 
494     /// Keep ranges which can not be mapped. Does not affect truncation
495     /// of partially mapped ranges. By default non-mapping ranges are
496     /// removed.
497     CSeq_loc_Mapper_Base& KeepNonmappingRanges(void);
498     CSeq_loc_Mapper_Base& TruncateNonmappingRanges(void);
499 
500     /// Check strands before mapping a range. By default strand is not
501     /// checked and a range will be mapped even if its strand does not
502     /// correspond to the strand of the mapping source.
503     CSeq_loc_Mapper_Base& SetCheckStrand(bool value = true);
504 
505     /// When set to 'true' if mapped alignment has exactly one genomic and
506     /// one protein row, convert it to spliced-seg. By default all mixed-type
507     /// alignments are converted to std-seg.
508     CSeq_loc_Mapper_Base& MixedAlignsAsSpliced(bool value = true);
509 
510     /// Include source ranges in the mapped location. If turned
511     /// on, the resulting seq-loc will be an equiv with the
512     /// first sub-loc containing the usual mapped seq-loc, and
513     /// the second one - the set of source locations used in the
514     /// mapping.
515     /// NOTE: this option is incompatible with any merging.
516     /// Merging mode must be set to MergeNone.
517     CSeq_loc_Mapper_Base& IncludeSourceLocs(bool value = true);
518 
519     /// Report source range trimming as an error. If the flag is set,
520     /// any trimming will result in throwing CAnnotMapperException.
521     /// Intended to be used when mapping GC-Assembly aliases.
522     CSeq_loc_Mapper_Base& SetErrorOnPartial(bool value = true);
523 
524     /// Map seq-loc
525     CRef<CSeq_loc>   Map(const CSeq_loc& src_loc);
526     /// Take the total range from the location and run it through the mapper.
527     CRef<CSeq_loc>   MapTotalRange(const CSeq_loc& seq_loc);
528     /// Map the whole alignment. Searches all rows for ranges
529     /// which can be mapped.
530     CRef<CSeq_align> Map(const CSeq_align& src_align);
531     /// Map a single row of the alignment.
532     CRef<CSeq_align> Map(const CSeq_align& src_align,
533                          size_t            row);
534     /// Map seq-graph. This will map both location and data.
535     /// The data may be truncated to match the new location.
536     CRef<CSeq_graph> Map(const CSeq_graph& src_graph);
537 
538     /// Flags defining seq-annot mapping options.
539     enum FAnnotMapFlag {
540         fAnnotMap_Location = 1 << 0, ///< Map seq-feat locations
541         fAnnotMap_Product  = 1 << 1, ///< Map seq-feat products
542         fAnnotMap_Both     = fAnnotMap_Location | fAnnotMap_Product,
543 
544         /// Remove annotations which can not be mapped with this mapper.
545         /// If the flag is not set, the original annotation is stored
546         /// in the seq-annot.
547         fAnnotMap_RemoveNonMapping = 1 << 2,
548 
549         /// Throw exception if an annotation can not be mapped.
550         fAnnotMap_ThrowOnFailure = 1 << 3,
551 
552         fAnnotMap_Default = fAnnotMap_Both
553     };
554     typedef int TAnnotMapFlags;
555 
556     /// Result of seq-annot mapping
557     enum EMapResult {
558         /// No annotation was mapped, the input seq-annot is unchanged.
559         eMapped_None = 0,
560         /// Some (not all) annotations were mapped.
561         eMapped_Some,
562         /// All annotations were mapped, none was removed.
563         eMapped_All
564     };
565 
566     /// Map each object from the Seq-annot and replace the original
567     /// with the mapped one.
568     EMapResult Map(CSeq_annot& annot, TAnnotMapFlags flags = fAnnotMap_Default);
569 
570     /// Check if the last mapping resulted in partial location
571     /// (not all ranges from the original location could be mapped
572     /// to the target).
573     bool LastIsPartial(void);
574 
575     typedef set<CSeq_id_Handle> TSynonyms;
576 
577     // Collect synonyms for the id, store mapping of each synonym
578     // to the primary id. Returns primary id for the argument or the
579     // argument itself.
580     const CSeq_id_Handle& CollectSynonyms(const CSeq_id_Handle& id) const;
581 
582     // Sequence type - to recalculate coordinates.
583     enum ESeqType {
584         eSeq_unknown = 0,
585         eSeq_nuc = 1,
586         eSeq_prot = 3
587     };
588 
589 protected:
590 
591     // Get molecule type for the given id. The default implementation
592     // returns eSeq_unknown. The overridden methods should return
593     // real sequence type. The returned type is stored in the mapper's
594     // cache. The method should not be called directly, use
595     // GetSeqTypeById instead for it uses the cached types.
596     // It's also a good idea to cache the same sequence type for all
597     // synonyms in the overridden method to prevent multiple requests
598     // to GetSeqType.
599     ESeqType GetSeqType(const CSeq_id_Handle& idh) const;
600 
601     // Get sequence length for the given seq-id. Returns kInvalidSeqPos
602     // if the length is unknown (the default behavior).
603     TSeqPos GetSequenceLength(const CSeq_id& id);
604 
605     // Create CSeq_align_Mapper_Base, add any necessary arguments.
606     virtual CSeq_align_Mapper_Base*
607         InitAlignMapper(const CSeq_align& src_align);
608 
609     // Initialize the mapper from a feature. The feature must have
610     // both location and product set, mapping direction is set by
611     // the flag.
612     void x_InitializeFeat(const CSeq_feat&  map_feat,
613                           EFeatMapDirection dir);
614     // Map between two locations. Optional frame is used by x_InitializeFeat()
615     // only with cd-region features.
616     void x_InitializeLocs(const CSeq_loc& source,
617                           const CSeq_loc& target,
618                           int             src_frame = 0,
619                           int             dst_frame = 0);
620     // Initialize the mapper from an alignment. Looks for the first
621     // row containing the id and sets it as mapping target. All other
622     // rows become mapping source.
623     void x_InitializeAlign(const CSeq_align& map_align,
624                            const CSeq_id&    to_id,
625                            const CSeq_id*    from_id = nullptr);
626     // Recursive version of the above.
627     void x_InitializeAlign(const CSeq_align& map_align,
628                            const TSynonyms&  to_ids,
629                            const TSynonyms*  from_ids = nullptr);
630     // Initialize the mapper from an alignment, map to the specified row.
631     void x_InitializeAlign(const CSeq_align& map_align,
632                            size_t            to_row,
633                            size_t            from_row = size_t(-1));
634 
635     // Create dummy mapping from the whole destination location to itself.
636     // This will prevent truncation of ranges already on the target.
637     // For some reason (?) the function is used only by CSeq_loc_Mapper,
638     // not CSeq_loc_Mapper_Base, and only when initializing the mapper
639     // from a bioseq handle or a seq-map. When mapping through a feature
640     // or a pair of seq-locs it's not called and ranges on destination
641     // are truncated or preserved the same way as any other non-mapping
642     // ranges.
643     void x_PreserveDestinationLocs(void);
644 
645     // Add new mapping range while initializing the mapper. The function
646     // adjusts starts and lengths according to the used range and strand.
647     void x_NextMappingRange(const CSeq_id&   src_id,
648                             TSeqPos&         src_start,
649                             TSeqPos&         src_len,
650                             ENa_strand       src_strand,
651                             const CSeq_id&   dst_id,
652                             TSeqPos&         dst_start,
653                             TSeqPos&         dst_len,
654                             ENa_strand       dst_strand,
655                             const CInt_fuzz* fuzz_from = 0,
656                             const CInt_fuzz* fuzz_to = 0,
657                             int              frame = 0,
658                             TSeqPos          src_bioseq_len = kInvalidSeqPos);
659 
660     // Add new CMappingRange. This includes collecting all synonyms for the id,
661     // creating a new mapping for each of them and updating the destination
662     // ranges.
663     void x_AddConversion(const CSeq_id& src_id,
664                          TSeqPos        src_start,
665                          ENa_strand     src_strand,
666                          const CSeq_id& dst_id,
667                          TSeqPos        dst_start,
668                          ENa_strand     dst_strand,
669                          TSeqPos        length,
670                          bool           ext_right,
671                          int            frame,
672                          TSeqPos        src_bioseq_len,
673                          TSeqPos        dst_length );
674 
    // Parse and map the seq-loc.
    // The source location is only read (const reference); the function
    // returns nothing.
    // NOTE(review): presumably the result is accumulated in the mapper's
    // members (e.g. m_Dst_loc) - confirm in the implementation.
    void x_MapSeq_loc(const CSeq_loc& src_loc);

    // Convert collected ranges into a seq-loc and push it into the destination
    // seq-loc mix. This is done to preserve the original seq-loc structure
    // when possible (although some optimizations are done - see
    // x_OptimizeSeq_loc).
    void x_PushRangesToDstMix(void);
683 
    // Short names for the range-related types declared by CMappingRange
    // and CMappingRanges.
    typedef CMappingRange::TRange           TRange;
    typedef CMappingRanges::TRangeMap       TRangeMap;
    typedef CMappingRanges::TRangeIterator  TRangeIterator;
    typedef CMappingRanges::TSortedMappings TSortedMappings;

    // List and map of target ranges to construct target-to-target mapping
    // TDstRanges    - list of ranges,
    // TDstIdMap     - such a list for each seq-id,
    // TDstStrandMap - vector of per-id maps (NOTE(review): presumably
    //                 indexed by strand, as the name suggests - confirm).
    typedef list<TRange>                    TDstRanges;
    typedef map<CSeq_id_Handle, TDstRanges> TDstIdMap;
    typedef vector<TDstIdMap>               TDstStrandMap;

    // Destination locations arranged by ID/range
    // NOTE(review): the line above does not seem to describe the two
    // typedefs below. TFuzz is a CRef to CInt_fuzz, TRangeFuzz is a pair
    // of them (presumably the "from" and "to" fuzz of a range - confirm).
    typedef CRef<CInt_fuzz>                 TFuzz;
    typedef pair<TFuzz, TFuzz>              TRangeFuzz;
698     // Structure to hold information about mapped ranges until they are
699     // converted to seq-loc parts.
700     struct SMappedRange {
SMappedRangeCSeq_loc_Mapper_Base::SMappedRange701         SMappedRange(void) : group(0) {}
SMappedRangeCSeq_loc_Mapper_Base::SMappedRange702         SMappedRange(const TRange&      rg,
703                      const TRangeFuzz&  fz,
704                      int                grp = 0)
705             : range(rg), fuzz(fz), group(grp) {}
706 
707         TRange      range;
708         TRangeFuzz  fuzz;
709         int         group; // used mostly to group ranges by exon
710 
operator <CSeq_loc_Mapper_Base::SMappedRange711         bool operator<(const SMappedRange& rg) const
712             {
713                 return range < rg.range;
714             }
715     };
    // Plain list of mapped ranges.
    typedef list<SMappedRange>                   TMappedRanges;
    // Ranges grouped by strand. [0] contains ranges without strand,
    // [i] where i>0 stands for 'eNa_strand_XXXX + 1'.
    typedef vector<TMappedRanges>                TRangesByStrand;
    // Per-strand mapped ranges for each seq-id.
    typedef map<CSeq_id_Handle, TRangesByStrand> TRangesById;
    // Sequence type for each seq-id.
    typedef map<CSeq_id_Handle, ESeqType>        TSeqTypeById;

    // Short names for the dense-diag and std-seg containers of CSeq_align.
    typedef CSeq_align::C_Segs::TDendiag         TDendiag;
    typedef CSeq_align::C_Segs::TStd             TStd;
725 
private:
    // Copy constructor and assignment operator are declared private, so
    // code outside the class (and its friends) can not copy the mapper.
    // NOTE(review): presumably they are intentionally left without a
    // definition - confirm in the implementation file.
    CSeq_loc_Mapper_Base(const CSeq_loc_Mapper_Base&);
    CSeq_loc_Mapper_Base& operator=(const CSeq_loc_Mapper_Base&);

    // The alignment mapper is given access to the private members.
    friend class CSeq_align_Mapper_Base;
731 
    // Merging mode for mapped ranges; the current mode is kept in
    // m_MergeFlag and is selected with the SetMergeXXX() methods.
    enum EMergeFlags {
        eMergeNone,      // no merging
        eMergeAbutting,  // merge only abutting intervals, keep overlapping
        eMergeContained, // merge if one range is contained in another
        eMergeBySeg,     // merge abutting and overlapping ranges by mapping group
        eMergeAll        // merge both abutting and overlapping intervals
    };
    // Gap handling mode; the current mode is kept in m_GapFlag and is
    // selected with SetGapPreserve() / SetGapRemove().
    enum EGapFlags {
        eGapPreserve,    // Leave gaps as-is
        eGapRemove       // Remove gaps (NULL seq-locs)
    };
743 
    // Check types of all sequences referenced by the location,
    // calculate the total length of the location, return true
    // if types are known for all sequences.
    // Set seqtype to the detected sequence type or to unknown
    // if the type can not be detected or there are multiple types.
    // Both seqtype and len are output arguments (non-const references).
    bool x_CheckSeqTypes(const CSeq_loc& loc,
                         ESeqType&       seqtype,
                         TSeqPos&        len);
    // If x_CheckSeqTypes returns false, it may indicate that some
    // sequence types could not be detected. In this case the mapper
    // will attempt to find at least one known type in the location
    // and force it for all sub-locations with unknown types.
    // The function will fail if there are different known types in the
    // same seq-loc.
    // NOTE(review): the returned ESeqType is presumably the type that
    // was forced - confirm in the implementation.
    ESeqType x_ForceSeqTypes(const CSeq_loc& loc) const;

    // In some cases the mapper may fail to detect that both source
    // and destination locations are on proteins rather than on nucs.
    // CSeq_align_Mapper_Base may detect this mistake while mapping
    // an alignment. In this case it will try to change all types to
    // protein.
    void x_AdjustSeqTypesToProt(const CSeq_id_Handle& idh);

    // Get sequence length, try to get the real length for
    // reverse strand, do not use "whole".
    // The length is taken for the element the seq-loc iterator points to.
    TSeqPos x_GetRangeLength(const CSeq_loc_CI& it);
770 
    // Initialize the mapper from different alignment types.
    // There is one overload per alignment segment type (dense-diag,
    // dense-seg, std-seg, packed-seg, spliced-seg and sparse-seg).
    // NOTE(review): to_row presumably selects the row used as the mapping
    // destination and from_row the row used as the source - confirm.
    void x_InitAlign(const CDense_diag& diag, size_t to_row, size_t from_row);
    void x_InitAlign(const CDense_seg& denseg, size_t to_row, size_t from_row);
    void x_InitAlign(const CStd_seg& sseg, size_t to_row);
    void x_InitAlign(const CPacked_seg& pseg, size_t to_row, size_t from_row);
    // For spliced-segs the destination is selected either by a set of
    // ids (to_ids) or by the row (to_row).
    void x_InitSpliced(const CSpliced_seg& spliced,
                       const TSynonyms&    to_ids);
    void x_InitSpliced(const CSpliced_seg& spliced, ESplicedRow to_row);
    void x_InitSparse(const CSparse_seg& sparse, int to_row);

    // NOTE(review): judging by the name, goes through the parts of
    // a spliced exon and creates mappings for them - confirm.
    // Starts and lengths on both genomic (gen_*) and product (prod_*)
    // sequences are passed by non-const reference.
    void x_IterateExonParts(const CSpliced_exon::TParts& parts,
                            ESplicedRow                  to_row,
                            const CSeq_id&               gen_id,
                            TSeqPos&                     gen_start,
                            TSeqPos&                     gen_len,
                            ENa_strand                   gen_strand,
                            const CSeq_id&               prod_id,
                            TSeqPos&                     prod_start,
                            TSeqPos&                     prod_len,
                            ENa_strand                   prod_strand);
    // NOTE(review): presumably adds a mapping of mapping_len positions
    // between the genomic and the product sequence - confirm.
    // mapping_len, starts and lengths are passed by non-const reference.
    void x_AddExonPartsMapping(TSeqPos&        mapping_len,
                               ESplicedRow     to_row,
                               const CSeq_id&  gen_id,
                               TSeqPos&        gen_start,
                               TSeqPos&        gen_len,
                               ENa_strand      gen_strand,
                               const CSeq_id&  prod_id,
                               TSeqPos&        prod_start,
                               TSeqPos&        prod_len,
                               ENa_strand      prod_strand);
    // Helper method to simplify getting exon part length regardless of
    // its type.
    // Static: does not use any mapper instance.
    static TSeqPos sx_GetExonPartLength(const CSpliced_exon_chunk& part);
804 
    // Map a single range from source to destination.
    // NOTE(review): the returned flag presumably tells whether the range
    // was mapped; cvt_idx looks like an index into 'mappings' and
    // last_src_to like an in/out position - confirm in the implementation.
    bool x_MapNextRange(const TRange&     src_rg,
                        bool              is_set_strand,
                        ENa_strand        src_strand,
                        const TRangeFuzz& src_fuzz,
                        TSortedMappings&  mappings,
                        size_t            cvt_idx,
                        TSeqPos*          last_src_to);
    // Map the interval through all matching mappings.
    // The range and the fuzz are passed by value.
    bool x_MapInterval(const CSeq_id&   src_id,
                       TRange           src_rg,
                       bool             is_set_strand,
                       ENa_strand       src_strand,
                       TRangeFuzz       orig_fuzz);
    // Set the flag to indicate that the last range was truncated
    // during mapping.
    void x_SetLastTruncated(void);

    // Pushes the location to the destination seq-loc mix.
    // See also x_PushRangesToDstMix.
    void x_PushLocToDstMix(CRef<CSeq_loc> loc);

    // Pushes NULL location to the destination mix (when a range
    // can not be mapped).
    void x_PushNullLoc(void);

    // Map the alignment. If row is NULL, map all rows. Otherwise
    // map only the selected row.
    // The public Map(const CSeq_align&) passes a null row pointer,
    // Map(const CSeq_align&, size_t) passes the address of its row argument.
    CRef<CSeq_align> x_MapSeq_align(const CSeq_align& src_align,
                                    size_t*           row);
835 
    // Get mapped ranges for the given id and strand index.
    // See TRangesByStrand for strand indexing.
    // The method is const but returns a non-const reference.
    // NOTE(review): presumably the reference points into the mutable
    // member m_MappedLocs - confirm in the implementation.
    TMappedRanges& x_GetMappedRanges(const CSeq_id_Handle& id,
                                     size_t                strand_idx) const;
    // Push mapped range to the list of mapped ranges. Try to merge the new
    // range with the existing ones based on the selected merging mode.
    void x_PushMappedRange(const CSeq_id_Handle& id,
                           size_t                strand_idx,
                           const TRange&         range,
                           const TRangeFuzz&     fuzz,
                           bool                  push_reverse,
                           int                   group);
    // Store the source range just mapped. Used only if storing source
    // locations is enabled - see IncludeSourceLocs.
    void x_PushSourceRange(const CSeq_id_Handle& idh,
                           size_t                src_strand,
                           size_t                dst_strand,
                           const TRange&         range,
                           bool                  push_reverse);

    // Convert mapped range data to a seq-loc (point or interval).
    // Set fuzzes to indicate truncated range if necessary.
    // strand_idx uses the same indexing as TRangesByStrand.
    CRef<CSeq_loc> x_RangeToSeq_loc(const CSeq_id_Handle& idh,
                                    TSeqPos               from,
                                    TSeqPos               to,
                                    size_t                strand_idx,
                                    TRangeFuzz            rg_fuzz);

    // Convert all collected and not yet converted mapped ranges to a seq-loc.
    // May be called multiple times while mapping a complex location and
    // storing its parts to a destination seq-loc mix (see
    // x_PushRangesToDstMix).
    CRef<CSeq_loc> x_GetMappedSeq_loc(void);

    // For mix locations, we remove fuzz from in-between the parts.
    // The location is passed by non-const reference to the CRef.
    void x_StripExtraneousFuzz(CRef<CSeq_loc>& loc) const;

    // This removes fuzz of type "range" if any.
    // Don't give this mix locations; it won't do anything.
    // The fixed location is returned; the argument is a CConstRef.
    CConstRef<CSeq_loc> x_FixNonsenseFuzz( CConstRef<CSeq_loc> loc_piece ) const;
876 
    // Try to optimize the mapped location if it's a mix.
    // The allowed optimizations are:
    // - empty mix is converted to Null
    // - if the mix contains a single element, use just this element
    // - if the mix contains only intervals, convert it to packed-int
    // When mapping a complex location (e.g. a multi-level mix) each
    // sub-location is optimized individually.
    // The location is passed by non-const reference to the CRef.
    void x_OptimizeSeq_loc(CRef<CSeq_loc>& loc) const;

    // Returns true if the new mapped range should be added to the
    // existing mapped ranges in the reverse order (in the front).
    // If merging is set to contained or all, use the provided strand
    // index to check the order of ranges. For all other merging modes
    // compare the directions of mapping source and target.
    bool x_ReverseRangeOrder(int str) const;

    // Map parts of a complex seq-loc.
    // One call handles one interval of a packed-int, or one point (p)
    // of a packed-pnt (pp).
    void x_Map_PackedInt_Element(const CSeq_interval& si);
    void x_Map_PackedPnt_Element(const CPacked_seqpnt& pp, TSeqPos p);

    // Get main seq-id for a synonym. If no mapping exists, returns the
    // original id.
    const CSeq_id_Handle& x_GetPrimaryId(const CSeq_id_Handle& synonym) const;

    // Check if the id is in the list of synonyms.
    bool x_IsSynonym(const CSeq_id& id, const TSynonyms& synonyms) const;
903 
    // Synonym seq-id -> primary seq-id (type of m_SynonymMap).
    typedef map<CSeq_id_Handle, CSeq_id_Handle> TSynonymMap;
    // Primary seq-id -> sequence length (type of m_LengthMap).
    typedef map<CSeq_id_Handle, TSeqPos> TLengthMap;

    // CTotalRangeSynonymMapper is given access to the private members.
    friend class CTotalRangeSynonymMapper;

    // How to merge mapped locations.
    EMergeFlags          m_MergeFlag;
    // How to treat gaps (Null sub-locations) if any.
    EGapFlags            m_GapFlag;
913 
    // Other mapping options.
    // Each value is a separate bit; a combination of them is stored
    // in m_MiscFlags (see x_IsSetMiscFlag / x_SetMiscFlag).
    enum EMiscFlags {
        // Trim leading/trailing indels (gaps) from mapped spliced-seg alignments.
        fTrimSplicedSegs        = 1 << 0,
        // Whether to keep or discard ranges which can not be mapped.
        fKeepNonmapping         = 1 << 1,
        // Whether to check or not if the original location is on the same strand
        // as the mapping source.
        fCheckStrand            = 1 << 2,
        // Whether to include a source of each mapped range to the mapped seq-loc.
        fIncludeSrcLocs         = 1 << 3,
        // Prefer spliced-seg for mixed alignments.
        fMixedAlignsAsSpliced   = 1 << 4,
        // Treat any range truncation as an error (added for mapping to GC-Assembly
        // aliases).
        fErrorOnPartial         = 1 << 5
    };
    // Bitwise combination of EMiscFlags values.
    typedef int TMiscFlags;
932 
x_IsSetMiscFlag(EMiscFlags flag) const933     bool x_IsSetMiscFlag(EMiscFlags flag) const { return (m_MiscFlags & flag) == flag; }
    // Change the state of a misc option; used by the public setters
    // (SetTrimSplicedSeg, SetCheckStrand, KeepNonmappingRanges, etc.).
    // NOTE(review): presumably 'value' == true sets the flag and false
    // clears it - confirm in the implementation.
    void x_SetMiscFlag(EMiscFlags flag, bool value);

    // Currently selected misc options (combination of EMiscFlags).
    TMiscFlags m_MiscFlags;

    // Mapped ranges collected from the currently parsed sub-location.
    // Declared mutable, so const methods are allowed to modify it.
    mutable TRangesById  m_MappedLocs;
    // Source locations for all mapped ranges.
    CRef<CSeq_loc>       m_SrcLocs;

    // Collected ranges for mapped graph. Used to adjust mapped graph data.
    CRef<CGraphRanges>   m_GraphRanges;

    // Map each synonym to a primary seq-id.
    mutable TSynonymMap  m_SynonymMap;

    // Map each primary seq-id to sequence length.
    mutable TLengthMap   m_LengthMap;
951 
protected:
    // Storage for sequence types.
    // Looked up by primary seq-id in GetSeqTypeById(); declared mutable,
    // so const methods are allowed to modify it.
    mutable TSeqTypeById m_SeqTypes;
    // Flag indicating if the mapping truncated at least some ranges.
    // Returned by LastIsPartial().
    bool                 m_Partial;
    // Flag indicating if the last range could not be mapped and was
    // dropped.
    bool                 m_LastTruncated;
    // Mapping ranges grouped by source id and strand.
    // Exposed read-only through GetMappingRanges().
    CRef<CMappingRanges> m_Mappings;
    // Mapped seq-loc
    CRef<CSeq_loc>       m_Dst_loc;
    // All ranges on the mapping destination.
    TDstStrandMap        m_DstRanges;
    // Current mapping group. Incremented for each mapping sub-location
    // (e.g. exon).
    int                  m_CurrentGroup;
    // Control how fuzz is generated and propagated
    TFuzzOption          m_FuzzOption;
    // Misc mapping options
    CSeq_loc_Mapper_Options m_MapOptions;
973 
public:
    // Initialize the mapper with default values
    // The options are passed by value; default-constructed options are
    // used if the argument is omitted.
    CSeq_loc_Mapper_Base(CSeq_loc_Mapper_Options options = CSeq_loc_Mapper_Options());

    /// Methods for getting sequence types, use cached types (m_SeqTypes)
    /// if possible.
    /// The id is first replaced with its primary id (CollectSynonyms);
    /// if the type is not in the cache, GetSeqType() is called.
    ESeqType GetSeqTypeById(const CSeq_id_Handle& idh) const;
    ESeqType GetSeqTypeById(const CSeq_id& id) const;
    /// Methods for setting sequence types. May be used to populate the
    /// cache before mapping huge alignments if the types are already
    /// known. Throw exception if the sequence type is already set to
    /// a different value.
    /// NOTE: setting sequence type does not adjust mapping ranges for this
    /// id. All mapping ranges must use genomic coordinates.
    void SetSeqTypeById(const CSeq_id_Handle& idh, ESeqType seqtype) const;
    void SetSeqTypeById(const CSeq_id& id, ESeqType seqtype) const;

    /// Get sequence width. Return 3 for proteins, 1 for nucleotides and
    /// unknown sequence types.
    /// The width is derived from the result of GetSeqTypeById().
    int GetWidthById(const CSeq_id_Handle& idh) const;
    int GetWidthById(const CSeq_id& id) const;
995 
996     /// Get mapping ranges.
GetMappingRanges(void) const997     const CMappingRanges& GetMappingRanges(void) const { return *m_Mappings; }
998 
    /// NOTE: In most cases CollectSynonyms(const CSeq_id_Handle& id) should
    /// be used instead, since it takes care of synonym storage and mapping.
    /// This method does nothing but storing synonyms in the container.
    /// @param id        seq-id to collect the synonyms for
    /// @param synonyms  container receiving the synonyms
    void CollectSynonyms(const CSeq_id_Handle& id, TSynonyms& synonyms) const;
    // Check if ranges which can not be mapped should be replaced with NULL
    // locations. By default removed ranges are reported using neighbor's fuzz.
    // The flag is controlled from environment/registry:
    // MAPPER_NONMAPPING_AS_NULL=t
    // [Mapper]/Nonmapping_As_Null=t
    // Static: the setting does not depend on a mapper instance.
    static bool GetNonMappingAsNull(void);
1009 };
1010 
1011 
1012 /////////////////////////////////////////////////////////////////////////////
1013 ///
1014 ///  IMapper_Sequence_Info
1015 ///
1016 ///  Interface for providing sequence information to CSeq_loc_Mapper_Base.
1017 ///  Returns information about sequence type, length and synonyms.
1018 
// Abstract interface: all three methods are pure virtual and must be
// implemented by a derived class. The interface derives from CObject.
class IMapper_Sequence_Info : public CObject
{
public:
    // Use the same type names as CSeq_loc_Mapper_Base.
    typedef CSeq_loc_Mapper_Base::ESeqType  TSeqType;
    typedef CSeq_loc_Mapper_Base::TSynonyms TSynonyms;

    /// Get information about sequence type (nuc or prot).
    virtual TSeqType GetSequenceType(const CSeq_id_Handle& idh) = 0;

    /// Get sequence length or kInvalidSeqPos.
    virtual TSeqPos GetSequenceLength(const CSeq_id_Handle& idh) = 0;

    /// Collect all synonyms for the id including the id itself.
    /// Any derived class must add at least the original id to the collection.
    virtual void CollectSynonyms(const CSeq_id_Handle& id,
                                 TSynonyms&            synonyms) = 0;
};
1036 
1037 
1038 /////////////////////////////////////////////////////////////////////////////
1039 ///
1040 ///  CSeq_loc_Mapper_Message
1041 ///
1042 ///  Class used to report CSeq_loc_Mapper_Base issues through
1043 ///  IMessageListener.
// Message based on CMessage_Basic which can additionally carry one object
// (seq-loc, seq-feat, seq-align or seq-graph) related to the issue.
class NCBI_SEQ_EXPORT CSeq_loc_Mapper_Message : public CMessage_Basic
{
public:
    /// Create a message with the text and the severity; error code and
    /// subcode are optional and default to 0.
    CSeq_loc_Mapper_Message(const string& msg,
                            EDiagSev      sev,
                            int           err_code = 0,
                            int           sub_code = 0);
    virtual ~CSeq_loc_Mapper_Message(void);

    // NOTE(review): Clone() and Write() presumably override the virtual
    // methods of the base message class - confirm against CMessage_Basic.
    virtual CSeq_loc_Mapper_Message* Clone(void) const;
    virtual void Write(CNcbiOstream& out) const;

    /// Type of the object stored in the message.
    enum EObjectType {
        eNot_set,
        eSeq_loc,
        eSeq_feat,
        eSeq_align,
        eSeq_graph
    };

    /// Check type of the object stored in the message.
    EObjectType Which(void) const { return m_ObjType; }

    /// Set seq-loc object (copy into the message).
    void SetLoc(const CSeq_loc& loc);
    /// Get seq-loc object or null.
    const CSeq_loc* GetLoc(void) const;

    /// Set seq-feat object (copy into the message).
    void SetFeat(const CSeq_feat& feat);
    /// Get seq-feat object or null.
    const CSeq_feat* GetFeat(void) const;

    /// Set seq-align object (copy into the message).
    void SetAlign(const CSeq_align& align);
    /// Get seq-align object or null.
    const CSeq_align* GetAlign(void) const;

    /// Set seq-graph object (copy into the message).
    void SetGraph(const CSeq_graph& graph);
    /// Get seq-graph object or null.
    const CSeq_graph* GetGraph(void) const;

    /// Set the stored object to null.
    void ResetObject(void);

private:
    // Type of the stored object, reported by Which().
    EObjectType m_ObjType;

    // The stored object, kept as a reference to the CObject base.
    // NOTE(review): its actual type is presumably described by
    // m_ObjType - confirm in the implementation.
    CRef<CObject> m_Obj;
};
1095 
1096 
// Comparison functor for references to CMappingRange: ranges with
// smaller source start go first, for the same start - ranges with bigger
// source end. The inline definition follows below in this header.
struct CMappingRangeRef_Less
{
    bool operator()(const CRef<CMappingRange>& x,
                    const CRef<CMappingRange>& y) const;
};
1102 
1103 
// Comparison functor for references to CMappingRange: ranges with
// bigger source end go first, for the same end - ranges with smaller
// source start. The inline definition follows below in this header.
struct CMappingRangeRef_LessRev
{
    bool operator()(const CRef<CMappingRange>& x,
                    const CRef<CMappingRange>& y) const;
};
1109 
1110 
1111 inline
operator ()(const CRef<CMappingRange> & x,const CRef<CMappingRange> & y) const1112 bool CMappingRangeRef_Less::operator()(const CRef<CMappingRange>& x,
1113                                        const CRef<CMappingRange>& y) const
1114 {
1115     // Leftmost first
1116     if (x->m_Src_from != y->m_Src_from) {
1117         return x->m_Src_from < y->m_Src_from;
1118     }
1119     // Longest first
1120     if (x->m_Src_to != y->m_Src_to) {
1121         return x->m_Src_to > y->m_Src_to;
1122     }
1123     return x < y;
1124 }
1125 
1126 
1127 inline
operator ()(const CRef<CMappingRange> & x,const CRef<CMappingRange> & y) const1128 bool CMappingRangeRef_LessRev::operator()(const CRef<CMappingRange>& x,
1129                                           const CRef<CMappingRange>& y) const
1130 {
1131     // Rightmost first
1132     if (x->m_Src_to != y->m_Src_to) {
1133         return x->m_Src_to > y->m_Src_to;
1134     }
1135     // Longest first
1136     if (x->m_Src_from != y->m_Src_from) {
1137         return x->m_Src_from < y->m_Src_from;
1138     }
1139     return x > y;
1140 }
1141 
1142 
1143 inline
GoodSrcId(const CSeq_id & id) const1144 bool CMappingRange::GoodSrcId(const CSeq_id& id) const
1145 {
1146     return m_Src_id_Handle == id;
1147 }
1148 
1149 
1150 inline
GetDstId(void) const1151 CRef<CSeq_id> CMappingRange::GetDstId(void) const
1152 {
1153     return m_Dst_id_Handle ?
1154         Ref(&const_cast<CSeq_id&>(*m_Dst_id_Handle.GetSeqId())) :
1155         CRef<CSeq_id>(0);
1156 }
1157 
1158 
1159 inline
SetMergeNone(void)1160 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::SetMergeNone(void)
1161 {
1162     m_MergeFlag = eMergeNone;
1163     return *this;
1164 }
1165 
1166 
1167 inline
SetMergeAbutting(void)1168 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::SetMergeAbutting(void)
1169 {
1170     m_MergeFlag = eMergeAbutting;
1171     return *this;
1172 }
1173 
1174 
1175 inline
SetMergeBySeg(void)1176 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::SetMergeBySeg(void)
1177 {
1178     m_MergeFlag = eMergeBySeg;
1179     return *this;
1180 }
1181 
1182 
1183 inline
SetMergeContained(void)1184 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::SetMergeContained(void)
1185 {
1186     m_MergeFlag = eMergeContained;
1187     return *this;
1188 }
1189 
1190 
1191 inline
SetMergeAll(void)1192 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::SetMergeAll(void)
1193 {
1194     m_MergeFlag = eMergeAll;
1195     return *this;
1196 }
1197 
1198 
1199 inline
SetGapPreserve(void)1200 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::SetGapPreserve(void)
1201 {
1202     m_GapFlag = eGapPreserve;
1203     return *this;
1204 }
1205 
1206 
1207 inline
SetGapRemove(void)1208 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::SetGapRemove(void)
1209 {
1210     m_GapFlag = eGapRemove;
1211     return *this;
1212 }
1213 
1214 
1215 inline
SetTrimSplicedSeg(bool trim)1216 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::SetTrimSplicedSeg(bool trim)
1217 {
1218     x_SetMiscFlag(fTrimSplicedSegs, trim);
1219     return *this;
1220 }
1221 
1222 
1223 inline
SetCheckStrand(bool value)1224 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::SetCheckStrand(bool value)
1225 {
1226     x_SetMiscFlag(fCheckStrand, value);
1227     return *this;
1228 }
1229 
1230 
1231 inline
LastIsPartial(void)1232 bool CSeq_loc_Mapper_Base::LastIsPartial(void)
1233 {
1234     return m_Partial;
1235 }
1236 
1237 
1238 inline
KeepNonmappingRanges(void)1239 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::KeepNonmappingRanges(void)
1240 {
1241     x_SetMiscFlag(fKeepNonmapping, true);
1242     return *this;
1243 }
1244 
1245 
1246 inline
TruncateNonmappingRanges(void)1247 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::TruncateNonmappingRanges(void)
1248 {
1249     x_SetMiscFlag(fKeepNonmapping, false);
1250     return *this;
1251 }
1252 
1253 
1254 inline
MixedAlignsAsSpliced(bool value)1255 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::MixedAlignsAsSpliced(bool value)
1256 {
1257     x_SetMiscFlag(fMixedAlignsAsSpliced, value);
1258     return *this;
1259 }
1260 
1261 
1262 inline
IncludeSourceLocs(bool value)1263 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::IncludeSourceLocs(bool value)
1264 {
1265     x_SetMiscFlag(fIncludeSrcLocs, value);
1266     return *this;
1267 }
1268 
1269 
1270 inline
SetErrorOnPartial(bool value)1271 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::SetErrorOnPartial(bool value)
1272 {
1273     x_SetMiscFlag(fErrorOnPartial, value);
1274     return *this;
1275 }
1276 
1277 
1278 inline
Map(const CSeq_align & src_align)1279 CRef<CSeq_align> CSeq_loc_Mapper_Base::Map(const CSeq_align& src_align)
1280 {
1281     return x_MapSeq_align(src_align, 0);
1282 }
1283 
1284 
1285 inline
Map(const CSeq_align & src_align,size_t row)1286 CRef<CSeq_align> CSeq_loc_Mapper_Base::Map(const CSeq_align& src_align,
1287                                            size_t            row)
1288 {
1289     return x_MapSeq_align(src_align, &row);
1290 }
1291 
1292 
1293 inline
1294 CSeq_loc_Mapper_Base::ESeqType
GetSeqTypeById(const CSeq_id_Handle & idh) const1295 CSeq_loc_Mapper_Base::GetSeqTypeById(const CSeq_id_Handle& idh) const
1296 {
1297     CSeq_id_Handle primary_id = CollectSynonyms(idh);
1298     TSeqTypeById::const_iterator it = m_SeqTypes.find(primary_id);
1299     if (it != m_SeqTypes.end()) {
1300         return it->second;
1301     }
1302     return GetSeqType(primary_id);
1303 }
1304 
1305 
1306 inline
1307 CSeq_loc_Mapper_Base::ESeqType
GetSeqTypeById(const CSeq_id & id) const1308 CSeq_loc_Mapper_Base::GetSeqTypeById(const CSeq_id& id) const
1309 {
1310     return GetSeqTypeById(CSeq_id_Handle::GetHandle(id));
1311 }
1312 
1313 
1314 inline
SetSeqTypeById(const CSeq_id & id,ESeqType seqtype) const1315 void CSeq_loc_Mapper_Base::SetSeqTypeById(const CSeq_id& id,
1316                                           ESeqType       seqtype) const
1317 {
1318     SetSeqTypeById(CSeq_id_Handle::GetHandle(id), seqtype);
1319 }
1320 
1321 
1322 inline
GetWidthById(const CSeq_id_Handle & idh) const1323 int CSeq_loc_Mapper_Base::GetWidthById(const CSeq_id_Handle& idh) const
1324 {
1325     return (GetSeqTypeById(idh) == eSeq_prot) ? 3 : 1;
1326 }
1327 
1328 
1329 inline
GetWidthById(const CSeq_id & id) const1330 int CSeq_loc_Mapper_Base::GetWidthById(const CSeq_id& id) const
1331 {
1332     return GetWidthById(CSeq_id_Handle::GetHandle(id));
1333 }
1334 
1335 
1336 inline
CSeq_loc_Mapper_Options(void)1337 CSeq_loc_Mapper_Options::CSeq_loc_Mapper_Options(void)
1338     : m_SeqInfo(0), m_Options(0) {}
1339 
1340 inline
CSeq_loc_Mapper_Options(IMapper_Sequence_Info * seq_info,TMapOptions opts)1341 CSeq_loc_Mapper_Options::CSeq_loc_Mapper_Options(IMapper_Sequence_Info* seq_info,
1342                                                  TMapOptions            opts)
1343     : m_SeqInfo(seq_info), m_Options(opts) {}
1344 
1345 inline
CSeq_loc_Mapper_Options(TMapOptions opts)1346 CSeq_loc_Mapper_Options::CSeq_loc_Mapper_Options(TMapOptions opts)
1347     : m_SeqInfo(0), m_Options(opts) {}
1348 
1349 inline
1350 IMapper_Sequence_Info*
GetMapperSequenceInfo(void) const1351 CSeq_loc_Mapper_Options::GetMapperSequenceInfo(void) const
1352 {
1353     return m_SeqInfo;
1354 }
1355 
1356 inline
1357 CSeq_loc_Mapper_Options&
SetMapperSequenceInfo(IMapper_Sequence_Info * seq_info)1358 CSeq_loc_Mapper_Options::SetMapperSequenceInfo(IMapper_Sequence_Info* seq_info)
1359 {
1360     m_SeqInfo = seq_info;
1361     return *this;
1362 }
1363 
1364 inline
GetAlign_Dense_seg_TotalRange(void) const1365 bool CSeq_loc_Mapper_Options::GetAlign_Dense_seg_TotalRange(void) const
1366 {
1367     return x_IsSetOption(CSeq_loc_Mapper_Base::fAlign_Dense_seg_TotalRange);
1368 }
1369 
1370 inline
1371 CSeq_loc_Mapper_Options&
SetAlign_Dense_seg_TotalRange(bool value)1372 CSeq_loc_Mapper_Options::SetAlign_Dense_seg_TotalRange(bool value)
1373 {
1374     x_SetOption(CSeq_loc_Mapper_Base::fAlign_Dense_seg_TotalRange, value);
1375     return *this;
1376 }
1377 
1378 inline
GetAlign_Sparse_ToFirst(void) const1379 bool CSeq_loc_Mapper_Options::GetAlign_Sparse_ToFirst(void) const
1380 {
1381     return !x_IsSetOption(CSeq_loc_Mapper_Base::fAlign_Sparse_ToSecond);
1382 }
1383 
1384 inline
GetAlign_Sparse_ToSecond(void) const1385 bool CSeq_loc_Mapper_Options::GetAlign_Sparse_ToSecond(void) const
1386 {
1387     return x_IsSetOption(CSeq_loc_Mapper_Base::fAlign_Sparse_ToSecond);
1388 }
1389 
1390 inline
1391 CSeq_loc_Mapper_Options&
SetAlign_Sparse_ToFirst(bool value)1392 CSeq_loc_Mapper_Options::SetAlign_Sparse_ToFirst(bool value)
1393 {
1394     x_SetOption(CSeq_loc_Mapper_Base::fAlign_Sparse_ToSecond, !value);
1395     return *this;
1396 }
1397 
1398 inline
1399 CSeq_loc_Mapper_Options&
SetAlign_Sparse_ToSecond(bool value)1400 CSeq_loc_Mapper_Options::SetAlign_Sparse_ToSecond(bool value)
1401 {
1402     x_SetOption(CSeq_loc_Mapper_Base::fAlign_Sparse_ToSecond, value);
1403     return *this;
1404 }
1405 
1406 inline
GetMapSingleLevel(void) const1407 bool CSeq_loc_Mapper_Options::GetMapSingleLevel(void) const
1408 {
1409     return x_IsSetOption(CSeq_loc_Mapper_Base::fMapSingleLevel);
1410 }
1411 
1412 inline
1413 CSeq_loc_Mapper_Options&
SetMapSingleLevel(bool value)1414 CSeq_loc_Mapper_Options::SetMapSingleLevel(bool value)
1415 {
1416     x_SetOption(CSeq_loc_Mapper_Base::fMapSingleLevel, value);
1417     return *this;
1418 }
1419 
1420 inline
GetTrimMappedLocation(void) const1421 bool CSeq_loc_Mapper_Options::GetTrimMappedLocation(void) const
1422 {
1423     return x_IsSetOption(CSeq_loc_Mapper_Base::fTrimMappedLocation);
1424 }
1425 
1426 inline
1427 CSeq_loc_Mapper_Options&
SetTrimMappedLocation(bool value)1428 CSeq_loc_Mapper_Options::SetTrimMappedLocation(bool value)
1429 {
1430     x_SetOption(CSeq_loc_Mapper_Base::fTrimMappedLocation, value);
1431     return *this;
1432 }
1433 
1434 inline
x_IsSetOption(int opt) const1435 bool CSeq_loc_Mapper_Options::x_IsSetOption(int opt) const
1436 {
1437     return (m_Options & opt) != 0;
1438 }
1439 
1440 inline
x_SetOption(int opt,bool enable)1441 void CSeq_loc_Mapper_Options::x_SetOption(int opt, bool enable)
1442 {
1443     if ( enable ) {
1444         m_Options |= opt;
1445     }
1446     else {
1447         m_Options &= ~opt;
1448     }
1449 }
1450 
1451 
1452 /* @} */
1453 
1454 
1455 END_SCOPE(objects)
1456 END_NCBI_SCOPE
1457 
1458 #endif  // SEQ_LOC_MAPPER_BASE__HPP
1459