1 #ifndef SEQ_LOC_MAPPER_BASE__HPP
2 #define SEQ_LOC_MAPPER_BASE__HPP
3
4 /* $Id: seq_loc_mapper_base.hpp 572476 2018-10-15 15:17:03Z grichenk $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Aleksey Grichenko
30 *
31 * File Description:
32 * Seq-loc mapper base
33 *
34 */
35
36 #include <corelib/ncbistd.hpp>
37 #include <corelib/ncbiobj.hpp>
38 #include <corelib/ncbi_message.hpp>
39 #include <util/range.hpp>
40 #include <util/rangemap.hpp>
41 #include <objects/seqloc/Na_strand.hpp>
42 #include <objects/seqalign/Seq_align.hpp>
43 #include <objects/seqalign/Spliced_exon.hpp>
44 #include <objects/seq/seq_id_handle.hpp>
45 #include <objects/general/Int_fuzz.hpp>
46 #include <objects/seq/annot_mapper_exception.hpp>
47
48
49 BEGIN_NCBI_SCOPE
50 BEGIN_SCOPE(objects)
51
52
53 /** @addtogroup ObjectManagerCore
54 *
55 * @{
56 */
57
58
59 class CSeq_id;
60 class CSeq_loc;
61 class CSeq_interval;
62 class CPacked_seqpnt;
63 class CSeq_loc_CI;
64 class CSeq_feat;
65 class CSeq_align;
66 class CSeq_align_Mapper_Base;
67 class CSeq_graph;
68 class IMapper_Sequence_Info;
69
70
71 /// CMappingRange - describes a single interval to interval
72 /// mapping.
73 class NCBI_SEQ_EXPORT CMappingRange : public CObject
74 {
75 public:
76 CMappingRange(CSeq_id_Handle src_id,
77 TSeqPos src_from,
78 TSeqPos src_length,
79 ENa_strand src_strand,
80 CSeq_id_Handle dst_id,
81 TSeqPos dst_from,
82 ENa_strand dst_strand,
83 bool ext_to = false,
84 int frame = 0,
85 TSeqPos src_bioseq_len = kInvalidSeqPos,
86 TSeqPos dst_len = kInvalidSeqPos);
87
88 /// Check if the id is on the source sequence.
89 bool GoodSrcId(const CSeq_id& id) const;
90 CRef<CSeq_id> GetDstId(void) const;
GetDstIdHandle(void) const91 const CSeq_id_Handle& GetDstIdHandle(void) const
92 { return m_Dst_id_Handle; }
93
94 typedef CRange<TSeqPos> TRange;
95 typedef CRef<CInt_fuzz> TFuzz;
96 typedef pair<TFuzz, TFuzz> TRangeFuzz;
97
98 /// Check if the interval can be mapped through this mapping range.
99 /// Strand direction is checked only if is_set_strand is true.
100 bool CanMap(TSeqPos from,
101 TSeqPos to,
102 bool is_set_strand,
103 ENa_strand strand) const;
104 /// Map a single point
105 TSeqPos Map_Pos(TSeqPos pos) const;
106 /// Map an interval, set fuzz when the mapping truncates the original
107 /// range.
108 TRange Map_Range(TSeqPos from,
109 TSeqPos to,
110 const TRangeFuzz* fuzz = 0) const;
111 /// Map the strand, return true if the destination strand should be
112 /// set (even if it's eNa_strand_unknown -- this may happen if the
113 /// source strand is set to unknown).
114 bool Map_Strand(bool is_set_strand,
115 ENa_strand src,
116 ENa_strand* dst) const;
117 /// Map fuzz if one is set in the original location.
118 TRangeFuzz Map_Fuzz(const TRangeFuzz& fuzz) const;
119
120 private:
121 // Get new fuzz value when reversing location's strand.
122 CInt_fuzz::ELim x_ReverseFuzzLim(CInt_fuzz::ELim lim) const;
123 void x_Map_Fuzz(TFuzz& fuzz) const;
124
125 CSeq_id_Handle m_Src_id_Handle;
126 TSeqPos m_Src_from;
127 TSeqPos m_Src_to;
128 ENa_strand m_Src_strand;
129 CSeq_id_Handle m_Dst_id_Handle;
130 TSeqPos m_Dst_from;
131 ENa_strand m_Dst_strand;
132 // Whether the mapping reverses the strand or not.
133 // This value can be calculated from source and destination
134 // strands, but is cached for better performance.
135 bool m_Reverse;
136 // Whether to extend the mapped location to the end of
137 // destination range. Used when mapping from a prot to a nuc.
138 // ExtTo is set when both conditions are met:
139 // - the mapping is from a protein to a nucleotide
140 // - the destination interval has partial 'to' (set as fuzz)
141 // ExtTo is used only when the interval to be mapped has
142 // partial 'to' set through the fuzz and the mapped range is
143 // just 1 or 2 bases shorter than the mapping destination.
144 bool m_ExtTo;
145 // Holds the frame shift (0 if none) of the underlying CDS (if any).
146 int m_Frame;
147 // This holds the complete length of the original source bioseq.
148 // Needed to detect whether or not fuzzy edges should be extended to the end.
149 TSeqPos m_Src_bioseq_len;
150 // For example, if the end of a source maps to just before the end of the
151 // dest, then we sometimes extend to the end of the dest, so we do need
152 // to store this, even though it's not needed for the mapping itself.
153 TSeqPos m_Dst_len;
154 // Group of mapping ranges - used with alignments, e.g. to group
155 // mapped ranges by exon.
156 int m_Group;
157
158 friend class CSeq_loc_Mapper_Base;
159 //friend class CSeq_loc_Mapper;
160 friend class CMappingRanges;
161 friend class CSeq_align_Mapper_Base;
162 //friend class CSeq_align_Mapper;
163 friend struct CMappingRangeRef_Less;
164 friend struct CMappingRangeRef_LessRev;
165
166 public:
167 // Interface for CPairwiseAln converter
GetSrc_from(void) const168 TSeqPos GetSrc_from(void) const { return m_Src_from; }
GetDst_from(void) const169 TSeqPos GetDst_from(void) const { return m_Dst_from; }
GetLength(void) const170 TSeqPos GetLength(void) const { return m_Src_to - m_Src_from; }
GetReverse(void) const171 bool GetReverse(void) const { return m_Reverse; }
GetGroup(void) const172 int GetGroup(void) const { return m_Group; }
SetGroup(int grp)173 void SetGroup(int grp) { m_Group = grp; }
174 };
175
176
177 /// Storage for multiple mapping ranges. Stores mappings grouped
178 /// by the source seq-id, then sorted by start coordinate.
179 class NCBI_SEQ_EXPORT CMappingRanges : public CObject
180 {
181 public:
182 CMappingRanges(void);
183
184 // Conversions
185 typedef CMappingRange::TRange TRange;
186 typedef CRangeMultimap<CRef<CMappingRange>, TSeqPos> TRangeMap;
187 typedef TRangeMap::const_iterator TRangeIterator;
188 typedef map<CSeq_id_Handle, TRangeMap> TIdMap;
189 typedef TIdMap::const_iterator TIdIterator;
190 typedef vector< CRef<CMappingRange> > TSortedMappings;
191
GetIdMap() const192 const TIdMap& GetIdMap() const { return m_IdMap; }
GetIdMap(void)193 TIdMap& GetIdMap(void) { return m_IdMap; }
194
195 /// Add new mapping range to the proper place.
196 void AddConversion(CRef<CMappingRange> cvt);
197 CRef<CMappingRange> AddConversion(CSeq_id_Handle src_id,
198 TSeqPos src_from,
199 TSeqPos src_length,
200 ENa_strand src_strand,
201 CSeq_id_Handle dst_id,
202 TSeqPos dst_from,
203 ENa_strand dst_strand,
204 bool ext_to = false,
205 int frame = 0,
206 TSeqPos dst_total_len = kInvalidSeqPos,
207 TSeqPos src_bioseq_len = kInvalidSeqPos,
208 TSeqPos dst_len = kInvalidSeqPos );
209
210 /// Get mapping ranges iterator for the given seq-id and range.
211 TRangeIterator BeginMappingRanges(CSeq_id_Handle id,
212 TSeqPos from,
213 TSeqPos to) const;
214
215 // Overall source and destination orientation. The order of mapped ranges
216 // is reversed if ReverseSrc != ReverseDst (except in some merging modes).
SetReverseSrc(bool value=true)217 void SetReverseSrc(bool value = true) { m_ReverseSrc = value; };
GetReverseSrc(void) const218 bool GetReverseSrc(void) const { return m_ReverseSrc; }
SetReverseDst(bool value=true)219 void SetReverseDst(bool value = true) { m_ReverseDst = value; };
GetReverseDst(void) const220 bool GetReverseDst(void) const { return m_ReverseDst; }
221
222 private:
223 TIdMap m_IdMap;
224
225 // Mapping source and destination orientations
226 bool m_ReverseSrc;
227 bool m_ReverseDst;
228 };
229
230
231 /// Helper class for mapping graphs. Used to collect ranges
232 /// relative to the graph location and adjust mapped graph data
233 /// accordingly.
234 class NCBI_SEQ_EXPORT CGraphRanges : public CObject
235 {
236 public:
CGraphRanges(void)237 CGraphRanges(void) : m_Offset(0) {}
238
239 typedef CRange<TSeqPos> TRange;
240 typedef vector<TRange> TGraphRanges;
241
242 // Offset is relative to the original graph location, indicates
243 // the part of the original location which has been already
244 // mapped (or truncated).
GetOffset(void) const245 TSeqPos GetOffset(void) const { return m_Offset; }
SetOffset(TSeqPos offset)246 void SetOffset(TSeqPos offset) { m_Offset = offset; }
IncOffset(TSeqPos inc)247 void IncOffset(TSeqPos inc) { m_Offset += inc; }
248
GetRanges(void) const249 const TGraphRanges& GetRanges(void) const { return m_Ranges; }
250
251 // Add new mapped range. The range is relative to the not yet mapped
252 // part of the original location. See:
253 // CSeq_loc_Mapper_Base::x_MapNextRange()
254 // CSeq_loc_Mapper_Base::x_MapInterval()
AddRange(const TRange & rg)255 void AddRange(const TRange& rg)
256 {
257 if ( rg.Empty() ) {
258 return;
259 }
260 TRange offset_rg = rg.IsWhole() ? rg :
261 TRange(rg.GetFrom() + m_Offset, rg.GetTo() + m_Offset);
262 m_Ranges.push_back(offset_rg);
263 m_TotalRange.CombineWith(offset_rg);
264 }
265
GetTotalRange(void) const266 const TRange& GetTotalRange(void) const { return m_TotalRange; }
267
268 private:
269 TSeqPos m_Offset;
270 TGraphRanges m_Ranges;
271 TRange m_TotalRange;
272 };
273
274
275 /////////////////////////////////////////////////////////////////////////////
276 ///
277 /// CSeq_loc_Mapper_Options --
278 ///
279 /// Options passed to CSeq_loc_Mapper[_Base] constructor.
280 ///
281
282 class NCBI_SEQ_EXPORT CSeq_loc_Mapper_Options
283 {
284 public:
285 typedef int TMapOptions;
286
287 CSeq_loc_Mapper_Options(void);
288 CSeq_loc_Mapper_Options(IMapper_Sequence_Info* seq_info,
289 TMapOptions opts = 0);
290 CSeq_loc_Mapper_Options(TMapOptions opts);
291
292 /// Sequence type, length etc. provider. If any ids from the mapping
293 /// ranges are not available through this object, they should be
294 /// registered using CSeq_loc_Mapper_Base::SetSeqTypeById().
295 IMapper_Sequence_Info* GetMapperSequenceInfo(void) const;
296 CSeq_loc_Mapper_Options& SetMapperSequenceInfo(IMapper_Sequence_Info* seq_info);
297
298 /// Dense-seg mapping option.
299 /// @sa CSeq_loc_Mapper_Base::fAlign_Dense_seg_TotalRange
300 bool GetAlign_Dense_seg_TotalRange(void) const;
301 CSeq_loc_Mapper_Options& SetAlign_Dense_seg_TotalRange(bool value = true);
302
303 /// Mapping direction when mapping through a sparse-seg.
304 /// @sa CSeq_loc_Mapper_Base::fAlign_Sparse_ToFirst
305 /// @sa CSeq_loc_Mapper_Base::fAlign_Sparse_ToSecond
306 bool GetAlign_Sparse_ToFirst(void) const;
307 bool GetAlign_Sparse_ToSecond(void) const;
308 CSeq_loc_Mapper_Options& SetAlign_Sparse_ToFirst(bool value = true);
309 CSeq_loc_Mapper_Options& SetAlign_Sparse_ToSecond(bool value = true);
310
311 /// Mapping depth when using a seq-map, a bioseq or a GC-assembly.
312 /// @sa CSeq_loc_Mapper_Base::fMapSingleLevel
313 bool GetMapSingleLevel(void) const;
314 CSeq_loc_Mapper_Options& SetMapSingleLevel(bool value = true);
315
316 /// Mapped location trimming at sequence end. Off by default.
317 /// @sa CSeq_loc_Mapper_Base::fTrimMappedLocation
318 bool GetTrimMappedLocation(void) const;
319 CSeq_loc_Mapper_Options& SetTrimMappedLocation(bool value = true);
320
321 private:
322 friend class CSeq_loc_Mapper_Base;
323
324 IMapper_Sequence_Info& GetSeqInfo(void) const;
325
326 bool x_IsSetOption(int opt) const;
327 void x_SetOption(int opt, bool enable);
328
329 mutable CRef<IMapper_Sequence_Info> m_SeqInfo;
330 TMapOptions m_Options;
331 };
332
333
334 /////////////////////////////////////////////////////////////////////////////
335 ///
336 /// CSeq_loc_Mapper_Base --
337 ///
338 /// Mapping locations and alignments between bioseqs through seq-locs,
339 /// features, alignments or between parts of segmented bioseqs.
340
341 class NCBI_SEQ_EXPORT CSeq_loc_Mapper_Base : public CObject
342 {
343 public:
/// Mapping direction used when initializing the mapper with a feature.
enum EFeatMapDirection {
    eLocationToProduct, ///< Map from the feature's location to product
    eProductToLocation  ///< Map from the feature's product to location
};
349
/// Options for interpretations of locations
enum EMapOptions {
    /// Ignore internal dense-seg structure - map each
    /// dense-seg according to the total ranges involved
    fAlign_Dense_seg_TotalRange = 1 << 0,

    /// Flags used to indicate mapping direction when mapping
    /// through a sparse-seg.
    /// Note that ToFirst has the value 0, i.e. it is the absence of
    /// the ToSecond bit rather than a bit of its own.
    fAlign_Sparse_ToFirst       = 0,      ///< Map to first-id
    fAlign_Sparse_ToSecond      = 1 << 1, ///< Map to second-id

    /// Flag used when mapping through a seq-map (this includes
    /// mapping through a bioseq or a GC-assembly). If set, each
    /// call to Map() goes only one level up or down, unlike normal
    /// mode which maps from any level as far up/down as possible.
    /// The result of mapping can be mapped further by making another
    /// call to Map().
    fMapSingleLevel             = 1 << 2,

    /// Enable trimming of source/destination ranges at sequence end.
    /// By default locations can stretch beyond sequence end. With trimming
    /// enabled the mapper will truncate ranges to fit sequence lengths.
    fTrimMappedLocation         = 1 << 3
};
/// Bitwise combination of EMapOptions values.
typedef int TMapOptions;
375
/// Spliced-seg row indexing constants.
enum ESplicedRow {
    eSplicedRow_Prod = 0,
    eSplicedRow_Gen  = 1
};
381
/// Fuzz handling flags, combined into TFuzzOption.
enum FFuzzOption {
    // used for backwards compatibility with C toolkit's output.
    // TODO: we should remove this one day since the
    // normal output is superior.
    fFuzzOption_CStyle          = 1 << 0,
    // Don't set eLim_tl or eLim_tr and instead set greater than or less
    // than if appropriate.
    fFuzzOption_RemoveLimTlOrTr = 1 << 1
};
/// Bitwise combination of FFuzzOption values.
typedef int TFuzzOption;
392
/// Mapping through a pre-filled CMappingRanges.
/// @param mapping_ranges
///  CMappingRanges filled with the desired source and destination
///  ranges. Must be a heap object (will be stored in a CRef<>).
///  NOTE: If the mapper is used with mixed sequence types, the
///  ranges must use genomic coordinates (for ranges on proteins
///  multiply all coordinates by 3).
/// @param options
///  Mapping options which need to be set during mapper initialization.
/// @sa CSeq_loc_Mapper_Options
403 CSeq_loc_Mapper_Base(CMappingRanges* mapping_ranges,
404 CSeq_loc_Mapper_Options options = CSeq_loc_Mapper_Options());
405
406 /// Mapping through a feature, both location and product must be set.
407 CSeq_loc_Mapper_Base(const CSeq_feat& map_feat,
408 EFeatMapDirection dir,
409 CSeq_loc_Mapper_Options options = CSeq_loc_Mapper_Options());
410
411 /// Mapping between two seq_locs.
412 CSeq_loc_Mapper_Base(const CSeq_loc& source,
413 const CSeq_loc& target,
414 CSeq_loc_Mapper_Options options = CSeq_loc_Mapper_Options());
415
416 /// Mapping through an alignment. Need to specify target ID or
417 /// target row of the alignment. Any other ID is mapped to the
418 /// target one. Only the first row matching target ID is used,
419 /// all other rows are considered source.
420 CSeq_loc_Mapper_Base(const CSeq_align& map_align,
421 const CSeq_id& to_id,
422 CSeq_loc_Mapper_Options options = CSeq_loc_Mapper_Options());
423 /// Mapping through an alignment using specific source and target ids.
424 /// If the alignment is not one of dense-seg, dense-diag or packed-seg, the source
425 /// id is ignored.
426 CSeq_loc_Mapper_Base(const CSeq_id& from_id,
427 const CSeq_id& to_id,
428 const CSeq_align& map_align,
429 CSeq_loc_Mapper_Options options = CSeq_loc_Mapper_Options());
430 /// @deprecated Use the version with CSeq_loc_Mapper_Options instead.
431 NCBI_DEPRECATED
432 CSeq_loc_Mapper_Base(const CSeq_align& map_align,
433 const CSeq_id& to_id,
434 TMapOptions opts,
435 IMapper_Sequence_Info* seq_info);
436
437 /// Sparse alignments require special row indexing since each
438 /// row contains two seq-ids. Use options to specify mapping
439 /// direction.
440 CSeq_loc_Mapper_Base(const CSeq_align& map_align,
441 size_t to_row,
442 CSeq_loc_Mapper_Options options = CSeq_loc_Mapper_Options());
443 /// Mapping through an alignment using specific source and target row numbers.
444 /// If the alignment is not one of dense-seg, dense-diag or packed-seg, the source
445 /// row is ignored.
446 CSeq_loc_Mapper_Base(size_t from_row,
447 size_t to_row,
448 const CSeq_align& map_align,
449 CSeq_loc_Mapper_Options options = CSeq_loc_Mapper_Options());
450 /// @deprecated Use the version with CSeq_loc_Mapper_Options instead.
451 NCBI_DEPRECATED
452 CSeq_loc_Mapper_Base(const CSeq_align& map_align,
453 size_t to_row,
454 TMapOptions opts,
455 IMapper_Sequence_Info* seq_info);
456
457 ~CSeq_loc_Mapper_Base(void);
458
459 void SetFuzzOption( TFuzzOption newOption );
460
/// Intervals' merging mode
/// MergeNone and MergeAbutting do not change the order of ranges
/// in the destination seq-loc. No ranges will be merged if they
/// are separated by any other sub-range.
/// MergeContained and MergeAll sort ranges before merging, so that
/// any overlapping ranges can be merged. The sorting takes the
/// mapped location strand into account.
/// NOTE: any merging (except None) is incompatible with collecting
/// source ranges.
/// @sa IncludeSourceLocs
471
472 /// No merging
473 CSeq_loc_Mapper_Base& SetMergeNone(void);
474 /// Merge only abutting intervals, keep overlapping
475 CSeq_loc_Mapper_Base& SetMergeAbutting(void);
476 /// Merge only intervals from the same group. Group is created
477 /// for each exon, dense-diag, std-seg and disc sub-alignment.
478 CSeq_loc_Mapper_Base& SetMergeBySeg(void);
479 /// Merge intervals only if one is completely covered by another
480 CSeq_loc_Mapper_Base& SetMergeContained(void);
481 /// Merge any abutting or overlapping intervals
482 CSeq_loc_Mapper_Base& SetMergeAll(void);
483
484 /// Whether to preserve or remove NULL sub-locations (usually
485 /// indicating gaps) from the result. By default gaps are preserved.
486 CSeq_loc_Mapper_Base& SetGapPreserve(void);
487 CSeq_loc_Mapper_Base& SetGapRemove(void);
488
489 /// For mapping spliced-segs only: preserve or trim starting/ending
490 /// indels. By default indels are trimmed (only those at the whole
491 /// alignment start and end).
492 CSeq_loc_Mapper_Base& SetTrimSplicedSeg(bool trim);
493
494 /// Keep ranges which can not be mapped. Does not affect truncation
495 /// of partially mapped ranges. By default non-mapping ranges are
496 /// removed.
497 CSeq_loc_Mapper_Base& KeepNonmappingRanges(void);
498 CSeq_loc_Mapper_Base& TruncateNonmappingRanges(void);
499
500 /// Check strands before mapping a range. By default strand is not
501 /// checked and a range will be mapped even if its strand does not
502 /// correspond to the strand of the mapping source.
503 CSeq_loc_Mapper_Base& SetCheckStrand(bool value = true);
504
505 /// When set to 'true' if mapped alignment has exactly one genomic and
506 /// one protein row, convert it to spliced-seg. By default all mixed-type
507 /// alignments are converted to std-seg.
508 CSeq_loc_Mapper_Base& MixedAlignsAsSpliced(bool value = true);
509
510 /// Include source ranges in the mapped location. If turned
511 /// on, the resulting seq-loc will be an equiv with the
512 /// first sub-loc containing the usual mapped seq-loc, and
513 /// the second one - the set of source locations used in the
514 /// mapping.
515 /// NOTE: this option is incompatible with any merging.
516 /// Merging mode must be set to MergeNone.
517 CSeq_loc_Mapper_Base& IncludeSourceLocs(bool value = true);
518
519 /// Report source range trimming as an error. If the flag is set,
520 /// any trimming will result in throwing CAnnotMapperException.
521 /// Intended to be used when mapping GC-Assembly aliases.
522 CSeq_loc_Mapper_Base& SetErrorOnPartial(bool value = true);
523
524 /// Map seq-loc
525 CRef<CSeq_loc> Map(const CSeq_loc& src_loc);
526 /// Take the total range from the location and run it through the mapper.
527 CRef<CSeq_loc> MapTotalRange(const CSeq_loc& seq_loc);
528 /// Map the whole alignment. Searches all rows for ranges
529 /// which can be mapped.
530 CRef<CSeq_align> Map(const CSeq_align& src_align);
531 /// Map a single row of the alignment.
532 CRef<CSeq_align> Map(const CSeq_align& src_align,
533 size_t row);
534 /// Map seq-graph. This will map both location and data.
535 /// The data may be truncated to match the new location.
536 CRef<CSeq_graph> Map(const CSeq_graph& src_graph);
537
/// Flags defining seq-annot mapping options.
enum FAnnotMapFlag {
    fAnnotMap_Location = 1 << 0, ///< Map seq-feat locations
    fAnnotMap_Product  = 1 << 1, ///< Map seq-feat products
    fAnnotMap_Both     = fAnnotMap_Location | fAnnotMap_Product,

    /// Remove annotations which can not be mapped with this mapper.
    /// If the flag is not set, the original annotation is stored
    /// in the seq-annot.
    fAnnotMap_RemoveNonMapping = 1 << 2,

    /// Throw exception if an annotation can not be mapped.
    fAnnotMap_ThrowOnFailure = 1 << 3,

    /// Default: map both locations and products.
    fAnnotMap_Default = fAnnotMap_Both
};
/// Bitwise combination of FAnnotMapFlag values.
typedef int TAnnotMapFlags;
555
/// Result of seq-annot mapping
enum EMapResult {
    /// No annotation was mapped, the input seq-annot is unchanged.
    eMapped_None = 0,
    /// Some (not all) annotations were mapped.
    eMapped_Some,
    /// All annotations were mapped, none was removed.
    eMapped_All
};
565
566 /// Map each object from the Seq-annot and replace the original
567 /// with the mapped one.
568 EMapResult Map(CSeq_annot& annot, TAnnotMapFlags flags = fAnnotMap_Default);
569
570 /// Check if the last mapping resulted in partial location
571 /// (not all ranges from the original location could be mapped
572 /// to the target).
573 bool LastIsPartial(void);
574
575 typedef set<CSeq_id_Handle> TSynonyms;
576
577 // Collect synonyms for the id, store mapping of each synonym
578 // to the primary id. Returns primary id for the argument or the
579 // argument itself.
580 const CSeq_id_Handle& CollectSynonyms(const CSeq_id_Handle& id) const;
581
// Sequence type - to recalculate coordinates.
// NOTE(review): the numeric values (1 for nuc, 3 for prot) look like
// coordinate multipliers - confirm against the implementation before
// changing them.
enum ESeqType {
    eSeq_unknown = 0,
    eSeq_nuc = 1,
    eSeq_prot = 3
};
588
589 protected:
590
// Get molecule type for the given id. The default implementation
// returns eSeq_unknown. The overridden methods should return
// the real sequence type. The returned type is stored in the mapper's
// cache. The method should not be called directly; use
// GetSeqTypeById instead, since it uses the cached types.
// It's also a good idea to cache the same sequence type for all
// synonyms in the overridden method to prevent multiple requests
// to GetSeqType.
599 ESeqType GetSeqType(const CSeq_id_Handle& idh) const;
600
601 // Get sequence length for the given seq-id. Returns kInvalidSeqPos
602 // if the length is unknown (the default behavior).
603 TSeqPos GetSequenceLength(const CSeq_id& id);
604
605 // Create CSeq_align_Mapper_Base, add any necessary arguments.
606 virtual CSeq_align_Mapper_Base*
607 InitAlignMapper(const CSeq_align& src_align);
608
609 // Initialize the mapper from a feature. The feature must have
610 // both location and product set, mapping direction is set by
611 // the flag.
612 void x_InitializeFeat(const CSeq_feat& map_feat,
613 EFeatMapDirection dir);
614 // Map between two locations. Optional frame is used by x_InitializeFeat()
615 // only with cd-region features.
616 void x_InitializeLocs(const CSeq_loc& source,
617 const CSeq_loc& target,
618 int src_frame = 0,
619 int dst_frame = 0);
620 // Initialize the mapper from an alignment. Looks for the first
621 // row containing the id and sets it as mapping target. All other
622 // rows become mapping source.
623 void x_InitializeAlign(const CSeq_align& map_align,
624 const CSeq_id& to_id,
625 const CSeq_id* from_id = nullptr);
626 // Recursive version of the above.
627 void x_InitializeAlign(const CSeq_align& map_align,
628 const TSynonyms& to_ids,
629 const TSynonyms* from_ids = nullptr);
630 // Initialize the mapper from an alignment, map to the specified row.
631 void x_InitializeAlign(const CSeq_align& map_align,
632 size_t to_row,
633 size_t from_row = size_t(-1));
634
635 // Create dummy mapping from the whole destination location to itself.
636 // This will prevent truncation of ranges already on the target.
637 // For some reason (?) the function is used only by CSeq_loc_Mapper,
638 // not CSeq_loc_Mapper_Base, and only when initializing the mapper
639 // from a bioseq handle or a seq-map. When mapping through a feature
640 // or a pair of seq-locs it's not called and ranges on destination
641 // are truncated or preserved the same way as any other non-mapping
642 // ranges.
643 void x_PreserveDestinationLocs(void);
644
645 // Add new mapping range while initializing the mapper. The function
646 // adjusts starts and lengths according to the used range and strand.
647 void x_NextMappingRange(const CSeq_id& src_id,
648 TSeqPos& src_start,
649 TSeqPos& src_len,
650 ENa_strand src_strand,
651 const CSeq_id& dst_id,
652 TSeqPos& dst_start,
653 TSeqPos& dst_len,
654 ENa_strand dst_strand,
655 const CInt_fuzz* fuzz_from = 0,
656 const CInt_fuzz* fuzz_to = 0,
657 int frame = 0,
658 TSeqPos src_bioseq_len = kInvalidSeqPos);
659
660 // Add new CMappingRange. This includes collecting all synonyms for the id,
661 // creating a new mapping for each of them and updating the destination
662 // ranges.
663 void x_AddConversion(const CSeq_id& src_id,
664 TSeqPos src_start,
665 ENa_strand src_strand,
666 const CSeq_id& dst_id,
667 TSeqPos dst_start,
668 ENa_strand dst_strand,
669 TSeqPos length,
670 bool ext_right,
671 int frame,
672 TSeqPos src_bioseq_len,
673 TSeqPos dst_length );
674
675 // Parse and map the seq-loc.
676 void x_MapSeq_loc(const CSeq_loc& src_loc);
677
678 // Convert collected ranges into a seq-loc and push it into the destination
679 // seq-loc mix. This is done to preserve the original seq-loc structure
680 // when possible (although some optimizations are done - see
681 // x_OptimizeSeq_loc).
682 void x_PushRangesToDstMix(void);
683
684 typedef CMappingRange::TRange TRange;
685 typedef CMappingRanges::TRangeMap TRangeMap;
686 typedef CMappingRanges::TRangeIterator TRangeIterator;
687 typedef CMappingRanges::TSortedMappings TSortedMappings;
688
689 // List and map of target ranges to construct target-to-target mapping
690 typedef list<TRange> TDstRanges;
691 typedef map<CSeq_id_Handle, TDstRanges> TDstIdMap;
692 typedef vector<TDstIdMap> TDstStrandMap;
693
694 // Destination locations arranged by ID/range
695 typedef CRef<CInt_fuzz> TFuzz;
696 typedef pair<TFuzz, TFuzz> TRangeFuzz;
697
698 // Structure to hold information about mapped ranges until they are
699 // converted to seq-loc parts.
700 struct SMappedRange {
SMappedRangeCSeq_loc_Mapper_Base::SMappedRange701 SMappedRange(void) : group(0) {}
SMappedRangeCSeq_loc_Mapper_Base::SMappedRange702 SMappedRange(const TRange& rg,
703 const TRangeFuzz& fz,
704 int grp = 0)
705 : range(rg), fuzz(fz), group(grp) {}
706
707 TRange range;
708 TRangeFuzz fuzz;
709 int group; // used mostly to group ranges by exon
710
operator <CSeq_loc_Mapper_Base::SMappedRange711 bool operator<(const SMappedRange& rg) const
712 {
713 return range < rg.range;
714 }
715 };
716 typedef list<SMappedRange> TMappedRanges;
717 // Ranges grouped by strand. [0] contains ranges without strand,
718 // [i] where i>0 stands for 'eNa_strand_XXXX + 1'.
719 typedef vector<TMappedRanges> TRangesByStrand;
720 typedef map<CSeq_id_Handle, TRangesByStrand> TRangesById;
721 typedef map<CSeq_id_Handle, ESeqType> TSeqTypeById;
722
723 typedef CSeq_align::C_Segs::TDendiag TDendiag;
724 typedef CSeq_align::C_Segs::TStd TStd;
725
726 private:
727 CSeq_loc_Mapper_Base(const CSeq_loc_Mapper_Base&);
728 CSeq_loc_Mapper_Base& operator=(const CSeq_loc_Mapper_Base&);
729
730 friend class CSeq_align_Mapper_Base;
731
// Merging mode for mapped intervals.
enum EMergeFlags {
    eMergeNone,      // no merging
    eMergeAbutting,  // merge only abutting intervals, keep overlapping
    eMergeContained, // merge if one range is contained in another
    eMergeBySeg,     // merge abutting and overlapping ranges by mapping group
    eMergeAll        // merge both abutting and overlapping intervals
};
// Handling of gaps (NULL seq-locs) in the mapped location.
enum EGapFlags {
    eGapPreserve,    // Leave gaps as-is
    eGapRemove       // Remove gaps (NULL seq-locs)
};
743
744 // Check types of all sequences referenced by the location,
745 // calculate the total length of the location, return true
746 // if types are known for all sequences.
747 // Set seqtype to the detected sequence type or to unknown
748 // if the type can not be detected or there are multiple types.
749 bool x_CheckSeqTypes(const CSeq_loc& loc,
750 ESeqType& seqtype,
751 TSeqPos& len);
752 // If x_CheckSeqTypes returns false, it may indicate that some
753 // sequence types could not be detected. In this case the mapper
754 // will attempt to find at least one known type in the location
755 // and force it for all sub-locations with unknown types.
756 // The function will fail if there are different known types in the
757 // same seq-loc.
758 ESeqType x_ForceSeqTypes(const CSeq_loc& loc) const;
759
760 // In some cases the mapper may fail to detect that both source
761 // and destination locations are on proteins rather than on nucs.
762 // CSeq_align_Mapper_Base may detect this mistake while mapping
763 // an alignment. In this case it will try to change all types to
764 // protein.
765 void x_AdjustSeqTypesToProt(const CSeq_id_Handle& idh);
766
767 // Get sequence length, try to get the real length for
768 // reverse strand, do not use "whole".
769 TSeqPos x_GetRangeLength(const CSeq_loc_CI& it);
770
771 // Initialize the mapper from different alignment types.
772 void x_InitAlign(const CDense_diag& diag, size_t to_row, size_t from_row);
773 void x_InitAlign(const CDense_seg& denseg, size_t to_row, size_t from_row);
774 void x_InitAlign(const CStd_seg& sseg, size_t to_row);
775 void x_InitAlign(const CPacked_seg& pseg, size_t to_row, size_t from_row);
776 void x_InitSpliced(const CSpliced_seg& spliced,
777 const TSynonyms& to_ids);
778 void x_InitSpliced(const CSpliced_seg& spliced, ESplicedRow to_row);
779 void x_InitSparse(const CSparse_seg& sparse, int to_row);
780
781 void x_IterateExonParts(const CSpliced_exon::TParts& parts,
782 ESplicedRow to_row,
783 const CSeq_id& gen_id,
784 TSeqPos& gen_start,
785 TSeqPos& gen_len,
786 ENa_strand gen_strand,
787 const CSeq_id& prod_id,
788 TSeqPos& prod_start,
789 TSeqPos& prod_len,
790 ENa_strand prod_strand);
791 void x_AddExonPartsMapping(TSeqPos& mapping_len,
792 ESplicedRow to_row,
793 const CSeq_id& gen_id,
794 TSeqPos& gen_start,
795 TSeqPos& gen_len,
796 ENa_strand gen_strand,
797 const CSeq_id& prod_id,
798 TSeqPos& prod_start,
799 TSeqPos& prod_len,
800 ENa_strand prod_strand);
801 // Helper method to simplify getting exon part length regardless of
802 // its type.
803 static TSeqPos sx_GetExonPartLength(const CSpliced_exon_chunk& part);
804
805 // Map a single range from source to destination.
806 bool x_MapNextRange(const TRange& src_rg,
807 bool is_set_strand,
808 ENa_strand src_strand,
809 const TRangeFuzz& src_fuzz,
810 TSortedMappings& mappings,
811 size_t cvt_idx,
812 TSeqPos* last_src_to);
813 // Map the interval through all matching mappings.
814 bool x_MapInterval(const CSeq_id& src_id,
815 TRange src_rg,
816 bool is_set_strand,
817 ENa_strand src_strand,
818 TRangeFuzz orig_fuzz);
819 // Set the flag to indicate that the last range was truncated
820 // during mapping.
821 void x_SetLastTruncated(void);
822
823 // Pushes the location to the destination seq-loc mix.
824 // See also x_PushRangesToDstMix.
825 void x_PushLocToDstMix(CRef<CSeq_loc> loc);
826
827 // Pushes NULL location to the destination mix (when a range
828 // can not be mapped).
829 void x_PushNullLoc(void);
830
831 // Map the alignment. If row is NULL, map all rows. Otherwise
832 // map only the selected row.
833 CRef<CSeq_align> x_MapSeq_align(const CSeq_align& src_align,
834 size_t* row);
835
836 // Get mapped ranges for the given id and strand index.
837 // See TRangesByStrand for strand indexing.
838 TMappedRanges& x_GetMappedRanges(const CSeq_id_Handle& id,
839 size_t strand_idx) const;
840 // Push mapped range to the list of mapped ranges. Try to merge the new
841 // range with the existing ones based on the selected merging mode.
842 void x_PushMappedRange(const CSeq_id_Handle& id,
843 size_t strand_idx,
844 const TRange& range,
845 const TRangeFuzz& fuzz,
846 bool push_reverse,
847 int group);
848 // Store the source range just mapped. Used only if storing source
849 // locations is enabled - see IncludeSourceLocs.
850 void x_PushSourceRange(const CSeq_id_Handle& idh,
851 size_t src_strand,
852 size_t dst_strand,
853 const TRange& range,
854 bool push_reverse);
855
856 // Convert mapped range data to a seq-loc (point or interval).
857 // Set fuzzes to indicate truncated range if necessary.
858 CRef<CSeq_loc> x_RangeToSeq_loc(const CSeq_id_Handle& idh,
859 TSeqPos from,
860 TSeqPos to,
861 size_t strand_idx,
862 TRangeFuzz rg_fuzz);
863
864 // Convert all collected and not yet converted mapped ranges to a seq-loc.
865 // May be called multiple times while mapping a complex location and
866 // storing its parts to a destination seq-loc mix (see
867 // x_PushRangesToDstMix).
868 CRef<CSeq_loc> x_GetMappedSeq_loc(void);
869
870 // For mix locations, we remove fuzz from in-between the parts.
871 void x_StripExtraneousFuzz(CRef<CSeq_loc>& loc) const;
872
873 // This removes fuzz of type "range" if any.
874 // Don't give this mix locations; it won't do anything.
875 CConstRef<CSeq_loc> x_FixNonsenseFuzz( CConstRef<CSeq_loc> loc_piece ) const;
876
877 // Try to optimize the mapped location if it's a mix.
878 // The allowed optimizations are:
879 // - empty mix is converted to Null
880 // - if the mix contains a single element, use just this element
881 // - if the mix contains only intervals, convert it to packed-int
882 // When mapping a complex location (e.g. a multi-level mix) each
883 // sub-location is optimized individually.
884 void x_OptimizeSeq_loc(CRef<CSeq_loc>& loc) const;
885
886 // Returns true if the new mapped range should be added to the
887 // existing mapped ranges in the reverse order (in the front).
// If merging is set to contained or all, use the provided strand
// index to check the order of ranges. For all other merging modes
// compare the directions of mapping source and target.
891 bool x_ReverseRangeOrder(int str) const;
892
893 // Map parts of a complex seq-loc.
894 void x_Map_PackedInt_Element(const CSeq_interval& si);
895 void x_Map_PackedPnt_Element(const CPacked_seqpnt& pp, TSeqPos p);
896
897 // Get main seq-id for a synonym. If no mapping exists, returns the
898 // original id.
899 const CSeq_id_Handle& x_GetPrimaryId(const CSeq_id_Handle& synonym) const;
900
901 // Check if the id is in the list of synonyms.
902 bool x_IsSynonym(const CSeq_id& id, const TSynonyms& synonyms) const;
903
904 typedef map<CSeq_id_Handle, CSeq_id_Handle> TSynonymMap;
905 typedef map<CSeq_id_Handle, TSeqPos> TLengthMap;
906
907 friend class CTotalRangeSynonymMapper;
908
909 // How to merge mapped locations.
910 EMergeFlags m_MergeFlag;
911 // How to treat gaps (Null sub-locations) if any.
912 EGapFlags m_GapFlag;
913
// Miscellaneous mapping options. Each enumerator occupies a single bit so
// that several of them can be combined in one TMiscFlags value.
enum EMiscFlags {
    // Trim leading/trailing indels (gaps) from mapped spliced-seg
    // alignments.
    fTrimSplicedSegs      = 0x01,
    // Keep (rather than discard) ranges which can not be mapped.
    fKeepNonmapping       = 0x02,
    // Check whether the original location is on the same strand as the
    // mapping source.
    fCheckStrand          = 0x04,
    // Include the source of each mapped range in the mapped seq-loc.
    fIncludeSrcLocs       = 0x08,
    // Prefer spliced-seg for mixed alignments.
    fMixedAlignsAsSpliced = 0x10,
    // Treat any range truncation as an error (added for mapping to
    // GC-Assembly aliases).
    fErrorOnPartial       = 0x20
};
// Bitwise combination of EMiscFlags values.
typedef int TMiscFlags;
932
x_IsSetMiscFlag(EMiscFlags flag) const933 bool x_IsSetMiscFlag(EMiscFlags flag) const { return (m_MiscFlags & flag) == flag; }
934 void x_SetMiscFlag(EMiscFlags flag, bool value);
935
936 TMiscFlags m_MiscFlags;
937
938 // Mapped ranges collected from the currently parsed sub-location.
939 mutable TRangesById m_MappedLocs;
940 // Source locations for all mapped ranges.
941 CRef<CSeq_loc> m_SrcLocs;
942
943 // Collected ranges for mapped graph. Used to adjust mapped graph data.
944 CRef<CGraphRanges> m_GraphRanges;
945
946 // Map each synonym to a primary seq-id.
947 mutable TSynonymMap m_SynonymMap;
948
949 // Map each primary seq-id to sequence length.
950 mutable TLengthMap m_LengthMap;
951
952 protected:
953 // Storage for sequence types.
954 mutable TSeqTypeById m_SeqTypes;
955 // Flag indicating if the mapping truncated at least some ranges.
956 bool m_Partial;
957 // Flag indicating if the last range could not be mapped and was
958 // dropped.
959 bool m_LastTruncated;
960 // Mapping ranges grouped by source id and strand.
961 CRef<CMappingRanges> m_Mappings;
962 // Mapped seq-loc
963 CRef<CSeq_loc> m_Dst_loc;
964 // All ranges on the mapping destination.
965 TDstStrandMap m_DstRanges;
966 // Current mapping group. Incremented for each mapping sub-location
967 // (e.g. exon).
968 int m_CurrentGroup;
969 // Control how fuzz is generated and propagated
970 TFuzzOption m_FuzzOption;
971 // Misc mapping options
972 CSeq_loc_Mapper_Options m_MapOptions;
973
974 public:
975 // Initialize the mapper with default values
976 CSeq_loc_Mapper_Base(CSeq_loc_Mapper_Options options = CSeq_loc_Mapper_Options());
977
978 /// Methods for getting sequence types, use cached types (m_SeqTypes)
979 /// if possible.
980 ESeqType GetSeqTypeById(const CSeq_id_Handle& idh) const;
981 ESeqType GetSeqTypeById(const CSeq_id& id) const;
982 /// Methods for setting sequence types. May be used to populate the
983 /// cache before mapping huge alignments if the types are already
984 /// known. Throw exception if the sequence type is already set to
985 /// a different value.
986 /// NOTE: setting sequence type does not adjust mapping ranges for this
987 /// id. All mapping ranges must use genomic coordinates.
988 void SetSeqTypeById(const CSeq_id_Handle& idh, ESeqType seqtype) const;
989 void SetSeqTypeById(const CSeq_id& id, ESeqType seqtype) const;
990
991 /// Get sequence width. Return 3 for proteins, 1 for nucleotides and
992 /// unknown sequence types.
993 int GetWidthById(const CSeq_id_Handle& idh) const;
994 int GetWidthById(const CSeq_id& id) const;
995
996 /// Get mapping ranges.
GetMappingRanges(void) const997 const CMappingRanges& GetMappingRanges(void) const { return *m_Mappings; }
998
999 /// NOTE: In most cases CollectSynonyms(const CSeq_id_Handle& id) should
1000 /// be used instead, since it takes care of synonym storage and mapping.
/// This method does nothing but store synonyms in the container.
1002 void CollectSynonyms(const CSeq_id_Handle& id, TSynonyms& synonyms) const;
1003 // Check if ranges which can not be mapped should be replaced with NULL
1004 // locations. By default removed ranges are reported using neighbor's fuzz.
1005 // The flag is controlled from environment/registry:
1006 // MAPPER_NONMAPPING_AS_NULL=t
1007 // [Mapper]/Nonmapping_As_Null=t
1008 static bool GetNonMappingAsNull(void);
1009 };
1010
1011
1012 /////////////////////////////////////////////////////////////////////////////
1013 ///
1014 /// IMapper_Sequence_Info
1015 ///
1016 /// Interface for providing sequence information to CSeq_loc_Mapper_Base.
1017 /// Returns information about sequence type, length and synonyms.
1018
1019 class IMapper_Sequence_Info : public CObject
1020 {
1021 public:
1022 typedef CSeq_loc_Mapper_Base::ESeqType TSeqType;
1023 typedef CSeq_loc_Mapper_Base::TSynonyms TSynonyms;
1024
1025 /// Get information about sequence type (nuc or prot).
1026 virtual TSeqType GetSequenceType(const CSeq_id_Handle& idh) = 0;
1027
1028 /// Get sequence length or kInvalidSeqPos.
1029 virtual TSeqPos GetSequenceLength(const CSeq_id_Handle& idh) = 0;
1030
1031 /// Collect all synonyms for the id including the id itself.
1032 /// Any derived class must add at least the original id to the collection.
1033 virtual void CollectSynonyms(const CSeq_id_Handle& id,
1034 TSynonyms& synonyms) = 0;
1035 };
1036
1037
1038 /////////////////////////////////////////////////////////////////////////////
1039 ///
1040 /// CSeq_loc_Mapper_Message
1041 ///
1042 /// Class used to report CSeq_loc_Mapper_Base issues through
1043 /// IMessageListener.
1044 class NCBI_SEQ_EXPORT CSeq_loc_Mapper_Message : public CMessage_Basic
1045 {
1046 public:
1047 CSeq_loc_Mapper_Message(const string& msg,
1048 EDiagSev sev,
1049 int err_code = 0,
1050 int sub_code = 0);
1051 virtual ~CSeq_loc_Mapper_Message(void);
1052
1053 virtual CSeq_loc_Mapper_Message* Clone(void) const;
1054 virtual void Write(CNcbiOstream& out) const;
1055
1056 enum EObjectType {
1057 eNot_set,
1058 eSeq_loc,
1059 eSeq_feat,
1060 eSeq_align,
1061 eSeq_graph
1062 };
1063
1064 /// Check type of the object stored in the message.
Which(void) const1065 EObjectType Which(void) const { return m_ObjType; }
1066
1067 /// Set seq-loc object (copy into the message).
1068 void SetLoc(const CSeq_loc& loc);
1069 /// Get seq-loc object or null.
1070 const CSeq_loc* GetLoc(void) const;
1071
1072 /// Set seq-feat object (copy into the message).
1073 void SetFeat(const CSeq_feat& feat);
1074 /// Get seq-feat object or null.
1075 const CSeq_feat* GetFeat(void) const;
1076
1077 /// Set seq-align object (copy into the message).
1078 void SetAlign(const CSeq_align& align);
1079 /// Get seq-align object or null.
1080 const CSeq_align* GetAlign(void) const;
1081
1082 /// Set seq-graph object (copy into the message).
1083 void SetGraph(const CSeq_graph& graph);
1084 /// Get seq-graph object or null.
1085 const CSeq_graph* GetGraph(void) const;
1086
1087 /// Set the stored object to null.
1088 void ResetObject(void);
1089
1090 private:
1091 EObjectType m_ObjType;
1092
1093 CRef<CObject> m_Obj;
1094 };
1095
1096
1097 struct CMappingRangeRef_Less
1098 {
1099 bool operator()(const CRef<CMappingRange>& x,
1100 const CRef<CMappingRange>& y) const;
1101 };
1102
1103
1104 struct CMappingRangeRef_LessRev
1105 {
1106 bool operator()(const CRef<CMappingRange>& x,
1107 const CRef<CMappingRange>& y) const;
1108 };
1109
1110
1111 inline
operator ()(const CRef<CMappingRange> & x,const CRef<CMappingRange> & y) const1112 bool CMappingRangeRef_Less::operator()(const CRef<CMappingRange>& x,
1113 const CRef<CMappingRange>& y) const
1114 {
1115 // Leftmost first
1116 if (x->m_Src_from != y->m_Src_from) {
1117 return x->m_Src_from < y->m_Src_from;
1118 }
1119 // Longest first
1120 if (x->m_Src_to != y->m_Src_to) {
1121 return x->m_Src_to > y->m_Src_to;
1122 }
1123 return x < y;
1124 }
1125
1126
1127 inline
operator ()(const CRef<CMappingRange> & x,const CRef<CMappingRange> & y) const1128 bool CMappingRangeRef_LessRev::operator()(const CRef<CMappingRange>& x,
1129 const CRef<CMappingRange>& y) const
1130 {
1131 // Rightmost first
1132 if (x->m_Src_to != y->m_Src_to) {
1133 return x->m_Src_to > y->m_Src_to;
1134 }
1135 // Longest first
1136 if (x->m_Src_from != y->m_Src_from) {
1137 return x->m_Src_from < y->m_Src_from;
1138 }
1139 return x > y;
1140 }
1141
1142
1143 inline
GoodSrcId(const CSeq_id & id) const1144 bool CMappingRange::GoodSrcId(const CSeq_id& id) const
1145 {
1146 return m_Src_id_Handle == id;
1147 }
1148
1149
1150 inline
GetDstId(void) const1151 CRef<CSeq_id> CMappingRange::GetDstId(void) const
1152 {
1153 return m_Dst_id_Handle ?
1154 Ref(&const_cast<CSeq_id&>(*m_Dst_id_Handle.GetSeqId())) :
1155 CRef<CSeq_id>(0);
1156 }
1157
1158
1159 inline
SetMergeNone(void)1160 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::SetMergeNone(void)
1161 {
1162 m_MergeFlag = eMergeNone;
1163 return *this;
1164 }
1165
1166
1167 inline
SetMergeAbutting(void)1168 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::SetMergeAbutting(void)
1169 {
1170 m_MergeFlag = eMergeAbutting;
1171 return *this;
1172 }
1173
1174
1175 inline
SetMergeBySeg(void)1176 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::SetMergeBySeg(void)
1177 {
1178 m_MergeFlag = eMergeBySeg;
1179 return *this;
1180 }
1181
1182
1183 inline
SetMergeContained(void)1184 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::SetMergeContained(void)
1185 {
1186 m_MergeFlag = eMergeContained;
1187 return *this;
1188 }
1189
1190
1191 inline
SetMergeAll(void)1192 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::SetMergeAll(void)
1193 {
1194 m_MergeFlag = eMergeAll;
1195 return *this;
1196 }
1197
1198
1199 inline
SetGapPreserve(void)1200 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::SetGapPreserve(void)
1201 {
1202 m_GapFlag = eGapPreserve;
1203 return *this;
1204 }
1205
1206
1207 inline
SetGapRemove(void)1208 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::SetGapRemove(void)
1209 {
1210 m_GapFlag = eGapRemove;
1211 return *this;
1212 }
1213
1214
1215 inline
SetTrimSplicedSeg(bool trim)1216 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::SetTrimSplicedSeg(bool trim)
1217 {
1218 x_SetMiscFlag(fTrimSplicedSegs, trim);
1219 return *this;
1220 }
1221
1222
1223 inline
SetCheckStrand(bool value)1224 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::SetCheckStrand(bool value)
1225 {
1226 x_SetMiscFlag(fCheckStrand, value);
1227 return *this;
1228 }
1229
1230
1231 inline
LastIsPartial(void)1232 bool CSeq_loc_Mapper_Base::LastIsPartial(void)
1233 {
1234 return m_Partial;
1235 }
1236
1237
1238 inline
KeepNonmappingRanges(void)1239 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::KeepNonmappingRanges(void)
1240 {
1241 x_SetMiscFlag(fKeepNonmapping, true);
1242 return *this;
1243 }
1244
1245
1246 inline
TruncateNonmappingRanges(void)1247 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::TruncateNonmappingRanges(void)
1248 {
1249 x_SetMiscFlag(fKeepNonmapping, false);
1250 return *this;
1251 }
1252
1253
1254 inline
MixedAlignsAsSpliced(bool value)1255 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::MixedAlignsAsSpliced(bool value)
1256 {
1257 x_SetMiscFlag(fMixedAlignsAsSpliced, value);
1258 return *this;
1259 }
1260
1261
1262 inline
IncludeSourceLocs(bool value)1263 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::IncludeSourceLocs(bool value)
1264 {
1265 x_SetMiscFlag(fIncludeSrcLocs, value);
1266 return *this;
1267 }
1268
1269
1270 inline
SetErrorOnPartial(bool value)1271 CSeq_loc_Mapper_Base& CSeq_loc_Mapper_Base::SetErrorOnPartial(bool value)
1272 {
1273 x_SetMiscFlag(fErrorOnPartial, value);
1274 return *this;
1275 }
1276
1277
1278 inline
Map(const CSeq_align & src_align)1279 CRef<CSeq_align> CSeq_loc_Mapper_Base::Map(const CSeq_align& src_align)
1280 {
1281 return x_MapSeq_align(src_align, 0);
1282 }
1283
1284
1285 inline
Map(const CSeq_align & src_align,size_t row)1286 CRef<CSeq_align> CSeq_loc_Mapper_Base::Map(const CSeq_align& src_align,
1287 size_t row)
1288 {
1289 return x_MapSeq_align(src_align, &row);
1290 }
1291
1292
1293 inline
1294 CSeq_loc_Mapper_Base::ESeqType
GetSeqTypeById(const CSeq_id_Handle & idh) const1295 CSeq_loc_Mapper_Base::GetSeqTypeById(const CSeq_id_Handle& idh) const
1296 {
1297 CSeq_id_Handle primary_id = CollectSynonyms(idh);
1298 TSeqTypeById::const_iterator it = m_SeqTypes.find(primary_id);
1299 if (it != m_SeqTypes.end()) {
1300 return it->second;
1301 }
1302 return GetSeqType(primary_id);
1303 }
1304
1305
1306 inline
1307 CSeq_loc_Mapper_Base::ESeqType
GetSeqTypeById(const CSeq_id & id) const1308 CSeq_loc_Mapper_Base::GetSeqTypeById(const CSeq_id& id) const
1309 {
1310 return GetSeqTypeById(CSeq_id_Handle::GetHandle(id));
1311 }
1312
1313
1314 inline
SetSeqTypeById(const CSeq_id & id,ESeqType seqtype) const1315 void CSeq_loc_Mapper_Base::SetSeqTypeById(const CSeq_id& id,
1316 ESeqType seqtype) const
1317 {
1318 SetSeqTypeById(CSeq_id_Handle::GetHandle(id), seqtype);
1319 }
1320
1321
1322 inline
GetWidthById(const CSeq_id_Handle & idh) const1323 int CSeq_loc_Mapper_Base::GetWidthById(const CSeq_id_Handle& idh) const
1324 {
1325 return (GetSeqTypeById(idh) == eSeq_prot) ? 3 : 1;
1326 }
1327
1328
1329 inline
GetWidthById(const CSeq_id & id) const1330 int CSeq_loc_Mapper_Base::GetWidthById(const CSeq_id& id) const
1331 {
1332 return GetWidthById(CSeq_id_Handle::GetHandle(id));
1333 }
1334
1335
1336 inline
CSeq_loc_Mapper_Options(void)1337 CSeq_loc_Mapper_Options::CSeq_loc_Mapper_Options(void)
1338 : m_SeqInfo(0), m_Options(0) {}
1339
1340 inline
CSeq_loc_Mapper_Options(IMapper_Sequence_Info * seq_info,TMapOptions opts)1341 CSeq_loc_Mapper_Options::CSeq_loc_Mapper_Options(IMapper_Sequence_Info* seq_info,
1342 TMapOptions opts)
1343 : m_SeqInfo(seq_info), m_Options(opts) {}
1344
1345 inline
CSeq_loc_Mapper_Options(TMapOptions opts)1346 CSeq_loc_Mapper_Options::CSeq_loc_Mapper_Options(TMapOptions opts)
1347 : m_SeqInfo(0), m_Options(opts) {}
1348
1349 inline
1350 IMapper_Sequence_Info*
GetMapperSequenceInfo(void) const1351 CSeq_loc_Mapper_Options::GetMapperSequenceInfo(void) const
1352 {
1353 return m_SeqInfo;
1354 }
1355
1356 inline
1357 CSeq_loc_Mapper_Options&
SetMapperSequenceInfo(IMapper_Sequence_Info * seq_info)1358 CSeq_loc_Mapper_Options::SetMapperSequenceInfo(IMapper_Sequence_Info* seq_info)
1359 {
1360 m_SeqInfo = seq_info;
1361 return *this;
1362 }
1363
1364 inline
GetAlign_Dense_seg_TotalRange(void) const1365 bool CSeq_loc_Mapper_Options::GetAlign_Dense_seg_TotalRange(void) const
1366 {
1367 return x_IsSetOption(CSeq_loc_Mapper_Base::fAlign_Dense_seg_TotalRange);
1368 }
1369
1370 inline
1371 CSeq_loc_Mapper_Options&
SetAlign_Dense_seg_TotalRange(bool value)1372 CSeq_loc_Mapper_Options::SetAlign_Dense_seg_TotalRange(bool value)
1373 {
1374 x_SetOption(CSeq_loc_Mapper_Base::fAlign_Dense_seg_TotalRange, value);
1375 return *this;
1376 }
1377
1378 inline
GetAlign_Sparse_ToFirst(void) const1379 bool CSeq_loc_Mapper_Options::GetAlign_Sparse_ToFirst(void) const
1380 {
1381 return !x_IsSetOption(CSeq_loc_Mapper_Base::fAlign_Sparse_ToSecond);
1382 }
1383
1384 inline
GetAlign_Sparse_ToSecond(void) const1385 bool CSeq_loc_Mapper_Options::GetAlign_Sparse_ToSecond(void) const
1386 {
1387 return x_IsSetOption(CSeq_loc_Mapper_Base::fAlign_Sparse_ToSecond);
1388 }
1389
1390 inline
1391 CSeq_loc_Mapper_Options&
SetAlign_Sparse_ToFirst(bool value)1392 CSeq_loc_Mapper_Options::SetAlign_Sparse_ToFirst(bool value)
1393 {
1394 x_SetOption(CSeq_loc_Mapper_Base::fAlign_Sparse_ToSecond, !value);
1395 return *this;
1396 }
1397
1398 inline
1399 CSeq_loc_Mapper_Options&
SetAlign_Sparse_ToSecond(bool value)1400 CSeq_loc_Mapper_Options::SetAlign_Sparse_ToSecond(bool value)
1401 {
1402 x_SetOption(CSeq_loc_Mapper_Base::fAlign_Sparse_ToSecond, value);
1403 return *this;
1404 }
1405
1406 inline
GetMapSingleLevel(void) const1407 bool CSeq_loc_Mapper_Options::GetMapSingleLevel(void) const
1408 {
1409 return x_IsSetOption(CSeq_loc_Mapper_Base::fMapSingleLevel);
1410 }
1411
1412 inline
1413 CSeq_loc_Mapper_Options&
SetMapSingleLevel(bool value)1414 CSeq_loc_Mapper_Options::SetMapSingleLevel(bool value)
1415 {
1416 x_SetOption(CSeq_loc_Mapper_Base::fMapSingleLevel, value);
1417 return *this;
1418 }
1419
1420 inline
GetTrimMappedLocation(void) const1421 bool CSeq_loc_Mapper_Options::GetTrimMappedLocation(void) const
1422 {
1423 return x_IsSetOption(CSeq_loc_Mapper_Base::fTrimMappedLocation);
1424 }
1425
1426 inline
1427 CSeq_loc_Mapper_Options&
SetTrimMappedLocation(bool value)1428 CSeq_loc_Mapper_Options::SetTrimMappedLocation(bool value)
1429 {
1430 x_SetOption(CSeq_loc_Mapper_Base::fTrimMappedLocation, value);
1431 return *this;
1432 }
1433
1434 inline
x_IsSetOption(int opt) const1435 bool CSeq_loc_Mapper_Options::x_IsSetOption(int opt) const
1436 {
1437 return (m_Options & opt) != 0;
1438 }
1439
1440 inline
x_SetOption(int opt,bool enable)1441 void CSeq_loc_Mapper_Options::x_SetOption(int opt, bool enable)
1442 {
1443 if ( enable ) {
1444 m_Options |= opt;
1445 }
1446 else {
1447 m_Options &= ~opt;
1448 }
1449 }
1450
1451
1452 /* @} */
1453
1454
1455 END_SCOPE(objects)
1456 END_NCBI_SCOPE
1457
1458 #endif // SEQ_LOC_MAPPER_BASE__HPP
1459