1 #ifndef SEQ_LOC_CVT__HPP
2 #define SEQ_LOC_CVT__HPP
3 
4 /*  $Id: seq_loc_cvt.hpp 486734 2015-12-09 19:13:21Z grichenk $
5 * ===========================================================================
6 *
7 *                            PUBLIC DOMAIN NOTICE
8 *               National Center for Biotechnology Information
9 *
10 *  This software/database is a "United States Government Work" under the
11 *  terms of the United States Copyright Act.  It was written as part of
12 *  the author's official duties as a United States Government employee and
13 *  thus cannot be copyrighted.  This software/database is freely available
14 *  to the public for use. The National Library of Medicine and the U.S.
15 *  Government have not placed any restriction on its use or reproduction.
16 *
17 *  Although all reasonable efforts have been taken to ensure the accuracy
18 *  and reliability of the software and data, the NLM and the U.S.
19 *  Government do not and cannot warrant the performance or results that
20 *  may be obtained by using this software or data. The NLM and the U.S.
21 *  Government disclaim all warranties, express or implied, including
22 *  warranties of performance, merchantability or fitness for any particular
23 *  purpose.
24 *
25 *  Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Aleksey Grichenko, Michael Kimelman, Eugene Vasilchenko
30 *
31 * File Description:
*   Seq-loc conversion classes (CSeq_loc_Conversion, CSeq_loc_Conversion_Set)
*   for object manager iterators
33 *
34 */
35 
36 #include <corelib/ncbiobj.hpp>
37 
38 #include <util/range.hpp>
39 #include <util/rangemap.hpp>
40 
41 #include <objects/seq/seq_id_handle.hpp>
42 #include <objects/seq/seq_loc_mapper_base.hpp>
43 #include <objmgr/impl/heap_scope.hpp>
44 
45 #include <objects/seqloc/Na_strand.hpp>
46 #include <objects/seqloc/Seq_loc.hpp>
47 #include <objects/seqloc/Seq_point.hpp>
48 #include <objects/seqloc/Seq_interval.hpp>
49 
50 BEGIN_NCBI_SCOPE
51 BEGIN_SCOPE(objects)
52 
// Forward declarations of types named in this header.
// CSeq_loc, CSeq_point and CSeq_interval are also provided by the
// objects/seqloc headers included above.
class CSeqMap_CI;
class CScope;
class CSeq_align_Mapper;
class CAnnotObject_Ref;

class CSeq_id;
class CSeq_loc;
class CSeq_interval;
class CSeq_point;
class CInt_fuzz;

class CSeq_feat;
class CSeq_align;
class CDense_seg;
class CPacked_seg;
class CSeq_align_set;
struct SAnnotObject_Index;
70 
71 
72 /////////////////////////////////////////////////////////////////////////////
73 // CSeq_loc_Conversion
74 /////////////////////////////////////////////////////////////////////////////
75 
76 class NCBI_XOBJMGR_EXPORT CSeq_loc_Conversion : public CObject
77 {
78 public:
79     typedef CRange<TSeqPos> TRange;
80 
81     // Create conversion based on a seq-map segment
82     CSeq_loc_Conversion(CSeq_loc&             master_loc_empty,
83                         const CSeq_id_Handle& dst_id,
84                         const CSeqMap_CI&     seg,
85                         const CSeq_id_Handle& src_id,
86                         CScope*               scope);
87     // Create conversion based on ranges and IDs
88     CSeq_loc_Conversion(CSeq_loc&             master_loc_empty,
89                         const CSeq_id_Handle& dst_id,
90                         const TRange&         dst_rg,
91                         const CSeq_id_Handle& src_id,
92                         TSeqPos               src_start,
93                         bool                  reverse,
94                         CScope*               scope);
95 
96     ~CSeq_loc_Conversion(void);
97 
98     // Add mapping from current destination through one more conversion
99     // The new destination becomes the one of cvt, range may be truncated.
100     void CombineWith(CSeq_loc_Conversion& cvt);
101 
102     TSeqPos ConvertPos(TSeqPos src_pos);
103 
104     bool GoodSrcId(const CSeq_id& id);
MinusStrand(void) const105     bool MinusStrand(void) const
106         {
107             return m_Reverse;
108         }
109 
110     void ConvertSimpleLoc(const CSeq_id_Handle& src_id,
111                           const CRange<TSeqPos> src_range,
112                           const SAnnotObject_Index& src_index);
113     bool ConvertPoint(TSeqPos src_pos, ENa_strand src_strand);
114     bool ConvertPoint(const CSeq_point& src);
115 
116     bool ConvertInterval(TSeqPos src_from, TSeqPos src_to,
117                          ENa_strand src_strand);
118     bool ConvertInterval(const CSeq_interval& src);
119 
120     void ConvertFeature(CAnnotObject_Ref& ref,
121                         const CSeq_feat& orig_feat,
122                         CRef<CSeq_feat>& mapped_feat);
123     void ConvertCdregion(CAnnotObject_Ref& ref,
124                          const CSeq_feat& orig_feat,
125                          CRef<CSeq_feat>& mapped_feat);
126     void ConvertRna(CAnnotObject_Ref& ref,
127                     const CSeq_feat& orig_feat,
128                     CRef<CSeq_feat>& mapped_feat);
129 
130     CConstRef<CInt_fuzz> ReverseFuzz(const CInt_fuzz& fuzz) const;
131 
132     enum EConvertFlag {
133         eCnvDefault,
134         eCnvAlways
135     };
136     enum ELocationType {
137         eLocation,
138         eProduct
139     };
140 
141     bool Convert(const CSeq_loc& src, CRef<CSeq_loc>& dst,
142                  EConvertFlag flag = eCnvDefault);
143 
144     void Reset(void);
145     void ResetKeepPartial(void);
146 
IsPartial(void) const147     bool IsPartial(void) const
148         {
149             return m_Partial;
150         }
HasUnconvertedId(void) const151     bool HasUnconvertedId(void) const
152         {
153             return m_PartialHasUnconvertedId;
154         }
155 
SetSrcId(const CSeq_id_Handle & src)156     void SetSrcId(const CSeq_id_Handle& src)
157         {
158             m_Src_id_Handle = src;
159         }
160     void SetConversion(const CSeqMap_CI& seg);
161 
GetSrc_id_Handle(void) const162     const CSeq_id_Handle& GetSrc_id_Handle(void) const
163         {
164             return m_Src_id_Handle;
165         }
GetSrc_from(void) const166     TSeqPos GetSrc_from(void) const
167         {
168             return m_Src_from;
169         }
GetSrc_to(void) const170     TSeqPos GetSrc_to(void) const
171         {
172             return m_Src_to;
173         }
174 
GetTotalRange(void) const175     const TRange& GetTotalRange(void) const
176         {
177             return m_TotalRange;
178         }
179 
180     ENa_strand ConvertStrand(ENa_strand strand) const;
181 
182     void SetMappedLocation(CAnnotObject_Ref& ref, ELocationType loctype);
183     void MakeDstMix(CSeq_loc_mix& dst, const CSeq_loc_mix& src) const;
184 
GetDst_id_Handle(void) const185     const CSeq_id_Handle& GetDst_id_Handle(void) const
186         {
187             return m_Dst_id_Handle;
188         }
GetId(void) const189     const CSeq_id& GetId(void) const
190         {
191             return m_Dst_loc_Empty->GetEmpty();
192         }
193 
194 protected:
195     friend class CAnnot_Collector;
196 
197     void Convert(CAnnotObject_Ref& obj,
198                  ELocationType loctype);
199     void Convert(CAnnotObject_Ref& ref,
200                  ELocationType loctype,
201                  const CSeq_id_Handle& id,
202                  const CRange<TSeqPos>& range,
203                  const SAnnotObject_Index& index);
204 
205 private:
206     void CheckDstInterval(void);
207     void CheckDstPoint(void);
208     void CheckDstMix(void);
209 
210     CRef<CSeq_interval> GetDstInterval(void);
211     CRef<CSeq_point> GetDstPoint(void);
212     CRef<CSeq_loc_mix> GetDstMix(void);
213 
214     void SetDstLoc(CRef<CSeq_loc>& loc);
215 
216     bool IsSpecialLoc(void) const;
217 
GetDstLocEmpty(void)218     CSeq_loc& GetDstLocEmpty(void)
219         {
220             return *m_Dst_loc_Empty;
221         }
GetDstId(void)222     CSeq_id& GetDstId(void)
223         {
224             return m_Dst_loc_Empty->SetEmpty();
225         }
226 
GetDstRange(void)227     TRange GetDstRange(void)
228         {
229             return m_Reverse ?
230                 TRange(ConvertPos(m_Src_to), ConvertPos(m_Src_from)) :
231                 TRange(ConvertPos(m_Src_from), ConvertPos(m_Src_to));
232         }
GetSrcRange(void) const233     TRange GetSrcRange(void) const
234         {
235             return TRange(m_Src_from, m_Src_to);
236         }
237 
238     void ConvertPacked_int(const CSeq_loc& src, CRef<CSeq_loc>& dst);
239     void ConvertPacked_pnt(const CSeq_loc& src, CRef<CSeq_loc>& dst);
240     bool ConvertSimpleMix(const CSeq_loc& src);
241     void ConvertMix(const CSeq_loc& src, CRef<CSeq_loc>& dst,
242                     EConvertFlag flag = eCnvDefault);
243     void ConvertEquiv(const CSeq_loc& src, CRef<CSeq_loc>& dst);
244     void ConvertBond(const CSeq_loc& src, CRef<CSeq_loc>& dst);
245 
246     static CSeq_loc_mix::Tdata* s_ConvertToMix(CRef<CSeq_loc>& loc);
247 
248     // Translation parameters:
249     //   Source id and bounds:
250     CSeq_id_Handle m_Src_id_Handle;
251     TSeqPos        m_Src_from;
252     TSeqPos        m_Src_to;
253 
254     //   Source to destination shift:
255     TSignedSeqPos  m_Shift;
256     bool           m_Reverse;
257 
258     //   Destination id:
259     CSeq_id_Handle m_Dst_id_Handle;
260     CRef<CSeq_loc> m_Dst_loc_Empty;
261 
262     // Results:
263     //   Cumulative results on destination:
264     TRange         m_TotalRange;
265     bool           m_Partial;
266     bool           m_PartialHasUnconvertedId;
267 
268     // Separate flags for left and right truncations of each interval
269     enum EPartialFlag {
270         fPartial_from = 1 << 0, // the interval is partial on the left
271         fPartial_to   = 1 << 1  // the interval is partial on the right
272     };
273     typedef int TPartialFlag;
274 
275     TPartialFlag m_PartialFlag;
276     CConstRef<CInt_fuzz> m_DstFuzz_from;
277     CConstRef<CInt_fuzz> m_DstFuzz_to;
278 
279     //   Last Point, Interval or other simple location's conversion result:
280     enum EMappedObjectType {
281         eMappedObjType_not_set,
282         eMappedObjType_Seq_loc,
283         eMappedObjType_Seq_point,
284         eMappedObjType_Seq_interval,
285         eMappedObjType_Seq_loc_mix
286     };
287     EMappedObjectType m_LastType;
288     TRange         m_LastRange;
289     ENa_strand     m_LastStrand;
290     CConstRef<CSeq_loc> m_SrcLoc;
291 
292     // Scope for id resolution:
293     CHeapScope     m_Scope;
294 
295     CRef<CGraphRanges> m_GraphRanges;
296 
297     friend class CSeq_loc_Conversion_Set;
298     friend class CSeq_align_Mapper;
299     friend struct CConversionRef_Less;
300 };
301 
302 
303 class NCBI_XOBJMGR_EXPORT CSeq_loc_Conversion_Set : public CObject
304 {
305 public:
306     CSeq_loc_Conversion_Set(CHeapScope& scope);
307 
308     typedef CRange<TSeqPos> TRange;
309     typedef CRangeMultimap<CRef<CSeq_loc_Conversion>, TSeqPos> TRangeMap;
310     typedef TRangeMap::iterator TRangeIterator;
311     typedef map<CSeq_id_Handle, TRangeMap> TIdMap;
312 
313     enum {
314         // special index value meaning that
315         // the conversion should be applied to all location indexes
316         kAllIndexes = kMax_UInt
317     };
318 
319     // Conversions by location index
320     typedef map<unsigned int, TIdMap> TConvByIndex;
321 
322     void Add(CSeq_loc_Conversion& cvt, unsigned int loc_index);
323     TRangeIterator BeginRanges(CSeq_id_Handle id,
324                                TSeqPos from,
325                                TSeqPos to,
326                                unsigned int loc_index);
327     void Convert(CAnnotObject_Ref& obj,
328                  CSeq_loc_Conversion::ELocationType loctype);
329     bool Convert(const CSeq_loc& src,
330                  CRef<CSeq_loc>& dst,
331                  unsigned int loc_index);
332     void Convert(const CSeq_align& src, CRef<CSeq_align>& dst);
333 
334     void Reset(void);
IsPartial(void) const335     bool IsPartial(void) const
336         {
337             return m_Partial;
338         }
HasUnconvertedId(void) const339     bool HasUnconvertedId(void) const
340         {
341             return m_PartialHasUnconvertedId;
342         }
343 
344     typedef set<CSeq_id_Handle> TSeq_id_Handles;
GetDst_id_Handles(void) const345     const TSeq_id_Handles& GetDst_id_Handles(void) const
346         {
347             return m_Dst_id_Handles;
348         }
349 
350 private:
351     friend class CSeq_align_Mapper;
352 
353     void x_Add(CSeq_loc_Conversion& cvt, unsigned int loc_index);
354 
355     bool ConvertPoint(const CSeq_point& src,
356                       CRef<CSeq_loc>& dst,
357                       unsigned int loc_index);
358     bool ConvertInterval(const CSeq_interval& src,
359                          CRef<CSeq_loc>& dst,
360                          unsigned int loc_index);
361 
362     bool ConvertPacked_int(const CSeq_loc& src,
363                            CRef<CSeq_loc>& dst,
364                            unsigned int loc_index);
365     bool ConvertPacked_pnt(const CSeq_loc& src,
366                            CRef<CSeq_loc>& dst,
367                            unsigned int loc_index);
368     bool ConvertMix(const CSeq_loc& src,
369                     CRef<CSeq_loc>& dst,
370                     unsigned int loc_index);
371     bool ConvertEquiv(const CSeq_loc& src,
372                       CRef<CSeq_loc>& dst,
373                       unsigned int loc_index);
374     bool ConvertBond(const CSeq_loc& src,
375                      CRef<CSeq_loc>& dst,
376                      unsigned int loc_index);
377     void ConvertFeature(CAnnotObject_Ref& ref,
378                         const CSeq_feat& orig_feat,
379                         CRef<CSeq_feat>& mapped_feat);
380     void ConvertCdregion(CAnnotObject_Ref& ref,
381                          const CSeq_feat& orig_feat,
382                          CRef<CSeq_feat>& mapped_feat);
383     void ConvertRna(CAnnotObject_Ref& ref,
384                     const CSeq_feat& orig_feat,
385                     CRef<CSeq_feat>& mapped_feat);
386 
387     CRef<CSeq_loc_Conversion> m_SingleConv;
388     unsigned int              m_SingleIndex;
389     TConvByIndex m_CvtByIndex;
390     TSeq_id_Handles m_Dst_id_Handles;
391     bool         m_Partial;
392     bool         m_PartialHasUnconvertedId;
393     TRange       m_TotalRange;
394     CHeapScope   m_Scope;
395 
396     CRef<CGraphRanges> m_GraphRanges;
397 };
398 
399 
400 inline
IsSpecialLoc(void) const401 bool CSeq_loc_Conversion::IsSpecialLoc(void) const
402 {
403     return m_LastType >= eMappedObjType_Seq_point;
404 }
405 
406 
407 inline
ConvertPos(TSeqPos src_pos)408 TSeqPos CSeq_loc_Conversion::ConvertPos(TSeqPos src_pos)
409 {
410     if ( src_pos < m_Src_from || src_pos > m_Src_to ) {
411         m_Partial = true;
412         return kInvalidSeqPos;
413     }
414     TSeqPos dst_pos;
415     if ( !m_Reverse ) {
416         dst_pos = m_Shift + src_pos;
417     }
418     else {
419         dst_pos = m_Shift - src_pos;
420     }
421     return dst_pos;
422 }
423 
424 
425 inline
GoodSrcId(const CSeq_id & id)426 bool CSeq_loc_Conversion::GoodSrcId(const CSeq_id& id)
427 {
428     if ( m_Src_id_Handle == id ) {
429         return true;
430     }
431     m_Partial = m_PartialHasUnconvertedId = true;
432     return false;
433 }
434 
435 
436 inline
ConvertStrand(ENa_strand strand) const437 ENa_strand CSeq_loc_Conversion::ConvertStrand(ENa_strand strand) const
438 {
439     if ( m_Reverse ) {
440         strand = Reverse(strand);
441     }
442     return strand;
443 }
444 
445 
446 END_SCOPE(objects)
447 END_NCBI_SCOPE
448 
#endif  // SEQ_LOC_CVT__HPP
450