1 #ifndef SEQ_LOC_CVT__HPP
2 #define SEQ_LOC_CVT__HPP
3
4 /* $Id: seq_loc_cvt.hpp 486734 2015-12-09 19:13:21Z grichenk $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Aleksey Grichenko, Michael Kimelman, Eugene Vasilchenko
30 *
31 * File Description:
*   Seq-loc conversion classes (CSeq_loc_Conversion,
*   CSeq_loc_Conversion_Set) used by object manager iterators
33 *
34 */
35
36 #include <corelib/ncbiobj.hpp>
37
38 #include <util/range.hpp>
39 #include <util/rangemap.hpp>
40
41 #include <objects/seq/seq_id_handle.hpp>
42 #include <objects/seq/seq_loc_mapper_base.hpp>
43 #include <objmgr/impl/heap_scope.hpp>
44
45 #include <objects/seqloc/Na_strand.hpp>
46 #include <objects/seqloc/Seq_loc.hpp>
47 #include <objects/seqloc/Seq_point.hpp>
48 #include <objects/seqloc/Seq_interval.hpp>
49
50 BEGIN_NCBI_SCOPE
51 BEGIN_SCOPE(objects)
52
// Forward declarations of types that this header only uses in
// declarations (by reference, pointer or CRef<>).

// Object manager types
class CSeqMap_CI;
class CScope;
class CSeq_align_Mapper;
class CAnnotObject_Ref;

// Location types
class CSeq_id;
class CSeq_loc;
class CSeq_interval;
class CSeq_point;
class CInt_fuzz;

// Annotation types
class CSeq_feat;
class CSeq_align;
class CDense_seg;
class CPacked_seg;
class CSeq_align_set;
struct SAnnotObject_Index;
70
71
72 /////////////////////////////////////////////////////////////////////////////
73 // CSeq_loc_Conversion
74 /////////////////////////////////////////////////////////////////////////////
75
76 class NCBI_XOBJMGR_EXPORT CSeq_loc_Conversion : public CObject
77 {
78 public:
79 typedef CRange<TSeqPos> TRange;
80
81 // Create conversion based on a seq-map segment
82 CSeq_loc_Conversion(CSeq_loc& master_loc_empty,
83 const CSeq_id_Handle& dst_id,
84 const CSeqMap_CI& seg,
85 const CSeq_id_Handle& src_id,
86 CScope* scope);
87 // Create conversion based on ranges and IDs
88 CSeq_loc_Conversion(CSeq_loc& master_loc_empty,
89 const CSeq_id_Handle& dst_id,
90 const TRange& dst_rg,
91 const CSeq_id_Handle& src_id,
92 TSeqPos src_start,
93 bool reverse,
94 CScope* scope);
95
96 ~CSeq_loc_Conversion(void);
97
98 // Add mapping from current destination through one more conversion
99 // The new destination becomes the one of cvt, range may be truncated.
100 void CombineWith(CSeq_loc_Conversion& cvt);
101
102 TSeqPos ConvertPos(TSeqPos src_pos);
103
104 bool GoodSrcId(const CSeq_id& id);
MinusStrand(void) const105 bool MinusStrand(void) const
106 {
107 return m_Reverse;
108 }
109
110 void ConvertSimpleLoc(const CSeq_id_Handle& src_id,
111 const CRange<TSeqPos> src_range,
112 const SAnnotObject_Index& src_index);
113 bool ConvertPoint(TSeqPos src_pos, ENa_strand src_strand);
114 bool ConvertPoint(const CSeq_point& src);
115
116 bool ConvertInterval(TSeqPos src_from, TSeqPos src_to,
117 ENa_strand src_strand);
118 bool ConvertInterval(const CSeq_interval& src);
119
120 void ConvertFeature(CAnnotObject_Ref& ref,
121 const CSeq_feat& orig_feat,
122 CRef<CSeq_feat>& mapped_feat);
123 void ConvertCdregion(CAnnotObject_Ref& ref,
124 const CSeq_feat& orig_feat,
125 CRef<CSeq_feat>& mapped_feat);
126 void ConvertRna(CAnnotObject_Ref& ref,
127 const CSeq_feat& orig_feat,
128 CRef<CSeq_feat>& mapped_feat);
129
130 CConstRef<CInt_fuzz> ReverseFuzz(const CInt_fuzz& fuzz) const;
131
132 enum EConvertFlag {
133 eCnvDefault,
134 eCnvAlways
135 };
136 enum ELocationType {
137 eLocation,
138 eProduct
139 };
140
141 bool Convert(const CSeq_loc& src, CRef<CSeq_loc>& dst,
142 EConvertFlag flag = eCnvDefault);
143
144 void Reset(void);
145 void ResetKeepPartial(void);
146
IsPartial(void) const147 bool IsPartial(void) const
148 {
149 return m_Partial;
150 }
HasUnconvertedId(void) const151 bool HasUnconvertedId(void) const
152 {
153 return m_PartialHasUnconvertedId;
154 }
155
SetSrcId(const CSeq_id_Handle & src)156 void SetSrcId(const CSeq_id_Handle& src)
157 {
158 m_Src_id_Handle = src;
159 }
160 void SetConversion(const CSeqMap_CI& seg);
161
GetSrc_id_Handle(void) const162 const CSeq_id_Handle& GetSrc_id_Handle(void) const
163 {
164 return m_Src_id_Handle;
165 }
GetSrc_from(void) const166 TSeqPos GetSrc_from(void) const
167 {
168 return m_Src_from;
169 }
GetSrc_to(void) const170 TSeqPos GetSrc_to(void) const
171 {
172 return m_Src_to;
173 }
174
GetTotalRange(void) const175 const TRange& GetTotalRange(void) const
176 {
177 return m_TotalRange;
178 }
179
180 ENa_strand ConvertStrand(ENa_strand strand) const;
181
182 void SetMappedLocation(CAnnotObject_Ref& ref, ELocationType loctype);
183 void MakeDstMix(CSeq_loc_mix& dst, const CSeq_loc_mix& src) const;
184
GetDst_id_Handle(void) const185 const CSeq_id_Handle& GetDst_id_Handle(void) const
186 {
187 return m_Dst_id_Handle;
188 }
GetId(void) const189 const CSeq_id& GetId(void) const
190 {
191 return m_Dst_loc_Empty->GetEmpty();
192 }
193
194 protected:
195 friend class CAnnot_Collector;
196
197 void Convert(CAnnotObject_Ref& obj,
198 ELocationType loctype);
199 void Convert(CAnnotObject_Ref& ref,
200 ELocationType loctype,
201 const CSeq_id_Handle& id,
202 const CRange<TSeqPos>& range,
203 const SAnnotObject_Index& index);
204
205 private:
206 void CheckDstInterval(void);
207 void CheckDstPoint(void);
208 void CheckDstMix(void);
209
210 CRef<CSeq_interval> GetDstInterval(void);
211 CRef<CSeq_point> GetDstPoint(void);
212 CRef<CSeq_loc_mix> GetDstMix(void);
213
214 void SetDstLoc(CRef<CSeq_loc>& loc);
215
216 bool IsSpecialLoc(void) const;
217
GetDstLocEmpty(void)218 CSeq_loc& GetDstLocEmpty(void)
219 {
220 return *m_Dst_loc_Empty;
221 }
GetDstId(void)222 CSeq_id& GetDstId(void)
223 {
224 return m_Dst_loc_Empty->SetEmpty();
225 }
226
GetDstRange(void)227 TRange GetDstRange(void)
228 {
229 return m_Reverse ?
230 TRange(ConvertPos(m_Src_to), ConvertPos(m_Src_from)) :
231 TRange(ConvertPos(m_Src_from), ConvertPos(m_Src_to));
232 }
GetSrcRange(void) const233 TRange GetSrcRange(void) const
234 {
235 return TRange(m_Src_from, m_Src_to);
236 }
237
238 void ConvertPacked_int(const CSeq_loc& src, CRef<CSeq_loc>& dst);
239 void ConvertPacked_pnt(const CSeq_loc& src, CRef<CSeq_loc>& dst);
240 bool ConvertSimpleMix(const CSeq_loc& src);
241 void ConvertMix(const CSeq_loc& src, CRef<CSeq_loc>& dst,
242 EConvertFlag flag = eCnvDefault);
243 void ConvertEquiv(const CSeq_loc& src, CRef<CSeq_loc>& dst);
244 void ConvertBond(const CSeq_loc& src, CRef<CSeq_loc>& dst);
245
246 static CSeq_loc_mix::Tdata* s_ConvertToMix(CRef<CSeq_loc>& loc);
247
248 // Translation parameters:
249 // Source id and bounds:
250 CSeq_id_Handle m_Src_id_Handle;
251 TSeqPos m_Src_from;
252 TSeqPos m_Src_to;
253
254 // Source to destination shift:
255 TSignedSeqPos m_Shift;
256 bool m_Reverse;
257
258 // Destination id:
259 CSeq_id_Handle m_Dst_id_Handle;
260 CRef<CSeq_loc> m_Dst_loc_Empty;
261
262 // Results:
263 // Cumulative results on destination:
264 TRange m_TotalRange;
265 bool m_Partial;
266 bool m_PartialHasUnconvertedId;
267
268 // Separate flags for left and right truncations of each interval
269 enum EPartialFlag {
270 fPartial_from = 1 << 0, // the interval is partial on the left
271 fPartial_to = 1 << 1 // the interval is partial on the right
272 };
273 typedef int TPartialFlag;
274
275 TPartialFlag m_PartialFlag;
276 CConstRef<CInt_fuzz> m_DstFuzz_from;
277 CConstRef<CInt_fuzz> m_DstFuzz_to;
278
279 // Last Point, Interval or other simple location's conversion result:
280 enum EMappedObjectType {
281 eMappedObjType_not_set,
282 eMappedObjType_Seq_loc,
283 eMappedObjType_Seq_point,
284 eMappedObjType_Seq_interval,
285 eMappedObjType_Seq_loc_mix
286 };
287 EMappedObjectType m_LastType;
288 TRange m_LastRange;
289 ENa_strand m_LastStrand;
290 CConstRef<CSeq_loc> m_SrcLoc;
291
292 // Scope for id resolution:
293 CHeapScope m_Scope;
294
295 CRef<CGraphRanges> m_GraphRanges;
296
297 friend class CSeq_loc_Conversion_Set;
298 friend class CSeq_align_Mapper;
299 friend struct CConversionRef_Less;
300 };
301
302
303 class NCBI_XOBJMGR_EXPORT CSeq_loc_Conversion_Set : public CObject
304 {
305 public:
306 CSeq_loc_Conversion_Set(CHeapScope& scope);
307
308 typedef CRange<TSeqPos> TRange;
309 typedef CRangeMultimap<CRef<CSeq_loc_Conversion>, TSeqPos> TRangeMap;
310 typedef TRangeMap::iterator TRangeIterator;
311 typedef map<CSeq_id_Handle, TRangeMap> TIdMap;
312
313 enum {
314 // special index value meaning that
315 // the conversion should be applied to all location indexes
316 kAllIndexes = kMax_UInt
317 };
318
319 // Conversions by location index
320 typedef map<unsigned int, TIdMap> TConvByIndex;
321
322 void Add(CSeq_loc_Conversion& cvt, unsigned int loc_index);
323 TRangeIterator BeginRanges(CSeq_id_Handle id,
324 TSeqPos from,
325 TSeqPos to,
326 unsigned int loc_index);
327 void Convert(CAnnotObject_Ref& obj,
328 CSeq_loc_Conversion::ELocationType loctype);
329 bool Convert(const CSeq_loc& src,
330 CRef<CSeq_loc>& dst,
331 unsigned int loc_index);
332 void Convert(const CSeq_align& src, CRef<CSeq_align>& dst);
333
334 void Reset(void);
IsPartial(void) const335 bool IsPartial(void) const
336 {
337 return m_Partial;
338 }
HasUnconvertedId(void) const339 bool HasUnconvertedId(void) const
340 {
341 return m_PartialHasUnconvertedId;
342 }
343
344 typedef set<CSeq_id_Handle> TSeq_id_Handles;
GetDst_id_Handles(void) const345 const TSeq_id_Handles& GetDst_id_Handles(void) const
346 {
347 return m_Dst_id_Handles;
348 }
349
350 private:
351 friend class CSeq_align_Mapper;
352
353 void x_Add(CSeq_loc_Conversion& cvt, unsigned int loc_index);
354
355 bool ConvertPoint(const CSeq_point& src,
356 CRef<CSeq_loc>& dst,
357 unsigned int loc_index);
358 bool ConvertInterval(const CSeq_interval& src,
359 CRef<CSeq_loc>& dst,
360 unsigned int loc_index);
361
362 bool ConvertPacked_int(const CSeq_loc& src,
363 CRef<CSeq_loc>& dst,
364 unsigned int loc_index);
365 bool ConvertPacked_pnt(const CSeq_loc& src,
366 CRef<CSeq_loc>& dst,
367 unsigned int loc_index);
368 bool ConvertMix(const CSeq_loc& src,
369 CRef<CSeq_loc>& dst,
370 unsigned int loc_index);
371 bool ConvertEquiv(const CSeq_loc& src,
372 CRef<CSeq_loc>& dst,
373 unsigned int loc_index);
374 bool ConvertBond(const CSeq_loc& src,
375 CRef<CSeq_loc>& dst,
376 unsigned int loc_index);
377 void ConvertFeature(CAnnotObject_Ref& ref,
378 const CSeq_feat& orig_feat,
379 CRef<CSeq_feat>& mapped_feat);
380 void ConvertCdregion(CAnnotObject_Ref& ref,
381 const CSeq_feat& orig_feat,
382 CRef<CSeq_feat>& mapped_feat);
383 void ConvertRna(CAnnotObject_Ref& ref,
384 const CSeq_feat& orig_feat,
385 CRef<CSeq_feat>& mapped_feat);
386
387 CRef<CSeq_loc_Conversion> m_SingleConv;
388 unsigned int m_SingleIndex;
389 TConvByIndex m_CvtByIndex;
390 TSeq_id_Handles m_Dst_id_Handles;
391 bool m_Partial;
392 bool m_PartialHasUnconvertedId;
393 TRange m_TotalRange;
394 CHeapScope m_Scope;
395
396 CRef<CGraphRanges> m_GraphRanges;
397 };
398
399
400 inline
IsSpecialLoc(void) const401 bool CSeq_loc_Conversion::IsSpecialLoc(void) const
402 {
403 return m_LastType >= eMappedObjType_Seq_point;
404 }
405
406
407 inline
ConvertPos(TSeqPos src_pos)408 TSeqPos CSeq_loc_Conversion::ConvertPos(TSeqPos src_pos)
409 {
410 if ( src_pos < m_Src_from || src_pos > m_Src_to ) {
411 m_Partial = true;
412 return kInvalidSeqPos;
413 }
414 TSeqPos dst_pos;
415 if ( !m_Reverse ) {
416 dst_pos = m_Shift + src_pos;
417 }
418 else {
419 dst_pos = m_Shift - src_pos;
420 }
421 return dst_pos;
422 }
423
424
425 inline
GoodSrcId(const CSeq_id & id)426 bool CSeq_loc_Conversion::GoodSrcId(const CSeq_id& id)
427 {
428 if ( m_Src_id_Handle == id ) {
429 return true;
430 }
431 m_Partial = m_PartialHasUnconvertedId = true;
432 return false;
433 }
434
435
436 inline
ConvertStrand(ENa_strand strand) const437 ENa_strand CSeq_loc_Conversion::ConvertStrand(ENa_strand strand) const
438 {
439 if ( m_Reverse ) {
440 strand = Reverse(strand);
441 }
442 return strand;
443 }
444
445
446 END_SCOPE(objects)
447 END_NCBI_SCOPE
448
#endif  // SEQ_LOC_CVT__HPP
450