1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: Rene Rahn <rene.rahn@fu-berlin.de>
33 // ==========================================================================
34 // Implements a data structure to store deltas efficiently.
35 // ==========================================================================
36 
37 #ifndef EXTRAS_INCLUDE_SEQAN_JOURNALED_STRING_TREE_DELTA_STORE_H_
38 #define EXTRAS_INCLUDE_SEQAN_JOURNALED_STRING_TREE_DELTA_STORE_H_
39 
40 namespace seqan
41 {
42 
43 // ============================================================================
44 // Forwards
45 // ============================================================================
46 
47 template <typename TDeltaStore, typename TDeltaType>
48 struct DeltaValue;
49 
50 // ============================================================================
51 // Tags, Classes, Enums
52 // ============================================================================
53 
54 /*!
55  * @defgroup DeltaTypeTags Delta Type Tags
56  * @brief Tags used for the different delta types.
57  */
58 
59 /*!
60  * @tag DeltaTypeTags#DeltaTypeSnp
61  * @brief Tag used to select SNPs.
62  * @headerfile <seqan/journaled_string_tree.h>
63  *
64  * @signature struct DeltaTypeSnp_;
65  *            typedef Tag<DeltaTypeSnp_> DeltaTypeSnp;
66  */
67 struct DeltaTypeSnp_;
68 typedef Tag<DeltaTypeSnp_> DeltaTypeSnp;
69 
70 /*!
71  * @tag DeltaTypeTags#DeltaTypeDel
72  * @brief Tag used to select deletions.
73  * @headerfile <seqan/journaled_string_tree.h>
74  *
75  * @signature struct DeltaTypeDel_;
76  *            typedef Tag<DeltaTypeDel_> DeltaTypeDel;
77  */
78 struct DeltaTypeDel_;
79 typedef Tag<DeltaTypeDel_> DeltaTypeDel;
80 
81 /*!
82  * @tag DeltaTypeTags#DeltaTypeIns
83  * @brief Tag used to select insertions.
84  * @headerfile <seqan/journaled_string_tree.h>
85  *
86  * @signature struct DeltaTypeIns_;
87  *            typedef Tag<DeltaTypeIns_> DeltaTypeIns;
88  */
89 struct DeltaTypeIns_;
90 typedef Tag<DeltaTypeIns_> DeltaTypeIns;
91 
92 /*!
93  * @tag DeltaTypeTags#DeltaTypeSV
94  * @brief Tag used to select SVs.
95  * @headerfile <seqan/journaled_string_tree.h>
96  *
97  * @signature struct DeltaTypeSV_;
98  *            typedef Tag<DeltaTypeSV_> DeltaTypeSV;
99  */
100 struct DeltaTypeSV_;
101 typedef Tag<DeltaTypeSV_> DeltaTypeSV;
102 
103 typedef TagList<DeltaTypeSnp,
104         TagList<DeltaTypeDel,
105         TagList<DeltaTypeIns,
106         TagList<DeltaTypeSV> > > > DeltaTypeTagList;
107 
108 // ----------------------------------------------------------------------------
109 // Class DeltaTypeSelector
110 // ----------------------------------------------------------------------------
111 
112 typedef TagSelector<DeltaTypeTagList> DeltaTypeSelector;
113 
114 // ----------------------------------------------------------------------------
115 // Enum DeltaType
116 // ----------------------------------------------------------------------------
117 
118 /*!
119  * @enum DeltaType
120  * @headerfile <seqan/journaled_string_tree.h>
121  * @brief Keys for specifying the delta type to be accessed.
122  *
123  * @val DeltaType DELTA_TYPE_SNP
124  * @brief Id to denote SNP events.
125  *
126  * @val DeltaType DELTA_TYPE_DEL
127  * @brief Id to denote deletion events.
128  *
129  * @val DeltaType DELTA_TYPE_INS
130  * @brief Id to denote insertion events.
131  *
132  * @val DeltaType DElTA_TYPE_INDEL
133  * @brief Id to denote replacement events.
134  */
135 
136 enum DeltaType
137 {
138     DELTA_TYPE_SNP = Find<DeltaTypeSelector, DeltaTypeSnp>::VALUE,
139     DELTA_TYPE_DEL = Find<DeltaTypeSelector, DeltaTypeDel>::VALUE,
140     DELTA_TYPE_INS = Find<DeltaTypeSelector, DeltaTypeIns>::VALUE,
141     DELTA_TYPE_SV = Find<DeltaTypeSelector, DeltaTypeSV>::VALUE
142 };
143 
144 namespace impl
145 {
146 
147 // ----------------------------------------------------------------------------
148 // Class DeltaStore
149 // ----------------------------------------------------------------------------
150 
151 template <typename TSnp, typename TDel = uint32_t, typename TIns = String<TSnp>, typename TSV = Pair<TDel, TIns> >
152 class DeltaStore
153 {
154 public:
155     typedef typename Member<DeltaStore, DeltaTypeSnp>::Type TSnpData;
156     typedef typename Member<DeltaStore, DeltaTypeDel>::Type TDelData;
157     typedef typename Member<DeltaStore, DeltaTypeIns>::Type TInsData;
158     typedef typename Member<DeltaStore, DeltaTypeSV>::Type  TSVData;
159 
160     // TODO(rmaerker): Elaborate on these ideas!
161     // Idea a) Use ConcatStringSet for insertions. Use as global insertion buffer for all journal sequences.
162     // Idea b) Use bit encoding for DNA alphabet.
163     // Idea c) Instead of insertion buffer, we append the inserted strings to the reference and only use original nodes.
164 
165     TSnpData    _snpData;
166     TDelData    _delData;
167     TInsData    _insData;
168     TSVData     _svData;
169 
DeltaStore()170     DeltaStore()
171     {}
172 };
173 
174 }  // namespace impl
175 
176 // ============================================================================
177 // Metafunctions
178 // ============================================================================
179 
180 // ----------------------------------------------------------------------------
181 // Metafunction BitsPerValue
182 // ----------------------------------------------------------------------------
183 
184 template <>
185 struct BitsPerValue<DeltaType>
186 {
187     static const unsigned VALUE = 2;
188 };
189 
190 // ----------------------------------------------------------------------------
191 // Metafunction Member
192 // ----------------------------------------------------------------------------
193 
194 // DeltaTypeSnp.
195 template <typename TSnp, typename TDel, typename TIns, typename TSV>
196 struct Member<impl::DeltaStore<TSnp, TDel, TIns, TSV>, DeltaTypeSnp>
197 {
198     typedef String<TSnp> Type;
199 };
200 
201 // DeltaTypeDel.
202 template <typename TSnp, typename TDel, typename TIns, typename TSV>
203 struct Member<impl::DeltaStore<TSnp, TDel, TIns, TSV>, DeltaTypeDel>
204 {
205     typedef String<TDel> Type;
206 };
207 
208 // DeltaTypeIns.
209 template <typename TSnp, typename TDel, typename TIns, typename TSV>
210 struct Member<impl::DeltaStore<TSnp, TDel, TIns, TSV>, DeltaTypeIns>
211 {
212     typedef String<TIns> Type;
213 };
214 
215 // DeltaTypeSV.
216 template <typename TSnp, typename TDel, typename TIns, typename TSV>
217 struct Member<impl::DeltaStore<TSnp, TDel, TIns, TSV>, DeltaTypeSV>
218 {
219     typedef String<TSV> Type;
220 };
221 
222 // Generic const version.
223 template <typename TSnp, typename TDel, typename TIns, typename TSV, typename TDeltaType>
224 struct Member<impl::DeltaStore<TSnp, TDel, TIns, TSV> const, TDeltaType>
225 {
226     typedef impl::DeltaStore<TSnp, TDel, TIns, TSV> TStore_;
227     typedef typename Member<TStore_, TDeltaType>::Type const Type;
228 };
229 
230 // ----------------------------------------------------------------------------
231 // Metafunction DeltaValue                                     [DELTA_TYPE_SNP]
232 // ----------------------------------------------------------------------------
233 
234 template <typename TSnp, typename TDel, typename TIns, typename TSV>
235 struct DeltaValue<impl::DeltaStore<TSnp, TDel, TIns, TSV>, DeltaTypeSnp>
236 {
237     typedef TSnp Type;
238 };
239 
240 template <typename TSnp, typename TDel, typename TIns, typename TSV>
241 struct DeltaValue<impl::DeltaStore<TSnp, TDel, TIns, TSV> const, DeltaTypeSnp>
242 {
243     typedef TSnp const Type;
244 };
245 
246 // ----------------------------------------------------------------------------
247 // Metafunction DeltaValue                                     [DELTA_TYPE_DEL]
248 // ----------------------------------------------------------------------------
249 
250 template <typename TSnp, typename TDel, typename TIns, typename TSV>
251 struct DeltaValue<impl::DeltaStore<TSnp, TDel, TIns, TSV>, DeltaTypeDel>
252 {
253     typedef TDel Type;
254 };
255 
256 template <typename TSnp, typename TDel, typename TIns, typename TSV>
257 struct DeltaValue<impl::DeltaStore<TSnp, TDel, TIns, TSV> const, DeltaTypeDel>
258 {
259     typedef TDel const Type;
260 };
261 
262 // ----------------------------------------------------------------------------
263 // Metafunction DeltaValue                                     [DELTA_TYPE_INS]
264 // ----------------------------------------------------------------------------
265 
266 template <typename TSnp, typename TDel, typename TIns, typename TSV>
267 struct DeltaValue<impl::DeltaStore<TSnp, TDel, TIns, TSV>, DeltaTypeIns>
268 {
269     typedef TIns Type;
270 };
271 
272 template <typename TSnp, typename TDel, typename TIns, typename TSV>
273 struct DeltaValue<impl::DeltaStore<TSnp, TDel, TIns, TSV> const, DeltaTypeIns>
274 {
275     typedef TIns const Type;
276 };
277 
278 // ----------------------------------------------------------------------------
279 // Metafunction DeltaValue                                   [DELTA_TYPE_SV]
280 // ----------------------------------------------------------------------------
281 
282 template <typename TSnp, typename TDel, typename TIns, typename TSV>
283 struct DeltaValue<impl::DeltaStore<TSnp, TDel, TIns, TSV>, DeltaTypeSV>
284 {
285     typedef TSV Type;
286 };
287 
288 template <typename TSnp, typename TDel, typename TIns, typename TSV>
289 struct DeltaValue<impl::DeltaStore<TSnp, TDel, TIns, TSV> const, DeltaTypeSV>
290 {
291     typedef TSV const Type;
292 };
293 
294 // ============================================================================
295 // Punlic Functions
296 // ============================================================================
297 
298 // ----------------------------------------------------------------------------
299 // Function isDeltaType()
300 // ----------------------------------------------------------------------------
301 
302 template <typename TTag>
303 inline bool
304 isDeltaType(DeltaType type, TTag const & /*tag*/)
305 {
306     return type == static_cast<DeltaType>(Find<DeltaTypeSelector, TTag>::VALUE);
307 }
308 
309 // ----------------------------------------------------------------------------
310 // Function selectDeltaType()                                    [DeltaTypeSnp]
311 // ----------------------------------------------------------------------------
312 
313 template <typename TTag>
314 constexpr inline DeltaType
315 selectDeltaType(TTag const & /*tag*/)
316 {
317     return static_cast<DeltaType>(Find<DeltaTypeSelector, TTag>::VALUE);
318 }
319 
320 // ----------------------------------------------------------------------------
321 // Function setDeltaType()
322 // ----------------------------------------------------------------------------
323 
324 inline void
325 setDeltaType(DeltaTypeSelector & selector, DeltaType const deltaType)
326 {
327     value(selector) = deltaType;
328 }
329 
330 // ============================================================================
331 // Private Functions
332 // ============================================================================
333 
334 namespace impl
335 {
336 
337 // ----------------------------------------------------------------------------
338 // Function getDeltaStore()                                     [DeltaTypeSnp]
339 // ----------------------------------------------------------------------------
340 
341 template <typename TSnp, typename TDel, typename TIns, typename TSV>
342 inline typename Member<DeltaStore<TSnp, TDel, TIns, TSV>, DeltaTypeSnp>::Type &
343 getDeltaStore(DeltaStore<TSnp, TDel, TIns, TSV> & store, DeltaTypeSnp /*tag*/)
344 {
345     return store._snpData;
346 }
347 
348 template <typename TSnp, typename TDel, typename TIns, typename TSV>
349 inline typename Member<DeltaStore<TSnp, TDel, TIns, TSV> const, DeltaTypeSnp>::Type &
350 getDeltaStore(DeltaStore<TSnp, TDel, TIns, TSV> const & store, DeltaTypeSnp /*tag*/)
351 {
352     return store._snpData;
353 }
354 
355 // ----------------------------------------------------------------------------
356 // Function getDeltaStore()                                     [DeltaTypeDel]
357 // ----------------------------------------------------------------------------
358 
359 template <typename TSnp, typename TDel, typename TIns, typename TSV>
360 inline typename Member<DeltaStore<TSnp, TDel, TIns, TSV>, DeltaTypeDel>::Type &
361 getDeltaStore(DeltaStore<TSnp, TDel, TIns, TSV> & store, DeltaTypeDel /*tag*/)
362 {
363     return store._delData;
364 }
365 
366 template <typename TSnp, typename TDel, typename TIns, typename TSV>
367 inline typename Member<DeltaStore<TSnp, TDel, TIns, TSV> const, DeltaTypeDel>::Type &
368 getDeltaStore(DeltaStore<TSnp, TDel, TIns, TSV> const & store, DeltaTypeDel /*tag*/)
369 {
370     return store._delData;
371 }
372 
373 // ----------------------------------------------------------------------------
374 // Function getDeltaStore()                                     [DeltaTypeIns]
375 // ----------------------------------------------------------------------------
376 
377 template <typename TSnp, typename TDel, typename TIns, typename TSV>
378 inline typename Member<DeltaStore<TSnp, TDel, TIns, TSV>, DeltaTypeIns>::Type &
379 getDeltaStore(DeltaStore<TSnp, TDel, TIns, TSV> & store, DeltaTypeIns /*tag*/)
380 {
381     return store._insData;
382 }
383 
384 template <typename TSnp, typename TDel, typename TIns, typename TSV>
385 inline typename Member<DeltaStore<TSnp, TDel, TIns, TSV> const, DeltaTypeIns>::Type &
386 getDeltaStore(DeltaStore<TSnp, TDel, TIns, TSV> const & store, DeltaTypeIns /*tag*/)
387 {
388     return store._insData;
389 }
390 
391 // ----------------------------------------------------------------------------
392 // Function getDeltaStore()                                      [DeltaTypeSV]
393 // ----------------------------------------------------------------------------
394 
395 template <typename TSnp, typename TDel, typename TIns, typename TSV>
396 inline typename Member<DeltaStore<TSnp, TDel, TIns, TSV>, DeltaTypeSV>::Type &
397 getDeltaStore(DeltaStore<TSnp, TDel, TIns, TSV> & store, DeltaTypeSV /*tag*/)
398 {
399     return store._svData;
400 }
401 
402 template <typename TSnp, typename TDel, typename TIns, typename TSV>
403 inline typename Member<DeltaStore<TSnp, TDel, TIns, TSV> const, DeltaTypeSV>::Type &
404 getDeltaStore(DeltaStore<TSnp, TDel, TIns, TSV> const & store, DeltaTypeSV /*tag*/)
405 {
406     return store._svData;
407 }
408 
409 // ----------------------------------------------------------------------------
410 // Function addDeltaValue()
411 // ----------------------------------------------------------------------------
412 
413 template <typename TSnp, typename TDel, typename TIns, typename TSV, typename TTag>
414 inline typename Size<DeltaStore<TSnp, TDel, TIns, TSV> >::Type
415 addDeltaValue(DeltaStore<TSnp, TDel, TIns, TSV> & store,
416               typename DeltaValue<DeltaStore<TSnp, TDel, TIns, TSV>, TTag>::Type const & value,
417               TTag /*deltaType*/)
418 {
419     appendValue(getDeltaStore(store, TTag()), value);
420     return length(getDeltaStore(store, TTag())) - 1;
421 }
422 
423 // ----------------------------------------------------------------------------
424 // Function eraseDeltaValue()
425 // ----------------------------------------------------------------------------
426 
427 template <typename TSnp, typename TDel, typename TIns, typename TSV, typename TPos, typename TTag>
428 inline typename Size<DeltaStore<TSnp, TDel, TIns, TSV> >::Type
429 eraseDeltaValue(DeltaStore<TSnp, TDel, TIns, TSV> & store,
430                 TPos recordPos,
431                 TTag /*deltaType*/)
432 {
433     typedef typename Size<DeltaStore<TSnp, TDel, TIns, TSV> >::Type TSize;
434     if (SEQAN_LIKELY(static_cast<TSize>(recordPos) < length(getDeltaStore(store, TTag()))))
435         erase(getDeltaStore(store, TTag()), recordPos);
436     return length(getDeltaStore(store, TTag()));
437 }
438 
439 // ----------------------------------------------------------------------------
440 // Function clear()
441 // ----------------------------------------------------------------------------
442 
443 template <typename TSnp, typename TDel, typename TIns, typename TSV>
444 inline void
445 clear(DeltaStore<TSnp, TDel, TIns, TSV> & deltaStore)
446 {
447     clear(deltaStore._delData);
448     clear(deltaStore._svData);
449     clear(deltaStore._insData);
450     clear(deltaStore._snpData);
451 }
452 
453 // ----------------------------------------------------------------------------
454 // Function deltaValue()
455 // ----------------------------------------------------------------------------
456 
457 template <typename TSnp, typename TDel, typename TIns, typename TSV, typename TPos, typename TTag>
458 inline typename DeltaValue<DeltaStore<TSnp, TDel, TIns, TSV>, TTag>::Type &
459 deltaValue(DeltaStore<TSnp, TDel, TIns, TSV> & store, TPos pos, TTag const & tag)
460 {
461     typedef typename Size<DeltaStore<TSnp, TDel, TIns, TSV> >::Type TSize SEQAN_TYPEDEF_FOR_DEBUG;
462     SEQAN_ASSERT_LT(static_cast<TSize>(pos), length(getDeltaStore(store, tag)));
463 
464     return value(getDeltaStore(store, tag), pos);
465 }
466 
467 template <typename TSnp, typename TDel, typename TIns, typename TSV, typename TPos, typename TTag>
468 inline typename DeltaValue<DeltaStore<TSnp, TDel, TIns, TSV> const, TTag>::Type &
469 deltaValue(DeltaStore<TSnp, TDel, TIns, TSV> const & store, TPos pos, TTag const & tag)
470 {
471     typedef typename Size<DeltaStore<TSnp, TDel, TIns, TSV> const>::Type TSize SEQAN_TYPEDEF_FOR_DEBUG;
472     SEQAN_ASSERT_LT(static_cast<TSize>(pos), length(getDeltaStore(store, tag)));
473 
474     return value(getDeltaStore(store, tag), pos);
475 }
476 
477 // ----------------------------------------------------------------------------
478 // Function deletionSize()
479 // ----------------------------------------------------------------------------
480 
481 template <typename TDeltaStore>
482 inline typename Size<TDeltaStore>::Type
483 deletionSize(typename DeltaValue<TDeltaStore, DeltaTypeSnp>::Type const & /*snp*/)
484 {
485     return 1;
486 }
487 
488 template <typename TDeltaStore>
489 inline typename Size<TDeltaStore>::Type
490 deletionSize(typename DeltaValue<TDeltaStore, DeltaTypeIns>::Type const & /*ins*/)
491 {
492     return 0;
493 }
494 
495 template <typename TDeltaStore>
496 inline typename Size<TDeltaStore>::Type
497 deletionSize(typename DeltaValue<TDeltaStore, DeltaTypeDel>::Type const & del)
498 {
499     return del;
500 }
501 
502 template <typename TDeltaStore>
503 inline typename Size<TDeltaStore>::Type
504 deletionSize(typename DeltaValue<TDeltaStore, DeltaTypeSV>::Type const & sv)
505 {
506     return sv.i1;
507 }
508 
509 template <typename TStore, typename TPos, typename TTag>
510 inline typename Size<TStore>::Type
511 deletionSize(TStore const & store, TPos const pos, TTag const & /*tag*/)
512 {
513     return deletionSize<TStore>(getDeltaStore(store, TTag())[pos]);
514 }
515 
516 // ----------------------------------------------------------------------------
517 // Function insertionSize()
518 // ----------------------------------------------------------------------------
519 
520 template <typename TDeltaStore>
521 inline typename Size<TDeltaStore>::Type
522 insertionSize(typename DeltaValue<TDeltaStore, DeltaTypeSnp>::Type const & /*snp*/)
523 {
524     return 1;
525 }
526 
527 template <typename TDeltaStore>
528 inline typename Size<TDeltaStore>::Type
529 insertionSize(typename DeltaValue<TDeltaStore, DeltaTypeIns>::Type const & ins)
530 {
531     return length(ins);
532 }
533 
534 template <typename TDeltaStore>
535 inline typename Size<TDeltaStore>::Type
536 insertionSize(typename DeltaValue<TDeltaStore, DeltaTypeDel>::Type const & /*del*/)
537 {
538     return 0;
539 }
540 
541 template <typename TDeltaStore>
542 inline typename Size<TDeltaStore>::Type
543 insertionSize(typename DeltaValue<TDeltaStore, DeltaTypeSV>::Type const & sv)
544 {
545     return length(sv.i2);
546 }
547 
548 template <typename TStore, typename TPos, typename TTag>
549 inline typename Size<TStore>::Type
550 insertionSize(TStore const & store, TPos const pos, TTag const & /*tag*/)
551 {
552     return insertionSize<TStore>(getDeltaStore(store, TTag())[pos]);
553 }
554 
555 // ----------------------------------------------------------------------------
556 // Function netSize()
557 // ----------------------------------------------------------------------------
558 
559 template <typename TStore, typename TPos, typename TTag>
560 inline typename MakeSigned<typename Size<TStore>::Type>::Type
561 netSize(TStore const & store, TPos const pos, TTag const & /*tag*/)
562 {
563     typedef typename MakeSigned<typename Size<TStore>::Type>::Type TSignedSize;
564     return static_cast<TSignedSize>(insertionSize(store, pos, TTag())) -
565            static_cast<TSignedSize>(deletionSize(store, pos, TTag()));
566 }
567 
568 }  // namespace impl
569 
570 }  // namespace seqan
571 
572 #endif // EXTRAS_INCLUDE_SEQAN_JOURNALED_STRING_TREE_DELTA_STORE_H_
573