1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2010, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de>
33 // ==========================================================================
34 // Journaled String implementation.
35 // ==========================================================================
36 
37 #ifndef SEQAN_SEQUENCE_JOURNAL_SEQUENCE_JOURNAL_H_
38 #define SEQAN_SEQUENCE_JOURNAL_SEQUENCE_JOURNAL_H_
39 
40 namespace seqan {
41 
42 // ============================================================================
43 // Forwards
44 // ============================================================================
45 
46 // ============================================================================
47 // Tags, Classes
48 // ============================================================================
49 
50 /**
51 .Spec.Journaled String
52 ..summary:Journaled versions of arbitrary underlying string.
53 ..signature:String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> >
54 ..include:seqan/sequence_journaled.h
55 ..cat:Sequences
56  */
57 
58 template <typename THostSpec, typename TJournalSpec = SortedArray, typename TBufferSpec = Alloc<void> >
59 struct Journaled {};
60 
61 template <typename TValue_, typename THostSpec_, typename TJournalSpec_, typename TBufferSpec_>
62 class String<TValue_, Journaled<THostSpec_, TJournalSpec_, TBufferSpec_> >
63 {
64 public:
65     typedef TValue_ TValue;
66     typedef THostSpec_ THostSpec;
67     typedef TJournalSpec_ TJournalSpec;
68     typedef TBufferSpec_ TBufferSpec;
69 
70     typedef String<TValue, THostSpec> THost;
71     typedef typename Size<THost>::Type TSize;
72     typedef typename Position<THost>::Type TPosition;
73     typedef String<TValue, TBufferSpec> TInsertionBuffer;
74     typedef JournalEntry<TSize, TPosition> TJournalEntry;
75     typedef JournalEntries<TJournalEntry, TJournalSpec> TJournalEntries;
76 
77     // The underlying host string.
78     Holder<THost> _holder;
79     // A buffer for inserted strings.
80     TInsertionBuffer _insertionBuffer;
81     // The journal is a sorted set of TJournalEntry objects, the exact types
82     // depends on TJournalSpec.  Note that the entries resemble a partial
83     // sum datastructure.
84     TJournalEntries _journalEntries;
85     // The journaled string's size.
86     TSize _length;
87 
String()88     String() {}
89 
String(THost & host)90     String(THost & host)
91     {
92         SEQAN_CHECKPOINT;
93         setHost(*this, host);
94     }
95 
96     // TODO(holtgrew): Actually, we want to have a proxy for non-const.
97     TValue operator[](TPosition const & pos) const
98     {
99         SEQAN_CHECKPOINT;
100         return getValue(*this, pos);
101     }
102 };
103 
104 // ============================================================================
105 // Metafunctions
106 // ============================================================================
107 
108 ///.Metafunction.Host.param.T:Spec.Journaled String
109 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec>
110 struct Host<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > >
111 {
112     typedef String<TValue, THostSpec> Type;
113 };
114 
115 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec>
116 struct Host<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const>
117 {
118     typedef String<TValue, THostSpec> const Type;
119 };
120 
121 /**
122 .Metafunction.InsertionBuffer
123 ..summary:Return type of insertion buffer string for a journaled string.
124 ..param.T:Spec.Journaled String
125 ..include:sequan/sequence_journal.h
126 */
127 template <typename T>
128 struct InsertionBuffer;
129 
130 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec>
131 struct InsertionBuffer<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > >
132 {
133     typedef String<TValue, TBufferSpec> Type;
134 };
135 
136 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec>
137 struct InsertionBuffer<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const>
138 {
139     typedef String<TValue, TBufferSpec> const Type;
140 };
141 
142 ///.Metafunction.Size.param.T:Spec.Journaled String
143 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec>
144 struct Size<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > >
145 {
146     typedef typename String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> >::TSize Type;
147 };
148 
149 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec>
150 struct Size<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const>
151     : Size<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > > {};
152 
153 ///.Metafunction.Position.param.T:Spec.Journaled String
154 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec>
155 struct Position<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > >
156 {
157   typedef typename String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> >::TPosition Type;
158 };
159 
160 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec>
161 struct Position<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const>
162     : Position<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > > {};
163 
164 ///.Metafunction.Value.param.T:Spec.Journaled String
165 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec>
166 struct Value<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > >
167 {
168   typedef TValue Type;
169 };
170 
171 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec>
172 struct Value<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const>
173 {
174   typedef TValue Type;
175 };
176 
177 ///.Metafunction.GetValue.param.T:Spec.Journaled String
178 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec>
179 struct GetValue<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > >
180 {
181   typedef TValue Type;
182 };
183 
184 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec>
185 struct GetValue<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const>
186 {
187   typedef TValue Type;
188 };
189 
190 ///.Metafunction.Reference.param.T:Spec.Journaled String
191 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec>
192 struct Reference<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > >
193 {
194   typedef TValue & Type;
195 };
196 
197 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec>
198 struct Reference<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const>
199 {
200   typedef TValue const & Type;
201 };
202 
203 /**
204 .Metafunction.JournalType
205 ..signature:JournalType<T>::Type
206 ..summary:Metafunction for returning the type of the journal of a Journaled String.
207 ..param.T:Spec.Journaled String
208 ..include:seqan/string_journaled.h
209  */
210 template <typename T>
211 struct JournalType;
212 
213 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec>
214 struct JournalType<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > >
215 {
216     typedef typename Size<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > >::Type TSize_;
217     typedef typename Position<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > >::Type TPosition_;
218     typedef JournalEntry<TSize_, TPosition_> TJournalEntry_;
219 
220     typedef JournalEntries<TJournalEntry_, TJournalSpec> Type;
221 };
222 
223 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec>
224 struct JournalType<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const>
225 {
226     typedef typename JournalType<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > >::Type const Type;
227 };
228 
229 // ============================================================================
230 // Functions
231 // ============================================================================
232 
233 template <typename TStream, typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec>
234 inline
235 TStream &
236 operator<<(TStream & stream, String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const & s)
237 {
238     SEQAN_CHECKPOINT;
239     typedef String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > TString;
240     typedef typename TString::TJournalEntries TJournalEntries;
241     typedef typename Iterator<TJournalEntries const, Standard>::Type TIterator;
242 
243     for (TIterator it = begin(s._journalEntries), itend = end(s._journalEntries); it != itend; ++it) {
244         if (value(it).segmentSource == SOURCE_ORIGINAL) {
245             stream << infix(value(s._holder), value(it).physicalPosition, value(it).physicalPosition + value(it).length);
246         } else {
247             SEQAN_ASSERT_EQ(value(it).segmentSource, SOURCE_PATCH);
248             stream << infix(s._insertionBuffer, value(it).physicalPosition, value(it).physicalPosition + value(it).length);
249         }
250     }
251     return stream;
252 }
253 
254 /**
255 .Function.setHost:
256 ..param.object.type:Spec.Journaled String
257 ..param.host.type:Class.String
258 ..include:seqan/sequence_journaled.h
259 */
260 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec, typename TSequence2>
261 inline
262 void
263 setHost(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > & journaledString, TSequence2 & str)
264 {
265     SEQAN_CHECKPOINT;
266     setValue(journaledString._holder, str);
267     journaledString._length = length(str);
268     reinit(journaledString._journalEntries, length(str));
269 }
270 
271 /**
272 .Function.host:
273 ..param.object.type:Spec.Journaled String
274 ..include:seqan/sequence_journaled.h
275 */
276 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec>
277 inline
278 typename Host<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > >::Type &
279 host(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > & journaledString)
280 {
281     SEQAN_CHECKPOINT;
282     return value(journaledString._holder);
283 }
284 
285 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec>
286 inline
287 typename Host<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > >::Type const &
288 host(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const & journaledString)
289 {
290     SEQAN_CHECKPOINT;
291     return value(journaledString._holder);
292 }
293 
294 /**
295 .Function.clear:
296 ..param.object.type:Spec.Journaled String
297 ..include:seqan/sequence_journaled.h
298  */
299 // TODO(holtgrew): Behaviour is to clear the journal, not the string!
300 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec>
301 inline void
302 clear(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > & journaledString)
303 {
304     SEQAN_CHECKPOINT;
305     reinit(journaledString._journalEntries, length(host(journaledString)));
306 }
307 
308 /**
309 .Function.flatten:
310 ..summary:Apply the journal to the underlying string, destructively on the underlying string.
311 ..signature:flatten(journaledString)
312 ..param.journaledString:The journaled string to flatten.
313 ...type:Spec.Journaled String
314 ..include:seqan/sequence_journaled.h
315  */
316 // TODO(holtgrew): Write me! What about non-destructive version that creates a new copy and sets holder to it?
317 
318 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec, typename TPos>
319 inline void
320 erase(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > & journaledString,
321       TPos const & pos,
322       TPos const & posEnd)
323 {
324     SEQAN_CHECKPOINT;
325 	SEQAN_ASSERT_GEQ(static_cast<TPos>(journaledString._length), pos);
326 	SEQAN_ASSERT_GEQ(static_cast<TPos>(journaledString._length), posEnd);
327     SEQAN_ASSERT_GEQ(static_cast<TPos>(journaledString._length), posEnd - pos);
328     journaledString._length -= posEnd - pos;
329     recordErase(journaledString._journalEntries, pos, posEnd);
330     if (length(journaledString._journalEntries) == 0)
331         clear(journaledString._insertionBuffer);
332 }
333 
334 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec, typename TPos>
335 inline void
336 erase(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > & journaledString,
337       TPos const & pos)
338 {
339     SEQAN_CHECKPOINT;
340     SEQAN_ASSERT_GEQ(journaledString._length, 1u);
341     erase(journaledString, pos, pos + 1);
342 }
343 
344 
345 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec, typename TString, typename TPos>
346 inline void
347 insert(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > & journaledString,
348        TPos const & pos,
349        TString const & seq)
350 {
351     SEQAN_CHECKPOINT;
352     journaledString._length += length(seq);
353     TPos beginPos = length(journaledString._insertionBuffer);
354     append(journaledString._insertionBuffer, seq);
355     recordInsertion(journaledString._journalEntries, pos, beginPos, length(seq));
356 }
357 
358 
359 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec, typename TPos, typename TValue2>
360 inline void
361 insertValue(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > & journaledString,
362             TPos const & pos,
363             TValue2 const & value)
364 {
365     SEQAN_CHECKPOINT;
366     journaledString._length += 1;
367     TPos beginPos = length(journaledString._insertionBuffer);
368     appendValue(journaledString._insertionBuffer, value);
369     recordInsertion(journaledString._journalEntries, pos, beginPos, 1u);
370 }
371 
372 
373 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec, typename TPos, typename TSequence2>
374 inline void
375 assignInfix(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > & journaledString,
376             TPos const & beginPos,
377             TPos const & endPos,
378             TSequence2 const & valueString)
379 {
380     SEQAN_CHECKPOINT;
381     erase(journaledString, beginPos, endPos);
382     insert(journaledString, beginPos, valueString);
383 }
384 
385 
386 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec, typename TPos, typename TValue2>
387 inline void
388 assignValue(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > & journaledString,
389             TPos const & pos,
390             TValue2 const & value)
391 {
392     SEQAN_CHECKPOINT;
393     erase(journaledString, pos);
394     insertValue(journaledString, pos, value);
395 }
396 
397 
398 // TODO(holtgrew): Batch-Assignment of values through segments?
399 
400 // TODO(holtgrew): begin
401 // TODO(holtgrew): empty
402 // TODO(holtgrew): end
403 // TODO(holtgrew): flatten
404 // TODO(holtgrew): fill
405 // TODO(holtgrew): getValue
406 // TODO(holtgrew): infix
407 // TODO(holtgrew): infixWithLength
408 // TODO(holtgrew): iter
409 
410 // TODO(holtgrew): Unused, remove?
411 /*
412 template <typename TSequence, typename TJournalSpec>
413 inline
414 typename Value<TSequence>::Type const &
415 front(String...<TSequence, TJournalSpec> const & journaledString)
416 {
417     SEQAN_XXXCHECKPOINT;
418     typedef SequenceJournal<TSequence, TJournalSpec> TString;
419     typedef typename TString::TNode TNode;
420     TNode frontNode = front(journaledString._journalEntries);
421     if (frontNode->segmentSource == SOURCE_ORIGINAL) {
422         return getValue(value(journaledString._holder), frontNode->virtualPosition + frontNode->length - 1);
423     } else {
424         SEQAN_ASSERT_EQ(frontNode->segmentSource, SOURCE_PATCH);
425         return getValue(journaledString._insertionBuffer, frontNode->virtualPosition + frontNode->length - 1);
426     }
427 }
428 
429 // front/back clash with general sequence definitions.
430 template <typename TSequence, typename TJournalSpec>
431 inline
432 TValue const &
433 back(SequenceJournal<TSequence, TJournalSpec> const & journaledString)
434 {
435     SEQAN_XXXCHECKPOINT;
436     typedef SequenceJournal<TSequence, TJournalSpec> TString;
437     typedef typename TString::TNode TNode;
438     TNode backNode = back(journaledString._journalEntries);
439     if (backNode->segmentSource == SOURCE_ORIGINAL) {
440         return getValue(value(journaledString._holder), backNode->virtualPosition + backNode->length - 1);
441     } else {
442         SEQAN_ASSERT_EQ(backNode->segmentSource, SOURCE_PATCH);
443         return getValue(journaledString._insertionBuffer, backNode->virtualPosition + backNode->length - 1);
444     }
445 }
446 */
447 
448 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec>
449 inline
450 typename Size<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > >::Type
451 length(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const & journaledString)
452 {
453     SEQAN_CHECKPOINT;
454     return journaledString._length;
455 }
456 
457 
458 // TODO(holtgrew): toCString
459 // TODO(holtgrew): value
460 
461 // TOOD(holtgrew): operator<
462 // TOOD(holtgrew): operator>
463 // TOOD(holtgrew): operator<=
464 // TOOD(holtgrew): operator>=
465 // TOOD(holtgrew): operator==
466 // TOOD(holtgrew): operator!=
467 
468 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec>
469 inline
470 typename GetValue<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const>::Type
471 getValue(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const & journaledString,
472          typename Position<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > >::Type const & pos)
473 {
474     SEQAN_CHECKPOINT;
475     typedef String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const TJournaledString;
476     typedef typename TJournaledString::TJournalEntry TJournalEntry;
477     typedef typename Position<TJournaledString>::Type TPos;
478 
479     TJournalEntry entry = findJournalEntry(journaledString._journalEntries, pos);
480     TPos relativePos = pos - entry.virtualPosition;
481 
482     if (entry.segmentSource == SOURCE_ORIGINAL) {
483         return getValue(value(journaledString._holder), entry.physicalPosition + relativePos);
484     } else {
485         return getValue(journaledString._insertionBuffer, entry.physicalPosition + relativePos);
486     }
487 }
488 
489 
490 // Note that if pos is in a gap, we return the position of the entry
491 // after the gap in the host.
492 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec, typename TPos>
493 inline
494 typename Position<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > >::Type
495 virtualToHostPosition(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const & journaledString,
496                       TPos const & pos)
497 {
498     SEQAN_CHECKPOINT;
499     // TODO(holtgrew): With a better journal entries datastructure, we could solve the main problem here. At the moment, we delegate completely.
500     return virtualToHostPosition(journaledString._journalEntries, pos);
501 }
502 
503 
504 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec, typename TPos>
505 inline
506 bool
507 isGapInHost(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const & journaledString,
508             TPos const & pos)
509 {
510     SEQAN_CHECKPOINT;
511     // TODO(holtgrew): With a better journal entries datastructure, we could solve the main problem here. At the moment, we delegate completely.
512     return isGapInHost(journaledString._journalEntries, pos);
513 }
514 
515 
516 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec>
517 inline
518 const void *
519 id(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > & journaledString)
520 {
521     SEQAN_CHECKPOINT;
522     return id(value(journaledString._holder));
523 }
524 
525 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec>
526 inline
527 const void *
528 id(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const & journaledString)
529 {
530     SEQAN_CHECKPOINT;
531     return id(value(journaledString._holder));
532 }
533 
534 }  // namespace seqan
535 
536 #endif  // SEQAN_SEQUENCE_JOURNAL_SEQUENCE_JOURNAL_H_
537