1 // ========================================================================== 2 // SeqAn - The Library for Sequence Analysis 3 // ========================================================================== 4 // Copyright (c) 2006-2010, Knut Reinert, FU Berlin 5 // All rights reserved. 6 // 7 // Redistribution and use in source and binary forms, with or without 8 // modification, are permitted provided that the following conditions are met: 9 // 10 // * Redistributions of source code must retain the above copyright 11 // notice, this list of conditions and the following disclaimer. 12 // * Redistributions in binary form must reproduce the above copyright 13 // notice, this list of conditions and the following disclaimer in the 14 // documentation and/or other materials provided with the distribution. 15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of 16 // its contributors may be used to endorse or promote products derived 17 // from this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE 23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 29 // DAMAGE. 30 // 31 // ========================================================================== 32 // Author: Manuel Holtgrewe <manuel.holtgrewe@fu-berlin.de> 33 // ========================================================================== 34 // Journaled String implementation. 35 // ========================================================================== 36 37 #ifndef SEQAN_SEQUENCE_JOURNAL_SEQUENCE_JOURNAL_H_ 38 #define SEQAN_SEQUENCE_JOURNAL_SEQUENCE_JOURNAL_H_ 39 40 namespace seqan { 41 42 // ============================================================================ 43 // Forwards 44 // ============================================================================ 45 46 // ============================================================================ 47 // Tags, Classes 48 // ============================================================================ 49 50 /** 51 .Spec.Journaled String 52 ..summary:Journaled versions of arbitrary underlying string. 53 ..signature:String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > 54 ..include:seqan/sequence_journaled.h 55 ..cat:Sequences 56 */ 57 58 template <typename THostSpec, typename TJournalSpec = SortedArray, typename TBufferSpec = Alloc<void> > 59 struct Journaled {}; 60 61 template <typename TValue_, typename THostSpec_, typename TJournalSpec_, typename TBufferSpec_> 62 class String<TValue_, Journaled<THostSpec_, TJournalSpec_, TBufferSpec_> > 63 { 64 public: 65 typedef TValue_ TValue; 66 typedef THostSpec_ THostSpec; 67 typedef TJournalSpec_ TJournalSpec; 68 typedef TBufferSpec_ TBufferSpec; 69 70 typedef String<TValue, THostSpec> THost; 71 typedef typename Size<THost>::Type TSize; 72 typedef typename Position<THost>::Type TPosition; 73 typedef String<TValue, TBufferSpec> TInsertionBuffer; 74 typedef JournalEntry<TSize, TPosition> TJournalEntry; 75 typedef JournalEntries<TJournalEntry, TJournalSpec> TJournalEntries; 76 77 // The underlying host string. 78 Holder<THost> _holder; 79 // A buffer for inserted strings. 80 TInsertionBuffer _insertionBuffer; 81 // The journal is a sorted set of TJournalEntry objects, the exact types 82 // depends on TJournalSpec. Note that the entries resemble a partial 83 // sum datastructure. 84 TJournalEntries _journalEntries; 85 // The journaled string's size. 86 TSize _length; 87 String()88 String() {} 89 String(THost & host)90 String(THost & host) 91 { 92 SEQAN_CHECKPOINT; 93 setHost(*this, host); 94 } 95 96 // TODO(holtgrew): Actually, we want to have a proxy for non-const. 97 TValue operator[](TPosition const & pos) const 98 { 99 SEQAN_CHECKPOINT; 100 return getValue(*this, pos); 101 } 102 }; 103 104 // ============================================================================ 105 // Metafunctions 106 // ============================================================================ 107 108 ///.Metafunction.Host.param.T:Spec.Journaled String 109 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec> 110 struct Host<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > > 111 { 112 typedef String<TValue, THostSpec> Type; 113 }; 114 115 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec> 116 struct Host<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const> 117 { 118 typedef String<TValue, THostSpec> const Type; 119 }; 120 121 /** 122 .Metafunction.InsertionBuffer 123 ..summary:Return type of insertion buffer string for a journaled string. 124 ..param.T:Spec.Journaled String 125 ..include:sequan/sequence_journal.h 126 */ 127 template <typename T> 128 struct InsertionBuffer; 129 130 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec> 131 struct InsertionBuffer<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > > 132 { 133 typedef String<TValue, TBufferSpec> Type; 134 }; 135 136 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec> 137 struct InsertionBuffer<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const> 138 { 139 typedef String<TValue, TBufferSpec> const Type; 140 }; 141 142 ///.Metafunction.Size.param.T:Spec.Journaled String 143 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec> 144 struct Size<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > > 145 { 146 typedef typename String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> >::TSize Type; 147 }; 148 149 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec> 150 struct Size<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const> 151 : Size<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > > {}; 152 153 ///.Metafunction.Position.param.T:Spec.Journaled String 154 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec> 155 struct Position<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > > 156 { 157 typedef typename String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> >::TPosition Type; 158 }; 159 160 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec> 161 struct Position<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const> 162 : Position<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > > {}; 163 164 ///.Metafunction.Value.param.T:Spec.Journaled String 165 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec> 166 struct Value<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > > 167 { 168 typedef TValue Type; 169 }; 170 171 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec> 172 struct Value<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const> 173 { 174 typedef TValue Type; 175 }; 176 177 ///.Metafunction.GetValue.param.T:Spec.Journaled String 178 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec> 179 struct GetValue<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > > 180 { 181 typedef TValue Type; 182 }; 183 184 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec> 185 struct GetValue<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const> 186 { 187 typedef TValue Type; 188 }; 189 190 ///.Metafunction.Reference.param.T:Spec.Journaled String 191 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec> 192 struct Reference<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > > 193 { 194 typedef TValue & Type; 195 }; 196 197 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec> 198 struct Reference<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const> 199 { 200 typedef TValue const & Type; 201 }; 202 203 /** 204 .Metafunction.JournalType 205 ..signature:JournalType<T>::Type 206 ..summary:Metafunction for returning the type of the journal of a Journaled String. 207 ..param.T:Spec.Journaled String 208 ..include:seqan/string_journaled.h 209 */ 210 template <typename T> 211 struct JournalType; 212 213 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec> 214 struct JournalType<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > > 215 { 216 typedef typename Size<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > >::Type TSize_; 217 typedef typename Position<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > >::Type TPosition_; 218 typedef JournalEntry<TSize_, TPosition_> TJournalEntry_; 219 220 typedef JournalEntries<TJournalEntry_, TJournalSpec> Type; 221 }; 222 223 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec> 224 struct JournalType<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const> 225 { 226 typedef typename JournalType<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > >::Type const Type; 227 }; 228 229 // ============================================================================ 230 // Functions 231 // ============================================================================ 232 233 template <typename TStream, typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec> 234 inline 235 TStream & 236 operator<<(TStream & stream, String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const & s) 237 { 238 SEQAN_CHECKPOINT; 239 typedef String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > TString; 240 typedef typename TString::TJournalEntries TJournalEntries; 241 typedef typename Iterator<TJournalEntries const, Standard>::Type TIterator; 242 243 for (TIterator it = begin(s._journalEntries), itend = end(s._journalEntries); it != itend; ++it) { 244 if (value(it).segmentSource == SOURCE_ORIGINAL) { 245 stream << infix(value(s._holder), value(it).physicalPosition, value(it).physicalPosition + value(it).length); 246 } else { 247 SEQAN_ASSERT_EQ(value(it).segmentSource, SOURCE_PATCH); 248 stream << infix(s._insertionBuffer, value(it).physicalPosition, value(it).physicalPosition + value(it).length); 249 } 250 } 251 return stream; 252 } 253 254 /** 255 .Function.setHost: 256 ..param.object.type:Spec.Journaled String 257 ..param.host.type:Class.String 258 ..include:seqan/sequence_journaled.h 259 */ 260 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec, typename TSequence2> 261 inline 262 void 263 setHost(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > & journaledString, TSequence2 & str) 264 { 265 SEQAN_CHECKPOINT; 266 setValue(journaledString._holder, str); 267 journaledString._length = length(str); 268 reinit(journaledString._journalEntries, length(str)); 269 } 270 271 /** 272 .Function.host: 273 ..param.object.type:Spec.Journaled String 274 ..include:seqan/sequence_journaled.h 275 */ 276 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec> 277 inline 278 typename Host<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > >::Type & 279 host(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > & journaledString) 280 { 281 SEQAN_CHECKPOINT; 282 return value(journaledString._holder); 283 } 284 285 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec> 286 inline 287 typename Host<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > >::Type const & 288 host(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const & journaledString) 289 { 290 SEQAN_CHECKPOINT; 291 return value(journaledString._holder); 292 } 293 294 /** 295 .Function.clear: 296 ..param.object.type:Spec.Journaled String 297 ..include:seqan/sequence_journaled.h 298 */ 299 // TODO(holtgrew): Behaviour is to clear the journal, not the string! 300 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec> 301 inline void 302 clear(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > & journaledString) 303 { 304 SEQAN_CHECKPOINT; 305 reinit(journaledString._journalEntries, length(host(journaledString))); 306 } 307 308 /** 309 .Function.flatten: 310 ..summary:Apply the journal to the underlying string, destructively on the underlying string. 311 ..signature:flatten(journaledString) 312 ..param.journaledString:The journaled string to flatten. 313 ...type:Spec.Journaled String 314 ..include:seqan/sequence_journaled.h 315 */ 316 // TODO(holtgrew): Write me! What about non-destructive version that creates a new copy and sets holder to it? 317 318 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec, typename TPos> 319 inline void 320 erase(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > & journaledString, 321 TPos const & pos, 322 TPos const & posEnd) 323 { 324 SEQAN_CHECKPOINT; 325 SEQAN_ASSERT_GEQ(static_cast<TPos>(journaledString._length), pos); 326 SEQAN_ASSERT_GEQ(static_cast<TPos>(journaledString._length), posEnd); 327 SEQAN_ASSERT_GEQ(static_cast<TPos>(journaledString._length), posEnd - pos); 328 journaledString._length -= posEnd - pos; 329 recordErase(journaledString._journalEntries, pos, posEnd); 330 if (length(journaledString._journalEntries) == 0) 331 clear(journaledString._insertionBuffer); 332 } 333 334 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec, typename TPos> 335 inline void 336 erase(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > & journaledString, 337 TPos const & pos) 338 { 339 SEQAN_CHECKPOINT; 340 SEQAN_ASSERT_GEQ(journaledString._length, 1u); 341 erase(journaledString, pos, pos + 1); 342 } 343 344 345 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec, typename TString, typename TPos> 346 inline void 347 insert(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > & journaledString, 348 TPos const & pos, 349 TString const & seq) 350 { 351 SEQAN_CHECKPOINT; 352 journaledString._length += length(seq); 353 TPos beginPos = length(journaledString._insertionBuffer); 354 append(journaledString._insertionBuffer, seq); 355 recordInsertion(journaledString._journalEntries, pos, beginPos, length(seq)); 356 } 357 358 359 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec, typename TPos, typename TValue2> 360 inline void 361 insertValue(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > & journaledString, 362 TPos const & pos, 363 TValue2 const & value) 364 { 365 SEQAN_CHECKPOINT; 366 journaledString._length += 1; 367 TPos beginPos = length(journaledString._insertionBuffer); 368 appendValue(journaledString._insertionBuffer, value); 369 recordInsertion(journaledString._journalEntries, pos, beginPos, 1u); 370 } 371 372 373 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec, typename TPos, typename TSequence2> 374 inline void 375 assignInfix(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > & journaledString, 376 TPos const & beginPos, 377 TPos const & endPos, 378 TSequence2 const & valueString) 379 { 380 SEQAN_CHECKPOINT; 381 erase(journaledString, beginPos, endPos); 382 insert(journaledString, beginPos, valueString); 383 } 384 385 386 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec, typename TPos, typename TValue2> 387 inline void 388 assignValue(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > & journaledString, 389 TPos const & pos, 390 TValue2 const & value) 391 { 392 SEQAN_CHECKPOINT; 393 erase(journaledString, pos); 394 insertValue(journaledString, pos, value); 395 } 396 397 398 // TODO(holtgrew): Batch-Assignment of values through segments? 399 400 // TODO(holtgrew): begin 401 // TODO(holtgrew): empty 402 // TODO(holtgrew): end 403 // TODO(holtgrew): flatten 404 // TODO(holtgrew): fill 405 // TODO(holtgrew): getValue 406 // TODO(holtgrew): infix 407 // TODO(holtgrew): infixWithLength 408 // TODO(holtgrew): iter 409 410 // TODO(holtgrew): Unused, remove? 411 /* 412 template <typename TSequence, typename TJournalSpec> 413 inline 414 typename Value<TSequence>::Type const & 415 front(String...<TSequence, TJournalSpec> const & journaledString) 416 { 417 SEQAN_XXXCHECKPOINT; 418 typedef SequenceJournal<TSequence, TJournalSpec> TString; 419 typedef typename TString::TNode TNode; 420 TNode frontNode = front(journaledString._journalEntries); 421 if (frontNode->segmentSource == SOURCE_ORIGINAL) { 422 return getValue(value(journaledString._holder), frontNode->virtualPosition + frontNode->length - 1); 423 } else { 424 SEQAN_ASSERT_EQ(frontNode->segmentSource, SOURCE_PATCH); 425 return getValue(journaledString._insertionBuffer, frontNode->virtualPosition + frontNode->length - 1); 426 } 427 } 428 429 // front/back clash with general sequence definitions. 430 template <typename TSequence, typename TJournalSpec> 431 inline 432 TValue const & 433 back(SequenceJournal<TSequence, TJournalSpec> const & journaledString) 434 { 435 SEQAN_XXXCHECKPOINT; 436 typedef SequenceJournal<TSequence, TJournalSpec> TString; 437 typedef typename TString::TNode TNode; 438 TNode backNode = back(journaledString._journalEntries); 439 if (backNode->segmentSource == SOURCE_ORIGINAL) { 440 return getValue(value(journaledString._holder), backNode->virtualPosition + backNode->length - 1); 441 } else { 442 SEQAN_ASSERT_EQ(backNode->segmentSource, SOURCE_PATCH); 443 return getValue(journaledString._insertionBuffer, backNode->virtualPosition + backNode->length - 1); 444 } 445 } 446 */ 447 448 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec> 449 inline 450 typename Size<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > >::Type 451 length(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const & journaledString) 452 { 453 SEQAN_CHECKPOINT; 454 return journaledString._length; 455 } 456 457 458 // TODO(holtgrew): toCString 459 // TODO(holtgrew): value 460 461 // TOOD(holtgrew): operator< 462 // TOOD(holtgrew): operator> 463 // TOOD(holtgrew): operator<= 464 // TOOD(holtgrew): operator>= 465 // TOOD(holtgrew): operator== 466 // TOOD(holtgrew): operator!= 467 468 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec> 469 inline 470 typename GetValue<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const>::Type 471 getValue(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const & journaledString, 472 typename Position<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > >::Type const & pos) 473 { 474 SEQAN_CHECKPOINT; 475 typedef String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const TJournaledString; 476 typedef typename TJournaledString::TJournalEntry TJournalEntry; 477 typedef typename Position<TJournaledString>::Type TPos; 478 479 TJournalEntry entry = findJournalEntry(journaledString._journalEntries, pos); 480 TPos relativePos = pos - entry.virtualPosition; 481 482 if (entry.segmentSource == SOURCE_ORIGINAL) { 483 return getValue(value(journaledString._holder), entry.physicalPosition + relativePos); 484 } else { 485 return getValue(journaledString._insertionBuffer, entry.physicalPosition + relativePos); 486 } 487 } 488 489 490 // Note that if pos is in a gap, we return the position of the entry 491 // after the gap in the host. 492 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec, typename TPos> 493 inline 494 typename Position<String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > >::Type 495 virtualToHostPosition(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const & journaledString, 496 TPos const & pos) 497 { 498 SEQAN_CHECKPOINT; 499 // TODO(holtgrew): With a better journal entries datastructure, we could solve the main problem here. At the moment, we delegate completely. 500 return virtualToHostPosition(journaledString._journalEntries, pos); 501 } 502 503 504 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec, typename TPos> 505 inline 506 bool 507 isGapInHost(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const & journaledString, 508 TPos const & pos) 509 { 510 SEQAN_CHECKPOINT; 511 // TODO(holtgrew): With a better journal entries datastructure, we could solve the main problem here. At the moment, we delegate completely. 512 return isGapInHost(journaledString._journalEntries, pos); 513 } 514 515 516 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec> 517 inline 518 const void * 519 id(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > & journaledString) 520 { 521 SEQAN_CHECKPOINT; 522 return id(value(journaledString._holder)); 523 } 524 525 template <typename TValue, typename THostSpec, typename TJournalSpec, typename TBufferSpec> 526 inline 527 const void * 528 id(String<TValue, Journaled<THostSpec, TJournalSpec, TBufferSpec> > const & journaledString) 529 { 530 SEQAN_CHECKPOINT; 531 return id(value(journaledString._holder)); 532 } 533 534 } // namespace seqan 535 536 #endif // SEQAN_SEQUENCE_JOURNAL_SEQUENCE_JOURNAL_H_ 537