1 // ==========================================================================
2 // SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 // * Redistributions of source code must retain the above copyright
11 // notice, this list of conditions and the following disclaimer.
12 // * Redistributions in binary form must reproduce the above copyright
13 // notice, this list of conditions and the following disclaimer in the
14 // documentation and/or other materials provided with the distribution.
15 // * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 // its contributors may be used to endorse or promote products derived
17 // from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: David Weese <david.weese@fu-berlin.de>
33 // ==========================================================================
34
35 #ifndef SEQAN_HEADER_STORE_IO_H
36 #define SEQAN_HEADER_STORE_IO_H
37
38 /* IOREV
39 *
40 * _doc_
41 *
42 *
43 * if this file is about the amos file format why isn't it named accordingly?
44 *
45 * altogether it is unclear why sequence io is in file/ but store io is in
46 * store/
47 *
48 */
49
50
51
52 namespace seqan
53 {
54
55 //////////////////////////////////////////////////////////////////////////////
56 // File tags
57 //////////////////////////////////////////////////////////////////////////////
58
59 //////////////////////////////////////////////////////////////////////////////
60
61 struct TagAmos_;
62 typedef Tag<TagAmos_> const Amos;
63
64
65 //////////////////////////////////////////////////////////////////////////////
66 // Auxillary functions
67 //////////////////////////////////////////////////////////////////////////////
68
69 //////////////////////////////////////////////////////////////////////////////
70
71 /*!
72 * @fn FragmentStore#getClrRange
73 * @brief Get the "clear" range of a read alignment.
74 *
75 * The clear range of a read alignment is the range of the part of the alignmetn that is not clipped.
76 *
77 * @signature void getClrRange(store, alignEl, begClr, endClr);
78 *
79 * @param[in] store The FragmentStore to work on.
80 * @param[in] alignEl The @link AlignedReadStoreElement @endlink to work on.
81 * @param[out] begClr Begin of the clear range.
82 * @param[out] endClr End of the clear range.
83 */
84
85
86 template <typename TSpec, typename TConfig, typename TPos, typename TGapAnchor, typename TSpecAlign, typename TBeginClr, typename TEndClr>
87 inline void
getClrRange(FragmentStore<TSpec,TConfig> const & fragStore,AlignedReadStoreElement<TPos,TGapAnchor,TSpecAlign> const & alignEl,TBeginClr & begClr,TEndClr & endClr)88 getClrRange(FragmentStore<TSpec, TConfig> const& fragStore,
89 AlignedReadStoreElement<TPos, TGapAnchor, TSpecAlign> const& alignEl,
90 TBeginClr& begClr, // Out-parameter: left / begin position of the clear range
91 TEndClr& endClr) // Out-parameter: right / end position of the clear range
92 {
93 typedef FragmentStore<TSpec, TConfig> TFragmentStore;
94 typedef typename Size<TFragmentStore>::Type TSize;
95 typedef typename Iterator<String<TGapAnchor> const, Standard>::Type TGapIter;
96
97 TSize lenRead = length(fragStore.readSeqStore[alignEl.readId]);
98 TGapIter itGap = begin(alignEl.gaps, Standard());
99 TGapIter itGapEnd = end(alignEl.gaps, Standard());
100
101 // Any gaps or clipped characters?
102 if (itGap == itGapEnd) {
103 begClr = 0;
104 endClr = lenRead;
105 } else {
106 // Begin clear range
107 begClr = (itGap->gapPos == 0) ? itGap->seqPos : 0;
108 // End clear range
109 --itGapEnd;
110 if (static_cast<TSize>(itGapEnd->seqPos) != lenRead) endClr = lenRead;
111 else {
112 int diff = (itGap != itGapEnd) ? (*(itGapEnd - 1)).gapPos - (*(itGapEnd-1)).seqPos : 0;
113 int newDiff = itGapEnd->gapPos - itGapEnd->seqPos;
114 endClr = (newDiff < diff) ? lenRead - (diff - newDiff) : lenRead;
115 }
116 }
117
118 // For reverse reads adapt clear ranges
119 if (alignEl.beginPos > alignEl.endPos) {
120 TBeginClr tmp = begClr;
121 begClr = lenRead - endClr;
122 endClr = lenRead - tmp;
123 }
124 }
125
126
127
128 //////////////////////////////////////////////////////////////////////////////
129 // Read / Write of AMOS message files (*.afg)
130 //////////////////////////////////////////////////////////////////////////////
131
132
133 //////////////////////////////////////////////////////////////////////////////
134
135 /*!
136 * @fn FragmentStore#read
137 * @brief Read the contents of a FragmentStore from a file.
138 *
139 * @signature int read(file, store, tag);
140 *
141 * @param[in,out] file The @link StreamConcept @endlink to read from.
142 * @param[in,out] store The FragmentStore to append to.
143 * @param[in] tag The format to read from. Can be Amos or Sam.
144 *
145 * @return int 0 in the case of success, non-0 value in case of errors.
146 */
147
148 template <typename TKey, typename TValue, typename TIter>
readAmosKeyValue(TKey & key,TValue & value,TIter & reader)149 void readAmosKeyValue(TKey &key, TValue &value, TIter &reader)
150 {
151 clear(key);
152 clear(value);
153
154 // read "KEY:VALUE" pair
155 readUntil(key, reader, EqualsChar<':'>());
156 skipOne(reader, EqualsChar<':'>());
157 readUntil(value, reader, IsWhitespace());
158 }
159
160 template<typename TFile, typename TSpec, typename TConfig>
161 inline int
read(FragmentStore<TSpec,TConfig> & fragStore,TFile & file,Amos)162 read(FragmentStore<TSpec, TConfig>& fragStore,
163 TFile & file,
164 Amos)
165 {
166 // Basic types
167 typedef FragmentStore<TSpec, TConfig> TFragmentStore;
168 typedef typename Id<TFragmentStore>::Type TId;
169 typedef typename Size<TFragmentStore>::Type TSize;
170 //typedef typename Value<TFile>::Type TValue;
171
172 // All fragment store element types
173 typedef typename Value<typename TFragmentStore::TContigStore>::Type TContigElement;
174 typedef typename Value<typename TFragmentStore::TLibraryStore>::Type TLibraryStoreElement;
175 typedef typename Value<typename TFragmentStore::TMatePairStore>::Type TMatePairElement;
176 typedef typename Value<typename TFragmentStore::TReadStore>::Type TReadStoreElement;
177 typedef typename Value<typename TFragmentStore::TAlignedReadStore>::Type TAlignedElement;
178 typedef typename TFragmentStore::TReadSeq TReadSeq;
179 typedef typename TFragmentStore::TContigSeq TContigSeq;
180
181 // All maps to mirror file ids to our ids
182 typedef std::map<TId, TSize> TIdMap;
183 // The following maps the library/fragment/read id from AMOS into the fragment store's ids.
184 TIdMap libIdMap;
185 TIdMap frgIdMap;
186 TIdMap readIdMap;
187 // For all paired reads (inferred from FRG), a mapping from the AMOS read id to the paired match id of the one
188 // alignment read from the AMOS file. Note that this has the assumption that there is only one alignment per read
189 // in the AMOS file.
190 TIdMap readToPairMatchId;
191 // The id of the next pair match.
192 unsigned nextPairMatchId = 0;
193
194 typename DirectionIterator<TFile, Input>::Type reader(file);
195
196 // Parse the file and convert the internal ids
197 if (atEnd(reader))
198 return 1;
199
200 CharString blockIdentifier, fieldIdentifier, eid, buffer;
201 TReadSeq readSeq;
202 CharString contigSeq;
203 CharString qual;
204 TId _id;
205
206 while (!atEnd(reader))
207 {
208 // New block?
209 if (value(reader) == '{')
210 {
211 skipOne(reader, EqualsChar<'{'>());
212
213 clear(blockIdentifier);
214 readUntil(blockIdentifier, reader, NotFunctor<IsAlpha>());
215 skipLine(reader);
216
217 // reset id and eid
218 _id = TReadStoreElement::INVALID_ID;
219 clear(eid);
220
221 // Library block
222 if (blockIdentifier == "LIB")
223 {
224 // The LIB block contains the mean template length/insert size and standard deviation thereof. Besides
225 // the numeric id and (iid) a (external) name of the library (eid).
226 TLibraryStoreElement libEl;
227 while (!atEnd(reader) && value(reader) != '}')
228 {
229 // read "KEY:VALUE" pair
230 readAmosKeyValue(fieldIdentifier, buffer, reader);
231
232 if (fieldIdentifier == "iid")
233 _id = lexicalCast<TId>(buffer);
234 else if (fieldIdentifier == "eid")
235 eid = buffer;
236 else if (fieldIdentifier == "mea")
237 libEl.mean = lexicalCast<double>(buffer);
238 else if (fieldIdentifier == "std")
239 libEl.std = lexicalCast<double>(buffer);
240
241 // Always skip over the remainder of the line.
242 skipUntil(reader, NotFunctor<IsWhitespace>());
243 }
244 skipOne(reader, EqualsChar<'}'>());
245
246 // Insert library information into the fragmentstore.
247 libIdMap.insert(std::make_pair(_id, length(fragStore.libraryStore)));
248 appendValue(fragStore.libraryStore, libEl, Generous());
249 appendValue(fragStore.libraryNameStore, eid, Generous());
250 }
251 // Fragment block
252 else if (blockIdentifier == "FRG")
253 {
254 // The FRG block contains the parent library (lib), links to the paired sequencing reads (rds), internal
255 // id (iid) and external id (eid).
256 bool foundRds = false;
257 TMatePairElement matePairEl;
258 while (!atEnd(reader) && value(reader) != '}')
259 {
260 // read "KEY:VALUE" pair
261 readAmosKeyValue(fieldIdentifier, buffer, reader);
262
263 if (fieldIdentifier == "iid")
264 _id = lexicalCast<TId>(buffer);
265 else if (fieldIdentifier == "eid")
266 eid = buffer;
267 else if (fieldIdentifier == "lib")
268 matePairEl.libId = lexicalCast<TId>(buffer);
269 else if (fieldIdentifier == "rds")
270 {
271 foundRds = true;
272 size_t comma = findFirst(buffer, EqualsChar<','>());
273 matePairEl.readId[0] = lexicalCast<TId>(prefix(buffer, comma));
274 matePairEl.readId[1] = lexicalCast<TId>(suffix(buffer, comma + 1));
275
276 // Store mapping to pair match id.
277 readToPairMatchId[matePairEl.readId[0]] = nextPairMatchId;
278 readToPairMatchId[matePairEl.readId[1]] = nextPairMatchId;
279 nextPairMatchId++;
280 }
281 // Always skip over the remainder of the line.
282 skipUntil(reader, NotFunctor<IsWhitespace>());
283 }
284 skipOne(reader, EqualsChar<'}'>());
285
286 // Only insert valid mate pairs
287 if (foundRds)
288 {
289 frgIdMap.insert(std::make_pair(_id, length(fragStore.matePairStore)));
290 appendValue(fragStore.matePairStore, matePairEl, Generous());
291 appendValue(fragStore.matePairNameStore, eid, Generous());
292 }
293 }
294 // Read block
295 else if (blockIdentifier == "RED")
296 {
297 // If matePairId is not updated, this yields to a singleton read below.
298 clear(readSeq);
299 clear(qual);
300 TId matePairId = TReadStoreElement::INVALID_ID;
301 while (!atEnd(reader) && value(reader) != '}')
302 {
303 // read "KEY:VALUE" pair
304 readAmosKeyValue(fieldIdentifier, buffer, reader);
305
306 if (fieldIdentifier == "iid")
307 _id = lexicalCast<TId>(buffer);
308 else if (fieldIdentifier == "eid")
309 eid = buffer;
310 else if (fieldIdentifier == "frg")
311 matePairId = lexicalCast<TId>(buffer);
312 else if (fieldIdentifier == "seq")
313 {
314 readUntil(readSeq, reader, EqualsChar<'.'>(), IsWhitespace());
315 skipOne(reader, EqualsChar<'.'>());
316 }
317 else if (fieldIdentifier == "qlt")
318 {
319 // TODO(holtgrew): Problematic if . part of qualities, need to count chars.
320 readUntil(qual, reader, EqualsChar<'.'>(), IsWhitespace());
321 skipOne(reader, EqualsChar<'.'>());
322 }
323 // Always skip over the remainder of the line.
324 skipUntil(reader, NotFunctor<IsWhitespace>());
325 }
326 // Set quality
327 assignQualities(readSeq, qual);
328 skipOne(reader, EqualsChar<'}'>());
329
330 // Insert the read
331 readIdMap.insert(std::make_pair(_id, length(fragStore.readStore)));
332 appendRead(fragStore, readSeq, matePairId);
333 appendValue(fragStore.readNameStore, eid, Generous());
334 }
335 // Contig block
336 else if (blockIdentifier == "CTG")
337 {
338 clear(contigSeq);
339 clear(qual);
340 TContigElement contigEl;
341 TSize fromAligned = length(fragStore.alignedReadStore);
342 while (!atEnd(reader) && value(reader) != '}')
343 {
344 // Are we entering a TLE block
345 if (value(reader) == '{')
346 {
347 // skip "{TLE\n"
348 skipOne(reader, EqualsChar<'{'>());
349 skipOne(reader, EqualsChar<'T'>());
350 skipOne(reader, EqualsChar<'L'>());
351 skipOne(reader, EqualsChar<'E'>());
352 skipUntil(reader, NotFunctor<IsWhitespace>());
353
354 TAlignedElement alignEl;
355 typedef typename TFragmentStore::TContigPos TContigPos;
356 TContigPos offsetPos = 0;
357 TContigPos clr1 = 0;
358 TContigPos clr2 = 0;
359 String<TContigPos> gaps;
360 while (!atEnd(reader) && value(reader) != '}')
361 {
362 // read "KEY:VALUE" pair
363 readAmosKeyValue(fieldIdentifier, buffer, reader);
364
365 if (fieldIdentifier == "src")
366 alignEl.readId = lexicalCast<TId>(buffer);
367 else if (fieldIdentifier == "off")
368 {
369 if (buffer != "-")
370 offsetPos = lexicalCast<TContigPos>(buffer);
371 else
372 offsetPos = 0;
373 }
374 else if (fieldIdentifier == "clr")
375 {
376 size_t comma = findFirst(buffer, EqualsChar<','>());
377 clr1 = lexicalCast<TContigPos>(prefix(buffer, comma));
378 clr2 = lexicalCast<TContigPos>(suffix(buffer, comma + 1));
379 }
380 else if (fieldIdentifier == "gap")
381 {
382 skipUntil(reader, NotFunctor<IsWhitespace>());
383 while (!atEnd(reader) && value(reader) != '.')
384 {
385 clear(buffer);
386 readUntil(buffer, reader, IsWhitespace());
387 appendValue(gaps, lexicalCast<TContigPos>(buffer));
388 skipUntil(reader, NotFunctor<IsWhitespace>());
389 }
390 skipOne(reader, EqualsChar<'.'>());
391 }
392 // Always skip over the remainder of the line.
393 skipUntil(reader, NotFunctor<IsWhitespace>());
394 }
395 skipOne(reader, EqualsChar<'}'>());
396 skipUntil(reader, NotFunctor<IsWhitespace>());
397
398 // Get the length of the read
399 TId readId = (readIdMap.find(alignEl.readId))->second;
400 TSize lenRead = length(value(fragStore.readSeqStore, readId));
401
402 // Create the gap anchors
403 typedef typename TFragmentStore::TContigGapAnchor TContigGapAnchor;
404 int offset = 0;
405 if ((clr1 < clr2) && (clr1>0)) offset = clr1;
406 else if ((clr1 > clr2) && (clr1 < static_cast<TContigPos>(lenRead))) offset = lenRead - clr1;
407 int diff = -1 * (int) (offset);
408 // Clipped begin
409 if (offset != 0) appendValue(alignEl.gaps, TContigGapAnchor(offset, 0), Generous());
410 // Internal gaps
411 typedef typename Iterator<String<TContigPos>, Standard>::Type TPosIter;
412 TPosIter posIt = begin(gaps, Standard());
413 TPosIter posItEnd = end(gaps, Standard());
414 TContigPos lastGap = 0;
415 TSize gapLen = 0;
416 TSize totalGapLen = 0;
417 for(;posIt!=posItEnd; goNext(posIt)) {
418 if (gapLen == 0) {
419 ++gapLen; ++totalGapLen;
420 ++diff;
421 lastGap = value(posIt);
422 }
423 else if (lastGap == value(posIt)) {
424 ++gapLen; ++totalGapLen;
425 ++diff;
426 }
427 else {
428 appendValue(alignEl.gaps, TContigGapAnchor(offset + lastGap, offset + lastGap + diff), Generous());
429 gapLen = 1; ++totalGapLen;
430 lastGap = value(posIt);
431 ++diff;
432 }
433 }
434 if (gapLen > 0) appendValue(alignEl.gaps, TContigGapAnchor(offset + lastGap, offset + lastGap + diff), Generous());
435 // Clipped end
436 if ((clr1 < clr2) && (clr2 < static_cast<TContigPos>(lenRead))) {
437 diff -= (lenRead - clr2);
438 appendValue(alignEl.gaps, TContigGapAnchor(lenRead, lenRead + diff), Generous());
439 } else if ((clr1 > clr2) && (clr2 > 0)) {
440 diff -= clr2;
441 appendValue(alignEl.gaps, TContigGapAnchor(lenRead, lenRead + diff), Generous());
442 }
443
444 // Set begin and end position
445 if (clr1 < clr2) {
446 alignEl.beginPos = offsetPos;
447 alignEl.endPos = offsetPos + totalGapLen + (clr2 - clr1);
448 } else {
449 alignEl.beginPos = offsetPos + totalGapLen + (clr1 - clr2);
450 alignEl.endPos = offsetPos;
451 }
452
453 // Append new align fragment, note: contigId must still be set
454 alignEl.id = length(fragStore.alignedReadStore);
455 appendValue(fragStore.alignedReadStore, alignEl, Generous());
456 }
457 else
458 {
459 // read "KEY:VALUE" pair
460 readAmosKeyValue(fieldIdentifier, buffer, reader);
461
462 if (fieldIdentifier == "iid")
463 _id = lexicalCast<TId>(buffer);
464 else if (fieldIdentifier == "eid")
465 eid = buffer;
466 else if (fieldIdentifier == "seq")
467 {
468 readUntil(contigSeq, reader, EqualsChar<'.'>(), IsWhitespace());
469 skipOne(reader, EqualsChar<'.'>());
470 }
471 else if (fieldIdentifier == "qlt")
472 {
473 // TODO(holtgrew): Problematic if . part of qualities, need to count chars.
474 readUntil(qual, reader, EqualsChar<'.'>(), IsWhitespace());
475 skipOne(reader, EqualsChar<'.'>());
476 }
477 // Always skip over the remainder of the line.
478 skipUntil(reader, NotFunctor<IsWhitespace>());
479 }
480 }
481 // Set quality
482 skipOne(reader, EqualsChar<'}'>());
483
484 // Create the gap anchors
485 char gapChar = gapValue<char>();
486 typedef typename Iterator<CharString, Standard>::Type TStringIter;
487 TStringIter seqIt = begin(contigSeq, Standard());
488 TStringIter seqItEnd = end(contigSeq, Standard());
489 TStringIter qualIt = begin(qual, Standard());
490 typedef typename TFragmentStore::TReadPos TPos;
491 typedef typename TFragmentStore::TContigGapAnchor TContigGapAnchor;
492 TPos ungappedPos = 0;
493 TPos gappedPos = 0;
494 bool gapOpen = false;
495 for (; seqIt != seqItEnd; goNext(seqIt), goNext(qualIt), ++gappedPos)
496 {
497 if (value(seqIt) == gapChar)
498 {
499 gapOpen = true;
500 }
501 else
502 {
503 if (gapOpen)
504 {
505 appendValue(contigEl.gaps, TContigGapAnchor(ungappedPos, gappedPos), Generous());
506 gapOpen = false;
507 }
508 typename Value<TContigSeq>::Type letter = getValue(seqIt);
509 assignQualityValue(letter, getValue(qualIt));
510 appendValue(contigEl.seq, letter, Generous());
511 ++ungappedPos;
512 }
513 }
514 if (gapOpen)
515 appendValue(contigEl.gaps, TContigGapAnchor(ungappedPos, gappedPos), Generous());
516
517 // Set the contigId in all aligned reads
518 TSize toAligned = length(fragStore.alignedReadStore);
519 TId newContigId = length(fragStore.contigStore);
520 for (; fromAligned < toAligned; ++fromAligned)
521 fragStore.alignedReadStore[fromAligned].contigId = newContigId;
522
523 // Insert the contig
524 appendValue(fragStore.contigStore, contigEl, Generous());
525 appendValue(fragStore.contigNameStore, eid, Generous());
526 }
527 // Unknown block identifier
528 else
529 {
530 // skip until closing bracket
531 while (!atEnd(reader) && value(reader) != '}')
532 skipLine(reader);
533 }
534 }
535 else
536 {
537 // not a block - skip line
538 skipLine(reader);
539 }
540 }
541
542 // Renumber all ids
543 typedef typename TIdMap::const_iterator TIdMapIter;
544 typedef typename Iterator<typename TFragmentStore::TMatePairStore>::Type TMateIter;
545 TMateIter mateIt = begin(fragStore.matePairStore);
546 TMateIter mateItEnd = end(fragStore.matePairStore);
547 for(;mateIt != mateItEnd; goNext(mateIt)) {
548 if (mateIt->libId != TMatePairElement::INVALID_ID) {
549 TIdMapIter libIdPos = libIdMap.find(mateIt->libId);
550 if (libIdPos != libIdMap.end())
551 mateIt->libId = libIdPos->second;
552 else
553 mateIt->libId = TMatePairElement::INVALID_ID;
554 }
555 if (mateIt->readId[0] != TMatePairElement::INVALID_ID) {
556 TIdMapIter readIdPos = readIdMap.find(mateIt->readId[0]);
557 if (readIdPos != readIdMap.end())
558 mateIt->readId[0] = readIdPos->second;
559 else
560 mateIt->readId[0] = TMatePairElement::INVALID_ID;
561 }
562 if (mateIt->readId[1]!= TMatePairElement::INVALID_ID) {
563 TIdMapIter readIdPos = readIdMap.find(mateIt->readId[1]);
564 if (readIdPos != readIdMap.end())
565 mateIt->readId[1] = readIdPos->second;
566 else
567 mateIt->readId[0] = TMatePairElement::INVALID_ID;
568 }
569 }
570
571 // Copy data from frgIdMap into the matePairId members of the readStore.
572 typedef typename Iterator<typename TFragmentStore::TReadStore>::Type TReadIter;
573 TReadIter readIt = begin(fragStore.readStore);
574 TReadIter readItEnd = end(fragStore.readStore);
575 for (;readIt != readItEnd; goNext(readIt))
576 {
577 if (readIt->matePairId != TReadStoreElement::INVALID_ID)
578 {
579 TIdMapIter mateIdPos = frgIdMap.find(readIt->matePairId);
580 if (mateIdPos != frgIdMap.end())
581 readIt->matePairId = mateIdPos->second;
582 else
583 readIt->matePairId = TReadStoreElement::INVALID_ID;
584 }
585 }
586
587 // Copy data from readIdMap into the pairMatchId entries of the alignedReadStore.
588 typedef typename Iterator<typename TFragmentStore::TAlignedReadStore>::Type TAlignIter;
589 TAlignIter alignIt = begin(fragStore.alignedReadStore);
590 TAlignIter alignItEnd = end(fragStore.alignedReadStore);
591 for (;alignIt != alignItEnd; goNext(alignIt))
592 {
593 if (alignIt->readId != TAlignedElement::INVALID_ID)
594 {
595 TIdMapIter readIdPos = readIdMap.find(alignIt->readId);
596 if (readIdPos != readIdMap.end())
597 {
598 //SEQAN_ASSERT(readToPairMatchId.find(alignIt->readId) != readToPairMatchId.end());
599 if (readToPairMatchId.find(alignIt->readId) != readToPairMatchId.end())
600 alignIt->pairMatchId = readToPairMatchId[alignIt->readId];
601 alignIt->readId = readIdPos->second;
602 }
603 else
604 {
605 alignIt->readId = TAlignedElement::INVALID_ID;
606 }
607 }
608 }
609
610 return 0;
611 }
612
613
614 //////////////////////////////////////////////////////////////////////////////
615
616 /*!
617 * @fn FragmentStore#write
618 * @brief Write the contents of a FragmentStore to a file.
619 *
620 * @signature int write(file, store, tag);
621 *
622 * @param[in,out] file The @link StreamConcept @endlink to write to.
623 * @param[in] store The FragmentStore to write to the file.
624 * @param[in] tag The format to write out. Types: Sam or Amos.
625 *
626 * @return int 0 in case of success, 1 in case of errors.
627 */
628
629 template <typename TTarget, typename TSpec, typename TConfig>
630 inline void
write(TTarget & target,FragmentStore<TSpec,TConfig> & fragStore,Amos)631 write(TTarget & target,
632 FragmentStore<TSpec, TConfig> & fragStore,
633 Amos)
634 {
635 // Basic types
636 typedef FragmentStore<TSpec, TConfig> TFragmentStore;
637 //typedef typename Id<TFragmentStore>::Type TId;
638 typedef typename Size<TFragmentStore>::Type TSize;
639 //typedef typename Value<TFile>::Type TValue;
640
641 // All fragment store element types
642 //typedef typename Value<typename TFragmentStore::TContigStore>::Type TContigElement;
643 //typedef typename Value<typename TFragmentStore::TLibraryStore>::Type TLibraryStoreElement;
644 typedef typename Value<typename TFragmentStore::TMatePairStore>::Type TMatePairElement;
645 typedef typename Value<typename TFragmentStore::TReadStore>::Type TReadStoreElement;
646 //typedef typename Value<typename TFragmentStore::TAlignedReadStore>::Type TAlignedElement;
647
648 typename DirectionIterator<TTarget, Output>::Type iter = directionIterator(target, Output());
649
650 // Write Header
651 write(iter, "{UNV\niid:1\neid:seqan\ncom:\nafg file created with SeqAn\n.\n}\n");
652
653 // Write Libraries
654 typedef typename Iterator<typename TFragmentStore::TLibraryStore, Standard>::Type TLibIter;
655 TLibIter libIt = begin(fragStore.libraryStore, Standard() );
656 TLibIter libItEnd = end(fragStore.libraryStore, Standard() );
657 for(TSize idCount = 0;libIt != libItEnd; goNext(libIt), ++idCount)
658 {
659 write(iter, "{LIB\niid:");
660 appendNumber(iter, idCount + 1);
661 writeValue(iter, '\n');
662 if (!empty(fragStore.libraryNameStore) && !empty(fragStore.libraryNameStore[idCount]))
663 {
664 write(iter, "eid:");
665 write(iter, fragStore.libraryNameStore[idCount]);
666 writeValue(iter, '\n');
667 }
668 write(iter, "{DST\nmea:");
669 appendNumber(iter, libIt->mean);
670 writeValue(iter, '\n');
671 write(iter, "std:");
672 appendNumber(iter, libIt->std);
673 writeValue(iter, '\n');
674 write(iter, "}\n}\n");
675 }
676
677 // Write Fragments / mate pairs
678 typedef typename Iterator<typename TFragmentStore::TMatePairStore, Standard>::Type TMateIter;
679 TMateIter mateIt = begin(fragStore.matePairStore, Standard() );
680 TMateIter mateItEnd = end(fragStore.matePairStore, Standard() );
681 for(TSize idCount = 0;mateIt != mateItEnd; goNext(mateIt), ++idCount)
682 {
683 write(iter, "{FRG\niid:");
684 appendNumber(iter, idCount + 1);
685 writeValue(iter, '\n');
686 if (!empty(fragStore.matePairNameStore) && !empty(fragStore.matePairNameStore[idCount]))
687 {
688 write(iter, "eid:");
689 write(iter, fragStore.matePairNameStore[idCount]);
690 writeValue(iter, '\n');
691 }
692 write(iter, "lib:");
693 appendNumber(iter, mateIt->libId + 1);
694 writeValue(iter, '\n');
695 if (mateIt->readId[0] != TMatePairElement::INVALID_ID && mateIt->readId[1] != TMatePairElement::INVALID_ID)
696 {
697 write(iter, "rds:");
698 appendNumber(iter, mateIt->readId[0] + 1);
699 writeValue(iter, ',');
700 appendNumber(iter, mateIt->readId[1] + 1);
701 writeValue(iter, '\n');
702 }
703 write(iter, "}\n");
704 }
705
706 // Get clear ranges
707 typedef Pair<typename TFragmentStore::TReadPos, typename TFragmentStore::TReadPos> TClrRange;
708 String<TClrRange> clrRange;
709 resize(clrRange, length(fragStore.readStore), TClrRange(0,0));
710 typedef typename Iterator<typename TFragmentStore::TAlignedReadStore, Standard>::Type TAlignIter;
711 TAlignIter alignIt = begin(fragStore.alignedReadStore, Standard() );
712 TAlignIter alignItEnd = end(fragStore.alignedReadStore, Standard() );
713 for(;alignIt != alignItEnd; goNext(alignIt))
714 {
715 typename TFragmentStore::TReadPos begClr = 0;
716 typename TFragmentStore::TReadPos endClr = 0;
717 getClrRange(fragStore, value(alignIt), begClr, endClr);
718 clrRange[alignIt->readId] = TClrRange(begClr, endClr);
719 }
720
721 // Write reads
722 typedef typename Iterator<typename TFragmentStore::TReadStore, Standard>::Type TReadIter;
723 TReadIter readIt = begin(fragStore.readStore, Standard() );
724 TReadIter readItEnd = end(fragStore.readStore, Standard() );
725 for(TSize idCount = 0;readIt != readItEnd; ++readIt, ++idCount)
726 {
727 // Skip reads without a name.
728 if (length(value(fragStore.readNameStore, idCount)) == 0u)
729 continue;
730 write(iter, "{RED\niid:");
731 appendNumber(iter, idCount + 1);
732 writeValue(iter, '\n');
733 if (!empty(fragStore.readNameStore) && !empty(fragStore.readNameStore[idCount]))
734 {
735 write(iter, "eid:");
736 write(iter, fragStore.readNameStore[idCount]);
737 writeValue(iter, '\n');
738 }
739 write(iter, "seq:\n");
740 writeWrappedString(iter, fragStore.readSeqStore[idCount], 60);
741 write(iter, ".\nqlt:\n");
742
743 typedef typename Value<typename TFragmentStore::TReadSeqStore>::Type TReadSeq;
744 typedef QualityExtractor<typename Value<TReadSeq>::Type> TQualityExtractor;
745 ModifiedString<TReadSeq const, ModView<TQualityExtractor> > quals(fragStore.readSeqStore[idCount]);
746 writeWrappedString(iter, quals, 60);
747 write(iter, ".\n");
748 if (readIt->matePairId != TReadStoreElement::INVALID_ID)
749 {
750 write(iter, "frg:");
751 appendNumber(iter, readIt->matePairId + 1);
752 writeValue(iter, '\n');
753 }
754 if (clrRange[idCount].i1 != clrRange[idCount].i2)
755 {
756 write(iter, "clr:");
757 appendNumber(iter, clrRange[idCount].i1);
758 writeValue(iter, ',');
759 appendNumber(iter, clrRange[idCount].i2);
760 writeValue(iter, '\n');
761 }
762 write(iter, "}\n");
763 }
764
765 // Sort aligned reads according to contigId
766 sortAlignedReads(fragStore.alignedReadStore, SortContigId());
767
768 // Write Contigs
769 typedef typename Iterator<typename TFragmentStore::TContigStore, Standard>::Type TContigIter;
770 TContigIter contigIt = begin(fragStore.contigStore, Standard() );
771 TContigIter contigItEnd = end(fragStore.contigStore, Standard() );
772 alignIt = begin(fragStore.alignedReadStore);
773 alignItEnd = end(fragStore.alignedReadStore);
774 for(TSize idCount = 0;contigIt != contigItEnd; goNext(contigIt), ++idCount)
775 {
776 write(iter, "{CTG\niid:");
777 appendNumber(iter, idCount + 1);
778 writeValue(iter, '\n');
779 if (!empty(fragStore.contigNameStore) && !empty(fragStore.contigNameStore[idCount]))
780 {
781 write(iter, "eid:");
782 write(iter, fragStore.contigNameStore[idCount]);
783 writeValue(iter, '\n');
784 }
785 String<char> qlt;
786 write(iter, "seq:\n");
787 typedef typename Iterator<typename TFragmentStore::TContigSeq>::Type TContigIter;
788 TContigIter seqContigIt = begin(contigIt->seq);
789 TContigIter seqContigItEnd = end(contigIt->seq);
790 typedef typename Iterator<String<typename TFragmentStore::TContigGapAnchor> >::Type TGapsIter;
791 TGapsIter itGaps = begin(contigIt->gaps);
792 TGapsIter itGapsEnd = end(contigIt->gaps);
793 int diff = 0;
794 char gapChar = gapValue<char>();
795 typename TFragmentStore::TContigPos mySeqPos = 0;
796 TSize k = 0;
797 for(;itGaps != itGapsEnd; goNext(itGaps))
798 {
799 while (mySeqPos < itGaps->seqPos)
800 {
801
802 if ((k % 60 == 0) && (k != 0))
803 writeValue(iter, '\n');
804 ++k;
805 writeValue(iter, value(seqContigIt));
806 char c = ' ';
807 convertQuality(c, getQualityValue(value(seqContigIt)));
808 appendValue(qlt, c, Generous());
809 goNext(seqContigIt);++mySeqPos;
810 }
811 for(int i = 0; i < ((int) itGaps->gapPos - (int) itGaps->seqPos) - diff; ++i)
812 {
813 if ((k % 60 == 0) && (k != 0))
814 writeValue(iter, '\n');
815 ++k;
816 writeValue(iter, gapChar);
817 appendValue(qlt, '0', Generous());
818 }
819 diff = (itGaps->gapPos - itGaps->seqPos);
820 }
821 for(;seqContigIt != seqContigItEnd; goNext(seqContigIt))
822 {
823 if ((k % 60 == 0) && (k != 0))
824 writeValue(iter, '\n');
825 ++k;
826 writeValue(iter, value(seqContigIt));
827 char c = ' ';
828 convertQuality(c, getQualityValue(value(seqContigIt)));
829 appendValue(qlt, c, Generous());
830 }
831 write(iter, "\n.\nqlt:\n");
832 writeWrappedString(iter, qlt, 60);
833 write(iter, ".\n");
834
835 while (alignIt != alignItEnd && idCount < alignIt->contigId)
836 goNext(alignIt);
837
838 for (; alignIt != alignItEnd && idCount == alignIt->contigId; goNext(alignIt))
839 {
840 write(iter, "{TLE\nsrc:");
841 appendNumber(iter, alignIt->readId + 1);
842 writeValue(iter, '\n');
843 typedef typename Iterator<String<typename TFragmentStore::TReadGapAnchor> >::Type TReadGapsIter;
844 TReadGapsIter itGaps = begin(alignIt->gaps);
845 TReadGapsIter itGapsEnd = end(alignIt->gaps);
846
847 // Create the gaps string and the clear ranges
848 typename TFragmentStore::TReadPos lenRead = length(value(fragStore.readSeqStore, alignIt->readId));
849 TSize clr1 = 0;
850 TSize clr2 = lenRead;
851 // Create first clear range
852 if (itGaps != itGapsEnd && itGaps->gapPos == 0)
853 clr1 = itGaps->seqPos;
854 int diff = clr1;
855 String<unsigned int> gaps;
856 for (; itGaps != itGapsEnd; goNext(itGaps))
857 {
858 for (int i = 0; i + itGaps->seqPos < diff + itGaps->gapPos; ++i)
859 appendValue(gaps, itGaps->seqPos - clr1, Generous());
860 // Clipped sequence
861 if (diff + itGaps->gapPos < itGaps->seqPos)
862 clr2 = lenRead + diff + itGaps->gapPos - itGaps->seqPos;
863 diff = (int)itGaps->seqPos - (int)itGaps->gapPos;
864 }
865 if (alignIt->beginPos > alignIt->endPos)
866 {
867 clr1 = lenRead - clr1;
868 clr2 = lenRead - clr2;
869 }
870 write(iter, "off:");
871 appendNumber(iter, std::min(alignIt->beginPos, alignIt->endPos));
872 write(iter, "\nclr:");
873 appendNumber(iter, clr1);
874 writeValue(iter, ',');
875 appendNumber(iter, clr2);
876 writeValue(iter, '\n');
877 if (!empty(gaps))
878 {
879 write(iter, "gap:\n");
880 for (TSize z = 0; z < length(gaps); ++z)
881 {
882 appendNumber(iter, gaps[z]);
883 writeValue(iter, '\n');
884 }
885 write(iter, ".\n");
886 }
887 write(iter, "}\n");
888 }
889 write(iter, "}\n");
890 }
891 }
892
893 // ----------------------------------------------------------------------------
894 // Function writeRecords()
895 // ----------------------------------------------------------------------------
896
897
898 /*!
899 * @fn FragmentStore#writeRecords
900 * @brief Write all records to a file.
901 *
902 * @signature void writeRecords(bamFileOut, store);
903 * @signature void writeRecords(gffFileOut, store);
904 * @signature void writeRecords(ucscFileIn, store);
905 *
906 * @param[in,out] bamFileOut The @link BamFileOut @endlink object to write into.
907 * @param[in,out] gffFileOut The @link GffFileOut @endlink object to write into.
908 * @param[in,out] ucscFileOut The @link UcscFileOut @endlink object to write into.
909 * @param[in] store The @link FragmentStore @endlink object.
910 *
911 * @throw IOError On low-level I/O errors.
912 */
913
914 //////////////////////////////////////////////////////////////////////////////
915
916 /*!
917 * @fn FragmentStore#writeContigs
918 * @brief Write contigs from FragmentStore into a @link StreamConcept @endlink.
919 *
920 * @signature bool writeContigs(file, store, tag);
921 *
922 * @param[in,out] file The @link StreamConcept @endlink to write to.
923 * @param[in] store The FragmentStore to write contigs of.
924 * @param[in] tag A tag for the sequence format.
925 *
926 * @return bool true on success, false on errors.
927 */
928
929 template <typename TSpec, typename TFSSpec, typename TFSConfig>
writeContigs(FormattedFile<Fastq,Output,TSpec> & file,FragmentStore<TFSSpec,TFSConfig> & store)930 bool writeContigs(FormattedFile<Fastq, Output, TSpec> & file, FragmentStore<TFSSpec, TFSConfig> & store)
931 {
932 //IOREV _doc_
933 for (unsigned i = 0; i < length(store.contigNameStore); ++i)
934 writeRecord(file, store.contigNameStore[i], store.contigStore[i].seq);
935 return true;
936 }
937
938 // ----------------------------------------------------------------------------
939 // Function readRecords()
940 // ----------------------------------------------------------------------------
941
942 /*!
943 * @fn FragmentStore#readRecords
944 * @brief Read all records from a file.
945 *
946 * @signature void readRecords(store, bamFileIn[, importFlags]);
947 * @signature void readRecords(store, gffFileIn);
948 * @signature void readRecords(store, ucscFileIn);
949 *
950 * @param[in,out] store The @link FragmentStore @endlink object to store the records into.
951 * @param[in,out] bamFileIn The @link BamFileIn @endlink object to read from.
952 * @param[in,out] gffFileIn The @link GffFileIn @endlink object to read from.
953 * @param[in,out] ucscFileIn The @link UcscFileIn @endlink object to read from.
954 * @param[in] importFlags The import flags.
955 *
956 * @throw IOError On low-level I/O errors.
957 * @throw ParseError On high-level file format errors.
958 */
959
960 //////////////////////////////////////////////////////////////////////////////
961
962 /*!
963 * @fn FragmentStore#loadContigs
964 * @brief Load contigs into a FragmentStore.
965 *
966 * @signature bool loadContigs(store, fileName[, loadSeqs]);
967 * @signature bool loadContigs(store, fileNameList[, loadSeqs]);
968 *
969 * @param[in,out] store The FragmentStore to append the contigs to.
970 * @param[in] fileName A @link CharString @endlink with the name of the file to load.
971 * @param[in] fileNameList A @link StringSet @endlink of @link CharString @endlink with a list of file names to
972 * load.
973 * @param[in] loadSeqs A <tt>bool</tt> indicating whether to load lazily. If <tt>true</tt> then sequences are
974 * loaded immediately. If <tt>false</tt>, an emptycontig with a reference to the file is
975 * created. Its sequence can be loaded on demand by @link FragmentStore#lockContig
976 * @endlink and @link FragmentStore#loadContig @endlink.
977 *
978 * @return bool true in case of success and false in case of error.
979 */
980
981 template <typename TFSSpec, typename TFSConfig>
loadContigs(FragmentStore<TFSSpec,TFSConfig> & store,StringSet<CharString> const & fileNameList,bool loadSeqs)982 bool loadContigs(FragmentStore<TFSSpec, TFSConfig> &store, StringSet<CharString> const &fileNameList, bool loadSeqs)
983 {
984 //IOREV _nodoc_ although there is dddoc, there is no entry in html-doc
985 typedef FragmentStore<TFSSpec, TFSConfig> TFragmentStore;
986 typedef typename TFragmentStore::TContigStore TContigStore;
987 typedef typename TFragmentStore::TContigFileStore TContigFileStore;
988 typedef typename Value<TContigStore>::Type TContig;
989 typedef typename Value<TContigFileStore>::Type TContigFile;
990
991 SeqFileIn seqFile;
992 CharString meta, seq;
993
994 for (unsigned f = 0; f < length(fileNameList); ++f)
995 {
996 if (!open(seqFile, toCString(fileNameList[f])))
997 return false;
998
999 TContigFile contigFile;
1000 contigFile.fileName = fileNameList[f];
1001 contigFile.firstContigId = length(store.contigStore);
1002 appendValue(store.contigFileStore, contigFile, Generous());
1003
1004 while (!atEnd(seqFile))
1005 {
1006 resize(store.contigStore, length(store.contigStore) + 1, Generous());
1007
1008 TContig & contig = back(store.contigStore);
1009 contig.usage = 0;
1010 contig.fileId = length(store.contigFileStore) - 1;
1011 contig.fileBeginPos = position(seqFile);
1012
1013 if (loadSeqs)
1014 readRecord(meta, contig.seq, seqFile);
1015 else
1016 readRecord(meta, seq, seqFile);
1017
1018 contig.fileEndPos = position(seqFile);
1019
1020 cropAfterFirst(meta, IsWhitespace());
1021 appendValue(store.contigNameStore, meta, Generous());
1022 }
1023 close(seqFile);
1024 }
1025 return true;
1026 }
1027
1028 //////////////////////////////////////////////////////////////////////////////
1029
1030 template <typename TFSSpec, typename TFSConfig>
loadContigs(FragmentStore<TFSSpec,TFSConfig> & store,CharString const & fileName,bool loadSeqs)1031 bool loadContigs(FragmentStore<TFSSpec, TFSConfig> &store, CharString const &fileName, bool loadSeqs)
1032 {
1033 StringSet<CharString> fileNames;
1034 appendValue(fileNames, fileName);
1035 return loadContigs(store, fileNames, loadSeqs);
1036 }
1037
1038 //////////////////////////////////////////////////////////////////////////////
1039
1040 template <typename TFSSpec, typename TFSConfig, typename TFileNames>
loadContigs(FragmentStore<TFSSpec,TFSConfig> & store,TFileNames const & fileNames)1041 bool loadContigs(FragmentStore<TFSSpec, TFSConfig> &store, TFileNames const &fileNames)
1042 {
1043 return loadContigs(store, fileNames, true);
1044 }
1045
1046 //////////////////////////////////////////////////////////////////////////////
1047
1048 /*!
1049 * @fn FragmentStore#loadContig
1050 * @brief Manually load a contig sequence.
1051 *
1052 * @signature bool loadContig(store, contigId);
1053 *
1054 * @param[in,out] store The FragmentStore to load the contig for.
1055 * @param[in] contigId The id of the contig that was created earlier by @link FragmentStore#loadContigs @endlink.
1056 *
1057 * @return bool true on success, false on failure.
1058 */
1059
1060 template <typename TSpec, typename TConfig, typename TId>
loadContig(FragmentStore<TSpec,TConfig> & store,TId _id)1061 bool loadContig(FragmentStore<TSpec, TConfig> &store, TId _id)
1062 {
1063 typedef FragmentStore<TSpec, TConfig> TFragmentStore;
1064 typedef typename TFragmentStore::TContigStore TContigStore;
1065 typedef typename TFragmentStore::TContigFileStore TContigFileStore;
1066 typedef typename Value<TContigStore>::Type TContig;
1067 typedef typename Value<TContigFileStore>::Type TContigFile;
1068
1069 if ((TId)length(store.contigStore) <= _id) return false;
1070 TContig &contig = store.contigStore[_id];
1071
1072 if (contig.fileId >= length(store.contigFileStore)) return false;
1073
1074 TContigFile &contigFile = store.contigFileStore[contig.fileId];
1075 CharString meta; // dummy (seq name is already in the name store)
1076
1077 SeqFileIn seqFile(toCString(contigFile.fileName));
1078 setPosition(seqFile, contig.fileBeginPos);
1079 readRecord(meta, contig.seq, seqFile); // read contig sequence
1080
1081 return true;
1082 }
1083
1084 //////////////////////////////////////////////////////////////////////////////
1085
1086 /*!
1087 * @fn FragmentStore#lockContig
1088 * @brief Locks a contig sequence from being removed.
1089 *
1090 * This function increases the contig usage counter by 1 and ensures that the contig sequence is loaded.
1091 *
1092 * @signature bool lockContig(store, contigId);
1093 *
1094 * @param[in,out] store The FragmentStore to lock the contig for.
1095 * @param[in] contigId The id of the contig that was created earlier by @link FragmentStore#loadContigs @endlink.
1096 *
1097 * @return bool true on success, false on failure.
1098 */
1099
1100 template <typename TSpec, typename TConfig, typename TId>
lockContig(FragmentStore<TSpec,TConfig> & store,TId _id)1101 bool lockContig(FragmentStore<TSpec, TConfig> &store, TId _id)
1102 {
1103 typedef FragmentStore<TSpec, TConfig> TFragmentStore;
1104 typedef typename TFragmentStore::TContigStore TContigStore;
1105 typedef typename Value<TContigStore>::Type TContig;
1106
1107 if ((TId)length(store.contigStore) <= _id) return false;
1108 TContig &contig = store.contigStore[_id];
1109
1110 if (contig.usage++ > 0 || !empty(contig.seq)) return true;
1111 return loadContig(store, _id);
1112 }
1113
1114 /*!
1115 * @fn FragmentStore#unlockContig
1116 * @brief Removes a previous contig lock.
1117 *
1118 * This function decreases the contig usage counter by 1.
1119 *
1120 * @signature bool unlockContig(store, contigId);
1121 *
1122 * @param[in,out] store The FragmentStore to unlock the contig for.
1123 * @param[in] contigId The id of the contig that was created earlier by @link FragmentStore#loadContigs @endlink.
1124 *
1125 * @return bool true on success, false on failure.
1126 */
1127
1128 template <typename TSpec, typename TConfig, typename TId>
unlockContig(FragmentStore<TSpec,TConfig> & store,TId _id)1129 bool unlockContig(FragmentStore<TSpec, TConfig> &store, TId _id)
1130 {
1131 if ((TId)length(store.contigStore) <= _id) return false;
1132 --store.contigStore[_id].usage;
1133 return true;
1134 }
1135
1136 /*!
1137 * @fn FragmentStore#unlockAndFreeContig
1138 * @brief Removes a previous contig lock and clears the sequence if no further lock exists.
1139 *
1140 * This function decreases the contig usage counter by 1 and frees the sequences' memory if the counter equals 0.
1141 *
1142 * @signature bool unlockContig(store, contigId);
1143 *
1144 * @param[in,out] store The FragmentStore to unlock the contig for.
1145 * @param[in] contigId The id of the contig that was created earlier by @link FragmentStore#loadContigs @endlink.
1146 *
1147 * @return bool true on success, false on failure.
1148 */
1149
1150 template <typename TSpec, typename TConfig, typename TId>
unlockAndFreeContig(FragmentStore<TSpec,TConfig> & store,TId _id)1151 bool unlockAndFreeContig(FragmentStore<TSpec, TConfig> &store, TId _id)
1152 {
1153 typedef FragmentStore<TSpec, TConfig> TFragmentStore;
1154 typedef typename TFragmentStore::TContigStore TContigStore;
1155 typedef typename Value<TContigStore>::Type TContig;
1156
1157 if ((TId)length(store.contigStore) <= _id) return false;
1158 TContig &contig = store.contigStore[_id];
1159
1160 if (--contig.usage == 0 && contig.fileId < length(store.contigFileStore))
1161 {
1162 typename TContig::TContigSeq emptySeq;
1163 swap(contig.seq, emptySeq);
1164 return true;
1165 }
1166 return false;
1167 }
1168
1169 /*!
1170 * @fn FragmentStore#lockContigs
1171 * @brief Locks all contig sequences from being remove.
1172 *
1173 * @signature bool lockContigs(store);
1174 *
1175 * @param[in,out] store The FragmentStore to lock the contigs for.
1176 *
1177 * @return bool true in case of success, false in case of errors.
1178 */
1179
1180 template <typename TSpec, typename TConfig>
lockContigs(FragmentStore<TSpec,TConfig> & store)1181 bool lockContigs(FragmentStore<TSpec, TConfig> &store)
1182 {
1183 bool result = true;
1184 for (unsigned _id = 0; _id < length(store.contigStore); ++_id)
1185 result &= lockContig(store, _id);
1186 return result;
1187 }
1188
1189 /*!
1190 * @fn FragmentStore#unlockContigs
1191 * @brief Unlocks all contig sequences.
1192 *
1193 * @signature bool unlockContigs(store);
1194 *
1195 * @param[in,out] store The FragmentStore to unlock the contigs for.
1196 *
1197 * @return bool true in case of success, false in case of errors.
1198 */
1199
1200 template <typename TSpec, typename TConfig>
unlockContigs(FragmentStore<TSpec,TConfig> & store)1201 bool unlockContigs(FragmentStore<TSpec, TConfig> &store)
1202 {
1203 bool result = true;
1204 for (unsigned _id = 0; _id < length(store.contigStore); ++_id)
1205 result &= unlockContig(store, _id);
1206 return result;
1207 }
1208
1209 /*!
1210 * @fn FragmentStore#unlockAndFreeContigs
1211 * @brief Unlocks all contig sequences and clears sequences without lock.
1212 *
1213 * @signature bool unlockAndFreeContigs(store);
1214 *
1215 * @param[in,out] store The FragmentStore to unlock the contigs for.
1216 *
1217 * @return bool true in case of success, false in case of errors.
1218 */
1219
1220 template <typename TSpec, typename TConfig>
unlockAndFreeContigs(FragmentStore<TSpec,TConfig> & store)1221 bool unlockAndFreeContigs(FragmentStore<TSpec, TConfig> &store)
1222 {
1223 bool result = true;
1224 for (unsigned _id = 0; _id < length(store.contigStore); ++_id)
1225 result &= unlockAndFreeContig(store, _id);
1226 return result;
1227 }
1228
1229
1230 //////////////////////////////////////////////////////////////////////////////
1231
1232 /*!
1233 * @fn FragmentStore#loadReads
1234 * @brief Loads reads into FragmentStore
1235 *
1236 * When two file names are given thent he files are expected to containt he same number of reads and reads with the same
1237 * index are assumed to be mate pairs. Mate pairs are stored internally in an "interleaved mode": a read is read from
1238 * each file before reading the next one.
1239 *
1240 * @signature bool loadReads(store, fileName);
1241 * @signature bool loadReads(store, fileNameL, fileNameR);
1242 *
1243 * @param[in,out] store The FragmentStore to append the reads to.
1244 * @param[in] fileName Path to single-end read file.
1245 * @param[in] fileNameL Path to left read file in case of paired reads.
1246 * @param[in] fileNameR Path to right read file in case of paired reads.
1247 *
1248 * @return bool true in case of success, false in case of errors.
1249 */
1250
1251 template <typename TSpec, typename TConfig, typename TFileName>
loadReads(FragmentStore<TSpec,TConfig> & store,TFileName & fileName)1252 bool loadReads(FragmentStore<TSpec, TConfig> &store, TFileName &fileName)
1253 {
1254 SEQAN_ASSERT_EQ(length(store.readStore), length(store.readSeqStore));
1255 SEQAN_ASSERT_EQ(length(store.readStore), length(store.readNameStore));
1256
1257 typedef typename FragmentStore<TSpec, TConfig>::TReadStore TReadStore;
1258 typedef typename Value<TReadStore>::Type TReadStoreElement;
1259
1260 SeqFileIn seqFile;
1261 if (!open(seqFile, toCString(fileName)))
1262 return false;
1263
1264 readRecords(store.readNameStore, store.readSeqStore, seqFile);
1265 resize(store.readStore, length(store.readSeqStore), TReadStoreElement());
1266 return true;
1267 }
1268
1269
1270 template <typename TSpec, typename TConfig, typename TFileName>
loadReads(FragmentStore<TSpec,TConfig> & store,TFileName & fileNameL,TFileName & fileNameR)1271 bool loadReads(FragmentStore<TSpec, TConfig> & store, TFileName & fileNameL, TFileName & fileNameR)
1272 {
1273 SEQAN_ASSERT_EQ(length(store.readStore), length(store.readSeqStore));
1274 SEQAN_ASSERT_EQ(length(store.readStore), length(store.readNameStore));
1275
1276 typedef typename FragmentStore<TSpec, TConfig>::TReadSeq TReadSeq;
1277
1278 SeqFileIn seqFileL, seqFileR;
1279 if (!open(seqFileL, toCString(fileNameL)) || !open(seqFileR, toCString(fileNameR)))
1280 return false;
1281
1282 // Read in sequences
1283 TReadSeq seq[2];
1284 CharString meta[2];
1285
1286 while (!atEnd(seqFileL) && !atEnd(seqFileR))
1287 {
1288 readRecord(meta[0], seq[0], seqFileL);
1289 readRecord(meta[1], seq[1], seqFileR);
1290 appendMatePair(store, seq[0], seq[1], meta[0], meta[1]);
1291 }
1292 return true;
1293 }
1294
1295 } // namespace seqan
1296
1297 #endif //#ifndef SEQAN_HEADER_...
1298