1 // ==========================================================================
2 //                 SeqAn - The Library for Sequence Analysis
3 // ==========================================================================
4 // Copyright (c) 2006-2018, Knut Reinert, FU Berlin
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 //     * Redistributions of source code must retain the above copyright
11 //       notice, this list of conditions and the following disclaimer.
12 //     * Redistributions in binary form must reproduce the above copyright
13 //       notice, this list of conditions and the following disclaimer in the
14 //       documentation and/or other materials provided with the distribution.
15 //     * Neither the name of Knut Reinert or the FU Berlin nor the names of
16 //       its contributors may be used to endorse or promote products derived
17 //       from this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
23 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 // DAMAGE.
30 //
31 // ==========================================================================
32 // Author: David Weese <david.weese@fu-berlin.de>
33 // ==========================================================================
34 
35 #ifndef SEQAN_HEADER_STORE_IO_H
36 #define SEQAN_HEADER_STORE_IO_H
37 
38 /* IOREV
39  *
40  * _doc_
41  *
42  *
43  * if this file is about the amos file format why isn't it named accordingly?
44  *
45  * altogether it is unclear why sequence io is in file/ but store io is in
46  * store/
47  *
48  */
49 
50 
51 
52 namespace seqan
53 {
54 
55 //////////////////////////////////////////////////////////////////////////////
56 // File tags
57 //////////////////////////////////////////////////////////////////////////////
58 
59 //////////////////////////////////////////////////////////////////////////////
60 
61 struct TagAmos_;
62 typedef Tag<TagAmos_> const Amos;
63 
64 
65 //////////////////////////////////////////////////////////////////////////////
66 // Auxillary functions
67 //////////////////////////////////////////////////////////////////////////////
68 
69 //////////////////////////////////////////////////////////////////////////////
70 
71 /*!
72  * @fn FragmentStore#getClrRange
73  * @brief Get the "clear" range of a read alignment.
74  *
75  * The clear range of a read alignment is the range of the part of the alignmetn that is not clipped.
76  *
77  * @signature void getClrRange(store, alignEl, begClr, endClr);
78  *
79  * @param[in]  store    The FragmentStore to work on.
80  * @param[in]  alignEl  The @link AlignedReadStoreElement @endlink to work on.
81  * @param[out] begClr   Begin of the clear range.
82  * @param[out] endClr   End of the clear range.
83  */
84 
85 
86 template <typename TSpec, typename TConfig, typename TPos, typename TGapAnchor, typename TSpecAlign, typename TBeginClr, typename TEndClr>
87 inline void
getClrRange(FragmentStore<TSpec,TConfig> const & fragStore,AlignedReadStoreElement<TPos,TGapAnchor,TSpecAlign> const & alignEl,TBeginClr & begClr,TEndClr & endClr)88 getClrRange(FragmentStore<TSpec, TConfig> const& fragStore,
89             AlignedReadStoreElement<TPos, TGapAnchor, TSpecAlign> const& alignEl,
90             TBeginClr& begClr,        // Out-parameter: left / begin position of the clear range
91             TEndClr& endClr)        // Out-parameter: right / end position of the clear range
92 {
93     typedef FragmentStore<TSpec, TConfig> TFragmentStore;
94     typedef typename Size<TFragmentStore>::Type TSize;
95     typedef typename Iterator<String<TGapAnchor> const, Standard>::Type TGapIter;
96 
97     TSize lenRead = length(fragStore.readSeqStore[alignEl.readId]);
98     TGapIter itGap = begin(alignEl.gaps, Standard());
99     TGapIter itGapEnd = end(alignEl.gaps, Standard());
100 
101     // Any gaps or clipped characters?
102     if (itGap == itGapEnd) {
103         begClr = 0;
104         endClr = lenRead;
105     } else {
106         // Begin clear range
107         begClr = (itGap->gapPos == 0) ? itGap->seqPos : 0;
108         // End clear range
109         --itGapEnd;
110         if (static_cast<TSize>(itGapEnd->seqPos) != lenRead) endClr = lenRead;
111         else {
112             int diff = (itGap != itGapEnd) ? (*(itGapEnd - 1)).gapPos - (*(itGapEnd-1)).seqPos : 0;
113             int newDiff = itGapEnd->gapPos - itGapEnd->seqPos;
114             endClr = (newDiff < diff) ? lenRead - (diff - newDiff) : lenRead;
115         }
116     }
117 
118     // For reverse reads adapt clear ranges
119     if (alignEl.beginPos > alignEl.endPos) {
120         TBeginClr tmp = begClr;
121         begClr = lenRead - endClr;
122         endClr = lenRead - tmp;
123     }
124 }
125 
126 
127 
128 //////////////////////////////////////////////////////////////////////////////
129 // Read / Write of AMOS message files (*.afg)
130 //////////////////////////////////////////////////////////////////////////////
131 
132 
133 //////////////////////////////////////////////////////////////////////////////
134 
135 /*!
136  * @fn FragmentStore#read
137  * @brief Read the contents of a FragmentStore from a file.
138  *
139  * @signature int read(file, store, tag);
140  *
141  * @param[in,out] file  The @link StreamConcept @endlink to read from.
142  * @param[in,out] store The FragmentStore to append to.
143  * @param[in]     tag   The format to read from.  Can be Amos or Sam.
144  *
145  * @return int 0 in the case of success, non-0 value in case of errors.
146  */
147 
148 template <typename TKey, typename TValue, typename TIter>
readAmosKeyValue(TKey & key,TValue & value,TIter & reader)149 void readAmosKeyValue(TKey &key, TValue &value, TIter &reader)
150 {
151     clear(key);
152     clear(value);
153 
154     // read "KEY:VALUE" pair
155     readUntil(key, reader, EqualsChar<':'>());
156     skipOne(reader, EqualsChar<':'>());
157     readUntil(value, reader, IsWhitespace());
158 }
159 
160 template<typename TFile, typename TSpec, typename TConfig>
161 inline int
read(FragmentStore<TSpec,TConfig> & fragStore,TFile & file,Amos)162 read(FragmentStore<TSpec, TConfig>& fragStore,
163      TFile & file,
164      Amos)
165 {
166     // Basic types
167     typedef FragmentStore<TSpec, TConfig> TFragmentStore;
168     typedef typename Id<TFragmentStore>::Type TId;
169     typedef typename Size<TFragmentStore>::Type TSize;
170     //typedef typename Value<TFile>::Type TValue;
171 
172     // All fragment store element types
173     typedef typename Value<typename TFragmentStore::TContigStore>::Type TContigElement;
174     typedef typename Value<typename TFragmentStore::TLibraryStore>::Type TLibraryStoreElement;
175     typedef typename Value<typename TFragmentStore::TMatePairStore>::Type TMatePairElement;
176     typedef typename Value<typename TFragmentStore::TReadStore>::Type TReadStoreElement;
177     typedef typename Value<typename TFragmentStore::TAlignedReadStore>::Type TAlignedElement;
178     typedef typename TFragmentStore::TReadSeq TReadSeq;
179     typedef typename TFragmentStore::TContigSeq TContigSeq;
180 
181     // All maps to mirror file ids to our ids
182     typedef std::map<TId, TSize> TIdMap;
183     // The following maps the library/fragment/read id from AMOS into the fragment store's ids.
184     TIdMap libIdMap;
185     TIdMap frgIdMap;
186     TIdMap readIdMap;
187     // For all paired reads (inferred from FRG), a mapping from the AMOS read id to the paired match id of the one
188     // alignment read from the AMOS file.  Note that this has the assumption that there is only one alignment per read
189     // in the AMOS file.
190     TIdMap readToPairMatchId;
191     // The id of the next pair match.
192     unsigned nextPairMatchId = 0;
193 
194     typename DirectionIterator<TFile, Input>::Type reader(file);
195 
196     // Parse the file and convert the internal ids
197     if (atEnd(reader))
198         return 1;
199 
200     CharString  blockIdentifier, fieldIdentifier, eid, buffer;
201     TReadSeq    readSeq;
202     CharString  contigSeq;
203     CharString  qual;
204     TId _id;
205 
206     while (!atEnd(reader))
207     {
208         // New block?
209         if (value(reader) == '{')
210         {
211             skipOne(reader, EqualsChar<'{'>());
212 
213             clear(blockIdentifier);
214             readUntil(blockIdentifier, reader, NotFunctor<IsAlpha>());
215             skipLine(reader);
216 
217             // reset id and eid
218             _id = TReadStoreElement::INVALID_ID;
219             clear(eid);
220 
221             // Library block
222             if (blockIdentifier == "LIB")
223             {
224                 // The LIB block contains the mean template length/insert size and standard deviation thereof.  Besides
225                 // the numeric id and (iid) a (external) name of the library (eid).
226                 TLibraryStoreElement libEl;
227                 while (!atEnd(reader) && value(reader) != '}')
228                 {
229                     // read "KEY:VALUE" pair
230                     readAmosKeyValue(fieldIdentifier, buffer, reader);
231 
232                     if (fieldIdentifier == "iid")
233                         _id = lexicalCast<TId>(buffer);
234                     else if (fieldIdentifier == "eid")
235                         eid = buffer;
236                     else if (fieldIdentifier == "mea")
237                         libEl.mean = lexicalCast<double>(buffer);
238                     else if (fieldIdentifier == "std")
239                         libEl.std = lexicalCast<double>(buffer);
240 
241                     // Always skip over the remainder of the line.
242                     skipUntil(reader, NotFunctor<IsWhitespace>());
243                 }
244                 skipOne(reader, EqualsChar<'}'>());
245 
246                 // Insert library information into the fragmentstore.
247                 libIdMap.insert(std::make_pair(_id, length(fragStore.libraryStore)));
248                 appendValue(fragStore.libraryStore, libEl, Generous());
249                 appendValue(fragStore.libraryNameStore, eid, Generous());
250             }
251             // Fragment block
252             else if (blockIdentifier == "FRG")
253             {
254                 // The FRG block contains the parent library (lib), links to the paired sequencing reads (rds), internal
255                 // id (iid) and external id (eid).
256                 bool foundRds = false;
257                 TMatePairElement matePairEl;
258                 while (!atEnd(reader) && value(reader) != '}')
259                 {
260                     // read "KEY:VALUE" pair
261                     readAmosKeyValue(fieldIdentifier, buffer, reader);
262 
263                     if (fieldIdentifier == "iid")
264                         _id = lexicalCast<TId>(buffer);
265                     else if (fieldIdentifier == "eid")
266                         eid = buffer;
267                     else if (fieldIdentifier == "lib")
268                         matePairEl.libId = lexicalCast<TId>(buffer);
269                     else if (fieldIdentifier == "rds")
270                     {
271                         foundRds = true;
272                         size_t comma = findFirst(buffer, EqualsChar<','>());
273                         matePairEl.readId[0] = lexicalCast<TId>(prefix(buffer, comma));
274                         matePairEl.readId[1] = lexicalCast<TId>(suffix(buffer, comma + 1));
275 
276                         // Store mapping to pair match id.
277                         readToPairMatchId[matePairEl.readId[0]] = nextPairMatchId;
278                         readToPairMatchId[matePairEl.readId[1]] = nextPairMatchId;
279                         nextPairMatchId++;
280                     }
281                     // Always skip over the remainder of the line.
282                     skipUntil(reader, NotFunctor<IsWhitespace>());
283                 }
284                 skipOne(reader, EqualsChar<'}'>());
285 
286                 // Only insert valid mate pairs
287                 if (foundRds)
288                 {
289                     frgIdMap.insert(std::make_pair(_id, length(fragStore.matePairStore)));
290                     appendValue(fragStore.matePairStore, matePairEl, Generous());
291                     appendValue(fragStore.matePairNameStore, eid, Generous());
292                 }
293             }
294             // Read block
295             else if (blockIdentifier == "RED")
296             {
297                 // If matePairId is not updated, this yields to a singleton read below.
298                 clear(readSeq);
299                 clear(qual);
300                 TId matePairId = TReadStoreElement::INVALID_ID;
301                 while (!atEnd(reader) && value(reader) != '}')
302                 {
303                     // read "KEY:VALUE" pair
304                     readAmosKeyValue(fieldIdentifier, buffer, reader);
305 
306                     if (fieldIdentifier == "iid")
307                         _id = lexicalCast<TId>(buffer);
308                     else if (fieldIdentifier == "eid")
309                         eid = buffer;
310                     else if (fieldIdentifier == "frg")
311                         matePairId = lexicalCast<TId>(buffer);
312                     else if (fieldIdentifier == "seq")
313                     {
314                         readUntil(readSeq, reader, EqualsChar<'.'>(), IsWhitespace());
315                         skipOne(reader, EqualsChar<'.'>());
316                     }
317                     else if (fieldIdentifier == "qlt")
318                     {
319                         // TODO(holtgrew): Problematic if . part of qualities, need to count chars.
320                         readUntil(qual, reader, EqualsChar<'.'>(), IsWhitespace());
321                         skipOne(reader, EqualsChar<'.'>());
322                     }
323                     // Always skip over the remainder of the line.
324                     skipUntil(reader, NotFunctor<IsWhitespace>());
325                 }
326                 // Set quality
327                 assignQualities(readSeq, qual);
328                 skipOne(reader, EqualsChar<'}'>());
329 
330                 // Insert the read
331                 readIdMap.insert(std::make_pair(_id, length(fragStore.readStore)));
332                 appendRead(fragStore, readSeq, matePairId);
333                 appendValue(fragStore.readNameStore, eid, Generous());
334             }
335             // Contig block
336             else if (blockIdentifier == "CTG")
337             {
338                 clear(contigSeq);
339                 clear(qual);
340                 TContigElement contigEl;
341                 TSize fromAligned = length(fragStore.alignedReadStore);
342                 while (!atEnd(reader) && value(reader) != '}')
343                 {
344                     // Are we entering a TLE block
345                     if (value(reader) == '{')
346                     {
347                         // skip "{TLE\n"
348                         skipOne(reader, EqualsChar<'{'>());
349                         skipOne(reader, EqualsChar<'T'>());
350                         skipOne(reader, EqualsChar<'L'>());
351                         skipOne(reader, EqualsChar<'E'>());
352                         skipUntil(reader, NotFunctor<IsWhitespace>());
353 
354                         TAlignedElement alignEl;
355                         typedef typename TFragmentStore::TContigPos TContigPos;
356                         TContigPos offsetPos = 0;
357                         TContigPos clr1 = 0;
358                         TContigPos clr2 = 0;
359                         String<TContigPos> gaps;
360                         while (!atEnd(reader) && value(reader) != '}')
361                         {
362                             // read "KEY:VALUE" pair
363                             readAmosKeyValue(fieldIdentifier, buffer, reader);
364 
365                             if (fieldIdentifier == "src")
366                                 alignEl.readId = lexicalCast<TId>(buffer);
367                             else if (fieldIdentifier == "off")
368                             {
369                                 if (buffer != "-")
370                                     offsetPos = lexicalCast<TContigPos>(buffer);
371                                 else
372                                     offsetPos = 0;
373                             }
374                             else if (fieldIdentifier == "clr")
375                             {
376                                 size_t comma = findFirst(buffer, EqualsChar<','>());
377                                 clr1 = lexicalCast<TContigPos>(prefix(buffer, comma));
378                                 clr2 = lexicalCast<TContigPos>(suffix(buffer, comma + 1));
379                             }
380                             else if (fieldIdentifier == "gap")
381                             {
382                                 skipUntil(reader, NotFunctor<IsWhitespace>());
383                                 while (!atEnd(reader) && value(reader) != '.')
384                                 {
385                                     clear(buffer);
386                                     readUntil(buffer, reader, IsWhitespace());
387                                     appendValue(gaps, lexicalCast<TContigPos>(buffer));
388                                     skipUntil(reader, NotFunctor<IsWhitespace>());
389                                 }
390                                 skipOne(reader, EqualsChar<'.'>());
391                             }
392                             // Always skip over the remainder of the line.
393                             skipUntil(reader, NotFunctor<IsWhitespace>());
394                         }
395                         skipOne(reader, EqualsChar<'}'>());
396                         skipUntil(reader, NotFunctor<IsWhitespace>());
397 
398                         // Get the length of the read
399                         TId readId = (readIdMap.find(alignEl.readId))->second;
400                         TSize lenRead = length(value(fragStore.readSeqStore, readId));
401 
402                         // Create the gap anchors
403                         typedef typename TFragmentStore::TContigGapAnchor TContigGapAnchor;
404                         int offset = 0;
405                         if ((clr1 < clr2) && (clr1>0)) offset = clr1;
406                         else if ((clr1 > clr2) && (clr1 < static_cast<TContigPos>(lenRead))) offset = lenRead - clr1;
407                         int diff = -1 * (int) (offset);
408                         // Clipped begin
409                         if (offset != 0) appendValue(alignEl.gaps, TContigGapAnchor(offset, 0), Generous());
410                         // Internal gaps
411                         typedef typename Iterator<String<TContigPos>, Standard>::Type TPosIter;
412                         TPosIter posIt = begin(gaps, Standard());
413                         TPosIter posItEnd = end(gaps, Standard());
414                         TContigPos lastGap = 0;
415                         TSize gapLen = 0;
416                         TSize totalGapLen = 0;
417                         for(;posIt!=posItEnd; goNext(posIt)) {
418                             if (gapLen == 0) {
419                                 ++gapLen; ++totalGapLen;
420                                 ++diff;
421                                 lastGap = value(posIt);
422                             }
423                             else if (lastGap == value(posIt)) {
424                                 ++gapLen; ++totalGapLen;
425                                 ++diff;
426                             }
427                             else {
428                                 appendValue(alignEl.gaps, TContigGapAnchor(offset + lastGap, offset + lastGap + diff), Generous());
429                                 gapLen = 1; ++totalGapLen;
430                                 lastGap = value(posIt);
431                                 ++diff;
432                             }
433                         }
434                         if (gapLen > 0) appendValue(alignEl.gaps, TContigGapAnchor(offset + lastGap, offset + lastGap + diff), Generous());
435                         // Clipped end
436                         if ((clr1 < clr2) && (clr2 < static_cast<TContigPos>(lenRead))) {
437                             diff -= (lenRead - clr2);
438                             appendValue(alignEl.gaps, TContigGapAnchor(lenRead, lenRead + diff), Generous());
439                         } else if ((clr1 > clr2) && (clr2 > 0)) {
440                             diff -= clr2;
441                             appendValue(alignEl.gaps, TContigGapAnchor(lenRead, lenRead + diff), Generous());
442                         }
443 
444                         // Set begin and end position
445                         if (clr1 < clr2) {
446                             alignEl.beginPos = offsetPos;
447                             alignEl.endPos = offsetPos + totalGapLen + (clr2 - clr1);
448                         } else {
449                             alignEl.beginPos = offsetPos + totalGapLen + (clr1 - clr2);
450                             alignEl.endPos = offsetPos;
451                         }
452 
453                         // Append new align fragment, note: contigId must still be set
454                         alignEl.id = length(fragStore.alignedReadStore);
455                         appendValue(fragStore.alignedReadStore, alignEl, Generous());
456                     }
457                     else
458                     {
459                         // read "KEY:VALUE" pair
460                         readAmosKeyValue(fieldIdentifier, buffer, reader);
461 
462                         if (fieldIdentifier == "iid")
463                             _id = lexicalCast<TId>(buffer);
464                         else if (fieldIdentifier == "eid")
465                             eid = buffer;
466                         else if (fieldIdentifier == "seq")
467                         {
468                             readUntil(contigSeq, reader, EqualsChar<'.'>(), IsWhitespace());
469                             skipOne(reader, EqualsChar<'.'>());
470                         }
471                         else if (fieldIdentifier == "qlt")
472                         {
473                             // TODO(holtgrew): Problematic if . part of qualities, need to count chars.
474                             readUntil(qual, reader, EqualsChar<'.'>(), IsWhitespace());
475                             skipOne(reader, EqualsChar<'.'>());
476                         }
477                         // Always skip over the remainder of the line.
478                         skipUntil(reader, NotFunctor<IsWhitespace>());
479                     }
480                 }
481                 // Set quality
482                 skipOne(reader, EqualsChar<'}'>());
483 
484                 // Create the gap anchors
485                 char gapChar = gapValue<char>();
486                 typedef typename Iterator<CharString, Standard>::Type TStringIter;
487                 TStringIter seqIt = begin(contigSeq, Standard());
488                 TStringIter seqItEnd = end(contigSeq, Standard());
489                 TStringIter qualIt = begin(qual, Standard());
490                 typedef typename TFragmentStore::TReadPos TPos;
491                 typedef typename TFragmentStore::TContigGapAnchor TContigGapAnchor;
492                 TPos ungappedPos = 0;
493                 TPos gappedPos = 0;
494                 bool gapOpen = false;
495                 for (; seqIt != seqItEnd; goNext(seqIt), goNext(qualIt), ++gappedPos)
496                 {
497                     if (value(seqIt) == gapChar)
498                     {
499                         gapOpen = true;
500                     }
501                     else
502                     {
503                         if (gapOpen)
504                         {
505                             appendValue(contigEl.gaps, TContigGapAnchor(ungappedPos, gappedPos), Generous());
506                             gapOpen = false;
507                         }
508                         typename Value<TContigSeq>::Type letter = getValue(seqIt);
509                         assignQualityValue(letter, getValue(qualIt));
510                         appendValue(contigEl.seq, letter, Generous());
511                         ++ungappedPos;
512                     }
513                 }
514                 if (gapOpen)
515                     appendValue(contigEl.gaps, TContigGapAnchor(ungappedPos, gappedPos), Generous());
516 
517                 // Set the contigId in all aligned reads
518                 TSize toAligned = length(fragStore.alignedReadStore);
519                 TId newContigId = length(fragStore.contigStore);
520                 for (; fromAligned < toAligned; ++fromAligned)
521                     fragStore.alignedReadStore[fromAligned].contigId = newContigId;
522 
523                 // Insert the contig
524                 appendValue(fragStore.contigStore, contigEl, Generous());
525                 appendValue(fragStore.contigNameStore, eid, Generous());
526             }
527             // Unknown block identifier
528             else
529             {
530                 // skip until closing bracket
531                 while (!atEnd(reader) && value(reader) != '}')
532                     skipLine(reader);
533             }
534         }
535         else
536         {
537             // not a block - skip line
538             skipLine(reader);
539         }
540     }
541 
542     // Renumber all ids
543     typedef typename TIdMap::const_iterator TIdMapIter;
544     typedef typename Iterator<typename TFragmentStore::TMatePairStore>::Type TMateIter;
545     TMateIter mateIt = begin(fragStore.matePairStore);
546     TMateIter mateItEnd = end(fragStore.matePairStore);
547     for(;mateIt != mateItEnd; goNext(mateIt)) {
548         if (mateIt->libId != TMatePairElement::INVALID_ID) {
549             TIdMapIter libIdPos = libIdMap.find(mateIt->libId);
550             if (libIdPos != libIdMap.end())
551                 mateIt->libId = libIdPos->second;
552             else
553                 mateIt->libId = TMatePairElement::INVALID_ID;
554         }
555         if (mateIt->readId[0] != TMatePairElement::INVALID_ID) {
556             TIdMapIter readIdPos = readIdMap.find(mateIt->readId[0]);
557             if (readIdPos != readIdMap.end())
558                 mateIt->readId[0] = readIdPos->second;
559             else
560                 mateIt->readId[0] = TMatePairElement::INVALID_ID;
561         }
562         if (mateIt->readId[1]!= TMatePairElement::INVALID_ID) {
563             TIdMapIter readIdPos = readIdMap.find(mateIt->readId[1]);
564             if (readIdPos != readIdMap.end())
565                 mateIt->readId[1] = readIdPos->second;
566             else
567                 mateIt->readId[0] = TMatePairElement::INVALID_ID;
568         }
569     }
570 
571     // Copy data from frgIdMap into the matePairId members of the readStore.
572     typedef typename Iterator<typename TFragmentStore::TReadStore>::Type TReadIter;
573     TReadIter readIt = begin(fragStore.readStore);
574     TReadIter readItEnd = end(fragStore.readStore);
575     for (;readIt != readItEnd; goNext(readIt))
576     {
577         if (readIt->matePairId != TReadStoreElement::INVALID_ID)
578         {
579             TIdMapIter mateIdPos = frgIdMap.find(readIt->matePairId);
580             if (mateIdPos != frgIdMap.end())
581                 readIt->matePairId = mateIdPos->second;
582             else
583                 readIt->matePairId = TReadStoreElement::INVALID_ID;
584         }
585     }
586 
587     // Copy data from readIdMap into the pairMatchId entries of the alignedReadStore.
588     typedef typename Iterator<typename TFragmentStore::TAlignedReadStore>::Type TAlignIter;
589     TAlignIter alignIt = begin(fragStore.alignedReadStore);
590     TAlignIter alignItEnd = end(fragStore.alignedReadStore);
591     for (;alignIt != alignItEnd; goNext(alignIt))
592     {
593         if (alignIt->readId != TAlignedElement::INVALID_ID)
594         {
595             TIdMapIter readIdPos = readIdMap.find(alignIt->readId);
596             if (readIdPos != readIdMap.end())
597             {
598                 //SEQAN_ASSERT(readToPairMatchId.find(alignIt->readId) != readToPairMatchId.end());
599                 if (readToPairMatchId.find(alignIt->readId) != readToPairMatchId.end())
600                     alignIt->pairMatchId = readToPairMatchId[alignIt->readId];
601                 alignIt->readId = readIdPos->second;
602             }
603             else
604             {
605                 alignIt->readId = TAlignedElement::INVALID_ID;
606             }
607         }
608     }
609 
610     return 0;
611 }
612 
613 
614 //////////////////////////////////////////////////////////////////////////////
615 
616 /*!
617  * @fn FragmentStore#write
618  * @brief Write the contents of a FragmentStore to a file.
619  *
620  * @signature int write(file, store, tag);
621  *
622  * @param[in,out] file  The @link StreamConcept @endlink to write to.
623  * @param[in]     store The FragmentStore to write to the file.
624  * @param[in]     tag   The format to write out.  Types: Sam or Amos.
625  *
626  * @return int 0 in case of success, 1 in case of errors.
627  */
628 
629 template <typename TTarget, typename TSpec, typename TConfig>
630 inline void
write(TTarget & target,FragmentStore<TSpec,TConfig> & fragStore,Amos)631 write(TTarget & target,
632       FragmentStore<TSpec, TConfig> & fragStore,
633       Amos)
634 {
635     // Basic types
636     typedef FragmentStore<TSpec, TConfig> TFragmentStore;
637     //typedef typename Id<TFragmentStore>::Type TId;
638     typedef typename Size<TFragmentStore>::Type TSize;
639     //typedef typename Value<TFile>::Type TValue;
640 
641     // All fragment store element types
642     //typedef typename Value<typename TFragmentStore::TContigStore>::Type TContigElement;
643     //typedef typename Value<typename TFragmentStore::TLibraryStore>::Type TLibraryStoreElement;
644     typedef typename Value<typename TFragmentStore::TMatePairStore>::Type TMatePairElement;
645     typedef typename Value<typename TFragmentStore::TReadStore>::Type TReadStoreElement;
646     //typedef typename Value<typename TFragmentStore::TAlignedReadStore>::Type TAlignedElement;
647 
648     typename DirectionIterator<TTarget, Output>::Type iter = directionIterator(target, Output());
649 
650     // Write Header
651     write(iter, "{UNV\niid:1\neid:seqan\ncom:\nafg file created with SeqAn\n.\n}\n");
652 
653     // Write Libraries
654     typedef typename Iterator<typename TFragmentStore::TLibraryStore, Standard>::Type TLibIter;
655     TLibIter libIt = begin(fragStore.libraryStore, Standard() );
656     TLibIter libItEnd = end(fragStore.libraryStore, Standard() );
657     for(TSize idCount = 0;libIt != libItEnd; goNext(libIt), ++idCount)
658     {
659         write(iter, "{LIB\niid:");
660         appendNumber(iter, idCount + 1);
661         writeValue(iter, '\n');
662         if (!empty(fragStore.libraryNameStore) && !empty(fragStore.libraryNameStore[idCount]))
663         {
664             write(iter, "eid:");
665             write(iter, fragStore.libraryNameStore[idCount]);
666             writeValue(iter, '\n');
667         }
668         write(iter, "{DST\nmea:");
669         appendNumber(iter, libIt->mean);
670         writeValue(iter, '\n');
671         write(iter, "std:");
672         appendNumber(iter, libIt->std);
673         writeValue(iter, '\n');
674         write(iter, "}\n}\n");
675     }
676 
677     // Write Fragments / mate pairs
678     typedef typename Iterator<typename TFragmentStore::TMatePairStore, Standard>::Type TMateIter;
679     TMateIter mateIt = begin(fragStore.matePairStore, Standard() );
680     TMateIter mateItEnd = end(fragStore.matePairStore, Standard() );
681     for(TSize idCount = 0;mateIt != mateItEnd; goNext(mateIt), ++idCount)
682     {
683         write(iter, "{FRG\niid:");
684         appendNumber(iter, idCount + 1);
685         writeValue(iter, '\n');
686         if (!empty(fragStore.matePairNameStore) && !empty(fragStore.matePairNameStore[idCount]))
687         {
688             write(iter, "eid:");
689             write(iter, fragStore.matePairNameStore[idCount]);
690             writeValue(iter, '\n');
691         }
692         write(iter, "lib:");
693         appendNumber(iter, mateIt->libId + 1);
694         writeValue(iter, '\n');
695         if (mateIt->readId[0] != TMatePairElement::INVALID_ID && mateIt->readId[1] != TMatePairElement::INVALID_ID)
696         {
697             write(iter, "rds:");
698             appendNumber(iter, mateIt->readId[0] + 1);
699             writeValue(iter, ',');
700             appendNumber(iter, mateIt->readId[1] + 1);
701             writeValue(iter, '\n');
702         }
703         write(iter, "}\n");
704     }
705 
706     // Get clear ranges
707     typedef Pair<typename TFragmentStore::TReadPos, typename TFragmentStore::TReadPos> TClrRange;
708     String<TClrRange> clrRange;
709     resize(clrRange, length(fragStore.readStore), TClrRange(0,0));
710     typedef typename Iterator<typename TFragmentStore::TAlignedReadStore, Standard>::Type TAlignIter;
711     TAlignIter alignIt = begin(fragStore.alignedReadStore, Standard() );
712     TAlignIter alignItEnd = end(fragStore.alignedReadStore, Standard() );
713     for(;alignIt != alignItEnd; goNext(alignIt))
714     {
715         typename TFragmentStore::TReadPos begClr = 0;
716         typename TFragmentStore::TReadPos endClr = 0;
717         getClrRange(fragStore, value(alignIt), begClr, endClr);
718         clrRange[alignIt->readId] = TClrRange(begClr, endClr);
719     }
720 
721     // Write reads
722     typedef typename Iterator<typename TFragmentStore::TReadStore, Standard>::Type TReadIter;
723     TReadIter readIt = begin(fragStore.readStore, Standard() );
724     TReadIter readItEnd = end(fragStore.readStore, Standard() );
725     for(TSize idCount = 0;readIt != readItEnd; ++readIt, ++idCount)
726     {
727         // Skip reads without a name.
728         if (length(value(fragStore.readNameStore, idCount)) == 0u)
729             continue;
730         write(iter, "{RED\niid:");
731         appendNumber(iter, idCount + 1);
732         writeValue(iter, '\n');
733         if (!empty(fragStore.readNameStore) && !empty(fragStore.readNameStore[idCount]))
734         {
735             write(iter, "eid:");
736             write(iter, fragStore.readNameStore[idCount]);
737             writeValue(iter, '\n');
738         }
739         write(iter, "seq:\n");
740         writeWrappedString(iter, fragStore.readSeqStore[idCount], 60);
741         write(iter, ".\nqlt:\n");
742 
743         typedef typename Value<typename TFragmentStore::TReadSeqStore>::Type TReadSeq;
744         typedef QualityExtractor<typename Value<TReadSeq>::Type> TQualityExtractor;
745         ModifiedString<TReadSeq const, ModView<TQualityExtractor> > quals(fragStore.readSeqStore[idCount]);
746         writeWrappedString(iter, quals, 60);
747         write(iter,  ".\n");
748         if (readIt->matePairId != TReadStoreElement::INVALID_ID)
749         {
750             write(iter, "frg:");
751             appendNumber(iter, readIt->matePairId + 1);
752             writeValue(iter, '\n');
753         }
754         if (clrRange[idCount].i1 != clrRange[idCount].i2)
755         {
756             write(iter, "clr:");
757             appendNumber(iter, clrRange[idCount].i1);
758             writeValue(iter, ',');
759             appendNumber(iter, clrRange[idCount].i2);
760             writeValue(iter, '\n');
761         }
762         write(iter, "}\n");
763     }
764 
765     // Sort aligned reads according to contigId
766     sortAlignedReads(fragStore.alignedReadStore, SortContigId());
767 
768     // Write Contigs
769     typedef typename Iterator<typename TFragmentStore::TContigStore, Standard>::Type TContigIter;
770     TContigIter contigIt = begin(fragStore.contigStore, Standard() );
771     TContigIter contigItEnd = end(fragStore.contigStore, Standard() );
772     alignIt = begin(fragStore.alignedReadStore);
773     alignItEnd = end(fragStore.alignedReadStore);
774     for(TSize idCount = 0;contigIt != contigItEnd; goNext(contigIt), ++idCount)
775     {
776         write(iter, "{CTG\niid:");
777         appendNumber(iter, idCount + 1);
778         writeValue(iter, '\n');
779         if (!empty(fragStore.contigNameStore) && !empty(fragStore.contigNameStore[idCount]))
780         {
781             write(iter, "eid:");
782             write(iter, fragStore.contigNameStore[idCount]);
783             writeValue(iter, '\n');
784         }
785         String<char> qlt;
786         write(iter, "seq:\n");
787         typedef typename Iterator<typename TFragmentStore::TContigSeq>::Type TContigIter;
788         TContigIter seqContigIt = begin(contigIt->seq);
789         TContigIter seqContigItEnd = end(contigIt->seq);
790         typedef typename Iterator<String<typename TFragmentStore::TContigGapAnchor> >::Type TGapsIter;
791         TGapsIter itGaps = begin(contigIt->gaps);
792         TGapsIter itGapsEnd = end(contigIt->gaps);
793         int diff = 0;
794         char gapChar = gapValue<char>();
795         typename TFragmentStore::TContigPos mySeqPos = 0;
796         TSize k = 0;
797         for(;itGaps != itGapsEnd; goNext(itGaps))
798         {
799             while (mySeqPos < itGaps->seqPos)
800             {
801 
802                 if ((k % 60 == 0) && (k != 0))
803                     writeValue(iter, '\n');
804                 ++k;
805                 writeValue(iter, value(seqContigIt));
806                 char c = ' ';
807                 convertQuality(c, getQualityValue(value(seqContigIt)));
808                 appendValue(qlt, c, Generous());
809                 goNext(seqContigIt);++mySeqPos;
810             }
811             for(int i = 0; i < ((int) itGaps->gapPos - (int) itGaps->seqPos) - diff; ++i)
812             {
813                 if ((k % 60 == 0) && (k != 0))
814                     writeValue(iter, '\n');
815                 ++k;
816                 writeValue(iter, gapChar);
817                 appendValue(qlt, '0', Generous());
818             }
819             diff = (itGaps->gapPos - itGaps->seqPos);
820         }
821         for(;seqContigIt != seqContigItEnd; goNext(seqContigIt))
822         {
823             if ((k % 60 == 0) && (k != 0))
824                 writeValue(iter, '\n');
825             ++k;
826             writeValue(iter, value(seqContigIt));
827             char c = ' ';
828             convertQuality(c, getQualityValue(value(seqContigIt)));
829             appendValue(qlt, c, Generous());
830         }
831         write(iter, "\n.\nqlt:\n");
832         writeWrappedString(iter, qlt, 60);
833         write(iter, ".\n");
834 
835         while (alignIt != alignItEnd && idCount < alignIt->contigId)
836             goNext(alignIt);
837 
838         for (; alignIt != alignItEnd && idCount == alignIt->contigId; goNext(alignIt))
839         {
840             write(iter, "{TLE\nsrc:");
841             appendNumber(iter, alignIt->readId + 1);
842             writeValue(iter, '\n');
843             typedef typename Iterator<String<typename TFragmentStore::TReadGapAnchor> >::Type TReadGapsIter;
844             TReadGapsIter itGaps = begin(alignIt->gaps);
845             TReadGapsIter itGapsEnd = end(alignIt->gaps);
846 
847             // Create the gaps string and the clear ranges
848             typename TFragmentStore::TReadPos lenRead = length(value(fragStore.readSeqStore, alignIt->readId));
849             TSize clr1 = 0;
850             TSize clr2 = lenRead;
851             // Create first clear range
852             if (itGaps != itGapsEnd && itGaps->gapPos == 0)
853                 clr1 = itGaps->seqPos;
854             int diff = clr1;
855             String<unsigned int> gaps;
856             for (; itGaps != itGapsEnd; goNext(itGaps))
857             {
858                 for (int i = 0; i + itGaps->seqPos < diff + itGaps->gapPos; ++i)
859                     appendValue(gaps, itGaps->seqPos - clr1, Generous());
860                 // Clipped sequence
861                 if (diff + itGaps->gapPos < itGaps->seqPos)
862                     clr2 = lenRead + diff + itGaps->gapPos - itGaps->seqPos;
863                 diff = (int)itGaps->seqPos - (int)itGaps->gapPos;
864             }
865             if (alignIt->beginPos > alignIt->endPos)
866             {
867                 clr1 = lenRead - clr1;
868                 clr2 = lenRead - clr2;
869             }
870             write(iter, "off:");
871             appendNumber(iter, std::min(alignIt->beginPos, alignIt->endPos));
872             write(iter, "\nclr:");
873             appendNumber(iter, clr1);
874             writeValue(iter, ',');
875             appendNumber(iter, clr2);
876             writeValue(iter, '\n');
877             if (!empty(gaps))
878             {
879                 write(iter, "gap:\n");
880                 for (TSize z = 0; z < length(gaps); ++z)
881                 {
882                     appendNumber(iter, gaps[z]);
883                     writeValue(iter, '\n');
884                 }
885                 write(iter, ".\n");
886             }
887             write(iter, "}\n");
888         }
889         write(iter, "}\n");
890     }
891 }
892 
893 // ----------------------------------------------------------------------------
894 // Function writeRecords()
895 // ----------------------------------------------------------------------------
896 
897 
898 /*!
899  * @fn FragmentStore#writeRecords
900  * @brief Write all records to a file.
901  *
902  * @signature void writeRecords(bamFileOut, store);
903  * @signature void writeRecords(gffFileOut, store);
904  * @signature void writeRecords(ucscFileIn, store);
905  *
906  * @param[in,out] bamFileOut  The @link BamFileOut @endlink object to write into.
907  * @param[in,out] gffFileOut  The @link GffFileOut @endlink object to write into.
908  * @param[in,out] ucscFileOut The @link UcscFileOut @endlink object to write into.
909  * @param[in]     store       The @link FragmentStore @endlink object.
910  *
911  * @throw IOError On low-level I/O errors.
912  */
913 
914 //////////////////////////////////////////////////////////////////////////////
915 
916 /*!
917  * @fn FragmentStore#writeContigs
918  * @brief Write contigs from FragmentStore into a @link StreamConcept @endlink.
919  *
920  * @signature bool writeContigs(file, store, tag);
921  *
922  * @param[in,out] file  The @link StreamConcept @endlink to write to.
923  * @param[in]     store The FragmentStore to write contigs of.
924  * @param[in]     tag   A tag for the sequence format.
925  *
926  * @return bool true on success, false on errors.
927  */
928 
929 template <typename TSpec, typename TFSSpec, typename TFSConfig>
writeContigs(FormattedFile<Fastq,Output,TSpec> & file,FragmentStore<TFSSpec,TFSConfig> & store)930 bool writeContigs(FormattedFile<Fastq, Output, TSpec> & file, FragmentStore<TFSSpec, TFSConfig> & store)
931 {
932 //IOREV _doc_
933     for (unsigned i = 0; i < length(store.contigNameStore); ++i)
934         writeRecord(file, store.contigNameStore[i], store.contigStore[i].seq);
935     return true;
936 }
937 
938 // ----------------------------------------------------------------------------
939 // Function readRecords()
940 // ----------------------------------------------------------------------------
941 
942 /*!
943  * @fn FragmentStore#readRecords
944  * @brief Read all records from a file.
945  *
946  * @signature void readRecords(store, bamFileIn[, importFlags]);
947  * @signature void readRecords(store, gffFileIn);
948  * @signature void readRecords(store, ucscFileIn);
949  *
950  * @param[in,out] store       The @link FragmentStore @endlink object to store the records into.
951  * @param[in,out] bamFileIn   The @link BamFileIn @endlink object to read from.
952  * @param[in,out] gffFileIn   The @link GffFileIn @endlink object to read from.
953  * @param[in,out] ucscFileIn  The @link UcscFileIn @endlink object to read from.
954  * @param[in]     importFlags The import flags.
955  *
956  * @throw IOError On low-level I/O errors.
957  * @throw ParseError On high-level file format errors.
958  */
959 
960 //////////////////////////////////////////////////////////////////////////////
961 
962 /*!
963  * @fn FragmentStore#loadContigs
964  * @brief Load contigs into a FragmentStore.
965  *
966  * @signature bool loadContigs(store, fileName[, loadSeqs]);
967  * @signature bool loadContigs(store, fileNameList[, loadSeqs]);
968  *
969  * @param[in,out] store        The FragmentStore to append the contigs to.
970  * @param[in]     fileName     A @link CharString @endlink with the name of the file to load.
971  * @param[in]     fileNameList A @link StringSet @endlink of @link CharString @endlink with a list of file names to
972  *                             load.
973  * @param[in]     loadSeqs     A <tt>bool</tt> indicating whether to load lazily.  If <tt>true</tt> then sequences are
974  *                             loaded immediately.  If <tt>false</tt>, an emptycontig with a reference to the file is
975  *                             created.  Its sequence can be loaded on demand by @link FragmentStore#lockContig
976  *                             @endlink and @link FragmentStore#loadContig @endlink.
977  *
978  * @return bool true in case of success and false in case of error.
979  */
980 
981 template <typename TFSSpec, typename TFSConfig>
loadContigs(FragmentStore<TFSSpec,TFSConfig> & store,StringSet<CharString> const & fileNameList,bool loadSeqs)982 bool loadContigs(FragmentStore<TFSSpec, TFSConfig> &store, StringSet<CharString> const &fileNameList, bool loadSeqs)
983 {
984 //IOREV _nodoc_ although there is dddoc, there is no entry in html-doc
985     typedef FragmentStore<TFSSpec, TFSConfig>            TFragmentStore;
986     typedef typename TFragmentStore::TContigStore        TContigStore;
987     typedef typename TFragmentStore::TContigFileStore    TContigFileStore;
988     typedef typename Value<TContigStore>::Type            TContig;
989     typedef typename Value<TContigFileStore>::Type        TContigFile;
990 
991     SeqFileIn seqFile;
992     CharString meta, seq;
993 
994     for (unsigned f = 0; f < length(fileNameList); ++f)
995     {
996         if (!open(seqFile, toCString(fileNameList[f])))
997             return false;
998 
999         TContigFile contigFile;
1000         contigFile.fileName = fileNameList[f];
1001         contigFile.firstContigId = length(store.contigStore);
1002         appendValue(store.contigFileStore, contigFile, Generous());
1003 
1004         while (!atEnd(seqFile))
1005         {
1006             resize(store.contigStore, length(store.contigStore) + 1, Generous());
1007 
1008             TContig & contig = back(store.contigStore);
1009             contig.usage = 0;
1010             contig.fileId = length(store.contigFileStore) - 1;
1011             contig.fileBeginPos = position(seqFile);
1012 
1013             if (loadSeqs)
1014                 readRecord(meta, contig.seq, seqFile);
1015             else
1016                 readRecord(meta, seq, seqFile);
1017 
1018             contig.fileEndPos = position(seqFile);
1019 
1020             cropAfterFirst(meta, IsWhitespace());
1021             appendValue(store.contigNameStore, meta, Generous());
1022         }
1023         close(seqFile);
1024     }
1025     return true;
1026 }
1027 
1028 //////////////////////////////////////////////////////////////////////////////
1029 
1030 template <typename TFSSpec, typename TFSConfig>
loadContigs(FragmentStore<TFSSpec,TFSConfig> & store,CharString const & fileName,bool loadSeqs)1031 bool loadContigs(FragmentStore<TFSSpec, TFSConfig> &store, CharString const &fileName, bool loadSeqs)
1032 {
1033     StringSet<CharString> fileNames;
1034     appendValue(fileNames, fileName);
1035     return loadContigs(store, fileNames, loadSeqs);
1036 }
1037 
1038 //////////////////////////////////////////////////////////////////////////////
1039 
1040 template <typename TFSSpec, typename TFSConfig, typename TFileNames>
loadContigs(FragmentStore<TFSSpec,TFSConfig> & store,TFileNames const & fileNames)1041 bool loadContigs(FragmentStore<TFSSpec, TFSConfig> &store, TFileNames const &fileNames)
1042 {
1043     return loadContigs(store, fileNames, true);
1044 }
1045 
1046 //////////////////////////////////////////////////////////////////////////////
1047 
1048 /*!
1049  * @fn FragmentStore#loadContig
1050  * @brief Manually load a contig sequence.
1051  *
1052  * @signature bool loadContig(store, contigId);
1053  *
1054  * @param[in,out] store    The FragmentStore to load the contig for.
1055  * @param[in]     contigId The id of the contig that was created earlier by @link FragmentStore#loadContigs @endlink.
1056  *
1057  * @return bool true on success, false on failure.
1058  */
1059 
1060 template <typename TSpec, typename TConfig, typename TId>
loadContig(FragmentStore<TSpec,TConfig> & store,TId _id)1061 bool loadContig(FragmentStore<TSpec, TConfig> &store, TId _id)
1062 {
1063     typedef FragmentStore<TSpec, TConfig>                TFragmentStore;
1064     typedef typename TFragmentStore::TContigStore        TContigStore;
1065     typedef typename TFragmentStore::TContigFileStore    TContigFileStore;
1066     typedef typename Value<TContigStore>::Type            TContig;
1067     typedef typename Value<TContigFileStore>::Type        TContigFile;
1068 
1069     if ((TId)length(store.contigStore) <= _id) return false;
1070     TContig &contig = store.contigStore[_id];
1071 
1072     if (contig.fileId >= length(store.contigFileStore)) return false;
1073 
1074     TContigFile &contigFile = store.contigFileStore[contig.fileId];
1075     CharString meta;                                // dummy (seq name is already in the name store)
1076 
1077     SeqFileIn seqFile(toCString(contigFile.fileName));
1078     setPosition(seqFile, contig.fileBeginPos);
1079     readRecord(meta, contig.seq, seqFile);            // read contig sequence
1080 
1081     return true;
1082 }
1083 
1084 //////////////////////////////////////////////////////////////////////////////
1085 
1086 /*!
1087  * @fn FragmentStore#lockContig
1088  * @brief Locks a contig sequence from being removed.
1089  *
1090  * This function increases the contig usage counter by 1 and ensures that the contig sequence is loaded.
1091  *
1092  * @signature bool lockContig(store, contigId);
1093  *
1094  * @param[in,out] store    The FragmentStore to lock the contig for.
1095  * @param[in]     contigId The id of the contig that was created earlier by @link FragmentStore#loadContigs @endlink.
1096  *
1097  * @return bool true on success, false on failure.
1098  */
1099 
1100 template <typename TSpec, typename TConfig, typename TId>
lockContig(FragmentStore<TSpec,TConfig> & store,TId _id)1101 bool lockContig(FragmentStore<TSpec, TConfig> &store, TId _id)
1102 {
1103     typedef FragmentStore<TSpec, TConfig>                TFragmentStore;
1104     typedef typename TFragmentStore::TContigStore        TContigStore;
1105     typedef typename Value<TContigStore>::Type            TContig;
1106 
1107     if ((TId)length(store.contigStore) <= _id) return false;
1108     TContig &contig = store.contigStore[_id];
1109 
1110     if (contig.usage++ > 0 || !empty(contig.seq)) return true;
1111     return loadContig(store, _id);
1112 }
1113 
1114 /*!
1115  * @fn FragmentStore#unlockContig
1116  * @brief Removes a previous contig lock.
1117  *
1118  * This function decreases the contig usage counter by 1.
1119  *
1120  * @signature bool unlockContig(store, contigId);
1121  *
1122  * @param[in,out] store    The FragmentStore to unlock the contig for.
1123  * @param[in]     contigId The id of the contig that was created earlier by @link FragmentStore#loadContigs @endlink.
1124  *
1125  * @return bool true on success, false on failure.
1126  */
1127 
1128 template <typename TSpec, typename TConfig, typename TId>
unlockContig(FragmentStore<TSpec,TConfig> & store,TId _id)1129 bool unlockContig(FragmentStore<TSpec, TConfig> &store, TId _id)
1130 {
1131     if ((TId)length(store.contigStore) <= _id) return false;
1132     --store.contigStore[_id].usage;
1133     return true;
1134 }
1135 
1136 /*!
1137  * @fn FragmentStore#unlockAndFreeContig
1138  * @brief Removes a previous contig lock and clears the sequence if no further lock exists.
1139  *
1140  * This function decreases the contig usage counter by 1 and frees the sequences' memory if the counter equals 0.
1141  *
1142  * @signature bool unlockContig(store, contigId);
1143  *
1144  * @param[in,out] store    The FragmentStore to unlock the contig for.
1145  * @param[in]     contigId The id of the contig that was created earlier by @link FragmentStore#loadContigs @endlink.
1146  *
1147  * @return bool true on success, false on failure.
1148  */
1149 
1150 template <typename TSpec, typename TConfig, typename TId>
unlockAndFreeContig(FragmentStore<TSpec,TConfig> & store,TId _id)1151 bool unlockAndFreeContig(FragmentStore<TSpec, TConfig> &store, TId _id)
1152 {
1153     typedef FragmentStore<TSpec, TConfig>                TFragmentStore;
1154     typedef typename TFragmentStore::TContigStore        TContigStore;
1155     typedef typename Value<TContigStore>::Type            TContig;
1156 
1157     if ((TId)length(store.contigStore) <= _id) return false;
1158     TContig &contig = store.contigStore[_id];
1159 
1160     if (--contig.usage == 0 && contig.fileId < length(store.contigFileStore))
1161     {
1162         typename TContig::TContigSeq emptySeq;
1163         swap(contig.seq, emptySeq);
1164         return true;
1165     }
1166     return false;
1167 }
1168 
1169 /*!
1170  * @fn FragmentStore#lockContigs
1171  * @brief Locks all contig sequences from being remove.
1172  *
1173  * @signature bool lockContigs(store);
1174  *
1175  * @param[in,out] store    The FragmentStore to lock the contigs for.
1176  *
1177  * @return bool true in case of success, false in case of errors.
1178  */
1179 
1180 template <typename TSpec, typename TConfig>
lockContigs(FragmentStore<TSpec,TConfig> & store)1181 bool lockContigs(FragmentStore<TSpec, TConfig> &store)
1182 {
1183     bool result = true;
1184     for (unsigned _id = 0; _id < length(store.contigStore); ++_id)
1185         result &= lockContig(store, _id);
1186     return result;
1187 }
1188 
1189 /*!
1190  * @fn FragmentStore#unlockContigs
1191  * @brief Unlocks all contig sequences.
1192  *
1193  * @signature bool unlockContigs(store);
1194  *
1195  * @param[in,out] store    The FragmentStore to unlock the contigs for.
1196  *
1197  * @return bool true in case of success, false in case of errors.
1198  */
1199 
1200 template <typename TSpec, typename TConfig>
unlockContigs(FragmentStore<TSpec,TConfig> & store)1201 bool unlockContigs(FragmentStore<TSpec, TConfig> &store)
1202 {
1203     bool result = true;
1204     for (unsigned _id = 0; _id < length(store.contigStore); ++_id)
1205         result &= unlockContig(store, _id);
1206     return result;
1207 }
1208 
1209 /*!
1210  * @fn FragmentStore#unlockAndFreeContigs
1211  * @brief Unlocks all contig sequences and clears sequences without lock.
1212  *
1213  * @signature bool unlockAndFreeContigs(store);
1214  *
1215  * @param[in,out] store    The FragmentStore to unlock the contigs for.
1216  *
1217  * @return bool true in case of success, false in case of errors.
1218  */
1219 
1220 template <typename TSpec, typename TConfig>
unlockAndFreeContigs(FragmentStore<TSpec,TConfig> & store)1221 bool unlockAndFreeContigs(FragmentStore<TSpec, TConfig> &store)
1222 {
1223     bool result = true;
1224     for (unsigned _id = 0; _id < length(store.contigStore); ++_id)
1225         result &= unlockAndFreeContig(store, _id);
1226     return result;
1227 }
1228 
1229 
1230 //////////////////////////////////////////////////////////////////////////////
1231 
1232 /*!
1233  * @fn FragmentStore#loadReads
1234  * @brief Loads reads into FragmentStore
1235  *
1236  * When two file names are given thent he files are expected to containt he same number of reads and reads with the same
1237  * index are assumed to be mate pairs.  Mate pairs are stored internally in an "interleaved mode": a read is read from
1238  * each file before reading the next one.
1239  *
1240  * @signature bool loadReads(store, fileName);
1241  * @signature bool loadReads(store, fileNameL, fileNameR);
1242  *
1243  * @param[in,out] store     The FragmentStore to append the reads to.
1244  * @param[in]     fileName  Path to single-end read file.
1245  * @param[in]     fileNameL Path to left read file in case of paired reads.
1246  * @param[in]     fileNameR Path to right read file in case of paired reads.
1247  *
1248  * @return bool true in case of success, false in case of errors.
1249  */
1250 
1251 template <typename TSpec, typename TConfig, typename TFileName>
loadReads(FragmentStore<TSpec,TConfig> & store,TFileName & fileName)1252 bool loadReads(FragmentStore<TSpec, TConfig> &store, TFileName &fileName)
1253 {
1254     SEQAN_ASSERT_EQ(length(store.readStore), length(store.readSeqStore));
1255     SEQAN_ASSERT_EQ(length(store.readStore), length(store.readNameStore));
1256 
1257     typedef typename FragmentStore<TSpec, TConfig>::TReadStore TReadStore;
1258     typedef typename Value<TReadStore>::Type TReadStoreElement;
1259 
1260     SeqFileIn seqFile;
1261     if (!open(seqFile, toCString(fileName)))
1262         return false;
1263 
1264     readRecords(store.readNameStore, store.readSeqStore, seqFile);
1265     resize(store.readStore, length(store.readSeqStore), TReadStoreElement());
1266     return true;
1267 }
1268 
1269 
1270 template <typename TSpec, typename TConfig, typename TFileName>
loadReads(FragmentStore<TSpec,TConfig> & store,TFileName & fileNameL,TFileName & fileNameR)1271 bool loadReads(FragmentStore<TSpec, TConfig> & store, TFileName & fileNameL, TFileName & fileNameR)
1272 {
1273     SEQAN_ASSERT_EQ(length(store.readStore), length(store.readSeqStore));
1274     SEQAN_ASSERT_EQ(length(store.readStore), length(store.readNameStore));
1275 
1276     typedef typename FragmentStore<TSpec, TConfig>::TReadSeq TReadSeq;
1277 
1278     SeqFileIn seqFileL, seqFileR;
1279     if (!open(seqFileL, toCString(fileNameL)) || !open(seqFileR, toCString(fileNameR)))
1280         return false;
1281 
1282     // Read in sequences
1283     TReadSeq seq[2];
1284     CharString meta[2];
1285 
1286     while (!atEnd(seqFileL) && !atEnd(seqFileR))
1287     {
1288         readRecord(meta[0], seq[0], seqFileL);
1289         readRecord(meta[1], seq[1], seqFileR);
1290         appendMatePair(store, seq[0], seq[1], meta[0], meta[1]);
1291     }
1292     return true;
1293 }
1294 
1295 }  // namespace seqan
1296 
1297 #endif //#ifndef SEQAN_HEADER_...
1298