1 #ifndef BLOOM_DBG_H
2 #define BLOOM_DBG_H 1
3
4 #include "config.h"
5
6 #include "BloomDBG/AssemblyCounters.h"
7 #include "BloomDBG/AssemblyParams.h"
8 #include "BloomDBG/BloomIO.h"
9 #include "BloomDBG/Checkpoint.h"
10 #include "BloomDBG/MaskedKmer.h"
11 #include "BloomDBG/RollingBloomDBG.h"
12 #include "BloomDBG/RollingHash.h"
13 #include "BloomDBG/RollingHashIterator.h"
14 #include "Common/Hash.h"
15 #include "Common/IOUtil.h"
16 #include "Common/Sequence.h"
17 #include "Common/Uncompress.h"
18 #include "Common/UnorderedSet.h"
19 #include "DataLayer/FastaConcat.h"
20 #include "DataLayer/FastaReader.h"
21 #include "Graph/BreadthFirstSearch.h"
22 #include "Graph/ExtendPath.h"
23 #include "Graph/Path.h"
24 #include "vendor/btl_bloomfilter/BloomFilter.hpp"
25
26 #include <cmath>
27 #include <iomanip>
28 #include <iostream>
29 #include <limits>
30 #include <sstream>
31 #include <string>
32
33 #if _OPENMP
34 #include <omp.h>
35 #endif
36
37 #if HAVE_GOOGLE_SPARSE_HASH_MAP
38
39 #include <google/sparse_hash_set>
40 typedef google::sparse_hash_set<RollingBloomDBGVertex, hash<RollingBloomDBGVertex>> KmerHash;
41
42 #else
43
44 #include "Common/UnorderedSet.h"
45 typedef unordered_set<RollingBloomDBGVertex, hash<RollingBloomDBGVertex>> KmerHash;
46
47 #endif
48
49 namespace BloomDBG {
50
51 /**
52 * Type for a vertex in the de Bruijn graph.
53 */
54 typedef RollingBloomDBGVertex Vertex;
55
56 /**
57 * Return true if all of the k-mers in `seq` are contained in `bloom`
58 * and false otherwise.
59 */
60 template<typename BloomT>
61 inline static bool
allKmersInBloom(const Sequence & seq,const BloomT & bloom)62 allKmersInBloom(const Sequence& seq, const BloomT& bloom)
63 {
64 const unsigned k = bloom.getKmerSize();
65 const unsigned numHashes = bloom.getHashNum();
66 assert(seq.length() >= k);
67 unsigned validKmers = 0;
68 for (RollingHashIterator it(seq, numHashes, k); it != RollingHashIterator::end();
69 ++it, ++validKmers) {
70 if (!bloom.contains(*it))
71 return false;
72 }
73 /* if we skipped over k-mers containing non-ACGT chars */
74 if (validKmers < seq.length() - k + 1)
75 return false;
76 return true;
77 }
78
79 /**
80 * Add all k-mers of a DNA sequence to a Bloom filter.
81 */
82 template<typename BloomT>
83 inline static void
addKmersToBloom(const Sequence & seq,BloomT & bloom)84 addKmersToBloom(const Sequence& seq, BloomT& bloom)
85 {
86 const unsigned k = bloom.getKmerSize();
87 const unsigned numHashes = bloom.getHashNum();
88 for (RollingHashIterator it(seq, numHashes, k); it != RollingHashIterator::end(); ++it) {
89 bloom.insert(*it);
90 }
91 }
92
93 /**
94 * Returns the sum of all kmer multiplicities in `seq` by querying `bloom`
95 */
96 template<typename CountingBloomT>
97 inline static unsigned
getSeqAbsoluteKmerCoverage(const Sequence & seq,const CountingBloomT & bloom)98 getSeqAbsoluteKmerCoverage(const Sequence& seq, const CountingBloomT& bloom)
99 {
100 const unsigned k = bloom.getKmerSize();
101 const unsigned numHashes = bloom.getHashNum();
102 assert(seq.length() >= k);
103 unsigned coverage = 0;
104 for (RollingHashIterator it(seq, numHashes, k); it != RollingHashIterator::end();
105 ++it) {
106 coverage += bloom.minCount(*it);
107 }
108 return coverage;
109 }
110
111 /**
112 * Translate a DNA sequence to an equivalent path in the
113 * de Bruijn graph.
114 */
115 inline static Path<Vertex>
seqToPath(const Sequence & seq,unsigned k,unsigned numHashes)116 seqToPath(const Sequence& seq, unsigned k, unsigned numHashes)
117 {
118 Path<Vertex> path;
119 assert(seq.length() >= k);
120 for (RollingHashIterator it(seq, numHashes, k); it != RollingHashIterator::end(); ++it) {
121 path.push_back(Vertex(it.kmer().c_str(), it.rollingHash()));
122 }
123 return path;
124 }
125
126 /**
127 * Translate a path in the de Bruijn graph to an equivalent
128 * DNA sequence.
129 */
130 inline static Sequence
pathToSeq(const Path<Vertex> & path,unsigned k)131 pathToSeq(const Path<Vertex>& path, unsigned k)
132 {
133 assert(path.size() > 0);
134 assert(k > 0);
135
136 const std::string& spacedSeed = MaskedKmer::mask();
137 assert(spacedSeed.empty() || spacedSeed.length() == k);
138 Sequence seq;
139 seq.resize(path.size() + k - 1, 'N');
140
141 for (size_t i = 0; i < path.size(); ++i) {
142 std::string kmer(path.at(i).kmer().c_str());
143 for (size_t j = 0; j < k; ++j) {
144 if (spacedSeed.empty() || spacedSeed.at(j) == '1') {
145 if (seq.at(i + j) != 'N' && seq.at(i + j) != kmer.at(j)) {
146 std::cerr << "warning: inconsistent DBG path detected "
147 "at position "
148 << i + j << ": " << seq.substr(0, i + j) << " (orig base: '"
149 << seq.at(i + j) << "'"
150 << ", new base: '" << kmer.at(j) << "')" << std::endl;
151 }
152 seq.at(i + j) = kmer.at(j);
153 }
154 }
155 }
156
157 return seq;
158 }
159
160 enum SeedType
161 {
162 ST_BRANCH_KMER = 0,
163 ST_READ
164 };
165
166 static inline std::string
seedTypeStr(const SeedType & type)167 seedTypeStr(const SeedType& type)
168 {
169 switch (type) {
170 case ST_BRANCH_KMER:
171 return "BRANCH_KMER";
172 case ST_READ:
173 return "READ";
174 default:
175 break;
176 }
177 assert(false);
178 }
179
180 /**
181 * Results for the extension of a read segment.
182 * Each instance represents a row in the trace file generated
183 * by the '-T' option for abyss-bloom-dbg.
184 */
185 struct ContigRecord
186 {
187 /** output FASTA ID for this contig */
188 size_t contigID;
189 /** length of contig (bp) */
190 unsigned length;
191 /** coverage of contig */
192 unsigned coverage;
193 /** FASTA ID of seeding read */
194 std::string readID;
195 /** Type of sequence used to seed contig (branch k-mer or full read) */
196 SeedType seedType;
197 /** Starting sequence prior to extensions */
198 Sequence seed;
199 /** Result code for attempted left sequence extension (e.g. DEAD END) */
200 PathExtensionResult leftExtensionResult;
201 /** Result code for attempted left sequence extension (e.g. DEAD END) */
202 PathExtensionResult rightExtensionResult;
203
204 /**
205 * True if the extended sequence was excluded from the output contigs
206 * because it was redundant. (An identical sequence was generated
207 * when extending a previous read.)
208 */
209 bool redundant;
210
ContigRecordContigRecord211 ContigRecord()
212 : contigID(std::numeric_limits<size_t>::max())
213 , readID()
214 , leftExtensionResult(std::make_pair(0, ER_DEAD_END))
215 , rightExtensionResult(std::make_pair(0, ER_DEAD_END))
216 , redundant(false)
217 {}
218
printHeadersContigRecord219 static std::ostream& printHeaders(std::ostream& out)
220 {
221 out << "contig_id" << '\t' << "length" << '\t' << "redundant" << '\t' << "read_id" << '\t'
222 << "left_result" << '\t' << "left_extension" << '\t' << "right_result" << '\t'
223 << "right_extension" << '\t' << "seed_type" << '\t' << "seed_length" << '\t' << "seed"
224 << '\n';
225 return out;
226 }
227
228 friend std::ostream& operator<<(std::ostream& out, const ContigRecord& o)
229 {
230 if (o.redundant)
231 out << "NA" << '\t';
232 else
233 out << o.contigID << '\t';
234
235 out << o.length << '\t' << o.redundant << '\t' << o.readID << '\t';
236
237 if (o.leftExtensionResult.first > 0)
238 out << pathExtensionResultStr(o.leftExtensionResult.second) << '\t'
239 << o.leftExtensionResult.first << '\t';
240 else
241 out << "NA" << '\t' << "NA" << '\t';
242
243 if (o.rightExtensionResult.first > 0)
244 out << pathExtensionResultStr(o.rightExtensionResult.second) << '\t'
245 << o.rightExtensionResult.first << '\t';
246 else
247 out << "NA" << '\t' << "NA" << '\t';
248
249 out << seedTypeStr(o.seedType) << '\t' << o.seed.length() << '\t' << o.seed << '\n';
250
251 return out;
252 }
253 };
254
255 enum ReadResult
256 {
257 RR_UNINITIALIZED = 0,
258 RR_SHORTER_THAN_K,
259 RR_NON_ACGT,
260 RR_BLUNT_END,
261 RR_NOT_SOLID,
262 RR_ALL_KMERS_VISITED,
263 RR_ALL_BRANCH_KMERS_VISITED,
264 RR_GENERATED_CONTIGS
265 };
266
267 static inline std::string
readResultStr(const ReadResult & result)268 readResultStr(const ReadResult& result)
269 {
270 switch (result) {
271 case RR_UNINITIALIZED:
272 return "NA";
273 case RR_SHORTER_THAN_K:
274 return "SHORTER_THAN_K";
275 case RR_NON_ACGT:
276 return "NON_ACGT";
277 case RR_BLUNT_END:
278 return "BLUNT_END";
279 case RR_NOT_SOLID:
280 return "NOT_SOLID";
281 case RR_ALL_KMERS_VISITED:
282 return "ALL_KMERS_VISITED";
283 case RR_ALL_BRANCH_KMERS_VISITED:
284 return "ALL_BRANCH_KMERS_VISITED";
285 case RR_GENERATED_CONTIGS:
286 return "GENERATED_CONTIGS";
287 default:
288 break;
289 }
290 assert(false);
291 }
292
293 /**
294 * Records results of processing a sequencing read.
295 * Each instance represents a row in the read log file generated
296 * by the `--read-log` option of `abyss-bloom-dbg`.
297 */
298 struct ReadRecord
299 {
300 /** FASTA ID for this read */
301 std::string readID;
302 /** outcome of processing the read */
303 ReadResult result;
304
ReadRecordReadRecord305 ReadRecord()
306 : readID()
307 , result(RR_UNINITIALIZED)
308 {}
309
ReadRecordReadRecord310 ReadRecord(const std::string& readID)
311 : readID(readID)
312 , result(RR_UNINITIALIZED)
313 {}
314
ReadRecordReadRecord315 ReadRecord(const std::string& readID, ReadResult result)
316 : readID(readID)
317 , result(result)
318 {}
319
printHeadersReadRecord320 static std::ostream& printHeaders(std::ostream& out)
321 {
322 out << "read_id" << '\t' << "result" << '\n';
323 return out;
324 }
325
326 friend std::ostream& operator<<(std::ostream& out, const ReadRecord& o)
327 {
328 out << o.readID << '\t' << readResultStr(o.result) << '\n';
329
330 return out;
331 }
332 };
333
334 /** Print an intermediate progress message during assembly */
335 void
readsProgressMessage(AssemblyCounters counters)336 readsProgressMessage(AssemblyCounters counters)
337 {
338 std::cerr << "Processed " << counters.readsProcessed << " reads"
339 << ", solid reads: " << counters.solidReads << " (" << std::setprecision(3)
340 << (float)100 * counters.solidReads / counters.readsProcessed << "%)"
341 << ", visited reads: " << counters.visitedReads << " (" << std::setprecision(3)
342 << (float)100 * counters.visitedReads / counters.readsProcessed << "%)" << std::endl;
343 }
344
345 /** Print an intermediate progress message during assembly */
346 void
basesProgressMessage(AssemblyCounters counters)347 basesProgressMessage(AssemblyCounters counters)
348 {
349 std::cerr << "Assembled " << counters.basesAssembled << " bp in " << counters.contigID + 1
350 << " contigs" << std::endl;
351 }
352
353 /**
354 * Split a path at branching k-mers (degree > 2).
355 */
356 template<typename GraphT>
357 inline static std::vector<Path<typename boost::graph_traits<GraphT>::vertex_descriptor>>
splitPath(const Path<typename boost::graph_traits<GraphT>::vertex_descriptor> & path,const GraphT & dbg,unsigned minBranchLen)358 splitPath(
359 const Path<typename boost::graph_traits<GraphT>::vertex_descriptor>& path,
360 const GraphT& dbg,
361 unsigned minBranchLen)
362 {
363 assert(path.size() > 0);
364
365 typedef typename boost::graph_traits<GraphT>::vertex_descriptor V;
366 typedef typename Path<V>::const_iterator PathIt;
367
368 std::vector<Path<V>> splitPaths;
369 Path<V> currentPath;
370 for (PathIt it = path.begin(); it != path.end(); ++it) {
371 currentPath.push_back(*it);
372 unsigned inDegree = trueBranches(*it, REVERSE, dbg, minBranchLen).size();
373 unsigned outDegree = trueBranches(*it, FORWARD, dbg, minBranchLen).size();
374 if (inDegree > 1 || outDegree > 1) {
375 /* we've hit a branching point -- end the current
376 * path and start a new one */
377 splitPaths.push_back(currentPath);
378 currentPath.clear();
379 currentPath.push_back(*it);
380 }
381 }
382 if (currentPath.size() > 1 || splitPaths.empty())
383 splitPaths.push_back(currentPath);
384
385 assert(splitPaths.size() >= 1);
386 return splitPaths;
387 }
388
389 /**
390 * Trim a sequence down to the longest contiguous subsequence
391 * of "good" k-mers. If the sequence has length < k or contains
392 * no good k-mers, the trimmed sequence will be the empty string.
393 *
394 * @param seq the DNA sequence to be trimmed
395 * @param goodKmerSet Bloom filter containing "good" k-mers
396 */
397 template<typename BloomT>
398 static inline void
trimSeq(Sequence & seq,const BloomT & goodKmerSet)399 trimSeq(Sequence& seq, const BloomT& goodKmerSet)
400 {
401 const unsigned k = goodKmerSet.getKmerSize();
402 const unsigned numHashes = goodKmerSet.getHashNum();
403
404 if (seq.length() < k) {
405 seq.clear();
406 return;
407 }
408
409 const unsigned UNSET = UINT_MAX;
410 unsigned prevPos = UNSET;
411 unsigned matchStart = UNSET;
412 unsigned matchLen = 0;
413 unsigned maxMatchStart = UNSET;
414 unsigned maxMatchLen = 0;
415
416 /* note: RollingHashIterator skips over k-mer
417 * positions with non-ACGT chars */
418 for (RollingHashIterator it(seq, numHashes, k); it != RollingHashIterator::end();
419 prevPos = it.pos(), ++it) {
420 if (!goodKmerSet.contains(*it) || (prevPos != UNSET && it.pos() - prevPos > 1)) {
421 /* end any previous match */
422 if (matchStart != UNSET && matchLen > maxMatchLen) {
423 maxMatchLen = matchLen;
424 maxMatchStart = matchStart;
425 }
426 matchStart = UNSET;
427 matchLen = 0;
428 }
429 if (goodKmerSet.contains(*it)) {
430 /* initiate or extend match */
431 if (matchStart == UNSET)
432 matchStart = it.pos();
433 matchLen++;
434 }
435 }
436 /* handles case last match extends to end of seq */
437 if (matchStart != UNSET && matchLen > maxMatchLen) {
438 maxMatchLen = matchLen;
439 maxMatchStart = matchStart;
440 }
441 /* if there were no matching k-mers */
442 if (maxMatchLen == 0) {
443 seq.clear();
444 return;
445 }
446 /* trim read down to longest matching subseq */
447 seq = seq.substr(maxMatchStart, maxMatchLen + k - 1);
448 }
449
450 /**
451 * Append a contig to the output FASTA stream.
452 */
453 inline static void
printContig(const Sequence & seq,unsigned length,unsigned coverage,size_t contigID,const std::string & readID,unsigned k,std::ostream & out)454 printContig(
455 const Sequence& seq,
456 unsigned length,
457 unsigned coverage,
458 size_t contigID,
459 const std::string& readID,
460 unsigned k,
461 std::ostream& out)
462 {
463 assert(seq.length() >= k);
464
465 FastaRecord contig;
466
467 /* set FASTA id */
468 std::ostringstream id;
469 id << contigID;
470
471 /* add FASTA comment indicating extended read id */
472 std::ostringstream comment;
473 comment << length << ' ' << coverage << ' ';
474 comment << "read:" << readID;
475 assert(id.good());
476 contig.id = id.str();
477 contig.comment = comment.str();
478
479 /* set seq */
480 contig.seq = seq;
481
482 /* output FASTQ record */
483 out << contig;
484 assert(out);
485 }
486
487 /**
488 * Return true if the left end of the given sequence is a blunt end
489 * in the Bloom filterde Bruijn graph. PRECONDITION: `seq` does not contain
490 * any non-ACGT chars.
491 */
492 template<typename GraphT>
493 inline static unsigned
leftIsBluntEnd(const Sequence & seq,const GraphT & graph,const AssemblyParams & params)494 leftIsBluntEnd(const Sequence& seq, const GraphT& graph, const AssemblyParams& params)
495 {
496 unsigned k = params.k;
497 unsigned numHashes = params.numHashes;
498 unsigned fpLookAhead = 5;
499
500 if (seq.length() < k)
501 return false;
502
503 Sequence firstKmerStr(seq, 0, k);
504 Path<Vertex> path = seqToPath(firstKmerStr, k, numHashes);
505 Vertex& firstKmer = path.front();
506
507 return !lookAhead(firstKmer, REVERSE, fpLookAhead, graph);
508 }
509
510 /**
511 * Return true if the left and/or right end of the sequence is a blunt
512 * end in the de Bruijn graph. Such reads likely have read errors
513 * and thus we should not try to extend these into contings.
514 * When testing for a blunt end, we account for the possibility of
515 * false positive extensions by using a small amount of look-ahead
516 * (as dictated by params.fpLookAhead).
517 */
518 template<typename GraphT>
519 inline static bool
hasBluntEnd(const Sequence & seq,const GraphT & graph,const AssemblyParams & params)520 hasBluntEnd(const Sequence& seq, const GraphT& graph, const AssemblyParams& params)
521 {
522 if (leftIsBluntEnd(seq, graph, params))
523 return true;
524
525 Sequence rc = reverseComplement(seq);
526 if (leftIsBluntEnd(rc, graph, params))
527 return true;
528
529 return false;
530 }
531
532 /**
533 * Output a contig sequence if it is not redundant, i.e. it has not already
534 * been generated from a different read / thread of execution.
535 */
536 template<typename SolidKmerSetT, typename AssembledKmerSetT, typename AssemblyStreamsT>
537 inline static void
outputContig(const Path<Vertex> & contigPath,ContigRecord & rec,SolidKmerSetT & solidKmerSet,AssembledKmerSetT & assembledKmerSet,KmerHash & contigEndKmers,const AssemblyParams & params,AssemblyCounters & counters,AssemblyStreamsT & streams)538 outputContig(
539 const Path<Vertex>& contigPath,
540 ContigRecord& rec,
541 SolidKmerSetT& solidKmerSet,
542 AssembledKmerSetT& assembledKmerSet,
543 KmerHash& contigEndKmers,
544 const AssemblyParams& params,
545 AssemblyCounters& counters,
546 AssemblyStreamsT& streams)
547 {
548 const unsigned fpLookAhead = 5;
549
550 Sequence seq = pathToSeq(contigPath, params.k);
551
552 /* vertices representing start/end k-mers of contig */
553
554 Sequence kmer1 = seq.substr(0, params.k);
555 canonicalize(kmer1);
556 RollingHash hash1(kmer1.c_str(), params.numHashes, params.k);
557 Vertex v1(kmer1.c_str(), hash1);
558
559 Sequence kmer2 = seq.substr(seq.length() - params.k);
560 canonicalize(kmer2);
561 RollingHash hash2(kmer2.c_str(), params.numHashes, params.k);
562 Vertex v2(kmer2.c_str(), hash2);
563
564 bool redundant = false;
565 #pragma omp critical(redundancyCheck)
566 {
567 /*
568 * If we use `assembledKmerSet` to check very short contigs,
569 * we may get full-length matches purely due to Bloom filter
570 * positives. For such contigs, we additionally track
571 * the start and end in a separate hash table called
572 * `contigEndKmers`.
573 */
574 if (seq.length() < params.k + fpLookAhead - 1) {
575
576 if (contigEndKmers.find(v1) != contigEndKmers.end() &&
577 contigEndKmers.find(v2) != contigEndKmers.end()) {
578 redundant = true;
579 } else {
580 contigEndKmers.insert(v1);
581 contigEndKmers.insert(v2);
582 }
583
584 } else if (allKmersInBloom(seq, assembledKmerSet)) {
585 redundant = true;
586 }
587
588 if (!redundant) {
589
590 /* mark remaining k-mers as assembled */
591 addKmersToBloom(seq, assembledKmerSet);
592 }
593 }
594 rec.redundant = redundant;
595
596 if (!redundant) {
597 #pragma omp critical(fasta)
598 {
599 rec.length = seq.length();
600 rec.coverage = getSeqAbsoluteKmerCoverage(seq, solidKmerSet);
601
602 /* add contig to output FASTA */
603 printContig(seq, rec.length, rec.coverage, counters.contigID, rec.readID, params.k, streams.out);
604
605 /* add contig to checkpoint FASTA file */
606 if (params.checkpointsEnabled())
607 printContig(seq, rec.length, rec.coverage, counters.contigID, rec.readID, params.k, streams.checkpointOut);
608
609 rec.contigID = counters.contigID;
610
611 counters.contigID++;
612 counters.basesAssembled += seq.length();
613 }
614 }
615
616 #pragma omp critical(trace)
617 streams.traceOut << rec;
618 }
619
620 enum ContigType
621 {
622 CT_LINEAR,
623 CT_CIRCULAR,
624 CT_HAIRPIN
625 };
626
627 template<typename GraphT>
628 static inline ContigType
getContigType(const Path<Vertex> & contigPath,const GraphT & dbg)629 getContigType(const Path<Vertex>& contigPath, const GraphT& dbg)
630 {
631 if (edge(contigPath.back(), contigPath.front(), dbg).second) {
632
633 Vertex v = contigPath.front().clone();
634 v.shift(ANTISENSE, contigPath.back().kmer().getBase(0));
635
636 if (v.kmer() == contigPath.back().kmer())
637 return CT_CIRCULAR;
638 else
639 return CT_HAIRPIN;
640 }
641
642 return CT_LINEAR;
643 }
644
645 /** Special case behaviour for circular/hairpin contigs */
646 template<typename GraphT>
647 static inline void
preprocessCircularContig(Path<Vertex> & contigPath,const GraphT & dbg,unsigned trim)648 preprocessCircularContig(Path<Vertex>& contigPath, const GraphT& dbg, unsigned trim)
649 {
650 assert(!contigPath.empty());
651
652 ContigType contigType = getContigType(contigPath, dbg);
653 assert(contigType != CT_LINEAR);
654
655 if (contigPath.size() <= 2)
656 return;
657
658 /* longest branch of Bloom filter positives */
659 const unsigned fpTrim = 5;
660
661 /*
662 * Note: A circular/hairpin contig contains at most two branch k-mers.
663 * And if it does contain branch k-mers, they will be the first/last
664 * k-mers.
665 *
666 * If only one end of the circular/hairpin contig is a branch k-mer,
667 * add that k-mer to the other end of the contig as well. This
668 * enables the usual branch k-mer trimming logic for linear contigs to
669 * produce the correct results downstream.
670 */
671
672 bool branchStart = ambiguous(contigPath.front(), FORWARD, dbg, trim, fpTrim) ||
673 ambiguous(contigPath.front(), REVERSE, dbg, trim, fpTrim);
674
675 bool branchEnd = ambiguous(contigPath.back(), FORWARD, dbg, trim, fpTrim) ||
676 ambiguous(contigPath.back(), REVERSE, dbg, trim, fpTrim);
677
678 if (branchStart && !branchEnd) {
679
680 if (contigType == CT_CIRCULAR) {
681 contigPath.push_back(contigPath.front());
682 } else {
683 assert(contigType == CT_HAIRPIN);
684 Vertex rc = contigPath.front().clone();
685 rc.reverseComplement();
686 contigPath.push_back(rc);
687 }
688
689 } else if (!branchStart && branchEnd) {
690
691 if (contigType == CT_CIRCULAR) {
692 contigPath.push_front(contigPath.back());
693 } else {
694 assert(contigType == CT_HAIRPIN);
695 Vertex rc = contigPath.back().clone();
696 rc.reverseComplement();
697 contigPath.push_front(rc);
698 }
699 }
700 }
701
702 /**
703 * Ensure that branching k-mers are not repeated in the output
704 * contigs by selectively trimming contig ends.
705 *
706 * The idea is to keep a branch k-mer at the end of contig only
707 * if the edge leading up to it is unambigous. For example, in the
708 * diagram below the contig generated from the right side would
709 * include the branching k-mer k5, whereas the two contigs entering
710 * on the left would discard it:
711 *
712 * ...-k1-k2
713 * \
714 * k5-k6-...
715 * /
716 * ...-k3-k4
717 *
718 * If a branch k-mer has both indegree > 1 and outdegree > 1, it is
719 * output separately as its own contig of length k.
720 */
721 template<typename GraphT>
722 inline static void
trimBranchKmers(Path<Vertex> & contigPath,const GraphT & dbg,unsigned trim)723 trimBranchKmers(Path<Vertex>& contigPath, const GraphT& dbg, unsigned trim)
724 {
725 assert(!contigPath.empty());
726
727 if (contigPath.size() == 1)
728 return;
729
730 /* special case: circular/hairpin contigs */
731
732 ContigType contigType = getContigType(contigPath, dbg);
733 if (contigType == CT_CIRCULAR || contigType == CT_HAIRPIN)
734 preprocessCircularContig(contigPath, dbg, trim);
735
736 unsigned l = contigPath.size();
737
738 /* longest branch of Bloom filter false positives */
739 const unsigned fpTrim = 5;
740
741 bool ambiguous1 = ambiguous(contigPath.at(0), contigPath.at(1), FORWARD, dbg, trim, fpTrim);
742
743 bool ambiguous2 =
744 ambiguous(contigPath.at(l - 1), contigPath.at(l - 2), REVERSE, dbg, trim, fpTrim);
745
746 if (ambiguous1)
747 contigPath.pop_front();
748
749 assert(!contigPath.empty());
750
751 if (ambiguous2)
752 contigPath.pop_back();
753
754 assert(!contigPath.empty());
755 }
756
757 bool
isTip(unsigned length,PathExtensionResultCode leftResult,PathExtensionResultCode rightResult,unsigned trim)758 isTip(
759 unsigned length,
760 PathExtensionResultCode leftResult,
761 PathExtensionResultCode rightResult,
762 unsigned trim)
763 {
764 if (length > trim)
765 return false;
766
767 if (leftResult == ER_DEAD_END && (rightResult == ER_DEAD_END || rightResult == ER_AMBI_IN))
768 return true;
769
770 if (rightResult == ER_DEAD_END && (leftResult == ER_DEAD_END || leftResult == ER_AMBI_IN))
771 return true;
772
773 return false;
774 }
775
776 /**
777 * Decide if a read should be extended and if so extend it into a contig.
778 */
779 template<typename SolidKmerSetT, typename AssembledKmerSetT, typename AssemblyStreamsT>
780 static inline ReadRecord
processRead(const FastaRecord & rec,const SolidKmerSetT & solidKmerSet,AssembledKmerSetT & assembledKmerSet,KmerHash & contigEndKmers,KmerHash & visitedBranchKmers,const AssemblyParams & params,AssemblyCounters & counters,AssemblyStreamsT & streams)781 processRead(
782 const FastaRecord& rec,
783 const SolidKmerSetT& solidKmerSet,
784 AssembledKmerSetT& assembledKmerSet,
785 KmerHash& contigEndKmers,
786 KmerHash& visitedBranchKmers,
787 const AssemblyParams& params,
788 AssemblyCounters& counters,
789 AssemblyStreamsT& streams)
790 {
791 (void)visitedBranchKmers;
792
793 typedef typename Path<Vertex>::iterator PathIt;
794
795 /* Boost graph API for Bloom filter */
796 RollingBloomDBG<SolidKmerSetT> dbg(solidKmerSet);
797
798 unsigned k = params.k;
799 const Sequence& seq = rec.seq;
800
801 /* we can't extend reads shorter than k */
802 if (seq.length() < k)
803 return ReadRecord(rec.id, RR_SHORTER_THAN_K);
804
805 /* skip reads with non-ACGT chars */
806 if (!allACGT(seq))
807 return ReadRecord(rec.id, RR_NON_ACGT);
808
809 /* don't extend reads that are tips */
810 if (hasBluntEnd(seq, dbg, params))
811 return ReadRecord(rec.id, RR_BLUNT_END);
812
813 /* only extend "solid" reads */
814 if (!allKmersInBloom(seq, solidKmerSet))
815 return ReadRecord(rec.id, RR_NOT_SOLID);
816
817 #pragma omp atomic
818 counters.solidReads++;
819
820 /* skip reads in previously assembled regions */
821 if (allKmersInBloom(seq, assembledKmerSet)) {
822 #pragma omp atomic
823 counters.visitedReads++;
824 return ReadRecord(rec.id, RR_ALL_KMERS_VISITED);
825 }
826
827 /*
828 * We use `assembledKmers` to track read k-mers
829 * that have already been included in the output contigs, and
830 * therefore do not need to be processed (extended) again.
831 * Note that we do not use `assembledKmerSet` (a Bloom filter)
832 * for this purpose because Bloom filter false positives
833 * may cause us to omit short contigs (e.g. contigs with length k).
834 */
835 unordered_set<Vertex> assembledKmers;
836
837 Path<Vertex> path = seqToPath(rec.seq, params.k, params.numHashes);
838 for (PathIt it = path.begin(); it != path.end(); ++it) {
839
840 if (assembledKmers.find(*it) != assembledKmers.end())
841 continue;
842
843 ExtendPathParams extendParams;
844 extendParams.trimLen = params.trim;
845 extendParams.fpTrim = 5;
846 extendParams.maxLen = NO_LIMIT;
847 extendParams.lookBehind = true;
848 extendParams.lookBehindStartVertex = false;
849
850 ContigRecord contigRec;
851 contigRec.readID = rec.id;
852 contigRec.seedType = ST_READ;
853 contigRec.seed = it->kmer().c_str();
854
855 Path<Vertex> contigPath;
856 contigPath.push_back(*it);
857
858 contigRec.leftExtensionResult = extendPath(contigPath, REVERSE, dbg, extendParams);
859
860 contigRec.rightExtensionResult = extendPath(contigPath, FORWARD, dbg, extendParams);
861
862 PathExtensionResultCode leftResult = contigRec.leftExtensionResult.second;
863 PathExtensionResultCode rightResult = contigRec.rightExtensionResult.second;
864
865 if (!isTip(contigPath.size(), leftResult, rightResult, params.trim)) {
866 /* selectively trim branch k-mers from contig ends */
867 trimBranchKmers(contigPath, dbg, params.trim);
868
869 /* output contig to FASTA file */
870 outputContig(
871 contigPath, contigRec, solidKmerSet, assembledKmerSet, contigEndKmers, params, counters, streams);
872 }
873
874 /* mark contig k-mers as visited */
875 for (PathIt it2 = contigPath.begin(); it2 != contigPath.end(); ++it2)
876 assembledKmers.insert(*it2);
877 }
878
879 return ReadRecord(rec.id, RR_GENERATED_CONTIGS);
880 }
881
882 /**
883 * Perform a Bloom-filter-based de Bruijn graph assembly.
884 * Contigs are generated by extending reads left/right within
885 * the de Bruijn graph, up to the next branching point or dead end.
886 * Short branches due to Bloom filter false positives are
887 * ignored.
888 *
889 * @param argc number of input FASTA files
890 * @param argv array of input FASTA filenames
891 * @param genomeSize approx genome size
892 * @param goodKmerSet Bloom filter containing k-mers that
893 * occur more than once in the input data
894 * @param out output stream for contigs (FASTA)
895 * @param verbose set to true to print progress messages to
896 * STDERR
897 */
898 template<typename SolidKmerSetT>
899 inline static void
assemble(int argc,char ** argv,SolidKmerSetT & solidKmerSet,const AssemblyParams & params,std::ostream & out)900 assemble(
901 int argc,
902 char** argv,
903 SolidKmerSetT& solidKmerSet,
904 const AssemblyParams& params,
905 std::ostream& out)
906 {
907 /* k-mers in previously assembled contigs */
908 BloomFilter assembledKmerSet(
909 solidKmerSet.size(), solidKmerSet.getHashNum(), solidKmerSet.getKmerSize());
910
911 /* counters for progress messages */
912 AssemblyCounters counters;
913
914 /* input reads */
915 FastaConcat in(argv, argv + argc, FastaReader::FOLD_CASE);
916
917 /* duplicate FASTA output for checkpoints */
918 std::ofstream checkpointOut;
919 if (params.checkpointsEnabled()) {
920 assert(!params.checkpointPathPrefix.empty());
921 std::string path = params.checkpointPathPrefix + CHECKPOINT_FASTA_EXT + CHECKPOINT_TMP_EXT;
922 checkpointOut.open(path.c_str());
923 assert_good(checkpointOut, path);
924 }
925
926 /* trace file output ('-T' option) */
927 std::ofstream traceOut;
928 if (!params.tracePath.empty()) {
929 traceOut.open(params.tracePath.c_str());
930 assert_good(traceOut, params.tracePath);
931 ContigRecord::printHeaders(traceOut);
932 assert_good(traceOut, params.tracePath);
933 }
934
935 /* logs outcome of processing of each read (`--read-log`) */
936 std::ofstream readLogOut;
937 if (!params.readLogPath.empty()) {
938 readLogOut.open(params.readLogPath.c_str());
939 assert_good(readLogOut, params.readLogPath);
940 ReadRecord::printHeaders(readLogOut);
941 assert_good(readLogOut, params.readLogPath);
942 }
943
944 /* bundle output streams */
945 AssemblyStreams<FastaConcat> streams(in, out, checkpointOut, traceOut, readLogOut);
946
947 /* run the assembly */
948 assemble(solidKmerSet, assembledKmerSet, counters, params, streams);
949 }
950
951 /**
952 * Perform a Bloom-filter-based de Bruijn graph assembly.
953 * Contigs are generated by extending reads left/right within
954 * the de Bruijn graph, up to the next branching point or dead end.
955 * Short branches due to Bloom filter false positives are
956 * ignored.
957 *
958 * @param argc number of input FASTA files
959 * @param argv array of input FASTA filenames
960 * @param genomeSize approx genome size
961 * @param goodKmerSet Bloom filter containing k-mers that
962 * occur more than once in the input data
963 * @param assembledKmerSet Bloom filter containing k-mers that have
964 * already been included in contigs
965 * @param in input stream for sequencing reads (FASTA)
966 * @param out output stream for contigs (FASTA)
967 * @param verbose set to true to print progress messages to
968 * STDERR
969 */
970 template<typename SolidKmerSetT, typename AssembledKmerSetT, typename InputReadStreamT>
971 inline static void
assemble(const SolidKmerSetT & goodKmerSet,AssembledKmerSetT & assembledKmerSet,AssemblyCounters & counters,const AssemblyParams & params,AssemblyStreams<InputReadStreamT> & streams)972 assemble(
973 const SolidKmerSetT& goodKmerSet,
974 AssembledKmerSetT& assembledKmerSet,
975 AssemblyCounters& counters,
976 const AssemblyParams& params,
977 AssemblyStreams<InputReadStreamT>& streams)
978 {
979 assert(params.initialized());
980
981 /* per-thread I/O buffer (size is in bases) */
982 const size_t SEQ_BUFFER_SIZE = 1000000;
983
984 if (params.verbose)
985 std::cerr << "Trimming branches " << params.trim << " k-mers or shorter" << std::endl;
986
987 InputReadStreamT& in = streams.in;
988 std::ostream& checkpointOut = streams.checkpointOut;
989
990 KmerHash contigEndKmers;
991 contigEndKmers.rehash((size_t)pow(2, 28));
992
993 KmerHash visitedBranchKmers;
994
995 /*
996 * Print a progress message whenever `READS_PROGRESS_STEP`
997 * reads have been processed or `BASES_PROGRESS_STEP`
998 * bases have been assembled. The purpose of using
999 * two measures of progess is to show steady updates
1000 * throughout the assembly process.
1001 */
1002
1003 const size_t READS_PROGRESS_STEP = 100000;
1004 const size_t BASES_PROGRESS_STEP = 1000000;
1005 size_t basesProgressLine = BASES_PROGRESS_STEP;
1006
1007 while (true) {
1008 size_t readsUntilCheckpoint = params.readsPerCheckpoint;
1009
1010 #pragma omp parallel
1011 for (std::vector<FastaRecord> buffer;;) {
1012 /* read sequences in batches to reduce I/O contention */
1013 buffer.clear();
1014 size_t bufferSize;
1015 bool good = true;
1016 #pragma omp critical(in)
1017 for (bufferSize = 0; bufferSize < SEQ_BUFFER_SIZE && readsUntilCheckpoint > 0;) {
1018 FastaRecord rec;
1019 good = in >> rec;
1020 if (!good)
1021 break;
1022 #pragma omp atomic
1023 readsUntilCheckpoint--;
1024 buffer.push_back(rec);
1025 bufferSize += rec.seq.length();
1026 }
1027 if (buffer.size() == 0)
1028 break;
1029
1030 for (std::vector<FastaRecord>::iterator it = buffer.begin(); it != buffer.end(); ++it) {
1031 ReadRecord result = processRead(
1032 *it,
1033 goodKmerSet,
1034 assembledKmerSet,
1035 contigEndKmers,
1036 visitedBranchKmers,
1037 params,
1038 counters,
1039 streams);
1040
1041 #pragma omp critical(readsProgress)
1042 {
1043 ++counters.readsProcessed;
1044 if (params.verbose && counters.readsProcessed % READS_PROGRESS_STEP == 0)
1045 readsProgressMessage(counters);
1046 }
1047
1048 if (params.verbose)
1049 #pragma omp critical(basesProgress)
1050 {
1051 if (counters.basesAssembled >= basesProgressLine) {
1052 basesProgressMessage(counters);
1053 while (counters.basesAssembled >= basesProgressLine)
1054 basesProgressLine += BASES_PROGRESS_STEP;
1055 }
1056 }
1057
1058 if (!params.readLogPath.empty()) {
1059 #pragma omp critical(readLog)
1060 streams.readLogOut << result;
1061 }
1062 }
1063
1064 } /* for batch of reads between I/O operations */
1065
1066 if (readsUntilCheckpoint > 0) {
1067 assert(in.eof());
1068 break;
1069 }
1070
1071 /* save assembly state to checkpoint files */
1072 checkpointOut.flush();
1073 createCheckpoint(goodKmerSet, assembledKmerSet, counters, params);
1074
1075 } /* for each batch of reads between checkpoints */
1076
1077 assert(in.eof());
1078
1079 if (params.verbose) {
1080 readsProgressMessage(counters);
1081 basesProgressMessage(counters);
1082 std::cerr << "Assembly complete" << std::endl;
1083 }
1084
1085 if (params.checkpointsEnabled() && !params.keepCheckpoint)
1086 removeCheckpointData(params);
1087 }
1088
1089 /**
1090 * Visitor class that outputs visited nodes/edges in GraphViz format during
1091 * a breadth first traversal. An instance of this class may be passed
1092 * as an argument to the `breadthFirstSearch` function.
1093 */
1094 template<typename GraphT>
1095 class GraphvizBFSVisitor : public DefaultBFSVisitor<GraphT>
1096 {
1097 typedef typename boost::graph_traits<GraphT>::vertex_descriptor VertexT;
1098 typedef typename boost::graph_traits<GraphT>::edge_descriptor EdgeT;
1099
1100 public:
1101 /** Constructor */
GraphvizBFSVisitor(std::ostream & out)1102 GraphvizBFSVisitor(std::ostream& out)
1103 : m_out(out)
1104 , m_nodesVisited(0)
1105 , m_edgesVisited(0)
1106 {
1107 /* start directed graph (GraphViz) */
1108 m_out << "digraph g {\n";
1109 }
1110
1111 /** Destructor */
~GraphvizBFSVisitor()1112 ~GraphvizBFSVisitor()
1113 {
1114 /* end directed graph (GraphViz) */
1115 m_out << "}\n";
1116 }
1117
1118 /** Invoked when a vertex is visited for the first time */
discover_vertex(const VertexT & v,const GraphT &)1119 BFSVisitorResult discover_vertex(const VertexT& v, const GraphT&)
1120 {
1121 ++m_nodesVisited;
1122 /* declare vertex (GraphViz) */
1123 m_out << '\t' << v.kmer().c_str() << ";\n";
1124
1125 return BFS_SUCCESS;
1126 }
1127
1128 /**
1129 * Invoked when an edge is traversed. (Each edge
1130 * in the graph is traversed exactly once.)
1131 */
examine_edge(const EdgeT & e,const GraphT & g)1132 BFSVisitorResult examine_edge(const EdgeT& e, const GraphT& g)
1133 {
1134 ++m_edgesVisited;
1135 const VertexT& u = source(e, g);
1136 const VertexT& v = target(e, g);
1137
1138 /* declare edge (GraphViz) */
1139 m_out << '\t' << u.kmer().c_str() << " -> " << v.kmer().c_str() << ";\n";
1140
1141 return BFS_SUCCESS;
1142 }
1143
1144 /** Return number of distinct nodes visited */
getNumNodesVisited()1145 size_t getNumNodesVisited() const { return m_nodesVisited; }
1146
1147 /** Get number of distinct edges visited */
getNumEdgesVisited()1148 size_t getNumEdgesVisited() const { return m_edgesVisited; }
1149
1150 protected:
1151 /** output stream for GraphViz serialization */
1152 std::ostream& m_out;
1153 /** number of nodes visited so far */
1154 size_t m_nodesVisited;
1155 /** number of edges visited so far */
1156 size_t m_edgesVisited;
1157 };
1158
1159 /**
1160 * Output a GraphViz serialization of the de Bruijn graph
1161 * using FASTA files and a Bloom filter as input.
1162 *
1163 * @param argc number of input FASTA files
1164 * @param argv array of input FASTA filenames
1165 * @param kmerSet Bloom filter containing valid k-mers
1166 * @param out output stream for GraphViz serialization
1167 * @param verbose prints progress messages to STDERR if true
1168 */
1169 template<typename BloomT>
1170 static inline void
outputGraph(int argc,char ** argv,const BloomT & kmerSet,const AssemblyParams & params,std::ostream & out)1171 outputGraph(
1172 int argc,
1173 char** argv,
1174 const BloomT& kmerSet,
1175 const AssemblyParams& params,
1176 std::ostream& out)
1177 {
1178 assert(params.initialized());
1179
1180 typedef RollingBloomDBG<BloomT> GraphT;
1181
1182 /* interval for progress messages */
1183 const unsigned progressStep = 1000;
1184 const unsigned k = kmerSet.getKmerSize();
1185 const unsigned numHashes = kmerSet.getHashNum();
1186
1187 /* counter for progress messages */
1188 size_t readsProcessed = 0;
1189
1190 /* Boost graph API over rolling hash Bloom filter */
1191 GraphT dbg(kmerSet);
1192
1193 /* Marks visited nodes in breadth-first traversal */
1194 DefaultColorMap<GraphT> colorMap;
1195
1196 /* BFS Visitor -- generates GraphViz output as nodes
1197 * and edges are traversed. */
1198 GraphvizBFSVisitor<GraphT> visitor(out);
1199
1200 if (params.verbose)
1201 std::cerr << "Generating GraphViz output..." << std::endl;
1202
1203 FastaConcat in(argv, argv + argc, FastaReader::FOLD_CASE);
1204 for (FastaRecord rec;;) {
1205 bool good;
1206 good = in >> rec;
1207 if (!good)
1208 break;
1209 Sequence& seq = rec.seq;
1210
1211 /* Trim down to longest subsequence of "good" k-mers */
1212 trimSeq(seq, kmerSet);
1213 if (seq.length() > 0) {
1214
1215 /* BFS traversal in forward dir */
1216 std::string startKmer = seq.substr(0, k);
1217 Vertex start(startKmer.c_str(), RollingHash(startKmer, numHashes, k));
1218 breadthFirstSearch(start, dbg, colorMap, visitor);
1219
1220 /* BFS traversal in reverse dir */
1221 Sequence rcSeq = reverseComplement(seq);
1222 std::string rcStartKmer = rcSeq.substr(0, k);
1223 Vertex rcStart(rcStartKmer.c_str(), RollingHash(rcStartKmer, numHashes, k));
1224 breadthFirstSearch(rcStart, dbg, colorMap, visitor);
1225 }
1226
1227 if (++readsProcessed % progressStep == 0 && params.verbose) {
1228 std::cerr << "processed " << readsProcessed
1229 << " (k-mers visited: " << visitor.getNumNodesVisited()
1230 << ", edges visited: " << visitor.getNumEdgesVisited() << ")" << std::endl;
1231 }
1232 }
1233 assert(in.eof());
1234 if (params.verbose) {
1235 std::cerr << "processed " << readsProcessed
1236 << " reads (k-mers visited: " << visitor.getNumNodesVisited()
1237 << ", edges visited: " << visitor.getNumEdgesVisited() << ")" << std::endl;
1238 std::cerr << "GraphViz generation complete" << std::endl;
1239 }
1240 }
1241
1242 /**
1243 * Write a single block of a 'variableStep' WIG file.
1244 *
1245 * @param chr chromosome name
1246 * @param start start coordinate of block
1247 * @param length length of block
1248 * @param val value of block
1249 * @param out output stream for WIG file
1250 * @param outPath path for output WIG file
1251 */
1252 static inline void
outputWigBlock(const std::string & chr,size_t start,size_t length,unsigned val,ostream & out,const std::string & outPath)1253 outputWigBlock(
1254 const std::string& chr,
1255 size_t start,
1256 size_t length,
1257 unsigned val,
1258 ostream& out,
1259 const std::string& outPath)
1260 {
1261 assert(length > 0);
1262 out << "variableStep chrom=" << chr << " span=" << length << "\n";
1263 out << start << ' ' << val << '\n';
1264 assert_good(out, outPath);
1265 }
1266
1267 /**
1268 * Write a WIG file for a reference genome, using the values 0 and 1
1269 * to indicate whether or not a given k-mer had sufficient coverage
1270 * in the reads to exceed the minimum coverage threshold.
1271 *
1272 * @param goodKmerSet Bloom filter of k-mers that exceed the
1273 * minimum coverage threshold
1274 * @param params encapsulates all command line options for the
1275 * assembly, including the reference genome and the output path
1276 * for the WIG file.
1277 */
1278 template<class BloomT>
1279 static inline void
writeCovTrack(const BloomT & goodKmerSet,const AssemblyParams & params)1280 writeCovTrack(const BloomT& goodKmerSet, const AssemblyParams& params)
1281 {
1282 assert(!params.covTrackPath.empty());
1283 assert(!params.refPath.empty());
1284
1285 const unsigned k = goodKmerSet.getKmerSize();
1286 const unsigned numHashes = goodKmerSet.getHashNum();
1287
1288 std::ofstream covTrack(params.covTrackPath.c_str());
1289 assert_good(covTrack, params.covTrackPath);
1290
1291 if (params.verbose)
1292 std::cerr << "Writing 0/1 k-mer coverage track for `" << params.refPath << "` to `"
1293 << params.covTrackPath << "`" << std::endl;
1294
1295 FastaReader ref(params.refPath.c_str(), FastaReader::FOLD_CASE);
1296 for (FastaRecord rec; ref >> rec;) {
1297 std::string chr = rec.id;
1298 bool firstVal = true;
1299 size_t blockStart = 1;
1300 size_t blockLength = 0;
1301 uint8_t blockVal = 0;
1302 for (RollingHashIterator it(rec.seq, numHashes, k); it != RollingHashIterator::end();
1303 ++it) {
1304 uint8_t val = goodKmerSet.contains(*it) ? 1 : 0;
1305 if (firstVal) {
1306 firstVal = false;
1307 /* WIG standard uses 1-based coords */
1308 blockStart = it.pos() + 1;
1309 blockLength = 1;
1310 blockVal = val;
1311 } else if (val != blockVal) {
1312 assert(firstVal == false);
1313 outputWigBlock(
1314 chr, blockStart, blockLength, blockVal, covTrack, params.covTrackPath);
1315 /* WIG standard uses 1-based coords */
1316 blockStart = it.pos() + 1;
1317 blockLength = 1;
1318 blockVal = val;
1319 } else {
1320 blockLength++;
1321 }
1322 }
1323 /* output last block */
1324 if (blockLength > 0) {
1325 outputWigBlock(chr, blockStart, blockLength, blockVal, covTrack, params.covTrackPath);
1326 }
1327 }
1328 assert(ref.eof());
1329
1330 assert_good(covTrack, params.covTrackPath);
1331 covTrack.close();
1332 }
1333
1334 } /* BloomDBG namespace */
1335
1336 #endif
1337