#ifndef _ASSEMBLY_PARAMS_H_
#define _ASSEMBLY_PARAMS_H_

#include <cstddef>
#include <iostream>
#include <limits>
#include <string>

namespace BloomDBG {

/**
 * Parameters controlling assembly.
 *
 * Plain aggregate of settings with a constructor that installs the
 * defaults, a few convenience predicates, and a stream inserter that
 * reports the main values for logging.
 *
 * Two members use the maximum value of their type as a sentinel:
 * `readsPerCheckpoint` (max means checkpointing is disabled, see
 * checkpointsEnabled()) and `trim` (max means "not set yet", see
 * initialized()).
 */
struct AssemblyParams
{
    /** Bloom filter size (in bytes) */
    std::size_t bloomSize;

    /** Checkpoint frequency (reads processed per checkpoint) */
    std::size_t readsPerCheckpoint;

    /** Do not delete checkpoint files after a successful assembly */
    bool keepCheckpoint;

    /** Filename prefix for checkpoint files */
    std::string checkpointPathPrefix;

    /** Minimum k-mer coverage threshold */
    unsigned minCov;

    /** Path to output debugging info about processing of each read */
    std::string readLogPath;

    /** WIG track containing 0/1 for sufficient k-mer cov */
    std::string covTrackPath;

    /** Path for output GraphViz file */
    std::string graphPath;

    /** Number of Bloom filter hash functions */
    unsigned numHashes;

    /** Input Bloom filter file (if empty, build Bloom filter from reads) */
    std::string bloomPath;

    /** The number of parallel threads */
    unsigned threads;

    /** The size of a k-mer */
    unsigned k;

    /** The size of a single k-mer in a k-mer pair */
    unsigned K;

    /** Reference genome */
    std::string refPath;

    /** Quadratic Residue (QR) seed length */
    unsigned qrSeedLen;

    /** Spaced seed */
    std::string spacedSeed;

    /** Maximum length of branches to trim */
    unsigned trim;

    /** Verbose level for progress messages */
    int verbose;

    /** Output contigs path (empty string indicates STDOUT) */
    std::string outputPath;

    /** Output path for trace file (-T) option */
    std::string tracePath;

    /**
     * Default constructor.
     *
     * Every member is listed in declaration order. The path/seed strings
     * start empty; `bloomSize`, `k` and `trim` start at values that make
     * initialized() return false until the caller sets them.
     */
    AssemblyParams()
        : bloomSize(0),
          readsPerCheckpoint(std::numeric_limits<std::size_t>::max()),
          keepCheckpoint(false),
          checkpointPathPrefix("bloom-dbg-checkpoint"),
          minCov(2),
          readLogPath(),
          covTrackPath(),
          graphPath(),
          numHashes(1),
          bloomPath(),
          threads(1),
          k(0),
          K(0),
          refPath(),
          qrSeedLen(0),
          spacedSeed(),
          trim(std::numeric_limits<unsigned>::max()),
          verbose(0),
          outputPath(),
          tracePath()
    {}

    /**
     * Return true if all required members are initialized, i.e.
     * `bloomSize` and `k` are non-zero and `trim` no longer holds its
     * "unset" sentinel (the maximum unsigned value).
     */
    bool initialized() const
    {
        return bloomSize > 0 && k > 0 &&
            trim != std::numeric_limits<unsigned>::max();
    }

    /**
     * Return true if checkpoint creation is enabled, i.e.
     * `readsPerCheckpoint` differs from its "disabled" sentinel
     * (the maximum size_t value).
     */
    bool checkpointsEnabled() const
    {
        return readsPerCheckpoint != std::numeric_limits<std::size_t>::max();
    }

    /** Reset all spaced seed params to their default values */
    void resetSpacedSeedParams()
    {
        spacedSeed.clear();
        K = 0;
        qrSeedLen = 0;
    }

    /**
     * Report current parameter values (for logging).
     *
     * Always prints k, minCov, trim, bloomSize and numHashes; prints K
     * and qrSeedLen only when they are non-zero. Lines are terminated
     * with '\n' and the stream is flushed once at the end, rather than
     * flushing after every line.
     *
     * @param out stream to write to
     * @param o   parameters to report
     * @return `out`, to allow chaining
     */
    friend std::ostream& operator<<(std::ostream& out,
        const AssemblyParams& o)
    {
        out << "Assembly parameters:" << '\n'
            << '\t' << "K-mer size (-k): " << o.k << '\n'
            << '\t' << "K-mer coverage threshold (--kc): " << o.minCov << '\n'
            << '\t' << "Max branch trim length (-t): " << o.trim << '\n'
            << '\t' << "Bloom size in bytes (-b): " << o.bloomSize << '\n'
            << '\t' << "Bloom hash functions (-H): " << o.numHashes << '\n';

        if (o.K > 0)
            out << '\t' << "Spaced k-mer size (-K): " << o.K << '\n';

        if (o.qrSeedLen > 0)
            out << '\t' << "Quadratic residue (QR) seed length (--qr-seed): "
                << o.qrSeedLen << '\n';

        // Single flush so the report is visible immediately, as it was
        // when every line ended in std::endl.
        out.flush();
        return out;
    }
};

} /* end of BloomDBG namespace */

#endif