1 #ifndef _ASSEMBLY_PARAMS_H_
2 #define _ASSEMBLY_PARAMS_H_
3 
4 #include <string>
5 #include <iostream>
6 #include <limits>
7 
8 namespace BloomDBG {
9 
10 	/**
11 	 * Parameters controlling assembly.
12 	 */
13 	struct AssemblyParams
14 	{
15 		/** Bloom filter size (in bytes) */
16 		size_t bloomSize;
17 
18 		/** Checkpoint frequency (reads processed per checkpoint) */
19 		size_t readsPerCheckpoint;
20 
21 		/** Do not delete checkpoint files after a successful assembly */
22 		bool keepCheckpoint;
23 
24 		/** Filename prefix for checkpoint files */
25 		std::string checkpointPathPrefix;
26 
27 		/** minimum k-mer coverage threshold */
28 		unsigned minCov;
29 
30 		/** path to output debugging info about processing of each read */
31 		std::string readLogPath;
32 
33 		/** WIG track containing 0/1 for sufficient k-mer cov */
34 		std::string covTrackPath;
35 
36 		/** path for output GraphViz file */
37 		std::string graphPath;
38 
39 		/** num Bloom filter hash functions */
40 		unsigned numHashes;
41 
42 		/** input Bloom filter file (if empty, build Bloom filter from reads)*/
43 		std::string bloomPath;
44 
45 		/** the number of parallel threads. */
46 		unsigned threads;
47 
48 		/** the size of a k-mer. */
49 		unsigned k;
50 
51 		/** the size of a single k-mer in a k-mer pair */
52 		unsigned K;
53 
54 		/** reference genome */
55 		std::string refPath;
56 
57 		/** Quadratic Residue (QR) seed length */
58 		unsigned qrSeedLen;
59 
60 		/** spaced seed */
61 		std::string spacedSeed;
62 
63 		/** maximum length of branches to trim */
64 		unsigned trim;
65 
66 		/** verbose level for progress messages */
67 		int verbose;
68 
69 		/** output contigs path (empty string indicates STDOUT) */
70 		std::string outputPath;
71 
72 		/** output path for trace file (-T) option */
73 		std::string tracePath;
74 
75 		/** Default constructor */
AssemblyParamsAssemblyParams76 		AssemblyParams() : bloomSize(0),
77 			readsPerCheckpoint(std::numeric_limits<size_t>::max()),
78 			keepCheckpoint(false), checkpointPathPrefix("bloom-dbg-checkpoint"),
79 			minCov(2), graphPath(), numHashes(1), threads(1),
80 			k(0), K(0), qrSeedLen(0), spacedSeed(),
81 			trim(std::numeric_limits<unsigned>::max()),
82 			verbose(0), outputPath(), tracePath() {}
83 
84 		/** Return true if all required members are initialized */
initializedAssemblyParams85 		bool initialized() const {
86 			return bloomSize > 0 && k > 0 &&
87 				trim != std::numeric_limits<unsigned>::max();
88 		}
89 
90 		/** Return true if checkpoint creation is enabled */
checkpointsEnabledAssemblyParams91 		bool checkpointsEnabled() const {
92 			return readsPerCheckpoint != std::numeric_limits<size_t>::max();
93 		}
94 
95 		/** Reset all spaced seed params to their default values */
resetSpacedSeedParamsAssemblyParams96 		void resetSpacedSeedParams() {
97 			spacedSeed.clear();
98 			K = 0;
99 			qrSeedLen = 0;
100 		}
101 
102 		/** Report current parameter values (for logging) */
103 		friend std::ostream& operator<<(std::ostream& out,
104 			const AssemblyParams& o)
105 		{
106 			out << "Assembly parameters:" << std::endl
107 				<< '\t' << "K-mer size (-k): " << o.k << std::endl
108 				<< '\t' << "K-mer coverage threshold (--kc): " << o.minCov << std::endl
109 				<< '\t' << "Max branch trim length (-t): " << o.trim << std::endl
110 				<< '\t' << "Bloom size in bytes (-b): " << o.bloomSize << std::endl
111 				<< '\t' << "Bloom hash functions (-H): " << o.numHashes << std::endl;
112 
113 			if (o.K > 0)
114 				out << '\t' << "Spaced k-mer size (-K): " << o.K << std::endl;
115 
116 			if (o.qrSeedLen > 0)
117 				out << '\t' << "Quadratic residue (QR) seed length (--qr-seed): "
118 					<< o.qrSeedLen << std::endl;
119 
120 			return out;
121 		}
122 	};
123 
124 } /* end of BloomDBG namespace */
125 
126 #endif
127