1 2 /****************************************************************************** 3 * 4 * This file is part of meryl, a genomic k-kmer counter with nice features. 5 * 6 * This software is based on: 7 * 'Canu' v2.0 (https://github.com/marbl/canu) 8 * which is based on: 9 * 'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net) 10 * the 'kmer package' r1994 (http://kmer.sourceforge.net) 11 * 12 * Except as indicated otherwise, this is a 'United States Government Work', 13 * and is released in the public domain. 14 * 15 * File 'README.licenses' in the root directory of this distribution 16 * contains full conditions and disclaimers. 17 */ 18 19 #ifndef MERYL_H 20 #define MERYL_H 21 22 #include "runtime.H" 23 24 #include "merylInput.H" 25 #include "merylOp.H" 26 #include "merylCountArray.H" 27 28 #include <stack> 29 #include <vector> 30 #include <algorithm> 31 32 33 class merylCommandBuilder { 34 public: 35 merylCommandBuilder(); 36 ~merylCommandBuilder(); 37 38 void initialize(char *opt); 39 bool processOptions(void); 40 41 void terminateOperation(void); 42 bool processOperation(void); 43 44 bool isOutput(void); 45 bool isPrinter(void); 46 47 bool isMerylInput(void); 48 bool isCanuInput(std::vector<char *> &err); 49 bool isSequenceInput(void); 50 51 void finalize(void); 52 void spawnThreads(void); 53 54 void printTree(merylOperation *op, uint32 indent); 55 56 public: numOperations(void)57 uint32 numOperations(void) { return(_opList.size()); }; getOperation(uint32 i)58 merylOperation *getOperation(uint32 i) { return(_opList[i]); }; 59 numRoots(void)60 uint32 numRoots(void) { return(_opRoot.size()); }; getRoot(uint32 r)61 merylOperation *getRoot(uint32 r) { return(_opList [ _opRoot[r] ]); }; getRoot(uint32 r,uint32 t)62 merylOperation *getRoot(uint32 r, uint32 t) { return(_thList[t][ _opRoot[r] ]); }; 63 64 private: 65 uint32 _terminating = 0; 66 67 uint32 _optStringLen = 0; 68 char _optString[FILENAME_MAX + 1] = {0}; 69 70 char _inoutName[FILENAME_MAX + 1] = {0}; // Generic input or output name. 71 char _indexName[FILENAME_MAX + 1] = {0}; // 'merylIndex' in a meryl DB. 72 char _sqInfName[FILENAME_MAX + 1] = {0}; // 'info' in a Canu seqStore. 73 char _sqRdsName[FILENAME_MAX + 1] = {0}; // 'reads' in a Canu seqStore. 74 75 // Input from merylDB. 76 77 // Input from Canu seqStore. 78 uint32 _segment = 1; 79 uint32 _segmentMax = 1; 80 81 // Input from FASTA or FASTQ file. 82 bool _doCompression = false; 83 84 // Output to merylDB. 85 bool _isOutput = false; 86 87 // Output to dump file. 88 bool _printACGTorder = false; 89 bool _isPrint = false; 90 91 92 93 uint64 _allowedMemory; // These are set in the constructor, 94 uint32 _allowedThreads; // based on what we know at run time. 95 96 // _opStack is a stack of operations, used when constructing the tree of operations. 97 // 98 // _opList is a list of operations. 99 100 std::stack <merylOperation *> _opStack; 101 std::vector<merylOperation *> _opList; 102 merylOperation **_thList[64] = { nullptr }; // Mirrors opList 103 104 std::vector<uint32> _opRoot; 105 }; 106 107 108 #endif // MERYL_H 109