1 
2 /******************************************************************************
3  *
 *  This file is part of meryl, a genomic k-mer counter with nice features.
5  *
6  *  This software is based on:
7  *    'Canu' v2.0              (https://github.com/marbl/canu)
8  *  which is based on:
9  *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
10  *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
11  *
12  *  Except as indicated otherwise, this is a 'United States Government Work',
13  *  and is released in the public domain.
14  *
15  *  File 'README.licenses' in the root directory of this distribution
16  *  contains full conditions and disclaimers.
17  */
18 
19 #ifndef MERYL_H
20 #define MERYL_H
21 
22 #include "runtime.H"
23 
24 #include "merylInput.H"
25 #include "merylOp.H"
26 #include "merylCountArray.H"
27 
28 #include <stack>
29 #include <vector>
30 #include <algorithm>
31 
32 
33 class merylCommandBuilder {
34 public:
35   merylCommandBuilder();
36   ~merylCommandBuilder();
37 
38   void    initialize(char *opt);
39   bool    processOptions(void);
40 
41   void    terminateOperation(void);
42   bool    processOperation(void);
43 
44   bool    isOutput(void);
45   bool    isPrinter(void);
46 
47   bool    isMerylInput(void);
48   bool    isCanuInput(std::vector<char *> &err);
49   bool    isSequenceInput(void);
50 
51   void    finalize(void);
52   void    spawnThreads(void);
53 
54   void    printTree(merylOperation *op, uint32 indent);
55 
56 public:
numOperations(void)57   uint32           numOperations(void)           { return(_opList.size()); };
getOperation(uint32 i)58   merylOperation  *getOperation(uint32 i)        { return(_opList[i]);     };
59 
numRoots(void)60   uint32           numRoots(void)                { return(_opRoot.size()); };
getRoot(uint32 r)61   merylOperation  *getRoot(uint32 r)             { return(_opList   [ _opRoot[r] ]);  };
getRoot(uint32 r,uint32 t)62   merylOperation  *getRoot(uint32 r, uint32 t)   { return(_thList[t][ _opRoot[r] ]);  };
63 
64 private:
65   uint32    _terminating = 0;
66 
67   uint32    _optStringLen = 0;
68   char      _optString[FILENAME_MAX + 1] = {0};
69 
70   char      _inoutName[FILENAME_MAX + 1] = {0};   //  Generic input or output name.
71   char      _indexName[FILENAME_MAX + 1] = {0};   //  'merylIndex' in a meryl DB.
72   char      _sqInfName[FILENAME_MAX + 1] = {0};   //  'info' in a Canu seqStore.
73   char      _sqRdsName[FILENAME_MAX + 1] = {0};   //  'reads' in a Canu seqStore.
74 
75   //  Input from merylDB.
76 
77   //  Input from Canu seqStore.
78   uint32    _segment       = 1;
79   uint32    _segmentMax    = 1;
80 
81   //  Input from FASTA or FASTQ file.
82   bool      _doCompression     = false;
83 
84   //  Output to merylDB.
85   bool      _isOutput    = false;
86 
87   //  Output to dump file.
88   bool      _printACGTorder = false;
89   bool      _isPrint        = false;
90 
91 
92 
93   uint64    _allowedMemory;    //  These are set in the constructor,
94   uint32    _allowedThreads;   //  based on what we know at run time.
95 
96   //  _opStack is a stack of operations, used when constructing the tree of operations.
97   //
98   //  _opList is a list of operations.
99 
100   std::stack <merylOperation *>   _opStack;
101   std::vector<merylOperation *>   _opList;
102   merylOperation                **_thList[64] = { nullptr };   //  Mirrors opList
103 
104   std::vector<uint32>             _opRoot;
105 };
106 
107 
108 #endif  //  MERYL_H
109