1 
2 /******************************************************************************
3  *
4  *  This file is part of canu, a software program that assembles whole-genome
5  *  sequencing reads into contigs.
6  *
7  *  This software is based on:
8  *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
9  *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
10  *
11  *  Except as indicated otherwise, this is a 'United States Government Work',
12  *  and is released in the public domain.
13  *
14  *  File 'README.licenses' in the root directory of this distribution
15  *  contains full conditions and disclaimers.
16  */
17 
18 #ifndef OVSTORECONFIG_H
19 #define OVSTORECONFIG_H
20 
21 
22 #include "runtime.H"
23 
24 #include <vector>
25 
26 
27 
28 class ovStoreConfig {
29 public:
ovStoreConfig()30   ovStoreConfig() {
31     _maxID         = 0;
32 
33     _numBuckets    = 0;
34     _numSlices     = 0;
35     _sortMemory    = 0;
36 
37     _numInputs     = 0;
38     _inputNames    = NULL;
39 
40     _inputToBucket = NULL;
41     _readToSlice   = NULL;
42   };
43 
ovStoreConfig(std::vector<char const * > & names,uint32 maxID)44   ovStoreConfig(std::vector<char const *> &names, uint32 maxID) {
45     _maxID         = maxID;
46 
47     _numBuckets    = 0;
48     _numSlices     = 0;
49     _sortMemory    = 0;
50 
51     _numInputs     = names.size();
52     _inputNames    = new char * [_numInputs];
53 
54     for (uint32 ii=0; ii<names.size(); ii++)
55       _inputNames[ii] = duplicateString(names[ii]);
56 
57     _inputToBucket = new uint32 [_numInputs];
58     _readToSlice   = new uint16 [_maxID+1];
59   };
60 
ovStoreConfig(char const * configName)61   ovStoreConfig(char const *configName) {
62     _maxID         = 0;
63 
64     _numBuckets    = 0;
65     _numSlices     = 0;
66     _sortMemory    = 0;
67 
68     _numInputs     = 0;
69     _inputNames    = NULL;
70 
71     _inputToBucket = NULL;
72     _readToSlice   = NULL;
73 
74     loadConfig(configName);
75   };
76 
~ovStoreConfig()77   ~ovStoreConfig() {
78     for (uint32 ii=0; ii<_numInputs; ii++)
79       delete [] _inputNames[ii];
80 
81     delete [] _inputNames;
82 
83     delete [] _inputToBucket;
84     delete [] _readToSlice;
85   };
86 
loadConfig(char const * configName)87   void    loadConfig(char const *configName) {
88     FILE *C = AS_UTL_openInputFile(configName);
89 
90     loadFromFile(_maxID,      "maxID",      C);
91     loadFromFile(_numBuckets, "numBuckets", C);
92     loadFromFile(_numSlices,  "numSlices",  C);
93     loadFromFile(_sortMemory, "sortMemory", C);
94     loadFromFile(_numInputs,  "numInputs",  C);
95 
96     _inputNames = new char * [_numInputs];
97 
98     for (uint32 ii=0; ii<_numInputs; ii++) {
99       uint32  nl = 0;
100 
101       loadFromFile(nl, "nameLen", C);
102 
103       _inputNames[ii] = new char [nl+1];
104 
105       loadFromFile(_inputNames[ii], "name", nl+1, C);
106     }
107 
108     _inputToBucket = new uint32 [_numInputs];
109     _readToSlice   = new uint16 [_maxID+1];
110 
111     loadFromFile(_inputToBucket, "inputToBucket", _numInputs, C);
112     loadFromFile(_readToSlice,   "readToSlice",   _maxID+1,   C);
113 
114     AS_UTL_closeFile(C, configName);
115   };
116 
writeConfig(char const * configName)117   void    writeConfig(char const *configName) {
118     FILE *C = AS_UTL_openOutputFile(configName);
119 
120     writeToFile(_maxID,      "maxID",      C);
121     writeToFile(_numBuckets, "numBuckets", C);
122     writeToFile(_numSlices,  "numSlices",  C);
123     writeToFile(_sortMemory, "sortMemory", C);
124     writeToFile(_numInputs,  "numInputs",  C);
125 
126     for (uint32 ii=0; ii<_numInputs; ii++) {
127       uint32 nl = strlen(_inputNames[ii]);
128 
129       writeToFile(nl,              "nameLen",       C);
130       writeToFile(_inputNames[ii], "name",    nl+1, C);
131     }
132 
133     writeToFile(_inputToBucket, "inputToBucket", _numInputs, C);
134     writeToFile(_readToSlice,   "readToSlice",   _maxID + 1, C);
135 
136     AS_UTL_closeFile(C, configName);
137 
138     fprintf(stderr, "\n");
139     fprintf(stderr, "Saved configuration to '%s'.\n", configName);
140   };
141 
numBuckets(void)142   uint32  numBuckets(void) { return(_numBuckets); };
numSlices(void)143   uint32  numSlices(void)  { return(_numSlices);  };
sortMemory(void)144   double  sortMemory(void) { return(_sortMemory); };
145 
146 
numInputs(uint32 bucketNumber)147   uint32  numInputs(uint32 bucketNumber) {
148     uint32 ni = 0;
149 
150     bucketNumber--;  //  Internally starting at 0, externally at 1.
151 
152     for (uint32 ii=0; ii<_numInputs; ii++)
153       if (_inputToBucket[ii] == bucketNumber)
154         ni++;
155 
156     return(ni);
157   };
158 
getInput(uint32 bucketNumber,uint32 fileNumber)159   char const *getInput(uint32 bucketNumber, uint32 fileNumber) {
160     uint32 ni = 0;
161 
162     bucketNumber--;  //  Internally starting at 0, externally at 1.
163 
164     for (uint32 ii=0; ii<_numInputs; ii++)
165       if (_inputToBucket[ii] == bucketNumber)
166         if (ni++ == fileNumber)
167           return(_inputNames[ii]);
168 
169     return(NULL);
170   }
171 
172 
getAssignedSlice(uint32 id)173   uint32  getAssignedSlice(uint32 id) {
174     return(_readToSlice[id] + 1);
175   };
176 
177 
178   void    assignReadsToSlices(sqStore *seq,
179                               uint64   minMemory,
180                               uint64   maxMemory);
181 
182 private:
183   uint32     _maxID;
184 
185   uint32     _numBuckets;
186   uint32     _numSlices;
187   double     _sortMemory;      //  Expected maximum memory usage in GB (for sorting).
188 
189   uint32     _numInputs;       //  Number of input ovb files.
190   char     **_inputNames;      //  Input ovb files.
191 
192   uint32    *_inputToBucket;   //  Maps an input name to a bucket.
193   uint16    *_readToSlice;      //  Map each read ID to a slice.
194 };
195 
196 
197 
198 #endif  //  OVSTORECONFIG_H
199