1 2 /****************************************************************************** 3 * 4 * This file is part of canu, a software program that assembles whole-genome 5 * sequencing reads into contigs. 6 * 7 * This software is based on: 8 * 'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net) 9 * the 'kmer package' r1994 (http://kmer.sourceforge.net) 10 * 11 * Except as indicated otherwise, this is a 'United States Government Work', 12 * and is released in the public domain. 13 * 14 * File 'README.licenses' in the root directory of this distribution 15 * contains full conditions and disclaimers. 16 */ 17 18 #ifndef OVSTORECONFIG_H 19 #define OVSTORECONFIG_H 20 21 22 #include "runtime.H" 23 24 #include <vector> 25 26 27 28 class ovStoreConfig { 29 public: ovStoreConfig()30 ovStoreConfig() { 31 _maxID = 0; 32 33 _numBuckets = 0; 34 _numSlices = 0; 35 _sortMemory = 0; 36 37 _numInputs = 0; 38 _inputNames = NULL; 39 40 _inputToBucket = NULL; 41 _readToSlice = NULL; 42 }; 43 ovStoreConfig(std::vector<char const * > & names,uint32 maxID)44 ovStoreConfig(std::vector<char const *> &names, uint32 maxID) { 45 _maxID = maxID; 46 47 _numBuckets = 0; 48 _numSlices = 0; 49 _sortMemory = 0; 50 51 _numInputs = names.size(); 52 _inputNames = new char * [_numInputs]; 53 54 for (uint32 ii=0; ii<names.size(); ii++) 55 _inputNames[ii] = duplicateString(names[ii]); 56 57 _inputToBucket = new uint32 [_numInputs]; 58 _readToSlice = new uint16 [_maxID+1]; 59 }; 60 ovStoreConfig(char const * configName)61 ovStoreConfig(char const *configName) { 62 _maxID = 0; 63 64 _numBuckets = 0; 65 _numSlices = 0; 66 _sortMemory = 0; 67 68 _numInputs = 0; 69 _inputNames = NULL; 70 71 _inputToBucket = NULL; 72 _readToSlice = NULL; 73 74 loadConfig(configName); 75 }; 76 ~ovStoreConfig()77 ~ovStoreConfig() { 78 for (uint32 ii=0; ii<_numInputs; ii++) 79 delete [] _inputNames[ii]; 80 81 delete [] _inputNames; 82 83 delete [] _inputToBucket; 84 delete [] _readToSlice; 85 }; 86 loadConfig(char const * configName)87 void loadConfig(char const *configName) { 88 FILE *C = AS_UTL_openInputFile(configName); 89 90 loadFromFile(_maxID, "maxID", C); 91 loadFromFile(_numBuckets, "numBuckets", C); 92 loadFromFile(_numSlices, "numSlices", C); 93 loadFromFile(_sortMemory, "sortMemory", C); 94 loadFromFile(_numInputs, "numInputs", C); 95 96 _inputNames = new char * [_numInputs]; 97 98 for (uint32 ii=0; ii<_numInputs; ii++) { 99 uint32 nl = 0; 100 101 loadFromFile(nl, "nameLen", C); 102 103 _inputNames[ii] = new char [nl+1]; 104 105 loadFromFile(_inputNames[ii], "name", nl+1, C); 106 } 107 108 _inputToBucket = new uint32 [_numInputs]; 109 _readToSlice = new uint16 [_maxID+1]; 110 111 loadFromFile(_inputToBucket, "inputToBucket", _numInputs, C); 112 loadFromFile(_readToSlice, "readToSlice", _maxID+1, C); 113 114 AS_UTL_closeFile(C, configName); 115 }; 116 writeConfig(char const * configName)117 void writeConfig(char const *configName) { 118 FILE *C = AS_UTL_openOutputFile(configName); 119 120 writeToFile(_maxID, "maxID", C); 121 writeToFile(_numBuckets, "numBuckets", C); 122 writeToFile(_numSlices, "numSlices", C); 123 writeToFile(_sortMemory, "sortMemory", C); 124 writeToFile(_numInputs, "numInputs", C); 125 126 for (uint32 ii=0; ii<_numInputs; ii++) { 127 uint32 nl = strlen(_inputNames[ii]); 128 129 writeToFile(nl, "nameLen", C); 130 writeToFile(_inputNames[ii], "name", nl+1, C); 131 } 132 133 writeToFile(_inputToBucket, "inputToBucket", _numInputs, C); 134 writeToFile(_readToSlice, "readToSlice", _maxID + 1, C); 135 136 AS_UTL_closeFile(C, configName); 137 138 fprintf(stderr, "\n"); 139 fprintf(stderr, "Saved configuration to '%s'.\n", configName); 140 }; 141 numBuckets(void)142 uint32 numBuckets(void) { return(_numBuckets); }; numSlices(void)143 uint32 numSlices(void) { return(_numSlices); }; sortMemory(void)144 double sortMemory(void) { return(_sortMemory); }; 145 146 numInputs(uint32 bucketNumber)147 uint32 numInputs(uint32 bucketNumber) { 148 uint32 ni = 0; 149 150 bucketNumber--; // Internally starting at 0, externally at 1. 151 152 for (uint32 ii=0; ii<_numInputs; ii++) 153 if (_inputToBucket[ii] == bucketNumber) 154 ni++; 155 156 return(ni); 157 }; 158 getInput(uint32 bucketNumber,uint32 fileNumber)159 char const *getInput(uint32 bucketNumber, uint32 fileNumber) { 160 uint32 ni = 0; 161 162 bucketNumber--; // Internally starting at 0, externally at 1. 163 164 for (uint32 ii=0; ii<_numInputs; ii++) 165 if (_inputToBucket[ii] == bucketNumber) 166 if (ni++ == fileNumber) 167 return(_inputNames[ii]); 168 169 return(NULL); 170 } 171 172 getAssignedSlice(uint32 id)173 uint32 getAssignedSlice(uint32 id) { 174 return(_readToSlice[id] + 1); 175 }; 176 177 178 void assignReadsToSlices(sqStore *seq, 179 uint64 minMemory, 180 uint64 maxMemory); 181 182 private: 183 uint32 _maxID; 184 185 uint32 _numBuckets; 186 uint32 _numSlices; 187 double _sortMemory; // Expected maximum memory usage in GB (for sorting). 188 189 uint32 _numInputs; // Number of input ovb files. 190 char **_inputNames; // Input ovb files. 191 192 uint32 *_inputToBucket; // Maps an input name to a bucket. 193 uint16 *_readToSlice; // Map each read ID to a slice. 194 }; 195 196 197 198 #endif // OVSTORECONFIG_H 199