
/******************************************************************************
 *
 *  This file is part of canu, a software program that assembles whole-genome
 *  sequencing reads into contigs.
 *
 *  This software is based on:
 *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
 *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
 *
 *  Except as indicated otherwise, this is a 'United States Government Work',
 *  and is released in the public domain.
 *
 *  File 'README.licenses' in the root directory of this distribution
 *  contains full conditions and disclaimers.
 */

#ifndef CLEAR_RANGE_FILE_H
#define CLEAR_RANGE_FILE_H

#include "runtime.H"

#include "sqStore.H"

#include "files.H"

//  Create a clear range file.  The number of reads is always taken from
//  the sqStore passed to the constructor.
//
//  If there is no input file, set up for a new clear range:
//    it's modified (so it'll get written out)
//    every range is (first cleared) then set to the whole read
//
//  But if there is a file, load the clear ranges saved.  The number of
//  reads stored in the file must agree with the sqStore.
36 37 38 class clearRangeFile { 39 public: clearRangeFile(char const * fileName,sqStore * seq)40 clearRangeFile(char const *fileName, sqStore *seq) { 41 42 _modified = false; 43 44 memset(_fileName, 0, sizeof(char) * (FILENAME_MAX+1)); 45 strncpy(_fileName, fileName, FILENAME_MAX); 46 47 _lastID = seq->sqStore_lastReadID(); 48 _bgn = new uint32 [_lastID + 1]; 49 _end = new uint32 [_lastID + 1]; 50 51 memset(_bgn, 0, sizeof(uint32) * (_lastID + 1)); 52 memset(_end, 0, sizeof(uint32) * (_lastID + 1)); 53 54 55 if (fileExists(_fileName) == false) { 56 _modified = true; 57 58 reset(seq); 59 } 60 61 else { 62 _modified = false; 63 64 FILE *F = AS_UTL_openInputFile(_fileName); 65 66 loadFromFile(_lastID, "clearRangeFile::lastID", F); 67 loadFromFile(_bgn, "clearRangeFile::bgn", _lastID + 1, F); 68 loadFromFile(_end, "clearRangeFile::end", _lastID + 1, F); 69 70 AS_UTL_closeFile(F, _fileName); 71 72 assert(seq->sqStore_lastReadID() == _lastID); // Sane? And if wrong, we've overwritten _bgn and _end, and crash anyway. 
73 } 74 }; 75 76 ~clearRangeFile()77 ~clearRangeFile() { 78 if (_modified == true) { 79 FILE *F = AS_UTL_openOutputFile(_fileName); 80 81 writeToFile(_lastID, "clearRangeFile::lastID", F); 82 writeToFile(_bgn, "clearRangeFile::bgn", _lastID + 1, F); 83 writeToFile(_end, "clearRangeFile::end", _lastID + 1, F); 84 85 AS_UTL_closeFile(F, _fileName); 86 } 87 88 delete [] _bgn; 89 delete [] _end; 90 }; 91 92 reset(sqStore * seq)93 void reset(sqStore *seq) { 94 for (uint32 fi=1; fi <= _lastID; fi++) { 95 _bgn[fi] = 0; 96 _end[fi] = seq->sqStore_getReadLength(fi); 97 } 98 }; 99 100 bgn(uint32 id)101 uint32 bgn(uint32 id) { assert(id <= _lastID); return(_bgn[id]); }; end(uint32 id)102 uint32 end(uint32 id) { assert(id <= _lastID); return(_end[id]); }; 103 104 setbgn(uint32 id)105 uint32 &setbgn(uint32 id) { assert(id <= _lastID); _modified = true; return(_bgn[id]); }; setend(uint32 id)106 uint32 &setend(uint32 id) { assert(id <= _lastID); _modified = true; return(_end[id]); }; 107 108 isDeleted(uint32 id)109 bool isDeleted(uint32 id) { 110 assert(id <= _lastID); 111 return((_bgn[id] == UINT32_MAX) && (_end[id] == UINT32_MAX)); 112 }; 113 114 setDeleted(uint32 id)115 void setDeleted(uint32 id) { 116 assert(id <= _lastID); 117 _modified = true; 118 _bgn[id] = UINT32_MAX; 119 _end[id] = UINT32_MAX; 120 }; 121 122 copy(clearRangeFile * source)123 void copy(clearRangeFile *source) { 124 if (source == NULL) 125 return; 126 127 assert(_lastID == source->_lastID); 128 129 memcpy(_bgn, source->_bgn, sizeof(uint32) * (_lastID + 1)); 130 memcpy(_end, source->_end, sizeof(uint32) * (_lastID + 1)); 131 }; 132 133 134 private: 135 bool _modified; 136 137 char _fileName[FILENAME_MAX+1]; 138 139 uint32 _lastID; // [_lastID] is valid; allocated _lastID+1 spots. 140 uint32 *_bgn; 141 uint32 *_end; 142 }; 143 144 145 #endif // CLEAR_RANGE_FILE_H 146