1 
2 /******************************************************************************
3  *
4  *  This file is part of canu, a software program that assembles whole-genome
5  *  sequencing reads into contigs.
6  *
7  *  This software is based on:
8  *    'Celera Assembler' r4587 (http://wgs-assembler.sourceforge.net)
9  *    the 'kmer package' r1994 (http://kmer.sourceforge.net)
10  *
11  *  Except as indicated otherwise, this is a 'United States Government Work',
12  *  and is released in the public domain.
13  *
14  *  File 'README.licenses' in the root directory of this distribution
15  *  contains full conditions and disclaimers.
16  */
17 
18 #ifndef CLEAR_RANGE_FILE_H
19 #define CLEAR_RANGE_FILE_H
20 
21 #include "runtime.H"
22 
23 #include "sqStore.H"
24 
25 #include "files.H"
26 
//  Create a clear range file for the reads in a sqStore.  The number of
//  reads is always taken from the store.
//
//  If there is no input file, set up for a new clear range:
//    it's modified (so it'll get written out)
//    every range is (first cleared) then set to the whole read
//
//  But if there is a file, load the clear ranges saved in it.  The number
//  of reads recorded in the file is expected to match the store.
36 
37 
38 class clearRangeFile {
39 public:
clearRangeFile(char const * fileName,sqStore * seq)40   clearRangeFile(char const *fileName, sqStore *seq) {
41 
42     _modified = false;
43 
44     memset(_fileName, 0, sizeof(char) * (FILENAME_MAX+1));
45     strncpy(_fileName, fileName, FILENAME_MAX);
46 
47     _lastID   = seq->sqStore_lastReadID();
48     _bgn      = new uint32 [_lastID + 1];
49     _end      = new uint32 [_lastID + 1];
50 
51     memset(_bgn, 0, sizeof(uint32) * (_lastID + 1));
52     memset(_end, 0, sizeof(uint32) * (_lastID + 1));
53 
54 
55     if (fileExists(_fileName) == false) {
56       _modified = true;
57 
58      reset(seq);
59     }
60 
61     else {
62       _modified = false;
63 
64       FILE  *F = AS_UTL_openInputFile(_fileName);
65 
66       loadFromFile(_lastID, "clearRangeFile::lastID",             F);
67       loadFromFile(_bgn,    "clearRangeFile::bgn",   _lastID + 1, F);
68       loadFromFile(_end,    "clearRangeFile::end",   _lastID + 1, F);
69 
70       AS_UTL_closeFile(F, _fileName);
71 
72       assert(seq->sqStore_lastReadID() == _lastID);  //  Sane?  And if wrong, we've overwritten _bgn and _end, and crash anyway.
73     }
74   };
75 
76 
~clearRangeFile()77   ~clearRangeFile() {
78     if (_modified == true) {
79       FILE  *F = AS_UTL_openOutputFile(_fileName);
80 
81       writeToFile(_lastID, "clearRangeFile::lastID",             F);
82       writeToFile(_bgn,    "clearRangeFile::bgn",   _lastID + 1, F);
83       writeToFile(_end,    "clearRangeFile::end",   _lastID + 1, F);
84 
85       AS_UTL_closeFile(F, _fileName);
86     }
87 
88     delete [] _bgn;
89     delete [] _end;
90   };
91 
92 
reset(sqStore * seq)93   void      reset(sqStore *seq) {
94     for (uint32 fi=1; fi <= _lastID; fi++) {
95       _bgn[fi] = 0;
96       _end[fi] = seq->sqStore_getReadLength(fi);
97     }
98   };
99 
100 
bgn(uint32 id)101   uint32    bgn(uint32 id)     { assert(id <= _lastID);  return(_bgn[id]); };
end(uint32 id)102   uint32    end(uint32 id)     { assert(id <= _lastID);  return(_end[id]); };
103 
104 
setbgn(uint32 id)105   uint32   &setbgn(uint32 id)  { assert(id <= _lastID);  _modified = true;  return(_bgn[id]); };
setend(uint32 id)106   uint32   &setend(uint32 id)  { assert(id <= _lastID);  _modified = true;  return(_end[id]); };
107 
108 
isDeleted(uint32 id)109   bool      isDeleted(uint32 id) {
110     assert(id <= _lastID);
111     return((_bgn[id] == UINT32_MAX) && (_end[id] == UINT32_MAX));
112   };
113 
114 
setDeleted(uint32 id)115   void      setDeleted(uint32 id) {
116     assert(id <= _lastID);
117     _modified = true;
118     _bgn[id]  = UINT32_MAX;
119     _end[id]  = UINT32_MAX;
120   };
121 
122 
copy(clearRangeFile * source)123   void      copy(clearRangeFile *source) {
124     if (source == NULL)
125       return;
126 
127     assert(_lastID   == source->_lastID);
128 
129     memcpy(_bgn, source->_bgn, sizeof(uint32) * (_lastID + 1));
130     memcpy(_end, source->_end, sizeof(uint32) * (_lastID + 1));
131   };
132 
133 
134 private:
135   bool     _modified;
136 
137   char     _fileName[FILENAME_MAX+1];
138 
139   uint32   _lastID;  //  [_lastID] is valid; allocated _lastID+1 spots.
140   uint32  *_bgn;
141   uint32  *_end;
142 };
143 
144 
145 #endif // CLEAR_RANGE_FILE_H
146