1 #ifndef OBJTOOLS_READERS_SEQDB__SEQDBBITSET_HPP 2 #define OBJTOOLS_READERS_SEQDB__SEQDBBITSET_HPP 3 4 /* $Id: seqdbbitset.hpp 599293 2019-12-23 18:45:56Z zaretska $ 5 * =========================================================================== 6 * 7 * PUBLIC DOMAIN NOTICE 8 * National Center for Biotechnology Information 9 * 10 * This software/database is a "United States Government Work" under the 11 * terms of the United States Copyright Act. It was written as part of 12 * the author's official duties as a United States Government employee and 13 * thus cannot be copyrighted. This software/database is freely available 14 * to the public for use. The National Library of Medicine and the U.S. 15 * Government have not placed any restriction on its use or reproduction. 16 * 17 * Although all reasonable efforts have been taken to ensure the accuracy 18 * and reliability of the software and data, the NLM and the U.S. 19 * Government do not and cannot warrant the performance or results that 20 * may be obtained by using this software or data. The NLM and the U.S. 21 * Government disclaim all warranties, express or implied, including 22 * warranties of performance, merchantability or fitness for any particular 23 * purpose. 24 * 25 * Please cite the author in any work or product based on this material. 26 * 27 * =========================================================================== 28 * 29 * Author: Kevin Bealer 30 * 31 */ 32 33 /// @file seqdbbitset.hpp 34 /// Implementation for the CSeqDB_BitSet class, a bit vector. 35 /// 36 /// Defines classes: 37 /// CSeqDB_BitSet 38 /// 39 /// Implemented for: UNIX, MS-Windows 40 41 #include <objtools/blast/seqdb_reader/impl/seqdbvol.hpp> 42 43 BEGIN_NCBI_SCOPE 44 45 /// Import definitions from the ncbi::objects namespace. 46 USING_SCOPE(objects); 47 48 /// Bit set class. 49 class CSeqDB_BitSet : public CObject { 50 // Some of this code might be more efficient if a larger word size 51 // were used. However, I think the difference would probably be 52 // tiny, and reading of OID mask file data on such platforms would 53 // then require special byte-order-aware algorithms. Working at 54 // the byte level completely sidesteps all such issues and results 55 // in simpler, clearer code. 56 57 /// Word size for vector elements. 58 typedef unsigned char TByte; 59 60 /// Some useful constants related to word size. 61 enum { 62 /// Number of bits per stored word. 63 eWordBits = 8, 64 65 /// Shift to convert from bit index to vector index. 66 eWordShift = 3, 67 68 /// Mask to compute bit index within word. 69 eWordMask = eWordBits-1 70 }; 71 72 public: 73 /// Special edge cases (all-set and all-clear). 74 enum ESpecialCase { 75 eNone, ///< Normal OID list. 76 eAllSet, ///< All OIDs are set. 77 eAllClear ///< All OIDs are clear. 78 }; 79 80 /// Default constructor for zero-length empty bit array. CSeqDB_BitSet()81 CSeqDB_BitSet() 82 : m_Start (0), 83 m_End (0), 84 m_Special(eNone) 85 { 86 _ASSERT(TByte(0) < TByte(-1)); // must be unsigned 87 } 88 89 /// Constructor for bit array with start/end range. 90 /// 91 /// This constructs a bit array with the given start and end 92 /// points. If the `sp' parameter is set to `eNone', a normal 93 /// bit array is constructed, with all bits set to `false'. If sp 94 /// is `eAllSet' or `eAllClear', the array is not constructed, but 95 /// rather an object that acts as though the given range of OIDs 96 /// is all `true' or all `false' respectively. These are termed 97 /// `special case' bit sets. Special cases tend to be much more 98 /// efficient in terms of memory usage and in the efficiency of 99 /// boolean operations. 100 /// 101 /// @param start Starting OID for this bit set. 102 /// @param end Ending OID for this bit set. 103 /// @param sp Special case (homogeneous value) arrays. CSeqDB_BitSet(size_t start,size_t end,ESpecialCase sp=eNone)104 CSeqDB_BitSet(size_t start, size_t end, ESpecialCase sp = eNone) 105 : m_Start (start), 106 m_End (end), 107 m_Special(sp) 108 { 109 _ASSERT(TByte(0) < TByte(-1)); // must be unsigned 110 111 if (sp == eNone) { 112 x_Alloc(end-start); 113 } 114 } 115 116 /// Constructor. 117 /// 118 /// This constructs a normal (eNone) bit array with the given 119 /// start and end points, and populates it from the byte data 120 /// in the memory found between addresses p1 and p2. This method 121 /// is meant for reading data from OID mask files and follows the 122 /// format of those files, but the start pointer should be to the 123 /// start of the bit map data within the file, not to the start of 124 /// the file (the file contains a header that should be skipped). 125 /// 126 /// @param start The first OID represented by this data. 127 /// @param end The OID after the last OID represented by this data. 128 /// @param p1 A pointer to the beginning of the byte data. 129 /// @param p2 A pointer to past the end of the byte data. 130 CSeqDB_BitSet(size_t start, 131 size_t end, 132 const TByte * p1, 133 const TByte * p2); 134 135 /// Set the specified bit (to true). 136 /// @param index The index of the bit to set. 137 void SetBit(size_t index); 138 139 /// Clear the specified bit (to false). 140 /// @param index The index of the bit to clear. 141 void ClearBit(size_t index); 142 143 /// Store a value at the given index. 144 /// @param index The index of the bit to clear. 145 /// @param value The value to store in this bit. 146 void AssignBit(size_t i, bool value); 147 148 /// Store the provided value in a range of bits. 149 /// @param start The index of the first bit to assign. 150 /// @param end The index after the last bit to assign. 151 /// @param value The value to store in this range. 152 void AssignBitRange(size_t start, size_t end, bool value); 153 154 /// Get the value of the specified bit. 155 /// @param index The index of the bit to get. 156 bool GetBit(size_t index) const; 157 158 /// Check if a bit is true or find the next bit that is. 159 /// 160 /// If the index points to a `false' value, the index is increased 161 /// until a `true' value is found (true is returned) or until the 162 /// index exceeds the OID range (false is returned). If the index 163 /// initially points to a `true' bit, the index will not change. 164 /// 165 /// @param index The start index, and the returned index [in|out]. 166 /// @return true if the index points to a true value. 167 bool CheckOrFindBit(size_t & index) const; 168 169 /// Swap two bitsets. 170 /// 171 /// All fields of this bitset are swapped with those of `other'. 172 /// 173 /// @param other A bitset to swap values with. 174 void Swap(CSeqDB_BitSet & other); 175 176 /// This bitset is assigned to the union of it and another. 177 /// 178 /// Each bit in this bitset will be `true' if it was true in 179 /// either this bitset or `other'. The `consume' flag can be 180 /// specified as true if the value of the `other' bitset will not 181 /// be used after this operation. Specifying `true' for consume 182 /// may change the data in the other bitset but can sometimes use 183 /// a more efficient algorithm. 184 /// 185 /// @param other The bitset to union with this bitset. 186 /// @param consume Specify true if the other bitset is expendable. 187 void UnionWith(CSeqDB_BitSet & other, bool consume); 188 189 /// This bitset is assigned to the intersection of it and another. 190 /// 191 /// Each bit in this bitset will be `true' if it was true in both 192 /// this bitset and `other'. The `consume' flag can be specified 193 /// as true if the value of the `other' bitset will not be used 194 /// after this operation. Specifying `true' for consume may 195 /// change the data in the other bitset but can sometimes use a 196 /// more efficient algorithm. 197 /// 198 /// @param other The bitset to intersect with this bitset. 199 /// @param consume Specify true if the other bitset is expendable. 200 void IntersectWith(CSeqDB_BitSet & other, bool consume); 201 202 /// If this is a special case bitset, convert it to a normal one. 203 /// 204 /// Operations on normal (`eNone') bitsets can be more expensive 205 /// in terms of memory and CPU time than on special case (eAllSet 206 /// and eAllClear) bitsets, but normal bitsets support operations 207 /// such as SetBit() and ClearBit() that special bitsets don't. 208 /// 209 /// This method checks if this bitset is a special case, and 210 /// converts it to a normal (`eNone') bitset if so. 211 void Normalize(); 212 213 private: 214 /// Set all bits that are true in `src'. 215 /// @param src The bitset to read from. 216 void x_CopyBits(const CSeqDB_BitSet & src); 217 218 /// Set all bits in the given range that are true in `src'. 219 /// @param src The bitset to read from. 220 /// @param begin The start of the index range to read. 221 /// @param end The index past the end of the index range. 222 void x_CopyBits(const CSeqDB_BitSet & src, size_t start, size_t end); 223 224 /// Set this bitset to the value of the provided one. 225 /// 226 /// This is like a normal "copy assignment" operation, except that 227 /// if `consume' is specified as true, a more efficient algorithm 228 /// may be used. (Implementation: if `consume' is true, and the 229 /// source is a normal (eNone) bitset, this is a `swap'.) 230 /// 231 /// @param other The bitset to copy. 232 /// @param consume Specify true if the other bitset is expendable. 233 void x_Copy(CSeqDB_BitSet & other, bool consume); 234 235 /// Replace `special' with normal bitsets, adjust the index range. 236 /// 237 /// If this bitset is special, it becomes a normal bitset. If the 238 /// start or end point is outside of the current one, the bitset 239 /// expands (but does not contract). All bits that are `true' in 240 /// the initial bitset will be true in the resulting bitset. 241 /// 242 /// @param start Move start point down (but not up) to here. 243 /// @param end Move end point up (but not down) to here. 244 void x_Normalize(size_t start, size_t end); 245 246 /// Allocate memory for the bit data. 247 /// @param bits Allocate enough storage to hold this many bits. x_Alloc(size_t bits)248 void x_Alloc(size_t bits) 249 { 250 m_Bits.resize((bits + eWordBits - 1) >> eWordShift); 251 } 252 253 /// Prevent copy construction. 254 CSeqDB_BitSet(const CSeqDB_BitSet &); 255 256 /// Prevent copy assignment. 257 CSeqDB_BitSet & operator=(const CSeqDB_BitSet &); 258 259 /// Number of bits stored here. 260 size_t m_Start; 261 262 /// Number of bits stored here. 263 size_t m_End; 264 265 /// Special edge cases. 266 ESpecialCase m_Special; 267 268 /// Representation of bit data. 269 vector<TByte> m_Bits; 270 271 public: 272 /// Allows to dump a snapshot of the object 273 /// @todo this doesn't do anything for locality eRemote 274 void DebugDump(CDebugDumpContext ddc, unsigned int depth) const; 275 }; 276 277 END_NCBI_SCOPE 278 279 #endif // OBJTOOLS_READERS_SEQDB__SEQDBBITSET_HPP 280 281