1 /* 2 * Copyright (c) 2015-2017, Intel Corporation 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions are met: 6 * 7 * * Redistributions of source code must retain the above copyright notice, 8 * this list of conditions and the following disclaimer. 9 * * Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * * Neither the name of Intel Corporation nor the names of its contributors 13 * may be used to endorse or promote products derived from this software 14 * without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /** \file 30 * \brief Class for representing character reachability. 31 * 32 * This is a simple (but hopefully fast) class for representing 8-bit character 33 * reachability, along with a bunch of useful operations. 34 */ 35 36 #ifndef NG_CHARREACH_H 37 #define NG_CHARREACH_H 38 39 #include "ue2common.h" 40 #include "util/bitfield.h" 41 42 #include <string> 43 44 namespace ue2 { 45 46 class CharReach { 47 private: 48 /// Underlying storage. 49 ue2::bitfield<256> bits; 50 51 public: 52 static constexpr size_t npos = decltype(bits)::npos; //!< One past the max value. 53 54 /// Empty constructor. CharReach()55 CharReach() {} 56 57 /// Constructor for a character class containing a single char. CharReach(unsigned char c)58 explicit CharReach(unsigned char c) { set(c); } 59 60 /// Constructor for a character class representing a contiguous range of 61 /// chars, inclusive. CharReach(unsigned char from,unsigned char to)62 CharReach(unsigned char from, unsigned char to) { setRange(from, to); } 63 64 /// Constructor for a character class based on the set of chars in a 65 /// string. CharReach(const std::string & str)66 explicit CharReach(const std::string &str) { set(str); } 67 68 /// Returns total capacity. size()69 static constexpr size_t size() { return npos; } 70 71 /// Returns a CharReach with complete reachability (a "dot"). dot()72 static CharReach dot() { return CharReach(0, 255); } 73 74 /// Complete bitset equality. 75 bool operator==(const CharReach &a) const { return bits == a.bits; } 76 77 /// Inequality. 78 bool operator!=(const CharReach &a) const { return bits != a.bits; } 79 80 /// Ordering. 81 bool operator<(const CharReach &a) const { return bits < a.bits; } 82 83 /// Set all bits. setall()84 void setall() { bits.setall(); } 85 86 /// Clear all bits. clear()87 void clear() { bits.clear(); } 88 89 /// Clear bit N. clear(unsigned char n)90 void clear(unsigned char n) { bits.clear(n); } 91 92 /// Set bit N. set(unsigned char n)93 void set(unsigned char n) { bits.set(n); } 94 95 /// Test bit N. test(unsigned char n)96 bool test(unsigned char n) const { return bits.test(n); } 97 98 /// Flip bit N. flip(unsigned char n)99 void flip(unsigned char n) { bits.flip(n); } 100 101 /// Flip all bits. flip()102 void flip() { bits.flip(); } 103 104 // Switch on the bit in the range (from, to), inclusive. setRange(unsigned char from,unsigned char to)105 void setRange(unsigned char from, unsigned char to) { 106 bits.set_range(from, to); 107 } 108 109 // Switch on the bits corresponding to the characters in \a s. 110 void set(const std::string &s); 111 112 /// Returns number of bits set on. count()113 size_t count() const { return bits.count(); } 114 115 /// Are no bits set? none()116 bool none() const { return bits.none(); } 117 118 /// Is any bit set? any()119 bool any() const { return bits.any(); } 120 121 /// Are all bits set? all()122 bool all() const { return bits.all(); } 123 124 /// Returns first bit set, or CharReach::npos if none set. find_first()125 size_t find_first() const { return bits.find_first(); } 126 127 /// Returns last bit set, or CharReach::npos if none set. find_last()128 size_t find_last() const { return bits.find_last(); } 129 130 /// Returns next bit set, or CharReach::npos if none set after n. find_next(size_t last)131 size_t find_next(size_t last) const { return bits.find_next(last); } 132 133 /// Returns (zero-based) N'th bit set, or CharReach::npos if fewer than 134 /// N + 1 bits are on. find_nth(size_t n)135 size_t find_nth(size_t n) const { return bits.find_nth(n); } 136 137 /// Bitwise OR. 138 CharReach operator|(const CharReach &a) const { 139 CharReach cr(*this); 140 cr.bits |= a.bits; 141 return cr; 142 } 143 144 /// Bitwise OR-equals. 145 void operator|=(const CharReach &a) { bits |= a.bits; } 146 147 /// Bitwise AND. 148 CharReach operator&(const CharReach &a) const { 149 CharReach cr(*this); 150 cr.bits &= a.bits; 151 return cr; 152 } 153 154 /// Bitwise AND-equals. 155 void operator&=(const CharReach &a) { bits &= a.bits; } 156 157 /// Bitwise XOR. 158 CharReach operator^(const CharReach &a) const { 159 CharReach cr(*this); 160 cr.bits ^= a.bits; 161 return cr; 162 } 163 164 /// Bitwise complement. 165 CharReach operator~(void) const { 166 CharReach cr(*this); 167 cr.flip(); 168 return cr; 169 } 170 171 /// Do we only contain bits representing alpha characters? 172 bool isAlpha() const; 173 174 /// Do we represent an uppercase/lowercase pair? 175 bool isCaselessChar() const; 176 177 /// Do we represent a cheapskate caseless set? 178 bool isBit5Insensitive() const; 179 180 /// Return a string containing the characters that are switched on. 181 std::string to_string() const; 182 183 /// Hash of enabled bits. hash()184 size_t hash() const { return bits.hash(); } 185 186 /// True if this character class is a subset of \a other. 187 bool isSubsetOf(const CharReach &other) const; 188 }; 189 190 /** \brief True iff there is a non-empty intersection between \a and \a b */ 191 bool overlaps(const CharReach &a, const CharReach &b); 192 193 /** \brief True iff \a small is a subset of \a big. */ 194 bool isSubsetOf(const CharReach &small, const CharReach &big); 195 196 bool isutf8ascii(const CharReach &cr); 197 bool isutf8start(const CharReach &cr); 198 199 } // namespace ue2 200 201 namespace std { 202 203 template<> 204 struct hash<ue2::CharReach> { 205 size_t operator()(const ue2::CharReach &cr) const { 206 return cr.hash(); 207 } 208 }; 209 210 } // namespace std 211 212 #endif // NG_CHARREACH_H 213