1 // Scintilla source code edit control
2 /** @file CharacterSet.h
3  ** Encapsulates a set of characters. Used to test if a character is within a set.
4  **/
5 // Copyright 2007 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7 
8 #ifndef CHARACTERSET_H
9 #define CHARACTERSET_H
10 
11 namespace Scintilla {
12 
/**
 * A set of character values held as a heap-allocated table of flags.
 * Values in [0, size) are looked up in the table; values at or beyond
 * size are reported as members according to valueAfter.
 */
class CharacterSet {
	int size;          // Number of entries in bset
	bool valueAfter;   // Answer given by Contains for values >= size
	bool *bset;        // Owned flag table, released with delete[]
public:
	// Bit flags selecting predefined groups of ASCII characters.
	enum setBase {
		setNone=0,
		setLower=1,
		setUpper=2,
		setDigits=4,
		setAlpha=setLower|setUpper,
		setAlphaNum=setAlpha|setDigits
	};
	/**
	 * Build a set of size_ entries, all initially absent, then add the
	 * characters of initialSet followed by the groups selected by base.
	 */
	CharacterSet(setBase base=setNone, const char *initialSet="", int size_=0x80, bool valueAfter_=false) :
		size(size_),
		valueAfter(valueAfter_),
		bset(new bool[size_]()) {	// () value-initialises every flag to false
		AddString(initialSet);
		if (base & setLower)
			AddString("abcdefghijklmnopqrstuvwxyz");
		if (base & setUpper)
			AddString("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
		if (base & setDigits)
			AddString("0123456789");
	}
	// Deep copy: the new object gets its own table with the same contents.
	CharacterSet(const CharacterSet &other) :
		size(other.size),
		valueAfter(other.valueAfter),
		bset(new bool[other.size]) {
		for (int pos = 0; pos < size; pos++) {
			bset[pos] = other.bset[pos];
		}
	}
	// Take over the table of other, leaving it empty (size 0, no table).
	CharacterSet(CharacterSet &&other) noexcept :
		size(other.size),
		valueAfter(other.valueAfter),
		bset(other.bset) {
		other.bset = nullptr;
		other.size = 0;
	}
	// Deep copy assignment; self-assignment is a no-op.
	CharacterSet &operator=(const CharacterSet &other) {
		if (this == &other)
			return *this;
		// Allocate and fill the replacement first so that a failed
		// allocation leaves this object untouched.
		bool *replacement = new bool[other.size];
		for (int pos = 0; pos < other.size; pos++) {
			replacement[pos] = other.bset[pos];
		}
		delete []bset;
		bset = replacement;
		size = other.size;
		valueAfter = other.valueAfter;
		return *this;
	}
	// Move assignment: release the current table and adopt the one from other.
	CharacterSet &operator=(CharacterSet &&other) noexcept {
		if (this == &other)
			return *this;
		delete []bset;
		bset = other.bset;
		size = other.size;
		valueAfter = other.valueAfter;
		other.bset = nullptr;
		other.size = 0;
		return *this;
	}
	~CharacterSet() {
		delete []bset;
		size = 0;
		bset = nullptr;
	}
	// Add a single value; val must lie in [0, size), checked only by assert.
	void Add(int val) {
		assert(val >= 0);
		assert(val < size);
		bset[val] = true;
	}
	// Add every character of the NUL-terminated string setToAdd.
	// Each character, read as unsigned char, must be below size (checked only by assert).
	void AddString(const char *setToAdd) {
		while (*setToAdd) {
			const unsigned char uch = *setToAdd++;
			assert(uch < size);
			bset[uch] = true;
		}
	}
	// Membership test: negative values are never members, values >= size yield valueAfter.
	bool Contains(int val) const noexcept {
		assert(val >= 0);
		if (val < 0)
			return false;
		if (val >= size)
			return valueAfter;
		return bset[val];
	}
	// Overload for char: char may be signed, so convert through unsigned char
	// to obtain a non-negative value before delegating to the int version.
	bool Contains(char ch) const noexcept {
		return Contains(static_cast<int>(static_cast<unsigned char>(ch)));
	}
};
108 
109 // Functions for classifying characters
110 
// True for the space character and for the values 0x09 to 0x0d inclusive.
constexpr bool IsASpace(int ch) noexcept {
	return ((0x09 <= ch) && (ch <= 0x0d)) || (ch == ' ');
}
114 
// True only for the space character and the horizontal tab.
constexpr bool IsASpaceOrTab(int ch) noexcept {
	return (ch == '\t') || (ch == ' ');
}
118 
// True when ch is one of the decimal digits '0' to '9'.
constexpr bool IsADigit(int ch) noexcept {
	return ('0' <= ch) && (ch <= '9');
}
122 
// Test whether ch is a digit in the given base.
// For base <= 10 only the first base characters starting at '0' are accepted,
// so a base of zero or less accepts nothing. For larger bases '0' to '9' are
// accepted together with the first (base - 10) letters counted from 'A' or 'a'.
// The base is not limited to 36: larger values extend the accepted ranges
// beyond 'Z' and 'z'.
// Written as a single return expression so it can be constexpr like the other
// classifiers. Offsets are computed as (ch - first) only after ch >= first
// has been established, so no addition can overflow however large base is.
constexpr bool IsADigit(int ch, int base) noexcept {
	return (base <= 10) ?
		((ch >= '0') && (ch - '0' < base)) :
		(((ch >= '0') && (ch <= '9')) ||
		 ((ch >= 'A') && (ch - 'A' < base - 10)) ||
		 ((ch >= 'a') && (ch - 'a' < base - 10)));
}
132 
// True when ch lies in the 7-bit range 0 to 0x7f inclusive.
constexpr bool IsASCII(int ch) noexcept {
	return (0 <= ch) && (ch <= 0x7f);
}
136 
// True when ch is one of the letters 'a' to 'z'.
constexpr bool IsLowerCase(int ch) noexcept {
	return ('a' <= ch) && (ch <= 'z');
}
140 
// True when ch is one of the letters 'A' to 'Z'.
constexpr bool IsUpperCase(int ch) noexcept {
	return ('A' <= ch) && (ch <= 'Z');
}
144 
IsUpperOrLowerCase(int ch)145 constexpr bool IsUpperOrLowerCase(int ch) noexcept {
146 	return IsUpperCase(ch) || IsLowerCase(ch);
147 }
148 
// True when ch is one of 'A' to 'Z', 'a' to 'z' or '0' to '9'.
constexpr bool IsAlphaNumeric(int ch) noexcept {
	return (('A' <= ch) && (ch <= 'Z')) ||
		(('a' <= ch) && (ch <= 'z')) ||
		(('0' <= ch) && (ch <= '9'));
}
155 
/**
 * Test for a space character: the space itself or a value from 0x09 to 0x0d.
 * Only ASCII is considered, and any value of 0x80 or above is simply
 * reported as not a space.
 */
constexpr bool isspacechar(int ch) noexcept {
	return ((0x09 <= ch) && (ch <= 0x0d)) || (ch == ' ');
}
163 
iswordchar(int ch)164 constexpr bool iswordchar(int ch) noexcept {
165 	return IsAlphaNumeric(ch) || ch == '.' || ch == '_';
166 }
167 
iswordstart(int ch)168 constexpr bool iswordstart(int ch) noexcept {
169 	return IsAlphaNumeric(ch) || ch == '_';
170 }
171 
// True when ch is one of the punctuation characters treated as an operator:
//   % ^ & * ( ) - + = | { } [ ] : ; < > , / ? ! . ~
// Every other value, including all letters and digits, gives false.
// None of the listed characters is a letter or digit, so a separate
// alphanumeric pre-check cannot change the result and is not needed.
// Written as a single return expression so it can be constexpr like the
// other classification functions.
constexpr bool isoperator(int ch) noexcept {
	return ch == '%' || ch == '^' || ch == '&' || ch == '*' ||
	       ch == '(' || ch == ')' || ch == '-' || ch == '+' ||
	       ch == '=' || ch == '|' || ch == '{' || ch == '}' ||
	       ch == '[' || ch == ']' || ch == ':' || ch == ';' ||
	       ch == '<' || ch == '>' || ch == ',' || ch == '/' ||
	       ch == '?' || ch == '!' || ch == '.' || ch == '~';
}
184 
185 // Simple case functions for ASCII supersets.
186 
// Convert 'a' to 'z' to the matching upper case letter and return every
// other value unchanged.
// A single return expression keeps the function usable as constexpr, and the
// explicit cast makes the conversion of the int arithmetic result back to T
// visible instead of relying on an implicit narrowing conversion.
template <typename T>
constexpr T MakeUpperCase(T ch) noexcept {
	return (ch < 'a' || ch > 'z') ? ch : static_cast<T>(ch - 'a' + 'A');
}
194 
// Convert 'A' to 'Z' to the matching lower case letter and return every
// other value unchanged.
// A single return expression keeps the function usable as constexpr, and the
// explicit cast makes the conversion of the int arithmetic result back to T
// visible instead of relying on an implicit narrowing conversion.
template <typename T>
constexpr T MakeLowerCase(T ch) noexcept {
	return (ch < 'A' || ch > 'Z') ? ch : static_cast<T>(ch - 'A' + 'a');
}
202 
// Declaration only; the definition is not part of this header.
// Takes two C strings and yields an int.
// NOTE(review): going by the name this compares a and b ignoring case,
// presumably with a strcmp-style negative/zero/positive result - confirm
// against the definition.
int CompareCaseInsensitive(const char *a, const char *b) noexcept;
// Declaration only; the definition is not part of this header.
// Takes two C strings plus a length and yields an int.
// NOTE(review): going by the name this is a case-insensitive comparison
// limited by len, presumably to at most len characters in the manner of
// strncmp - confirm against the definition.
int CompareNCaseInsensitive(const char *a, const char *b, size_t len) noexcept;
205 
206 }
207 
208 #endif
209