1 // Scintilla source code edit control
2 /** @file CharacterSet.h
3  ** Encapsulates a set of characters. Used to test if a character is within a set.
4  **/
5 // Copyright 2007 by Neil Hodgson <neilh@scintilla.org>
6 // The License.txt file describes the conditions under which this software may be distributed.
7 
8 #ifndef CHARACTERSET_H
9 #define CHARACTERSET_H
10 
11 #ifdef SCI_NAMESPACE
12 namespace Scintilla {
13 #endif
14 
/**
 * A set of character values stored as a heap-allocated table of flags.
 *
 * Values in the range [0, size) are looked up in the table. Values at or
 * beyond size are reported as valueAfter, and negative values are never
 * members. Each object owns its table: copying and assignment duplicate it.
 *
 * The class uses assert(), so the includer must make that macro available
 * before this header is seen.
 */
class CharacterSet {
	int size;          ///< Number of entries in bset.
	bool valueAfter;   ///< Result of Contains() for values >= size.
	bool *bset;        ///< Owned table; bset[v] is true when v is a member.
public:
	/// Bit flags selecting predefined ASCII ranges to add on construction.
	enum setBase {
		setNone=0,
		setLower=1,
		setUpper=2,
		setDigits=4,
		setAlpha=setLower|setUpper,
		setAlphaNum=setAlpha|setDigits
	};
	/**
	 * Build a set from an optional predefined range plus explicit characters.
	 * @param base        Combination of setBase flags to include.
	 * @param initialSet  NUL-terminated list of extra members; a null
	 *                    pointer is treated as an empty list.
	 * @param size_       Number of table entries.
	 * @param valueAfter_ Value reported for arguments >= size_.
	 */
	CharacterSet(setBase base=setNone, const char *initialSet="", int size_=0x80, bool valueAfter_=false) {
		size = size_;
		valueAfter = valueAfter_;
		bset = new bool[size];
		for (int i=0; i < size; i++) {
			bset[i] = false;
		}
		AddString(initialSet);
		if (base & setLower)
			AddString("abcdefghijklmnopqrstuvwxyz");
		if (base & setUpper)
			AddString("ABCDEFGHIJKLMNOPQRSTUVWXYZ");
		if (base & setDigits)
			AddString("0123456789");
	}
	/// Copy constructor: allocates a new table and copies every flag.
	CharacterSet(const CharacterSet &other) {
		size = other.size;
		valueAfter = other.valueAfter;
		bset = new bool[size];
		for (int i=0; i < size; i++) {
			bset[i] = other.bset[i];
		}
	}
	/// Releases the table and resets the members.
	~CharacterSet() {
		delete []bset;
		bset = 0;
		size = 0;
	}
	/**
	 * Copy assignment. The replacement table is allocated and filled before
	 * the current one is deleted, so a failed allocation leaves this object
	 * unchanged. Self-assignment is a no-op.
	 */
	CharacterSet &operator=(const CharacterSet &other) {
		if (this != &other) {
			bool *bsetNew = new bool[other.size];
			for (int i=0; i < other.size; i++) {
				bsetNew[i] = other.bset[i];
			}
			delete []bset;
			size = other.size;
			valueAfter = other.valueAfter;
			bset = bsetNew;
		}
		return *this;
	}
	/**
	 * Add a single value to the set.
	 * Out-of-range values trip the asserts in debug builds. When asserts are
	 * compiled out they are ignored rather than written outside the table,
	 * matching the guarded behaviour of Contains().
	 */
	void Add(int val) {
		assert(val >= 0);
		assert(val < size);
		if ((val >= 0) && (val < size))
			bset[val] = true;
	}
	/**
	 * Add every character of a NUL-terminated string to the set.
	 * Characters are taken as unsigned char values so bytes >= 0x80 never
	 * become negative indices. A null pointer adds nothing.
	 */
	void AddString(const char *setToAdd) {
		if (!setToAdd)
			return;
		for (const char *cp=setToAdd; *cp; cp++) {
			Add(static_cast<unsigned char>(*cp));
		}
	}
	/**
	 * Test membership.
	 * @return false for negative values, the table entry for values below
	 *         size, and valueAfter otherwise.
	 */
	bool Contains(int val) const {
		assert(val >= 0);
		if (val < 0) return false;
		return (val < size) ? bset[val] : valueAfter;
	}
};
88 
89 // Functions for classifying characters
90 
/// True for the ASCII space and for the control characters 0x09 through 0x0d.
inline bool IsASpace(int ch) {
	if (ch == ' ')
		return true;
	return !((ch < 0x09) || (ch > 0x0d));
}
94 
/// True only for the ASCII space and horizontal tab characters.
inline bool IsASpaceOrTab(int ch) {
	switch (ch) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
98 
/// True when ch is one of the ASCII decimal digits '0' through '9'.
inline bool IsADigit(int ch) {
	if (ch < '0')
		return false;
	return ch <= '9';
}
102 
/**
 * Test whether ch is a digit in the given numeric base.
 * For bases up to 10 only the characters '0' .. '0'+base-1 qualify.
 * For larger bases the decimal digits qualify, along with the first
 * base-10 letters in either case.
 * Only 26 letters exist, so the letter span is capped at 'Z'/'z': bases
 * above 36 behave like base 36 instead of accepting the punctuation that
 * follows the alphabet (and instead of overflowing for huge bases).
 * @param ch   Character value to classify.
 * @param base Numeric base; values below 1 match nothing.
 */
inline bool IsADigit(int ch, int base) {
	if (base <= 10) {
		return (ch >= '0') && (ch < '0' + base);
	}
	const int letters = ((base > 36) ? 36 : base) - 10;
	return ((ch >= '0') && (ch <= '9')) ||
	       ((ch >= 'A') && (ch < 'A' + letters)) ||
	       ((ch >= 'a') && (ch < 'a' + letters));
}
112 
/// True when ch lies in the 7-bit ASCII range 0 .. 0x7f.
inline bool IsASCII(int ch) {
	return !((ch < 0) || (ch >= 0x80));
}
116 
/// True when ch is an ASCII lower case letter 'a' through 'z'.
inline bool IsLowerCase(int ch) {
	if (ch < 'a')
		return false;
	return ch <= 'z';
}
120 
/// True when ch is an ASCII upper case letter 'A' through 'Z'.
inline bool IsUpperCase(int ch) {
	return !((ch < 'A') || (ch > 'Z'));
}
124 
/// True when ch is an ASCII decimal digit or an ASCII letter of either case.
inline bool IsAlphaNumeric(int ch) {
	if ((ch >= '0') && (ch <= '9'))
		return true;
	if ((ch >= 'a') && (ch <= 'z'))
		return true;
	return (ch >= 'A') && (ch <= 'Z');
}
131 
/**
 * Report whether ch is a white space character: the ASCII space or one of
 * the control characters 0x09 through 0x0d.
 * Only plain integer comparisons are used, so values >= 0x80 are simply
 * reported as not being spaces.
 */
inline bool isspacechar(int ch) {
	const bool isControlSpace = (ch >= 0x09) && (ch <= 0x0d);
	return isControlSpace || (ch == ' ');
}
139 
iswordchar(int ch)140 inline bool iswordchar(int ch) {
141 	return IsAlphaNumeric(ch) || ch == '.' || ch == '_';
142 }
143 
iswordstart(int ch)144 inline bool iswordstart(int ch) {
145 	return IsAlphaNumeric(ch) || ch == '_';
146 }
147 
isoperator(int ch)148 inline bool isoperator(int ch) {
149 	if (IsAlphaNumeric(ch))
150 		return false;
151 	if (ch == '%' || ch == '^' || ch == '&' || ch == '*' ||
152 	        ch == '(' || ch == ')' || ch == '-' || ch == '+' ||
153 	        ch == '=' || ch == '|' || ch == '{' || ch == '}' ||
154 	        ch == '[' || ch == ']' || ch == ':' || ch == ';' ||
155 	        ch == '<' || ch == '>' || ch == ',' || ch == '/' ||
156 	        ch == '?' || ch == '!' || ch == '.' || ch == '~')
157 		return true;
158 	return false;
159 }
160 
161 // Simple case functions for ASCII.
162 
/// Map an ASCII lower case letter to upper case; any other char is returned unchanged.
inline char MakeUpperCase(char ch) {
	const bool isLower = (ch >= 'a') && (ch <= 'z');
	return isLower ? static_cast<char>(ch - 'a' + 'A') : ch;
}
169 
// Comparison functions declared here; their definitions are not in this header.
// NOTE(review): judging by the names these compare a and b ignoring case,
// presumably returning <0 / 0 / >0 like strcmp -- confirm against the definitions.
int CompareCaseInsensitive(const char *a, const char *b);
// As above but taking a length argument; presumably at most len characters
// are compared (TODO confirm). No header is included in this file, so the
// includer must already have made size_t visible.
int CompareNCaseInsensitive(const char *a, const char *b, size_t len);
172 
173 #ifdef SCI_NAMESPACE
174 }
175 #endif
176 
177 #endif
178