1 /*
2    AngelCode Scripting Library
3    Copyright (c) 2003-2017 Andreas Jonsson
4 
5    This software is provided 'as-is', without any express or implied
6    warranty. In no event will the authors be held liable for any
7    damages arising from the use of this software.
8 
9    Permission is granted to anyone to use this software for any
10    purpose, including commercial applications, and to alter it and
11    redistribute it freely, subject to the following restrictions:
12 
13    1. The origin of this software must not be misrepresented; you
14       must not claim that you wrote the original software. If you use
15       this software in a product, an acknowledgment in the product
16       documentation would be appreciated but is not required.
17 
18    2. Altered source versions must be plainly marked as such, and
19       must not be misrepresented as being the original software.
20 
21    3. This notice may not be removed or altered from any source
22       distribution.
23 
24    The original version of this library can be located at:
25    http://www.angelcode.com/angelscript/
26 
27    Andreas Jonsson
28    andreas@angelcode.com
29 
30 */
31 
32 #include "as_config.h"
33 
34 #include <string.h>     // some compilers declare memcpy() here
35 #include <math.h>       // pow()
36 
37 #if !defined(AS_NO_MEMORY_H)
38 #include <memory.h>
39 #endif
40 
41 #include "as_string.h"
42 #include "as_string_util.h"
43 
44 BEGIN_AS_NAMESPACE
45 
// Compares two length-delimited byte strings.
//
// The bytes that both strings have in common are compared with memcmp(), and
// if they differ the memcmp() result is returned as is. When one string is a
// proper prefix of the other, the function returns 1 if the first string is
// the shorter one and -1 if the first string is the longer one. Note that this
// is the opposite of the usual lexicographic convention for prefixes.
//
// A null str2 is treated as an empty string. str1 is only read when len1 > 0.
// Returns 0 when the strings are equal.
int asCompareStrings(const char *str1, size_t len1, const char *str2, size_t len2)
{
	// An empty first string is only equal to an empty (or null) second string
	if( len1 == 0 )
		return ( str2 == 0 || len2 == 0 ) ? 0 : 1;

	// The first string is known to be non-empty here, so a missing
	// second string means the first one is the longer of the two
	if( str2 == 0 )
		return -1;

	// Compare the part that both strings have in common
	const size_t common = ( len2 < len1 ) ? len2 : len1;
	const int diff = memcmp(str1, str2, common);
	if( diff != 0 )
		return diff;

	// The common part is identical, so the lengths decide the outcome
	if( len2 < len1 ) return -1;
	if( len1 < len2 ) return 1;

	return 0;
}
76 
// Scans an unsigned decimal floating point number from the start of the string.
//
// The accepted form is: digits, optionally followed by '.' and more digits,
// optionally followed by 'e' or 'E', an optional '+' or '-', and the exponent
// digits. No leading sign is handled, because the tokenizer delivers the sign
// as a separate token.
//
// string      The text to scan. Scanning stops at the first character that
//             doesn't fit the form above, so the text must end with such a
//             character (e.g. the null terminator).
// numScanned  Optional. Receives the number of characters consumed.
//
// Returns the scanned value. A zero mantissa always gives 0, whatever the exponent.
double asStringScanDouble(const char *string, size_t *numScanned)
{
	// A private implementation is used instead of strtod() because that function
	// doesn't seem to be present on all systems. iOS 5 for example doesn't appear
	// to include it in the standard lib.

	// Another reason is that the standard implementation of strtod() is dependent
	// on the locale on some systems, i.e. it may use comma instead of dot for
	// the decimal indicator. This can be avoided by forcing the locale to "C" with
	// setlocale(), but this is another thing that is highly platform dependent.

	// Once the exponent has passed this limit no further digits are accumulated.
	// pow() already saturates to infinity (or zero for negative exponents) long
	// before this, so the result is unaffected while the int can never overflow.
	const int maxExponent = 9999;

	double value = 0;
	size_t c = 0;

	// Parse the integer value
	while( string[c] >= '0' && string[c] <= '9' )
	{
		value = value*10 + (double)(string[c] - '0');
		c++;
	}

	if( string[c] == '.' )
	{
		c++;

		// Parse the fraction, each digit weighing a tenth of the previous one
		double fraction = 0.1;
		while( string[c] >= '0' && string[c] <= '9' )
		{
			value += fraction * (double)(string[c] - '0');
			fraction *= 0.1;
			c++;
		}
	}

	int exponent = 0;
	int exponentSign = 1;
	if( string[c] == 'e' || string[c] == 'E' )
	{
		c++;

		// Parse the sign of the exponent
		if( string[c] == '-' )
		{
			exponentSign = -1;
			c++;
		}
		else if( string[c] == '+' )
			c++;

		// Parse the exponent value. All digits are consumed,
		// but they stop contributing once the limit is passed.
		while( string[c] >= '0' && string[c] <= '9' )
		{
			if( exponent <= maxExponent )
				exponent = exponent*10 + (string[c] - '0');
			c++;
		}
	}

	// A zero mantissa must stay zero. Without this check a large positive
	// exponent would compute 0 * infinity, which is NaN.
	if( exponent != 0 && value != 0 )
		value *= pow(10.0, exponentSign * exponent);

	if( numScanned )
		*numScanned = c;

	return value;
}
162 
// Translates a single character into its numeric digit value.
// '0'-'9' map to 0-9, and both 'A'-'Z' and 'a'-'z' map to 10-35.
// Returns -1 if the character is not a digit or letter, or if
// its value is not less than the radix.
static int asCharToNbr(char ch, int radix)
{
	int digit;

	if( ch >= '0' && ch <= '9' )
		digit = ch - '0';
	else if( ch >= 'A' && ch <= 'Z' )
		digit = ch - 'A' + 10;
	else if( ch >= 'a' && ch <= 'z' )
		digit = ch - 'a' + 10;
	else
		return -1;

	return digit < radix ? digit : -1;
}
172 
173 // If base is 0 the string should be prefixed by 0x, 0d, 0o, or 0b to allow the function to automatically determine the radix
asStringScanUInt64(const char * string,int base,size_t * numScanned,bool * overflow)174 asQWORD asStringScanUInt64(const char *string, int base, size_t *numScanned, bool *overflow)
175 {
176 	asASSERT(base == 10 || base == 16 || base == 0);
177 
178 	if (overflow)
179 		*overflow = false;
180 
181 	const char *end = string;
182 
183 	static const asQWORD QWORD_MAX = (~asQWORD(0));
184 
185 	asQWORD res = 0;
186 	if( base == 10 )
187 	{
188 		while( *end >= '0' && *end <= '9' )
189 		{
190 			if( overflow && ((res > QWORD_MAX / 10) || ((asUINT(*end - '0') > (QWORD_MAX - (QWORD_MAX / 10) * 10)) && res == QWORD_MAX / 10)) )
191 				*overflow = true;
192 			res *= 10;
193 			res += *end++ - '0';
194 		}
195 	}
196 	else
197 	{
198 		if( base == 0 && string[0] == '0')
199 		{
200 			// Determine the radix from the prefix
201 			switch( string[1] )
202 			{
203 			case 'b': case 'B': base = 2; break;
204 			case 'o': case 'O': base = 8; break;
205 			case 'd': case 'D': base = 10; break;
206 			case 'x': case 'X': base = 16; break;
207 			}
208 			end += 2;
209 		}
210 
211 		asASSERT( base );
212 
213 		if( base )
214 		{
215 			for (int nbr; (nbr = asCharToNbr(*end, base)) >= 0; end++)
216 			{
217 				if (overflow && ((res > QWORD_MAX / base) || ((asUINT(nbr) > (QWORD_MAX - (QWORD_MAX / base) * base)) && res == QWORD_MAX / base)) )
218 					*overflow = true;
219 
220 				res = res * base + nbr;
221 			}
222 		}
223 	}
224 
225 	if( numScanned )
226 		*numScanned = end - string;
227 
228 	return res;
229 }
230 
//
// Encodes a unicode code point as UTF-8 and stores the bytes in outEncodedBuffer,
// which must have room for up to 4 bytes. The return value is the number of bytes
// written (1 to 4).
//
// If the value is not a valid unicode code point, i.e. it is in the surrogate
// range 0xD800 to 0xDFFF or above 0x10FFFF, nothing is written and the function
// returns -1.
//
// This function is taken from the AngelCode ToolBox.
//
int asStringEncodeUTF8(unsigned int value, char *outEncodedBuffer)
{
	unsigned char *out = (unsigned char*)outEncodedBuffer;

	// Plain ASCII is stored as a single byte
	if( value <= 0x7F )
	{
		out[0] = (unsigned char)value;
		return 1;
	}

	// Determine the total number of bytes and the marker bits of the first byte
	int numBytes;
	unsigned int leadMarker;
	if( value <= 0x7FF )
	{
		numBytes = 2;
		leadMarker = 0xC0;
	}
	else if( value <= 0xFFFF )
	{
		// Values 0xD800 to 0xDFFF are not valid unicode characters
		if( value >= 0xD800 && value <= 0xDFFF )
			return -1;

		numBytes = 3;
		leadMarker = 0xE0;
	}
	else if( value <= 0x10FFFF )
	{
		numBytes = 4;
		leadMarker = 0xF0;
	}
	else
		return -1;

	// Fill in the continuation bytes from the last to the second,
	// each one taking the 6 lowest bits that remain
	unsigned int remaining = value;
	for( int idx = numBytes - 1; idx > 0; idx-- )
	{
		out[idx] = (unsigned char)(0x80 | (remaining & 0x3F));
		remaining >>= 6;
	}

	// What is left goes into the first byte together with the marker
	out[0] = (unsigned char)(leadMarker | remaining);

	return numBytes;
}
276 
//
// Decodes the UTF-8 sequence at the start of encodedBuffer and returns the
// unicode code point. On success outLength, if given, receives the number of
// bytes that were decoded (1 to 4).
//
// The function returns -1, leaving outLength untouched, if the bytes do not
// form a valid sequence:
//  - the first byte is not a valid lead byte
//  - a continuation byte is missing
//  - the sequence is longer than necessary for the value (overlong encoding)
//  - the value is in the surrogate range 0xD800 to 0xDFFF
//  - the value is above 0x10FFFF
//
// Bytes are read only up to and including the first one that breaks the
// sequence, so a null terminated buffer is never read beyond the terminator.
//
// This function is taken from the AngelCode ToolBox.
//
int asStringDecodeUTF8(const char *encodedBuffer, unsigned int *outLength)
{
	const unsigned char *buf = (const unsigned char*)encodedBuffer;

	const unsigned char lead = buf[0];
	if( (lead & 0x80) == 0 )
	{
		// This is the only byte
		if( outLength ) *outLength = 1;
		return lead;
	}

	int value;
	int length;
	int minValue; // The smallest value that really needs this many bytes
	if( (lead & 0xE0) == 0xC0 )
	{
		// There is one more byte
		value    = lead & 0x1F;
		length   = 2;
		minValue = 0x80;

		// The value at this moment must not be less than 2, because
		// that should have been encoded with one byte only. This is
		// checked before the next byte is read.
		if( value < 2 )
			return -1;
	}
	else if( (lead & 0xF0) == 0xE0 )
	{
		// There are two more bytes
		value    = lead & 0x0F;
		length   = 3;
		minValue = 0x800;
	}
	else if( (lead & 0xF8) == 0xF0 )
	{
		// There are three more bytes
		value    = lead & 0x07;
		length   = 4;
		minValue = 0x10000;
	}
	else
	{
		// A lone continuation byte or an unsupported lead byte
		return -1;
	}

	// Each continuation byte adds 6 more bits to the value
	for( int n = 1; n < length; n++ )
	{
		const unsigned char trail = buf[n];
		if( (trail & 0xC0) != 0x80 )
			return -1;

		value = (value << 6) + (int)(trail & 0x3F);
	}

	// Overlong encodings, surrogates, and values beyond the
	// unicode range are not valid even if the bits are well formed
	if( value < minValue )
		return -1;
	if( value >= 0xD800 && value <= 0xDFFF )
		return -1;
	if( value > 0x10FFFF )
		return -1;

	if( outLength ) *outLength = (unsigned)length;
	return value;
}
339 
//
// Encodes a unicode code point as UTF-16 and stores the bytes in outEncodedBuffer,
// which must have room for up to 4 bytes. The return value is the number of bytes
// written: 2 for values below 0x10000, and 4 for values that need a surrogate pair.
//
// The bytes of each 16bit unit are stored low byte first, unless AS_BIG_ENDIAN
// is defined, in which case the high byte is stored first.
//
// If the value is not a valid unicode code point, i.e. it is in the surrogate
// range 0xD800 to 0xDFFF or above 0x10FFFF, nothing is written and the function
// returns -1.
//
// This function is taken from the AngelCode ToolBox.
//
int asStringEncodeUTF16(unsigned int value, char *outEncodedBuffer)
{
	// Surrogate values are reserved for the pairs generated below, and
	// values above 0x10FFFF cannot be expressed with a surrogate pair
	if( (value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF )
		return -1;

	if( value < 0x10000 )
	{
		// The value fits in a single 16bit unit
#ifndef AS_BIG_ENDIAN
		outEncodedBuffer[0] = (char)(value & 0xFF);
		outEncodedBuffer[1] = (char)((value >> 8) & 0xFF);
#else
		outEncodedBuffer[1] = (char)(value & 0xFF);
		outEncodedBuffer[0] = (char)((value >> 8) & 0xFF);
#endif
		return 2;
	}

	// Split the remaining 20 bits evenly over a high and a low surrogate
	value -= 0x10000;
	const unsigned int surrogate1 = ((value >> 10) & 0x3FF) + 0xD800;
	const unsigned int surrogate2 = (value & 0x3FF) + 0xDC00;

#ifndef AS_BIG_ENDIAN
	outEncodedBuffer[0] = (char)(surrogate1 & 0xFF);
	outEncodedBuffer[1] = (char)((surrogate1 >> 8) & 0xFF);
	outEncodedBuffer[2] = (char)(surrogate2 & 0xFF);
	outEncodedBuffer[3] = (char)((surrogate2 >> 8) & 0xFF);
#else
	outEncodedBuffer[1] = (char)(surrogate1 & 0xFF);
	outEncodedBuffer[0] = (char)((surrogate1 >> 8) & 0xFF);
	outEncodedBuffer[3] = (char)(surrogate2 & 0xFF);
	outEncodedBuffer[2] = (char)((surrogate2 >> 8) & 0xFF);
#endif

	return 4;
}
381 
382 
383 END_AS_NAMESPACE
384