1 /*
2 AngelCode Scripting Library
3 Copyright (c) 2003-2017 Andreas Jonsson
4
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any
7 damages arising from the use of this software.
8
9 Permission is granted to anyone to use this software for any
10 purpose, including commercial applications, and to alter it and
11 redistribute it freely, subject to the following restrictions:
12
13 1. The origin of this software must not be misrepresented; you
14 must not claim that you wrote the original software. If you use
15 this software in a product, an acknowledgment in the product
16 documentation would be appreciated but is not required.
17
18 2. Altered source versions must be plainly marked as such, and
19 must not be misrepresented as being the original software.
20
21 3. This notice may not be removed or altered from any source
22 distribution.
23
24 The original version of this library can be located at:
25 http://www.angelcode.com/angelscript/
26
27 Andreas Jonsson
28 andreas@angelcode.com
29
30 */
31
32 #include "as_config.h"
33
34 #include <string.h> // some compilers declare memcpy() here
35 #include <math.h> // pow()
36
37 #if !defined(AS_NO_MEMORY_H)
38 #include <memory.h>
39 #endif
40
41 #include "as_string.h"
42 #include "as_string_util.h"
43
44 BEGIN_AS_NAMESPACE
45
// Compares two length-delimited character buffers byte by byte.
//
// Returns 0 when the buffers are considered equal. When the bytes of the
// common prefix differ, the result of memcmp() on that prefix is returned.
// When one buffer is a prefix of the other, the result is 1 if str1 is the
// shorter one and -1 if str2 is the shorter one.
//
// An empty str1 (len1 == 0) is equal to a null or empty str2. A null str2
// compared against a non-empty str1 gives -1.
int asCompareStrings(const char *str1, size_t len1, const char *str2, size_t len2)
{
	// Nothing to compare on the left side
	if( len1 == 0 )
		return ( str2 == 0 || len2 == 0 ) ? 0 : 1;

	// The left side is known to be non-empty here
	if( str2 == 0 )
		return -1;

	// Compare only the bytes that both buffers have
	const size_t common = ( len2 < len1 ) ? len2 : len1;
	const int diff = memcmp(str1, str2, common);
	if( diff != 0 )
		return diff;

	// The common prefix is identical, so the lengths decide
	if( len2 < len1 )
		return -1;
	if( len1 < len2 )
		return 1;

	return 0;
}
76
// Scans an unsigned decimal floating point literal from the start of the string.
// The accepted form is: digits [ '.' digits ] [ ('e'|'E') ['+'|'-'] digits ]
//
// string     - the text to scan; scanning stops at the first character that
//              doesn't fit the form above
// numScanned - optional, receives the number of characters that were consumed
//
// Returns the scanned value.
double asStringScanDouble(const char *string, size_t *numScanned)
{
	// I decided to do my own implementation of strtod() because this function
	// doesn't seem to be present on all systems. iOS 5 for example doesn't appear
	// to include the function in the standard lib.

	// Another reason is that the standard implementation of strtod() is dependent
	// on the locale on some systems, i.e. it may use comma instead of dot for
	// the decimal indicator. This can be avoided by forcing the locale to "C" with
	// setlocale(), but this is another thing that is highly platform dependent.

	// Once the exponent has reached this magnitude pow() already returns infinity
	// (or zero for a negative exponent), so further digits cannot change the result.
	// Not accumulating them keeps the int from overflowing on very long exponents.
	const int maxExponent = 100000;

	double value            = 0;
	double fraction         = 0.1;
	int    exponent         = 0;
	bool   negativeExponent = false;
	size_t c                = 0;

	// The tokenizer separates the sign from the number in
	// two tokens so we'll never have a sign to parse here

	// Parse the integer value
	while( string[c] >= '0' && string[c] <= '9' )
	{
		value = value*10 + (double)(string[c] - '0');
		c++;
	}

	if( string[c] == '.' )
	{
		c++;

		// Parse the fraction
		while( string[c] >= '0' && string[c] <= '9' )
		{
			value += fraction * (double)(string[c] - '0');
			c++;
			fraction *= 0.1;
		}
	}

	if( string[c] == 'e' || string[c] == 'E' )
	{
		c++;

		// Parse the sign of the exponent
		if( string[c] == '-' )
		{
			negativeExponent = true;
			c++;
		}
		else if( string[c] == '+' )
			c++;

		// Parse the exponent value. All digits are consumed, but the
		// accumulated value saturates once it has passed maxExponent
		while( string[c] >= '0' && string[c] <= '9' )
		{
			if( exponent < maxExponent )
				exponent = exponent*10 + (int)(string[c] - '0');
			c++;
		}
	}

	if( exponent )
	{
		if( negativeExponent )
			exponent = -exponent;
		value *= pow(10.0, exponent);
	}

	if( numScanned )
		*numScanned = c;

	return value;
}
162
// Translates a single digit character into its numeric value for the given radix.
// '0'-'9' map to 0-9, and both 'A'-'Z' and 'a'-'z' map to 10-35.
// Returns -1 if the character isn't a digit, or if its value doesn't fit in the radix.
static int asCharToNbr(char ch, int radix)
{
	int digit;

	if( ch >= '0' && ch <= '9' )
		digit = ch - '0';
	else if( ch >= 'A' && ch <= 'Z' )
		digit = ch - 'A' + 10;
	else if( ch >= 'a' && ch <= 'z' )
		digit = ch - 'a' + 10;
	else
		return -1;

	if( digit < radix )
		return digit;

	return -1;
}
172
173 // If base is 0 the string should be prefixed by 0x, 0d, 0o, or 0b to allow the function to automatically determine the radix
asStringScanUInt64(const char * string,int base,size_t * numScanned,bool * overflow)174 asQWORD asStringScanUInt64(const char *string, int base, size_t *numScanned, bool *overflow)
175 {
176 asASSERT(base == 10 || base == 16 || base == 0);
177
178 if (overflow)
179 *overflow = false;
180
181 const char *end = string;
182
183 static const asQWORD QWORD_MAX = (~asQWORD(0));
184
185 asQWORD res = 0;
186 if( base == 10 )
187 {
188 while( *end >= '0' && *end <= '9' )
189 {
190 if( overflow && ((res > QWORD_MAX / 10) || ((asUINT(*end - '0') > (QWORD_MAX - (QWORD_MAX / 10) * 10)) && res == QWORD_MAX / 10)) )
191 *overflow = true;
192 res *= 10;
193 res += *end++ - '0';
194 }
195 }
196 else
197 {
198 if( base == 0 && string[0] == '0')
199 {
200 // Determine the radix from the prefix
201 switch( string[1] )
202 {
203 case 'b': case 'B': base = 2; break;
204 case 'o': case 'O': base = 8; break;
205 case 'd': case 'D': base = 10; break;
206 case 'x': case 'X': base = 16; break;
207 }
208 end += 2;
209 }
210
211 asASSERT( base );
212
213 if( base )
214 {
215 for (int nbr; (nbr = asCharToNbr(*end, base)) >= 0; end++)
216 {
217 if (overflow && ((res > QWORD_MAX / base) || ((asUINT(nbr) > (QWORD_MAX - (QWORD_MAX / base) * base)) && res == QWORD_MAX / base)) )
218 *overflow = true;
219
220 res = res * base + nbr;
221 }
222 }
223 }
224
225 if( numScanned )
226 *numScanned = end - string;
227
228 return res;
229 }
230
//
// Encodes a unicode code point as UTF-8 into outEncodedBuffer and returns the
// number of bytes written (1 to 4). The buffer must have room for 4 bytes.
//
// Returns -1 without writing anything if the value is not a valid unicode code
// point, i.e. if it is in the range 0xD800 to 0xDFFF or above 0x10FFFF.
//
// This function is taken from the AngelCode ToolBox.
//
int asStringEncodeUTF8(unsigned int value, char *outEncodedBuffer)
{
	unsigned char *out = (unsigned char*)outEncodedBuffer;

	// 7 bit values are stored as is
	if( value < 0x80 )
	{
		out[0] = (unsigned char)value;
		return 1;
	}

	// Determine the sequence length and the marker bits of the leading byte
	int          numBytes;
	unsigned int leadMarker;
	if( value < 0x800 )
	{
		numBytes   = 2;
		leadMarker = 0xC0;
	}
	else if( value < 0x10000 )
	{
		// Note: Values 0xD800 to 0xDFFF are not valid unicode characters
		if( value >= 0xD800 && value <= 0xDFFF )
			return -1;

		numBytes   = 3;
		leadMarker = 0xE0;
	}
	else if( value <= 0x10FFFF )
	{
		numBytes   = 4;
		leadMarker = 0xF0;
	}
	else
		return -1;

	// Write the continuation bytes from the last to the first,
	// each one taking the lowest 6 bits that are left
	for( int i = numBytes - 1; i > 0; i-- )
	{
		out[i] = (unsigned char)(0x80 | (value & 0x3F));
		value >>= 6;
	}

	// What remains of the value goes into the leading byte
	out[0] = (unsigned char)(leadMarker | value);

	return numBytes;
}
276
//
// The function will decode an UTF8 character and return the unicode code point.
// outLength is optional and will receive the number of bytes that were decoded.
//
// Returns -1 if the bytes don't form a valid UTF-8 sequence, in which case
// outLength is left untouched. A sequence is rejected if
//  - the first byte is not a valid leading byte
//  - any of the following bytes is not a continuation byte (10xxxxxx)
//  - the value is encoded with more bytes than necessary (overlong encoding)
//  - the value is in the range 0xD800 to 0xDFFF or above 0x10FFFF, as these
//    are not valid unicode code points
//
// This function is taken from the AngelCode ToolBox.
//
int asStringDecodeUTF8(const char *encodedBuffer, unsigned int *outLength)
{
	const unsigned char *buf = (const unsigned char*)encodedBuffer;

	unsigned char byte = buf[0];
	if( (byte & 0x80) == 0 )
	{
		// This is the only byte
		if( outLength ) *outLength = 1;
		return byte;
	}

	int value;
	int length;
	int minValue; // The smallest value that requires this many bytes
	if( (byte & 0xE0) == 0xC0 )
	{
		// There is one more byte
		value    = (int)(byte & 0x1F);
		length   = 2;
		minValue = 0x80;

		// The value at this moment must not be less than 2, because
		// that should have been encoded with one byte only.
		if( value < 2 )
			return -1;
	}
	else if( (byte & 0xF0) == 0xE0 )
	{
		// There are two more bytes
		value    = (int)(byte & 0x0F);
		length   = 3;
		minValue = 0x800;
	}
	else if( (byte & 0xF8) == 0xF0 )
	{
		// There are three more bytes
		value    = (int)(byte & 0x07);
		length   = 4;
		minValue = 0x10000;

		// Leading bytes above 0xF4 can only give values above 0x10FFFF
		if( value > 4 )
			return -1;
	}
	else
	{
		// A continuation byte or an invalid byte can't start a sequence
		return -1;
	}

	// Stop at the first byte that isn't a continuation byte so that
	// nothing is read beyond a null terminator in a truncated sequence
	for( int n = 1; n < length; n++ )
	{
		byte = buf[n];
		if( (byte & 0xC0) != 0x80 )
			return -1;

		value = (value << 6) + (int)(byte & 0x3F);
	}

	// Overlong encodings are not valid UTF-8
	if( value < minValue )
		return -1;

	// Values 0xD800 to 0xDFFF and those above 0x10FFFF are not valid unicode characters
	if( (value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF )
		return -1;

	if( outLength ) *outLength = (unsigned)length;
	return value;
}
339
//
// The function will encode the unicode code point into the outEncodedBuffer as
// UTF-16, and then return the length of the encoded value in bytes: 2 for a
// single 16 bit unit, or 4 for a surrogate pair. The buffer must have room for
// 4 bytes. The byte order of each unit is big endian if AS_BIG_ENDIAN is
// defined, otherwise little endian.
//
// If the input value is not a valid unicode code point, i.e. if it is in the
// range 0xD800 to 0xDFFF or above 0x10FFFF, then the function will return -1
// without writing anything to the buffer.
//
// This function is taken from the AngelCode ToolBox.
//
int asStringEncodeUTF16(unsigned int value, char *outEncodedBuffer)
{
	// Positions of the low and high byte within a 16 bit unit
#ifndef AS_BIG_ENDIAN
	const int lo = 0, hi = 1;
#else
	const int lo = 1, hi = 0;
#endif

	// Values 0xD800 to 0xDFFF are reserved for the surrogates and can't be
	// encoded on their own, and nothing above 0x10FFFF fits in a surrogate pair
	if( (value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF )
		return -1;

	if( value < 0x10000 )
	{
		// The value fits in a single 16 bit unit
		outEncodedBuffer[lo] = (char)(value & 0xFF);
		outEncodedBuffer[hi] = (char)((value >> 8) & 0xFF);
		return 2;
	}

	// Split the remaining 20 bits over a high and a low surrogate
	value -= 0x10000;
	unsigned int surrogate1 = ((value >> 10) & 0x3FF) + 0xD800;
	unsigned int surrogate2 = (value & 0x3FF) + 0xDC00;

	outEncodedBuffer[lo]     = (char)(surrogate1 & 0xFF);
	outEncodedBuffer[hi]     = (char)((surrogate1 >> 8) & 0xFF);
	outEncodedBuffer[2 + lo] = (char)(surrogate2 & 0xFF);
	outEncodedBuffer[2 + hi] = (char)((surrogate2 >> 8) & 0xFF);

	return 4;
}
381
382
383 END_AS_NAMESPACE
384