1 /*
2 Copyright (c) by Valery Goryachev (Wal)
3 */
4
5 #include "wal.h"
6 #include "../unicode_lc.h"
7
8 #define __STDC_FORMAT_MACROS
9 #include <stdint.h>
10 #if !defined(_MSC_VER) || _MSC_VER >= 1700
11 # include <inttypes.h>
12 #endif
13
14 #if !defined(_WIN32)
15 # include "utf8proc/utf8proc.h"
16 #endif
17
18 #include <string>
19 #include <sstream>
20
21 namespace wal
22 {
new_unicode_str(const unicode_t * s)23 std::vector<unicode_t> new_unicode_str( const unicode_t* s )
24 {
25 if ( !s ) { return std::vector<unicode_t>(); }
26
27 const unicode_t* p;
28
29 for ( p = s; *p; ) { p++; }
30
31 int len = p - s;
32 std::vector<unicode_t> r( len + 1 );
33
34 if ( len ) { memcpy( r.data(), s, len * sizeof( unicode_t ) ); }
35
36 r[len] = 0;
37 return r;
38 }
39
new_sys_str(const sys_char_t * s)40 std::vector<sys_char_t> new_sys_str( const sys_char_t* s )
41 {
42 if ( !s ) { return std::vector<sys_char_t>(); }
43
44 int len = sys_strlen( s );
45 std::vector<sys_char_t> r( len + 1 );
46
47 if ( len ) { memcpy( r.data(), s, len * sizeof( sys_char_t ) ); }
48
49 r[len] = 0;
50 return r;
51 }
52
utf8_to_sys(const char * s)53 std::vector<sys_char_t> utf8_to_sys( const char* s )
54 {
55 if ( !s ) { return std::vector<sys_char_t>(); }
56
57 int symbolCount = utf8_symbol_count( s );
58 std::vector<unicode_t> unicodeBuf( symbolCount + 1 );
59 utf8_to_unicode( unicodeBuf.data(), s );
60
61 int sys_len = sys_string_buffer_len( unicodeBuf.data(), symbolCount );
62 std::vector<sys_char_t> sysBuf( sys_len + 1 );
63 unicode_to_sys( sysBuf.data(), unicodeBuf.data(), symbolCount );
64 return sysBuf;
65 };
66
67
sys_to_utf8(const sys_char_t * s)68 std::string sys_to_utf8( const sys_char_t* s )
69 {
70 if ( !s ) { return std::string(); }
71
72 int symbolCount = sys_symbol_count( s );
73 std::vector<unicode_t> unicodeBuf( symbolCount + 1 );
74 sys_to_unicode( unicodeBuf.data(), s );
75 int utf8Len = utf8_string_buffer_len( unicodeBuf.data(), symbolCount );
76 char* Buf = (char*)alloca( utf8Len + 1 );
77 unicode_to_utf8( Buf, unicodeBuf.data(), symbolCount );
78 return std::string( Buf );
79 }
80
81
utf8_to_unicode(const char * s)82 std::vector<unicode_t> utf8_to_unicode( const char* s )
83 {
84 if ( !s ) { return std::vector<unicode_t>(); }
85
86 int symbolCount = utf8_symbol_count( s );
87 std::vector<unicode_t> unicodeBuf( symbolCount + 1 );
88 utf8_to_unicode( unicodeBuf.data(), s );
89 return unicodeBuf;
90 }
91
utf8str_to_unicode(const std::string & s)92 std::vector<unicode_t> utf8str_to_unicode( const std::string& s )
93 {
94 return utf8_to_unicode( s.c_str() );
95 }
96
unicode_to_utf8(const unicode_t * u)97 std::string unicode_to_utf8( const unicode_t* u )
98 {
99 if ( !u ) { return std::string(); }
100
101 int size = utf8_string_buffer_len( u );
102 char* Buf = (char*)alloca( size + 1 );
103 unicode_to_utf8( Buf, u );
104 return std::string( Buf );
105 }
106
unicode_to_utf8_string(const unicode_t * u)107 std::string unicode_to_utf8_string( const unicode_t* u )
108 {
109 if ( !u ) { return std::string(); }
110
111 int size = utf8_string_buffer_len( u );
112 char* s = (char*)alloca( size + 1 );
113 unicode_to_utf8( s, u );
114 return std::string( s );
115 }
116
// Returns true when both 0-terminated strings have identical contents.
// Returns false if either pointer is null (two nulls are NOT equal).
bool unicode_is_equal( const unicode_t* s, const unicode_t* ss )
{
	if ( s == nullptr || ss == nullptr )
	{
		return false;
	}

	// Walk both strings in lock-step until `ss` ends or a mismatch shows up.
	for ( ; *ss != 0; ++s, ++ss )
	{
		if ( *s != *ss )
		{
			return false;
		}
	}

	// `ss` is exhausted; the strings match only if `s` ends here as well.
	return *s == 0;
}
130
131 // not null-safe like strcmp
unicode_strcmp(const unicode_t * s1,const unicode_t * s2)132 int unicode_strcmp(const unicode_t* s1, const unicode_t* s2)
133 {
134 for (; *s1 == *s2; s1++, s2++)
135 {
136 if (*s1 == 0)
137 return 0;
138 }
139 return *s1 > *s2 ? 1 : -1;
140 }
141
142 // not null-safe, like stricmp
unicode_stricmp(const unicode_t * s1,const unicode_t * s2)143 int unicode_stricmp(const unicode_t* s1, const unicode_t* s2)
144 {
145 for (;; s1++, s2++)
146 {
147 unicode_t c1 = UnicodeLC(*s1);
148 unicode_t c2 = UnicodeLC(*s2);
149 if (c1 != c2)
150 return c1 > c2 ? 1 : -1;
151 if (c1 == 0)
152 return 0;
153 }
154 }
155
unicode_get_last_word(const unicode_t * Str,const unicode_t ** LastWordStart,bool UsePathSeparator)156 std::vector<unicode_t> unicode_get_last_word( const unicode_t* Str, const unicode_t** LastWordStart, bool UsePathSeparator )
157 {
158 if ( !Str ) return std::vector<unicode_t>();
159
160 std::vector<unicode_t> Result;
161
162 const unicode_t* S = Str;
163 const unicode_t* StartPos = Str;
164
165 while ( *S )
166 {
167 if ( UsePathSeparator )
168 {
169 if ( IsPathSeparator(*S) || *S == ' ' ) StartPos = S+1;
170 if ( LastWordStart ) *LastWordStart = StartPos;
171 }
172 else
173 {
174 if ( *S == ' ' ) StartPos = S+1;
175 if ( LastWordStart ) *LastWordStart = StartPos;
176 }
177 S++;
178 }
179
180 return ( *StartPos ) ? new_unicode_str( StartPos ) : std::vector<unicode_t>();
181 }
182
unicode_starts_with_and_not_equal(const unicode_t * Str,const unicode_t * SubStr,bool IgnoreCase)183 bool unicode_starts_with_and_not_equal( const unicode_t* Str, const unicode_t* SubStr, bool IgnoreCase )
184 {
185 if ( !Str || !SubStr )
186 {
187 return false;
188 }
189
190 while ( *SubStr != 0 )
191 {
192 unicode_t S = *Str++;
193 unicode_t SS = *SubStr++;
194 if ( IgnoreCase )
195 {
196 S = UnicodeLC( S );
197 SS = UnicodeLC( SS );
198 }
199
200 if ( S != SS )
201 {
202 return false;
203 }
204 }
205
206 return !( *Str == 0 && *SubStr == 0 );
207 }
208
// Returns true when `Str` begins with `SubStr` (byte-wise comparison) and is
// strictly longer than it. Returns false if either pointer is null.
bool utf8_starts_with_and_not_equal( const char* Str, const char* SubStr )
{
	if ( Str == nullptr || SubStr == nullptr )
	{
		return false;
	}

	const char* text = Str;
	const char* prefix = SubStr;

	for ( ; *prefix != 0; ++text, ++prefix )
	{
		if ( *text != *prefix )
		{
			return false;
		}
	}

	// The whole prefix matched; the strings are "not equal" only if `Str` continues.
	return *text != 0;
}
225
unicode_strlen(const unicode_t * s)226 int unicode_strlen( const unicode_t* s )
227 {
228 if ( !s ) { return 0; }
229
230 const unicode_t* p = s;
231
232 while ( *p ) { p++; }
233
234 return p - s;
235 }
236
// Returns a pointer to the first occurrence of `c` in the 0-terminated
// string `s`, or null if it does not occur. Not null-safe.
// Unlike strchr, searching for c == 0 returns null rather than a pointer to
// the terminator.
unicode_t* unicode_strchr( const unicode_t* s, unicode_t c )
{
	for ( ; *s != 0; ++s )
	{
		if ( *s == c )
		{
			return const_cast<unicode_t*>( s );
		}
	}

	return nullptr;
}
243
// Returns a pointer to the last occurrence of `c` in the 0-terminated
// string `s`, or null if it does not occur. The scan starts at the
// terminator, so c == 0 returns a pointer to the terminator.
// Dereferences `s`, so it must not be null.
unicode_t* unicode_strrchr( const unicode_t* s, unicode_t c )
{
	const unicode_t* cursor = s + unicode_strlen( s );

	for ( ;; --cursor )
	{
		if ( *cursor == c )
		{
			return const_cast<unicode_t*>( cursor );
		}

		if ( cursor == s )
		{
			return nullptr; // reached the beginning without a match
		}
	}
}
251
unicode_strcpy(unicode_t * d,const unicode_t * s)252 unicode_t* unicode_strcpy(unicode_t* d, const unicode_t* s)
253 {
254 if ( !d || !s ) { return NULL; }
255
256 unicode_t* ret = d;
257
258 while ( ( *d++ = *s++ ) != 0 )
259 ;
260
261 return ret;
262 }
263
264 // copy unlit end of string, or when n chars copid, whichever comes first.
265 // d is always 0-ended
unicode_strncpy0(unicode_t * d,const unicode_t * s,int n)266 unicode_t* unicode_strncpy0( unicode_t* d, const unicode_t* s, int n )
267 {
268 if ( !d || !s ) { return NULL; }
269
270 unicode_t* ret = d;
271
272 for ( ;; )
273 {
274 if ( n-- == 0 )
275 {
276 *d = 0;
277 break;
278 }
279
280 if ( ( *d++ = *s++ ) == 0 )
281 {
282 break;
283 }
284 }
285
286 return ret;
287 }
288
unicode_strcat(unicode_t * d,const unicode_t * s)289 void unicode_strcat( unicode_t* d, const unicode_t* s )
290 {
291 if ( !d || !s ) { return; }
292
293 while ( *d )
294 {
295 d++;
296 }
297
298 while ( ( *d++ = *s++ ) != 0 )
299 ;
300 }
301
// Returns a copy of the 0-terminated string `s` allocated with new[];
// the caller owns it and must release it with delete[].
// Returns null for a null `s`.
unicode_t* unicode_strdup( const unicode_t* s )
{
	// Check before allocating: unicode_strcpy() returns null for a null source,
	// which would otherwise leak the freshly allocated array.
	if ( !s ) { return nullptr; }

	return unicode_strcpy( new unicode_t[unicode_strlen( s ) + 1], s );
}
306
307
308 //////////// system File implementation
309
Throw(const sys_char_t * name)310 void File::Throw( const sys_char_t* name )
311 {
312 if ( !name ) { name = _fileName.data(); }
313
314 static const char noname[] = "<NULL>";
315 throw_syserr( 0, "'%s'", name ? sys_to_utf8( name ).data() : noname );
316 }
317
Throw()318 void File::Throw() { Throw( 0 ); }
319
// Peeks at the element following *p.
// Returns false when `p` is null or points at the terminating 0.
// Otherwise returns true and, if `OutNextChar` is non-null, stores p[1]
// there (which may itself be the terminator).
bool LookAhead( const unicode_t* p, unicode_t* OutNextChar )
{
	if ( p == nullptr || *p == 0 )
	{
		return false;
	}

	if ( OutNextChar != nullptr )
	{
		*OutNextChar = p[1];
	}

	return true;
}
330
PopLastNull(std::vector<unicode_t> * S)331 void PopLastNull( std::vector<unicode_t>* S )
332 {
333 if ( S && !S->empty() && S->back() == 0 ) { S->pop_back(); }
334 }
335
LastCharEquals(const std::vector<unicode_t> & S,unicode_t Ch)336 bool LastCharEquals( const std::vector<unicode_t>& S, unicode_t Ch )
337 {
338 if ( S.empty() ) { return false; }
339
340 return S.back() == Ch;
341 }
342
// Returns true when the last element before the terminator of the
// 0-terminated string `S` equals `Ch`. For an empty string the terminator
// itself is compared, so the result is (Ch == 0). A null `S` yields false.
bool LastCharEquals( const unicode_t* S, unicode_t Ch )
{
	if ( S == nullptr )
	{
		return false;
	}

	if ( *S == 0 )
	{
		return Ch == 0;
	}

	// Advance until S points at the element just before the terminator.
	while ( S[1] != 0 )
	{
		++S;
	}

	return *S == Ch;
}
357
// Returns true for a backslash or a forward slash.
bool IsPathSeparator( const unicode_t Ch )
{
	switch ( Ch )
	{
		case '\\':
		case '/':
			return true;

		default:
			return false;
	}
}
362
ReplaceSpaces(std::vector<unicode_t> * S)363 void ReplaceSpaces( std::vector<unicode_t>* S )
364 {
365 if ( !S ) { return; }
366
367 for ( size_t i = 0; i != S->size(); i++ )
368 {
369 unicode_t Ch = S->at( i );
370
371 if ( Ch == 32 || Ch == 9 )
372 {
373 S->at( i ) = 0x00B7;
374 }
375 }
376 }
377
ReplaceTrailingSpaces(std::vector<unicode_t> * S)378 void ReplaceTrailingSpaces( std::vector<unicode_t>* S )
379 {
380 if ( !S ) { return; }
381
382 for ( auto i = S->size(); i -- > 0; )
383 {
384 unicode_t Ch = S->at( i );
385
386 if ( !Ch ) { continue; }
387
388 if ( Ch == 32 || Ch == 9 )
389 {
390 S->at( i ) = 0x00B7;
391 }
392 else
393 {
394 break;
395 }
396 }
397 }
398
IsEqual_Unicode_CStr(const unicode_t * U,const char * S,bool CaseSensitive)399 bool IsEqual_Unicode_CStr( const unicode_t* U, const char* S, bool CaseSensitive )
400 {
401 if ( !U && !S ) { return true; }
402
403 if ( !U ) { return false; }
404
405 if ( !S ) { return false; }
406
407 const unicode_t* UPtr = U;
408 const char* SPtr = S;
409
410 while ( *UPtr || *SPtr )
411 {
412 unicode_t ChU = CaseSensitive ? *UPtr : UnicodeLC( *UPtr );
413 char ChS = CaseSensitive ? *SPtr : tolower( *SPtr );
414
415 if ( ChU != ChS ) { return false; }
416
417 UPtr++;
418 SPtr++;
419 }
420
421 return true;
422 }
423
// Upper-case hexadecimal digits, indexed by nibble value.
static char g_HexChars[16] =
{
	'0', '1', '2', '3', '4', '5', '6', '7',
	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
};

// Returns the upper-case hex digit for the low four bits of `n`;
// all higher bits are ignored.
char GetHexChar( int n )
{
	const unsigned Nibble = static_cast<unsigned>( n ) & 0xFu;
	return g_HexChars[ Nibble ];
}
430
// Formats `Value` as lower-case hexadecimal without any prefix.
//
// Value   - number to format; 0 is rendered as "0".
// Padding - minimum length of the result; shorter output is left-padded
//           with '0'. 0 means no padding.
//
// The digits are produced directly, so the result does not depend on the
// platform's integer-to-string routine or on a UTF-8 -> unicode conversion.
std::wstring IntToHexStr( uint64_t Value, size_t Padding )
{
	static const wchar_t Digits[] = L"0123456789abcdef";

	// A 64-bit value has at most 16 hex digits; fill the buffer from the end.
	const size_t MaxDigits = 16;
	wchar_t Buf[MaxDigits];
	size_t Pos = MaxDigits;

	do
	{
		Buf[--Pos] = Digits[Value & 0xF];
		Value >>= 4;
	}
	while ( Value != 0 );

	std::wstring Result( Buf + Pos, MaxDigits - Pos );

	if ( Padding > Result.length() )
	{
		Result.insert( Result.begin(), Padding - Result.length(), L'0' );
	}

	return Result;
}
458
HexStrToInt(const unicode_t * Str)459 uint64_t HexStrToInt( const unicode_t* Str )
460 {
461 if ( !Str ) return 0;
462
463 std::string utf8 = unicode_to_utf8( Str );
464
465 uint64_t i = 0x0;
466
467 std::stringstream Convert( utf8 );
468
469 Convert >> std::hex >> i;
470
471 return i;
472 }
473
normalize_unicode_NFC(const unicode_t * Str)474 std::vector<unicode_t> normalize_unicode_NFC( const unicode_t* Str )
475 {
476 if ( !Str ) return std::vector<unicode_t>();
477 #if !defined(_WIN32)
478 std::string UTFstr = unicode_to_utf8( Str );
479 uint8_t* NormString = utf8proc_NFC( (const uint8_t*)UTFstr.data() );
480 std::vector<unicode_t> Result = utf8_to_unicode( (const char*)NormString );
481
482 free( NormString );
483
484 return Result;
485 #else
486 return new_unicode_str( Str );
487 #endif
488 }
489
normalize_utf8_NFC(const char * Str)490 std::string normalize_utf8_NFC( const char* Str )
491 {
492 if ( !Str ) return std::string();
493 #if !defined(_WIN32)
494 uint8_t* NormString = utf8proc_NFC( (const uint8_t*)Str );
495
496 if(!NormString) return std::string(Str);
497
498 std::string Result = (const char*)NormString;
499
500 free( NormString );
501
502 return Result;
503 #else
504 return std::string( Str );
505 #endif
506 }
507
// Returns the value of the environment variable `VarName`.
// Returns an empty string when `VarName` is null or the lookup yields null.
// With _MSC_VER > 1700 the value is read with _dupenv_s() and the returned
// buffer is freed here; otherwise getenv() is used.
std::string GetEnvVariable( const char* VarName )
{
	if ( VarName == nullptr )
	{
		return std::string();
	}

#if _MSC_VER > 1700
	char* Buffer;
	size_t BufferSize;
	_dupenv_s( &Buffer, &BufferSize, VarName );

	if ( Buffer == nullptr )
	{
		return std::string();
	}

	std::string Result( Buffer );

	// deallocate after _dupenv_s()
	free( Buffer );

	return Result;
#else
	const char* Raw = getenv( VarName );

	if ( Raw == nullptr )
	{
		return std::string();
	}

	return std::string( Raw );
#endif
}
531
532 }; //namespace wal
533