1 /*******************************************************
2 
3    CoolReader Engine
4 
5    lvstring.cpp:  string classes implementation
6 
7    (c) Vadim Lopatin, 2000-2006
8    This source code is distributed under the terms of
9    GNU General Public License
10    See LICENSE file for details
11 
12 *******************************************************/
13 
14 #include "../include/lvstring.h"
15 
16 #include <stdlib.h>
17 #include <assert.h>
18 #include <string.h>
19 #include <stdio.h>
20 #include <stddef.h>
21 #include <stdarg.h>
22 #include <stddef.h>
23 #include <time.h>
24 
25 #if !defined(__SYMBIAN32__) && defined(_WIN32)
26 extern "C" {
27 #include <windows.h>
28 }
29 #endif
30 
31 #if (USE_ZLIB==1)
32 #include <zlib.h>
33 #endif
34 
35 #if (USE_UTF8PROC==1)
36 #include <utf8proc.h>
37 #endif
38 
39 #if !defined(__SYMBIAN32__) && defined(_WIN32)
40 extern "C" {
41 #include <windows.h>
42 }
43 #endif
44 
// NOTE(review): in the visible part of this file LS_DEBUG_CHECK is only
// referenced from commented-out checks in lstring_chunk_slice_t.
#define LS_DEBUG_CHECK

// set to 1 to enable debugging
// (when 1, cs8()/cs32() trace every newly registered literal via CRLog::trace)
#define DEBUG_STATIC_STRING_ALLOC 0


// Shared chunk used by every empty lString8: a one-element buffer holding only
// the terminating NUL, wrapped in a static chunk object.
static lChar8 empty_str_8[] = {0};
static lstring8_chunk_t empty_chunk_8(empty_str_8);
lstring8_chunk_t * lString8::EMPTY_STR_8 = &empty_chunk_8;

// Same arrangement for lString16.
static lChar16 empty_str_16[] = {0};
static lstring16_chunk_t empty_chunk_16(empty_str_16);
lstring16_chunk_t * lString16::EMPTY_STR_16 = &empty_chunk_16;

// Same arrangement for lString32; lString32::free() returns early for this
// chunk, so it is never passed to ::free().
static lChar32 empty_str_32[] = {0};
static lstring32_chunk_t empty_chunk_32(empty_str_32);
lstring32_chunk_t * lString32::EMPTY_STR_32 = &empty_chunk_32;
62 
63 //================================================================================
64 // atomic string storages for string literals
65 //================================================================================
66 
67 static const void * const_ptrs_8[CONST_STRING_BUFFER_SIZE] = {NULL};
68 static lString8 values_8[CONST_STRING_BUFFER_SIZE];
69 static int size_8 = 0;
70 
71 /// get reference to atomic constant string for string literal e.g. cs8("abc") -- fast and memory effective
cs8(const char * str)72 const lString8 & cs8(const char * str) {
73     int index =  (int)(((ptrdiff_t)str * CONST_STRING_BUFFER_HASH_MULT) & CONST_STRING_BUFFER_MASK);
74     for (;;) {
75         const void * p = const_ptrs_8[index];
76         if (p == str) {
77             return values_8[index];
78         } else if (p == NULL) {
79 #if DEBUG_STATIC_STRING_ALLOC == 1
80             CRLog::trace("allocating static string8 %s", str);
81 #endif
82             const_ptrs_8[index] = str;
83             size_8++;
84             values_8[index] = lString8(str);
85             values_8[index].addref();
86             return values_8[index];
87         }
88         if (size_8 > CONST_STRING_BUFFER_SIZE / 4) {
89             crFatalError(-1, "out of memory for const string8");
90         }
91         index = (index + 1) & CONST_STRING_BUFFER_MASK;
92     }
93     return lString8::empty_str;
94 }
95 
96 static const void * const_ptrs_32[CONST_STRING_BUFFER_SIZE] = {NULL};
97 static lString32 values_32[CONST_STRING_BUFFER_SIZE];
98 static int size_32 = 0;
99 
100 /// get reference to atomic constant wide string for string literal e.g. cs32("abc") -- fast and memory effective
cs32(const char * str)101 const lString32 & cs32(const char * str) {
102     int index =  (int)(((ptrdiff_t)str * CONST_STRING_BUFFER_HASH_MULT) & CONST_STRING_BUFFER_MASK);
103     for (;;) {
104         const void * p = const_ptrs_32[index];
105         if (p == str) {
106             return values_32[index];
107         } else if (p == NULL) {
108 #if DEBUG_STATIC_STRING_ALLOC == 1
109             CRLog::trace("allocating static string32 %s", str);
110 #endif
111             const_ptrs_32[index] = str;
112             size_32++;
113             values_32[index] = lString32(str);
114             values_32[index].addref();
115             return values_32[index];
116         }
117         if (size_32 > CONST_STRING_BUFFER_SIZE / 4) {
118             crFatalError(-1, "out of memory for const string8");
119         }
120         index = (index + 1) & CONST_STRING_BUFFER_MASK;
121     }
122     return lString32::empty_str;
123 }
124 
125 /// get reference to atomic constant wide string for string literal e.g. cs32(U"abc") -- fast and memory effective
cs32(const lChar32 * str)126 const lString32 & cs32(const lChar32 * str) {
127     int index = (((int)((ptrdiff_t)str)) * CONST_STRING_BUFFER_HASH_MULT) & CONST_STRING_BUFFER_MASK;
128     for (;;) {
129         const void * p = const_ptrs_32[index];
130         if (p == str) {
131             return values_32[index];
132         } else if (p == NULL) {
133 #if DEBUG_STATIC_STRING_ALLOC == 1
134             CRLog::trace("allocating static string32 %s", LCSTR(str));
135 #endif
136             const_ptrs_32[index] = str;
137             size_32++;
138             values_32[index] = lString32(str);
139             values_32[index].addref();
140             return values_32[index];
141         }
142         if (size_32 > CONST_STRING_BUFFER_SIZE / 4) {
143             crFatalError(-1, "out of memory for const string8");
144         }
145         index = (index + 1) & CONST_STRING_BUFFER_MASK;
146     }
147     return lString32::empty_str;
148 }
149 
150 
151 
152 //================================================================================
153 // memory allocation slice
154 //================================================================================
155 struct lstring_chunk_slice_t {
156     lstring8_chunk_t * pChunks; // first chunk
157     lstring8_chunk_t * pEnd;    // first free byte after last chunk
158     lstring8_chunk_t * pFree;   // first free chunk
159     int used;
lstring_chunk_slice_tlstring_chunk_slice_t160     lstring_chunk_slice_t( int size )
161     {
162         pChunks = (lstring8_chunk_t *) malloc(sizeof(lstring8_chunk_t) * size);
163         pEnd = pChunks + size;
164         pFree = pChunks;
165         for (lstring8_chunk_t * p = pChunks; p<pEnd; ++p)
166         {
167             p->buf8 = (char*)(p+1);
168             p->size = 0;
169         }
170         (pEnd-1)->buf8 = NULL;
171     }
~lstring_chunk_slice_tlstring_chunk_slice_t172     ~lstring_chunk_slice_t()
173     {
174         free( pChunks );
175     }
alloc_chunklstring_chunk_slice_t176     inline lstring8_chunk_t * alloc_chunk()
177     {
178         lstring8_chunk_t * res = pFree;
179         pFree = (lstring8_chunk_t *)res->buf8;
180         return res;
181     }
alloc_chunk16lstring_chunk_slice_t182     inline lstring16_chunk_t * alloc_chunk16()
183     {
184         lstring16_chunk_t * res = (lstring16_chunk_t *)pFree;
185         pFree = (lstring8_chunk_t *)res->buf16;
186         return res;
187     }
alloc_chunk32lstring_chunk_slice_t188     inline lstring32_chunk_t * alloc_chunk32()
189     {
190         lstring32_chunk_t * res = (lstring32_chunk_t *)pFree;
191         pFree = (lstring8_chunk_t *)res->buf32;
192         return res;
193     }
free_chunklstring_chunk_slice_t194     inline bool free_chunk( lstring8_chunk_t * pChunk )
195     {
196         if (pChunk < pChunks || pChunk >= pEnd)
197             return false; // chunk does not belong to this slice
198 /*
199 #ifdef LS_DEBUG_CHECK
200         if (!pChunk->size)
201         {
202             crFatalError(); // already freed!!!
203         }
204         pChunk->size = 0;
205 #endif
206 */
207         pChunk->buf8 = (char *)pFree;
208         pFree = pChunk;
209         return true;
210     }
free_chunk16lstring_chunk_slice_t211     inline bool free_chunk16(lstring16_chunk_t * pChunk)
212     {
213         if ((lstring8_chunk_t *)pChunk < pChunks || (lstring8_chunk_t *)pChunk >= pEnd)
214             return false; // chunk does not belong to this slice
215 /*
216 #ifdef LS_DEBUG_CHECK
217         if (!pChunk->size)
218         {
219             crFatalError(); // already freed!!!
220         }
221         pChunk->size = 0;
222 #endif
223 */
224         pChunk->buf16 = (lChar16 *)pFree;
225         pFree = (lstring8_chunk_t *)pChunk;
226         return true;
227     }
free_chunk32lstring_chunk_slice_t228     inline bool free_chunk32(lstring32_chunk_t * pChunk)
229     {
230         if ((lstring8_chunk_t *)pChunk < pChunks || (lstring8_chunk_t *)pChunk >= pEnd)
231             return false; // chunk does not belong to this slice
232 /*
233 #ifdef LS_DEBUG_CHECK
234         if (!pChunk->size)
235         {
236             crFatalError(); // already freed!!!
237         }
238         pChunk->size = 0;
239 #endif
240 */
241         pChunk->buf32 = (lChar32 *)pFree;
242         pFree = (lstring8_chunk_t *)pChunk;
243         return true;
244     }
245 };
246 
247 //#define FIRST_SLICE_SIZE 256
248 //#define MAX_SLICE_COUNT  20
#if (LDOM_USE_OWN_MEM_MAN == 1)
// Registry of chunk slices used by the lstring*_chunk_t::alloc()/free()
// functions below: slices[0..slices_count-1] are valid entries.
static lstring_chunk_slice_t * slices[MAX_SLICE_COUNT];
static int slices_count = 0;
// set by init_ls_storage(), cleared by free_ls_storage()
static bool slices_initialized = false;
#endif
254 
255 #if (LDOM_USE_OWN_MEM_MAN == 1)
init_ls_storage()256 static void init_ls_storage()
257 {
258     slices[0] = new lstring_chunk_slice_t( FIRST_SLICE_SIZE );
259     slices_count = 1;
260     slices_initialized = true;
261 }
262 
free_ls_storage()263 void free_ls_storage()
264 {
265     if (!slices_initialized)
266         return;
267     for (int i=0; i<slices_count; i++)
268     {
269         delete slices[i];
270     }
271     slices_count = 0;
272     slices_initialized = false;
273 }
274 
alloc()275 lstring8_chunk_t * lstring8_chunk_t::alloc()
276 {
277     if (!slices_initialized)
278         init_ls_storage();
279     // search for existing slice
280     for (int i=slices_count-1; i>=0; --i)
281     {
282         if (slices[i]->pFree != NULL)
283             return slices[i]->alloc_chunk();
284     }
285     // alloc new slice
286     if (slices_count >= MAX_SLICE_COUNT)
287         crFatalError();
288     lstring_chunk_slice_t * new_slice = new lstring_chunk_slice_t( FIRST_SLICE_SIZE << (slices_count+1) );
289     slices[slices_count++] = new_slice;
290     return slices[slices_count-1]->alloc_chunk();
291 }
292 
free(lstring8_chunk_t * pChunk)293 void lstring8_chunk_t::free( lstring8_chunk_t * pChunk )
294 {
295     for (int i=slices_count-1; i>=0; --i)
296     {
297         if (slices[i]->free_chunk(pChunk))
298             return;
299     }
300     crFatalError(); // wrong pointer!!!
301 }
302 
alloc()303 lstring16_chunk_t * lstring16_chunk_t::alloc()
304 {
305     if (!slices_initialized)
306         init_ls_storage();
307     // search for existing slice
308     for (int i=slices_count-1; i>=0; --i)
309     {
310         if (slices[i]->pFree != NULL)
311             return slices[i]->alloc_chunk16();
312     }
313     // alloc new slice
314     if (slices_count >= MAX_SLICE_COUNT)
315         crFatalError();
316     lstring_chunk_slice_t * new_slice = new lstring_chunk_slice_t( FIRST_SLICE_SIZE << (slices_count+1) );
317     slices[slices_count++] = new_slice;
318     return slices[slices_count-1]->alloc_chunk16();
319 }
320 
free(lstring16_chunk_t * pChunk)321 void lstring16_chunk_t::free( lstring16_chunk_t * pChunk )
322 {
323     for (int i=slices_count-1; i>=0; --i)
324     {
325         if (slices[i]->free_chunk16(pChunk))
326             return;
327     }
328     crFatalError(); // wrong pointer!!!
329 }
330 
alloc()331 lstring32_chunk_t * lstring32_chunk_t::alloc()
332 {
333     if (!slices_initialized)
334         init_ls_storage();
335     // search for existing slice
336     for (int i=slices_count-1; i>=0; --i)
337     {
338         if (slices[i]->pFree != NULL)
339             return slices[i]->alloc_chunk32();
340     }
341     // alloc new slice
342     if (slices_count >= MAX_SLICE_COUNT)
343         crFatalError();
344     lstring_chunk_slice_t * new_slice = new lstring_chunk_slice_t( FIRST_SLICE_SIZE << (slices_count+1) );
345     slices[slices_count++] = new_slice;
346     return slices[slices_count-1]->alloc_chunk32();
347 }
348 
free(lstring32_chunk_t * pChunk)349 void lstring32_chunk_t::free( lstring32_chunk_t * pChunk )
350 {
351     for (int i=slices_count-1; i>=0; --i)
352     {
353         if (slices[i]->free_chunk32(pChunk))
354             return;
355     }
356     crFatalError(); // wrong pointer!!!
357 }
358 #endif  // (LDOM_USE_OWN_MEM_MAN == 1)
359 
360 ////////////////////////////////////////////////////////////////////////////
361 // Utility functions
362 ////////////////////////////////////////////////////////////////////////////
363 
_lStr_len(const lChar16 * str)364 inline int _lStr_len(const lChar16 * str)
365 {
366     int len;
367     for (len=0; *str; str++)
368         len++;
369     return len;
370 }
371 
_lStr_len(const lChar32 * str)372 inline int _lStr_len(const lChar32 * str)
373 {
374     int len;
375     for (len=0; *str; str++)
376         len++;
377     return len;
378 }
379 
_lStr_len(const lChar8 * str)380 inline int _lStr_len(const lChar8 * str)
381 {
382     int len;
383     for (len=0; *str; str++)
384         len++;
385     return len;
386 }
387 
/// Length of a lChar16 string, examining at most maxcount characters.
/// @return index of the first NUL, or maxcount if none is found earlier
///         (0 when maxcount <= 0)
inline int _lStr_nlen(const lChar16 * str, int maxcount)
{
    int n = 0;
    while (n < maxcount && str[n] != 0)
        ++n;
    return n;
}
395 
/// Length of a lChar32 string, examining at most maxcount characters.
/// @return index of the first NUL, or maxcount if none is found earlier
///         (0 when maxcount <= 0)
inline int _lStr_nlen(const lChar32 * str, int maxcount)
{
    int n = 0;
    while (n < maxcount && str[n] != 0)
        ++n;
    return n;
}
403 
/// Length of a lChar8 string, examining at most maxcount characters.
/// @return index of the first NUL, or maxcount if none is found earlier
///         (0 when maxcount <= 0)
inline int _lStr_nlen(const lChar8 * str, int maxcount)
{
    int n = 0;
    while (n < maxcount && str[n] != 0)
        ++n;
    return n;
}
411 
_lStr_cpy(lChar16 * dst,const lChar16 * src)412 inline int _lStr_cpy(lChar16 * dst, const lChar16 * src)
413 {
414     int count;
415     for ( count=0; (*dst++ = *src++); count++ )
416         ;
417     return count;
418 }
419 
_lStr_cpy(lChar32 * dst,const lChar32 * src)420 inline int _lStr_cpy(lChar32 * dst, const lChar32 * src)
421 {
422     int count;
423     for ( count=0; (*dst++ = *src++); count++ )
424         ;
425     return count;
426 }
427 
_lStr_cpy(lChar8 * dst,const lChar8 * src)428 inline int _lStr_cpy(lChar8 * dst, const lChar8 * src)
429 {
430     int count;
431     for ( count=0; (*dst++ = *src++); count++ )
432         ;
433     return count;
434 }
435 
_lStr_cpy(lChar16 * dst,const lChar8 * src)436 inline int _lStr_cpy(lChar16 * dst, const lChar8 * src)
437 {
438     int count;
439     for ( count=0; (*dst++ = *src++); count++ )
440         ;
441     return count;
442 }
443 
_lStr_cpy(lChar32 * dst,const lChar8 * src)444 inline int _lStr_cpy(lChar32 * dst, const lChar8 * src)
445 {
446     int count;
447     for ( count=0; (*dst++ = *src++); count++ )
448         ;
449     return count;
450 }
451 
_lStr_cpy(lChar8 * dst,const lChar16 * src)452 inline int _lStr_cpy(lChar8 * dst, const lChar16 * src)
453 {
454     int count;
455     for ( count=0; (*dst++ = (lChar8)*src++); count++ )
456         ;
457     return count;
458 }
459 
_lStr_cpy(lChar8 * dst,const lChar32 * src)460 inline int _lStr_cpy(lChar8 * dst, const lChar32 * src)
461 {
462     int count;
463     for ( count=0; (*dst++ = (lChar8)*src++); count++ )
464         ;
465     return count;
466 }
467 
_lStr_ncpy(lChar32 * dst,const lChar32 * src,int maxcount)468 inline int _lStr_ncpy(lChar32 * dst, const lChar32 * src, int maxcount)
469 {
470     int count = 0;
471     do
472     {
473         if (++count > maxcount)
474         {
475             *dst = 0;
476             return count;
477         }
478     } while ((*dst++ = *src++));
479     return count;
480 }
481 
_lStr_ncpy(lChar16 * dst,const lChar16 * src,int maxcount)482 inline int _lStr_ncpy(lChar16 * dst, const lChar16 * src, int maxcount)
483 {
484     int count = 0;
485     do
486     {
487         if (++count > maxcount)
488         {
489             *dst = 0;
490             return count;
491         }
492     } while ((*dst++ = *src++));
493     return count;
494 }
495 
_lStr_ncpy(lChar16 * dst,const lChar8 * src,int maxcount)496 inline int _lStr_ncpy(lChar16 * dst, const lChar8 * src, int maxcount)
497 {
498     int count = 0;
499     do
500     {
501         if (++count > maxcount)
502         {
503             *dst = 0;
504             return count;
505         }
506     } while ((*dst++ = (unsigned char)*src++));
507     return count;
508 }
509 
_lStr_ncpy(lChar32 * dst,const lChar8 * src,int maxcount)510 inline int _lStr_ncpy(lChar32 * dst, const lChar8 * src, int maxcount)
511 {
512     int count = 0;
513     do
514     {
515         if (++count > maxcount)
516         {
517             *dst = 0;
518             return count;
519         }
520     } while ((*dst++ = (unsigned char)*src++));
521     return count;
522 }
523 
_lStr_ncpy(lChar8 * dst,const lChar8 * src,int maxcount)524 inline int _lStr_ncpy(lChar8 * dst, const lChar8 * src, int maxcount)
525 {
526     int count = 0;
527     do
528     {
529         if (++count > maxcount)
530         {
531             *dst = 0;
532             return count;
533         }
534     } while ((*dst++ = *src++));
535     return count;
536 }
537 
_lStr_memcpy(lChar16 * dst,const lChar16 * src,int count)538 inline void _lStr_memcpy(lChar16 * dst, const lChar16 * src, int count)
539 {
540     while ( count-- > 0)
541         (*dst++ = *src++);
542 }
543 
_lStr_memcpy(lChar32 * dst,const lChar32 * src,int count)544 inline void _lStr_memcpy(lChar32 * dst, const lChar32 * src, int count)
545 {
546     while ( count-- > 0)
547         (*dst++ = *src++);
548 }
549 
_lStr_memcpy(lChar8 * dst,const lChar8 * src,int count)550 inline void _lStr_memcpy(lChar8 * dst, const lChar8 * src, int count)
551 {
552     memcpy(dst, (const lChar8 *) src, count);
553 }
554 
_lStr_memset(lChar16 * dst,lChar16 value,int count)555 inline void _lStr_memset(lChar16 * dst, lChar16 value, int count)
556 {
557     while ( count-- > 0)
558         *dst++ = value;
559 }
560 
_lStr_memset(lChar32 * dst,lChar32 value,int count)561 inline void _lStr_memset(lChar32 * dst, lChar32 value, int count)
562 {
563     while ( count-- > 0)
564         *dst++ = value;
565 }
566 
_lStr_memset(lChar8 * dst,lChar8 value,int count)567 inline void _lStr_memset(lChar8 * dst, lChar8 value, int count)
568 {
569     memset(dst, (lChar8) value, count);
570 }
571 
/// Exported (non-inline) entry point: forwards to the inline _lStr_len helper
/// for lChar16 strings and returns its result.
int lStr_len(const lChar16 * str)
{
    return _lStr_len(str);
}
576 
/// Exported (non-inline) entry point: forwards to the inline _lStr_len helper
/// for lChar32 strings and returns its result.
int lStr_len(const lChar32 * str)
{
    return _lStr_len(str);
}
581 
/// Exported (non-inline) entry point: forwards to the inline _lStr_len helper
/// for lChar8 strings and returns its result.
int lStr_len(const lChar8 * str)
{
    return _lStr_len(str);
}
586 
/// Exported (non-inline) entry point: forwards to the inline _lStr_nlen helper
/// (length limited to maxcount characters) for lChar16 strings.
int lStr_nlen(const lChar16 * str, int maxcount)
{
    return _lStr_nlen(str, maxcount);
}
591 
/// Exported (non-inline) entry point: forwards to the inline _lStr_nlen helper
/// (length limited to maxcount characters) for lChar32 strings.
int lStr_nlen(const lChar32 * str, int maxcount)
{
    return _lStr_nlen(str, maxcount);
}
596 
/// Exported (non-inline) entry point: forwards to the inline _lStr_nlen helper
/// (length limited to maxcount characters) for lChar8 strings.
int lStr_nlen(const lChar8 * str, int maxcount)
{
    return _lStr_nlen(str, maxcount);
}
601 
/// Exported (non-inline) entry point: forwards to the inline _lStr_cpy helper
/// (lChar16 to lChar16 copy of a NUL-terminated string) and returns its result.
int lStr_cpy(lChar16 * dst, const lChar16 * src)
{
    return _lStr_cpy(dst, src);
}
606 
/// Exported (non-inline) entry point: forwards to the inline _lStr_cpy helper
/// (lChar32 to lChar32 copy of a NUL-terminated string) and returns its result.
int lStr_cpy(lChar32 * dst, const lChar32 * src)
{
    return _lStr_cpy(dst, src);
}
611 
/// Exported (non-inline) entry point: forwards to the inline _lStr_cpy helper
/// (lChar8 to lChar8 copy of a NUL-terminated string) and returns its result.
int lStr_cpy(lChar8 * dst, const lChar8 * src)
{
    return _lStr_cpy(dst, src);
}
616 
/// Exported (non-inline) entry point: forwards to the inline _lStr_cpy helper
/// that widens an 8-bit NUL-terminated string into lChar16, and returns its result.
int lStr_cpy(lChar16 * dst, const lChar8 * src)
{
    return _lStr_cpy(dst, src);
}
621 
/// Exported (non-inline) entry point: forwards to the inline _lStr_cpy helper
/// that widens an 8-bit NUL-terminated string into lChar32, and returns its result.
int lStr_cpy(lChar32 * dst, const lChar8 * src)
{
    return _lStr_cpy(dst, src);
}
626 
/// Exported (non-inline) entry point: forwards to the inline _lStr_ncpy helper
/// (bounded lChar16 copy that always NUL-terminates dst) and returns its result.
int lStr_ncpy(lChar16 * dst, const lChar16 * src, int maxcount)
{
    return _lStr_ncpy(dst, src, maxcount);
}
631 
/// Exported (non-inline) entry point: forwards to the inline _lStr_ncpy helper
/// (bounded lChar32 copy that always NUL-terminates dst) and returns its result.
int lStr_ncpy(lChar32 * dst, const lChar32 * src, int maxcount)
{
    return _lStr_ncpy(dst, src, maxcount);
}
636 
/// Exported (non-inline) entry point: forwards to the inline _lStr_ncpy helper
/// (bounded lChar8 copy that always NUL-terminates dst) and returns its result.
int lStr_ncpy(lChar8 * dst, const lChar8 * src, int maxcount)
{
    return _lStr_ncpy(dst, src, maxcount);
}
641 
/// Exported (non-inline) entry point: forwards to the inline _lStr_memcpy
/// helper, which copies count lChar16 elements from src to dst.
void lStr_memcpy(lChar16 * dst, const lChar16 * src, int count)
{
    _lStr_memcpy(dst, src, count);
}
646 
/// Exported (non-inline) entry point: forwards to the inline _lStr_memcpy
/// helper, which copies count lChar32 elements from src to dst.
void lStr_memcpy(lChar32 * dst, const lChar32 * src, int count)
{
    _lStr_memcpy(dst, src, count);
}
651 
/// Exported (non-inline) entry point: forwards to the inline _lStr_memcpy
/// helper, which copies count lChar8 elements from src to dst.
void lStr_memcpy(lChar8 * dst, const lChar8 * src, int count)
{
    _lStr_memcpy(dst, src, count);
}
656 
/// Exported (non-inline) entry point: forwards to the inline _lStr_memset
/// helper, which stores value into count lChar16 elements of dst.
void lStr_memset(lChar16 * dst, lChar16 value, int count)
{
    _lStr_memset(dst, value, count);
}
661 
/// Exported (non-inline) entry point: forwards to the inline _lStr_memset
/// helper, which stores value into count lChar32 elements of dst.
void lStr_memset(lChar32 * dst, lChar32 value, int count)
{
    _lStr_memset(dst, value, count);
}
666 
/// Exported (non-inline) entry point: forwards to the inline _lStr_memset
/// helper, which stores value into count lChar8 elements of dst.
void lStr_memset(lChar8 * dst, lChar8 value, int count)
{
    _lStr_memset(dst, value, count);
}
671 
/// Compares two NUL-terminated lChar16 strings character by character.
/// Identical pointers (including two NULLs) compare equal; otherwise a NULL
/// pointer orders before any non-NULL string.
/// @return 0 if equal, 1 if dst orders after src, -1 otherwise
int lStr_cmp(const lChar16 * dst, const lChar16 * src)
{
    if (dst == src)
        return 0;
    if (dst == NULL)
        return -1;
    if (src == NULL)
        return 1;
    for ( ; ; ++dst, ++src )
    {
        if ( *dst != *src )
            return ( *dst > *src ) ? 1 : -1;
        if ( *dst == 0 )
            return 0; // both strings ended together
    }
}
692 
/// Compares two NUL-terminated lChar32 strings character by character.
/// Identical pointers (including two NULLs) compare equal; otherwise a NULL
/// pointer orders before any non-NULL string.
/// @return 0 if equal, 1 if dst orders after src, -1 otherwise
int lStr_cmp(const lChar32 * dst, const lChar32 * src)
{
    if (dst == src)
        return 0;
    if (dst == NULL)
        return -1;
    if (src == NULL)
        return 1;
    for ( ; ; ++dst, ++src )
    {
        if ( *dst != *src )
            return ( *dst > *src ) ? 1 : -1;
        if ( *dst == 0 )
            return 0; // both strings ended together
    }
}
713 
/// Compares two NUL-terminated lChar8 strings character by character, using
/// the native ordering of lChar8.
/// Identical pointers (including two NULLs) compare equal; otherwise a NULL
/// pointer orders before any non-NULL string.
/// @return 0 if equal, 1 if dst orders after src, -1 otherwise
int lStr_cmp(const lChar8 * dst, const lChar8 * src)
{
    if (dst == src)
        return 0;
    if (dst == NULL)
        return -1;
    if (src == NULL)
        return 1;
    for ( ; ; ++dst, ++src )
    {
        if ( *dst != *src )
            return ( *dst > *src ) ? 1 : -1;
        if ( *dst == 0 )
            return 0; // both strings ended together
    }
}
734 
lStr_cmp(const lChar16 * dst,const lChar8 * src)735 int lStr_cmp(const lChar16 * dst, const lChar8 * src)
736 {
737     if (!dst && !src)
738         return 0;
739     if (!dst)
740         return -1;
741     else if (!src)
742         return 1;
743     while ( *dst == (lChar16)*src)
744     {
745         if (! *dst )
746             return 0;
747         ++dst;
748         ++src;
749     }
750     if ( *dst > (lChar16)*src )
751         return 1;
752     else
753         return -1;
754 }
755 
lStr_cmp(const lChar32 * dst,const lChar8 * src)756 int lStr_cmp(const lChar32 * dst, const lChar8 * src)
757 {
758     if (!dst && !src)
759         return 0;
760     if (!dst)
761         return -1;
762     else if (!src)
763         return 1;
764     while ( *dst == (lChar32)*src)
765     {
766         if (! *dst )
767             return 0;
768         ++dst;
769         ++src;
770     }
771     if ( *dst > (lChar32)*src )
772         return 1;
773     else
774         return -1;
775 }
776 
lStr_cmp(const lChar8 * dst,const lChar16 * src)777 int lStr_cmp(const lChar8 * dst, const lChar16 * src)
778 {
779     if (!dst && !src)
780         return 0;
781     if (!dst)
782         return -1;
783     else if (!src)
784         return 1;
785     while ( (lChar16)*dst == *src)
786     {
787         if (! *dst )
788             return 0;
789         ++dst;
790         ++src;
791     }
792     if ( (lChar16)*dst > *src )
793         return 1;
794     else
795         return -1;
796 }
797 
lStr_cmp(const lChar8 * dst,const lChar32 * src)798 int lStr_cmp(const lChar8 * dst, const lChar32 * src)
799 {
800     if (!dst && !src)
801         return 0;
802     if (!dst)
803         return -1;
804     else if (!src)
805         return 1;
806     while ( (lChar32)*dst == *src)
807     {
808         if (! *dst )
809             return 0;
810         ++dst;
811         ++src;
812     }
813     if ( (lChar32)*dst > *src )
814         return 1;
815     else
816         return -1;
817 }
818 
lStr_cmp(const lChar32 * dst,const lChar16 * src)819 int lStr_cmp(const lChar32 * dst, const lChar16 * src) {
820     if (!dst && !src)
821         return 0;
822     if (!dst)
823         return -1;
824     else if (!src)
825         return 1;
826     while ( *dst == (lChar32)*src)
827     {
828         if (! *dst )
829             return 0;
830         ++dst;
831         ++src;
832     }
833     if ( *dst > (lChar32)*src )
834         return 1;
835     else
836         return -1;
837 }
838 
lStr_cmp(const lChar16 * dst,const lChar32 * src)839 int lStr_cmp(const lChar16 * dst, const lChar32 * src)
840 {
841     if (!dst && !src)
842         return 0;
843     if (!dst)
844         return -1;
845     else if (!src)
846         return 1;
847     while ( (lChar32)*dst == *src)
848     {
849         if (! *dst )
850             return 0;
851         ++dst;
852         ++src;
853     }
854     if ( (lChar32)*dst > *src )
855         return 1;
856     else
857         return -1;
858 }
859 
860 ////////////////////////////////////////////////////////////////////////////
861 // lString32
862 ////////////////////////////////////////////////////////////////////////////
863 
free()864 void lString32::free()
865 {
866     if ( pchunk==EMPTY_STR_32 )
867         return;
868     //assert(pchunk->buf32[pchunk->len]==0);
869     ::free(pchunk->buf32);
870 #if (LDOM_USE_OWN_MEM_MAN == 1)
871     for (int i=slices_count-1; i>=0; --i)
872     {
873         if (slices[i]->free_chunk32(pchunk))
874             return;
875     }
876     crFatalError(); // wrong pointer!!!
877 #else
878     ::free(pchunk);
879 #endif
880 }
881 
alloc(int sz)882 void lString32::alloc(int sz)
883 {
884 #if (LDOM_USE_OWN_MEM_MAN == 1)
885     pchunk = lstring_chunk_t::alloc();
886 #else
887     pchunk = (lstring_chunk_t*)::malloc(sizeof(lstring_chunk_t));
888 #endif
889     pchunk->buf32 = (lChar32*) ::malloc( sizeof(lChar32) * (sz+1) );
890     assert( pchunk->buf32!=NULL );
891     pchunk->size = sz;
892     pchunk->refCount = 1;
893 }
894 
lString32(const lChar32 * str)895 lString32::lString32(const lChar32 * str)
896 {
897     if (!str || !(*str))
898     {
899         pchunk = EMPTY_STR_32;
900         addref();
901         return;
902     }
903     size_type len = _lStr_len(str);
904     alloc( len );
905     pchunk->len = len;
906     _lStr_cpy( pchunk->buf32, str );
907 }
908 
lString32(const lChar8 * str)909 lString32::lString32(const lChar8 * str)
910 {
911     if (!str || !(*str))
912     {
913         pchunk = EMPTY_STR_32;
914         addref();
915         return;
916     }
917     pchunk = EMPTY_STR_32;
918     addref();
919     *this = Utf8ToUnicode( str );
920 }
921 
922 /// constructor from utf8 character array fragment
lString32(const lChar8 * str,size_type count)923 lString32::lString32(const lChar8 * str, size_type count)
924 {
925     if (!str || !(*str))
926     {
927         pchunk = EMPTY_STR_32;
928         addref();
929         return;
930     }
931     pchunk = EMPTY_STR_32;
932     addref();
933     *this = Utf8ToUnicode( str, count );
934 }
935 
936 
lString32(const value_type * str,size_type count)937 lString32::lString32(const value_type * str, size_type count)
938 {
939     if ( !str || !(*str) || count<=0 )
940     {
941         pchunk = EMPTY_STR_32; addref();
942     }
943     else
944     {
945         size_type len = _lStr_nlen(str, count);
946         alloc(len);
947         _lStr_ncpy( pchunk->buf32, str, len );
948         pchunk->len = len;
949     }
950 }
951 
/// Constructs from the substring of str that starts at offset and is at most
/// count characters long. count is clamped to the characters available after
/// offset; a resulting count <= 0 yields a reference to the shared empty chunk.
lString32::lString32(const lString32 & str, size_type offset, size_type count)
{
    if ( count > str.length() - offset )
        count = str.length() - offset;
    if (count > 0)
    {
        alloc(count);
        _lStr_memcpy( pchunk->buf32, str.pchunk->buf32+offset, count );
        pchunk->buf32[count]=0;
        pchunk->len = count;
        return;
    }
    pchunk = EMPTY_STR_32;
    addref();
}
968 
assign(const lChar32 * str)969 lString32 & lString32::assign(const lChar32 * str)
970 {
971     if (!str || !(*str))
972     {
973         clear();
974     }
975     else
976     {
977         size_type len = _lStr_len(str);
978         if (refCount()==1)
979         {
980             if (pchunk->size<=len)
981             {
982                 // resize is necessary
983                 pchunk->buf32 = (lChar32*) ::realloc( pchunk->buf32, sizeof(lChar32)*(len+1) );
984                 pchunk->size = len+1;
985             }
986         }
987         else
988         {
989             release();
990             alloc(len);
991         }
992         _lStr_cpy( pchunk->buf32, str );
993         pchunk->len = len;
994     }
995     return *this;
996 }
997 
assign(const lChar8 * str)998 lString32 & lString32::assign(const lChar8 * str)
999 {
1000     if (!str || !(*str))
1001     {
1002         clear();
1003     }
1004     else
1005     {
1006         size_type len = _lStr_len(str);
1007         if (refCount()==1)
1008         {
1009             if (pchunk->size<=len)
1010             {
1011                 // resize is necessary
1012                 pchunk->buf32 = (lChar32*) ::realloc( pchunk->buf32, sizeof(lChar32)*(len+1) );
1013                 pchunk->size = len+1;
1014             }
1015         }
1016         else
1017         {
1018             release();
1019             alloc(len);
1020         }
1021         _lStr_cpy( pchunk->buf32, str );
1022         pchunk->len = len;
1023     }
1024     return *this;
1025 }
1026 
assign(const lChar32 * str,size_type count)1027 lString32 & lString32::assign(const lChar32 * str, size_type count)
1028 {
1029     if ( !str || !(*str) || count<=0 )
1030     {
1031         clear();
1032     }
1033     else
1034     {
1035         size_type len = _lStr_nlen(str, count);
1036         if (refCount()==1)
1037         {
1038             if (pchunk->size<=len)
1039             {
1040                 // resize is necessary
1041                 pchunk->buf32 = (lChar32*) ::realloc( pchunk->buf32, sizeof(lChar32)*(len+1) );
1042                 pchunk->size = len+1;
1043             }
1044         }
1045         else
1046         {
1047             release();
1048             alloc(len);
1049         }
1050         _lStr_ncpy( pchunk->buf32, str, count );
1051         pchunk->len = len;
1052     }
1053     return *this;
1054 }
1055 
assign(const lChar8 * str,size_type count)1056 lString32 & lString32::assign(const lChar8 * str, size_type count)
1057 {
1058     if ( !str || !(*str) || count<=0 )
1059     {
1060         clear();
1061     }
1062     else
1063     {
1064         size_type len = _lStr_nlen(str, count);
1065         if (refCount()==1)
1066         {
1067             if (pchunk->size<=len)
1068             {
1069                 // resize is necessary
1070                 pchunk->buf32 = (lChar32*) ::realloc( pchunk->buf32, sizeof(lChar32)*(len+1) );
1071                 pchunk->size = len+1;
1072             }
1073         }
1074         else
1075         {
1076             release();
1077             alloc(len);
1078         }
1079         _lStr_ncpy( pchunk->buf32, str, count );
1080         pchunk->len = len;
1081     }
1082     return *this;
1083 }
1084 
/// Replaces the contents with the fragment [offset, offset+count) of `str`.
/// `count` is clamped to the characters available after `offset`; an empty
/// fragment clears the string. `str` may be this string itself or may share
/// its chunk with this string.
/// @return reference to this string
lString32 & lString32::assign(const lString32 & str, size_type offset, size_type count)
{
    if ( count > str.length() - offset )
        count = str.length() - offset;
    if (count<=0)
    {
        clear();
        return *this;
    }
    if (pchunk==str.pchunk)
    {
        if (refCount()==1)
        {
            // Sole owner of the chunk, so `str` is this very object:
            // move the fragment to the front of the buffer in place.
            if (offset>0)
                _lStr_memcpy( pchunk->buf32, pchunk->buf32+offset, count );
        }
        else
        {
            // The chunk has other owners (possibly `str`). It must not be
            // modified in place, and the fragment has to be copied even when
            // offset is 0, otherwise the new buffer stays uninitialized.
            // The old chunk is expected to stay alive after release()
            // because other references to it remain.
            lstring_chunk_t * psrc = pchunk;
            release();
            alloc(count);
            _lStr_memcpy( pchunk->buf32, psrc->buf32+offset, count );
        }
    }
    else
    {
        if (refCount()==1)
        {
            if (pchunk->size<=count)
            {
                // resize is necessary
                pchunk->buf32 = (lChar32*) ::realloc( pchunk->buf32, sizeof(lChar32)*(count+1) );
                // capacity excludes the terminator, as in reserve()
                pchunk->size = count;
            }
        }
        else
        {
            release();
            alloc(count);
        }
        _lStr_memcpy( pchunk->buf32, str.pchunk->buf32+offset, count );
    }
    pchunk->buf32[count]=0;
    pchunk->len = count;
    return *this;
}
1131 
erase(size_type offset,size_type count)1132 lString32 & lString32::erase(size_type offset, size_type count)
1133 {
1134     if ( count > length() - offset )
1135         count = length() - offset;
1136     if (count<=0)
1137     {
1138         clear();
1139     }
1140     else
1141     {
1142         size_type newlen = length()-count;
1143         if (refCount()==1)
1144         {
1145             _lStr_memcpy( pchunk->buf32+offset, pchunk->buf32+offset+count, newlen-offset+1 );
1146         }
1147         else
1148         {
1149             lstring_chunk_t * poldchunk = pchunk;
1150             release();
1151             alloc( newlen );
1152             _lStr_memcpy( pchunk->buf32, poldchunk->buf32, offset );
1153             _lStr_memcpy( pchunk->buf32+offset, poldchunk->buf32+offset+count, newlen-offset+1 );
1154         }
1155         pchunk->len = newlen;
1156         pchunk->buf32[newlen]=0;
1157     }
1158     return *this;
1159 }
1160 
reserve(size_type n)1161 void lString32::reserve(size_type n)
1162 {
1163     if (refCount()==1)
1164     {
1165         if (pchunk->size < n)
1166         {
1167             pchunk->buf32 = (lChar32*) ::realloc( pchunk->buf32, sizeof(lChar32)*(n+1) );
1168             pchunk->size = n;
1169         }
1170     }
1171     else
1172     {
1173         lstring_chunk_t * poldchunk = pchunk;
1174         release();
1175         alloc( n );
1176         _lStr_memcpy( pchunk->buf32, poldchunk->buf32, poldchunk->len+1 );
1177         pchunk->len = poldchunk->len;
1178     }
1179 }
1180 
lock(size_type newsize)1181 void lString32::lock( size_type newsize )
1182 {
1183     if (refCount()>1)
1184     {
1185         lstring_chunk_t * poldchunk = pchunk;
1186         release();
1187         alloc( newsize );
1188         size_type len = newsize;
1189         if (len>poldchunk->len)
1190             len = poldchunk->len;
1191         _lStr_memcpy( pchunk->buf32, poldchunk->buf32, len );
1192         pchunk->buf32[len]=0;
1193         pchunk->len = len;
1194     }
1195 }
1196 
1197 // lock string, allocate buffer and reset length to 0
reset(size_type size)1198 void lString32::reset( size_type size )
1199 {
1200     if (refCount()>1 || pchunk->size<size)
1201     {
1202         release();
1203         alloc( size );
1204     }
1205     pchunk->buf32[0] = 0;
1206     pchunk->len = 0;
1207 }
1208 
resize(size_type n,lChar32 e)1209 void lString32::resize(size_type n, lChar32 e)
1210 {
1211     lock( n );
1212     if (n>=pchunk->size)
1213     {
1214         pchunk->buf32 = (lChar32*) ::realloc( pchunk->buf32, sizeof(lChar32)*(n+1) );
1215         pchunk->size = n;
1216     }
1217     // fill with data if expanded
1218     for (size_type i=pchunk->len; i<n; i++)
1219         pchunk->buf32[i] = e;
1220     pchunk->buf32[pchunk->len] = 0;
1221 }
1222 
append(const lChar32 * str)1223 lString32 & lString32::append(const lChar32 * str)
1224 {
1225     size_type len = _lStr_len(str);
1226     reserve( pchunk->len+len );
1227     _lStr_memcpy(pchunk->buf32+pchunk->len, str, len+1);
1228     pchunk->len += len;
1229     return *this;
1230 }
1231 
append(const lChar32 * str,size_type count)1232 lString32 & lString32::append(const lChar32 * str, size_type count)
1233 {
1234     reserve(pchunk->len + count);
1235     _lStr_ncpy(pchunk->buf32 + pchunk->len, str, count);
1236     pchunk->len += count;
1237     return *this;
1238 }
1239 
append(const lChar8 * str)1240 lString32 & lString32::append(const lChar8 * str)
1241 {
1242     size_type len = _lStr_len(str);
1243     reserve( pchunk->len+len );
1244     _lStr_ncpy(pchunk->buf32+pchunk->len, str, len+1);
1245     pchunk->len += len;
1246     return *this;
1247 }
1248 
append(const lChar8 * str,size_type count)1249 lString32 & lString32::append(const lChar8 * str, size_type count)
1250 {
1251     reserve(pchunk->len + count);
1252     _lStr_ncpy(pchunk->buf32+pchunk->len, str, count);
1253     pchunk->len += count;
1254     return *this;
1255 }
1256 
/// Appends the contents of `str`; `str` may be this string itself.
/// @return reference to this string
lString32 & lString32::append(const lString32 & str)
{
    // capture the source length before reserve() can touch the chunk
    size_type addlen = str.pchunk->len;
    size_type len2 = pchunk->len + addlen;
    reserve( len2 );
    // Copy only the characters and terminate explicitly. Copying the source
    // terminator as well breaks self-append: its slot is the first
    // destination slot and is already overwritten when it gets read.
    _lStr_memcpy( pchunk->buf32+pchunk->len, str.pchunk->buf32, addlen );
    pchunk->buf32[len2] = 0;
    pchunk->len = len2;
    return *this;
}
1265 
/// Appends the fragment of `str` that starts at `offset` and is at most
/// `count` characters long. Nothing is appended when `offset` is not
/// inside `str`.
/// @return reference to this string
lString32 & lString32::append(const lString32 & str, size_type offset, size_type count)
{
    if ( offset >= str.pchunk->len )
        return *this;

    // clamp the fragment to the end of the source
    if ( offset + count > str.pchunk->len )
        count = str.pchunk->len - offset;

    reserve( pchunk->len+count );
    lChar32 * tail = pchunk->buf32 + pchunk->len;
    _lStr_ncpy(tail, str.pchunk->buf32 + offset, count);
    pchunk->len += count;
    pchunk->buf32[pchunk->len] = 0;
    return *this;
}
1279 
append(size_type count,lChar32 ch)1280 lString32 & lString32::append(size_type count, lChar32 ch)
1281 {
1282     reserve( pchunk->len+count );
1283     _lStr_memset(pchunk->buf32+pchunk->len, ch, count);
1284     pchunk->len += count;
1285     pchunk->buf32[pchunk->len] = 0;
1286     return *this;
1287 }
1288 
insert(size_type p0,size_type count,lChar32 ch)1289 lString32 & lString32::insert(size_type p0, size_type count, lChar32 ch)
1290 {
1291     if (p0>pchunk->len)
1292         p0 = pchunk->len;
1293     reserve( pchunk->len+count );
1294     for (size_type i=pchunk->len+count; i>p0; i--)
1295         pchunk->buf32[i] = pchunk->buf32[i-1];
1296     _lStr_memset(pchunk->buf32+p0, ch, count);
1297     pchunk->len += count;
1298     pchunk->buf32[pchunk->len] = 0;
1299     return *this;
1300 }
1301 
insert(size_type p0,const lString32 & str)1302 lString32 & lString32::insert(size_type p0, const lString32 & str)
1303 {
1304     if (p0>pchunk->len)
1305         p0 = pchunk->len;
1306     int count = str.length();
1307     reserve( pchunk->len+count );
1308     for (size_type i=pchunk->len+count; i>p0; i--)
1309         pchunk->buf32[i] = pchunk->buf32[i-1];
1310     _lStr_memcpy(pchunk->buf32 + p0, str.c_str(), count);
1311     pchunk->len += count;
1312     pchunk->buf32[pchunk->len] = 0;
1313     return *this;
1314 }
1315 
substr(size_type pos,size_type n) const1316 lString32 lString32::substr(size_type pos, size_type n) const
1317 {
1318     if (pos>=length())
1319         return lString32::empty_str;
1320     if (pos+n>length())
1321         n = length() - pos;
1322     return lString32( pchunk->buf32+pos, n );
1323 }
1324 
pack()1325 lString32 & lString32::pack()
1326 {
1327     if (pchunk->len + 4 < pchunk->size )
1328     {
1329         if (refCount()>1)
1330         {
1331             lock(pchunk->len);
1332         }
1333         else
1334         {
1335             pchunk->buf32 = cr_realloc( pchunk->buf32, pchunk->len+1 );
1336             pchunk->size = pchunk->len;
1337         }
1338     }
1339     return *this;
1340 }
1341 
isAlNum(lChar32 ch)1342 bool isAlNum(lChar32 ch) {
1343     lUInt16 props = lGetCharProps(ch);
1344     return (props & (CH_PROP_ALPHA | CH_PROP_DIGIT)) != 0;
1345 }
1346 
1347 /// trims non alpha at beginning and end of string
trimNonAlpha()1348 lString32 & lString32::trimNonAlpha()
1349 {
1350     int firstns;
1351     for (firstns = 0; firstns<pchunk->len &&
1352         !isAlNum(pchunk->buf32[firstns]); ++firstns)
1353         ;
1354     if (firstns >= pchunk->len)
1355     {
1356         clear();
1357         return *this;
1358     }
1359     int lastns;
1360     for (lastns = pchunk->len-1; lastns>0 &&
1361         !isAlNum(pchunk->buf32[lastns]); --lastns)
1362         ;
1363     int newlen = lastns-firstns+1;
1364     if (newlen == pchunk->len)
1365         return *this;
1366     if (refCount()==1)
1367     {
1368         if (firstns>0)
1369             lStr_memcpy( pchunk->buf32, pchunk->buf32+firstns, newlen );
1370         pchunk->buf32[newlen] = 0;
1371         pchunk->len = newlen;
1372     }
1373     else
1374     {
1375         lstring_chunk_t * poldchunk = pchunk;
1376         release();
1377         alloc( newlen );
1378         _lStr_memcpy( pchunk->buf32, poldchunk->buf32+firstns, newlen );
1379         pchunk->buf32[newlen] = 0;
1380         pchunk->len = newlen;
1381     }
1382     return *this;
1383 }
1384 
trim()1385 lString32 & lString32::trim()
1386 {
1387     //
1388     int firstns;
1389     for (firstns = 0; firstns<pchunk->len &&
1390         (pchunk->buf32[firstns]==' ' || pchunk->buf32[firstns]=='\t'); ++firstns)
1391         ;
1392     if (firstns >= pchunk->len)
1393     {
1394         clear();
1395         return *this;
1396     }
1397     int lastns;
1398     for (lastns = pchunk->len-1; lastns>0 &&
1399         (pchunk->buf32[lastns]==' ' || pchunk->buf32[lastns]=='\t'); --lastns)
1400         ;
1401     int newlen = lastns-firstns+1;
1402     if (newlen == pchunk->len)
1403         return *this;
1404     if (refCount()==1)
1405     {
1406         if (firstns>0)
1407             lStr_memcpy( pchunk->buf32, pchunk->buf32+firstns, newlen );
1408         pchunk->buf32[newlen] = 0;
1409         pchunk->len = newlen;
1410     }
1411     else
1412     {
1413         lstring_chunk_t * poldchunk = pchunk;
1414         release();
1415         alloc( newlen );
1416         _lStr_memcpy( pchunk->buf32, poldchunk->buf32+firstns, newlen );
1417         pchunk->buf32[newlen] = 0;
1418         pchunk->len = newlen;
1419     }
1420     return *this;
1421 }
1422 
atoi() const1423 int lString32::atoi() const
1424 {
1425     int n = 0;
1426     atoi(n);
1427     return n;
1428 }
1429 
static const char * hex_digits = "0123456789abcdef";
// converts 0..15 to 0..f (only the low four bits of c are used)
char toHexDigit( int c )
{
    const int nibble = c & 0xf;
    return hex_digits[nibble];
}
1436 
// returns 0..15 if c is hex digit (either letter case), -1 otherwise
int hexDigit( int c )
{
    int value = -1;
    if ( c>='0' && c<='9' )
        value = c - '0';
    else if ( c>='a' && c<='f' )
        value = c - 'a' + 10;
    else if ( c>='A' && c<='F' )
        value = c - 'A' + 10;
    return value;
}
1448 
1449 // decode LEN hex digits, return decoded number, -1 if invalid
decodeHex(const lChar32 * str,int len)1450 int decodeHex( const lChar32 * str, int len ) {
1451     int n = 0;
1452     for ( int i=0; i<len; i++ ) {
1453         if ( !str[i] )
1454             return -1;
1455         int d = hexDigit(str[i]);
1456         if ( d==-1 )
1457             return -1;
1458         n = (n<<4) | d;
1459     }
1460     return n;
1461 }
1462 
1463 // decode LEN decimal digits, return decoded number, -1 if invalid
decodeDecimal(const lChar32 * str,int len)1464 int decodeDecimal( const lChar32 * str, int len ) {
1465     int n = 0;
1466     for ( int i=0; i<len; i++ ) {
1467         if ( !str[i] )
1468             return -1;
1469         int d = str[i] - '0';
1470         if ( d<0 || d>9 )
1471             return -1;
1472         n = n*10 + d;
1473     }
1474     return n;
1475 }
1476 
atoi(int & n) const1477 bool lString32::atoi( int &n ) const
1478 {
1479     n = 0;
1480     int sgn = 1;
1481     const lChar32 * s = c_str();
1482     while (*s == ' ' || *s == '\t')
1483         s++;
1484     if ( s[0]=='0' && s[1]=='x') {
1485         s+=2;
1486         for (;*s;) {
1487             int d = hexDigit(*s++);
1488             if ( d>=0 )
1489                 n = (n<<4) | d;
1490         }
1491         return true;
1492     }
1493     if (*s == '-')
1494     {
1495         sgn = -1;
1496         s++;
1497     }
1498     else if (*s == '+')
1499     {
1500         s++;
1501     }
1502     if ( !(*s>='0' && *s<='9') )
1503         return false;
1504     while (*s>='0' && *s<='9')
1505     {
1506         n = n * 10 + ( (*s++)-'0' );
1507     }
1508     if ( sgn<0 )
1509         n = -n;
1510     return *s=='\0' || *s==' ' || *s=='\t';
1511 }
1512 
atoi(lInt64 & n) const1513 bool lString32::atoi( lInt64 &n ) const
1514 {
1515     int sgn = 1;
1516     const lChar32 * s = c_str();
1517     while (*s == ' ' || *s == '\t')
1518         s++;
1519     if (*s == '-')
1520     {
1521         sgn = -1;
1522         s++;
1523     }
1524     else if (*s == '+')
1525     {
1526         s++;
1527     }
1528     if ( !(*s>='0' && *s<='9') )
1529         return false;
1530     while (*s>='0' && *s<='9')
1531     {
1532         n = n * 10 + ( (*s++)-'0' );
1533     }
1534     if ( sgn<0 )
1535         n = -n;
1536     return *s=='\0' || *s==' ' || *s=='\t';
1537 }
1538 
1539 #define STRING_HASH_MULT 31
getHash() const1540 lUInt32 lString32::getHash() const
1541 {
1542     lUInt32 res = 0;
1543     for (lInt32 i=0; i<pchunk->len; i++)
1544         res = res * STRING_HASH_MULT + pchunk->buf32[i];
1545     return res;
1546 }
1547 
calcStringHash(const lChar32 * s)1548 lUInt32 calcStringHash( const lChar32 * s )
1549 {
1550     lUInt32 a = 2166136261u;
1551     while (*s)
1552     {
1553         a = a * 16777619 ^ (*s++);
1554     }
1555     return a;
1556 }
1557 
1558 /// calculates CRC32 for buffer contents
lStr_crc32(lUInt32 prevValue,const void * buf,int size)1559 lUInt32 lStr_crc32( lUInt32 prevValue, const void * buf, int size )
1560 {
1561 #if (USE_ZLIB==1)
1562     return crc32( prevValue, (const lUInt8 *)buf, size );
1563 #else
1564     // TODO:
1565     return 0;
1566 #endif
1567 }
1568 
1569 
/// Definition of the static empty string constant; substr() returns it
/// when the requested position is at or past the end of the string.
const lString32 lString32::empty_str;
1571 
1572 
1573 ////////////////////////////////////////////////////////////////////////////
1574 // lString16
1575 ////////////////////////////////////////////////////////////////////////////
1576 
free()1577 void lString16::free()
1578 {
1579     if ( pchunk==EMPTY_STR_16 )
1580         return;
1581     //assert(pchunk->buf16[pchunk->len]==0);
1582     ::free(pchunk->buf16);
1583 #if (LDOM_USE_OWN_MEM_MAN == 1)
1584     for (int i=slices_count-1; i>=0; --i)
1585     {
1586         if (slices[i]->free_chunk16(pchunk))
1587             return;
1588     }
1589     crFatalError(); // wrong pointer!!!
1590 #else
1591     ::free(pchunk);
1592 #endif
1593 }
1594 
alloc(int sz)1595 void lString16::alloc(int sz)
1596 {
1597 #if (LDOM_USE_OWN_MEM_MAN == 1)
1598     pchunk = lstring_chunk_t::alloc();
1599 #else
1600     pchunk = (lstring_chunk_t*)::malloc(sizeof(lstring_chunk_t));
1601 #endif
1602     pchunk->buf16 = (lChar16*) ::malloc( sizeof(lChar16) * (sz+1) );
1603     assert( pchunk->buf16!=NULL );
1604     pchunk->size = sz;
1605     pchunk->refCount = 1;
1606 }
1607 
lString16(const value_type * str)1608 lString16::lString16(const value_type * str)
1609 {
1610     if (!str || !(*str))
1611     {
1612         pchunk = EMPTY_STR_16;
1613         addref();
1614         return;
1615     }
1616     size_type len = _lStr_len(str);
1617     alloc( len );
1618     pchunk->len = len;
1619     _lStr_cpy( pchunk->buf16, str );
1620 }
1621 
lString16(const lChar8 * str)1622 lString16::lString16(const lChar8 * str)
1623 {
1624     if (!str || !(*str))
1625     {
1626         pchunk = EMPTY_STR_16;
1627         addref();
1628         return;
1629     }
1630     pchunk = EMPTY_STR_16;
1631     addref();
1632     *this = UnicodeToUtf16( Utf8ToUnicode( str ) );
1633 }
1634 
1635 /// constructor from utf8 character array fragment
lString16(const lChar8 * str,size_type count)1636 lString16::lString16(const lChar8 * str, size_type count)
1637 {
1638     if (!str || !(*str))
1639     {
1640         pchunk = EMPTY_STR_16;
1641         addref();
1642         return;
1643     }
1644     pchunk = EMPTY_STR_16;
1645     addref();
1646     *this = UnicodeToUtf16( Utf8ToUnicode( str, count ) );
1647 }
1648 
1649 
lString16(const value_type * str,size_type count)1650 lString16::lString16(const value_type * str, size_type count)
1651 {
1652     if ( !str || !(*str) || count<=0 )
1653     {
1654         pchunk = EMPTY_STR_16;
1655         addref();
1656     }
1657     else
1658     {
1659         size_type len = _lStr_nlen(str, count);
1660         alloc(len);
1661         _lStr_ncpy( pchunk->buf16, str, len );
1662         pchunk->len = len;
1663     }
1664 }
1665 
/// Constructs the string from the fragment [offset, offset+count) of `str`.
/// `count` is clamped to the characters available after `offset`; an empty
/// fragment yields the shared empty string.
lString16::lString16(const lString16 & str, size_type offset, size_type count)
{
    const size_type available = str.length() - offset;
    if ( count > available )
        count = available;
    if (count>0)
    {
        alloc(count);
        _lStr_memcpy( pchunk->buf16, str.pchunk->buf16+offset, count );
        pchunk->len = count;
        pchunk->buf16[count]=0;
        return;
    }
    pchunk = EMPTY_STR_16;
    addref();
}
1683 
assign(const value_type * str)1684 lString16 & lString16::assign(const value_type * str)
1685 {
1686     if (!str || !(*str))
1687     {
1688         clear();
1689     }
1690     else
1691     {
1692         size_type len = _lStr_len(str);
1693         if (refCount()==1)
1694         {
1695             if (pchunk->size<=len)
1696             {
1697                 // resize is necessary
1698                 pchunk->buf16 = (lChar16*) ::realloc( pchunk->buf16, sizeof(lChar16)*(len+1) );
1699                 pchunk->size = len+1;
1700             }
1701         }
1702         else
1703         {
1704             release();
1705             alloc(len);
1706         }
1707         _lStr_cpy( pchunk->buf16, str );
1708         pchunk->len = len;
1709     }
1710     return *this;
1711 }
1712 
assign(const lChar8 * str)1713 lString16 & lString16::assign(const lChar8 * str)
1714 {
1715     if (!str || !(*str))
1716     {
1717         clear();
1718     }
1719     else
1720     {
1721         size_type len = _lStr_len(str);
1722         if (refCount()==1)
1723         {
1724             if (pchunk->size<=len)
1725             {
1726                 // resize is necessary
1727                 pchunk->buf16 = (lChar16*) ::realloc( pchunk->buf16, sizeof(lChar16)*(len+1) );
1728                 pchunk->size = len+1;
1729             }
1730         }
1731         else
1732         {
1733             release();
1734             alloc(len);
1735         }
1736         _lStr_cpy( pchunk->buf16, str );
1737         pchunk->len = len;
1738     }
1739     return *this;
1740 }
1741 
assign(const value_type * str,size_type count)1742 lString16 & lString16::assign(const value_type * str, size_type count)
1743 {
1744     if ( !str || !(*str) || count<=0 )
1745     {
1746         clear();
1747     }
1748     else
1749     {
1750         size_type len = _lStr_nlen(str, count);
1751         if (refCount()==1)
1752         {
1753             if (pchunk->size<=len)
1754             {
1755                 // resize is necessary
1756                 pchunk->buf16 = (lChar16*) ::realloc( pchunk->buf16, sizeof(lChar16)*(len+1) );
1757                 pchunk->size = len+1;
1758             }
1759         }
1760         else
1761         {
1762             release();
1763             alloc(len);
1764         }
1765         _lStr_ncpy( pchunk->buf16, str, count );
1766         pchunk->len = len;
1767     }
1768     return *this;
1769 }
1770 
assign(const lChar8 * str,size_type count)1771 lString16 & lString16::assign(const lChar8 * str, size_type count)
1772 {
1773     if ( !str || !(*str) || count<=0 )
1774     {
1775         clear();
1776     }
1777     else
1778     {
1779         size_type len = _lStr_nlen(str, count);
1780         if (refCount()==1)
1781         {
1782             if (pchunk->size<=len)
1783             {
1784                 // resize is necessary
1785                 pchunk->buf16 = (lChar16*) ::realloc( pchunk->buf16, sizeof(lChar16)*(len+1) );
1786                 pchunk->size = len+1;
1787             }
1788         }
1789         else
1790         {
1791             release();
1792             alloc(len);
1793         }
1794         _lStr_ncpy( pchunk->buf16, str, count );
1795         pchunk->len = len;
1796     }
1797     return *this;
1798 }
1799 
/// Replaces the contents with the fragment [offset, offset+count) of `str`.
/// `count` is clamped to the characters available after `offset`; an empty
/// fragment clears the string. `str` may be this string itself or may share
/// its chunk with this string.
/// @return reference to this string
lString16 & lString16::assign(const lString16 & str, size_type offset, size_type count)
{
    if ( count > str.length() - offset )
        count = str.length() - offset;
    if (count<=0)
    {
        clear();
        return *this;
    }
    if (pchunk==str.pchunk)
    {
        if (refCount()==1)
        {
            // Sole owner of the chunk, so `str` is this very object:
            // move the fragment to the front of the buffer in place.
            if (offset>0)
                _lStr_memcpy( pchunk->buf16, pchunk->buf16+offset, count );
        }
        else
        {
            // The chunk has other owners (possibly `str`). It must not be
            // modified in place, and the fragment has to be copied even when
            // offset is 0, otherwise the new buffer stays uninitialized.
            // The old chunk is expected to stay alive after release()
            // because other references to it remain.
            lstring_chunk_t * psrc = pchunk;
            release();
            alloc(count);
            _lStr_memcpy( pchunk->buf16, psrc->buf16+offset, count );
        }
    }
    else
    {
        if (refCount()==1)
        {
            if (pchunk->size<=count)
            {
                // resize is necessary
                pchunk->buf16 = (lChar16*) ::realloc( pchunk->buf16, sizeof(lChar16)*(count+1) );
                // capacity excludes the terminator, as in alloc()/reserve()
                pchunk->size = count;
            }
        }
        else
        {
            release();
            alloc(count);
        }
        _lStr_memcpy( pchunk->buf16, str.pchunk->buf16+offset, count );
    }
    pchunk->buf16[count]=0;
    pchunk->len = count;
    return *this;
}
1846 
erase(size_type offset,size_type count)1847 lString16 & lString16::erase(size_type offset, size_type count)
1848 {
1849     if ( count > length() - offset )
1850         count = length() - offset;
1851     if (count<=0)
1852     {
1853         clear();
1854     }
1855     else
1856     {
1857         size_type newlen = length()-count;
1858         if (refCount()==1)
1859         {
1860             _lStr_memcpy( pchunk->buf16+offset, pchunk->buf16+offset+count, newlen-offset+1 );
1861         }
1862         else
1863         {
1864             lstring_chunk_t * poldchunk = pchunk;
1865             release();
1866             alloc( newlen );
1867             _lStr_memcpy( pchunk->buf16, poldchunk->buf16, offset );
1868             _lStr_memcpy( pchunk->buf16+offset, poldchunk->buf16+offset+count, newlen-offset+1 );
1869         }
1870         pchunk->len = newlen;
1871         pchunk->buf16[newlen]=0;
1872     }
1873     return *this;
1874 }
1875 
reserve(size_type n)1876 void lString16::reserve(size_type n)
1877 {
1878     if (refCount()==1)
1879     {
1880         if (pchunk->size < n)
1881         {
1882             pchunk->buf16 = (lChar16*) ::realloc( pchunk->buf16, sizeof(lChar16)*(n+1) );
1883             pchunk->size = n;
1884         }
1885     }
1886     else
1887     {
1888         lstring_chunk_t * poldchunk = pchunk;
1889         release();
1890         alloc( n );
1891         _lStr_memcpy( pchunk->buf16, poldchunk->buf16, poldchunk->len+1 );
1892         pchunk->len = poldchunk->len;
1893     }
1894 }
1895 
lock(size_type newsize)1896 void lString16::lock( size_type newsize )
1897 {
1898     if (refCount()>1)
1899     {
1900         lstring_chunk_t * poldchunk = pchunk;
1901         release();
1902         alloc( newsize );
1903         size_type len = newsize;
1904         if (len>poldchunk->len)
1905             len = poldchunk->len;
1906         _lStr_memcpy( pchunk->buf16, poldchunk->buf16, len );
1907         pchunk->buf16[len]=0;
1908         pchunk->len = len;
1909     }
1910 }
1911 
1912 // lock string, allocate buffer and reset length to 0
reset(size_type size)1913 void lString16::reset( size_type size )
1914 {
1915     if (refCount()>1 || pchunk->size<size)
1916     {
1917         release();
1918         alloc( size );
1919     }
1920     pchunk->buf16[0] = 0;
1921     pchunk->len = 0;
1922 }
1923 
resize(size_type n,value_type e)1924 void lString16::resize(size_type n, value_type e)
1925 {
1926     lock( n );
1927     if (n>=pchunk->size)
1928     {
1929         pchunk->buf16 = (lChar16*) ::realloc( pchunk->buf16, sizeof(lChar16)*(n+1) );
1930         pchunk->size = n;
1931     }
1932     // fill with data if expanded
1933     for (size_type i=pchunk->len; i<n; i++)
1934         pchunk->buf16[i] = e;
1935     pchunk->buf16[pchunk->len] = 0;
1936 }
1937 
append(const value_type * str)1938 lString16 & lString16::append(const value_type * str)
1939 {
1940     size_type len = _lStr_len(str);
1941     reserve( pchunk->len+len );
1942     _lStr_memcpy(pchunk->buf16 + pchunk->len, str, len+1);
1943     pchunk->len += len;
1944     return *this;
1945 }
1946 
append(const value_type * str,size_type count)1947 lString16 & lString16::append(const value_type * str, size_type count)
1948 {
1949     reserve(pchunk->len + count);
1950     _lStr_ncpy(pchunk->buf16 + pchunk->len, str, count);
1951     pchunk->len += count;
1952     return *this;
1953 }
1954 
append(const lChar8 * str)1955 lString16 & lString16::append(const lChar8 * str)
1956 {
1957     size_type len = _lStr_len(str);
1958     reserve( pchunk->len+len );
1959     _lStr_ncpy(pchunk->buf16 + pchunk->len, str, len + 1);
1960     pchunk->len += len;
1961     return *this;
1962 }
1963 
append(const lChar8 * str,size_type count)1964 lString16 & lString16::append(const lChar8 * str, size_type count)
1965 {
1966     reserve(pchunk->len + count);
1967     _lStr_ncpy(pchunk->buf16 + pchunk->len, str, count);
1968     pchunk->len += count;
1969     return *this;
1970 }
1971 
/// Appends the contents of `str`; `str` may be this string itself.
/// @return reference to this string
lString16 & lString16::append(const lString16 & str)
{
    // capture the source length before reserve() can touch the chunk
    size_type addlen = str.pchunk->len;
    size_type len2 = pchunk->len + addlen;
    reserve( len2 );
    // Copy only the characters and terminate explicitly. Copying the source
    // terminator as well breaks self-append: its slot is the first
    // destination slot and is already overwritten when it gets read.
    _lStr_memcpy( pchunk->buf16+pchunk->len, str.pchunk->buf16, addlen );
    pchunk->buf16[len2] = 0;
    pchunk->len = len2;
    return *this;
}
1980 
/// Appends the fragment of `str` that starts at `offset` and is at most
/// `count` characters long. Nothing is appended when `offset` is not
/// inside `str`.
/// @return reference to this string
lString16 & lString16::append(const lString16 & str, size_type offset, size_type count)
{
    if ( offset >= str.pchunk->len )
        return *this;

    // clamp the fragment to the end of the source
    if ( offset + count > str.pchunk->len )
        count = str.pchunk->len - offset;

    reserve( pchunk->len+count );
    lChar16 * tail = pchunk->buf16 + pchunk->len;
    _lStr_ncpy(tail, str.pchunk->buf16 + offset, count);
    pchunk->len += count;
    pchunk->buf16[pchunk->len] = 0;
    return *this;
}
1994 
append(size_type count,value_type ch)1995 lString16 & lString16::append(size_type count, value_type ch)
1996 {
1997     reserve( pchunk->len+count );
1998     _lStr_memset(pchunk->buf16+pchunk->len, ch, count);
1999     pchunk->len += count;
2000     pchunk->buf16[pchunk->len] = 0;
2001     return *this;
2002 }
2003 
insert(size_type p0,const value_type * str)2004 lString16 & lString16::insert(size_type p0, const value_type * str)
2005 {
2006     if (p0>pchunk->len)
2007         p0 = pchunk->len;
2008     int count = lStr_len(str);
2009     reserve( pchunk->len+count );
2010     for (size_type i=pchunk->len+count; i>p0; i--)
2011         pchunk->buf16[i] = pchunk->buf16[i-1];
2012     _lStr_memcpy(pchunk->buf16 + p0, str, count);
2013     pchunk->len += count;
2014     pchunk->buf16[pchunk->len] = 0;
2015     return *this;
2016 }
2017 
insert(size_type p0,const value_type * str,size_type count)2018 lString16 & lString16::insert(size_type p0, const value_type * str, size_type count)
2019 {
2020     if (p0>pchunk->len)
2021         p0 = pchunk->len;
2022     reserve( pchunk->len+count );
2023     for (size_type i=pchunk->len+count; i>p0; i--)
2024         pchunk->buf16[i] = pchunk->buf16[i-1];
2025     _lStr_memcpy(pchunk->buf16 + p0, str, count);
2026     pchunk->len += count;
2027     pchunk->buf16[pchunk->len] = 0;
2028     return *this;
2029 }
2030 
insert(size_type p0,size_type count,value_type ch)2031 lString16 & lString16::insert(size_type p0, size_type count, value_type ch)
2032 {
2033     if (p0>pchunk->len)
2034         p0 = pchunk->len;
2035     reserve( pchunk->len+count );
2036     for (size_type i=pchunk->len+count; i>p0; i--)
2037         pchunk->buf16[i] = pchunk->buf16[i-1];
2038     _lStr_memset(pchunk->buf16+p0, ch, count);
2039     pchunk->len += count;
2040     pchunk->buf16[pchunk->len] = 0;
2041     return *this;
2042 }
2043 
insert(size_type p0,const lString16 & str)2044 lString16 & lString16::insert(size_type p0, const lString16 & str)
2045 {
2046     if (p0>pchunk->len)
2047         p0 = pchunk->len;
2048     int count = str.length();
2049     reserve( pchunk->len+count );
2050     for (size_type i=pchunk->len+count; i>p0; i--)
2051         pchunk->buf16[i] = pchunk->buf16[i-1];
2052     _lStr_memcpy(pchunk->buf16 + p0, str.c_str(), count);
2053     pchunk->len += count;
2054     pchunk->buf16[pchunk->len] = 0;
2055     return *this;
2056 }
2057 
substr(size_type pos,size_type n) const2058 lString16 lString16::substr(size_type pos, size_type n) const
2059 {
2060     if (pos>=length())
2061         return lString16::empty_str;
2062     if (pos+n>length())
2063         n = length() - pos;
2064     return lString16( pchunk->buf16 + pos, n );
2065 }
2066 
pack()2067 lString16 & lString16::pack()
2068 {
2069     if (pchunk->len + 4 < pchunk->size )
2070     {
2071         if (refCount()>1)
2072         {
2073             lock(pchunk->len);
2074         }
2075         else
2076         {
2077             pchunk->buf16 = cr_realloc( pchunk->buf16, pchunk->len + 1 );
2078             pchunk->size = pchunk->len;
2079         }
2080     }
2081     return *this;
2082 }
2083 
2084 /// trims non alpha at beginning and end of string
trimNonAlpha()2085 lString16 & lString16::trimNonAlpha()
2086 {
2087     int firstns;
2088     for (firstns = 0; firstns<pchunk->len &&
2089         !isAlNum(pchunk->buf16[firstns]); ++firstns)
2090         ;
2091     if (firstns >= pchunk->len)
2092     {
2093         clear();
2094         return *this;
2095     }
2096     int lastns;
2097     for (lastns = pchunk->len-1; lastns>0 &&
2098         !isAlNum(pchunk->buf16[lastns]); --lastns)
2099         ;
2100     int newlen = lastns-firstns+1;
2101     if (newlen == pchunk->len)
2102         return *this;
2103     if (refCount()==1)
2104     {
2105         if (firstns>0)
2106             lStr_memcpy( pchunk->buf16, pchunk->buf16 + firstns, newlen );
2107         pchunk->buf16[newlen] = 0;
2108         pchunk->len = newlen;
2109     }
2110     else
2111     {
2112         lstring_chunk_t * poldchunk = pchunk;
2113         release();
2114         alloc( newlen );
2115         _lStr_memcpy( pchunk->buf16, poldchunk->buf16+firstns, newlen );
2116         pchunk->buf16[newlen] = 0;
2117         pchunk->len = newlen;
2118     }
2119     return *this;
2120 }
2121 
trim()2122 lString16 & lString16::trim()
2123 {
2124     //
2125     int firstns;
2126     for (firstns = 0; firstns<pchunk->len &&
2127         (pchunk->buf16[firstns]==' ' || pchunk->buf16[firstns]=='\t'); ++firstns)
2128         ;
2129     if (firstns >= pchunk->len)
2130     {
2131         clear();
2132         return *this;
2133     }
2134     int lastns;
2135     for (lastns = pchunk->len-1; lastns>0 &&
2136         (pchunk->buf16[lastns]==' ' || pchunk->buf16[lastns]=='\t'); --lastns)
2137         ;
2138     int newlen = lastns-firstns+1;
2139     if (newlen == pchunk->len)
2140         return *this;
2141     if (refCount()==1)
2142     {
2143         if (firstns>0)
2144             lStr_memcpy( pchunk->buf16, pchunk->buf16+firstns, newlen );
2145         pchunk->buf16[newlen] = 0;
2146         pchunk->len = newlen;
2147     }
2148     else
2149     {
2150         lstring_chunk_t * poldchunk = pchunk;
2151         release();
2152         alloc( newlen );
2153         _lStr_memcpy( pchunk->buf16, poldchunk->buf16+firstns, newlen );
2154         pchunk->buf16[newlen] = 0;
2155         pchunk->len = newlen;
2156     }
2157     return *this;
2158 }
2159 
atoi() const2160 int lString16::atoi() const
2161 {
2162     int n = 0;
2163     atoi(n);
2164     return n;
2165 }
2166 
atoi(int & n) const2167 bool lString16::atoi( int &n ) const
2168 {
2169     n = 0;
2170     int sgn = 1;
2171     const lChar16 * s = c_str();
2172     while (*s == ' ' || *s == '\t')
2173         s++;
2174     if ( s[0]=='0' && s[1]=='x') {
2175         s+=2;
2176         for (;*s;) {
2177             int d = hexDigit(*s++);
2178             if ( d>=0 )
2179                 n = (n<<4) | d;
2180         }
2181         return true;
2182     }
2183     if (*s == '-')
2184     {
2185         sgn = -1;
2186         s++;
2187     }
2188     else if (*s == '+')
2189     {
2190         s++;
2191     }
2192     if ( !(*s>='0' && *s<='9') )
2193         return false;
2194     while (*s>='0' && *s<='9')
2195     {
2196         n = n * 10 + ( (*s++)-'0' );
2197     }
2198     if ( sgn<0 )
2199         n = -n;
2200     return *s=='\0' || *s==' ' || *s=='\t';
2201 }
2202 
atoi(lInt64 & n) const2203 bool lString16::atoi( lInt64 &n ) const
2204 {
2205     int sgn = 1;
2206     const lChar16 * s = c_str();
2207     while (*s == ' ' || *s == '\t')
2208         s++;
2209     if (*s == '-')
2210     {
2211         sgn = -1;
2212         s++;
2213     }
2214     else if (*s == '+')
2215     {
2216         s++;
2217     }
2218     if ( !(*s>='0' && *s<='9') )
2219         return false;
2220     while (*s>='0' && *s<='9')
2221     {
2222         n = n * 10 + ( (*s++)-'0' );
2223     }
2224     if ( sgn<0 )
2225         n = -n;
2226     return *s=='\0' || *s==' ' || *s=='\t';
2227 }
2228 
getHash() const2229 lUInt32 lString16::getHash() const
2230 {
2231     lUInt32 res = 0;
2232     for (lInt32 i=0; i<pchunk->len; i++)
2233         res = res * STRING_HASH_MULT + pchunk->buf16[i];
2234     return res;
2235 }
2236 
calcStringHash(const lChar16 * s)2237 lUInt32 calcStringHash( const lChar16 * s )
2238 {
2239     lUInt32 a = 2166136261u;
2240     while (*s)
2241     {
2242         a = a * 16777619 ^ (*s++);
2243     }
2244     return a;
2245 }
2246 
2247 
/// Storage for the static lString16::empty_str member (default-constructed);
/// lString16::substr() returns it when the requested position is out of range.
const lString16 lString16::empty_str;
2249 
2250 
2251 ////////////////////////////////////////////////////////////////////////////
2252 // lString8
2253 ////////////////////////////////////////////////////////////////////////////
2254 
free()2255 void lString8::free()
2256 {
2257     if ( pchunk==EMPTY_STR_8 )
2258         return;
2259     ::free(pchunk->buf8);
2260 #if (LDOM_USE_OWN_MEM_MAN == 1)
2261     for (int i=slices_count-1; i>=0; --i)
2262     {
2263         if (slices[i]->free_chunk(pchunk))
2264             return;
2265     }
2266     crFatalError(); // wrong pointer!!!
2267 #else
2268     ::free(pchunk);
2269 #endif
2270 }
2271 
alloc(int sz)2272 void lString8::alloc(int sz)
2273 {
2274 #if (LDOM_USE_OWN_MEM_MAN == 1)
2275     pchunk = lstring_chunk_t::alloc();
2276 #else
2277     pchunk = (lstring_chunk_t*)::malloc(sizeof(lstring_chunk_t));
2278 #endif
2279     pchunk->buf8 = (lChar8*) ::malloc( sizeof(lChar8) * (sz+1) );
2280     assert( pchunk->buf8!=NULL );
2281     pchunk->size = sz;
2282     pchunk->refCount = 1;
2283 }
2284 
lString8(const lChar8 * str)2285 lString8::lString8(const lChar8 * str)
2286 {
2287     if (!str || !(*str))
2288     {
2289         pchunk = EMPTY_STR_8;
2290         addref();
2291         return;
2292     }
2293     size_type len = _lStr_len(str);
2294     alloc( len );
2295     pchunk->len = len;
2296     _lStr_cpy( pchunk->buf8, str );
2297 }
2298 
lString8(const lChar32 * str)2299 lString8::lString8(const lChar32 * str)
2300 {
2301     if (!str || !(*str))
2302     {
2303         pchunk = EMPTY_STR_8;
2304         addref();
2305         return;
2306     }
2307     size_type len = _lStr_len(str);
2308     alloc( len );
2309     pchunk->len = len;
2310     _lStr_cpy( pchunk->buf8, str );
2311 }
2312 
lString8(const value_type * str,size_type count)2313 lString8::lString8(const value_type * str, size_type count)
2314 {
2315     if ( !str || !(*str) || count<=0 )
2316     {
2317         pchunk = EMPTY_STR_8; addref();
2318     }
2319     else
2320     {
2321         size_type len = _lStr_nlen(str, count);
2322         alloc(len);
2323         _lStr_ncpy( pchunk->buf8, str, len );
2324         pchunk->len = len;
2325     }
2326 }
2327 
/// Constructs a string from a range of another string.
/// count is limited to the characters available after offset; when nothing
/// remains the new string references the shared empty chunk.
/// @param str    source string
/// @param offset index of the first character to copy
/// @param count  maximum number of characters to copy
lString8::lString8(const lString8 & str, size_type offset, size_type count)
{
    const size_type avail = str.length() - offset;
    if ( count > avail )
        count = avail;
    if (count > 0)
    {
        alloc(count);
        _lStr_memcpy( pchunk->buf8, str.pchunk->buf8+offset, count );
        pchunk->buf8[count] = 0;
        pchunk->len = count;
        return;
    }
    pchunk = EMPTY_STR_8;
    addref();
}
2344 
assign(const lChar8 * str)2345 lString8 & lString8::assign(const lChar8 * str)
2346 {
2347     if (!str || !(*str))
2348     {
2349         clear();
2350     }
2351     else
2352     {
2353         size_type len = _lStr_len(str);
2354         if (refCount()==1)
2355         {
2356             if (pchunk->size<=len)
2357             {
2358                 // resize is necessary
2359                 pchunk->buf8 = (lChar8*) ::realloc( pchunk->buf8, sizeof(lChar8)*(len+1) );
2360                 pchunk->size = len+1;
2361             }
2362         }
2363         else
2364         {
2365             release();
2366             alloc(len);
2367         }
2368         _lStr_cpy( pchunk->buf8, str );
2369         pchunk->len = len;
2370     }
2371     return *this;
2372 }
2373 
assign(const lChar8 * str,size_type count)2374 lString8 & lString8::assign(const lChar8 * str, size_type count)
2375 {
2376     if ( !str || !(*str) || count<=0 )
2377     {
2378         clear();
2379     }
2380     else
2381     {
2382         size_type len = _lStr_nlen(str, count);
2383         if (refCount()==1)
2384         {
2385             if (pchunk->size<=len)
2386             {
2387                 // resize is necessary
2388                 pchunk->buf8 = (lChar8*) ::realloc( pchunk->buf8, sizeof(lChar8)*(len+1) );
2389                 pchunk->size = len+1;
2390             }
2391         }
2392         else
2393         {
2394             release();
2395             alloc(len);
2396         }
2397         _lStr_ncpy( pchunk->buf8, str, count );
2398         pchunk->len = len;
2399     }
2400     return *this;
2401 }
2402 
/// Replaces the contents with a range of another string (which may be this
/// string itself or share its chunk).
/// @param offset index of the first character taken; negative is treated as 0
/// @param count  maximum number of characters taken; limited to what is
///               available after offset. When nothing remains the string is cleared.
/// @return reference to this string
lString8 & lString8::assign(const lString8 & str, size_type offset, size_type count)
{
    if (offset < 0)
        offset = 0;
    if ( count > str.length() - offset )
        count = str.length() - offset;
    if (count<=0)
    {
        clear();
        return *this;
    }
    if (pchunk==str.pchunk)
    {
        if (refCount()==1)
        {
            // Sole owner of the chunk, so str is this very object: slide the
            // wanted range to the front. memmove because the ranges overlap.
            if (offset>0)
                memmove( pchunk->buf8, pchunk->buf8+offset, sizeof(lChar8)*count );
        }
        else
        {
            // The chunk has other owners. Never edit it in place; take a fresh
            // chunk and ALWAYS copy (the old code skipped the copy for
            // offset==0, leaving the new buffer uninitialised). The old chunk
            // outlives release() because it is still referenced elsewhere; it
            // must be read through the saved pointer since str may be *this.
            lstring_chunk_t * poldchunk = pchunk;
            release();
            alloc(count);
            _lStr_memcpy( pchunk->buf8, poldchunk->buf8+offset, count );
        }
    }
    else
    {
        if (refCount()==1)
        {
            if (pchunk->size<=count)
            {
                // resize is necessary
                pchunk->buf8 = (lChar8*) ::realloc( pchunk->buf8, sizeof(lChar8)*(count+1) );
                // capacity excludes the terminator, as in alloc()/reserve()
                pchunk->size = count;
            }
        }
        else
        {
            release();
            alloc(count);
        }
        _lStr_memcpy( pchunk->buf8, str.pchunk->buf8+offset, count );
    }
    pchunk->buf8[count]=0;
    pchunk->len = count;
    return *this;
}
2449 
erase(size_type offset,size_type count)2450 lString8 & lString8::erase(size_type offset, size_type count)
2451 {
2452     if ( count > length() - offset )
2453         count = length() - offset;
2454     if (count<=0)
2455     {
2456         clear();
2457     }
2458     else
2459     {
2460         size_type newlen = length()-count;
2461         if (refCount()==1)
2462         {
2463             _lStr_memcpy( pchunk->buf8+offset, pchunk->buf8+offset+count, newlen-offset+1 );
2464         }
2465         else
2466         {
2467             lstring_chunk_t * poldchunk = pchunk;
2468             release();
2469             alloc( newlen );
2470             _lStr_memcpy( pchunk->buf8, poldchunk->buf8, offset );
2471             _lStr_memcpy( pchunk->buf8+offset, poldchunk->buf8+offset+count, newlen-offset+1 );
2472         }
2473         pchunk->len = newlen;
2474         pchunk->buf8[newlen]=0;
2475     }
2476     return *this;
2477 }
2478 
reserve(size_type n)2479 void lString8::reserve(size_type n)
2480 {
2481     if (refCount()==1)
2482     {
2483         if (pchunk->size < n)
2484         {
2485             pchunk->buf8 = (lChar8*) ::realloc( pchunk->buf8, sizeof(lChar8)*(n+1) );
2486             pchunk->size = n;
2487         }
2488     }
2489     else
2490     {
2491         lstring_chunk_t * poldchunk = pchunk;
2492         release();
2493         alloc( n );
2494         _lStr_memcpy( pchunk->buf8, poldchunk->buf8, poldchunk->len+1 );
2495         pchunk->len = poldchunk->len;
2496     }
2497 }
2498 
lock(size_type newsize)2499 void lString8::lock( size_type newsize )
2500 {
2501     if (refCount()>1)
2502     {
2503         lstring_chunk_t * poldchunk = pchunk;
2504         release();
2505         alloc( newsize );
2506         size_type len = newsize;
2507         if (len>poldchunk->len)
2508             len = poldchunk->len;
2509         _lStr_memcpy( pchunk->buf8, poldchunk->buf8, len );
2510         pchunk->buf8[len]=0;
2511         pchunk->len = len;
2512     }
2513 }
2514 
2515 // lock string, allocate buffer and reset length to 0
reset(size_type size)2516 void lString8::reset( size_type size )
2517 {
2518     if (refCount()>1 || pchunk->size<size)
2519     {
2520         release();
2521         alloc( size );
2522     }
2523     pchunk->buf8[0] = 0;
2524     pchunk->len = 0;
2525 }
2526 
resize(size_type n,lChar8 e)2527 void lString8::resize(size_type n, lChar8 e)
2528 {
2529     lock( n );
2530     if (n>=pchunk->size)
2531     {
2532         pchunk->buf8 = (lChar8*) ::realloc( pchunk->buf8, sizeof(lChar8)*(n+1) );
2533         pchunk->size = n;
2534     }
2535     // fill with data if expanded
2536     for (size_type i=pchunk->len; i<n; i++)
2537         pchunk->buf8[i] = e;
2538     pchunk->buf8[pchunk->len] = 0;
2539 }
2540 
append(const lChar8 * str)2541 lString8 & lString8::append(const lChar8 * str)
2542 {
2543     size_type len = _lStr_len(str);
2544     reserve( pchunk->len+len );
2545     _lStr_memcpy(pchunk->buf8+pchunk->len, str, len+1);
2546     pchunk->len += len;
2547     return *this;
2548 }
2549 
appendDecimal(lInt64 n)2550 lString8 & lString8::appendDecimal(lInt64 n)
2551 {
2552     lChar8 buf[24];
2553     int i=0;
2554     int negative = 0;
2555     if (n==0)
2556         return append(1, '0');
2557     else if (n<0)
2558     {
2559         negative = 1;
2560         n = -n;
2561     }
2562     for ( ; n; n/=10 )
2563     {
2564         buf[i++] = '0' + (n % 10);
2565     }
2566     reserve(length() + i + negative);
2567     if (negative)
2568         append(1, '-');
2569     for (int j=i-1; j>=0; j--)
2570         append(1, buf[j]);
2571     return *this;
2572 }
2573 
appendHex(lUInt64 n)2574 lString8 & lString8::appendHex(lUInt64 n)
2575 {
2576     if (n == 0)
2577         return append(1, '0');
2578     reserve(length() + 16);
2579     bool foundNz = false;
2580     for (int i=0; i<16; i++) {
2581         int digit = (n >> 60) & 0x0F;
2582         if (digit)
2583             foundNz = true;
2584         if (foundNz)
2585             append(1, (lChar8)toHexDigit(digit));
2586         n <<= 4;
2587     }
2588     return *this;
2589 }
2590 
appendDecimal(lInt64 n)2591 lString16 & lString16::appendDecimal(lInt64 n)
2592 {
2593     lChar16 buf[24];
2594     int i=0;
2595     int negative = 0;
2596     if (n==0)
2597         return append(1, '0');
2598     else if (n<0)
2599     {
2600         negative = 1;
2601         n = -n;
2602     }
2603     for ( ; n; n/=10 )
2604     {
2605         buf[i++] = '0' + (n % 10);
2606     }
2607     reserve(length() + i + negative);
2608     if (negative)
2609         append(1, '-');
2610     for (int j=i-1; j>=0; j--)
2611         append(1, buf[j]);
2612     return *this;
2613 }
2614 
appendHex(lUInt64 n)2615 lString16 & lString16::appendHex(lUInt64 n)
2616 {
2617     if (n == 0)
2618         return append(1, '0');
2619     reserve(length() + 16);
2620     bool foundNz = false;
2621     for (int i=0; i<16; i++) {
2622         int digit = (n >> 60) & 0x0F;
2623         if (digit)
2624             foundNz = true;
2625         if (foundNz)
2626             append(1, toHexDigit(digit));
2627         n <<= 4;
2628     }
2629     return *this;
2630 }
2631 
appendDecimal(lInt64 n)2632 lString32 & lString32::appendDecimal(lInt64 n)
2633 {
2634     lChar32 buf[24];
2635     int i=0;
2636     int negative = 0;
2637     if (n==0)
2638         return append(1, '0');
2639     else if (n<0)
2640     {
2641         negative = 1;
2642         n = -n;
2643     }
2644     for ( ; n; n/=10 )
2645     {
2646         buf[i++] = '0' + (n % 10);
2647     }
2648     reserve(length() + i + negative);
2649     if (negative)
2650         append(1, '-');
2651     for (int j=i-1; j>=0; j--)
2652         append(1, buf[j]);
2653     return *this;
2654 }
2655 
appendHex(lUInt64 n)2656 lString32 & lString32::appendHex(lUInt64 n)
2657 {
2658     if (n == 0)
2659         return append(1, '0');
2660     reserve(length() + 16);
2661     bool foundNz = false;
2662     for (int i=0; i<16; i++) {
2663         int digit = (n >> 60) & 0x0F;
2664         if (digit)
2665             foundNz = true;
2666         if (foundNz)
2667             append(1, toHexDigit(digit));
2668         n <<= 4;
2669     }
2670     return *this;
2671 }
2672 
append(const lChar8 * str,size_type count)2673 lString8 & lString8::append(const lChar8 * str, size_type count)
2674 {
2675     size_type len = _lStr_nlen(str, count);
2676     reserve( pchunk->len+len );
2677     _lStr_ncpy(pchunk->buf8+pchunk->len, str, len);
2678     pchunk->len += len;
2679     return *this;
2680 }
2681 
/// Appends another string; str may be this string itself.
/// @param str string to append
/// @return reference to this string
lString8 & lString8::append(const lString8 & str)
{
    // Capture both lengths before anything is modified: str may alias *this.
    const size_type oldlen = pchunk->len;
    const size_type addlen = str.pchunk->len;
    if (addlen <= 0)
        return *this;
    reserve( oldlen + addlen );
    // Copy only the characters and terminate separately. Copying addlen+1
    // characters, as before, broke self-append: the source terminator at
    // index oldlen is the first destination slot and was overwritten before
    // it could be copied, leaving the result unterminated.
    _lStr_memcpy( pchunk->buf8+oldlen, str.pchunk->buf8, addlen );
    pchunk->len = oldlen + addlen;
    pchunk->buf8[pchunk->len] = 0;
    return *this;
}
2690 
/// Appends a range of another string (which may be this string itself).
/// @param str    source string
/// @param offset index of the first character taken
/// @param count  maximum number of characters taken; limited to what is
///               available after offset
/// @return reference to this string; unchanged when offset is outside str or
///         count is not positive
lString8 & lString8::append(const lString8 & str, size_type offset, size_type count)
{
    const size_type srclen = str.pchunk->len;
    if (offset < 0 || offset >= srclen || count <= 0)
        return *this;
    // compare with the remaining length; offset+count could overflow
    if ( count > srclen - offset )
        count = srclen - offset;
    const size_type oldlen = pchunk->len;
    reserve( oldlen+count );
    // Copy exactly `count` characters so the stored length always matches
    // what was written, even if the range contains a zero character.
    _lStr_memcpy(pchunk->buf8 + oldlen, str.pchunk->buf8 + offset, count);
    pchunk->len = oldlen + count;
    pchunk->buf8[pchunk->len] = 0;
    return *this;
}
2704 
append(size_type count,lChar8 ch)2705 lString8 & lString8::append(size_type count, lChar8 ch)
2706 {
2707     reserve( pchunk->len+count );
2708     memset( pchunk->buf8+pchunk->len, ch, count );
2709     //_lStr_memset(pchunk->buf8+pchunk->len, ch, count);
2710     pchunk->len += count;
2711     pchunk->buf8[pchunk->len] = 0;
2712     return *this;
2713 }
2714 
insert(size_type p0,size_type count,lChar8 ch)2715 lString8 & lString8::insert(size_type p0, size_type count, lChar8 ch)
2716 {
2717     if (p0>pchunk->len)
2718         p0 = pchunk->len;
2719     reserve( pchunk->len+count );
2720     for (size_type i=pchunk->len+count; i>p0; i--)
2721         pchunk->buf8[i] = pchunk->buf8[i-1];
2722     //_lStr_memset(pchunk->buf8+p0, ch, count);
2723     memset(pchunk->buf8+p0, ch, count);
2724     pchunk->len += count;
2725     pchunk->buf8[pchunk->len] = 0;
2726     return *this;
2727 }
2728 
substr(size_type pos,size_type n) const2729 lString8 lString8::substr(size_type pos, size_type n) const
2730 {
2731     if (pos>=length())
2732         return lString8::empty_str;
2733     if (pos+n>length())
2734         n = length() - pos;
2735     return lString8( pchunk->buf8+pos, n );
2736 }
2737 
pos(lChar8 ch) const2738 int lString8::pos(lChar8 ch) const
2739 {
2740     for (int i = 0; i < length(); i++)
2741     {
2742         if (pchunk->buf8[i] == ch)
2743         {
2744             return i;
2745         }
2746     }
2747     return -1;
2748 }
2749 
pos(lChar8 ch,int start) const2750 int lString8::pos(lChar8 ch, int start) const
2751 {
2752     if (length() - start < 1)
2753         return -1;
2754     for (int i = start; i < length(); i++)
2755     {
2756         if (pchunk->buf8[i] == ch)
2757         {
2758             return i;
2759         }
2760     }
2761     return -1;
2762 }
2763 
pos(const lString8 & subStr) const2764 int lString8::pos(const lString8 & subStr) const
2765 {
2766     if (subStr.length()>length())
2767         return -1;
2768     int l = subStr.length();
2769     int dl = length() - l;
2770     for (int i=0; i<=dl; i++)
2771     {
2772         int flg = 1;
2773         for (int j=0; j<l; j++)
2774             if (pchunk->buf8[i+j]!=subStr.pchunk->buf8[j])
2775             {
2776                 flg = 0;
2777                 break;
2778             }
2779         if (flg)
2780             return i;
2781     }
2782     return -1;
2783 }
2784 
2785 /// find position of substring inside string starting from right, -1 if not found
rpos(const char * subStr) const2786 int lString8::rpos(const char * subStr) const
2787 {
2788     if (!subStr || !subStr[0])
2789         return -1;
2790     int l = lStr_len(subStr);
2791     if (l > length())
2792         return -1;
2793     int dl = length() - l;
2794     for (int i=dl; i>=0; i--)
2795     {
2796         int flg = 1;
2797         for (int j=0; j<l; j++)
2798             if (pchunk->buf8[i+j] != subStr[j])
2799             {
2800                 flg = 0;
2801                 break;
2802             }
2803         if (flg)
2804             return i;
2805     }
2806     return -1;
2807 }
2808 
2809 /// find position of substring inside string, -1 if not found
pos(const char * subStr) const2810 int lString8::pos(const char * subStr) const
2811 {
2812     if (!subStr || !subStr[0])
2813         return -1;
2814     int l = lStr_len(subStr);
2815     if (l > length())
2816         return -1;
2817     int dl = length() - l;
2818     for (int i=0; i<=dl; i++)
2819     {
2820         int flg = 1;
2821         for (int j=0; j<l; j++)
2822             if (pchunk->buf8[i+j] != subStr[j])
2823             {
2824                 flg = 0;
2825                 break;
2826             }
2827         if (flg)
2828             return i;
2829     }
2830     return -1;
2831 }
2832 
pos(const lString8 & subStr,int startPos) const2833 int lString8::pos(const lString8 & subStr, int startPos) const
2834 {
2835     if (subStr.length() > length() - startPos)
2836         return -1;
2837     int l = subStr.length();
2838     int dl = length() - l;
2839     for (int i = startPos; i <= dl; i++) {
2840         int flg = 1;
2841         for (int j=0; j<l; j++)
2842             if (pchunk->buf8[i+j]!=subStr.pchunk->buf8[j])
2843             {
2844                 flg = 0;
2845                 break;
2846             }
2847         if (flg)
2848             return i;
2849     }
2850     return -1;
2851 }
2852 
pos(lChar32 ch) const2853 int lString32::pos(lChar32 ch) const {
2854     for (int i = 0; i < length(); i++)
2855     {
2856         if (pchunk->buf32[i] == ch)
2857         {
2858             return i;
2859         }
2860     }
2861     return -1;
2862 }
2863 
pos(lChar32 ch,int start) const2864 int lString32::pos(lChar32 ch, int start) const
2865 {
2866     if (length() - start < 1)
2867         return -1;
2868     for (int i = start; i < length(); i++)
2869     {
2870         if (pchunk->buf32[i] == ch)
2871         {
2872             return i;
2873         }
2874     }
2875     return -1;
2876 }
2877 
pos(const lString32 & subStr,int startPos) const2878 int lString32::pos(const lString32 & subStr, int startPos) const
2879 {
2880     if (subStr.length() > length() - startPos)
2881         return -1;
2882     int l = subStr.length();
2883     int dl = length() - l;
2884     for (int i = startPos; i <= dl; i++) {
2885         int flg = 1;
2886         for (int j=0; j<l; j++)
2887             if (pchunk->buf32[i+j]!=subStr.pchunk->buf32[j])
2888             {
2889                 flg = 0;
2890                 break;
2891             }
2892         if (flg)
2893             return i;
2894     }
2895     return -1;
2896 }
2897 
2898 /// find position of substring inside string, -1 if not found
pos(const char * subStr,int startPos) const2899 int lString8::pos(const char * subStr, int startPos) const
2900 {
2901     if (!subStr || !subStr[0])
2902         return -1;
2903     int l = lStr_len(subStr);
2904     if (l > length() - startPos)
2905         return -1;
2906     int dl = length() - l;
2907     for (int i = startPos; i <= dl; i++) {
2908         int flg = 1;
2909         for (int j=0; j<l; j++)
2910             if (pchunk->buf8[i+j] != subStr[j])
2911             {
2912                 flg = 0;
2913                 break;
2914             }
2915         if (flg)
2916             return i;
2917     }
2918     return -1;
2919 }
2920 
2921 /// find position of substring inside string, -1 if not found
pos(const lChar32 * subStr,int startPos) const2922 int lString32::pos(const lChar32 * subStr, int startPos) const
2923 {
2924     if (!subStr || !subStr[0])
2925         return -1;
2926     int l = lStr_len(subStr);
2927     if (l > length() - startPos)
2928         return -1;
2929     int dl = length() - l;
2930     for (int i = startPos; i <= dl; i++) {
2931         int flg = 1;
2932         for (int j=0; j<l; j++)
2933             if (pchunk->buf32[i+j] != subStr[j])
2934             {
2935                 flg = 0;
2936                 break;
2937             }
2938         if (flg)
2939             return i;
2940     }
2941     return -1;
2942 }
2943 
2944 /// find position of substring inside string, right to left, return -1 if not found
rpos(lString32 subStr) const2945 int lString32::rpos(lString32 subStr) const
2946 {
2947     if (subStr.length()>length())
2948         return -1;
2949     int l = subStr.length();
2950     int dl = length() - l;
2951     for (int i=dl; i>=0; i++)
2952     {
2953         int flg = 1;
2954         for (int j=0; j<l; j++)
2955             if (pchunk->buf32[i+j]!=subStr.pchunk->buf32[j])
2956             {
2957                 flg = 0;
2958                 break;
2959             }
2960         if (flg)
2961             return i;
2962     }
2963     return -1;
2964 }
2965 
2966 /// find position of substring inside string, -1 if not found
pos(const lChar32 * subStr) const2967 int lString32::pos(const lChar32 * subStr) const
2968 {
2969     if (!subStr)
2970         return -1;
2971     int l = lStr_len(subStr);
2972     if (l > length())
2973         return -1;
2974     int dl = length() - l;
2975     for (int i=0; i <= dl; i++)
2976     {
2977         int flg = 1;
2978         for (int j=0; j<l; j++)
2979             if (pchunk->buf32[i+j] != subStr[j])
2980             {
2981                 flg = 0;
2982                 break;
2983             }
2984         if (flg)
2985             return i;
2986     }
2987     return -1;
2988 }
2989 
2990 /// find position of substring inside string, -1 if not found
pos(const lChar8 * subStr) const2991 int lString32::pos(const lChar8 * subStr) const
2992 {
2993     if (!subStr)
2994         return -1;
2995     int l = lStr_len(subStr);
2996     if (l > length())
2997         return -1;
2998     int dl = length() - l;
2999     for (int i=0; i <= dl; i++)
3000     {
3001         int flg = 1;
3002         for (int j=0; j<l; j++)
3003             if (pchunk->buf32[i+j] != subStr[j])
3004             {
3005                 flg = 0;
3006                 break;
3007             }
3008         if (flg)
3009             return i;
3010     }
3011     return -1;
3012 }
3013 
3014 /// find position of substring inside string, -1 if not found
pos(const lChar8 * subStr,int start) const3015 int lString32::pos(const lChar8 * subStr, int start) const
3016 {
3017     if (!subStr)
3018         return -1;
3019     int l = lStr_len(subStr);
3020     if (l > length() - start)
3021         return -1;
3022     int dl = length() - l;
3023     for (int i = start; i <= dl; i++)
3024     {
3025         int flg = 1;
3026         for (int j=0; j<l; j++)
3027             if (pchunk->buf32[i+j] != subStr[j])
3028             {
3029                 flg = 0;
3030                 break;
3031             }
3032         if (flg)
3033             return i;
3034     }
3035     return -1;
3036 }
3037 
pos(lString32 subStr) const3038 int lString32::pos(lString32 subStr) const
3039 {
3040     if (subStr.length()>length())
3041         return -1;
3042     int l = subStr.length();
3043     int dl = length() - l;
3044     for (int i=0; i<=dl; i++)
3045     {
3046         int flg = 1;
3047         for (int j=0; j<l; j++)
3048             if (pchunk->buf32[i+j]!=subStr.pchunk->buf32[j])
3049             {
3050                 flg = 0;
3051                 break;
3052             }
3053         if (flg)
3054             return i;
3055     }
3056     return -1;
3057 }
3058 
pack()3059 lString8 & lString8::pack()
3060 {
3061     if (pchunk->len + 4 < pchunk->size )
3062     {
3063         if (refCount()>1)
3064         {
3065             lock(pchunk->len);
3066         }
3067         else
3068         {
3069             pchunk->buf8 = cr_realloc( pchunk->buf8, pchunk->len+1 );
3070             pchunk->size = pchunk->len;
3071         }
3072     }
3073     return *this;
3074 }
3075 
trim()3076 lString8 & lString8::trim()
3077 {
3078     //
3079     int firstns;
3080     for (firstns = 0;
3081             firstns < pchunk->len &&
3082             (pchunk->buf8[firstns] == ' ' ||
3083             pchunk->buf8[firstns] == '\t');
3084             ++firstns)
3085         ;
3086     if (firstns >= pchunk->len)
3087     {
3088         clear();
3089         return *this;
3090     }
3091     size_t lastns;
3092     for (lastns = pchunk->len-1;
3093             lastns>0 &&
3094             (pchunk->buf8[lastns]==' ' || pchunk->buf8[lastns]=='\t');
3095             --lastns)
3096         ;
3097     int newlen = (int)(lastns - firstns + 1);
3098     if (newlen == pchunk->len)
3099         return *this;
3100     if (refCount()==1)
3101     {
3102         if (firstns>0)
3103             lStr_memcpy( pchunk->buf8, pchunk->buf8+firstns, newlen );
3104         pchunk->buf8[newlen] = 0;
3105         pchunk->len = newlen;
3106     }
3107     else
3108     {
3109         lstring_chunk_t * poldchunk = pchunk;
3110         release();
3111         alloc( newlen );
3112         _lStr_memcpy( pchunk->buf8, poldchunk->buf8+firstns, newlen );
3113         pchunk->buf8[newlen] = 0;
3114         pchunk->len = newlen;
3115     }
3116     return *this;
3117 }
3118 
atoi() const3119 int lString8::atoi() const
3120 {
3121     int sgn = 1;
3122     int n = 0;
3123     const lChar8 * s = c_str();
3124     while (*s == ' ' || *s == '\t')
3125         s++;
3126     if (*s == '-')
3127     {
3128         sgn = -1;
3129         s++;
3130     }
3131     else if (*s == '+')
3132     {
3133         s++;
3134     }
3135     while (*s>='0' && *s<='9')
3136     {
3137         n = n * 10 + ( (*s)-'0' );
3138         s++;
3139     }
3140     return (sgn>0)?n:-n;
3141 }
3142 
atoi64() const3143 lInt64 lString8::atoi64() const
3144 {
3145     int sgn = 1;
3146     lInt64 n = 0;
3147     const lChar8 * s = c_str();
3148     while (*s == ' ' || *s == '\t')
3149         s++;
3150     if (*s == '-')
3151     {
3152         sgn = -1;
3153         s++;
3154     }
3155     else if (*s == '+')
3156     {
3157         s++;
3158     }
3159     while (*s>='0' && *s<='9')
3160     {
3161         n = n * 10 + ( (*s)-'0' );
3162     }
3163     return (sgn>0) ? n : -n;
3164 }
3165 
3166 // constructs string representation of integer
itoa(int n)3167 lString8 lString8::itoa( int n )
3168 {
3169     lChar8 buf[16];
3170     int i=0;
3171     int negative = 0;
3172     if (n==0)
3173         return cs8("0");
3174     else if (n<0)
3175     {
3176         negative = 1;
3177         n = -n;
3178     }
3179     for ( ; n; n/=10 )
3180     {
3181         buf[i++] = '0' + (n%10);
3182     }
3183     lString8 res;
3184     res.reserve(i+negative);
3185     if (negative)
3186         res.append(1, '-');
3187     for (int j=i-1; j>=0; j--)
3188         res.append(1, buf[j]);
3189     return res;
3190 }
3191 
3192 // constructs string representation of integer
itoa(unsigned int n)3193 lString8 lString8::itoa( unsigned int n )
3194 {
3195     lChar8 buf[16];
3196     int i=0;
3197     if (n==0)
3198         return cs8("0");
3199     for ( ; n; n/=10 )
3200     {
3201         buf[i++] = '0' + (n%10);
3202     }
3203     lString8 res;
3204     res.reserve(i);
3205     for (int j=i-1; j>=0; j--)
3206         res.append(1, buf[j]);
3207     return res;
3208 }
3209 
3210 // constructs string representation of integer
itoa(lInt64 n)3211 lString8 lString8::itoa( lInt64 n )
3212 {
3213     lChar8 buf[32];
3214     int i=0;
3215     int negative = 0;
3216     if (n==0)
3217         return cs8("0");
3218     else if (n<0)
3219     {
3220         negative = 1;
3221         n = -n;
3222     }
3223     for ( ; n; n/=10 )
3224     {
3225         buf[i++] = '0' + (n%10);
3226     }
3227     lString8 res;
3228     res.reserve(i+negative);
3229     if (negative)
3230         res.append(1, '-');
3231     for (int j=i-1; j>=0; j--)
3232         res.append(1, buf[j]);
3233     return res;
3234 }
3235 
3236 // constructs string representation of integer
itoa(int n)3237 lString16 lString16::itoa( int n )
3238 {
3239     return itoa( (lInt64)n );
3240 }
3241 
3242 // constructs string representation of integer
itoa(unsigned int n)3243 lString16 lString16::itoa( unsigned int n )
3244 {
3245     return itoa( (lUInt64) n );
3246 }
3247 
3248 // constructs string representation of integer
itoa(lInt64 n)3249 lString16 lString16::itoa( lInt64 n )
3250 {
3251     lChar16 buf[32];
3252     int i=0;
3253     int negative = 0;
3254     if (n==0)
3255         return lString16("0");
3256     else if (n<0)
3257     {
3258         negative = 1;
3259         n = -n;
3260     }
3261     for ( ; n && i<30; n/=10 )
3262     {
3263         buf[i++] = (lChar16)('0' + (n%10));
3264     }
3265     lString16 res;
3266     res.reserve(i+negative);
3267     if (negative)
3268         res.append(1, L'-');
3269     for (int j=i-1; j>=0; j--)
3270         res.append(1, buf[j]);
3271     return res;
3272 }
3273 
3274 // constructs string representation of integer
itoa(lUInt64 n)3275 lString16 lString16::itoa( lUInt64 n )
3276 {
3277     lChar16 buf[32];
3278     int i=0;
3279     if (n==0)
3280         return lString16("0");
3281     for ( ; n; n/=10 )
3282     {
3283         buf[i++] = (lChar16)('0' + (n%10));
3284     }
3285     lString16 res;
3286     res.reserve(i);
3287     for (int j=i-1; j>=0; j--)
3288         res.append(1, buf[j]);
3289     return res;
3290 }
3291 
3292 // constructs string representation of integer
itoa(int n)3293 lString32 lString32::itoa( int n )
3294 {
3295     return itoa( (lInt64)n );
3296 }
3297 
3298 // constructs string representation of integer
itoa(unsigned int n)3299 lString32 lString32::itoa( unsigned int n )
3300 {
3301     return itoa( (lUInt64) n );
3302 }
3303 
3304 // constructs string representation of integer
itoa(lInt64 n)3305 lString32 lString32::itoa( lInt64 n )
3306 {
3307     lChar32 buf[32];
3308     int i=0;
3309     int negative = 0;
3310     if (n==0)
3311         return cs32("0");
3312     else if (n<0)
3313     {
3314         negative = 1;
3315         n = -n;
3316     }
3317     for ( ; n && i<30; n/=10 )
3318     {
3319         buf[i++] = (lChar32)('0' + (n%10));
3320     }
3321     lString32 res;
3322     res.reserve(i+negative);
3323     if (negative)
3324         res.append(1, U'-');
3325     for (int j=i-1; j>=0; j--)
3326         res.append(1, buf[j]);
3327     return res;
3328 }
3329 
3330 // constructs string representation of integer
itoa(lUInt64 n)3331 lString32 lString32::itoa( lUInt64 n )
3332 {
3333     lChar32 buf[32];
3334     int i=0;
3335     if (n==0)
3336         return cs32("0");
3337     for ( ; n; n/=10 )
3338     {
3339         buf[i++] = (lChar32)('0' + (n%10));
3340     }
3341     lString32 res;
3342     res.reserve(i);
3343     for (int j=i-1; j>=0; j--)
3344         res.append(1, buf[j]);
3345     return res;
3346 }
3347 
lvUnicodeIsAlpha(lChar32 ch)3348 bool lvUnicodeIsAlpha( lChar32 ch )
3349 {
3350     if ( ch<128 ) {
3351         if ( (ch>='a' && ch<='z') || (ch>='A' && ch<='Z') )
3352             return true;
3353     } else if ( ch>=0xC0 && ch<=0x1ef9) {
3354         return true;
3355     }
3356     return false;
3357 }
3358 
uppercase()3359 lString8 & lString8::uppercase()
3360 {
3361     lStr_uppercase( modify(), length() );
3362     return *this;
3363 }
3364 
lowercase()3365 lString8 & lString8::lowercase()
3366 {
3367     lStr_lowercase( modify(), length() );
3368     return *this;
3369 }
3370 
uppercase()3371 lString32 & lString32::uppercase()
3372 {
3373     lStr_uppercase( modify(), length() );
3374     return *this;
3375 }
3376 
lowercase()3377 lString32 & lString32::lowercase()
3378 {
3379     lStr_lowercase( modify(), length() );
3380     return *this;
3381 }
3382 
capitalize()3383 lString32 & lString32::capitalize()
3384 {
3385     lStr_capitalize( modify(), length() );
3386     return *this;
3387 }
3388 
fullWidthChars()3389 lString32 & lString32::fullWidthChars()
3390 {
3391     lStr_fullWidthChars( modify(), length() );
3392     return *this;
3393 }
3394 
lStr_uppercase(lChar8 * str,int len)3395 void lStr_uppercase( lChar8 * str, int len )
3396 {
3397     for ( int i=0; i<len; i++ ) {
3398         lChar32 ch = str[i];
3399         if ( ch>='a' && ch<='z' ) {
3400             str[i] = ch - 0x20;
3401         } else if ( ch>=0xE0 && ch<=0xFF ) {
3402             str[i] = ch - 0x20;
3403         }
3404     }
3405 }
3406 
lStr_lowercase(lChar8 * str,int len)3407 void lStr_lowercase( lChar8 * str, int len )
3408 {
3409     for ( int i=0; i<len; i++ ) {
3410         lChar32 ch = str[i];
3411         if ( ch>='A' && ch<='Z' ) {
3412             str[i] = ch + 0x20;
3413         } else if ( ch>=0xC0 && ch<=0xDF ) {
3414             str[i] = ch + 0x20;
3415         }
3416     }
3417 }
3418 
lStr_uppercase(lChar32 * str,int len)3419 void lStr_uppercase( lChar32 * str, int len )
3420 {
3421     for ( int i=0; i<len; i++ ) {
3422         lChar32 ch = str[i];
3423 #if (USE_UTF8PROC==1)
3424         str[i] = utf8proc_toupper(ch);
3425 #else
3426         if ( ch>='a' && ch<='z' ) {
3427             str[i] = ch - 0x20;
3428         } else if ( ch>=0xE0 && ch<=0xFF ) {
3429             str[i] = ch - 0x20;
3430         } else if ( ch>=0x430 && ch<=0x44F ) {
3431             str[i] = ch - 0x20;
3432         } else if ( ch>=0x3b0 && ch<=0x3cF ) {
3433             str[i] = ch - 0x20;
3434         } else if ( (ch >> 8)==0x1F ) { // greek
3435             lChar32 n = ch & 255;
3436             if (n<0x70) {
3437                 str[i] = ch | 8;
3438             } else if (n<0x80) {
3439 
3440             } else if (n<0xF0) {
3441                 str[i] = ch | 8;
3442             }
3443         }
3444 #endif
3445     }
3446 }
3447 
lStr_lowercase(lChar32 * str,int len)3448 void lStr_lowercase( lChar32 * str, int len )
3449 {
3450     for ( int i=0; i<len; i++ ) {
3451         lChar32 ch = str[i];
3452 #if (USE_UTF8PROC==1)
3453         str[i] = utf8proc_tolower(ch);
3454 #else
3455         if ( ch>='A' && ch<='Z' ) {
3456             str[i] = ch + 0x20;
3457         } else if ( ch>=0xC0 && ch<=0xDF ) {
3458             str[i] = ch + 0x20;
3459         } else if ( ch>=0x410 && ch<=0x42F ) {
3460             str[i] = ch + 0x20;
3461         } else if ( ch>=0x390 && ch<=0x3aF ) {
3462             str[i] = ch + 0x20;
3463         } else if ( (ch >> 8)==0x1F ) { // greek
3464             lChar32 n = ch & 255;
3465             if (n<0x70) {
3466                 str[i] = ch & (~8);
3467             } else if (n<0x80) {
3468 
3469             } else if (n<0xF0) {
3470                 str[i] = ch & (~8);
3471             }
3472         }
3473 #endif
3474     }
3475 }
3476 
lStr_fullWidthChars(lChar32 * str,int len)3477 void lStr_fullWidthChars( lChar32 * str, int len )
3478 {
3479     for ( int i=0; i<len; i++ ) {
3480         lChar32 ch = str[i];
3481         if ( ch>=0x21 && ch<=0x7E ) {
3482             // full-width versions of ascii chars 0x21-0x7E are at 0xFF01-0Xff5E
3483             str[i] = ch + UNICODE_ASCII_FULL_WIDTH_OFFSET;
3484         } else if ( ch==0x20 ) {
3485             str[i] = UNICODE_CJK_IDEOGRAPHIC_SPACE; // full-width space
3486         }
3487     }
3488 }
3489 
lStr_capitalize(lChar32 * str,int len)3490 void lStr_capitalize( lChar32 * str, int len )
3491 {
3492     bool prev_is_word_sep = true; // first char of string will be capitalized
3493     for ( int i=0; i<len; i++ ) {
3494         lChar32 ch = str[i];
3495         if (prev_is_word_sep) {
3496             // as done as in lStr_uppercase()
3497 #if (USE_UTF8PROC==1)
3498             str[i] = utf8proc_toupper(ch);
3499 #else
3500             if ( ch>='a' && ch<='z' ) {
3501                 str[i] = ch - 0x20;
3502             } else if ( ch>=0xE0 && ch<=0xFF ) {
3503                 str[i] = ch - 0x20;
3504             } else if ( ch>=0x430 && ch<=0x44F ) {
3505                 str[i] = ch - 0x20;
3506             } else if ( ch>=0x3b0 && ch<=0x3cF ) {
3507                 str[i] = ch - 0x20;
3508             } else if ( (ch >> 8)==0x1F ) { // greek
3509                 lChar32 n = ch & 255;
3510                 if (n<0x70) {
3511                     str[i] = ch | 8;
3512                 } else if (n<0x80) {
3513 
3514                 } else if (n<0xF0) {
3515                     str[i] = ch | 8;
3516                 }
3517             }
3518 #endif
3519         }
3520         // update prev_is_word_sep for next char
3521         prev_is_word_sep = lStr_isWordSeparator(ch);
3522     }
3523 }
3524 
3525 
TrimDoubleSpaces(lChar32 * buf,int len,bool allowStartSpace,bool allowEndSpace,bool removeEolHyphens)3526 int TrimDoubleSpaces(lChar32 * buf, int len,  bool allowStartSpace, bool allowEndSpace, bool removeEolHyphens)
3527 {
3528     lChar32 * psrc = buf;
3529     lChar32 * pdst = buf;
3530     int state = 0; // 0=beginning, 1=after space, 2=after non-space
3531     while ((len--) > 0) {
3532         lChar32 ch = *psrc++;
3533         if (ch == ' ' || ch == '\t') {
3534             if ( state==2 ) {
3535                 if ( *psrc || allowEndSpace ) // if not last
3536                     *pdst++ = ' ';
3537             } else if ( state==0 && allowStartSpace ) {
3538                 *pdst++ = ' ';
3539             }
3540             state = 1;
3541         } else if ( ch=='\r' || ch=='\n' ) {
3542             if ( state==2 ) {
3543                 if ( removeEolHyphens && pdst>(buf+1) && *(pdst-1)=='-' && lvUnicodeIsAlpha(*(pdst-2)) )
3544                     pdst--; // remove hyphen at end of line
3545                 if ( *psrc || allowEndSpace ) // if not last
3546                     *pdst++ = ' ';
3547             } else if ( state==0 && allowStartSpace ) {
3548                 *pdst++ = ' ';
3549             }
3550             state = 1;
3551         } else {
3552             *pdst++ = ch;
3553             state = 2;
3554         }
3555     }
3556     return (int)(pdst - buf);
3557 }
3558 
trimDoubleSpaces(bool allowStartSpace,bool allowEndSpace,bool removeEolHyphens)3559 lString32 & lString32::trimDoubleSpaces( bool allowStartSpace, bool allowEndSpace, bool removeEolHyphens )
3560 {
3561     if ( empty() )
3562         return *this;
3563     lChar32 * buf = modify();
3564     int len = length();
3565     int nlen = TrimDoubleSpaces(buf, len,  allowStartSpace, allowEndSpace, removeEolHyphens);
3566     if (nlen < len)
3567         limit(nlen);
3568     return *this;
3569 }
3570 
getHash() const3571 lUInt32 lString8::getHash() const
3572 {
3573     lUInt32 res = 0;
3574     for (int i=0; i < pchunk->len; i++)
3575         res = res * STRING_HASH_MULT + pchunk->buf8[i];
3576     return res;
3577 }
3578 
/// Out-of-class definition of the static member lString8::empty_str (default-constructed).
3579 const lString8 lString8::empty_str;
3580 
Utf8CharCount(const lChar8 * str)3581 int Utf8CharCount( const lChar8 * str )
3582 {
3583     int count = 0;
3584     lUInt8 ch;
3585     while ( (ch=*str++) ) {
3586         if ( (ch & 0x80) == 0 ) {
3587         } else if ( (ch & 0xE0) == 0xC0 ) {
3588             if ( !(*str++) )
3589                 break;
3590         } else if ( (ch & 0xF0) == 0xE0 ) {
3591             if ( !(*str++) )
3592                 break;
3593             if ( !(*str++) )
3594                 break;
3595         } else if ( (ch & 0xF8) == 0xF0 ) {
3596             if ( !(*str++) )
3597                 break;
3598             if ( !(*str++) )
3599                 break;
3600             if ( !(*str++) )
3601                 break;
3602         } else {
3603             // In Unicode standard maximum length of UTF-8 sequence is 4 byte!
3604             // invalid first byte in UTF-8 sequence, just leave as is
3605             ;
3606         }
3607         count++;
3608     }
3609     return count;
3610 }
3611 
Utf8CharCount(const lChar8 * str,int len)3612 int Utf8CharCount( const lChar8 * str, int len )
3613 {
3614     if (len == 0)
3615         return 0;
3616     int count = 0;
3617     lUInt8 ch;
3618     const lChar8 * endp = str + len;
3619     while ((ch=*str++)) {
3620         if ( (ch & 0x80) == 0 ) {
3621         } else if ( (ch & 0xE0) == 0xC0 ) {
3622             str++;
3623         } else if ( (ch & 0xF0) == 0xE0 ) {
3624             str+=2;
3625         } else if ( (ch & 0xF8) == 0xF0 ) {
3626             str+=3;
3627         } else {
3628             // invalid first byte of UTF-8 sequence, just leave as is
3629             ;
3630         }
3631         if (str > endp)
3632             break;
3633         count++;
3634     }
3635     return count;
3636 }
3637 
Utf16CharCount(const lChar16 * str)3638 int Utf16CharCount( const lChar16 * str )
3639 {
3640     int count = 0;
3641     lUInt16 ch;
3642     while ( (ch=*str++) ) {
3643         if ( (ch >=0 && ch <= 0xD7FF) || (ch >= 0xE000 && ch <= 0xFFFF) ) {
3644         } else if ( ch >= 0xD800 && ch <= 0xDBFF ) {
3645             if ( !(*str++) )
3646                 break;
3647         } else {
3648             // In Unicode standard maximum length of UTF-16 sequence is 2 word!
3649             // invalid first word in UTF-16 sequence, just leave as is
3650             ;
3651         }
3652         count++;
3653     }
3654     return count;
3655 }
3656 
Utf16CharCount(const lChar16 * str,int len)3657 int Utf16CharCount( const lChar16 * str, int len )
3658 {
3659     if (len == 0)
3660         return 0;
3661     int count = 0;
3662     lUInt16 ch;
3663     const lChar16 * endp = str + len;
3664     while ( (ch=*str++) ) {
3665         if ( (ch >=0 && ch <= 0xD7FF) || (ch >= 0xE000 && ch <= 0xFFFF) ) {
3666         } else if ( ch >= 0xD800 && ch <= 0xDBFF ) {
3667             str++;
3668         } else {
3669             // invalid first word of UTF-16 sequence, just leave as is
3670             ;
3671         }
3672         if (str > endp)
3673             break;
3674         count++;
3675     }
3676     return count;
3677 }
3678 
Wtf8CharCount(const lChar8 * str)3679 int Wtf8CharCount( const lChar8 * str )
3680 {
3681     int count = 0;
3682     lUInt8 ch;
3683     lUInt32 p;
3684     while ( (ch=*str++) ) {
3685         if ( (ch & 0x80) == 0 ) {
3686         } else if ( (ch & 0xE0) == 0xC0 ) {
3687             if ( !(*str++) )
3688                 break;
3689         } else if ( (ch & 0xF0) == 0xE0 ) {
3690             p = (ch & 0x0F) << 12;
3691             if ( !(ch=*str++) )
3692                 break;
3693             p |= (ch & 0x3F) << 6;
3694             if ( !(ch=*str++) )
3695                 break;
3696             p |= ch & 0x3F;
3697             if (p >= 0xD800 && p <= 0xDBFF) {           // high surrogate
3698                 ch = *str;
3699                 if ((ch & 0xF0) == 0xE0) {
3700                     p = (ch & 0x0F) << 12;
3701                     if ( !(ch=*(str+1)) )
3702                         break;
3703                     p |= (ch & 0x3F) << 6;
3704                     if ( !(ch=*(str+2)) )
3705                         break;
3706                     p |= ch & 0x3F;
3707                     if (p >= 0xDC00 && p <= 0xDFFF) {   // low surrogate
3708                         str += 3;
3709                     }
3710                 }
3711             }
3712         } else if ( (ch & 0xF8) == 0xF0 ) {
3713             // Mostly unused
3714             if ( !(*str++) )
3715                 break;
3716             if ( !(*str++) )
3717                 break;
3718             if ( !(*str++) )
3719                 break;
3720         } else {
3721             // invalid first byte in UTF-8 sequence, just leave as is
3722             ;
3723         }
3724         count++;
3725     }
3726     return count;
3727 }
3728 
Wtf8CharCount(const lChar8 * str,int len)3729 int Wtf8CharCount( const lChar8 * str, int len )
3730 {
3731     if (len == 0)
3732         return 0;
3733     int count = 0;
3734     lUInt8 ch;
3735     const lChar8 * endp = str + len;
3736     while ((ch=*str)) {
3737         if ( (ch & 0x80) == 0 ) {
3738             str++;
3739         } else if ( (ch & 0xE0) == 0xC0 ) {
3740             str+=2;
3741         } else if ( (ch & 0xF0) == 0xE0 ) {
3742             str+=3;
3743             ch=*str;
3744             if ( (ch & 0xF0) == 0xE0 ) {
3745                 str+=3;
3746             }
3747         } else if ( (ch & 0xF8) == 0xF0 ) {
3748             // Mostly unused
3749             str+=4;
3750         } else {
3751             // invalid first byte of UTF-8 sequence, just leave as is
3752             str++;
3753         }
3754         if (str > endp)
3755             break;
3756         count++;
3757     }
3758     return count;
3759 }
3760 
charUtf8ByteCount(lUInt32 ch)3761 inline int charUtf8ByteCount(lUInt32 ch) {
3762     if (!(ch & ~0x7F))
3763         return 1;
3764     if (!(ch & ~0x7FF))
3765         return 2;
3766     if (!(ch & ~0xFFFF))
3767         return 3;
3768     if (!(ch & ~0x1FFFFF))
3769         return 4;
3770     // In Unicode Standard codepoint must be in range U+0000..U+10FFFF
3771     // return invalid codepoint as one byte
3772     return 1;
3773 }
3774 
charUtf16WordCount(lUInt32 ch)3775 inline int charUtf16WordCount(lUInt32 ch) {
3776     if (!(ch & ~0xFFFF))
3777         return 1;
3778     if (!(ch & ~0x1FFFFF))
3779         return 2;
3780     // In Unicode Standard codepoint must be in range U+0000..U+10FFFF
3781     // return invalid codepoint as one word
3782     return 1;
3783 }
3784 
Utf8ByteCount(const lChar32 * str)3785 int Utf8ByteCount(const lChar32 * str)
3786 {
3787     int count = 0;
3788     lUInt32 ch;
3789     while ( (ch=*str++) ) {
3790         count += charUtf8ByteCount(ch);
3791     }
3792     return count;
3793 }
3794 
charWtf8ByteCount(lUInt32 ch)3795 inline int charWtf8ByteCount(lUInt32 ch) {
3796     if (!(ch & ~0x7F))
3797         return 1;
3798     if (!(ch & ~0x7FF))
3799         return 2;
3800     if (!(ch & ~0xFFFF))
3801         return 3;
3802     if (!(ch & ~0x1FFFFF))
3803         return 6;
3804     return 1;
3805 }
3806 
Utf8ByteCount(const lChar32 * str,int len)3807 int Utf8ByteCount(const lChar32 * str, int len)
3808 {
3809     int count = 0;
3810     lUInt32 ch;
3811     while ((len--) > 0) {
3812         ch = *str++;
3813         count += charUtf8ByteCount(ch);
3814     }
3815     return count;
3816 }
3817 
Utf16WordCount(const lChar32 * str,int len)3818 int Utf16WordCount(const lChar32 * str, int len)
3819 {
3820     int count = 0;
3821     lUInt32 ch;
3822     while ((len--) > 0) {
3823         ch = *str++;
3824         count += charUtf16WordCount(ch);
3825     }
3826     return count;
3827 }
3828 
Wtf8ByteCount(const lChar32 * str,int len)3829 int Wtf8ByteCount(const lChar32 * str, int len)
3830 {
3831     int count = 0;
3832     lUInt32 ch;
3833     while ((len--) > 0) {
3834         ch = *str++;
3835         count += charWtf8ByteCount(ch);
3836     }
3837     return count;
3838 }
3839 
Utf8ToUnicode(const lString8 & str)3840 lString32 Utf8ToUnicode( const lString8 & str )
3841 {
3842     return Utf8ToUnicode( str.c_str() );
3843 }
3844 
Utf16ToUnicode(const lString16 & str)3845 lString32 Utf16ToUnicode( const lString16 & str )
3846 {
3847     return Utf16ToUnicode( str.c_str() );
3848 }
3849 
// Takes the low six bits of s[index] and moves them left by `shift` bits.
// Expects a pointer or array named `s` to be in scope where it is expanded.
// Both arguments are parenthesized so that compound expressions expand safely.
#define CONT_BYTE(index,shift) (((lChar32)(s[(index)]) & 0x3F) << (shift))
3851 
DecodeUtf8(const char * s,lChar32 * p,int len)3852 static void DecodeUtf8(const char * s,  lChar32 * p, int len)
3853 {
3854     lChar32 * endp = p + len;
3855     lUInt32 ch;
3856     while (p < endp) {
3857         ch = *s++;
3858         if ( (ch & 0x80) == 0 ) {
3859             *p++ = (char)ch;
3860         } else if ( (ch & 0xE0) == 0xC0 ) {
3861             *p++ = ((ch & 0x1F) << 6)
3862                     | CONT_BYTE(0,0);
3863             s++;
3864         } else if ( (ch & 0xF0) == 0xE0 ) {
3865             *p++ = ((ch & 0x0F) << 12)
3866                 | CONT_BYTE(0,6)
3867                 | CONT_BYTE(1,0);
3868             s += 2;
3869         } else if ( (ch & 0xF8) == 0xF0 ) {
3870             *p++ = ((ch & 0x07) << 18)
3871                 | CONT_BYTE(0,12)
3872                 | CONT_BYTE(1,6)
3873                 | CONT_BYTE(2,0);
3874             s += 3;
3875         } else {
3876             // Invalid first byte in UTF-8 sequence
3877             // Pass with mask 0x7F, to resolve exception around env->NewStringUTF()
3878             *p++ = (char) (ch & 0x7F);
3879         }
3880     }
3881 }
3882 
DecodeWtf8(const char * s,lChar32 * p,int len)3883 static void DecodeWtf8(const char * s,  lChar32 * p, int len)
3884 {
3885     lChar32 * endp = p + len;
3886     lUInt32 ch;
3887     while (p < endp) {
3888         ch = *s;
3889         bool matched = false;
3890         if ( (ch & 0x80) == 0 ) {
3891             matched = true;
3892             *p++ = (char)ch;
3893             s++;
3894         } else if ( (ch & 0xE0) == 0xC0 ) {
3895             matched = true;
3896             *p++ = ((ch & 0x1F) << 6)
3897                     | CONT_BYTE(1,0);
3898             s += 2;
3899         } else if ( (ch & 0xF0) == 0xE0 ) {
3900             matched = true;
3901             *p++ = ((ch & 0x0F) << 12)
3902                 | CONT_BYTE(1,6)
3903                 | CONT_BYTE(2,0);
3904             s += 3;
3905             if (*(p-1) >= 0xD800 && *(p-1) <= 0xDBFF) {     // what we wrote is a high surrogate,
3906                 lUInt32 next = *s;                          // and there's room next for a low surrogate
3907                 if ( (next & 0xF0) == 0xE0) {               // is a 3-bytes sequence
3908                     next = ((next & 0x0F) << 12) | CONT_BYTE(1,6) | CONT_BYTE(2,0);
3909                     if (next >= 0xDC00 && next <= 0xDFFF) { // is a low surrogate: valid surrogates sequence
3910                         ch = 0x10000 + ((*(p-1) & 0x3FF)<<10) + (next & 0x3FF);
3911                         p--; // rewind to override what we wrote
3912                         *p++ = ch;
3913                         s += 3;
3914                     }
3915                 }
3916             }
3917         } else if ( (ch & 0xF8) == 0xF0 ) {
3918             // Mostly unused
3919             matched = true;
3920             *p++ = ((ch & 0x07) << 18)
3921                 | CONT_BYTE(1,12)
3922                 | CONT_BYTE(2,6)
3923                 | CONT_BYTE(3,0);
3924             s += 4;
3925         } else {
3926             // Invalid first byte in UTF-8 sequence
3927             // Pass with mask 0x7F, to resolve exception around env->NewStringUTF()
3928             *p++ = (char) (ch & 0x7F);
3929             s++;
3930             matched = true; // just to avoid next if
3931         }
3932 
3933         // unexpected character
3934         if (!matched) {
3935             *p++ = '?';
3936             s++;
3937         }
3938     }
3939 }
3940 
DecodeUtf16(const lChar16 * s,lChar32 * p,int len)3941 static void DecodeUtf16(const lChar16 * s,  lChar32 * p, int len)
3942 {
3943     lChar32 * endp = p + len;
3944     lUInt16 ch;
3945     while (p < endp) {
3946         ch = *s++;
3947         if ( (ch >=0 && ch <= 0xD7FF) || (ch >= 0xE000 && ch <= 0xFFFF) ) {
3948             *p++ = (lChar32)ch;
3949         } else if ( ch >= 0xD800 && ch < 0xDC00 ) {
3950             lUInt16 next = (lUInt16)*s;
3951             if (next >= 0xDC00 && next < 0xE000) {
3952                 // convert surrogate pair into unicode code point
3953                 // 110110wwwwxxxxxx, 110111xxxxxxxxxx => 000uuuuuxxxxxxxxxxxxxxxx
3954                 //  where uuuuu = wwww+1
3955                 *p++ = ( ( ( (ch & 0x03C0) >> 6 ) + 1 ) << 16 ) | ((ch & 0x3F) << 10) | (next & 0x3FF);
3956             } else {
3957                 // Invalid second word in UTF-16 sequence (including '\0')
3958                 // Pass with mask 0x7F, to resolve exception around env->NewStringUTF()
3959                 *p++ = (char) (ch & 0x7F);
3960             }
3961             s++;
3962         } else {
3963             // Invalid first word in UTF-16 sequence
3964             // Pass with mask 0x7F, to resolve exception around env->NewStringUTF()
3965             *p++ = (char) (ch & 0x7F);
3966         }
3967     }
3968 }
3969 
// True when s[index] has its top two bits set to 10 (binary), i.e.
// (byte & 0xC0) == 0x80. Expects `s` to be in scope where it is expanded.
#define IS_FOLLOWING(index) (0x80 == (s[index] & 0xC0))
3972 
Utf8ToUnicode(const lUInt8 * src,int & srclen,lChar32 * dst,int & dstlen)3973 void Utf8ToUnicode(const lUInt8 * src,  int &srclen, lChar32 * dst, int &dstlen)
3974 {
3975     const lUInt8 * s = src;
3976     const lUInt8 * ends = s + srclen;
3977     lChar32 * p = dst;
3978     lChar32 * endp = p + dstlen;
3979     lUInt32 ch;
3980     bool matched;
3981     while (p < endp && s < ends) {
3982         ch = *s;
3983         matched = false;
3984         if ( (ch & 0x80) == 0 ) {
3985             matched = true;
3986             *p++ = (char)ch;
3987             s++;
3988         } else if ( (ch & 0xE0) == 0xC0 ) {
3989             if (s + 2 > ends)
3990                 break;
3991             if (IS_FOLLOWING(1)) {
3992                 matched = true;
3993                 *p++ = ((ch & 0x1F) << 6)
3994                         | CONT_BYTE(1,0);
3995                 s += 2;
3996             }
3997         } else if ( (ch & 0xF0) == 0xE0 ) {
3998             if (s + 3 > ends)
3999                 break;
4000             if (IS_FOLLOWING(1) && IS_FOLLOWING(2)) {
4001                 matched = true;
4002                 *p++ = ((ch & 0x0F) << 12)
4003                     | CONT_BYTE(1,6)
4004                     | CONT_BYTE(2,0);
4005                 s += 3;
4006                 // Supports WTF-8 : https://en.wikipedia.org/wiki/UTF-8#WTF-8
4007                 // a superset of UTF-8, that includes UTF-16 surrogates
4008                 // in UTF-8 bytes (forbidden in well-formed UTF-8).
4009                 // We may get that from bad producers or converters.
4010                 // As these shouldn't be there in UTF-8, if we find
4011                 // these surrogates in the right sequence, we might as well
4012                 // convert the char they represent to the right Unicode
4013                 // codepoint and display it instead of a '?'.
4014                 //   Surrogates are code points from two special ranges of
4015                 //   Unicode values, reserved for use as the leading, and
4016                 //   trailing values of paired code units in UTF-16. Leading,
4017                 //   also called high, surrogates are from D800 to DBFF, and
4018                 //   trailing, or low, surrogates are from DC00 to DFFF. They
4019                 //   are called surrogates, since they do not represent
4020                 //   characters directly, but only as a pair.
4021                 if (*(p-1) >= 0xD800 && *(p-1) <= 0xDBFF && s+2 < ends) { // what we wrote is a high surrogate,
4022                     lUInt32 next = *s;                            // and there's room next for a low surrogate
4023                     if ( (next & 0xF0) == 0xE0 && IS_FOLLOWING(1) && IS_FOLLOWING(2)) { // is a valid 3-bytes sequence
4024                         next = ((next & 0x0F) << 12) | CONT_BYTE(1,6) | CONT_BYTE(2,0);
4025                         if (next >= 0xDC00 && next <= 0xDFFF) { // is a low surrogate: valid surrogates sequence
4026                             ch = 0x10000 + ((*(p-1) & 0x3FF)<<10) + (next & 0x3FF);
4027                             p--; // rewind to override what we wrote
4028                             *p++ = ch;
4029                             s += 3;
4030                         }
4031                     }
4032                 }
4033             }
4034         } else if ( (ch & 0xF8) == 0xF0 ) {
4035             if (s + 4 > ends)
4036                 break;
4037             if (IS_FOLLOWING(1) && IS_FOLLOWING(2) && IS_FOLLOWING(3)) {
4038                 matched = true;
4039                 *p++ = ((ch & 0x07) << 18)
4040                     | CONT_BYTE(1,12)
4041                     | CONT_BYTE(2,6)
4042                     | CONT_BYTE(3,0);
4043                 s += 4;
4044             }
4045         } else {
4046             // Invalid first byte in UTF-8 sequence
4047             // Pass with mask 0x7F, to resolve exception around env->NewStringUTF()
4048             *p++ = (char) (ch & 0x7F);
4049             s++;
4050             matched = true; // just to avoid next if
4051         }
4052         // unexpected character
4053         if (!matched) {
4054             *p++ = '?';
4055             s++;
4056         }
4057     }
4058     srclen = (int)(s - src);
4059     dstlen = (int)(p - dst);
4060 }
4061 
Utf16ToUnicode(const lChar16 * src,int & srclen,lChar32 * dst,int & dstlen)4062 void Utf16ToUnicode(const lChar16 * src,  int &srclen, lChar32 * dst, int &dstlen)
4063 {
4064     const lChar16 * s = src;
4065     const lChar16 * ends = s + srclen;
4066     lChar32 * p = dst;
4067     lChar32 * endp = p + dstlen;
4068     lUInt32 ch;
4069     bool matched;
4070     while (p < endp && s < ends) {
4071         ch = *s;
4072         matched = false;
4073         if ( (ch >=0 && ch <= 0xD7FF) || (ch >= 0xE000 && ch <= 0xFFFF) ) {
4074             matched = true;
4075             *p++ = (lChar32)ch;
4076             s++;
4077         } else if ( ch >= 0xD800 && ch < 0xDC00 ) {
4078             if (s + 2 > ends)
4079                 break;
4080             lUInt16 next = *s;
4081             if (next >= 0xDC00 && next < 0xE000) {
4082                 matched = true;
4083                 // convert surrogate pair into unicode code point
4084                 // 110110wwwwxxxxxx, 110111xxxxxxxxxx => 000uuuuuxxxxxxxxxxxxxxxx
4085                 //  where uuuuu = wwww+1
4086                 *p++ = ( ( ( (ch & 0x03C0) >> 6 ) + 1 ) << 16 ) | ((ch & 0x3F) << 10) | (next & 0x3FF);
4087                 s += 2;
4088             }
4089         } else {
4090             // Invalid first word in UTF-16 sequence
4091             // Pass with mask 0x7F, to resolve exception around env->NewStringUTF()
4092             *p++ = (char) (ch & 0x7F);
4093             s++;
4094             matched = true; // just to avoid next if
4095         }
4096         // unexpected character
4097         if (!matched) {
4098             *p++ = '?';
4099             s++;
4100         }
4101     }
4102     srclen = (int)(s - src);
4103     dstlen = (int)(p - dst);
4104 }
4105 
Utf8ToUnicode(const char * s)4106 lString32 Utf8ToUnicode( const char * s ) {
4107     if (!s || !s[0])
4108       return lString32::empty_str;
4109     int len = Utf8CharCount( s );
4110     if (!len)
4111       return lString32::empty_str;
4112     lString32 dst;
4113     dst.append(len, (lChar32)0);
4114     lChar32 * p = dst.modify();
4115     DecodeUtf8(s, p, len);
4116     return dst;
4117 }
4118 
Utf8ToUnicode(const char * s,int sz)4119 lString32 Utf8ToUnicode( const char * s, int sz ) {
4120     if (!s || !s[0] || sz <= 0)
4121       return lString32::empty_str;
4122     int len = Utf8CharCount( s, sz );
4123     if (!len)
4124       return lString32::empty_str;
4125     lString32 dst;
4126     dst.append(len, 0);
4127     lChar32 * p = dst.modify();
4128     DecodeUtf8(s, p, len);
4129     return dst;
4130 }
4131 
Utf16ToUnicode(const lChar16 * s)4132 lString32 Utf16ToUnicode( const lChar16 * s )
4133 {
4134     if (!s || !s[0])
4135       return lString32::empty_str;
4136     int len = Utf16CharCount( s );
4137     if (!len)
4138       return lString32::empty_str;
4139     lString32 dst;
4140     dst.append(len, (lChar32)0);
4141     lChar32 * p = dst.modify();
4142     DecodeUtf16(s, p, len);
4143     return dst;
4144 }
4145 
Utf16ToUnicode(const lChar16 * s,int sz)4146 lString32 Utf16ToUnicode( const lChar16 * s, int sz )
4147 {
4148     if (!s || !s[0] || sz <= 0)
4149       return lString32::empty_str;
4150     int len = Utf16CharCount( s, sz );
4151     if (!len)
4152       return lString32::empty_str;
4153     lString32 dst;
4154     dst.append(len, 0);
4155     lChar32 * p = dst.modify();
4156     DecodeUtf16(s, p, len);
4157     return dst;
4158 }
4159 
Wtf8ToUnicode(const lString8 & str)4160 lString32 Wtf8ToUnicode( const lString8 & str )
4161 {
4162     return Wtf8ToUnicode( str.c_str() );
4163 }
4164 
Wtf8ToUnicode(const char * s)4165 lString32 Wtf8ToUnicode( const char * s ) {
4166     if (!s || !s[0])
4167       return lString32::empty_str;
4168     int len = Wtf8CharCount( s );
4169     if (!len)
4170       return lString32::empty_str;
4171     lString32 dst;
4172     dst.append(len, (lChar32)0);
4173     lChar32 * p = dst.modify();
4174     DecodeWtf8(s, p, len);
4175     return dst;
4176 }
4177 
Wtf8ToUnicode(const char * s,int sz)4178 lString32 Wtf8ToUnicode( const char * s, int sz ) {
4179     if (!s || !s[0] || sz <= 0)
4180       return lString32::empty_str;
4181     int len = Utf8CharCount( s, sz );
4182     if (!len)
4183       return lString32::empty_str;
4184     lString32 dst;
4185     dst.append(len, 0);
4186     lChar32 * p = dst.modify();
4187     DecodeWtf8(s, p, len);
4188     return dst;
4189 }
4190 
UnicodeToUtf8(const lChar32 * s,int count)4191 lString8 UnicodeToUtf8(const lChar32 * s, int count)
4192 {
4193     if (count <= 0)
4194       return lString8::empty_str;
4195     lString8 dst;
4196     int len = Utf8ByteCount(s, count);
4197     if (len <= 0)
4198       return lString8::empty_str;
4199     dst.append( len, ' ' );
4200     lChar8 * buf = dst.modify();
4201     {
4202         lUInt32 ch;
4203         while ((count--) > 0) {
4204             ch = *s++;
4205             if (!(ch & ~0x7F)) {
4206                 *buf++ = ( (lUInt8)ch );
4207             } else if (!(ch & ~0x7FF)) {
4208                 *buf++ = ( (lUInt8) ( ((ch >> 6) & 0x1F) | 0xC0 ) );
4209                 *buf++ = ( (lUInt8) ( ((ch ) & 0x3F) | 0x80 ) );
4210             } else if (!(ch & ~0xFFFF)) {
4211                 *buf++ = ( (lUInt8) ( ((ch >> 12) & 0x0F) | 0xE0 ) );
4212                 *buf++ = ( (lUInt8) ( ((ch >> 6) & 0x3F) | 0x80 ) );
4213                 *buf++ = ( (lUInt8) ( ((ch ) & 0x3F) | 0x80 ) );
4214             } else if (!(ch & ~0x1FFFFF)) {
4215                 *buf++ = ( (lUInt8) ( ((ch >> 18) & 0x07) | 0xF0 ) );
4216                 *buf++ = ( (lUInt8) ( ((ch >> 12) & 0x3F) | 0x80 ) );
4217                 *buf++ = ( (lUInt8) ( ((ch >> 6) & 0x3F) | 0x80 ) );
4218                 *buf++ = ( (lUInt8) ( ((ch ) & 0x3F) | 0x80 ) );
4219             } else {
4220                 // invalid codepoint
4221                 // In Unicode Standard codepoint must be in range U+0000 .. U+10FFFF
4222                 *buf++ = '?';
4223             }
4224         }
4225     }
4226     return dst;
4227 }
4228 
UnicodeToUtf16(const lChar32 * s,int count)4229 lString16 UnicodeToUtf16(const lChar32 * s, int count)
4230 {
4231     if (count <= 0)
4232         return lString16::empty_str;
4233     lString16 dst;
4234     int len = Utf16WordCount(s, count);
4235     if (len <= 0)
4236       return lString16::empty_str;
4237     dst.append( len, ' ' );
4238     lChar16 * buf = dst.modify();
4239     {
4240         lUInt32 ch;
4241         while ((count--) > 0) {
4242             ch = *s++;
4243             if (!(ch & ~0xFFFF)) {
4244                 *buf++ = (lChar16)ch;
4245             } else if (!(ch & ~0x1FFFFF)) {
4246                 // put into a surrogate pair
4247                 // 000uuuuuxxxxxxxxxxxxxxxx => 110110wwwwxxxxxx, 110111xxxxxxxxxx
4248                 //   where wwww = uuuuu - 1
4249                 // first word
4250                 *buf++ = (lChar16) ( 0xD800 | ( ( ( (ch >> 16) & 0x1F ) - 1 ) << 6 ) | ( (ch >> 10) & 0x3F ) );
4251                 // second word
4252                 *buf++ = (lChar16) ( 0xDC00 | (ch & 0x3FF) );
4253             } else {
4254                 // invalid codepoint
4255                 // In Unicode Standard codepoint must be in range U+0000 .. U+10FFFF
4256                 *buf++ = L'?';
4257             }
4258         }
4259     }
4260     return dst;
4261 }
4262 
UnicodeToUtf8(const lString32 & str)4263 lString8 UnicodeToUtf8( const lString32 & str )
4264 {
4265     return UnicodeToUtf8(str.c_str(), str.length());
4266 }
4267 
UnicodeToUtf16(const lString32 & str)4268 lString16  UnicodeToUtf16( const lString32 & str )
4269 {
4270     return UnicodeToUtf16(str.c_str(), str.length());
4271 }
4272 
UnicodeToWtf8(const lChar32 * s,int count)4273 lString8 UnicodeToWtf8(const lChar32 * s, int count)
4274 {
4275     if (count <= 0)
4276       return lString8::empty_str;
4277     lString8 dst;
4278     int len = Wtf8ByteCount(s, count);
4279     if (len <= 0)
4280       return lString8::empty_str;
4281     dst.append( len, ' ' );
4282     lChar8 * buf = dst.modify();
4283     {
4284         lUInt32 ch;
4285         while ((count--) > 0) {
4286             ch = *s++;
4287             if (!(ch & ~0x7F)) {
4288                 *buf++ = ( (lUInt8)ch );
4289             } else if (!(ch & ~0x7FF)) {
4290                 *buf++ = ( (lUInt8) ( ((ch >> 6) & 0x1F) | 0xC0 ) );
4291                 *buf++ = ( (lUInt8) ( ((ch ) & 0x3F) | 0x80 ) );
4292             } else if (!(ch & ~0xFFFF)) {
4293                 *buf++ = ( (lUInt8) ( ((ch >> 12) & 0x0F) | 0xE0 ) );
4294                 *buf++ = ( (lUInt8) ( ((ch >> 6) & 0x3F) | 0x80 ) );
4295                 *buf++ = ( (lUInt8) ( ((ch ) & 0x3F) | 0x80 ) );
4296             } else if (!(ch & ~0x1FFFFF)) {
4297                 //   UTF-16 Scalar Value
4298                 // 000uuuuu xxxxxxxxxxxxxxxx
4299                 //   UTF-16
4300                 // 110110wwwwxxxxxx 110111xxxxxxxxxx
4301                 // wwww = uuuuu - 1
4302                 lUInt16 wwww = (ch >> 16) - 1;
4303                 lUInt16 low = ch & 0xFFFF;
4304                 lUInt32 hiSurr = 0xD800 | (wwww << 6) | (low >> 10);    // high surrogate
4305                 lUInt32 lowSurr = 0xDC00 | (low & 0x3FF);               // low surrogate
4306                 *buf++ = ( (lUInt8) ( ((hiSurr >> 12) & 0x0F) | 0xE0 ) );
4307                 *buf++ = ( (lUInt8) ( ((hiSurr >> 6) & 0x3F) | 0x80 ) );
4308                 *buf++ = ( (lUInt8) ( ((hiSurr ) & 0x3F) | 0x80 ) );
4309                 *buf++ = ( (lUInt8) ( ((lowSurr >> 12) & 0x0F) | 0xE0 ) );
4310                 *buf++ = ( (lUInt8) ( ((lowSurr >> 6) & 0x3F) | 0x80 ) );
4311                 *buf++ = ( (lUInt8) ( ((lowSurr ) & 0x3F) | 0x80 ) );
4312             } else {
4313                 // invalid codepoint
4314                 // In Unicode Standard codepoint must be in range U+0000 .. U+10FFFF
4315                 *buf++ = '?';
4316             }
4317         }
4318     }
4319     return dst;
4320 }
4321 
UnicodeToWtf8(const lString32 & str)4322 lString8 UnicodeToWtf8( const lString32 & str )
4323 {
4324     return UnicodeToWtf8(str.c_str(), str.length());
4325 }
4326 
UnicodeTo8Bit(const lString32 & str,const lChar8 ** table)4327 lString8 UnicodeTo8Bit( const lString32 & str, const lChar8 * * table )
4328 {
4329     lString8 buf;
4330     buf.reserve( str.length() );
4331     for (int i=0; i < str.length(); i++) {
4332         lChar32 ch = str[i];
4333         const lChar8 * p = table[ (ch>>8) & 255 ];
4334         if ( p ) {
4335             buf += p[ ch&255 ];
4336         } else {
4337             buf += '?';
4338         }
4339     }
4340     return buf;
4341 }
4342 
ByteToUnicode(const lString8 & str,const lChar32 * table)4343 lString32 ByteToUnicode( const lString8 & str, const lChar32 * table )
4344 {
4345     lString32 buf;
4346     buf.reserve( str.length() );
4347     for (int i=0; i < str.length(); i++) {
4348         lChar32 ch = (unsigned char)str[i];
4349         lChar32 ch32 = ((ch & 0x80) && table) ? table[ (ch&0x7F) ] : ch;
4350         buf += ch32;
4351     }
4352     return buf;
4353 }
4354 
4355 
4356 #if !defined(__SYMBIAN32__) && defined(_WIN32)
4357 
UnicodeToLocal(const lString32 & str)4358 lString8 UnicodeToLocal( const lString32 & str )
4359 {
4360    lString8 dst;
4361    if (str.empty())
4362       return dst;
4363    lString16 utf16 = UnicodeToUtf16(str);
4364    CHAR def_char = '?';
4365    BOOL usedDefChar = FALSE;
4366    int len = WideCharToMultiByte(
4367       CP_ACP,
4368       WC_COMPOSITECHECK | WC_DISCARDNS
4369        | WC_SEPCHARS | WC_DEFAULTCHAR,
4370       utf16.c_str(),
4371       utf16.length(),
4372       NULL,
4373       0,
4374       &def_char,
4375       &usedDefChar
4376       );
4377    if (len)
4378    {
4379       dst.insert(0, len, ' ');
4380       WideCharToMultiByte(
4381          CP_ACP,
4382          WC_COMPOSITECHECK | WC_DISCARDNS
4383           | WC_SEPCHARS | WC_DEFAULTCHAR,
4384          utf16.c_str(),
4385          utf16.length(),
4386          dst.modify(),
4387          len,
4388          &def_char,
4389          &usedDefChar
4390          );
4391    }
4392    return dst;
4393 }
4394 
LocalToUnicode(const lString8 & str)4395 lString32 LocalToUnicode( const lString8 & str )
4396 {
4397    lString16 utf16;
4398    if (str.empty())
4399       return lString32::empty_str;
4400    int len = MultiByteToWideChar(
4401       CP_ACP,
4402       0,
4403       str.c_str(),
4404       str.length(),
4405       NULL,
4406       0
4407       );
4408    if (len)
4409    {
4410       utf16.insert(0, len, ' ');
4411       MultiByteToWideChar(
4412          CP_ACP,
4413          0,
4414          str.c_str(),
4415          str.length(),
4416          utf16.modify(),
4417          len
4418          );
4419    }
4420    return Utf16ToUnicode(utf16);
4421 }
4422 
4423 #else
4424 
UnicodeToLocal(const lString32 & str)4425 lString8 UnicodeToLocal( const lString32 & str )
4426 {
4427     return UnicodeToUtf8( str );
4428 }
4429 
LocalToUnicode(const lString8 & str)4430 lString32 LocalToUnicode( const lString8 & str )
4431 {
4432     return Utf8ToUnicode( str );
4433 }
4434 
4435 #endif
4436 
// Latin transliterations of the Cyrillic capital letters U+0410 .. U+042F.
// Indexed by (code point - 0x410), see getCharTranscript().
static const char * russian_capital[32] =
{
"A", "B", "V", "G", "D", "E", "ZH", "Z",          // U+0410 .. U+0417
"I", "J", "K", "L", "M", "N", "O", "P",           // U+0418 .. U+041F
"R", "S", "T", "U", "F", "H", "TS", "CH",         // U+0420 .. U+0427
"SH", "SH", "\'", "Y", "\'", "E", "YU", "YA"      // U+0428 .. U+042F
};
// Latin transliterations of the Cyrillic small letters U+0430 .. U+044F.
// Indexed by (code point - 0x430), see getCharTranscript().
static const char * russian_small[32] =
{
    "a",  "b",  "v",  "g", "d",  "e", "zh", "z",      // U+0430 .. U+0437
    "i",  "j",  "k",  "l", "m",  "n", "o",  "p",      // U+0438 .. U+043F
    "r",  "s",  "t",  "u", "f",  "h", "ts", "ch",     // U+0440 .. U+0447
    "sh", "sh", "\'", "y", "\'", "e", "yu", "ya"      // U+0448 .. U+044F
};
4448 
// ASCII transliterations of the Latin-1 Supplement letters U+00C0 .. U+00FF.
// Indexed by (code point - 0xC0), see getCharTranscript().
// The small-letter half mirrors the capital half entry for entry
// (e.g. U+00D5 -> "O" / U+00F5 -> "o", U+00D6 -> "OE" / U+00F6 -> "oe").
static const char * latin_1[64] =
{
"A", // U+00C0  LATIN CAPITAL LETTER A WITH GRAVE
"A", // U+00C1  LATIN CAPITAL LETTER A WITH ACUTE
"A", // U+00C2  LATIN CAPITAL LETTER A WITH CIRCUMFLEX
"A", // U+00C3  LATIN CAPITAL LETTER A WITH TILDE
"AE",// U+00C4  LATIN CAPITAL LETTER A WITH DIAERESIS
"A", // U+00C5  LATIN CAPITAL LETTER A WITH RING ABOVE
"AE",// U+00C6  LATIN CAPITAL LETTER AE
"C", // U+00C7  LATIN CAPITAL LETTER C WITH CEDILLA
"E", // U+00C8  LATIN CAPITAL LETTER E WITH GRAVE
"E", // U+00C9  LATIN CAPITAL LETTER E WITH ACUTE
"E", // U+00CA  LATIN CAPITAL LETTER E WITH CIRCUMFLEX
"E", // U+00CB  LATIN CAPITAL LETTER E WITH DIAERESIS
"I", // U+00CC  LATIN CAPITAL LETTER I WITH GRAVE
"I", // U+00CD  LATIN CAPITAL LETTER I WITH ACUTE
"I", // U+00CE  LATIN CAPITAL LETTER I WITH CIRCUMFLEX
"I", // U+00CF  LATIN CAPITAL LETTER I WITH DIAERESIS
"D", // U+00D0  LATIN CAPITAL LETTER ETH
"N", // U+00D1  LATIN CAPITAL LETTER N WITH TILDE
"O", // U+00D2  LATIN CAPITAL LETTER O WITH GRAVE
"O", // U+00D3  LATIN CAPITAL LETTER O WITH ACUTE
"O", // U+00D4  LATIN CAPITAL LETTER O WITH CIRCUMFLEX
"O", // U+00D5  LATIN CAPITAL LETTER O WITH TILDE
"OE",// U+00D6  LATIN CAPITAL LETTER O WITH DIAERESIS
"x", // U+00D7  MULTIPLICATION SIGN
"O", // U+00D8  LATIN CAPITAL LETTER O WITH STROKE
"U", // U+00D9  LATIN CAPITAL LETTER U WITH GRAVE
"U", // U+00DA  LATIN CAPITAL LETTER U WITH ACUTE
"U", // U+00DB  LATIN CAPITAL LETTER U WITH CIRCUMFLEX
"UE",// U+00DC  LATIN CAPITAL LETTER U WITH DIAERESIS
"Y", // U+00DD  LATIN CAPITAL LETTER Y WITH ACUTE
"p", // U+00DE  LATIN CAPITAL LETTER THORN
"ss",// U+00DF  LATIN SMALL LETTER SHARP S (a small letter, so lower case)
"a", // U+00E0  LATIN SMALL LETTER A WITH GRAVE
"a", // U+00E1  LATIN SMALL LETTER A WITH ACUTE
"a", // U+00E2  LATIN SMALL LETTER A WITH CIRCUMFLEX
"a", // U+00E3  LATIN SMALL LETTER A WITH TILDE
"ae",// U+00E4  LATIN SMALL LETTER A WITH DIAERESIS
"a", // U+00E5  LATIN SMALL LETTER A WITH RING ABOVE
"ae",// U+00E6  LATIN SMALL LETTER AE
"c", // U+00E7  LATIN SMALL LETTER C WITH CEDILLA
"e", // U+00E8  LATIN SMALL LETTER E WITH GRAVE
"e", // U+00E9  LATIN SMALL LETTER E WITH ACUTE
"e", // U+00EA  LATIN SMALL LETTER E WITH CIRCUMFLEX
"e", // U+00EB  LATIN SMALL LETTER E WITH DIAERESIS
"i", // U+00EC  LATIN SMALL LETTER I WITH GRAVE
"i", // U+00ED  LATIN SMALL LETTER I WITH ACUTE
"i", // U+00EE  LATIN SMALL LETTER I WITH CIRCUMFLEX
"i", // U+00EF  LATIN SMALL LETTER I WITH DIAERESIS
"d", // U+00F0  LATIN SMALL LETTER ETH
"n", // U+00F1  LATIN SMALL LETTER N WITH TILDE
"o", // U+00F2  LATIN SMALL LETTER O WITH GRAVE
"o", // U+00F3  LATIN SMALL LETTER O WITH ACUTE
"o", // U+00F4  LATIN SMALL LETTER O WITH CIRCUMFLEX
"o", // U+00F5  LATIN SMALL LETTER O WITH TILDE
"oe",// U+00F6  LATIN SMALL LETTER O WITH DIAERESIS
"x", // U+00F7  DIVISION SIGN
"o", // U+00F8  LATIN SMALL LETTER O WITH STROKE
"u", // U+00F9  LATIN SMALL LETTER U WITH GRAVE
"u", // U+00FA  LATIN SMALL LETTER U WITH ACUTE
"u", // U+00FB  LATIN SMALL LETTER U WITH CIRCUMFLEX
"ue",// U+00FC  LATIN SMALL LETTER U WITH DIAERESIS
"y", // U+00FD  LATIN SMALL LETTER Y WITH ACUTE
"p", // U+00FE  LATIN SMALL LETTER THORN
"y", // U+00FF  LATIN SMALL LETTER Y WITH DIAERESIS
};
4516 
getCharTranscript(lChar32 ch)4517 static const char * getCharTranscript( lChar32 ch )
4518 {
4519     if ( ch>=0x410 && ch<0x430 )
4520         return russian_capital[ch-0x410];
4521     else if (ch>=0x430 && ch<0x450)
4522         return russian_small[ch-0x430];
4523     else if (ch>=0xC0 && ch<0xFF)
4524         return latin_1[ch-0xC0];
4525     else if (ch==0x450)
4526         return "E";
4527     else if ( ch==0x451 )
4528         return "e";
4529     return "?";
4530 }
4531 
4532 
UnicodeToTranslit(const lString32 & str)4533 lString8  UnicodeToTranslit( const lString32 & str )
4534 {
4535     lString8 buf;
4536     if ( str.empty() )
4537         return buf;
4538     buf.reserve( str.length()*5/4 );
4539     for ( int i=0; i<str.length(); i++ ) {
4540         lChar32 ch = str[i];
4541         if ( ch>=32 && ch<=127 ) {
4542             buf.append( 1, (lChar8)ch );
4543         } else {
4544             const char * trans = getCharTranscript(ch);
4545             buf.append( trans );
4546         }
4547     }
4548     buf.pack();
4549     return buf;
4550 }
4551 
4552 
// Note:
// CH_PROP_UPPER and CH_PROP_LOWER together make up CH_PROP_ALPHA, which,
// along with CH_PROP_CONSONANT, CH_PROP_VOWEL and CH_PROP_ALPHA_SIGN,
// is used only for detecting word candidates for hyphenation.
// CH_PROP_PUNCT and CH_PROP_DASH are each used once in some obscure places.
// The others do not seem to be used anywhere: CH_PROP_SIGN, CH_PROP_DIGIT, CH_PROP_SPACE
4559 static lUInt16 char_props[] = {
4560 // 0x0000:
4561 0,0,0,0, 0,0,0,0, CH_PROP_SPACE,CH_PROP_SPACE,CH_PROP_SPACE,0, CH_PROP_SPACE,CH_PROP_SPACE,0,0,
4562 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4563 // 0x0020:
4564 CH_PROP_SPACE, // ' '
4565 CH_PROP_PUNCT | CH_PROP_AVOID_WRAP_BEFORE, // '!'
4566 0, // '\"'
4567 CH_PROP_SIGN, // '#'
4568 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE | CH_PROP_AVOID_WRAP_AFTER, // '$'
4569 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE, // '%'
4570 CH_PROP_SIGN, // '&'
4571 CH_PROP_SIGN, // '\''
4572 CH_PROP_AVOID_WRAP_AFTER, // '('
4573 CH_PROP_AVOID_WRAP_BEFORE, // ')'
4574 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE | CH_PROP_AVOID_WRAP_AFTER, // '*'
4575 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE | CH_PROP_AVOID_WRAP_AFTER, // '+'
4576 CH_PROP_PUNCT | CH_PROP_AVOID_WRAP_BEFORE, // ','
4577 CH_PROP_SIGN | CH_PROP_DASH | CH_PROP_AVOID_WRAP_BEFORE, // '-'
4578 CH_PROP_PUNCT | CH_PROP_AVOID_WRAP_BEFORE, // '.'
4579 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE, // '/'
4580 // 0x0030:
4581 CH_PROP_DIGIT, // '0'
4582 CH_PROP_DIGIT, // '1'
4583 CH_PROP_DIGIT, // '2'
4584 CH_PROP_DIGIT, // '3'
4585 CH_PROP_DIGIT, // '4'
4586 CH_PROP_DIGIT, // '5'
4587 CH_PROP_DIGIT, // '6'
4588 CH_PROP_DIGIT, // '7'
4589 CH_PROP_DIGIT, // '8'
4590 CH_PROP_DIGIT, // '9'
4591 CH_PROP_PUNCT | CH_PROP_AVOID_WRAP_BEFORE, // ':'
4592 CH_PROP_PUNCT | CH_PROP_AVOID_WRAP_BEFORE, // ';'
4593 CH_PROP_SIGN  | CH_PROP_AVOID_WRAP_BEFORE | CH_PROP_AVOID_WRAP_AFTER,  // '<'
4594 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE | CH_PROP_AVOID_WRAP_AFTER,  // '='
4595 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE | CH_PROP_AVOID_WRAP_AFTER,  // '>'
4596 CH_PROP_PUNCT | CH_PROP_AVOID_WRAP_BEFORE, // '?'
4597 // 0x0040:
4598 CH_PROP_SIGN,  // '@'
4599 CH_PROP_UPPER | CH_PROP_VOWEL,     // 'A'
4600 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'B'
4601 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'C'
4602 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'D'
4603 CH_PROP_UPPER | CH_PROP_VOWEL, // 'E'
4604 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'F'
4605 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'G'
4606 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'H'
4607 CH_PROP_UPPER | CH_PROP_VOWEL, // 'I'
4608 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'J'
4609 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'K'
4610 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'L'
4611 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'M'
4612 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'N'
4613 CH_PROP_UPPER | CH_PROP_VOWEL, // 'O'
4614 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'P'
4615 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'Q'
4616 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'R'
4617 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'S'
4618 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'T'
4619 CH_PROP_UPPER | CH_PROP_VOWEL, // 'U'
4620 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'V'
4621 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'W'
4622 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'X'
4623 CH_PROP_UPPER | CH_PROP_VOWEL, // 'Y'
4624 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'Z'
4625 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_AFTER, // '['
4626 CH_PROP_SIGN, // '\'
4627 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE, // ']'
4628 CH_PROP_SIGN, // '^'
4629 CH_PROP_SIGN, // '_'
4630 // 0x0060:
4631 CH_PROP_SIGN,  // '`'
4632 CH_PROP_LOWER | CH_PROP_VOWEL,     // 'a'
4633 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'b'
4634 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'c'
4635 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'd'
4636 CH_PROP_LOWER | CH_PROP_VOWEL, // 'e'
4637 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'f'
4638 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'g'
4639 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'h'
4640 CH_PROP_LOWER | CH_PROP_VOWEL, // 'i'
4641 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'j'
4642 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'k'
4643 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'l'
4644 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'm'
4645 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'n'
4646 CH_PROP_LOWER | CH_PROP_VOWEL, // 'o'
4647 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'p'
4648 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'q'
4649 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'r'
4650 CH_PROP_LOWER | CH_PROP_CONSONANT, // 's'
4651 CH_PROP_LOWER | CH_PROP_CONSONANT, // 't'
4652 CH_PROP_LOWER | CH_PROP_VOWEL, // 'u'
4653 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'v'
4654 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'w'
4655 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'x'
4656 CH_PROP_LOWER | CH_PROP_VOWEL, // 'y'
4657 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'z'
4658 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_AFTER, // '{'
4659 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE | CH_PROP_AVOID_WRAP_AFTER, // '|'
4660 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE, // '}'
4661 CH_PROP_SIGN, // '~'
4662 CH_PROP_SIGN, // ' '
4663 // 0x0080:
4664 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4665 // 0x0090:
4666 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4667 // 0x00A0:
4668 CH_PROP_SPACE, // 00A0 nbsp
4669 CH_PROP_PUNCT, // 00A1 inverted !
4670 CH_PROP_SIGN,  // 00A2
4671 CH_PROP_SIGN,  // 00A3
4672 CH_PROP_SIGN,  // 00A4
4673 CH_PROP_SIGN,  // 00A5
4674 CH_PROP_SIGN,  // 00A6
4675 CH_PROP_SIGN,  // 00A7
4676 CH_PROP_SIGN,  // 00A8
4677 CH_PROP_SIGN,  // 00A9
4678 CH_PROP_SIGN,  // 00AA
4679 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_AFTER,  // 00AB «
4680 CH_PROP_SIGN,  // 00AC
4681 CH_PROP_HYPHEN,// 00AD soft-hyphen (UNICODE_SOFT_HYPHEN_CODE)
4682 CH_PROP_SIGN,  // 00AE
4683 CH_PROP_SIGN,  // 00AF
// 0x00B0:
4685 CH_PROP_SIGN,  // 00B0 degree
4686 CH_PROP_SIGN,  // 00B1
4687 CH_PROP_SIGN,  // 00B2
4688 CH_PROP_SIGN,  // 00B3
4689 CH_PROP_SIGN,  // 00B4
4690 CH_PROP_SIGN,  // 00B5
4691 CH_PROP_SIGN,  // 00B6
4692 CH_PROP_SIGN,  // 00B7
4693 CH_PROP_SIGN,  // 00B8
4694 CH_PROP_SIGN,  // 00B9
4695 CH_PROP_SIGN,  // 00BA
4696 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE,  // 00BB »
4697 CH_PROP_SIGN,  // 00BC
4698 CH_PROP_SIGN,  // 00BD
4699 CH_PROP_SIGN,  // 00BE
4700 CH_PROP_PUNCT, // 00BF
4701 // 0x00C0:
4702 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00C0 A`
4703 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00C1 A'
4704 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00C2 A^
4705 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00C3 A"
4706 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00C4 A:
4707 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00C5 Ao
4708 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00C6 AE
4709 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 00C7 C~
4710 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00C8 E`
4711 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00C9 E'
4712 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00CA E^
4713 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00CB E:
4714 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00CC I`
4715 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00CD I'
4716 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00CE I^
4717 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00CF I:
4718 // 0x00D0:
4719 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 00D0 D-
4720 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 00D1 N-
4721 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00D2 O`
4722 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00D3 O'
4723 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00D4 O^
4724 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00D5 O"
4725 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00D6 O:
4726 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE | CH_PROP_AVOID_WRAP_AFTER,  // 00D7 x (multiplication sign)
4727 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00D8 O/
4728 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00D9 U`
4729 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00DA U'
4730 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00DB U^
4731 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00DC U:
4732 CH_PROP_UPPER | CH_PROP_VOWEL,  // 00DD Y'
4733 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 00DE P thorn
4734 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 00DF ss
4735 // 0x00E0:
4736 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00E0 a`
4737 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00E1 a'
4738 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00E2 a^
4739 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00E3 a"
4740 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00E4 a:
4741 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00E5 ao
4742 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00E6 ae
4743 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 00E7 c~
4744 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00E8 e`
4745 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00E9 e'
4746 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00EA e^
4747 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00EB e:
4748 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00EC i`
4749 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00ED i'
4750 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00EE i^
4751 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00EF i:
4752 // 0x00F0:
4753 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 00F0 eth
4754 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 00F1 n~
4755 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00F2 o`
4756 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00F3 o'
4757 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00F4 o^
4758 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00F5 o"
4759 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00F6 o:
4760 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE | CH_PROP_AVOID_WRAP_AFTER,  // 00F7 (division sign %)
4761 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00F8 o/
4762 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00F9 u`
4763 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00FA u'
4764 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00FB u^
4765 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00FC u:
4766 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00FD y'
4767 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 00FE p thorn
4768 CH_PROP_LOWER | CH_PROP_VOWEL,  // 00FF y:
4769 // 0x0100:
4770 CH_PROP_UPPER | CH_PROP_VOWEL,  // 0100 A_
4771 CH_PROP_LOWER | CH_PROP_VOWEL,  // 0101 a_
4772 CH_PROP_UPPER | CH_PROP_VOWEL,  // 0102 Au
4773 CH_PROP_LOWER | CH_PROP_VOWEL,  // 0103 au
4774 CH_PROP_UPPER | CH_PROP_VOWEL,  // 0104 A,
4775 CH_PROP_LOWER | CH_PROP_VOWEL,  // 0105 a,
4776 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0106 C'
4777 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0107 c'
4778 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0108 C^
4779 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0109 c^
4780 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 010A C.
4781 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 010B c.
4782 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 010C Cu
4783 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 010D cu
4784 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 010E Du
4785 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 010F d'
4786 
4787 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0110 D-
4788 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0111 d-
4789 CH_PROP_UPPER | CH_PROP_VOWEL,  // 0112 E_
4790 CH_PROP_LOWER | CH_PROP_VOWEL,  // 0113 e_
4791 CH_PROP_UPPER | CH_PROP_VOWEL,  // 0114 Eu
4792 CH_PROP_LOWER | CH_PROP_VOWEL,  // 0115 eu
4793 CH_PROP_UPPER | CH_PROP_VOWEL,  // 0116 E.
4794 CH_PROP_LOWER | CH_PROP_VOWEL,  // 0117 e.
4795 CH_PROP_UPPER | CH_PROP_VOWEL,  // 0118 E,
4796 CH_PROP_LOWER | CH_PROP_VOWEL,  // 0119 e,
4797 CH_PROP_UPPER | CH_PROP_VOWEL,  // 011A Ev
4798 CH_PROP_LOWER | CH_PROP_VOWEL,  // 011B ev
4799 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 011C G^
4800 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 011D g^
4801 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 011E Gu
4802 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 011F Gu
4803 
4804 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0120 G.
4805 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0121 g.
4806 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0122 G,
4807 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0123 g,
4808 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0124 H^
4809 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0125 h^
4810 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0126 H-
4811 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0127 h-
4812 CH_PROP_UPPER | CH_PROP_VOWEL,  // 0128 I~
4813 CH_PROP_LOWER | CH_PROP_VOWEL,  // 0129 i~
4814 CH_PROP_UPPER | CH_PROP_VOWEL,  // 012A I_
4815 CH_PROP_LOWER | CH_PROP_VOWEL,  // 012B i_
4816 CH_PROP_UPPER | CH_PROP_VOWEL,  // 012C Iu
4817 CH_PROP_LOWER | CH_PROP_VOWEL,  // 012D iu
4818 CH_PROP_UPPER | CH_PROP_VOWEL,  // 012E I,
4819 CH_PROP_LOWER | CH_PROP_VOWEL,  // 012F i,
4820 
4821 CH_PROP_UPPER | CH_PROP_VOWEL,  // 0130 I.
4822 CH_PROP_LOWER | CH_PROP_VOWEL,  // 0131 i-.
4823 CH_PROP_UPPER | CH_PROP_VOWEL,  // 0132 IJ
4824 CH_PROP_LOWER | CH_PROP_VOWEL,  // 0133 ij
4825 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0134 J^
4826 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0135 j^
4827 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0136 K,
4828 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0137 k,
4829 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0138 k (kra)
4830 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0139 L'
4831 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 013A l'
4832 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 013B L,
4833 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 013C l,
4834 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 013D L'
4835 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 013E l'
4836 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 013F L.
4837 
4838 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0140 l.
4839 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0141 L/
4840 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0142 l/
4841 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0143 N'
4842 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0144 n'
4843 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0145 N,
4844 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0146 n,
4845 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0147 Nv
4846 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0148 nv
4847 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0149 `n
4848 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 014A Ng
4849 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 014B ng
4850 CH_PROP_UPPER | CH_PROP_VOWEL,  // 014C O_
4851 CH_PROP_LOWER | CH_PROP_VOWEL,  // 014D o-.
4852 CH_PROP_UPPER | CH_PROP_VOWEL,  // 014E Ou
4853 CH_PROP_LOWER | CH_PROP_VOWEL,  // 014F ou
4854 
4855 CH_PROP_UPPER | CH_PROP_VOWEL,  // 0150 O"
4856 CH_PROP_LOWER | CH_PROP_VOWEL,  // 0151 o"
4857 CH_PROP_UPPER | CH_PROP_VOWEL,  // 0152 Oe
4858 CH_PROP_LOWER | CH_PROP_VOWEL,  // 0153 oe
4859 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0154 R'
4860 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0155 r'
4861 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0156 R,
4862 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0157 r,
4863 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0158 Rv
4864 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0159 rv
4865 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 015A S'
4866 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 015B s'
4867 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 015C S^
4868 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 015D s^
4869 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 015E S,
4870 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 015F s,
4871 
4872 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0160 Sv
4873 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0161 sv
4874 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0162 T,
4875 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0163 T,
4876 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0164 Tv
4877 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0165 Tv
4878 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0166 T-
4879 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0167 T-
4880 CH_PROP_UPPER | CH_PROP_VOWEL,  // 0168 U~
4881 CH_PROP_LOWER | CH_PROP_VOWEL,  // 0169 u~
4882 CH_PROP_UPPER | CH_PROP_VOWEL,  // 016A U_
4883 CH_PROP_LOWER | CH_PROP_VOWEL,  // 016B u_
4884 CH_PROP_UPPER | CH_PROP_VOWEL,  // 016C Uu
4885 CH_PROP_LOWER | CH_PROP_VOWEL,  // 016D uu
4886 CH_PROP_UPPER | CH_PROP_VOWEL,  // 016E Uo
4887 CH_PROP_LOWER | CH_PROP_VOWEL,  // 016F uo
4888 
4889 CH_PROP_UPPER | CH_PROP_VOWEL,  // 0170 U"
4890 CH_PROP_LOWER | CH_PROP_VOWEL,  // 0171 u"
4891 CH_PROP_UPPER | CH_PROP_VOWEL,  // 0172 U,
4892 CH_PROP_LOWER | CH_PROP_VOWEL,  // 0173 u,
4893 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0174 W^
4894 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0175 w^
4895 CH_PROP_UPPER | CH_PROP_VOWEL,  // 0176 Y,
4896 CH_PROP_LOWER | CH_PROP_VOWEL,  // 0177 y,
4897 CH_PROP_UPPER | CH_PROP_VOWEL,  // 0178 Y:
4898 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0179 Z'
4899 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 017A z'
4900 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 017B Z.
4901 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 017C z.
4902 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 017D Zv
4903 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 017E zv
4904 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 017F s long
4905 // 0x0180:
4906 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4907 // 0x0190:
4908 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4909 // 0x01A0:
4910 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4911 // 0x01B0:
4912 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4913 // 0x01C0:
4914 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4915 // 0x01D0:
4916 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4917 // 0x01E0:
4918 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4919 // 0x01F0:
4920 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4921 // 0x0200:
4922 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4923 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4924 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4925 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4926 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4927 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4928 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4929 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4930 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4931 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4932 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4933 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4934 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4935 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4936 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4937 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4938 // 0x0300:
4939 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4940 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4941 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4942 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4943 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4944 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4945 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4946 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4947 // 0x0380:
4948 0,0,0,0,
4949 CH_PROP_VOWEL, //    GREEK TONOS 	0384
4950 CH_PROP_VOWEL, //    GREEK DIALYTIKA TONOS 	0385
4951 CH_PROP_UPPER | CH_PROP_VOWEL, //    GREEK CAPITAL LETTER ALPHA WITH TONOS 	0386
4952 CH_PROP_UPPER | CH_PROP_PUNCT, //    GREEK ANO TELEIA 	0387
4953 CH_PROP_UPPER | CH_PROP_VOWEL, //    GREEK CAPITAL LETTER EPSILON WITH TONOS 	0388
4954 CH_PROP_UPPER | CH_PROP_VOWEL, //    GREEK CAPITAL LETTER ETA WITH TONOS 	0389
4955 CH_PROP_UPPER | CH_PROP_VOWEL, //    GREEK CAPITAL LETTER IOTA WITH TONOS 	038A
4956 0,//038b
4957 CH_PROP_UPPER | CH_PROP_VOWEL, //    GREEK CAPITAL LETTER OMICRON WITH TONOS 	038C
4958 0,//038d
4959 CH_PROP_UPPER | CH_PROP_VOWEL, //    GREEK CAPITAL LETTER UPSILON WITH TONOS 	038E
4960 CH_PROP_UPPER | CH_PROP_VOWEL, //    GREEK CAPITAL LETTER OMEGA WITH TONOS 	038F
4961 // 0x0390:
4962 CH_PROP_LOWER | CH_PROP_VOWEL, //    GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS 	0390
4963 CH_PROP_UPPER | CH_PROP_VOWEL, //    GREEK CAPITAL LETTER ALPHA	Α	0391 	&Alpha;
4964 CH_PROP_UPPER | CH_PROP_CONSONANT, //    GREEK CAPITAL LETTER BETA	0392 	&Beta;
4965 CH_PROP_UPPER | CH_PROP_CONSONANT, //    GREEK CAPITAL LETTER GAMMA	0393 	&Gamma;
4966 CH_PROP_UPPER | CH_PROP_CONSONANT, //    GREEK CAPITAL LETTER DELTA	0394 	&Delta;
4967 CH_PROP_UPPER | CH_PROP_VOWEL, //    GREEK CAPITAL LETTER EPSILON	0395 	&Epsilon;
4968 CH_PROP_UPPER | CH_PROP_CONSONANT, //    GREEK CAPITAL LETTER ZETA	0396 	&Zeta;
4969 CH_PROP_UPPER | CH_PROP_VOWEL, //    GREEK CAPITAL LETTER ETA	0397 	&Eta;
4970 CH_PROP_UPPER | CH_PROP_CONSONANT, //    GREEK CAPITAL LETTER THETA	0398 	&Theta;
4971 CH_PROP_UPPER | CH_PROP_VOWEL, //    GREEK CAPITAL LETTER IOTA	0399 	&Iota;
4972 CH_PROP_UPPER | CH_PROP_CONSONANT, //    GREEK CAPITAL LETTER KAPPA	039A 	&Kappa;
4973 CH_PROP_UPPER | CH_PROP_CONSONANT, //    GREEK CAPITAL LETTER LAM(B)DA	039B 	&Lambda;
4974 CH_PROP_UPPER | CH_PROP_CONSONANT, //    GREEK CAPITAL LETTER MU	039C 	&Mu;
4975 CH_PROP_UPPER | CH_PROP_CONSONANT, //    GREEK CAPITAL LETTER NU	039D 	&Nu;
4976 CH_PROP_UPPER | CH_PROP_CONSONANT, //    GREEK CAPITAL LETTER XI	039E 	&Xi;
4977 CH_PROP_UPPER | CH_PROP_VOWEL, //    GREEK CAPITAL LETTER OMICRON	039F 	&Omicron;
4978 CH_PROP_UPPER | CH_PROP_CONSONANT, //    GREEK CAPITAL LETTER PI	03A0 	&Pi;
4979 CH_PROP_UPPER | CH_PROP_CONSONANT, //    GREEK CAPITAL LETTER RHO	03A1 	&Rho;
4980 0, // 03a2
4981 CH_PROP_UPPER | CH_PROP_CONSONANT, //    GREEK CAPITAL LETTER SIGMA	03A3 	&Sigma;
4982 CH_PROP_UPPER | CH_PROP_CONSONANT, //    GREEK CAPITAL LETTER TAU	03A4 	&Tau;
4983 CH_PROP_UPPER | CH_PROP_VOWEL, //    GREEK CAPITAL LETTER UPSILON	03A5 	&Upsilon;
4984 CH_PROP_UPPER | CH_PROP_CONSONANT, //    GREEK CAPITAL LETTER PHI	03A6 	&Phi;
4985 CH_PROP_UPPER | CH_PROP_CONSONANT, //    GREEK CAPITAL LETTER CHI	03A7 	&Chi;
4986 CH_PROP_UPPER | CH_PROP_CONSONANT, //    GREEK CAPITAL LETTER PSI	03A8 	&Psi;
4987 CH_PROP_UPPER | CH_PROP_VOWEL, //    GREEK CAPITAL LETTER OMEGA	03A9 	&Omega;
4988 CH_PROP_UPPER | CH_PROP_VOWEL, //    GREEK CAPITAL LETTER IOTA WITH DIALYTIKA 	03AA
4989 CH_PROP_UPPER | CH_PROP_VOWEL, //    GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA 	03AB
4990 CH_PROP_LOWER | CH_PROP_VOWEL, //    GREEK SMALL LETTER ALPHA WITH TONOS 	03AC
4991 CH_PROP_LOWER | CH_PROP_VOWEL, //    GREEK SMALL LETTER EPSILON WITH TONOS 	03AD
4992 CH_PROP_LOWER | CH_PROP_VOWEL, //    GREEK SMALL LETTER ETA WITH TONOS 	03AE
4993 CH_PROP_LOWER | CH_PROP_VOWEL, //    GREEK SMALL LETTER IOTA WITH TONOS 	03AF
4994 
4995 // 03B0
4996 CH_PROP_LOWER | CH_PROP_VOWEL, //    GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS 	03B0
4997 CH_PROP_LOWER | CH_PROP_VOWEL, //    GREEK SMALL LETTER ALPHA   03B1 	&alpha;
4998 CH_PROP_LOWER | CH_PROP_CONSONANT, //    GREEK SMALL LETTER BETA	03B2 	&beta;
4999 CH_PROP_LOWER | CH_PROP_CONSONANT, //    GREEK SMALL LETTER GAMMA	03B3 	&gamma;
5000 CH_PROP_LOWER | CH_PROP_CONSONANT, //    GREEK SMALL LETTER DELTA	03B4 	&delta;
5001 CH_PROP_LOWER | CH_PROP_VOWEL, //    GREEK SMALL LETTER EPSILON	03B5 	&epsilon;
5002 CH_PROP_LOWER | CH_PROP_CONSONANT, //    GREEK SMALL LETTER ZETA	03B6 	&zeta;
5003 CH_PROP_LOWER | CH_PROP_VOWEL, //    GREEK SMALL LETTER ETA     03B7 	&eta;
5004 CH_PROP_LOWER | CH_PROP_CONSONANT, //    GREEK SMALL LETTER THETA	03B8 	&theta;
5005 CH_PROP_LOWER | CH_PROP_VOWEL, //    GREEK SMALL LETTER IOTA	03B9 	&iota;
5006 CH_PROP_LOWER | CH_PROP_CONSONANT, //    GREEK SMALL LETTER KAPPA	03BA 	&kappa;
5007 CH_PROP_LOWER | CH_PROP_CONSONANT, //    GREEK SMALL LETTER LAM(B)DA	03BB 	&lambda;
5008 CH_PROP_LOWER | CH_PROP_CONSONANT, //    GREEK SMALL LETTER MU      03BC 	&mu;
5009 CH_PROP_LOWER | CH_PROP_CONSONANT, //    GREEK SMALL LETTER NU      03BD 	&nu;
5010 CH_PROP_LOWER | CH_PROP_CONSONANT, //    GREEK SMALL LETTER XI      03BE 	&xi;
5011 CH_PROP_LOWER | CH_PROP_VOWEL, //    GREEK SMALL LETTER OMICRON	03BF 	&omicron;
5012 
5013 CH_PROP_LOWER | CH_PROP_CONSONANT, //    GREEK SMALL LETTER PI      03C0 	&pi;
5014 CH_PROP_LOWER | CH_PROP_CONSONANT, //    GREEK SMALL LETTER RHO     03C1 	&rho;
5015 CH_PROP_LOWER | CH_PROP_CONSONANT, //    GREEK SMALL LETTER FINAL SIGMA	03C2
5016 CH_PROP_LOWER | CH_PROP_CONSONANT, //    GREEK SMALL LETTER SIGMA	03C3 	&sigma;
5017 CH_PROP_LOWER | CH_PROP_CONSONANT, //    GREEK SMALL LETTER TAU     03C4 	&tau;
5018 CH_PROP_LOWER | CH_PROP_VOWEL, //    GREEK SMALL LETTER UPSILON	03C5 	&upsilon;
5019 CH_PROP_LOWER | CH_PROP_CONSONANT, //    GREEK SMALL LETTER PHI     03C6 	&phi;
5020 CH_PROP_LOWER | CH_PROP_CONSONANT, //    GREEK SMALL LETTER CHI     03C7 	&chi;
5021 CH_PROP_LOWER | CH_PROP_CONSONANT, //    GREEK SMALL LETTER PSI     03C8 	&psi;
5022 CH_PROP_LOWER | CH_PROP_VOWEL, //    GREEK SMALL LETTER OMEGA   03C9 	&omega;
5023 CH_PROP_LOWER | CH_PROP_VOWEL, //    GREEK SMALL LETTER IOTA WITH DIALYTIKA 	03CA
5024 CH_PROP_LOWER | CH_PROP_VOWEL, //    GREEK SMALL LETTER UPSILON WITH DIALYTIKA 	03CB
5025 CH_PROP_LOWER | CH_PROP_VOWEL, //    GREEK SMALL LETTER OMICRON WITH TONOS 	03CC
5026 CH_PROP_LOWER | CH_PROP_VOWEL, //    GREEK SMALL LETTER UPSILON WITH TONOS 	03CD
5027 CH_PROP_LOWER | CH_PROP_VOWEL, //    GREEK SMALL LETTER OMEGA WITH TONOS 	03CE
5028 0, //03cf
5029 // 03d0
5030 CH_PROP_CONSONANT, //    GREEK BETA SYMBOL (cursive) 	03D0
5031 CH_PROP_CONSONANT, //    GREEK THETA SYMBOL (cursive) 	03D1
5032 CH_PROP_VOWEL, //    GREEK UPSILON WITH HOOK SYMBOL	03D2
5033 CH_PROP_VOWEL, //    GREEK UPSILON WITH ACUTE AND HOOK SYMBOL	03D3
5034 CH_PROP_VOWEL, //    GREEK UPSILON WITH DIAERESIS AND HOOK SYMBOL	03D4
5035 CH_PROP_CONSONANT, //    GREEK PHI SYMBOL (cursive) 	03D5
5036 CH_PROP_CONSONANT, //    GREEK PI SYMBOL	03D6
5037 CH_PROP_CONSONANT, //    GREEK KAI SYMBOL	03D7
5038 0, // 03d8
5039 0, // 03d9
5040 CH_PROP_CONSONANT, //    GREEK LETTER STIGMA	03DA
5041 CH_PROP_CONSONANT, //    GREEK SMALL LETTER STIGMA	03DB
5042 CH_PROP_CONSONANT, //    GREEK LETTER DIGAMMA (F)	03DC
5043 CH_PROP_CONSONANT, //    GREEK SMALL LETTER DIGAMMA (f)	03DD
5044 CH_PROP_CONSONANT, //    GREEK LETTER KOPPA	03DE
5045 CH_PROP_CONSONANT, //    GREEK SMALL LETTER KOPPA	03DF
5046 // 03e0
5047 CH_PROP_CONSONANT, //    GREEK LETTER SAMPI	03E0
5048 CH_PROP_CONSONANT, //    GREEK SMALL LETTER SAMPI	03E1
5049 // 03e2
5050     0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
5051 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
5052 // 0x0400:
5053 0,  // 0400
5054 CH_PROP_UPPER | CH_PROP_VOWEL,      // 0401 cyrillic E:
5055 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0402 cyrillic Dje
5056 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0403 cyrillic Gje
5057 CH_PROP_UPPER | CH_PROP_VOWEL,      // 0404 cyrillic ukr Ie
5058 CH_PROP_UPPER | CH_PROP_CONSONANT,  // 0405 cyrillic Dze
5059 CH_PROP_UPPER | CH_PROP_VOWEL,      // 0406 cyrillic ukr I
5060 CH_PROP_UPPER | CH_PROP_VOWEL,      // 0407 cyrillic ukr I:
5061 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 0408 cyrillic J
5062 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 0409 cyrillic L'
5063 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 040A cyrillic N'
5064 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 040B cyrillic Th
5065 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 040C cyrillic K'
5066 0,      // 040D cyrillic
5067 CH_PROP_UPPER | CH_PROP_VOWEL,      // 040E cyrillic short U (U with breve)
5068 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 040F cyrillic Dzhe
5069 // 0x0410:
5070 CH_PROP_UPPER | CH_PROP_VOWEL,      // 0410 cyrillic A
5071 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 0411 cyrillic B
5072 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 0412 cyrillic V
5073 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 0413 cyrillic G
5074 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 0414 cyrillic D
5075 CH_PROP_UPPER | CH_PROP_VOWEL,      // 0415 cyrillic E
5076 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 0416 cyrillic Zh
5077 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 0417 cyrillic Z
5078 CH_PROP_UPPER | CH_PROP_VOWEL,      // 0418 cyrillic I
5079 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 0419 cyrillic YI
5080 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 041A cyrillic K
5081 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 041B cyrillic L
5082 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 041C cyrillic M
5083 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 041D cyrillic N
5084 CH_PROP_UPPER | CH_PROP_VOWEL,      // 041E cyrillic O
5085 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 041F cyrillic P
5086 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 0420 cyrillic R
5087 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 0421 cyrillic S
5088 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 0422 cyrillic T
5089 CH_PROP_UPPER | CH_PROP_VOWEL,      // 0423 cyrillic U
5090 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 0424 cyrillic F
5091 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 0425 cyrillic H
5092 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 0426 cyrillic C
5093 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 0427 cyrillic Ch
5094 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 0428 cyrillic Sh
5095 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 0429 cyrillic Sch
5096 CH_PROP_UPPER | CH_PROP_ALPHA_SIGN,      // 042A cyrillic Hard sign
5097 CH_PROP_UPPER | CH_PROP_VOWEL,      // 042B cyrillic Y
5098 CH_PROP_UPPER | CH_PROP_ALPHA_SIGN,      // 042C cyrillic Soft sign
5099 CH_PROP_UPPER | CH_PROP_VOWEL,      // 042D cyrillic EE
5100 CH_PROP_UPPER | CH_PROP_VOWEL,      // 042E cyrillic Yu
5101 CH_PROP_UPPER | CH_PROP_VOWEL,      // 042F cyrillic Ya
5102 // 0x0430:
5103 CH_PROP_LOWER | CH_PROP_VOWEL,      // 0430 cyrillic A
5104 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 0431 cyrillic B
5105 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 0432 cyrillic V
5106 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 0433 cyrillic G
5107 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 0434 cyrillic D
5108 CH_PROP_LOWER | CH_PROP_VOWEL,      // 0435 cyrillic E
5109 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 0436 cyrillic Zh
5110 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 0437 cyrillic Z
5111 CH_PROP_LOWER | CH_PROP_VOWEL,      // 0438 cyrillic I
5112 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 0439 cyrillic YI
5113 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 043A cyrillic K
5114 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 043B cyrillic L
5115 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 043C cyrillic M
5116 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 043D cyrillic N
5117 CH_PROP_LOWER | CH_PROP_VOWEL,      // 043E cyrillic O
5118 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 043F cyrillic P
5119 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 0440 cyrillic R
5120 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 0441 cyrillic S
5121 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 0442 cyrillic T
5122 CH_PROP_LOWER | CH_PROP_VOWEL,      // 0443 cyrillic U
5123 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 0444 cyrillic F
5124 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 0445 cyrillic H
5125 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 0446 cyrillic C
5126 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 0447 cyrillic Ch
5127 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 0448 cyrillic Sh
5128 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 0449 cyrillic Sch
5129 CH_PROP_LOWER | CH_PROP_ALPHA_SIGN,     // 044A cyrillic Hard sign
5130 CH_PROP_LOWER | CH_PROP_VOWEL,      // 044B cyrillic Y
5131 CH_PROP_LOWER | CH_PROP_ALPHA_SIGN,     // 044C cyrillic Soft sign
5132 CH_PROP_LOWER | CH_PROP_VOWEL,      // 044D cyrillic EE
5133 CH_PROP_LOWER | CH_PROP_VOWEL,      // 044E cyrillic Yu
5134 CH_PROP_LOWER | CH_PROP_VOWEL,      // 044F cyrillic Ya
5135 0,      // 0450 cyrillic
5136 CH_PROP_LOWER | CH_PROP_VOWEL,      // 0451 cyrillic e:
5137 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0452 cyrillic Dje
5138 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0453 cyrillic Gje
5139 CH_PROP_LOWER | CH_PROP_VOWEL,      // 0454 cyrillic ukr Ie
5140 CH_PROP_LOWER | CH_PROP_CONSONANT,  // 0455 cyrillic Dze
5141 CH_PROP_LOWER | CH_PROP_VOWEL,      // 0456 cyrillic ukr I
5142 CH_PROP_LOWER | CH_PROP_VOWEL,      // 0457 cyrillic ukr I:
5143 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 0458 cyrillic J
5144 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 0459 cyrillic L'
5145 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 045A cyrillic N'
5146 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 045B cyrillic Th
5147 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 045C cyrillic K'
5148 0,      // 045D cyrillic
5149 CH_PROP_LOWER | CH_PROP_VOWEL,      // 045E cyrillic short u (u with breve)
5150 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 045F cyrillic Dzhe
5151 // 0x0460:
5152 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
5153 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
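5154 // 0x0490: (NOTE(review): the 0x0460 group above holds only 32 placeholders, i.e. 0x0460..0x047F; a row for 0x0480..0x048F seems to be missing, which would place the entries below 16 slots early -- confirm and add 16 zeros if so)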
5155 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 0490 cyrillic G'
5156 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 0491 cyrillic g'
5157 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 0492 cyrillic G-
5158 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 0493 cyrillic g-
5159 0,      // 0494 cyrillic
5160 0,      // 0495 cyrillic
5161 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 0496 cyrillic Zh,
5162 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 0497 cyrillic zh,
5163 0,      // 0498 cyrillic
5164 0,      // 0499 cyrillic
5165 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 049A cyrillic K,
5166 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 049B cyrillic k,
5167 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 049C cyrillic K|
5168 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 049D cyrillic k|
5169 0,      // 049E cyrillic
5170 0,      // 049F cyrillic
5171 0,      // 04A0 cyrillic
5172 0,      // 04A1 cyrillic
5173 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 04A2 cyrillic H,
5174 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 04A3 cyrillic n,
5175 0,      // 04A4 cyrillic
5176 0,      // 04A5 cyrillic
5177 0,      // 04A6 cyrillic
5178 0,      // 04A7 cyrillic
5179 0,      // 04A8 cyrillic
5180 0,      // 04A9 cyrillic
5181 0,      // 04AA cyrillic
5182 0,      // 04AB cyrillic
5183 0,      // 04AC cyrillic
5184 0,      // 04AD cyrillic
5185 CH_PROP_UPPER | CH_PROP_VOWEL,      // 04AE cyrillic Y
5186 CH_PROP_LOWER | CH_PROP_VOWEL,      // 04AF cyrillic y
5187 CH_PROP_UPPER | CH_PROP_VOWEL,      // 04B0 cyrillic Y-
5188 CH_PROP_LOWER | CH_PROP_VOWEL,      // 04B1 cyrillic y-
5189 CH_PROP_UPPER | CH_PROP_CONSONANT,      // 04B2 cyrillic X,
5190 CH_PROP_LOWER | CH_PROP_CONSONANT,      // 04B3 cyrillic x,
5191 };
5192 
5193 
5194 static lUInt16 char_props_1f00[] = {
5195 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH PSILI 1F00
5196 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH DASIA 1F01
5197 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA 1F02
5198 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA 1F03
5199 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA 1F04
5200 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA 1F05
5201 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI 1F06
5202 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI 1F07
5203 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH PSILI 1F08
5204 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH DASIA 1F09
5205 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA 1F0A
5206 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA 1F0B
5207 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA 1F0C
5208 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA 1F0D
5209 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI 1F0E
5210 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI 1F0F
5211 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER EPSILON WITH PSILI 1F10
5212 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER EPSILON WITH DASIA 1F11
5213 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER EPSILON WITH PSILI AND VARIA 1F12
5214 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER EPSILON WITH DASIA AND VARIA 1F13
5215 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER EPSILON WITH PSILI AND OXIA 1F14
5216 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA 1F15
5217 0, 0,
5218 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER EPSILON WITH PSILI 1F18
5219 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER EPSILON WITH DASIA 1F19
5220 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA 1F1A
5221 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA 1F1B
5222 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA 1F1C
5223 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA 1F1D
5224 0, 0,
5225 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH PSILI 1F20
5226 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH DASIA 1F21
5227 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH PSILI AND VARIA 1F22
5228 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH DASIA AND VARIA 1F23
5229 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH PSILI AND OXIA 1F24
5230 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH DASIA AND OXIA 1F25
5231 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI 1F26
5232 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI 1F27
5233 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH PSILI 1F28
5234 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH DASIA 1F29
5235 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA 1F2A
5236 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA 1F2B
5237 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA 1F2C
5238 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA 1F2D
5239 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI 1F2E
5240 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI 1F2F
5241 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH PSILI 1F30
5242 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH DASIA 1F31
5243 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH PSILI AND VARIA 1F32
5244 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH DASIA AND VARIA 1F33
5245 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH PSILI AND OXIA 1F34
5246 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH DASIA AND OXIA 1F35
5247 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH PSILI AND PERISPOMENI 1F36
5248 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH DASIA AND PERISPOMENI 1F37
5249 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH PSILI 1F38
5250 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH DASIA 1F39
5251 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA 1F3A
5252 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA 1F3B
5253 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA 1F3C
5254 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA 1F3D
5255 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI 1F3E
5256 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI 1F3F
5257 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMICRON WITH PSILI 1F40
5258 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMICRON WITH DASIA 1F41
5259 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMICRON WITH PSILI AND VARIA 1F42
5260 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMICRON WITH DASIA AND VARIA 1F43
5261 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMICRON WITH PSILI AND OXIA 1F44
5262 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA 1F45
5263 0, 0,
5264 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMICRON WITH PSILI 1F48
5265 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMICRON WITH DASIA 1F49
5266 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA 1F4A
5267 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA 1F4B
5268 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA 1F4C
5269 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA 1F4D
5270 0, 0,
5271 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH PSILI 1F50
5272 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH DASIA 1F51
5273 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA 1F52
5274 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH DASIA AND VARIA 1F53
5275 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA 1F54
5276 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH DASIA AND OXIA 1F55
5277 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI 1F56
5278 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI 1F57
5279 0,
5280 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER UPSILON WITH DASIA 1F59
5281 0,
5282 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA 1F5B
5283 0,
5284 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA 1F5D
5285 0,
5286 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI 1F5F
5287 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH PSILI 1F60
5288 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH DASIA 1F61
5289 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA 1F62
5290 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA 1F63
5291 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA 1F64
5292 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA 1F65
5293 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI 1F66
5294 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI 1F67
5295 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH PSILI 1F68
5296 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH DASIA 1F69
5297 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA 1F6A
5298 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA 1F6B
5299 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA 1F6C
5300 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA 1F6D
5301 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI 1F6E
5302 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI 1F6F
5303 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH VARIA 1F70
5304 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH OXIA 1F71
5305 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER EPSILON WITH VARIA 1F72
5306 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER EPSILON WITH OXIA 1F73
5307 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH VARIA 1F74
5308 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH OXIA 1F75
5309 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH VARIA 1F76
5310 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH OXIA 1F77
5311 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMICRON WITH VARIA 1F78
5312 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMICRON WITH OXIA 1F79
5313 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH VARIA 1F7A
5314 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH OXIA 1F7B
5315 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH VARIA 1F7C
5316 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH OXIA 1F7D
5317 0, 0,
5318 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI 1F80
5319 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI 1F81
5320 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI 1F82
5321 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI 1F83
5322 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI 1F84
5323 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI 1F85
5324 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 1F86
5325 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 1F87
5326 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI 1F88
5327 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI 1F89
5328 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI 1F8A
5329 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI 1F8B
5330 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI 1F8C
5331 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI 1F8D
5332 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 1F8E
5333 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 1F8F
5334 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI 1F90
5335 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI 1F91
5336 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI 1F92
5337 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI 1F93
5338 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI 1F94
5339 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI 1F95
5340 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 1F96
5341 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 1F97
5342 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI 1F98
5343 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI 1F99
5344 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI 1F9A
5345 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI 1F9B
5346 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI 1F9C
5347 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI 1F9D
5348 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 1F9E
5349 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 1F9F
5350 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI 1FA0
5351 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI 1FA1
5352 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI 1FA2
5353 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI 1FA3
5354 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI 1FA4
5355 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI 1FA5
5356 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 1FA6
5357 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 1FA7
5358 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI 1FA8
5359 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI 1FA9
5360 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI 1FAA
5361 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI 1FAB
5362 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI 1FAC
5363 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI 1FAD
5364 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 1FAE
5365 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 1FAF
5366 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH VRACHY 1FB0
5367 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH MACRON 1FB1
5368 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI 1FB2
5369 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI 1FB3
5370 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI 1FB4
5371 0,
5372 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH PERISPOMENI 1FB6
5373 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI 1FB7
5374 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH VRACHY 1FB8
5375 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH MACRON 1FB9
5376 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH VARIA 1FBA
5377 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH OXIA 1FBB
5378 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI 1FBC
5379 0, 0, 0,
5380 0, 0,
5381 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI 1FC2
5382 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI 1FC3
5383 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI 1FC4
5384 0,
5385 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH PERISPOMENI 1FC6
5386 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI 1FC7
5387 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER EPSILON WITH VARIA 1FC8
5388 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER EPSILON WITH OXIA 1FC9
5389 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH VARIA 1FCA
5390 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH OXIA 1FCB
5391 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI 1FCC
5392 0, 0, 0,
5393 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH VRACHY 1FD0
5394 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH MACRON 1FD1
5395 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA 1FD2
5396 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA 1FD3
5397 0, 0,
5398 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH PERISPOMENI 1FD6
5399 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI 1FD7
5400 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH VRACHY 1FD8
5401 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH MACRON 1FD9
5402 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH VARIA 1FDA
5403 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH OXIA 1FDB
5404 0, 0, 0, 0,
5405 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH VRACHY 1FE0
5406 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH MACRON 1FE1
5407 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA 1FE2
5408 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA 1FE3
5409 CH_PROP_LOWER | CH_PROP_CONSONANT, // GREEK SMALL LETTER RHO WITH PSILI 1FE4
5410 CH_PROP_LOWER | CH_PROP_CONSONANT, // GREEK SMALL LETTER RHO WITH DASIA 1FE5
5411 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH PERISPOMENI 1FE6
5412 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI 1FE7
5413 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER UPSILON WITH VRACHY 1FE8
5414 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER UPSILON WITH MACRON 1FE9
5415 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER UPSILON WITH VARIA 1FEA
5416 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER UPSILON WITH OXIA 1FEB
5417 CH_PROP_UPPER | CH_PROP_CONSONANT, // GREEK CAPITAL LETTER RHO WITH DASIA 1FEC
5418 0, 0, 0,
5419 0, 0,
5420 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI 1FF2
5421 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI 1FF3
5422 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI 1FF4
5423 0,
5424 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH PERISPOMENI 1FF6
5425 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI 1FF7
5426 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMICRON WITH VARIA 1FF8
5427 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMICRON WITH OXIA 1FF9
5428 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH VARIA 1FFA
5429 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH OXIA 1FFB
5430 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 1FFC
5431 0, 0, 0
5432 };
5433 
getCharProp(lChar32 ch)5434 inline lUInt16 getCharProp(lChar32 ch) {
5435     static const lChar32 maxchar = sizeof(char_props) / sizeof( lUInt16 );
5436     if (ch<maxchar)
5437         return char_props[ch];
5438     else if ((ch>>8) == 0x1F)
5439         return char_props_1f00[ch & 255];
5440     else if (ch>=0x2012 && ch<=0x2015)
5441         return CH_PROP_DASH|CH_PROP_SIGN;
5442     else if (ch==0x201C) // left double quotation mark
5443         return CH_PROP_AVOID_WRAP_AFTER;
5444     else if (ch==0x201D) // right double quotation mark
5445         return CH_PROP_AVOID_WRAP_BEFORE;
5446     else if (ch>=UNICODE_CJK_IDEOGRAPHS_BEGIN && ch<=UNICODE_CJK_IDEOGRAPHS_END&&(ch<=UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_BEGIN||
5447                                                                                   ch>=UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_END))
5448         return CH_PROP_CJK;
5449     else if ((ch>=UNICODE_CJK_PUNCTUATION_BEGIN && ch<=UNICODE_CJK_PUNCTUATION_END) ||
5450              (ch>=UNICODE_GENERAL_PUNCTUATION_BEGIN && ch<=UNICODE_GENERAL_PUNCTUATION_END) ||
5451              (ch>=UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_BEGIN && ch<=UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_END))
5452         return CH_PROP_PUNCT;
5453     return 0;
5454 }
5455 
lStr_getCharProps(const lChar32 * str,int sz,lUInt16 * props)5456 void lStr_getCharProps( const lChar32 * str, int sz, lUInt16 * props )
5457 {
5458     for ( int i=0; i<sz; i++ ) {
5459         lChar32 ch = str[i];
5460         props[i] = getCharProp(ch);
5461     }
5462 }
5463 
lStr_isWordSeparator(lChar32 ch)5464 bool lStr_isWordSeparator( lChar32 ch )
5465 {
5466     // ASCII letters and digits are NOT word separators
5467     if (ch >= 0x61 && ch <= 0x7A) return false; // lowercase ascii letters
5468     if (ch >= 0x41 && ch <= 0x5A) return false; // uppercase ascii letters
5469     if (ch >= 0x30 && ch <= 0x39) return false; // digits
5470     if (ch == 0xAD ) return false; // soft-hyphen, considered now as part of word
5471     // All other below 0xC0 are word separators:
5472     //   < 0x30 space, !"#$%&'()*+,-./
5473     //   < 0x41 :;<=>?@
5474     //   < 0x61 [\]^_`
5475     //   < 0xC0 {|}~ and control characters and other signs
5476     if (ch < 0xC0 ) return true;
5477     // 0xC0 to 0xFF, except 0xD7 and 0xF7, are latin accentuated letters.
5478     // Above 0xFF are other alphabets. Let's consider all above 0xC0 unicode
5479     // characters as letters, except the adequately named PUNCTUATION ranges.
5480     // There may be exceptions in some alphabets, that we can individually
5481     // add here :
5482     if (ch == 0xD7 ) return true;  // multiplication sign
5483     if (ch == 0xF7 ) return true;  // division sign
5484     // this one includes em-dash & friends, and other quotation marks
5485     if (ch>=UNICODE_GENERAL_PUNCTUATION_BEGIN && ch<=UNICODE_GENERAL_PUNCTUATION_END) return true;
5486     // CJK puncutation
5487     if (ch>=UNICODE_CJK_PUNCTUATION_BEGIN && ch<=UNICODE_CJK_PUNCTUATION_END) return true;
5488     if (ch>=UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_BEGIN && ch<=UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_END) return true;
5489     // Some others(from https://www.cs.tut.fi/~jkorpela/chars/spaces.html)
5490     if (ch == 0x1680 ) return true;  // OGHAM SPACE MARK
5491     if (ch == 0x180E ) return true;  // MONGOLIAN VOWEL SEPARATOR
5492     if (ch == 0xFEFF ) return true;  // ZERO WIDTH NO-BREAK SPACE
5493     // All others are considered part of a word, thus not word separators
5494     return false;
5495 }
5496 
5497 /// find alpha sequence bounds
lStr_findWordBounds(const lChar32 * str,int sz,int pos,int & start,int & end)5498 void lStr_findWordBounds( const lChar32 * str, int sz, int pos, int & start, int & end )
5499 {
5500     int hwStart, hwEnd;
5501 
5502     // 20180615: don't split anymore on UNICODE_SOFT_HYPHEN_CODE, consider
5503     // it like an alpha char of zero width not drawn.
5504     // Only hyphenation code will care about it
5505     // We don't use lStr_isWordSeparator() here, but we exclusively look
5506     // for ALPHA chars or soft-hyphens, as this function is and should
5507     // only be used before calling hyphenate() to find a real word to
5508     // give to the hyphenation algorithms.
5509 
5510 //    // skip spaces
5511 //    for (hwStart=pos-1; hwStart>0; hwStart--)
5512 //    {
5513 //        lChar32 ch = str[hwStart];
5514 //        if ( ch<(int)maxchar ) {
5515 //            lUInt16 props = char_props[ch];
5516 //            if ( !(props & CH_PROP_SPACE) )
5517 //                break;
5518 //        }
5519 //    }
5520 //    // skip punctuation signs and digits
5521 //    for (; hwStart>0; hwStart--)
5522 //    {
5523 //        lChar32 ch = str[hwStart];
5524 //        if ( ch<(int)maxchar ) {
5525 //            lUInt16 props = char_props[ch];
5526 //            if ( !(props & (CH_PROP_PUNCT|CH_PROP_DIGIT)) )
5527 //                break;
5528 //        }
5529 //    }
5530     // skip until first alpha
5531     for (hwStart = pos-1; hwStart > 0; hwStart--)
5532     {
5533         lChar32 ch = str[hwStart];
5534         lUInt16 props = getCharProp(ch);
5535         if ( props & CH_PROP_ALPHA || props & CH_PROP_HYPHEN )
5536             break;
5537     }
5538     if ( hwStart<0 ) {
5539         // no alphas found
5540         start = end = pos;
5541         return;
5542     }
5543     hwEnd = hwStart+1;
5544     // skipping while alpha
5545     for (; hwStart>0; hwStart--)
5546     {
5547         lChar32 ch = str[hwStart];
5548         //int lastAlpha = -1;
5549         if ( getCharProp(ch) & CH_PROP_ALPHA || getCharProp(ch) & CH_PROP_HYPHEN ) {
5550             //lastAlpha = hwStart;
5551         } else {
5552             hwStart++;
5553             break;
5554         }
5555     }
5556 //    if ( lastAlpha<0 ) {
5557 //        // no alphas found
5558 //        start = end = pos;
5559 //        return;
5560 //    }
5561     for (hwEnd=hwStart+1; hwEnd<sz; hwEnd++) // 20080404
5562     {
5563         lChar32 ch = str[hwEnd];
5564         if (!(getCharProp(ch) & CH_PROP_ALPHA) && !(getCharProp(ch) & CH_PROP_HYPHEN))
5565             break;
5566         ch = str[hwEnd-1];
5567         if ( ch==' ' ) // || ch==UNICODE_SOFT_HYPHEN_CODE) )
5568             break;
5569     }
5570     start = hwStart;
5571     end = hwEnd;
5572     //CRLog::debug("Word bounds: '%s'", LCSTR(lString32(str+start, end-start)));
5573 }
5574 
limit(size_type sz)5575 void  lString16::limit( size_type sz )
5576 {
5577     if ( length() > sz ) {
5578         modify();
5579         pchunk->len = sz;
5580         pchunk->buf16[sz] = 0;
5581     }
5582 }
5583 
limit(size_type sz)5584 void  lString32::limit( size_type sz )
5585 {
5586     if ( length() > sz ) {
5587         modify();
5588         pchunk->len = sz;
5589         pchunk->buf32[sz] = 0;
5590     }
5591 }
5592 
lGetCharProps(lChar32 ch)5593 lUInt16 lGetCharProps( lChar32 ch )
5594 {
5595     return getCharProp(ch);
5596 }
5597 
5598 
5599 /// returns true if string starts with specified substring, case insensitive
startsWithNoCase(const lString32 & substring) const5600 bool lString32::startsWithNoCase ( const lString32 & substring ) const
5601 {
5602     lString32 a = *this;
5603     lString32 b = substring;
5604     a.uppercase();
5605     b.uppercase();
5606     return a.startsWith( b );
5607 }
5608 
5609 /// returns true if string starts with specified substring
startsWith(const char * substring) const5610 bool lString8::startsWith( const char * substring ) const
5611 {
5612     if (!substring || !substring[0])
5613         return true;
5614     int len = (int)strlen(substring);
5615     if (length() < len)
5616         return false;
5617     const lChar8 * s1 = c_str();
5618     const lChar8 * s2 = substring;
5619     for (int i=0; i<len; i++ )
5620         if ( s1[i] != s2[i] )
5621             return false;
5622     return true;
5623 }
5624 
5625 /// returns true if string starts with specified substring
startsWith(const lString8 & substring) const5626 bool lString8::startsWith( const lString8 & substring ) const
5627 {
5628     if ( substring.empty() )
5629         return true;
5630     int len = substring.length();
5631     if (length() < len)
5632         return false;
5633     const lChar8 * s1 = c_str();
5634     const lChar8 * s2 = substring.c_str();
5635     for (int i=0; i<len; i++ )
5636         if ( s1[i] != s2[i] )
5637             return false;
5638     return true;
5639 }
5640 
5641 /// returns true if string ends with specified substring
endsWith(const lChar8 * substring) const5642 bool lString8::endsWith( const lChar8 * substring ) const
5643 {
5644     if ( !substring || !*substring )
5645         return true;
5646     int len = (int)strlen(substring);
5647     if ( length() < len )
5648         return false;
5649     const lChar8 * s1 = c_str() + (length()-len);
5650     const lChar8 * s2 = substring;
5651     return lStr_cmp( s1, s2 )==0;
5652 }
5653 
5654 /// returns true if string ends with specified substring
endsWith(const lChar16 * substring) const5655 bool lString16::endsWith( const lChar16 * substring ) const
5656 {
5657     if ( !substring || !*substring )
5658         return true;
5659     int len = lStr_len(substring);
5660     if ( length() < len )
5661         return false;
5662     const lChar16 * s1 = c_str() + (length()-len);
5663     const lChar16 * s2 = substring;
5664     return lStr_cmp( s1, s2 )==0;
5665 }
5666 
5667 /// returns true if string ends with specified substring
endsWith(const lChar8 * substring) const5668 bool lString16::endsWith( const lChar8 * substring ) const
5669 {
5670     if ( !substring || !*substring )
5671         return true;
5672     int len = lStr_len(substring);
5673     if ( length() < len )
5674         return false;
5675     const lChar16 * s1 = c_str() + (length()-len);
5676     const lChar8 * s2 = substring;
5677     return lStr_cmp( s1, s2 )==0;
5678 }
5679 
5680 /// returns true if string ends with specified substring
endsWith(const lString16 & substring) const5681 bool lString16::endsWith ( const lString16 & substring ) const
5682 {
5683     if ( substring.empty() )
5684         return true;
5685     int len = substring.length();
5686     if ( length() < len )
5687         return false;
5688     const lChar16 * s1 = c_str() + (length()-len);
5689     const lChar16 * s2 = substring.c_str();
5690     return lStr_cmp( s1, s2 )==0;
5691 }
5692 
5693 /// returns true if string starts with specified substring
startsWith(const lString16 & substring) const5694 bool lString16::startsWith( const lString16 & substring ) const
5695 {
5696     if ( substring.empty() )
5697         return true;
5698     int len = substring.length();
5699     if ( length() < len )
5700         return false;
5701     const lChar16 * s1 = c_str();
5702     const lChar16 * s2 = substring.c_str();
5703     for ( int i=0; i<len; i++ )
5704         if ( s1[i]!=s2[i] )
5705             return false;
5706     return true;
5707 }
5708 
5709 /// returns true if string starts with specified substring
startsWith(const lChar16 * substring) const5710 bool lString16::startsWith(const lChar16 * substring) const
5711 {
5712     if (!substring || !substring[0])
5713         return true;
5714     int len = _lStr_len(substring);
5715     if ( length() < len )
5716         return false;
5717     const lChar16 * s1 = c_str();
5718     const lChar16 * s2 = substring;
5719     for ( int i=0; i<len; i++ )
5720         if ( s1[i] != s2[i] )
5721             return false;
5722     return true;
5723 }
5724 
5725 /// returns true if string starts with specified substring
startsWith(const lChar8 * substring) const5726 bool lString16::startsWith(const lChar8 * substring) const
5727 {
5728     if (!substring || !substring[0])
5729         return true;
5730     int len = _lStr_len(substring);
5731     if ( length() < len )
5732         return false;
5733     const lChar16 * s1 = c_str();
5734     const lChar8 * s2 = substring;
5735     for ( int i=0; i<len; i++ )
5736         if (s1[i] != s2[i])
5737             return false;
5738     return true;
5739 }
5740 
5741 /// returns true if string ends with specified substring
endsWith(const lChar32 * substring) const5742 bool lString32::endsWith( const lChar32 * substring ) const
5743 {
5744     if ( !substring || !*substring )
5745         return true;
5746     int len = lStr_len(substring);
5747     if ( length() < len )
5748         return false;
5749     const lChar32 * s1 = c_str() + (length()-len);
5750     const lChar32 * s2 = substring;
5751     return lStr_cmp( s1, s2 )==0;
5752 }
5753 
5754 /// returns true if string ends with specified substring
endsWith(const lChar8 * substring) const5755 bool lString32::endsWith( const lChar8 * substring ) const
5756 {
5757     if ( !substring || !*substring )
5758         return true;
5759     int len = lStr_len(substring);
5760     if ( length() < len )
5761         return false;
5762     const lChar32 * s1 = c_str() + (length()-len);
5763     const lChar8 * s2 = substring;
5764     return lStr_cmp( s1, s2 )==0;
5765 }
5766 
5767 /// returns true if string ends with specified substring
endsWith(const lString32 & substring) const5768 bool lString32::endsWith ( const lString32 & substring ) const
5769 {
5770     if ( substring.empty() )
5771         return true;
5772     int len = substring.length();
5773     if ( length() < len )
5774         return false;
5775     const lChar32 * s1 = c_str() + (length()-len);
5776     const lChar32 * s2 = substring.c_str();
5777     return lStr_cmp( s1, s2 )==0;
5778 }
5779 
5780 /// returns true if string starts with specified substring
startsWith(const lString32 & substring) const5781 bool lString32::startsWith( const lString32 & substring ) const
5782 {
5783     if ( substring.empty() )
5784         return true;
5785     int len = substring.length();
5786     if ( length() < len )
5787         return false;
5788     const lChar32 * s1 = c_str();
5789     const lChar32 * s2 = substring.c_str();
5790     for ( int i=0; i<len; i++ )
5791         if ( s1[i]!=s2[i] )
5792             return false;
5793     return true;
5794 }
5795 
5796 /// returns true if string starts with specified substring
startsWith(const lChar32 * substring) const5797 bool lString32::startsWith(const lChar32 * substring) const
5798 {
5799     if (!substring || !substring[0])
5800         return true;
5801     int len = _lStr_len(substring);
5802     if ( length() < len )
5803         return false;
5804     const lChar32 * s1 = c_str();
5805     const lChar32 * s2 = substring;
5806     for ( int i=0; i<len; i++ )
5807         if ( s1[i] != s2[i] )
5808             return false;
5809     return true;
5810 }
5811 
5812 /// returns true if string starts with specified substring
startsWith(const lChar8 * substring) const5813 bool lString32::startsWith(const lChar8 * substring) const
5814 {
5815     if (!substring || !substring[0])
5816         return true;
5817     int len = _lStr_len(substring);
5818     if ( length() < len )
5819         return false;
5820     const lChar32 * s1 = c_str();
5821     const lChar8 * s2 = substring;
5822     for ( int i=0; i<len; i++ )
5823         if (s1[i] != s2[i])
5824             return false;
5825     return true;
5826 }
5827 
split2(const lString32 & delim,lString32 & value1,lString32 & value2)5828 bool lString32::split2( const lString32 & delim, lString32 & value1, lString32 & value2 )
5829 {
5830     if ( empty() )
5831         return false;
5832     int p = pos(delim);
5833     if ( p<=0 || p>=length()-delim.length() )
5834         return false;
5835     value1 = substr(0, p);
5836     value2 = substr(p+delim.length());
5837     return true;
5838 }
5839 
split2(const lChar32 * delim,lString32 & value1,lString32 & value2)5840 bool lString32::split2( const lChar32 * delim, lString32 & value1, lString32 & value2 )
5841 {
5842     if (empty())
5843         return false;
5844     int p = pos(delim);
5845     int l = lStr_len(delim);
5846     if (p<=0 || p >= length() - l)
5847         return false;
5848     value1 = substr(0, p);
5849     value2 = substr(p + l);
5850     return true;
5851 }
5852 
split2(const lChar8 * delim,lString32 & value1,lString32 & value2)5853 bool lString32::split2( const lChar8 * delim, lString32 & value1, lString32 & value2 )
5854 {
5855     if (empty())
5856         return false;
5857     int p = pos(delim);
5858     int l = lStr_len(delim);
5859     if (p<=0 || p >= length() - l)
5860         return false;
5861     value1 = substr(0, p);
5862     value2 = substr(p + l);
5863     return true;
5864 }
5865 
splitIntegerList(lString32 s,lString32 delim,int & value1,int & value2)5866 bool splitIntegerList( lString32 s, lString32 delim, int &value1, int &value2 )
5867 {
5868     if ( s.empty() )
5869         return false;
5870     lString32 s1, s2;
5871     if ( !s.split2( delim, s1, s2 ) )
5872         return false;
5873     int n1, n2;
5874     if ( !s1.atoi(n1) )
5875         return false;
5876     if ( !s2.atoi(n2) )
5877         return false;
5878     value1 = n1;
5879     value2 = n2;
5880     return true;
5881 }
5882 
replace(size_type p0,size_type n0,const lString8 & str)5883 lString8 & lString8::replace(size_type p0, size_type n0, const lString8 & str) {
5884     lString8 s1 = substr( 0, p0 );
5885     lString8 s2 = length() - p0 - n0 > 0 ? substr( p0+n0, length()-p0-n0 ) : lString8::empty_str;
5886     *this = s1 + str + s2;
5887     return *this;
5888 }
5889 
replace(size_type p0,size_type n0,const lString32 & str)5890 lString32 & lString32::replace(size_type p0, size_type n0, const lString32 & str)
5891 {
5892     lString32 s1 = substr( 0, p0 );
5893     lString32 s2 = length() - p0 - n0 > 0 ? substr( p0+n0, length()-p0-n0 ) : lString32::empty_str;
5894     *this = s1 + str + s2;
5895     return *this;
5896 }
5897 
5898 /// replaces part of string, if pattern is found
replace(const lString32 & findStr,const lString32 & replaceStr)5899 bool lString32::replace(const lString32 & findStr, const lString32 & replaceStr)
5900 {
5901     int p = pos(findStr);
5902     if ( p<0 )
5903         return false;
5904     *this = replace( p, findStr.length(), replaceStr );
5905     return true;
5906 }
5907 
replaceParam(int index,const lString32 & replaceStr)5908 bool lString32::replaceParam(int index, const lString32 & replaceStr)
5909 {
5910     return replace( cs32("$") + fmt::decimal(index), replaceStr );
5911 }
5912 
5913 /// replaces first found occurence of "$N" pattern with itoa of integer, where N=index
replaceIntParam(int index,int replaceNumber)5914 bool lString32::replaceIntParam(int index, int replaceNumber)
5915 {
5916     return replaceParam( index, lString32::itoa(replaceNumber));
5917 }
5918 
decodeHex(lChar32 ch)5919 static int decodeHex( lChar32 ch )
5920 {
5921     if ( ch>='0' && ch<='9' )
5922         return ch-'0';
5923     else if ( ch>='a' && ch<='f' )
5924         return ch-'a'+10;
5925     else if ( ch>='A' && ch<='F' )
5926         return ch-'A'+10;
5927     return -1;
5928 }
5929 
decodeHTMLChar(const lChar32 * s)5930 static lChar8 decodeHTMLChar( const lChar32 * s )
5931 {
5932     if (s[0] == '%') {
5933         int d1 = decodeHex( s[1] );
5934         if (d1 >= 0) {
5935             int d2 = decodeHex( s[2] );
5936             if (d2 >= 0) {
5937                 return (lChar8)(d1*16 + d2);
5938             }
5939         }
5940     }
5941     return 0;
5942 }
5943 
5944 /// decodes path like "file%20name%C3%A7" to "file nameç"
DecodeHTMLUrlString(lString32 s)5945 lString32 DecodeHTMLUrlString( lString32 s )
5946 {
5947     const lChar32 * str = s.c_str();
5948     for ( int i=0; str[i]; i++ ) {
5949         if ( str[i]=='%'  ) {
5950             lChar8 ch = decodeHTMLChar( str + i );
5951             if ( ch==0 ) {
5952                 continue;
5953             }
5954             // HTML encoded char found
5955             lString8 res;
5956             res.reserve(s.length());
5957             res.append(UnicodeToUtf8(str, i));
5958             res.append(1, ch);
5959             i+=3;
5960 
5961             // continue conversion
5962             for ( ; str[i]; i++ ) {
5963                 if ( str[i]=='%'  ) {
5964                     ch = decodeHTMLChar( str + i );
5965                     if ( ch==0 ) {
5966                         res.append(1, (lChar8)str[i]);
5967                         continue;
5968                     }
5969                     res.append(1, ch);
5970                     i+=2;
5971                 } else {
5972                     res.append(1, (lChar8)str[i]);
5973                 }
5974             }
5975             return Utf8ToUnicode(res);
5976         }
5977     }
5978     return s;
5979 }
5980 
limitStringSize(lString32 & str,int maxSize)5981 void limitStringSize(lString32 & str, int maxSize) {
5982     if (str.length() < maxSize)
5983         return;
5984     int lastSpace = -1;
5985     for (int i = str.length() - 1; i > 0; i--)
5986         if (str[i] == ' ') {
5987             while (i > 0 && str[i - 1] == ' ')
5988                 i--;
5989             lastSpace = i;
5990             break;
5991         }
5992     int split = lastSpace > 0 ? lastSpace : maxSize;
5993     str = str.substr(0, split);
5994     str += "...";
5995 }
5996 
5997 /// remove soft-hyphens from string
removeSoftHyphens(lString32 s)5998 lString32 removeSoftHyphens( lString32 s )
5999 {
6000     lChar32 hyphen = lChar32(UNICODE_SOFT_HYPHEN_CODE);
6001     int start = 0;
6002     while (true) {
6003         int p = -1;
6004         int len = s.length();
6005         for (int i = start; i < len; i++) {
6006             if (s[i] == hyphen) {
6007                 p = i;
6008                 break;
6009             }
6010         }
6011         if (p == -1)
6012             break;
6013         start = p;
6014         lString32 s1 = s.substr( 0, p );
6015         lString32 s2 = p < len-1 ? s.substr( p+1, len-p-1 ) : lString32::empty_str;
6016         s = s1 + s2;
6017     }
6018     return s;
6019 }
6020