1 /*******************************************************
2
3 CoolReader Engine
4
5 lvstring.cpp: string classes implementation
6
7 (c) Vadim Lopatin, 2000-2006
8 This source code is distributed under the terms of
9 GNU General Public License
10 See LICENSE file for details
11
12 *******************************************************/
13
14 #include "../include/lvstring.h"
15
16 #include <stdlib.h>
17 #include <assert.h>
18 #include <string.h>
19 #include <stdio.h>
20 #include <stddef.h>
21 #include <stdarg.h>
22 #include <stddef.h>
23 #include <time.h>
24
25 #if !defined(__SYMBIAN32__) && defined(_WIN32)
26 extern "C" {
27 #include <windows.h>
28 }
29 #endif
30
31 #if (USE_ZLIB==1)
32 #include <zlib.h>
33 #endif
34
35 #if (USE_UTF8PROC==1)
36 #include <utf8proc.h>
37 #endif
38
39 #if !defined(__SYMBIAN32__) && defined(_WIN32)
40 extern "C" {
41 #include <windows.h>
42 }
43 #endif
44
45 #define LS_DEBUG_CHECK
46
// set to 1 to trace (via CRLog::trace) every allocation of a static constant string
48 #define DEBUG_STATIC_STRING_ALLOC 0
49
50
// Storage for the empty 8-bit string: a buffer holding only the terminator,
// a static chunk constructed over it, and the class-level pointer to that chunk.
static lChar8 empty_str_8[] = {0};
static lstring8_chunk_t empty_chunk_8(empty_str_8);
lstring8_chunk_t * lString8::EMPTY_STR_8 = &empty_chunk_8;

// Same trio for 16-bit strings.
static lChar16 empty_str_16[] = {0};
static lstring16_chunk_t empty_chunk_16(empty_str_16);
lstring16_chunk_t * lString16::EMPTY_STR_16 = &empty_chunk_16;

// Same trio for 32-bit strings. lString32::free() compares pchunk against
// EMPTY_STR_32 so that this static chunk is never passed to ::free().
static lChar32 empty_str_32[] = {0};
static lstring32_chunk_t empty_chunk_32(empty_str_32);
lstring32_chunk_t * lString32::EMPTY_STR_32 = &empty_chunk_32;
62
63 //================================================================================
64 // atomic string storages for string literals
65 //================================================================================
66
67 static const void * const_ptrs_8[CONST_STRING_BUFFER_SIZE] = {NULL};
68 static lString8 values_8[CONST_STRING_BUFFER_SIZE];
69 static int size_8 = 0;
70
71 /// get reference to atomic constant string for string literal e.g. cs8("abc") -- fast and memory effective
cs8(const char * str)72 const lString8 & cs8(const char * str) {
73 int index = (int)(((ptrdiff_t)str * CONST_STRING_BUFFER_HASH_MULT) & CONST_STRING_BUFFER_MASK);
74 for (;;) {
75 const void * p = const_ptrs_8[index];
76 if (p == str) {
77 return values_8[index];
78 } else if (p == NULL) {
79 #if DEBUG_STATIC_STRING_ALLOC == 1
80 CRLog::trace("allocating static string8 %s", str);
81 #endif
82 const_ptrs_8[index] = str;
83 size_8++;
84 values_8[index] = lString8(str);
85 values_8[index].addref();
86 return values_8[index];
87 }
88 if (size_8 > CONST_STRING_BUFFER_SIZE / 4) {
89 crFatalError(-1, "out of memory for const string8");
90 }
91 index = (index + 1) & CONST_STRING_BUFFER_MASK;
92 }
93 return lString8::empty_str;
94 }
95
96 static const void * const_ptrs_32[CONST_STRING_BUFFER_SIZE] = {NULL};
97 static lString32 values_32[CONST_STRING_BUFFER_SIZE];
98 static int size_32 = 0;
99
100 /// get reference to atomic constant wide string for string literal e.g. cs32("abc") -- fast and memory effective
cs32(const char * str)101 const lString32 & cs32(const char * str) {
102 int index = (int)(((ptrdiff_t)str * CONST_STRING_BUFFER_HASH_MULT) & CONST_STRING_BUFFER_MASK);
103 for (;;) {
104 const void * p = const_ptrs_32[index];
105 if (p == str) {
106 return values_32[index];
107 } else if (p == NULL) {
108 #if DEBUG_STATIC_STRING_ALLOC == 1
109 CRLog::trace("allocating static string32 %s", str);
110 #endif
111 const_ptrs_32[index] = str;
112 size_32++;
113 values_32[index] = lString32(str);
114 values_32[index].addref();
115 return values_32[index];
116 }
117 if (size_32 > CONST_STRING_BUFFER_SIZE / 4) {
118 crFatalError(-1, "out of memory for const string8");
119 }
120 index = (index + 1) & CONST_STRING_BUFFER_MASK;
121 }
122 return lString32::empty_str;
123 }
124
125 /// get reference to atomic constant wide string for string literal e.g. cs32(U"abc") -- fast and memory effective
cs32(const lChar32 * str)126 const lString32 & cs32(const lChar32 * str) {
127 int index = (((int)((ptrdiff_t)str)) * CONST_STRING_BUFFER_HASH_MULT) & CONST_STRING_BUFFER_MASK;
128 for (;;) {
129 const void * p = const_ptrs_32[index];
130 if (p == str) {
131 return values_32[index];
132 } else if (p == NULL) {
133 #if DEBUG_STATIC_STRING_ALLOC == 1
134 CRLog::trace("allocating static string32 %s", LCSTR(str));
135 #endif
136 const_ptrs_32[index] = str;
137 size_32++;
138 values_32[index] = lString32(str);
139 values_32[index].addref();
140 return values_32[index];
141 }
142 if (size_32 > CONST_STRING_BUFFER_SIZE / 4) {
143 crFatalError(-1, "out of memory for const string8");
144 }
145 index = (index + 1) & CONST_STRING_BUFFER_MASK;
146 }
147 return lString32::empty_str;
148 }
149
150
151
152 //================================================================================
153 // memory allocation slice
154 //================================================================================
155 struct lstring_chunk_slice_t {
156 lstring8_chunk_t * pChunks; // first chunk
157 lstring8_chunk_t * pEnd; // first free byte after last chunk
158 lstring8_chunk_t * pFree; // first free chunk
159 int used;
lstring_chunk_slice_tlstring_chunk_slice_t160 lstring_chunk_slice_t( int size )
161 {
162 pChunks = (lstring8_chunk_t *) malloc(sizeof(lstring8_chunk_t) * size);
163 pEnd = pChunks + size;
164 pFree = pChunks;
165 for (lstring8_chunk_t * p = pChunks; p<pEnd; ++p)
166 {
167 p->buf8 = (char*)(p+1);
168 p->size = 0;
169 }
170 (pEnd-1)->buf8 = NULL;
171 }
~lstring_chunk_slice_tlstring_chunk_slice_t172 ~lstring_chunk_slice_t()
173 {
174 free( pChunks );
175 }
alloc_chunklstring_chunk_slice_t176 inline lstring8_chunk_t * alloc_chunk()
177 {
178 lstring8_chunk_t * res = pFree;
179 pFree = (lstring8_chunk_t *)res->buf8;
180 return res;
181 }
alloc_chunk16lstring_chunk_slice_t182 inline lstring16_chunk_t * alloc_chunk16()
183 {
184 lstring16_chunk_t * res = (lstring16_chunk_t *)pFree;
185 pFree = (lstring8_chunk_t *)res->buf16;
186 return res;
187 }
alloc_chunk32lstring_chunk_slice_t188 inline lstring32_chunk_t * alloc_chunk32()
189 {
190 lstring32_chunk_t * res = (lstring32_chunk_t *)pFree;
191 pFree = (lstring8_chunk_t *)res->buf32;
192 return res;
193 }
free_chunklstring_chunk_slice_t194 inline bool free_chunk( lstring8_chunk_t * pChunk )
195 {
196 if (pChunk < pChunks || pChunk >= pEnd)
197 return false; // chunk does not belong to this slice
198 /*
199 #ifdef LS_DEBUG_CHECK
200 if (!pChunk->size)
201 {
202 crFatalError(); // already freed!!!
203 }
204 pChunk->size = 0;
205 #endif
206 */
207 pChunk->buf8 = (char *)pFree;
208 pFree = pChunk;
209 return true;
210 }
free_chunk16lstring_chunk_slice_t211 inline bool free_chunk16(lstring16_chunk_t * pChunk)
212 {
213 if ((lstring8_chunk_t *)pChunk < pChunks || (lstring8_chunk_t *)pChunk >= pEnd)
214 return false; // chunk does not belong to this slice
215 /*
216 #ifdef LS_DEBUG_CHECK
217 if (!pChunk->size)
218 {
219 crFatalError(); // already freed!!!
220 }
221 pChunk->size = 0;
222 #endif
223 */
224 pChunk->buf16 = (lChar16 *)pFree;
225 pFree = (lstring8_chunk_t *)pChunk;
226 return true;
227 }
free_chunk32lstring_chunk_slice_t228 inline bool free_chunk32(lstring32_chunk_t * pChunk)
229 {
230 if ((lstring8_chunk_t *)pChunk < pChunks || (lstring8_chunk_t *)pChunk >= pEnd)
231 return false; // chunk does not belong to this slice
232 /*
233 #ifdef LS_DEBUG_CHECK
234 if (!pChunk->size)
235 {
236 crFatalError(); // already freed!!!
237 }
238 pChunk->size = 0;
239 #endif
240 */
241 pChunk->buf32 = (lChar32 *)pFree;
242 pFree = (lstring8_chunk_t *)pChunk;
243 return true;
244 }
245 };
246
247 //#define FIRST_SLICE_SIZE 256
248 //#define MAX_SLICE_COUNT 20
249 #if (LDOM_USE_OWN_MEM_MAN == 1)
250 static lstring_chunk_slice_t * slices[MAX_SLICE_COUNT];
251 static int slices_count = 0;
252 static bool slices_initialized = false;
253 #endif
254
255 #if (LDOM_USE_OWN_MEM_MAN == 1)
init_ls_storage()256 static void init_ls_storage()
257 {
258 slices[0] = new lstring_chunk_slice_t( FIRST_SLICE_SIZE );
259 slices_count = 1;
260 slices_initialized = true;
261 }
262
free_ls_storage()263 void free_ls_storage()
264 {
265 if (!slices_initialized)
266 return;
267 for (int i=0; i<slices_count; i++)
268 {
269 delete slices[i];
270 }
271 slices_count = 0;
272 slices_initialized = false;
273 }
274
alloc()275 lstring8_chunk_t * lstring8_chunk_t::alloc()
276 {
277 if (!slices_initialized)
278 init_ls_storage();
279 // search for existing slice
280 for (int i=slices_count-1; i>=0; --i)
281 {
282 if (slices[i]->pFree != NULL)
283 return slices[i]->alloc_chunk();
284 }
285 // alloc new slice
286 if (slices_count >= MAX_SLICE_COUNT)
287 crFatalError();
288 lstring_chunk_slice_t * new_slice = new lstring_chunk_slice_t( FIRST_SLICE_SIZE << (slices_count+1) );
289 slices[slices_count++] = new_slice;
290 return slices[slices_count-1]->alloc_chunk();
291 }
292
free(lstring8_chunk_t * pChunk)293 void lstring8_chunk_t::free( lstring8_chunk_t * pChunk )
294 {
295 for (int i=slices_count-1; i>=0; --i)
296 {
297 if (slices[i]->free_chunk(pChunk))
298 return;
299 }
300 crFatalError(); // wrong pointer!!!
301 }
302
alloc()303 lstring16_chunk_t * lstring16_chunk_t::alloc()
304 {
305 if (!slices_initialized)
306 init_ls_storage();
307 // search for existing slice
308 for (int i=slices_count-1; i>=0; --i)
309 {
310 if (slices[i]->pFree != NULL)
311 return slices[i]->alloc_chunk16();
312 }
313 // alloc new slice
314 if (slices_count >= MAX_SLICE_COUNT)
315 crFatalError();
316 lstring_chunk_slice_t * new_slice = new lstring_chunk_slice_t( FIRST_SLICE_SIZE << (slices_count+1) );
317 slices[slices_count++] = new_slice;
318 return slices[slices_count-1]->alloc_chunk16();
319 }
320
free(lstring16_chunk_t * pChunk)321 void lstring16_chunk_t::free( lstring16_chunk_t * pChunk )
322 {
323 for (int i=slices_count-1; i>=0; --i)
324 {
325 if (slices[i]->free_chunk16(pChunk))
326 return;
327 }
328 crFatalError(); // wrong pointer!!!
329 }
330
alloc()331 lstring32_chunk_t * lstring32_chunk_t::alloc()
332 {
333 if (!slices_initialized)
334 init_ls_storage();
335 // search for existing slice
336 for (int i=slices_count-1; i>=0; --i)
337 {
338 if (slices[i]->pFree != NULL)
339 return slices[i]->alloc_chunk32();
340 }
341 // alloc new slice
342 if (slices_count >= MAX_SLICE_COUNT)
343 crFatalError();
344 lstring_chunk_slice_t * new_slice = new lstring_chunk_slice_t( FIRST_SLICE_SIZE << (slices_count+1) );
345 slices[slices_count++] = new_slice;
346 return slices[slices_count-1]->alloc_chunk32();
347 }
348
free(lstring32_chunk_t * pChunk)349 void lstring32_chunk_t::free( lstring32_chunk_t * pChunk )
350 {
351 for (int i=slices_count-1; i>=0; --i)
352 {
353 if (slices[i]->free_chunk32(pChunk))
354 return;
355 }
356 crFatalError(); // wrong pointer!!!
357 }
358 #endif // (LDOM_USE_OWN_MEM_MAN == 1)
359
360 ////////////////////////////////////////////////////////////////////////////
361 // Utility functions
362 ////////////////////////////////////////////////////////////////////////////
363
_lStr_len(const lChar16 * str)364 inline int _lStr_len(const lChar16 * str)
365 {
366 int len;
367 for (len=0; *str; str++)
368 len++;
369 return len;
370 }
371
_lStr_len(const lChar32 * str)372 inline int _lStr_len(const lChar32 * str)
373 {
374 int len;
375 for (len=0; *str; str++)
376 len++;
377 return len;
378 }
379
_lStr_len(const lChar8 * str)380 inline int _lStr_len(const lChar8 * str)
381 {
382 int len;
383 for (len=0; *str; str++)
384 len++;
385 return len;
386 }
387
_lStr_nlen(const lChar16 * str,int maxcount)388 inline int _lStr_nlen(const lChar16 * str, int maxcount)
389 {
390 int len;
391 for (len=0; len<maxcount && *str; str++)
392 len++;
393 return len;
394 }
395
_lStr_nlen(const lChar32 * str,int maxcount)396 inline int _lStr_nlen(const lChar32 * str, int maxcount)
397 {
398 int len;
399 for (len=0; len<maxcount && *str; str++)
400 len++;
401 return len;
402 }
403
_lStr_nlen(const lChar8 * str,int maxcount)404 inline int _lStr_nlen(const lChar8 * str, int maxcount)
405 {
406 int len;
407 for (len=0; len<maxcount && *str; str++)
408 len++;
409 return len;
410 }
411
_lStr_cpy(lChar16 * dst,const lChar16 * src)412 inline int _lStr_cpy(lChar16 * dst, const lChar16 * src)
413 {
414 int count;
415 for ( count=0; (*dst++ = *src++); count++ )
416 ;
417 return count;
418 }
419
_lStr_cpy(lChar32 * dst,const lChar32 * src)420 inline int _lStr_cpy(lChar32 * dst, const lChar32 * src)
421 {
422 int count;
423 for ( count=0; (*dst++ = *src++); count++ )
424 ;
425 return count;
426 }
427
_lStr_cpy(lChar8 * dst,const lChar8 * src)428 inline int _lStr_cpy(lChar8 * dst, const lChar8 * src)
429 {
430 int count;
431 for ( count=0; (*dst++ = *src++); count++ )
432 ;
433 return count;
434 }
435
_lStr_cpy(lChar16 * dst,const lChar8 * src)436 inline int _lStr_cpy(lChar16 * dst, const lChar8 * src)
437 {
438 int count;
439 for ( count=0; (*dst++ = *src++); count++ )
440 ;
441 return count;
442 }
443
_lStr_cpy(lChar32 * dst,const lChar8 * src)444 inline int _lStr_cpy(lChar32 * dst, const lChar8 * src)
445 {
446 int count;
447 for ( count=0; (*dst++ = *src++); count++ )
448 ;
449 return count;
450 }
451
_lStr_cpy(lChar8 * dst,const lChar16 * src)452 inline int _lStr_cpy(lChar8 * dst, const lChar16 * src)
453 {
454 int count;
455 for ( count=0; (*dst++ = (lChar8)*src++); count++ )
456 ;
457 return count;
458 }
459
_lStr_cpy(lChar8 * dst,const lChar32 * src)460 inline int _lStr_cpy(lChar8 * dst, const lChar32 * src)
461 {
462 int count;
463 for ( count=0; (*dst++ = (lChar8)*src++); count++ )
464 ;
465 return count;
466 }
467
_lStr_ncpy(lChar32 * dst,const lChar32 * src,int maxcount)468 inline int _lStr_ncpy(lChar32 * dst, const lChar32 * src, int maxcount)
469 {
470 int count = 0;
471 do
472 {
473 if (++count > maxcount)
474 {
475 *dst = 0;
476 return count;
477 }
478 } while ((*dst++ = *src++));
479 return count;
480 }
481
_lStr_ncpy(lChar16 * dst,const lChar16 * src,int maxcount)482 inline int _lStr_ncpy(lChar16 * dst, const lChar16 * src, int maxcount)
483 {
484 int count = 0;
485 do
486 {
487 if (++count > maxcount)
488 {
489 *dst = 0;
490 return count;
491 }
492 } while ((*dst++ = *src++));
493 return count;
494 }
495
_lStr_ncpy(lChar16 * dst,const lChar8 * src,int maxcount)496 inline int _lStr_ncpy(lChar16 * dst, const lChar8 * src, int maxcount)
497 {
498 int count = 0;
499 do
500 {
501 if (++count > maxcount)
502 {
503 *dst = 0;
504 return count;
505 }
506 } while ((*dst++ = (unsigned char)*src++));
507 return count;
508 }
509
_lStr_ncpy(lChar32 * dst,const lChar8 * src,int maxcount)510 inline int _lStr_ncpy(lChar32 * dst, const lChar8 * src, int maxcount)
511 {
512 int count = 0;
513 do
514 {
515 if (++count > maxcount)
516 {
517 *dst = 0;
518 return count;
519 }
520 } while ((*dst++ = (unsigned char)*src++));
521 return count;
522 }
523
_lStr_ncpy(lChar8 * dst,const lChar8 * src,int maxcount)524 inline int _lStr_ncpy(lChar8 * dst, const lChar8 * src, int maxcount)
525 {
526 int count = 0;
527 do
528 {
529 if (++count > maxcount)
530 {
531 *dst = 0;
532 return count;
533 }
534 } while ((*dst++ = *src++));
535 return count;
536 }
537
_lStr_memcpy(lChar16 * dst,const lChar16 * src,int count)538 inline void _lStr_memcpy(lChar16 * dst, const lChar16 * src, int count)
539 {
540 while ( count-- > 0)
541 (*dst++ = *src++);
542 }
543
_lStr_memcpy(lChar32 * dst,const lChar32 * src,int count)544 inline void _lStr_memcpy(lChar32 * dst, const lChar32 * src, int count)
545 {
546 while ( count-- > 0)
547 (*dst++ = *src++);
548 }
549
_lStr_memcpy(lChar8 * dst,const lChar8 * src,int count)550 inline void _lStr_memcpy(lChar8 * dst, const lChar8 * src, int count)
551 {
552 memcpy(dst, (const lChar8 *) src, count);
553 }
554
_lStr_memset(lChar16 * dst,lChar16 value,int count)555 inline void _lStr_memset(lChar16 * dst, lChar16 value, int count)
556 {
557 while ( count-- > 0)
558 *dst++ = value;
559 }
560
_lStr_memset(lChar32 * dst,lChar32 value,int count)561 inline void _lStr_memset(lChar32 * dst, lChar32 value, int count)
562 {
563 while ( count-- > 0)
564 *dst++ = value;
565 }
566
_lStr_memset(lChar8 * dst,lChar8 value,int count)567 inline void _lStr_memset(lChar8 * dst, lChar8 value, int count)
568 {
569 memset(dst, (lChar8) value, count);
570 }
571
/// Non-inline entry point: length of a zero-terminated 16-bit string; forwards to _lStr_len().
int lStr_len(const lChar16 * str)
{
    return _lStr_len(str);
}

/// Non-inline entry point: length of a zero-terminated 32-bit string; forwards to _lStr_len().
int lStr_len(const lChar32 * str)
{
    return _lStr_len(str);
}

/// Non-inline entry point: length of a zero-terminated 8-bit string; forwards to _lStr_len().
int lStr_len(const lChar8 * str)
{
    return _lStr_len(str);
}
586
/// Non-inline entry point: length of a 16-bit string, limited to maxcount; forwards to _lStr_nlen().
int lStr_nlen(const lChar16 * str, int maxcount)
{
    return _lStr_nlen(str, maxcount);
}

/// Non-inline entry point: length of a 32-bit string, limited to maxcount; forwards to _lStr_nlen().
int lStr_nlen(const lChar32 * str, int maxcount)
{
    return _lStr_nlen(str, maxcount);
}

/// Non-inline entry point: length of an 8-bit string, limited to maxcount; forwards to _lStr_nlen().
int lStr_nlen(const lChar8 * str, int maxcount)
{
    return _lStr_nlen(str, maxcount);
}
601
/// Non-inline entry point: copies a zero-terminated 16-bit string; forwards to _lStr_cpy().
/// Returns the number of characters copied, terminator not counted.
int lStr_cpy(lChar16 * dst, const lChar16 * src)
{
    return _lStr_cpy(dst, src);
}

/// Non-inline entry point: copies a zero-terminated 32-bit string; forwards to _lStr_cpy().
int lStr_cpy(lChar32 * dst, const lChar32 * src)
{
    return _lStr_cpy(dst, src);
}

/// Non-inline entry point: copies a zero-terminated 8-bit string; forwards to _lStr_cpy().
int lStr_cpy(lChar8 * dst, const lChar8 * src)
{
    return _lStr_cpy(dst, src);
}

/// Non-inline entry point: widens an 8-bit string into a 16-bit buffer; forwards to _lStr_cpy().
int lStr_cpy(lChar16 * dst, const lChar8 * src)
{
    return _lStr_cpy(dst, src);
}

/// Non-inline entry point: widens an 8-bit string into a 32-bit buffer; forwards to _lStr_cpy().
int lStr_cpy(lChar32 * dst, const lChar8 * src)
{
    return _lStr_cpy(dst, src);
}
626
/// Non-inline entry point: bounded copy of a 16-bit string; forwards to _lStr_ncpy(),
/// which always zero-terminates dst (dst needs room for maxcount+1 elements).
int lStr_ncpy(lChar16 * dst, const lChar16 * src, int maxcount)
{
    return _lStr_ncpy(dst, src, maxcount);
}

/// Non-inline entry point: bounded copy of a 32-bit string; forwards to _lStr_ncpy().
int lStr_ncpy(lChar32 * dst, const lChar32 * src, int maxcount)
{
    return _lStr_ncpy(dst, src, maxcount);
}

/// Non-inline entry point: bounded copy of an 8-bit string; forwards to _lStr_ncpy().
int lStr_ncpy(lChar8 * dst, const lChar8 * src, int maxcount)
{
    return _lStr_ncpy(dst, src, maxcount);
}
641
/// Non-inline entry point: copies count 16-bit characters; forwards to _lStr_memcpy().
void lStr_memcpy(lChar16 * dst, const lChar16 * src, int count)
{
    _lStr_memcpy(dst, src, count);
}

/// Non-inline entry point: copies count 32-bit characters; forwards to _lStr_memcpy().
void lStr_memcpy(lChar32 * dst, const lChar32 * src, int count)
{
    _lStr_memcpy(dst, src, count);
}

/// Non-inline entry point: copies count 8-bit characters; forwards to _lStr_memcpy().
void lStr_memcpy(lChar8 * dst, const lChar8 * src, int count)
{
    _lStr_memcpy(dst, src, count);
}
656
/// Non-inline entry point: fills count 16-bit elements with value; forwards to _lStr_memset().
void lStr_memset(lChar16 * dst, lChar16 value, int count)
{
    _lStr_memset(dst, value, count);
}

/// Non-inline entry point: fills count 32-bit elements with value; forwards to _lStr_memset().
void lStr_memset(lChar32 * dst, lChar32 value, int count)
{
    _lStr_memset(dst, value, count);
}

/// Non-inline entry point: fills count 8-bit elements with value; forwards to _lStr_memset().
void lStr_memset(lChar8 * dst, lChar8 value, int count)
{
    _lStr_memset(dst, value, count);
}
671
lStr_cmp(const lChar16 * dst,const lChar16 * src)672 int lStr_cmp(const lChar16 * dst, const lChar16 * src)
673 {
674 if (dst == src)
675 return 0;
676 if (!dst)
677 return -1;
678 else if (!src)
679 return 1;
680 while ( *dst == *src)
681 {
682 if (! *dst )
683 return 0;
684 ++dst;
685 ++src;
686 }
687 if ( *dst > *src )
688 return 1;
689 else
690 return -1;
691 }
692
lStr_cmp(const lChar32 * dst,const lChar32 * src)693 int lStr_cmp(const lChar32 * dst, const lChar32 * src)
694 {
695 if (dst == src)
696 return 0;
697 if (!dst)
698 return -1;
699 else if (!src)
700 return 1;
701 while ( *dst == *src)
702 {
703 if (! *dst )
704 return 0;
705 ++dst;
706 ++src;
707 }
708 if ( *dst > *src )
709 return 1;
710 else
711 return -1;
712 }
713
lStr_cmp(const lChar8 * dst,const lChar8 * src)714 int lStr_cmp(const lChar8 * dst, const lChar8 * src)
715 {
716 if (dst == src)
717 return 0;
718 if (!dst)
719 return -1;
720 else if (!src)
721 return 1;
722 while ( *dst == *src)
723 {
724 if (! *dst )
725 return 0;
726 ++dst;
727 ++src;
728 }
729 if ( *dst > *src )
730 return 1;
731 else
732 return -1;
733 }
734
lStr_cmp(const lChar16 * dst,const lChar8 * src)735 int lStr_cmp(const lChar16 * dst, const lChar8 * src)
736 {
737 if (!dst && !src)
738 return 0;
739 if (!dst)
740 return -1;
741 else if (!src)
742 return 1;
743 while ( *dst == (lChar16)*src)
744 {
745 if (! *dst )
746 return 0;
747 ++dst;
748 ++src;
749 }
750 if ( *dst > (lChar16)*src )
751 return 1;
752 else
753 return -1;
754 }
755
lStr_cmp(const lChar32 * dst,const lChar8 * src)756 int lStr_cmp(const lChar32 * dst, const lChar8 * src)
757 {
758 if (!dst && !src)
759 return 0;
760 if (!dst)
761 return -1;
762 else if (!src)
763 return 1;
764 while ( *dst == (lChar32)*src)
765 {
766 if (! *dst )
767 return 0;
768 ++dst;
769 ++src;
770 }
771 if ( *dst > (lChar32)*src )
772 return 1;
773 else
774 return -1;
775 }
776
lStr_cmp(const lChar8 * dst,const lChar16 * src)777 int lStr_cmp(const lChar8 * dst, const lChar16 * src)
778 {
779 if (!dst && !src)
780 return 0;
781 if (!dst)
782 return -1;
783 else if (!src)
784 return 1;
785 while ( (lChar16)*dst == *src)
786 {
787 if (! *dst )
788 return 0;
789 ++dst;
790 ++src;
791 }
792 if ( (lChar16)*dst > *src )
793 return 1;
794 else
795 return -1;
796 }
797
lStr_cmp(const lChar8 * dst,const lChar32 * src)798 int lStr_cmp(const lChar8 * dst, const lChar32 * src)
799 {
800 if (!dst && !src)
801 return 0;
802 if (!dst)
803 return -1;
804 else if (!src)
805 return 1;
806 while ( (lChar32)*dst == *src)
807 {
808 if (! *dst )
809 return 0;
810 ++dst;
811 ++src;
812 }
813 if ( (lChar32)*dst > *src )
814 return 1;
815 else
816 return -1;
817 }
818
lStr_cmp(const lChar32 * dst,const lChar16 * src)819 int lStr_cmp(const lChar32 * dst, const lChar16 * src) {
820 if (!dst && !src)
821 return 0;
822 if (!dst)
823 return -1;
824 else if (!src)
825 return 1;
826 while ( *dst == (lChar32)*src)
827 {
828 if (! *dst )
829 return 0;
830 ++dst;
831 ++src;
832 }
833 if ( *dst > (lChar32)*src )
834 return 1;
835 else
836 return -1;
837 }
838
lStr_cmp(const lChar16 * dst,const lChar32 * src)839 int lStr_cmp(const lChar16 * dst, const lChar32 * src)
840 {
841 if (!dst && !src)
842 return 0;
843 if (!dst)
844 return -1;
845 else if (!src)
846 return 1;
847 while ( (lChar32)*dst == *src)
848 {
849 if (! *dst )
850 return 0;
851 ++dst;
852 ++src;
853 }
854 if ( (lChar32)*dst > *src )
855 return 1;
856 else
857 return -1;
858 }
859
860 ////////////////////////////////////////////////////////////////////////////
861 // lString32
862 ////////////////////////////////////////////////////////////////////////////
863
free()864 void lString32::free()
865 {
866 if ( pchunk==EMPTY_STR_32 )
867 return;
868 //assert(pchunk->buf32[pchunk->len]==0);
869 ::free(pchunk->buf32);
870 #if (LDOM_USE_OWN_MEM_MAN == 1)
871 for (int i=slices_count-1; i>=0; --i)
872 {
873 if (slices[i]->free_chunk32(pchunk))
874 return;
875 }
876 crFatalError(); // wrong pointer!!!
877 #else
878 ::free(pchunk);
879 #endif
880 }
881
alloc(int sz)882 void lString32::alloc(int sz)
883 {
884 #if (LDOM_USE_OWN_MEM_MAN == 1)
885 pchunk = lstring_chunk_t::alloc();
886 #else
887 pchunk = (lstring_chunk_t*)::malloc(sizeof(lstring_chunk_t));
888 #endif
889 pchunk->buf32 = (lChar32*) ::malloc( sizeof(lChar32) * (sz+1) );
890 assert( pchunk->buf32!=NULL );
891 pchunk->size = sz;
892 pchunk->refCount = 1;
893 }
894
lString32(const lChar32 * str)895 lString32::lString32(const lChar32 * str)
896 {
897 if (!str || !(*str))
898 {
899 pchunk = EMPTY_STR_32;
900 addref();
901 return;
902 }
903 size_type len = _lStr_len(str);
904 alloc( len );
905 pchunk->len = len;
906 _lStr_cpy( pchunk->buf32, str );
907 }
908
lString32(const lChar8 * str)909 lString32::lString32(const lChar8 * str)
910 {
911 if (!str || !(*str))
912 {
913 pchunk = EMPTY_STR_32;
914 addref();
915 return;
916 }
917 pchunk = EMPTY_STR_32;
918 addref();
919 *this = Utf8ToUnicode( str );
920 }
921
922 /// constructor from utf8 character array fragment
lString32(const lChar8 * str,size_type count)923 lString32::lString32(const lChar8 * str, size_type count)
924 {
925 if (!str || !(*str))
926 {
927 pchunk = EMPTY_STR_32;
928 addref();
929 return;
930 }
931 pchunk = EMPTY_STR_32;
932 addref();
933 *this = Utf8ToUnicode( str, count );
934 }
935
936
lString32(const value_type * str,size_type count)937 lString32::lString32(const value_type * str, size_type count)
938 {
939 if ( !str || !(*str) || count<=0 )
940 {
941 pchunk = EMPTY_STR_32; addref();
942 }
943 else
944 {
945 size_type len = _lStr_nlen(str, count);
946 alloc(len);
947 _lStr_ncpy( pchunk->buf32, str, len );
948 pchunk->len = len;
949 }
950 }
951
/// Constructs from the substring of str starting at offset, at most count characters
/// (count is clipped to what is available after offset).
/// An empty result shares the static empty chunk.
lString32::lString32(const lString32 & str, size_type offset, size_type count)
{
    const size_type avail = str.length() - offset;
    if ( count > avail )
        count = avail;
    if (count<=0)
    {
        pchunk = EMPTY_STR_32;
        addref();
        return;
    }
    alloc(count);
    _lStr_memcpy( pchunk->buf32, str.pchunk->buf32+offset, count );
    pchunk->len = count;
    pchunk->buf32[count]=0;
}
968
assign(const lChar32 * str)969 lString32 & lString32::assign(const lChar32 * str)
970 {
971 if (!str || !(*str))
972 {
973 clear();
974 }
975 else
976 {
977 size_type len = _lStr_len(str);
978 if (refCount()==1)
979 {
980 if (pchunk->size<=len)
981 {
982 // resize is necessary
983 pchunk->buf32 = (lChar32*) ::realloc( pchunk->buf32, sizeof(lChar32)*(len+1) );
984 pchunk->size = len+1;
985 }
986 }
987 else
988 {
989 release();
990 alloc(len);
991 }
992 _lStr_cpy( pchunk->buf32, str );
993 pchunk->len = len;
994 }
995 return *this;
996 }
997
assign(const lChar8 * str)998 lString32 & lString32::assign(const lChar8 * str)
999 {
1000 if (!str || !(*str))
1001 {
1002 clear();
1003 }
1004 else
1005 {
1006 size_type len = _lStr_len(str);
1007 if (refCount()==1)
1008 {
1009 if (pchunk->size<=len)
1010 {
1011 // resize is necessary
1012 pchunk->buf32 = (lChar32*) ::realloc( pchunk->buf32, sizeof(lChar32)*(len+1) );
1013 pchunk->size = len+1;
1014 }
1015 }
1016 else
1017 {
1018 release();
1019 alloc(len);
1020 }
1021 _lStr_cpy( pchunk->buf32, str );
1022 pchunk->len = len;
1023 }
1024 return *this;
1025 }
1026
assign(const lChar32 * str,size_type count)1027 lString32 & lString32::assign(const lChar32 * str, size_type count)
1028 {
1029 if ( !str || !(*str) || count<=0 )
1030 {
1031 clear();
1032 }
1033 else
1034 {
1035 size_type len = _lStr_nlen(str, count);
1036 if (refCount()==1)
1037 {
1038 if (pchunk->size<=len)
1039 {
1040 // resize is necessary
1041 pchunk->buf32 = (lChar32*) ::realloc( pchunk->buf32, sizeof(lChar32)*(len+1) );
1042 pchunk->size = len+1;
1043 }
1044 }
1045 else
1046 {
1047 release();
1048 alloc(len);
1049 }
1050 _lStr_ncpy( pchunk->buf32, str, count );
1051 pchunk->len = len;
1052 }
1053 return *this;
1054 }
1055
assign(const lChar8 * str,size_type count)1056 lString32 & lString32::assign(const lChar8 * str, size_type count)
1057 {
1058 if ( !str || !(*str) || count<=0 )
1059 {
1060 clear();
1061 }
1062 else
1063 {
1064 size_type len = _lStr_nlen(str, count);
1065 if (refCount()==1)
1066 {
1067 if (pchunk->size<=len)
1068 {
1069 // resize is necessary
1070 pchunk->buf32 = (lChar32*) ::realloc( pchunk->buf32, sizeof(lChar32)*(len+1) );
1071 pchunk->size = len+1;
1072 }
1073 }
1074 else
1075 {
1076 release();
1077 alloc(len);
1078 }
1079 _lStr_ncpy( pchunk->buf32, str, count );
1080 pchunk->len = len;
1081 }
1082 return *this;
1083 }
1084
/// Replaces the content with the substring of str starting at offset, at most count
/// characters (count is clipped to what is available). An empty result clears the string.
/// Works when str is *this and when str shares this string's chunk; a chunk that has
/// other owners is never modified in place.
/// @return *this
lString32 & lString32::assign(const lString32 & str, size_type offset, size_type count)
{
    if ( count > str.length() - offset )
        count = str.length() - offset;
    if (count<=0)
    {
        clear();
        return *this;
    }
    if (refCount()==1)
    {
        if (pchunk==str.pchunk)
        {
            // Sole owner of the source chunk, so str is *this: shift in place.
            // _lStr_memcpy copies front to back, which is safe here because the
            // destination lies before the source.
            if (offset>0)
                _lStr_memcpy( pchunk->buf32, pchunk->buf32+offset, count );
        }
        else
        {
            if (pchunk->size<=count)
            {
                // resize is necessary
                pchunk->buf32 = (lChar32*) ::realloc( pchunk->buf32, sizeof(lChar32)*(count+1) );
                assert( pchunk->buf32!=NULL );
                // size excludes the terminator, as in alloc()/reserve()
                pchunk->size = count;
            }
            _lStr_memcpy( pchunk->buf32, str.pchunk->buf32+offset, count );
        }
    }
    else
    {
        // Our chunk has other owners (possibly str itself): detach before writing.
        // The source pointer is taken first because str may be *this. The source
        // chunk stays alive: either str is another object holding its own
        // reference, or the remaining owners of our old chunk keep it.
        const lChar32 * src = str.pchunk->buf32 + offset;
        release();
        alloc(count);
        _lStr_memcpy( pchunk->buf32, src, count );
    }
    pchunk->buf32[count]=0;
    pchunk->len = count;
    return *this;
}
1131
erase(size_type offset,size_type count)1132 lString32 & lString32::erase(size_type offset, size_type count)
1133 {
1134 if ( count > length() - offset )
1135 count = length() - offset;
1136 if (count<=0)
1137 {
1138 clear();
1139 }
1140 else
1141 {
1142 size_type newlen = length()-count;
1143 if (refCount()==1)
1144 {
1145 _lStr_memcpy( pchunk->buf32+offset, pchunk->buf32+offset+count, newlen-offset+1 );
1146 }
1147 else
1148 {
1149 lstring_chunk_t * poldchunk = pchunk;
1150 release();
1151 alloc( newlen );
1152 _lStr_memcpy( pchunk->buf32, poldchunk->buf32, offset );
1153 _lStr_memcpy( pchunk->buf32+offset, poldchunk->buf32+offset+count, newlen-offset+1 );
1154 }
1155 pchunk->len = newlen;
1156 pchunk->buf32[newlen]=0;
1157 }
1158 return *this;
1159 }
1160
reserve(size_type n)1161 void lString32::reserve(size_type n)
1162 {
1163 if (refCount()==1)
1164 {
1165 if (pchunk->size < n)
1166 {
1167 pchunk->buf32 = (lChar32*) ::realloc( pchunk->buf32, sizeof(lChar32)*(n+1) );
1168 pchunk->size = n;
1169 }
1170 }
1171 else
1172 {
1173 lstring_chunk_t * poldchunk = pchunk;
1174 release();
1175 alloc( n );
1176 _lStr_memcpy( pchunk->buf32, poldchunk->buf32, poldchunk->len+1 );
1177 pchunk->len = poldchunk->len;
1178 }
1179 }
1180
lock(size_type newsize)1181 void lString32::lock( size_type newsize )
1182 {
1183 if (refCount()>1)
1184 {
1185 lstring_chunk_t * poldchunk = pchunk;
1186 release();
1187 alloc( newsize );
1188 size_type len = newsize;
1189 if (len>poldchunk->len)
1190 len = poldchunk->len;
1191 _lStr_memcpy( pchunk->buf32, poldchunk->buf32, len );
1192 pchunk->buf32[len]=0;
1193 pchunk->len = len;
1194 }
1195 }
1196
1197 // lock string, allocate buffer and reset length to 0
reset(size_type size)1198 void lString32::reset( size_type size )
1199 {
1200 if (refCount()>1 || pchunk->size<size)
1201 {
1202 release();
1203 alloc( size );
1204 }
1205 pchunk->buf32[0] = 0;
1206 pchunk->len = 0;
1207 }
1208
resize(size_type n,lChar32 e)1209 void lString32::resize(size_type n, lChar32 e)
1210 {
1211 lock( n );
1212 if (n>=pchunk->size)
1213 {
1214 pchunk->buf32 = (lChar32*) ::realloc( pchunk->buf32, sizeof(lChar32)*(n+1) );
1215 pchunk->size = n;
1216 }
1217 // fill with data if expanded
1218 for (size_type i=pchunk->len; i<n; i++)
1219 pchunk->buf32[i] = e;
1220 pchunk->buf32[pchunk->len] = 0;
1221 }
1222
append(const lChar32 * str)1223 lString32 & lString32::append(const lChar32 * str)
1224 {
1225 size_type len = _lStr_len(str);
1226 reserve( pchunk->len+len );
1227 _lStr_memcpy(pchunk->buf32+pchunk->len, str, len+1);
1228 pchunk->len += len;
1229 return *this;
1230 }
1231
append(const lChar32 * str,size_type count)1232 lString32 & lString32::append(const lChar32 * str, size_type count)
1233 {
1234 reserve(pchunk->len + count);
1235 _lStr_ncpy(pchunk->buf32 + pchunk->len, str, count);
1236 pchunk->len += count;
1237 return *this;
1238 }
1239
append(const lChar8 * str)1240 lString32 & lString32::append(const lChar8 * str)
1241 {
1242 size_type len = _lStr_len(str);
1243 reserve( pchunk->len+len );
1244 _lStr_ncpy(pchunk->buf32+pchunk->len, str, len+1);
1245 pchunk->len += len;
1246 return *this;
1247 }
1248
append(const lChar8 * str,size_type count)1249 lString32 & lString32::append(const lChar8 * str, size_type count)
1250 {
1251 reserve(pchunk->len + count);
1252 _lStr_ncpy(pchunk->buf32+pchunk->len, str, count);
1253 pchunk->len += count;
1254 return *this;
1255 }
1256
/// Appends another lString32 to this string.
/// Safe when str is this very string (s.append(s)).
/// @return reference to this string
lString32 & lString32::append(const lString32 & str)
{
    // Capture the source length first: if str is *this, it changes below.
    const size_type addLen = str.pchunk->len;
    const size_type newLen = pchunk->len + addLen;
    reserve( newLen );
    // Copy only the characters and write the terminator explicitly.
    // Copying addLen+1 elements overlapped the destination on self-append
    // (source terminator == first destination slot) and lost the terminator.
    _lStr_memcpy( pchunk->buf32+pchunk->len, str.pchunk->buf32, addLen );
    pchunk->len = newLen;
    pchunk->buf32[newLen] = 0;
    return *this;
}
1265
/// Appends a fragment of another string.
/// @param str source string
/// @param offset index of the first character to take from str
/// @param count maximum number of characters to take; clamped to what is
///        available after offset
/// Nothing is appended when offset is at or beyond the end of str.
lString32 & lString32::append(const lString32 & str, size_type offset, size_type count)
{
    if ( str.pchunk->len <= offset )
        return *this;
    // Clamp using a subtraction: "offset + count" could overflow for a
    // very large count, while len - offset is always positive here.
    const size_type avail = str.pchunk->len - offset;
    if ( count > avail )
        count = avail;
    reserve( pchunk->len+count );
    _lStr_ncpy(pchunk->buf32 + pchunk->len, str.pchunk->buf32 + offset, count);
    pchunk->len += count;
    pchunk->buf32[pchunk->len] = 0;
    return *this;
}
1279
append(size_type count,lChar32 ch)1280 lString32 & lString32::append(size_type count, lChar32 ch)
1281 {
1282 reserve( pchunk->len+count );
1283 _lStr_memset(pchunk->buf32+pchunk->len, ch, count);
1284 pchunk->len += count;
1285 pchunk->buf32[pchunk->len] = 0;
1286 return *this;
1287 }
1288
insert(size_type p0,size_type count,lChar32 ch)1289 lString32 & lString32::insert(size_type p0, size_type count, lChar32 ch)
1290 {
1291 if (p0>pchunk->len)
1292 p0 = pchunk->len;
1293 reserve( pchunk->len+count );
1294 for (size_type i=pchunk->len+count; i>p0; i--)
1295 pchunk->buf32[i] = pchunk->buf32[i-1];
1296 _lStr_memset(pchunk->buf32+p0, ch, count);
1297 pchunk->len += count;
1298 pchunk->buf32[pchunk->len] = 0;
1299 return *this;
1300 }
1301
insert(size_type p0,const lString32 & str)1302 lString32 & lString32::insert(size_type p0, const lString32 & str)
1303 {
1304 if (p0>pchunk->len)
1305 p0 = pchunk->len;
1306 int count = str.length();
1307 reserve( pchunk->len+count );
1308 for (size_type i=pchunk->len+count; i>p0; i--)
1309 pchunk->buf32[i] = pchunk->buf32[i-1];
1310 _lStr_memcpy(pchunk->buf32 + p0, str.c_str(), count);
1311 pchunk->len += count;
1312 pchunk->buf32[pchunk->len] = 0;
1313 return *this;
1314 }
1315
substr(size_type pos,size_type n) const1316 lString32 lString32::substr(size_type pos, size_type n) const
1317 {
1318 if (pos>=length())
1319 return lString32::empty_str;
1320 if (pos+n>length())
1321 n = length() - pos;
1322 return lString32( pchunk->buf32+pos, n );
1323 }
1324
pack()1325 lString32 & lString32::pack()
1326 {
1327 if (pchunk->len + 4 < pchunk->size )
1328 {
1329 if (refCount()>1)
1330 {
1331 lock(pchunk->len);
1332 }
1333 else
1334 {
1335 pchunk->buf32 = cr_realloc( pchunk->buf32, pchunk->len+1 );
1336 pchunk->size = pchunk->len;
1337 }
1338 }
1339 return *this;
1340 }
1341
isAlNum(lChar32 ch)1342 bool isAlNum(lChar32 ch) {
1343 lUInt16 props = lGetCharProps(ch);
1344 return (props & (CH_PROP_ALPHA | CH_PROP_DIGIT)) != 0;
1345 }
1346
1347 /// trims non alpha at beginning and end of string
trimNonAlpha()1348 lString32 & lString32::trimNonAlpha()
1349 {
1350 int firstns;
1351 for (firstns = 0; firstns<pchunk->len &&
1352 !isAlNum(pchunk->buf32[firstns]); ++firstns)
1353 ;
1354 if (firstns >= pchunk->len)
1355 {
1356 clear();
1357 return *this;
1358 }
1359 int lastns;
1360 for (lastns = pchunk->len-1; lastns>0 &&
1361 !isAlNum(pchunk->buf32[lastns]); --lastns)
1362 ;
1363 int newlen = lastns-firstns+1;
1364 if (newlen == pchunk->len)
1365 return *this;
1366 if (refCount()==1)
1367 {
1368 if (firstns>0)
1369 lStr_memcpy( pchunk->buf32, pchunk->buf32+firstns, newlen );
1370 pchunk->buf32[newlen] = 0;
1371 pchunk->len = newlen;
1372 }
1373 else
1374 {
1375 lstring_chunk_t * poldchunk = pchunk;
1376 release();
1377 alloc( newlen );
1378 _lStr_memcpy( pchunk->buf32, poldchunk->buf32+firstns, newlen );
1379 pchunk->buf32[newlen] = 0;
1380 pchunk->len = newlen;
1381 }
1382 return *this;
1383 }
1384
trim()1385 lString32 & lString32::trim()
1386 {
1387 //
1388 int firstns;
1389 for (firstns = 0; firstns<pchunk->len &&
1390 (pchunk->buf32[firstns]==' ' || pchunk->buf32[firstns]=='\t'); ++firstns)
1391 ;
1392 if (firstns >= pchunk->len)
1393 {
1394 clear();
1395 return *this;
1396 }
1397 int lastns;
1398 for (lastns = pchunk->len-1; lastns>0 &&
1399 (pchunk->buf32[lastns]==' ' || pchunk->buf32[lastns]=='\t'); --lastns)
1400 ;
1401 int newlen = lastns-firstns+1;
1402 if (newlen == pchunk->len)
1403 return *this;
1404 if (refCount()==1)
1405 {
1406 if (firstns>0)
1407 lStr_memcpy( pchunk->buf32, pchunk->buf32+firstns, newlen );
1408 pchunk->buf32[newlen] = 0;
1409 pchunk->len = newlen;
1410 }
1411 else
1412 {
1413 lstring_chunk_t * poldchunk = pchunk;
1414 release();
1415 alloc( newlen );
1416 _lStr_memcpy( pchunk->buf32, poldchunk->buf32+firstns, newlen );
1417 pchunk->buf32[newlen] = 0;
1418 pchunk->len = newlen;
1419 }
1420 return *this;
1421 }
1422
atoi() const1423 int lString32::atoi() const
1424 {
1425 int n = 0;
1426 atoi(n);
1427 return n;
1428 }
1429
static const char * hex_digits = "0123456789abcdef";
/// Converts the low 4 bits of c (0..15) to a lowercase hex digit '0'..'f'.
char toHexDigit( int c )
{
    const int nibble = c & 0xf;
    return hex_digits[nibble];
}
1436
/// Returns the value 0..15 of hex digit character c (either case),
/// or -1 when c is not a hex digit.
int hexDigit( int c )
{
    if ( '0'<=c && c<='9' )
        return c - '0';
    if ( 'a'<=c && c<='f' )
        return 10 + (c - 'a');
    if ( 'A'<=c && c<='F' )
        return 10 + (c - 'A');
    return -1;
}
1448
1449 // decode LEN hex digits, return decoded number, -1 if invalid
decodeHex(const lChar32 * str,int len)1450 int decodeHex( const lChar32 * str, int len ) {
1451 int n = 0;
1452 for ( int i=0; i<len; i++ ) {
1453 if ( !str[i] )
1454 return -1;
1455 int d = hexDigit(str[i]);
1456 if ( d==-1 )
1457 return -1;
1458 n = (n<<4) | d;
1459 }
1460 return n;
1461 }
1462
1463 // decode LEN decimal digits, return decoded number, -1 if invalid
decodeDecimal(const lChar32 * str,int len)1464 int decodeDecimal( const lChar32 * str, int len ) {
1465 int n = 0;
1466 for ( int i=0; i<len; i++ ) {
1467 if ( !str[i] )
1468 return -1;
1469 int d = str[i] - '0';
1470 if ( d<0 || d>9 )
1471 return -1;
1472 n = n*10 + d;
1473 }
1474 return n;
1475 }
1476
atoi(int & n) const1477 bool lString32::atoi( int &n ) const
1478 {
1479 n = 0;
1480 int sgn = 1;
1481 const lChar32 * s = c_str();
1482 while (*s == ' ' || *s == '\t')
1483 s++;
1484 if ( s[0]=='0' && s[1]=='x') {
1485 s+=2;
1486 for (;*s;) {
1487 int d = hexDigit(*s++);
1488 if ( d>=0 )
1489 n = (n<<4) | d;
1490 }
1491 return true;
1492 }
1493 if (*s == '-')
1494 {
1495 sgn = -1;
1496 s++;
1497 }
1498 else if (*s == '+')
1499 {
1500 s++;
1501 }
1502 if ( !(*s>='0' && *s<='9') )
1503 return false;
1504 while (*s>='0' && *s<='9')
1505 {
1506 n = n * 10 + ( (*s++)-'0' );
1507 }
1508 if ( sgn<0 )
1509 n = -n;
1510 return *s=='\0' || *s==' ' || *s=='\t';
1511 }
1512
atoi(lInt64 & n) const1513 bool lString32::atoi( lInt64 &n ) const
1514 {
1515 int sgn = 1;
1516 const lChar32 * s = c_str();
1517 while (*s == ' ' || *s == '\t')
1518 s++;
1519 if (*s == '-')
1520 {
1521 sgn = -1;
1522 s++;
1523 }
1524 else if (*s == '+')
1525 {
1526 s++;
1527 }
1528 if ( !(*s>='0' && *s<='9') )
1529 return false;
1530 while (*s>='0' && *s<='9')
1531 {
1532 n = n * 10 + ( (*s++)-'0' );
1533 }
1534 if ( sgn<0 )
1535 n = -n;
1536 return *s=='\0' || *s==' ' || *s=='\t';
1537 }
1538
1539 #define STRING_HASH_MULT 31
getHash() const1540 lUInt32 lString32::getHash() const
1541 {
1542 lUInt32 res = 0;
1543 for (lInt32 i=0; i<pchunk->len; i++)
1544 res = res * STRING_HASH_MULT + pchunk->buf32[i];
1545 return res;
1546 }
1547
calcStringHash(const lChar32 * s)1548 lUInt32 calcStringHash( const lChar32 * s )
1549 {
1550 lUInt32 a = 2166136261u;
1551 while (*s)
1552 {
1553 a = a * 16777619 ^ (*s++);
1554 }
1555 return a;
1556 }
1557
1558 /// calculates CRC32 for buffer contents
lStr_crc32(lUInt32 prevValue,const void * buf,int size)1559 lUInt32 lStr_crc32( lUInt32 prevValue, const void * buf, int size )
1560 {
1561 #if (USE_ZLIB==1)
1562 return crc32( prevValue, (const lUInt8 *)buf, size );
1563 #else
1564 // TODO:
1565 return 0;
1566 #endif
1567 }
1568
1569
/// Definition of the static member lString32::empty_str, a default-constructed
/// constant string (returned e.g. by substr() for an out-of-range position).
const lString32 lString32::empty_str;
1571
1572
1573 ////////////////////////////////////////////////////////////////////////////
1574 // lString16
1575 ////////////////////////////////////////////////////////////////////////////
1576
free()1577 void lString16::free()
1578 {
1579 if ( pchunk==EMPTY_STR_16 )
1580 return;
1581 //assert(pchunk->buf16[pchunk->len]==0);
1582 ::free(pchunk->buf16);
1583 #if (LDOM_USE_OWN_MEM_MAN == 1)
1584 for (int i=slices_count-1; i>=0; --i)
1585 {
1586 if (slices[i]->free_chunk16(pchunk))
1587 return;
1588 }
1589 crFatalError(); // wrong pointer!!!
1590 #else
1591 ::free(pchunk);
1592 #endif
1593 }
1594
alloc(int sz)1595 void lString16::alloc(int sz)
1596 {
1597 #if (LDOM_USE_OWN_MEM_MAN == 1)
1598 pchunk = lstring_chunk_t::alloc();
1599 #else
1600 pchunk = (lstring_chunk_t*)::malloc(sizeof(lstring_chunk_t));
1601 #endif
1602 pchunk->buf16 = (lChar16*) ::malloc( sizeof(lChar16) * (sz+1) );
1603 assert( pchunk->buf16!=NULL );
1604 pchunk->size = sz;
1605 pchunk->refCount = 1;
1606 }
1607
lString16(const value_type * str)1608 lString16::lString16(const value_type * str)
1609 {
1610 if (!str || !(*str))
1611 {
1612 pchunk = EMPTY_STR_16;
1613 addref();
1614 return;
1615 }
1616 size_type len = _lStr_len(str);
1617 alloc( len );
1618 pchunk->len = len;
1619 _lStr_cpy( pchunk->buf16, str );
1620 }
1621
lString16(const lChar8 * str)1622 lString16::lString16(const lChar8 * str)
1623 {
1624 if (!str || !(*str))
1625 {
1626 pchunk = EMPTY_STR_16;
1627 addref();
1628 return;
1629 }
1630 pchunk = EMPTY_STR_16;
1631 addref();
1632 *this = UnicodeToUtf16( Utf8ToUnicode( str ) );
1633 }
1634
1635 /// constructor from utf8 character array fragment
lString16(const lChar8 * str,size_type count)1636 lString16::lString16(const lChar8 * str, size_type count)
1637 {
1638 if (!str || !(*str))
1639 {
1640 pchunk = EMPTY_STR_16;
1641 addref();
1642 return;
1643 }
1644 pchunk = EMPTY_STR_16;
1645 addref();
1646 *this = UnicodeToUtf16( Utf8ToUnicode( str, count ) );
1647 }
1648
1649
lString16(const value_type * str,size_type count)1650 lString16::lString16(const value_type * str, size_type count)
1651 {
1652 if ( !str || !(*str) || count<=0 )
1653 {
1654 pchunk = EMPTY_STR_16;
1655 addref();
1656 }
1657 else
1658 {
1659 size_type len = _lStr_nlen(str, count);
1660 alloc(len);
1661 _lStr_ncpy( pchunk->buf16, str, len );
1662 pchunk->len = len;
1663 }
1664 }
1665
/// Constructs a string from a fragment of another string.
/// count is clamped to the characters available after offset; when nothing
/// remains the shared empty chunk is used.
lString16::lString16(const lString16 & str, size_type offset, size_type count)
{
    if ( str.length() - offset < count )
        count = str.length() - offset;
    if (count>0)
    {
        alloc(count);
        _lStr_memcpy( pchunk->buf16, str.pchunk->buf16+offset, count );
        pchunk->len = count;
        pchunk->buf16[count]=0;
    }
    else
    {
        pchunk = EMPTY_STR_16;
        addref();
    }
}
1683
assign(const value_type * str)1684 lString16 & lString16::assign(const value_type * str)
1685 {
1686 if (!str || !(*str))
1687 {
1688 clear();
1689 }
1690 else
1691 {
1692 size_type len = _lStr_len(str);
1693 if (refCount()==1)
1694 {
1695 if (pchunk->size<=len)
1696 {
1697 // resize is necessary
1698 pchunk->buf16 = (lChar16*) ::realloc( pchunk->buf16, sizeof(lChar16)*(len+1) );
1699 pchunk->size = len+1;
1700 }
1701 }
1702 else
1703 {
1704 release();
1705 alloc(len);
1706 }
1707 _lStr_cpy( pchunk->buf16, str );
1708 pchunk->len = len;
1709 }
1710 return *this;
1711 }
1712
assign(const lChar8 * str)1713 lString16 & lString16::assign(const lChar8 * str)
1714 {
1715 if (!str || !(*str))
1716 {
1717 clear();
1718 }
1719 else
1720 {
1721 size_type len = _lStr_len(str);
1722 if (refCount()==1)
1723 {
1724 if (pchunk->size<=len)
1725 {
1726 // resize is necessary
1727 pchunk->buf16 = (lChar16*) ::realloc( pchunk->buf16, sizeof(lChar16)*(len+1) );
1728 pchunk->size = len+1;
1729 }
1730 }
1731 else
1732 {
1733 release();
1734 alloc(len);
1735 }
1736 _lStr_cpy( pchunk->buf16, str );
1737 pchunk->len = len;
1738 }
1739 return *this;
1740 }
1741
assign(const value_type * str,size_type count)1742 lString16 & lString16::assign(const value_type * str, size_type count)
1743 {
1744 if ( !str || !(*str) || count<=0 )
1745 {
1746 clear();
1747 }
1748 else
1749 {
1750 size_type len = _lStr_nlen(str, count);
1751 if (refCount()==1)
1752 {
1753 if (pchunk->size<=len)
1754 {
1755 // resize is necessary
1756 pchunk->buf16 = (lChar16*) ::realloc( pchunk->buf16, sizeof(lChar16)*(len+1) );
1757 pchunk->size = len+1;
1758 }
1759 }
1760 else
1761 {
1762 release();
1763 alloc(len);
1764 }
1765 _lStr_ncpy( pchunk->buf16, str, count );
1766 pchunk->len = len;
1767 }
1768 return *this;
1769 }
1770
assign(const lChar8 * str,size_type count)1771 lString16 & lString16::assign(const lChar8 * str, size_type count)
1772 {
1773 if ( !str || !(*str) || count<=0 )
1774 {
1775 clear();
1776 }
1777 else
1778 {
1779 size_type len = _lStr_nlen(str, count);
1780 if (refCount()==1)
1781 {
1782 if (pchunk->size<=len)
1783 {
1784 // resize is necessary
1785 pchunk->buf16 = (lChar16*) ::realloc( pchunk->buf16, sizeof(lChar16)*(len+1) );
1786 pchunk->size = len+1;
1787 }
1788 }
1789 else
1790 {
1791 release();
1792 alloc(len);
1793 }
1794 _lStr_ncpy( pchunk->buf16, str, count );
1795 pchunk->len = len;
1796 }
1797 return *this;
1798 }
1799
/// Replaces the contents with a fragment of another string.
/// count is clamped to the characters available after offset; when nothing
/// remains the string is cleared. str may be this string or share its chunk.
/// @return reference to this string
lString16 & lString16::assign(const lString16 & str, size_type offset, size_type count)
{
    if ( count > str.length() - offset )
        count = str.length() - offset;
    if (count<=0)
    {
        clear();
        return *this;
    }
    if (pchunk==str.pchunk)
    {
        if (refCount()>1)
        {
            // The chunk is shared (str is another object, or *this shares it
            // with other strings): never modify it in place. Build a private
            // copy from the old chunk, which stays alive through its other
            // references. Previously offset==0 left the new buffer
            // uninitialized, and self-assignment on a shared chunk modified
            // the data seen by the other owners.
            lstring_chunk_t * poldchunk = pchunk;
            release();
            alloc(count);
            _lStr_memcpy( pchunk->buf16, poldchunk->buf16+offset, count );
        }
        else if (offset>0)
        {
            // sole owner assigning from itself: move the fragment to the front
            _lStr_memcpy( pchunk->buf16, pchunk->buf16+offset, count );
        }
    }
    else
    {
        if (refCount()==1)
        {
            if (pchunk->size<=count)
            {
                // resize is necessary
                pchunk->buf16 = (lChar16*) ::realloc( pchunk->buf16, sizeof(lChar16)*(count+1) );
                // size is the capacity WITHOUT the terminator, as in alloc()
                // and reserve(); count+1 overstated it by one element
                pchunk->size = count;
            }
        }
        else
        {
            release();
            alloc(count);
        }
        _lStr_memcpy( pchunk->buf16, str.pchunk->buf16+offset, count );
    }
    pchunk->buf16[count]=0;
    pchunk->len = count;
    return *this;
}
1846
erase(size_type offset,size_type count)1847 lString16 & lString16::erase(size_type offset, size_type count)
1848 {
1849 if ( count > length() - offset )
1850 count = length() - offset;
1851 if (count<=0)
1852 {
1853 clear();
1854 }
1855 else
1856 {
1857 size_type newlen = length()-count;
1858 if (refCount()==1)
1859 {
1860 _lStr_memcpy( pchunk->buf16+offset, pchunk->buf16+offset+count, newlen-offset+1 );
1861 }
1862 else
1863 {
1864 lstring_chunk_t * poldchunk = pchunk;
1865 release();
1866 alloc( newlen );
1867 _lStr_memcpy( pchunk->buf16, poldchunk->buf16, offset );
1868 _lStr_memcpy( pchunk->buf16+offset, poldchunk->buf16+offset+count, newlen-offset+1 );
1869 }
1870 pchunk->len = newlen;
1871 pchunk->buf16[newlen]=0;
1872 }
1873 return *this;
1874 }
1875
reserve(size_type n)1876 void lString16::reserve(size_type n)
1877 {
1878 if (refCount()==1)
1879 {
1880 if (pchunk->size < n)
1881 {
1882 pchunk->buf16 = (lChar16*) ::realloc( pchunk->buf16, sizeof(lChar16)*(n+1) );
1883 pchunk->size = n;
1884 }
1885 }
1886 else
1887 {
1888 lstring_chunk_t * poldchunk = pchunk;
1889 release();
1890 alloc( n );
1891 _lStr_memcpy( pchunk->buf16, poldchunk->buf16, poldchunk->len+1 );
1892 pchunk->len = poldchunk->len;
1893 }
1894 }
1895
lock(size_type newsize)1896 void lString16::lock( size_type newsize )
1897 {
1898 if (refCount()>1)
1899 {
1900 lstring_chunk_t * poldchunk = pchunk;
1901 release();
1902 alloc( newsize );
1903 size_type len = newsize;
1904 if (len>poldchunk->len)
1905 len = poldchunk->len;
1906 _lStr_memcpy( pchunk->buf16, poldchunk->buf16, len );
1907 pchunk->buf16[len]=0;
1908 pchunk->len = len;
1909 }
1910 }
1911
1912 // lock string, allocate buffer and reset length to 0
reset(size_type size)1913 void lString16::reset( size_type size )
1914 {
1915 if (refCount()>1 || pchunk->size<size)
1916 {
1917 release();
1918 alloc( size );
1919 }
1920 pchunk->buf16[0] = 0;
1921 pchunk->len = 0;
1922 }
1923
resize(size_type n,value_type e)1924 void lString16::resize(size_type n, value_type e)
1925 {
1926 lock( n );
1927 if (n>=pchunk->size)
1928 {
1929 pchunk->buf16 = (lChar16*) ::realloc( pchunk->buf16, sizeof(lChar16)*(n+1) );
1930 pchunk->size = n;
1931 }
1932 // fill with data if expanded
1933 for (size_type i=pchunk->len; i<n; i++)
1934 pchunk->buf16[i] = e;
1935 pchunk->buf16[pchunk->len] = 0;
1936 }
1937
append(const value_type * str)1938 lString16 & lString16::append(const value_type * str)
1939 {
1940 size_type len = _lStr_len(str);
1941 reserve( pchunk->len+len );
1942 _lStr_memcpy(pchunk->buf16 + pchunk->len, str, len+1);
1943 pchunk->len += len;
1944 return *this;
1945 }
1946
append(const value_type * str,size_type count)1947 lString16 & lString16::append(const value_type * str, size_type count)
1948 {
1949 reserve(pchunk->len + count);
1950 _lStr_ncpy(pchunk->buf16 + pchunk->len, str, count);
1951 pchunk->len += count;
1952 return *this;
1953 }
1954
append(const lChar8 * str)1955 lString16 & lString16::append(const lChar8 * str)
1956 {
1957 size_type len = _lStr_len(str);
1958 reserve( pchunk->len+len );
1959 _lStr_ncpy(pchunk->buf16 + pchunk->len, str, len + 1);
1960 pchunk->len += len;
1961 return *this;
1962 }
1963
append(const lChar8 * str,size_type count)1964 lString16 & lString16::append(const lChar8 * str, size_type count)
1965 {
1966 reserve(pchunk->len + count);
1967 _lStr_ncpy(pchunk->buf16 + pchunk->len, str, count);
1968 pchunk->len += count;
1969 return *this;
1970 }
1971
/// Appends another lString16 to this string.
/// Safe when str is this very string (s.append(s)).
/// @return reference to this string
lString16 & lString16::append(const lString16 & str)
{
    // Capture the source length first: if str is *this, it changes below.
    const size_type addLen = str.pchunk->len;
    const size_type newLen = pchunk->len + addLen;
    reserve( newLen );
    // Copy only the characters and write the terminator explicitly.
    // Copying addLen+1 elements overlapped the destination on self-append
    // (source terminator == first destination slot) and lost the terminator.
    _lStr_memcpy( pchunk->buf16+pchunk->len, str.pchunk->buf16, addLen );
    pchunk->len = newLen;
    pchunk->buf16[newLen] = 0;
    return *this;
}
1980
/// Appends a fragment of another string.
/// @param str source string
/// @param offset index of the first character to take from str
/// @param count maximum number of characters to take; clamped to what is
///        available after offset
/// Nothing is appended when offset is at or beyond the end of str.
lString16 & lString16::append(const lString16 & str, size_type offset, size_type count)
{
    if ( str.pchunk->len <= offset )
        return *this;
    // Clamp using a subtraction: "offset + count" could overflow for a
    // very large count, while len - offset is always positive here.
    const size_type avail = str.pchunk->len - offset;
    if ( count > avail )
        count = avail;
    reserve( pchunk->len+count );
    _lStr_ncpy(pchunk->buf16 + pchunk->len, str.pchunk->buf16 + offset, count);
    pchunk->len += count;
    pchunk->buf16[pchunk->len] = 0;
    return *this;
}
1994
append(size_type count,value_type ch)1995 lString16 & lString16::append(size_type count, value_type ch)
1996 {
1997 reserve( pchunk->len+count );
1998 _lStr_memset(pchunk->buf16+pchunk->len, ch, count);
1999 pchunk->len += count;
2000 pchunk->buf16[pchunk->len] = 0;
2001 return *this;
2002 }
2003
insert(size_type p0,const value_type * str)2004 lString16 & lString16::insert(size_type p0, const value_type * str)
2005 {
2006 if (p0>pchunk->len)
2007 p0 = pchunk->len;
2008 int count = lStr_len(str);
2009 reserve( pchunk->len+count );
2010 for (size_type i=pchunk->len+count; i>p0; i--)
2011 pchunk->buf16[i] = pchunk->buf16[i-1];
2012 _lStr_memcpy(pchunk->buf16 + p0, str, count);
2013 pchunk->len += count;
2014 pchunk->buf16[pchunk->len] = 0;
2015 return *this;
2016 }
2017
insert(size_type p0,const value_type * str,size_type count)2018 lString16 & lString16::insert(size_type p0, const value_type * str, size_type count)
2019 {
2020 if (p0>pchunk->len)
2021 p0 = pchunk->len;
2022 reserve( pchunk->len+count );
2023 for (size_type i=pchunk->len+count; i>p0; i--)
2024 pchunk->buf16[i] = pchunk->buf16[i-1];
2025 _lStr_memcpy(pchunk->buf16 + p0, str, count);
2026 pchunk->len += count;
2027 pchunk->buf16[pchunk->len] = 0;
2028 return *this;
2029 }
2030
insert(size_type p0,size_type count,value_type ch)2031 lString16 & lString16::insert(size_type p0, size_type count, value_type ch)
2032 {
2033 if (p0>pchunk->len)
2034 p0 = pchunk->len;
2035 reserve( pchunk->len+count );
2036 for (size_type i=pchunk->len+count; i>p0; i--)
2037 pchunk->buf16[i] = pchunk->buf16[i-1];
2038 _lStr_memset(pchunk->buf16+p0, ch, count);
2039 pchunk->len += count;
2040 pchunk->buf16[pchunk->len] = 0;
2041 return *this;
2042 }
2043
insert(size_type p0,const lString16 & str)2044 lString16 & lString16::insert(size_type p0, const lString16 & str)
2045 {
2046 if (p0>pchunk->len)
2047 p0 = pchunk->len;
2048 int count = str.length();
2049 reserve( pchunk->len+count );
2050 for (size_type i=pchunk->len+count; i>p0; i--)
2051 pchunk->buf16[i] = pchunk->buf16[i-1];
2052 _lStr_memcpy(pchunk->buf16 + p0, str.c_str(), count);
2053 pchunk->len += count;
2054 pchunk->buf16[pchunk->len] = 0;
2055 return *this;
2056 }
2057
substr(size_type pos,size_type n) const2058 lString16 lString16::substr(size_type pos, size_type n) const
2059 {
2060 if (pos>=length())
2061 return lString16::empty_str;
2062 if (pos+n>length())
2063 n = length() - pos;
2064 return lString16( pchunk->buf16 + pos, n );
2065 }
2066
pack()2067 lString16 & lString16::pack()
2068 {
2069 if (pchunk->len + 4 < pchunk->size )
2070 {
2071 if (refCount()>1)
2072 {
2073 lock(pchunk->len);
2074 }
2075 else
2076 {
2077 pchunk->buf16 = cr_realloc( pchunk->buf16, pchunk->len + 1 );
2078 pchunk->size = pchunk->len;
2079 }
2080 }
2081 return *this;
2082 }
2083
2084 /// trims non alpha at beginning and end of string
trimNonAlpha()2085 lString16 & lString16::trimNonAlpha()
2086 {
2087 int firstns;
2088 for (firstns = 0; firstns<pchunk->len &&
2089 !isAlNum(pchunk->buf16[firstns]); ++firstns)
2090 ;
2091 if (firstns >= pchunk->len)
2092 {
2093 clear();
2094 return *this;
2095 }
2096 int lastns;
2097 for (lastns = pchunk->len-1; lastns>0 &&
2098 !isAlNum(pchunk->buf16[lastns]); --lastns)
2099 ;
2100 int newlen = lastns-firstns+1;
2101 if (newlen == pchunk->len)
2102 return *this;
2103 if (refCount()==1)
2104 {
2105 if (firstns>0)
2106 lStr_memcpy( pchunk->buf16, pchunk->buf16 + firstns, newlen );
2107 pchunk->buf16[newlen] = 0;
2108 pchunk->len = newlen;
2109 }
2110 else
2111 {
2112 lstring_chunk_t * poldchunk = pchunk;
2113 release();
2114 alloc( newlen );
2115 _lStr_memcpy( pchunk->buf16, poldchunk->buf16+firstns, newlen );
2116 pchunk->buf16[newlen] = 0;
2117 pchunk->len = newlen;
2118 }
2119 return *this;
2120 }
2121
trim()2122 lString16 & lString16::trim()
2123 {
2124 //
2125 int firstns;
2126 for (firstns = 0; firstns<pchunk->len &&
2127 (pchunk->buf16[firstns]==' ' || pchunk->buf16[firstns]=='\t'); ++firstns)
2128 ;
2129 if (firstns >= pchunk->len)
2130 {
2131 clear();
2132 return *this;
2133 }
2134 int lastns;
2135 for (lastns = pchunk->len-1; lastns>0 &&
2136 (pchunk->buf16[lastns]==' ' || pchunk->buf16[lastns]=='\t'); --lastns)
2137 ;
2138 int newlen = lastns-firstns+1;
2139 if (newlen == pchunk->len)
2140 return *this;
2141 if (refCount()==1)
2142 {
2143 if (firstns>0)
2144 lStr_memcpy( pchunk->buf16, pchunk->buf16+firstns, newlen );
2145 pchunk->buf16[newlen] = 0;
2146 pchunk->len = newlen;
2147 }
2148 else
2149 {
2150 lstring_chunk_t * poldchunk = pchunk;
2151 release();
2152 alloc( newlen );
2153 _lStr_memcpy( pchunk->buf16, poldchunk->buf16+firstns, newlen );
2154 pchunk->buf16[newlen] = 0;
2155 pchunk->len = newlen;
2156 }
2157 return *this;
2158 }
2159
atoi() const2160 int lString16::atoi() const
2161 {
2162 int n = 0;
2163 atoi(n);
2164 return n;
2165 }
2166
atoi(int & n) const2167 bool lString16::atoi( int &n ) const
2168 {
2169 n = 0;
2170 int sgn = 1;
2171 const lChar16 * s = c_str();
2172 while (*s == ' ' || *s == '\t')
2173 s++;
2174 if ( s[0]=='0' && s[1]=='x') {
2175 s+=2;
2176 for (;*s;) {
2177 int d = hexDigit(*s++);
2178 if ( d>=0 )
2179 n = (n<<4) | d;
2180 }
2181 return true;
2182 }
2183 if (*s == '-')
2184 {
2185 sgn = -1;
2186 s++;
2187 }
2188 else if (*s == '+')
2189 {
2190 s++;
2191 }
2192 if ( !(*s>='0' && *s<='9') )
2193 return false;
2194 while (*s>='0' && *s<='9')
2195 {
2196 n = n * 10 + ( (*s++)-'0' );
2197 }
2198 if ( sgn<0 )
2199 n = -n;
2200 return *s=='\0' || *s==' ' || *s=='\t';
2201 }
2202
atoi(lInt64 & n) const2203 bool lString16::atoi( lInt64 &n ) const
2204 {
2205 int sgn = 1;
2206 const lChar16 * s = c_str();
2207 while (*s == ' ' || *s == '\t')
2208 s++;
2209 if (*s == '-')
2210 {
2211 sgn = -1;
2212 s++;
2213 }
2214 else if (*s == '+')
2215 {
2216 s++;
2217 }
2218 if ( !(*s>='0' && *s<='9') )
2219 return false;
2220 while (*s>='0' && *s<='9')
2221 {
2222 n = n * 10 + ( (*s++)-'0' );
2223 }
2224 if ( sgn<0 )
2225 n = -n;
2226 return *s=='\0' || *s==' ' || *s=='\t';
2227 }
2228
getHash() const2229 lUInt32 lString16::getHash() const
2230 {
2231 lUInt32 res = 0;
2232 for (lInt32 i=0; i<pchunk->len; i++)
2233 res = res * STRING_HASH_MULT + pchunk->buf16[i];
2234 return res;
2235 }
2236
calcStringHash(const lChar16 * s)2237 lUInt32 calcStringHash( const lChar16 * s )
2238 {
2239 lUInt32 a = 2166136261u;
2240 while (*s)
2241 {
2242 a = a * 16777619 ^ (*s++);
2243 }
2244 return a;
2245 }
2246
2247
/// Definition of the static member lString16::empty_str, a default-constructed
/// constant string (returned e.g. by substr() for an out-of-range position).
const lString16 lString16::empty_str;
2249
2250
2251 ////////////////////////////////////////////////////////////////////////////
2252 // lString8
2253 ////////////////////////////////////////////////////////////////////////////
2254
free()2255 void lString8::free()
2256 {
2257 if ( pchunk==EMPTY_STR_8 )
2258 return;
2259 ::free(pchunk->buf8);
2260 #if (LDOM_USE_OWN_MEM_MAN == 1)
2261 for (int i=slices_count-1; i>=0; --i)
2262 {
2263 if (slices[i]->free_chunk(pchunk))
2264 return;
2265 }
2266 crFatalError(); // wrong pointer!!!
2267 #else
2268 ::free(pchunk);
2269 #endif
2270 }
2271
alloc(int sz)2272 void lString8::alloc(int sz)
2273 {
2274 #if (LDOM_USE_OWN_MEM_MAN == 1)
2275 pchunk = lstring_chunk_t::alloc();
2276 #else
2277 pchunk = (lstring_chunk_t*)::malloc(sizeof(lstring_chunk_t));
2278 #endif
2279 pchunk->buf8 = (lChar8*) ::malloc( sizeof(lChar8) * (sz+1) );
2280 assert( pchunk->buf8!=NULL );
2281 pchunk->size = sz;
2282 pchunk->refCount = 1;
2283 }
2284
lString8(const lChar8 * str)2285 lString8::lString8(const lChar8 * str)
2286 {
2287 if (!str || !(*str))
2288 {
2289 pchunk = EMPTY_STR_8;
2290 addref();
2291 return;
2292 }
2293 size_type len = _lStr_len(str);
2294 alloc( len );
2295 pchunk->len = len;
2296 _lStr_cpy( pchunk->buf8, str );
2297 }
2298
lString8(const lChar32 * str)2299 lString8::lString8(const lChar32 * str)
2300 {
2301 if (!str || !(*str))
2302 {
2303 pchunk = EMPTY_STR_8;
2304 addref();
2305 return;
2306 }
2307 size_type len = _lStr_len(str);
2308 alloc( len );
2309 pchunk->len = len;
2310 _lStr_cpy( pchunk->buf8, str );
2311 }
2312
lString8(const value_type * str,size_type count)2313 lString8::lString8(const value_type * str, size_type count)
2314 {
2315 if ( !str || !(*str) || count<=0 )
2316 {
2317 pchunk = EMPTY_STR_8; addref();
2318 }
2319 else
2320 {
2321 size_type len = _lStr_nlen(str, count);
2322 alloc(len);
2323 _lStr_ncpy( pchunk->buf8, str, len );
2324 pchunk->len = len;
2325 }
2326 }
2327
/// Constructs a string from a fragment of another string.
/// count is clamped to the characters available after offset; when nothing
/// remains the shared empty chunk is used.
lString8::lString8(const lString8 & str, size_type offset, size_type count)
{
    if ( str.length() - offset < count )
        count = str.length() - offset;
    if (count>0)
    {
        alloc(count);
        _lStr_memcpy( pchunk->buf8, str.pchunk->buf8+offset, count );
        pchunk->len = count;
        pchunk->buf8[count]=0;
    }
    else
    {
        pchunk = EMPTY_STR_8;
        addref();
    }
}
2344
assign(const lChar8 * str)2345 lString8 & lString8::assign(const lChar8 * str)
2346 {
2347 if (!str || !(*str))
2348 {
2349 clear();
2350 }
2351 else
2352 {
2353 size_type len = _lStr_len(str);
2354 if (refCount()==1)
2355 {
2356 if (pchunk->size<=len)
2357 {
2358 // resize is necessary
2359 pchunk->buf8 = (lChar8*) ::realloc( pchunk->buf8, sizeof(lChar8)*(len+1) );
2360 pchunk->size = len+1;
2361 }
2362 }
2363 else
2364 {
2365 release();
2366 alloc(len);
2367 }
2368 _lStr_cpy( pchunk->buf8, str );
2369 pchunk->len = len;
2370 }
2371 return *this;
2372 }
2373
assign(const lChar8 * str,size_type count)2374 lString8 & lString8::assign(const lChar8 * str, size_type count)
2375 {
2376 if ( !str || !(*str) || count<=0 )
2377 {
2378 clear();
2379 }
2380 else
2381 {
2382 size_type len = _lStr_nlen(str, count);
2383 if (refCount()==1)
2384 {
2385 if (pchunk->size<=len)
2386 {
2387 // resize is necessary
2388 pchunk->buf8 = (lChar8*) ::realloc( pchunk->buf8, sizeof(lChar8)*(len+1) );
2389 pchunk->size = len+1;
2390 }
2391 }
2392 else
2393 {
2394 release();
2395 alloc(len);
2396 }
2397 _lStr_ncpy( pchunk->buf8, str, count );
2398 pchunk->len = len;
2399 }
2400 return *this;
2401 }
2402
/// Replaces the contents with a fragment of another string.
/// count is clamped to the characters available after offset; when nothing
/// remains the string is cleared. str may be this string or share its chunk.
/// @return reference to this string
lString8 & lString8::assign(const lString8 & str, size_type offset, size_type count)
{
    if ( count > str.length() - offset )
        count = str.length() - offset;
    if (count<=0)
    {
        clear();
        return *this;
    }
    if (pchunk==str.pchunk)
    {
        if (refCount()>1)
        {
            // The chunk is shared (str is another object, or *this shares it
            // with other strings): never modify it in place. Build a private
            // copy from the old chunk, which stays alive through its other
            // references. Previously offset==0 left the new buffer
            // uninitialized, and self-assignment on a shared chunk modified
            // the data seen by the other owners.
            lstring_chunk_t * poldchunk = pchunk;
            release();
            alloc(count);
            _lStr_memcpy( pchunk->buf8, poldchunk->buf8+offset, count );
        }
        else if (offset>0)
        {
            // sole owner assigning from itself: move the fragment to the front
            _lStr_memcpy( pchunk->buf8, pchunk->buf8+offset, count );
        }
    }
    else
    {
        if (refCount()==1)
        {
            if (pchunk->size<=count)
            {
                // resize is necessary
                pchunk->buf8 = (lChar8*) ::realloc( pchunk->buf8, sizeof(lChar8)*(count+1) );
                // size is the capacity WITHOUT the terminator, as in alloc()
                // and reserve(); count+1 overstated it by one element
                pchunk->size = count;
            }
        }
        else
        {
            release();
            alloc(count);
        }
        _lStr_memcpy( pchunk->buf8, str.pchunk->buf8+offset, count );
    }
    pchunk->buf8[count]=0;
    pchunk->len = count;
    return *this;
}
2449
erase(size_type offset,size_type count)2450 lString8 & lString8::erase(size_type offset, size_type count)
2451 {
2452 if ( count > length() - offset )
2453 count = length() - offset;
2454 if (count<=0)
2455 {
2456 clear();
2457 }
2458 else
2459 {
2460 size_type newlen = length()-count;
2461 if (refCount()==1)
2462 {
2463 _lStr_memcpy( pchunk->buf8+offset, pchunk->buf8+offset+count, newlen-offset+1 );
2464 }
2465 else
2466 {
2467 lstring_chunk_t * poldchunk = pchunk;
2468 release();
2469 alloc( newlen );
2470 _lStr_memcpy( pchunk->buf8, poldchunk->buf8, offset );
2471 _lStr_memcpy( pchunk->buf8+offset, poldchunk->buf8+offset+count, newlen-offset+1 );
2472 }
2473 pchunk->len = newlen;
2474 pchunk->buf8[newlen]=0;
2475 }
2476 return *this;
2477 }
2478
reserve(size_type n)2479 void lString8::reserve(size_type n)
2480 {
2481 if (refCount()==1)
2482 {
2483 if (pchunk->size < n)
2484 {
2485 pchunk->buf8 = (lChar8*) ::realloc( pchunk->buf8, sizeof(lChar8)*(n+1) );
2486 pchunk->size = n;
2487 }
2488 }
2489 else
2490 {
2491 lstring_chunk_t * poldchunk = pchunk;
2492 release();
2493 alloc( n );
2494 _lStr_memcpy( pchunk->buf8, poldchunk->buf8, poldchunk->len+1 );
2495 pchunk->len = poldchunk->len;
2496 }
2497 }
2498
lock(size_type newsize)2499 void lString8::lock( size_type newsize )
2500 {
2501 if (refCount()>1)
2502 {
2503 lstring_chunk_t * poldchunk = pchunk;
2504 release();
2505 alloc( newsize );
2506 size_type len = newsize;
2507 if (len>poldchunk->len)
2508 len = poldchunk->len;
2509 _lStr_memcpy( pchunk->buf8, poldchunk->buf8, len );
2510 pchunk->buf8[len]=0;
2511 pchunk->len = len;
2512 }
2513 }
2514
2515 // lock string, allocate buffer and reset length to 0
reset(size_type size)2516 void lString8::reset( size_type size )
2517 {
2518 if (refCount()>1 || pchunk->size<size)
2519 {
2520 release();
2521 alloc( size );
2522 }
2523 pchunk->buf8[0] = 0;
2524 pchunk->len = 0;
2525 }
2526
resize(size_type n,lChar8 e)2527 void lString8::resize(size_type n, lChar8 e)
2528 {
2529 lock( n );
2530 if (n>=pchunk->size)
2531 {
2532 pchunk->buf8 = (lChar8*) ::realloc( pchunk->buf8, sizeof(lChar8)*(n+1) );
2533 pchunk->size = n;
2534 }
2535 // fill with data if expanded
2536 for (size_type i=pchunk->len; i<n; i++)
2537 pchunk->buf8[i] = e;
2538 pchunk->buf8[pchunk->len] = 0;
2539 }
2540
append(const lChar8 * str)2541 lString8 & lString8::append(const lChar8 * str)
2542 {
2543 size_type len = _lStr_len(str);
2544 reserve( pchunk->len+len );
2545 _lStr_memcpy(pchunk->buf8+pchunk->len, str, len+1);
2546 pchunk->len += len;
2547 return *this;
2548 }
2549
appendDecimal(lInt64 n)2550 lString8 & lString8::appendDecimal(lInt64 n)
2551 {
2552 lChar8 buf[24];
2553 int i=0;
2554 int negative = 0;
2555 if (n==0)
2556 return append(1, '0');
2557 else if (n<0)
2558 {
2559 negative = 1;
2560 n = -n;
2561 }
2562 for ( ; n; n/=10 )
2563 {
2564 buf[i++] = '0' + (n % 10);
2565 }
2566 reserve(length() + i + negative);
2567 if (negative)
2568 append(1, '-');
2569 for (int j=i-1; j>=0; j--)
2570 append(1, buf[j]);
2571 return *this;
2572 }
2573
appendHex(lUInt64 n)2574 lString8 & lString8::appendHex(lUInt64 n)
2575 {
2576 if (n == 0)
2577 return append(1, '0');
2578 reserve(length() + 16);
2579 bool foundNz = false;
2580 for (int i=0; i<16; i++) {
2581 int digit = (n >> 60) & 0x0F;
2582 if (digit)
2583 foundNz = true;
2584 if (foundNz)
2585 append(1, (lChar8)toHexDigit(digit));
2586 n <<= 4;
2587 }
2588 return *this;
2589 }
2590
appendDecimal(lInt64 n)2591 lString16 & lString16::appendDecimal(lInt64 n)
2592 {
2593 lChar16 buf[24];
2594 int i=0;
2595 int negative = 0;
2596 if (n==0)
2597 return append(1, '0');
2598 else if (n<0)
2599 {
2600 negative = 1;
2601 n = -n;
2602 }
2603 for ( ; n; n/=10 )
2604 {
2605 buf[i++] = '0' + (n % 10);
2606 }
2607 reserve(length() + i + negative);
2608 if (negative)
2609 append(1, '-');
2610 for (int j=i-1; j>=0; j--)
2611 append(1, buf[j]);
2612 return *this;
2613 }
2614
appendHex(lUInt64 n)2615 lString16 & lString16::appendHex(lUInt64 n)
2616 {
2617 if (n == 0)
2618 return append(1, '0');
2619 reserve(length() + 16);
2620 bool foundNz = false;
2621 for (int i=0; i<16; i++) {
2622 int digit = (n >> 60) & 0x0F;
2623 if (digit)
2624 foundNz = true;
2625 if (foundNz)
2626 append(1, toHexDigit(digit));
2627 n <<= 4;
2628 }
2629 return *this;
2630 }
2631
appendDecimal(lInt64 n)2632 lString32 & lString32::appendDecimal(lInt64 n)
2633 {
2634 lChar32 buf[24];
2635 int i=0;
2636 int negative = 0;
2637 if (n==0)
2638 return append(1, '0');
2639 else if (n<0)
2640 {
2641 negative = 1;
2642 n = -n;
2643 }
2644 for ( ; n; n/=10 )
2645 {
2646 buf[i++] = '0' + (n % 10);
2647 }
2648 reserve(length() + i + negative);
2649 if (negative)
2650 append(1, '-');
2651 for (int j=i-1; j>=0; j--)
2652 append(1, buf[j]);
2653 return *this;
2654 }
2655
appendHex(lUInt64 n)2656 lString32 & lString32::appendHex(lUInt64 n)
2657 {
2658 if (n == 0)
2659 return append(1, '0');
2660 reserve(length() + 16);
2661 bool foundNz = false;
2662 for (int i=0; i<16; i++) {
2663 int digit = (n >> 60) & 0x0F;
2664 if (digit)
2665 foundNz = true;
2666 if (foundNz)
2667 append(1, toHexDigit(digit));
2668 n <<= 4;
2669 }
2670 return *this;
2671 }
2672
append(const lChar8 * str,size_type count)2673 lString8 & lString8::append(const lChar8 * str, size_type count)
2674 {
2675 size_type len = _lStr_nlen(str, count);
2676 reserve( pchunk->len+len );
2677 _lStr_ncpy(pchunk->buf8+pchunk->len, str, len);
2678 pchunk->len += len;
2679 return *this;
2680 }
2681
/// Append the whole content of another 8-bit string.
lString8 & lString8::append(const lString8 & str)
{
    const size_type combined = pchunk->len + str.pchunk->len;
    reserve( combined );
    lChar8 * tail = pchunk->buf8 + pchunk->len;
    // len+1: the terminator of str is copied too
    _lStr_memcpy( tail, str.pchunk->buf8, str.pchunk->len + 1 );
    pchunk->len = combined;
    return *this;
}
2690
/// Append up to count characters of str starting at offset.
/// Nothing is appended when offset is at or past the end of str; count is
/// clamped to the characters available after offset.
lString8 & lString8::append(const lString8 & str, size_type offset, size_type count)
{
    if ( str.pchunk->len>offset )
    {
        // Clamp with a subtraction (safe because offset < len here) instead of
        // computing offset + count, which can overflow for very large counts.
        if ( count > str.pchunk->len - offset )
            count = str.pchunk->len - offset;
        reserve( pchunk->len+count );
        _lStr_ncpy(pchunk->buf8 + pchunk->len, str.pchunk->buf8 + offset, count);
        pchunk->len += count;
        pchunk->buf8[pchunk->len] = 0;
    }
    return *this;
}
2704
append(size_type count,lChar8 ch)2705 lString8 & lString8::append(size_type count, lChar8 ch)
2706 {
2707 reserve( pchunk->len+count );
2708 memset( pchunk->buf8+pchunk->len, ch, count );
2709 //_lStr_memset(pchunk->buf8+pchunk->len, ch, count);
2710 pchunk->len += count;
2711 pchunk->buf8[pchunk->len] = 0;
2712 return *this;
2713 }
2714
insert(size_type p0,size_type count,lChar8 ch)2715 lString8 & lString8::insert(size_type p0, size_type count, lChar8 ch)
2716 {
2717 if (p0>pchunk->len)
2718 p0 = pchunk->len;
2719 reserve( pchunk->len+count );
2720 for (size_type i=pchunk->len+count; i>p0; i--)
2721 pchunk->buf8[i] = pchunk->buf8[i-1];
2722 //_lStr_memset(pchunk->buf8+p0, ch, count);
2723 memset(pchunk->buf8+p0, ch, count);
2724 pchunk->len += count;
2725 pchunk->buf8[pchunk->len] = 0;
2726 return *this;
2727 }
2728
substr(size_type pos,size_type n) const2729 lString8 lString8::substr(size_type pos, size_type n) const
2730 {
2731 if (pos>=length())
2732 return lString8::empty_str;
2733 if (pos+n>length())
2734 n = length() - pos;
2735 return lString8( pchunk->buf8+pos, n );
2736 }
2737
pos(lChar8 ch) const2738 int lString8::pos(lChar8 ch) const
2739 {
2740 for (int i = 0; i < length(); i++)
2741 {
2742 if (pchunk->buf8[i] == ch)
2743 {
2744 return i;
2745 }
2746 }
2747 return -1;
2748 }
2749
pos(lChar8 ch,int start) const2750 int lString8::pos(lChar8 ch, int start) const
2751 {
2752 if (length() - start < 1)
2753 return -1;
2754 for (int i = start; i < length(); i++)
2755 {
2756 if (pchunk->buf8[i] == ch)
2757 {
2758 return i;
2759 }
2760 }
2761 return -1;
2762 }
2763
pos(const lString8 & subStr) const2764 int lString8::pos(const lString8 & subStr) const
2765 {
2766 if (subStr.length()>length())
2767 return -1;
2768 int l = subStr.length();
2769 int dl = length() - l;
2770 for (int i=0; i<=dl; i++)
2771 {
2772 int flg = 1;
2773 for (int j=0; j<l; j++)
2774 if (pchunk->buf8[i+j]!=subStr.pchunk->buf8[j])
2775 {
2776 flg = 0;
2777 break;
2778 }
2779 if (flg)
2780 return i;
2781 }
2782 return -1;
2783 }
2784
2785 /// find position of substring inside string starting from right, -1 if not found
rpos(const char * subStr) const2786 int lString8::rpos(const char * subStr) const
2787 {
2788 if (!subStr || !subStr[0])
2789 return -1;
2790 int l = lStr_len(subStr);
2791 if (l > length())
2792 return -1;
2793 int dl = length() - l;
2794 for (int i=dl; i>=0; i--)
2795 {
2796 int flg = 1;
2797 for (int j=0; j<l; j++)
2798 if (pchunk->buf8[i+j] != subStr[j])
2799 {
2800 flg = 0;
2801 break;
2802 }
2803 if (flg)
2804 return i;
2805 }
2806 return -1;
2807 }
2808
2809 /// find position of substring inside string, -1 if not found
pos(const char * subStr) const2810 int lString8::pos(const char * subStr) const
2811 {
2812 if (!subStr || !subStr[0])
2813 return -1;
2814 int l = lStr_len(subStr);
2815 if (l > length())
2816 return -1;
2817 int dl = length() - l;
2818 for (int i=0; i<=dl; i++)
2819 {
2820 int flg = 1;
2821 for (int j=0; j<l; j++)
2822 if (pchunk->buf8[i+j] != subStr[j])
2823 {
2824 flg = 0;
2825 break;
2826 }
2827 if (flg)
2828 return i;
2829 }
2830 return -1;
2831 }
2832
pos(const lString8 & subStr,int startPos) const2833 int lString8::pos(const lString8 & subStr, int startPos) const
2834 {
2835 if (subStr.length() > length() - startPos)
2836 return -1;
2837 int l = subStr.length();
2838 int dl = length() - l;
2839 for (int i = startPos; i <= dl; i++) {
2840 int flg = 1;
2841 for (int j=0; j<l; j++)
2842 if (pchunk->buf8[i+j]!=subStr.pchunk->buf8[j])
2843 {
2844 flg = 0;
2845 break;
2846 }
2847 if (flg)
2848 return i;
2849 }
2850 return -1;
2851 }
2852
pos(lChar32 ch) const2853 int lString32::pos(lChar32 ch) const {
2854 for (int i = 0; i < length(); i++)
2855 {
2856 if (pchunk->buf32[i] == ch)
2857 {
2858 return i;
2859 }
2860 }
2861 return -1;
2862 }
2863
pos(lChar32 ch,int start) const2864 int lString32::pos(lChar32 ch, int start) const
2865 {
2866 if (length() - start < 1)
2867 return -1;
2868 for (int i = start; i < length(); i++)
2869 {
2870 if (pchunk->buf32[i] == ch)
2871 {
2872 return i;
2873 }
2874 }
2875 return -1;
2876 }
2877
pos(const lString32 & subStr,int startPos) const2878 int lString32::pos(const lString32 & subStr, int startPos) const
2879 {
2880 if (subStr.length() > length() - startPos)
2881 return -1;
2882 int l = subStr.length();
2883 int dl = length() - l;
2884 for (int i = startPos; i <= dl; i++) {
2885 int flg = 1;
2886 for (int j=0; j<l; j++)
2887 if (pchunk->buf32[i+j]!=subStr.pchunk->buf32[j])
2888 {
2889 flg = 0;
2890 break;
2891 }
2892 if (flg)
2893 return i;
2894 }
2895 return -1;
2896 }
2897
2898 /// find position of substring inside string, -1 if not found
pos(const char * subStr,int startPos) const2899 int lString8::pos(const char * subStr, int startPos) const
2900 {
2901 if (!subStr || !subStr[0])
2902 return -1;
2903 int l = lStr_len(subStr);
2904 if (l > length() - startPos)
2905 return -1;
2906 int dl = length() - l;
2907 for (int i = startPos; i <= dl; i++) {
2908 int flg = 1;
2909 for (int j=0; j<l; j++)
2910 if (pchunk->buf8[i+j] != subStr[j])
2911 {
2912 flg = 0;
2913 break;
2914 }
2915 if (flg)
2916 return i;
2917 }
2918 return -1;
2919 }
2920
2921 /// find position of substring inside string, -1 if not found
pos(const lChar32 * subStr,int startPos) const2922 int lString32::pos(const lChar32 * subStr, int startPos) const
2923 {
2924 if (!subStr || !subStr[0])
2925 return -1;
2926 int l = lStr_len(subStr);
2927 if (l > length() - startPos)
2928 return -1;
2929 int dl = length() - l;
2930 for (int i = startPos; i <= dl; i++) {
2931 int flg = 1;
2932 for (int j=0; j<l; j++)
2933 if (pchunk->buf32[i+j] != subStr[j])
2934 {
2935 flg = 0;
2936 break;
2937 }
2938 if (flg)
2939 return i;
2940 }
2941 return -1;
2942 }
2943
2944 /// find position of substring inside string, right to left, return -1 if not found
rpos(lString32 subStr) const2945 int lString32::rpos(lString32 subStr) const
2946 {
2947 if (subStr.length()>length())
2948 return -1;
2949 int l = subStr.length();
2950 int dl = length() - l;
2951 for (int i=dl; i>=0; i++)
2952 {
2953 int flg = 1;
2954 for (int j=0; j<l; j++)
2955 if (pchunk->buf32[i+j]!=subStr.pchunk->buf32[j])
2956 {
2957 flg = 0;
2958 break;
2959 }
2960 if (flg)
2961 return i;
2962 }
2963 return -1;
2964 }
2965
2966 /// find position of substring inside string, -1 if not found
pos(const lChar32 * subStr) const2967 int lString32::pos(const lChar32 * subStr) const
2968 {
2969 if (!subStr)
2970 return -1;
2971 int l = lStr_len(subStr);
2972 if (l > length())
2973 return -1;
2974 int dl = length() - l;
2975 for (int i=0; i <= dl; i++)
2976 {
2977 int flg = 1;
2978 for (int j=0; j<l; j++)
2979 if (pchunk->buf32[i+j] != subStr[j])
2980 {
2981 flg = 0;
2982 break;
2983 }
2984 if (flg)
2985 return i;
2986 }
2987 return -1;
2988 }
2989
2990 /// find position of substring inside string, -1 if not found
pos(const lChar8 * subStr) const2991 int lString32::pos(const lChar8 * subStr) const
2992 {
2993 if (!subStr)
2994 return -1;
2995 int l = lStr_len(subStr);
2996 if (l > length())
2997 return -1;
2998 int dl = length() - l;
2999 for (int i=0; i <= dl; i++)
3000 {
3001 int flg = 1;
3002 for (int j=0; j<l; j++)
3003 if (pchunk->buf32[i+j] != subStr[j])
3004 {
3005 flg = 0;
3006 break;
3007 }
3008 if (flg)
3009 return i;
3010 }
3011 return -1;
3012 }
3013
3014 /// find position of substring inside string, -1 if not found
pos(const lChar8 * subStr,int start) const3015 int lString32::pos(const lChar8 * subStr, int start) const
3016 {
3017 if (!subStr)
3018 return -1;
3019 int l = lStr_len(subStr);
3020 if (l > length() - start)
3021 return -1;
3022 int dl = length() - l;
3023 for (int i = start; i <= dl; i++)
3024 {
3025 int flg = 1;
3026 for (int j=0; j<l; j++)
3027 if (pchunk->buf32[i+j] != subStr[j])
3028 {
3029 flg = 0;
3030 break;
3031 }
3032 if (flg)
3033 return i;
3034 }
3035 return -1;
3036 }
3037
pos(lString32 subStr) const3038 int lString32::pos(lString32 subStr) const
3039 {
3040 if (subStr.length()>length())
3041 return -1;
3042 int l = subStr.length();
3043 int dl = length() - l;
3044 for (int i=0; i<=dl; i++)
3045 {
3046 int flg = 1;
3047 for (int j=0; j<l; j++)
3048 if (pchunk->buf32[i+j]!=subStr.pchunk->buf32[j])
3049 {
3050 flg = 0;
3051 break;
3052 }
3053 if (flg)
3054 return i;
3055 }
3056 return -1;
3057 }
3058
pack()3059 lString8 & lString8::pack()
3060 {
3061 if (pchunk->len + 4 < pchunk->size )
3062 {
3063 if (refCount()>1)
3064 {
3065 lock(pchunk->len);
3066 }
3067 else
3068 {
3069 pchunk->buf8 = cr_realloc( pchunk->buf8, pchunk->len+1 );
3070 pchunk->size = pchunk->len;
3071 }
3072 }
3073 return *this;
3074 }
3075
trim()3076 lString8 & lString8::trim()
3077 {
3078 //
3079 int firstns;
3080 for (firstns = 0;
3081 firstns < pchunk->len &&
3082 (pchunk->buf8[firstns] == ' ' ||
3083 pchunk->buf8[firstns] == '\t');
3084 ++firstns)
3085 ;
3086 if (firstns >= pchunk->len)
3087 {
3088 clear();
3089 return *this;
3090 }
3091 size_t lastns;
3092 for (lastns = pchunk->len-1;
3093 lastns>0 &&
3094 (pchunk->buf8[lastns]==' ' || pchunk->buf8[lastns]=='\t');
3095 --lastns)
3096 ;
3097 int newlen = (int)(lastns - firstns + 1);
3098 if (newlen == pchunk->len)
3099 return *this;
3100 if (refCount()==1)
3101 {
3102 if (firstns>0)
3103 lStr_memcpy( pchunk->buf8, pchunk->buf8+firstns, newlen );
3104 pchunk->buf8[newlen] = 0;
3105 pchunk->len = newlen;
3106 }
3107 else
3108 {
3109 lstring_chunk_t * poldchunk = pchunk;
3110 release();
3111 alloc( newlen );
3112 _lStr_memcpy( pchunk->buf8, poldchunk->buf8+firstns, newlen );
3113 pchunk->buf8[newlen] = 0;
3114 pchunk->len = newlen;
3115 }
3116 return *this;
3117 }
3118
atoi() const3119 int lString8::atoi() const
3120 {
3121 int sgn = 1;
3122 int n = 0;
3123 const lChar8 * s = c_str();
3124 while (*s == ' ' || *s == '\t')
3125 s++;
3126 if (*s == '-')
3127 {
3128 sgn = -1;
3129 s++;
3130 }
3131 else if (*s == '+')
3132 {
3133 s++;
3134 }
3135 while (*s>='0' && *s<='9')
3136 {
3137 n = n * 10 + ( (*s)-'0' );
3138 s++;
3139 }
3140 return (sgn>0)?n:-n;
3141 }
3142
atoi64() const3143 lInt64 lString8::atoi64() const
3144 {
3145 int sgn = 1;
3146 lInt64 n = 0;
3147 const lChar8 * s = c_str();
3148 while (*s == ' ' || *s == '\t')
3149 s++;
3150 if (*s == '-')
3151 {
3152 sgn = -1;
3153 s++;
3154 }
3155 else if (*s == '+')
3156 {
3157 s++;
3158 }
3159 while (*s>='0' && *s<='9')
3160 {
3161 n = n * 10 + ( (*s)-'0' );
3162 }
3163 return (sgn>0) ? n : -n;
3164 }
3165
3166 // constructs string representation of integer
itoa(int n)3167 lString8 lString8::itoa( int n )
3168 {
3169 lChar8 buf[16];
3170 int i=0;
3171 int negative = 0;
3172 if (n==0)
3173 return cs8("0");
3174 else if (n<0)
3175 {
3176 negative = 1;
3177 n = -n;
3178 }
3179 for ( ; n; n/=10 )
3180 {
3181 buf[i++] = '0' + (n%10);
3182 }
3183 lString8 res;
3184 res.reserve(i+negative);
3185 if (negative)
3186 res.append(1, '-');
3187 for (int j=i-1; j>=0; j--)
3188 res.append(1, buf[j]);
3189 return res;
3190 }
3191
3192 // constructs string representation of integer
itoa(unsigned int n)3193 lString8 lString8::itoa( unsigned int n )
3194 {
3195 lChar8 buf[16];
3196 int i=0;
3197 if (n==0)
3198 return cs8("0");
3199 for ( ; n; n/=10 )
3200 {
3201 buf[i++] = '0' + (n%10);
3202 }
3203 lString8 res;
3204 res.reserve(i);
3205 for (int j=i-1; j>=0; j--)
3206 res.append(1, buf[j]);
3207 return res;
3208 }
3209
3210 // constructs string representation of integer
itoa(lInt64 n)3211 lString8 lString8::itoa( lInt64 n )
3212 {
3213 lChar8 buf[32];
3214 int i=0;
3215 int negative = 0;
3216 if (n==0)
3217 return cs8("0");
3218 else if (n<0)
3219 {
3220 negative = 1;
3221 n = -n;
3222 }
3223 for ( ; n; n/=10 )
3224 {
3225 buf[i++] = '0' + (n%10);
3226 }
3227 lString8 res;
3228 res.reserve(i+negative);
3229 if (negative)
3230 res.append(1, '-');
3231 for (int j=i-1; j>=0; j--)
3232 res.append(1, buf[j]);
3233 return res;
3234 }
3235
3236 // constructs string representation of integer
itoa(int n)3237 lString16 lString16::itoa( int n )
3238 {
3239 return itoa( (lInt64)n );
3240 }
3241
3242 // constructs string representation of integer
itoa(unsigned int n)3243 lString16 lString16::itoa( unsigned int n )
3244 {
3245 return itoa( (lUInt64) n );
3246 }
3247
3248 // constructs string representation of integer
itoa(lInt64 n)3249 lString16 lString16::itoa( lInt64 n )
3250 {
3251 lChar16 buf[32];
3252 int i=0;
3253 int negative = 0;
3254 if (n==0)
3255 return lString16("0");
3256 else if (n<0)
3257 {
3258 negative = 1;
3259 n = -n;
3260 }
3261 for ( ; n && i<30; n/=10 )
3262 {
3263 buf[i++] = (lChar16)('0' + (n%10));
3264 }
3265 lString16 res;
3266 res.reserve(i+negative);
3267 if (negative)
3268 res.append(1, L'-');
3269 for (int j=i-1; j>=0; j--)
3270 res.append(1, buf[j]);
3271 return res;
3272 }
3273
3274 // constructs string representation of integer
itoa(lUInt64 n)3275 lString16 lString16::itoa( lUInt64 n )
3276 {
3277 lChar16 buf[32];
3278 int i=0;
3279 if (n==0)
3280 return lString16("0");
3281 for ( ; n; n/=10 )
3282 {
3283 buf[i++] = (lChar16)('0' + (n%10));
3284 }
3285 lString16 res;
3286 res.reserve(i);
3287 for (int j=i-1; j>=0; j--)
3288 res.append(1, buf[j]);
3289 return res;
3290 }
3291
3292 // constructs string representation of integer
itoa(int n)3293 lString32 lString32::itoa( int n )
3294 {
3295 return itoa( (lInt64)n );
3296 }
3297
3298 // constructs string representation of integer
itoa(unsigned int n)3299 lString32 lString32::itoa( unsigned int n )
3300 {
3301 return itoa( (lUInt64) n );
3302 }
3303
3304 // constructs string representation of integer
itoa(lInt64 n)3305 lString32 lString32::itoa( lInt64 n )
3306 {
3307 lChar32 buf[32];
3308 int i=0;
3309 int negative = 0;
3310 if (n==0)
3311 return cs32("0");
3312 else if (n<0)
3313 {
3314 negative = 1;
3315 n = -n;
3316 }
3317 for ( ; n && i<30; n/=10 )
3318 {
3319 buf[i++] = (lChar32)('0' + (n%10));
3320 }
3321 lString32 res;
3322 res.reserve(i+negative);
3323 if (negative)
3324 res.append(1, U'-');
3325 for (int j=i-1; j>=0; j--)
3326 res.append(1, buf[j]);
3327 return res;
3328 }
3329
3330 // constructs string representation of integer
itoa(lUInt64 n)3331 lString32 lString32::itoa( lUInt64 n )
3332 {
3333 lChar32 buf[32];
3334 int i=0;
3335 if (n==0)
3336 return cs32("0");
3337 for ( ; n; n/=10 )
3338 {
3339 buf[i++] = (lChar32)('0' + (n%10));
3340 }
3341 lString32 res;
3342 res.reserve(i);
3343 for (int j=i-1; j>=0; j--)
3344 res.append(1, buf[j]);
3345 return res;
3346 }
3347
lvUnicodeIsAlpha(lChar32 ch)3348 bool lvUnicodeIsAlpha( lChar32 ch )
3349 {
3350 if ( ch<128 ) {
3351 if ( (ch>='a' && ch<='z') || (ch>='A' && ch<='Z') )
3352 return true;
3353 } else if ( ch>=0xC0 && ch<=0x1ef9) {
3354 return true;
3355 }
3356 return false;
3357 }
3358
uppercase()3359 lString8 & lString8::uppercase()
3360 {
3361 lStr_uppercase( modify(), length() );
3362 return *this;
3363 }
3364
lowercase()3365 lString8 & lString8::lowercase()
3366 {
3367 lStr_lowercase( modify(), length() );
3368 return *this;
3369 }
3370
uppercase()3371 lString32 & lString32::uppercase()
3372 {
3373 lStr_uppercase( modify(), length() );
3374 return *this;
3375 }
3376
lowercase()3377 lString32 & lString32::lowercase()
3378 {
3379 lStr_lowercase( modify(), length() );
3380 return *this;
3381 }
3382
capitalize()3383 lString32 & lString32::capitalize()
3384 {
3385 lStr_capitalize( modify(), length() );
3386 return *this;
3387 }
3388
fullWidthChars()3389 lString32 & lString32::fullWidthChars()
3390 {
3391 lStr_fullWidthChars( modify(), length() );
3392 return *this;
3393 }
3394
lStr_uppercase(lChar8 * str,int len)3395 void lStr_uppercase( lChar8 * str, int len )
3396 {
3397 for ( int i=0; i<len; i++ ) {
3398 lChar32 ch = str[i];
3399 if ( ch>='a' && ch<='z' ) {
3400 str[i] = ch - 0x20;
3401 } else if ( ch>=0xE0 && ch<=0xFF ) {
3402 str[i] = ch - 0x20;
3403 }
3404 }
3405 }
3406
lStr_lowercase(lChar8 * str,int len)3407 void lStr_lowercase( lChar8 * str, int len )
3408 {
3409 for ( int i=0; i<len; i++ ) {
3410 lChar32 ch = str[i];
3411 if ( ch>='A' && ch<='Z' ) {
3412 str[i] = ch + 0x20;
3413 } else if ( ch>=0xC0 && ch<=0xDF ) {
3414 str[i] = ch + 0x20;
3415 }
3416 }
3417 }
3418
/// Upper-case len 32-bit characters in place.
/// With USE_UTF8PROC==1 every character is mapped through utf8proc_toupper();
/// otherwise a small built-in table of ranges is used.
void lStr_uppercase( lChar32 * str, int len )
{
    for ( int i=0; i<len; i++ ) {
        lChar32 ch = str[i];
#if (USE_UTF8PROC==1)
        str[i] = utf8proc_toupper(ch);
#else
        // fallback: each listed lower-case range sits 0x20 above its upper-case range
        if ( ch>='a' && ch<='z' ) {
            str[i] = ch - 0x20;
        } else if ( ch>=0xE0 && ch<=0xFF ) {
            str[i] = ch - 0x20;
        } else if ( ch>=0x430 && ch<=0x44F ) {
            str[i] = ch - 0x20;
        } else if ( ch>=0x3b0 && ch<=0x3cF ) {
            str[i] = ch - 0x20;
        } else if ( (ch >> 8)==0x1F ) { // greek
            // code points 0x1F00..0x1FFF: setting bit 3 selects the upper-case form,
            // except for the low-byte range 0x70..0x7F (left unchanged) and >= 0xF0
            lChar32 n = ch & 255;
            if (n<0x70) {
                str[i] = ch | 8;
            } else if (n<0x80) {

            } else if (n<0xF0) {
                str[i] = ch | 8;
            }
        }
#endif
    }
}
3447
/// Lower-case len 32-bit characters in place.
/// With USE_UTF8PROC==1 every character is mapped through utf8proc_tolower();
/// otherwise a small built-in table of ranges is used.
void lStr_lowercase( lChar32 * str, int len )
{
    for ( int i=0; i<len; i++ ) {
        lChar32 ch = str[i];
#if (USE_UTF8PROC==1)
        str[i] = utf8proc_tolower(ch);
#else
        // fallback: each listed upper-case range sits 0x20 below its lower-case range
        if ( ch>='A' && ch<='Z' ) {
            str[i] = ch + 0x20;
        } else if ( ch>=0xC0 && ch<=0xDF ) {
            str[i] = ch + 0x20;
        } else if ( ch>=0x410 && ch<=0x42F ) {
            str[i] = ch + 0x20;
        } else if ( ch>=0x390 && ch<=0x3aF ) {
            str[i] = ch + 0x20;
        } else if ( (ch >> 8)==0x1F ) { // greek
            // code points 0x1F00..0x1FFF: clearing bit 3 selects the lower-case form,
            // except for the low-byte range 0x70..0x7F (left unchanged) and >= 0xF0
            lChar32 n = ch & 255;
            if (n<0x70) {
                str[i] = ch & (~8);
            } else if (n<0x80) {

            } else if (n<0xF0) {
                str[i] = ch & (~8);
            }
        }
#endif
    }
}
3476
lStr_fullWidthChars(lChar32 * str,int len)3477 void lStr_fullWidthChars( lChar32 * str, int len )
3478 {
3479 for ( int i=0; i<len; i++ ) {
3480 lChar32 ch = str[i];
3481 if ( ch>=0x21 && ch<=0x7E ) {
3482 // full-width versions of ascii chars 0x21-0x7E are at 0xFF01-0Xff5E
3483 str[i] = ch + UNICODE_ASCII_FULL_WIDTH_OFFSET;
3484 } else if ( ch==0x20 ) {
3485 str[i] = UNICODE_CJK_IDEOGRAPHIC_SPACE; // full-width space
3486 }
3487 }
3488 }
3489
/// Upper-case, in place, every character that follows a word separator (and
/// the first character of the buffer). Separators are detected with
/// lStr_isWordSeparator() on the original, unconverted character.
void lStr_capitalize( lChar32 * str, int len )
{
    bool prev_is_word_sep = true; // first char of string will be capitalized
    for ( int i=0; i<len; i++ ) {
        lChar32 ch = str[i];
        if (prev_is_word_sep) {
            // same conversion as done in lStr_uppercase()
#if (USE_UTF8PROC==1)
            str[i] = utf8proc_toupper(ch);
#else
            if ( ch>='a' && ch<='z' ) {
                str[i] = ch - 0x20;
            } else if ( ch>=0xE0 && ch<=0xFF ) {
                str[i] = ch - 0x20;
            } else if ( ch>=0x430 && ch<=0x44F ) {
                str[i] = ch - 0x20;
            } else if ( ch>=0x3b0 && ch<=0x3cF ) {
                str[i] = ch - 0x20;
            } else if ( (ch >> 8)==0x1F ) { // greek
                // setting bit 3 selects the upper-case form; low bytes 0x70..0x7F
                // and >= 0xF0 are left unchanged
                lChar32 n = ch & 255;
                if (n<0x70) {
                    str[i] = ch | 8;
                } else if (n<0x80) {

                } else if (n<0xF0) {
                    str[i] = ch | 8;
                }
            }
#endif
        }
        // update prev_is_word_sep for next char
        prev_is_word_sep = lStr_isWordSeparator(ch);
    }
}
3524
3525
/// Collapse runs of whitespace (space, tab, CR, LF) in buf to a single space,
/// rewriting the buffer in place.
/// @param buf              characters to process; also receives the result
/// @param len              number of characters in buf
/// @param allowStartSpace  keep one space when the text starts with whitespace
/// @param allowEndSpace    keep one space when whitespace is followed by a zero character
/// @param removeEolHyphens drop a '-' that precedes a line break when the
///                         character before it satisfies lvUnicodeIsAlpha()
/// @return number of characters written to buf (no terminator is written here)
int TrimDoubleSpaces(lChar32 * buf, int len, bool allowStartSpace, bool allowEndSpace, bool removeEolHyphens)
{
    lChar32 * psrc = buf; // read cursor
    lChar32 * pdst = buf; // write cursor, never ahead of psrc
    int state = 0; // 0=beginning, 1=after space, 2=after non-space
    while ((len--) > 0) {
        lChar32 ch = *psrc++;
        if (ch == ' ' || ch == '\t') {
            if ( state==2 ) {
                // NOTE(review): *psrc is the character after the current one; on the
                // last iteration this reads buf[len], presumably the string
                // terminator — confirm callers always pass a terminated buffer.
                if ( *psrc || allowEndSpace ) // if not last
                    *pdst++ = ' ';
            } else if ( state==0 && allowStartSpace ) {
                *pdst++ = ' ';
            }
            state = 1;
        } else if ( ch=='\r' || ch=='\n' ) {
            if ( state==2 ) {
                if ( removeEolHyphens && pdst>(buf+1) && *(pdst-1)=='-' && lvUnicodeIsAlpha(*(pdst-2)) )
                    pdst--; // remove hyphen at end of line
                if ( *psrc || allowEndSpace ) // if not last
                    *pdst++ = ' ';
            } else if ( state==0 && allowStartSpace ) {
                *pdst++ = ' ';
            }
            state = 1;
        } else {
            // ordinary character: copy through
            *pdst++ = ch;
            state = 2;
        }
    }
    return (int)(pdst - buf);
}
3558
trimDoubleSpaces(bool allowStartSpace,bool allowEndSpace,bool removeEolHyphens)3559 lString32 & lString32::trimDoubleSpaces( bool allowStartSpace, bool allowEndSpace, bool removeEolHyphens )
3560 {
3561 if ( empty() )
3562 return *this;
3563 lChar32 * buf = modify();
3564 int len = length();
3565 int nlen = TrimDoubleSpaces(buf, len, allowStartSpace, allowEndSpace, removeEolHyphens);
3566 if (nlen < len)
3567 limit(nlen);
3568 return *this;
3569 }
3570
getHash() const3571 lUInt32 lString8::getHash() const
3572 {
3573 lUInt32 res = 0;
3574 for (int i=0; i < pchunk->len; i++)
3575 res = res * STRING_HASH_MULT + pchunk->buf8[i];
3576 return res;
3577 }
3578
/// Definition of the shared, default-constructed lString8 constant
/// (returned e.g. by substr() when the position is out of range).
const lString8 lString8::empty_str;
3580
Utf8CharCount(const lChar8 * str)3581 int Utf8CharCount( const lChar8 * str )
3582 {
3583 int count = 0;
3584 lUInt8 ch;
3585 while ( (ch=*str++) ) {
3586 if ( (ch & 0x80) == 0 ) {
3587 } else if ( (ch & 0xE0) == 0xC0 ) {
3588 if ( !(*str++) )
3589 break;
3590 } else if ( (ch & 0xF0) == 0xE0 ) {
3591 if ( !(*str++) )
3592 break;
3593 if ( !(*str++) )
3594 break;
3595 } else if ( (ch & 0xF8) == 0xF0 ) {
3596 if ( !(*str++) )
3597 break;
3598 if ( !(*str++) )
3599 break;
3600 if ( !(*str++) )
3601 break;
3602 } else {
3603 // In Unicode standard maximum length of UTF-8 sequence is 4 byte!
3604 // invalid first byte in UTF-8 sequence, just leave as is
3605 ;
3606 }
3607 count++;
3608 }
3609 return count;
3610 }
3611
Utf8CharCount(const lChar8 * str,int len)3612 int Utf8CharCount( const lChar8 * str, int len )
3613 {
3614 if (len == 0)
3615 return 0;
3616 int count = 0;
3617 lUInt8 ch;
3618 const lChar8 * endp = str + len;
3619 while ((ch=*str++)) {
3620 if ( (ch & 0x80) == 0 ) {
3621 } else if ( (ch & 0xE0) == 0xC0 ) {
3622 str++;
3623 } else if ( (ch & 0xF0) == 0xE0 ) {
3624 str+=2;
3625 } else if ( (ch & 0xF8) == 0xF0 ) {
3626 str+=3;
3627 } else {
3628 // invalid first byte of UTF-8 sequence, just leave as is
3629 ;
3630 }
3631 if (str > endp)
3632 break;
3633 count++;
3634 }
3635 return count;
3636 }
3637
Utf16CharCount(const lChar16 * str)3638 int Utf16CharCount( const lChar16 * str )
3639 {
3640 int count = 0;
3641 lUInt16 ch;
3642 while ( (ch=*str++) ) {
3643 if ( (ch >=0 && ch <= 0xD7FF) || (ch >= 0xE000 && ch <= 0xFFFF) ) {
3644 } else if ( ch >= 0xD800 && ch <= 0xDBFF ) {
3645 if ( !(*str++) )
3646 break;
3647 } else {
3648 // In Unicode standard maximum length of UTF-16 sequence is 2 word!
3649 // invalid first word in UTF-16 sequence, just leave as is
3650 ;
3651 }
3652 count++;
3653 }
3654 return count;
3655 }
3656
Utf16CharCount(const lChar16 * str,int len)3657 int Utf16CharCount( const lChar16 * str, int len )
3658 {
3659 if (len == 0)
3660 return 0;
3661 int count = 0;
3662 lUInt16 ch;
3663 const lChar16 * endp = str + len;
3664 while ( (ch=*str++) ) {
3665 if ( (ch >=0 && ch <= 0xD7FF) || (ch >= 0xE000 && ch <= 0xFFFF) ) {
3666 } else if ( ch >= 0xD800 && ch <= 0xDBFF ) {
3667 str++;
3668 } else {
3669 // invalid first word of UTF-16 sequence, just leave as is
3670 ;
3671 }
3672 if (str > endp)
3673 break;
3674 count++;
3675 }
3676 return count;
3677 }
3678
/// Count the characters in a zero-terminated WTF-8 string, i.e. UTF-8 in which
/// a character may also appear as a surrogate pair written as two 3-byte
/// sequences. Such a pair is counted as ONE character.
/// Counting stops when a sequence is cut short by the terminator.
int Wtf8CharCount( const lChar8 * str )
{
    int count = 0;
    lUInt8 ch;
    lUInt32 p; // value decoded from a 3-byte sequence
    while ( (ch=*str++) ) {
        if ( (ch & 0x80) == 0 ) {
            // single byte
        } else if ( (ch & 0xE0) == 0xC0 ) {
            // 2-byte sequence
            if ( !(*str++) )
                break;
        } else if ( (ch & 0xF0) == 0xE0 ) {
            // 3-byte sequence: decode it to be able to recognise surrogates
            p = (ch & 0x0F) << 12;
            if ( !(ch=*str++) )
                break;
            p |= (ch & 0x3F) << 6;
            if ( !(ch=*str++) )
                break;
            p |= ch & 0x3F;
            if (p >= 0xD800 && p <= 0xDBFF) { // high surrogate
                // peek (without consuming) at the next 3-byte sequence
                ch = *str;
                if ((ch & 0xF0) == 0xE0) {
                    p = (ch & 0x0F) << 12;
                    if ( !(ch=*(str+1)) )
                        break;
                    p |= (ch & 0x3F) << 6;
                    if ( !(ch=*(str+2)) )
                        break;
                    p |= ch & 0x3F;
                    if (p >= 0xDC00 && p <= 0xDFFF) { // low surrogate
                        // consume it: the pair forms a single character
                        str += 3;
                    }
                }
            }
        } else if ( (ch & 0xF8) == 0xF0 ) {
            // Mostly unused
            if ( !(*str++) )
                break;
            if ( !(*str++) )
                break;
            if ( !(*str++) )
                break;
        } else {
            // invalid first byte in UTF-8 sequence, just leave as is
            ;
        }
        count++;
    }
    return count;
}
3728
Wtf8CharCount(const lChar8 * str,int len)3729 int Wtf8CharCount( const lChar8 * str, int len )
3730 {
3731 if (len == 0)
3732 return 0;
3733 int count = 0;
3734 lUInt8 ch;
3735 const lChar8 * endp = str + len;
3736 while ((ch=*str)) {
3737 if ( (ch & 0x80) == 0 ) {
3738 str++;
3739 } else if ( (ch & 0xE0) == 0xC0 ) {
3740 str+=2;
3741 } else if ( (ch & 0xF0) == 0xE0 ) {
3742 str+=3;
3743 ch=*str;
3744 if ( (ch & 0xF0) == 0xE0 ) {
3745 str+=3;
3746 }
3747 } else if ( (ch & 0xF8) == 0xF0 ) {
3748 // Mostly unused
3749 str+=4;
3750 } else {
3751 // invalid first byte of UTF-8 sequence, just leave as is
3752 str++;
3753 }
3754 if (str > endp)
3755 break;
3756 count++;
3757 }
3758 return count;
3759 }
3760
charUtf8ByteCount(lUInt32 ch)3761 inline int charUtf8ByteCount(lUInt32 ch) {
3762 if (!(ch & ~0x7F))
3763 return 1;
3764 if (!(ch & ~0x7FF))
3765 return 2;
3766 if (!(ch & ~0xFFFF))
3767 return 3;
3768 if (!(ch & ~0x1FFFFF))
3769 return 4;
3770 // In Unicode Standard codepoint must be in range U+0000..U+10FFFF
3771 // return invalid codepoint as one byte
3772 return 1;
3773 }
3774
charUtf16WordCount(lUInt32 ch)3775 inline int charUtf16WordCount(lUInt32 ch) {
3776 if (!(ch & ~0xFFFF))
3777 return 1;
3778 if (!(ch & ~0x1FFFFF))
3779 return 2;
3780 // In Unicode Standard codepoint must be in range U+0000..U+10FFFF
3781 // return invalid codepoint as one word
3782 return 1;
3783 }
3784
Utf8ByteCount(const lChar32 * str)3785 int Utf8ByteCount(const lChar32 * str)
3786 {
3787 int count = 0;
3788 lUInt32 ch;
3789 while ( (ch=*str++) ) {
3790 count += charUtf8ByteCount(ch);
3791 }
3792 return count;
3793 }
3794
charWtf8ByteCount(lUInt32 ch)3795 inline int charWtf8ByteCount(lUInt32 ch) {
3796 if (!(ch & ~0x7F))
3797 return 1;
3798 if (!(ch & ~0x7FF))
3799 return 2;
3800 if (!(ch & ~0xFFFF))
3801 return 3;
3802 if (!(ch & ~0x1FFFFF))
3803 return 6;
3804 return 1;
3805 }
3806
Utf8ByteCount(const lChar32 * str,int len)3807 int Utf8ByteCount(const lChar32 * str, int len)
3808 {
3809 int count = 0;
3810 lUInt32 ch;
3811 while ((len--) > 0) {
3812 ch = *str++;
3813 count += charUtf8ByteCount(ch);
3814 }
3815 return count;
3816 }
3817
Utf16WordCount(const lChar32 * str,int len)3818 int Utf16WordCount(const lChar32 * str, int len)
3819 {
3820 int count = 0;
3821 lUInt32 ch;
3822 while ((len--) > 0) {
3823 ch = *str++;
3824 count += charUtf16WordCount(ch);
3825 }
3826 return count;
3827 }
3828
Wtf8ByteCount(const lChar32 * str,int len)3829 int Wtf8ByteCount(const lChar32 * str, int len)
3830 {
3831 int count = 0;
3832 lUInt32 ch;
3833 while ((len--) > 0) {
3834 ch = *str++;
3835 count += charWtf8ByteCount(ch);
3836 }
3837 return count;
3838 }
3839
Utf8ToUnicode(const lString8 & str)3840 lString32 Utf8ToUnicode( const lString8 & str )
3841 {
3842 return Utf8ToUnicode( str.c_str() );
3843 }
3844
Utf16ToUnicode(const lString16 & str)3845 lString32 Utf16ToUnicode( const lString16 & str )
3846 {
3847 return Utf16ToUnicode( str.c_str() );
3848 }
3849
// Payload (low 6 bits) of the UTF-8 continuation byte s[index], shifted left
// by `shift`. Expects a byte pointer named `s` in the enclosing scope.
#define CONT_BYTE(index,shift) (((lChar32)(s[index]) & 0x3F) << shift)
3851
/// Decode UTF-8 bytes from s into exactly len code points stored at p.
/// The loop is bounded only by the output count: the input is not checked
/// for its terminator or for valid continuation bytes.
/// NOTE(review): presumably len comes from Utf8CharCount() on the same input,
/// so s holds enough bytes — confirm at the call sites.
static void DecodeUtf8(const char * s, lChar32 * p, int len)
{
    lChar32 * endp = p + len;
    lUInt32 ch;
    while (p < endp) {
        ch = *s++; // lead byte; s now points at the first continuation byte
        if ( (ch & 0x80) == 0 ) {
            // single byte
            *p++ = (char)ch;
        } else if ( (ch & 0xE0) == 0xC0 ) {
            // 2-byte sequence: 5 + 6 payload bits
            *p++ = ((ch & 0x1F) << 6)
                    | CONT_BYTE(0,0);
            s++;
        } else if ( (ch & 0xF0) == 0xE0 ) {
            // 3-byte sequence: 4 + 6 + 6 payload bits
            *p++ = ((ch & 0x0F) << 12)
                | CONT_BYTE(0,6)
                | CONT_BYTE(1,0);
            s += 2;
        } else if ( (ch & 0xF8) == 0xF0 ) {
            // 4-byte sequence: 3 + 6 + 6 + 6 payload bits
            *p++ = ((ch & 0x07) << 18)
                | CONT_BYTE(0,12)
                | CONT_BYTE(1,6)
                | CONT_BYTE(2,0);
            s += 3;
        } else {
            // Invalid first byte in UTF-8 sequence
            // Pass with mask 0x7F, to resolve exception around env->NewStringUTF()
            *p++ = (char) (ch & 0x7F);
        }
    }
}
3882
DecodeWtf8(const char * s,lChar32 * p,int len)3883 static void DecodeWtf8(const char * s, lChar32 * p, int len)
3884 {
3885 lChar32 * endp = p + len;
3886 lUInt32 ch;
3887 while (p < endp) {
3888 ch = *s;
3889 bool matched = false;
3890 if ( (ch & 0x80) == 0 ) {
3891 matched = true;
3892 *p++ = (char)ch;
3893 s++;
3894 } else if ( (ch & 0xE0) == 0xC0 ) {
3895 matched = true;
3896 *p++ = ((ch & 0x1F) << 6)
3897 | CONT_BYTE(1,0);
3898 s += 2;
3899 } else if ( (ch & 0xF0) == 0xE0 ) {
3900 matched = true;
3901 *p++ = ((ch & 0x0F) << 12)
3902 | CONT_BYTE(1,6)
3903 | CONT_BYTE(2,0);
3904 s += 3;
3905 if (*(p-1) >= 0xD800 && *(p-1) <= 0xDBFF) { // what we wrote is a high surrogate,
3906 lUInt32 next = *s; // and there's room next for a low surrogate
3907 if ( (next & 0xF0) == 0xE0) { // is a 3-bytes sequence
3908 next = ((next & 0x0F) << 12) | CONT_BYTE(1,6) | CONT_BYTE(2,0);
3909 if (next >= 0xDC00 && next <= 0xDFFF) { // is a low surrogate: valid surrogates sequence
3910 ch = 0x10000 + ((*(p-1) & 0x3FF)<<10) + (next & 0x3FF);
3911 p--; // rewind to override what we wrote
3912 *p++ = ch;
3913 s += 3;
3914 }
3915 }
3916 }
3917 } else if ( (ch & 0xF8) == 0xF0 ) {
3918 // Mostly unused
3919 matched = true;
3920 *p++ = ((ch & 0x07) << 18)
3921 | CONT_BYTE(1,12)
3922 | CONT_BYTE(2,6)
3923 | CONT_BYTE(3,0);
3924 s += 4;
3925 } else {
3926 // Invalid first byte in UTF-8 sequence
3927 // Pass with mask 0x7F, to resolve exception around env->NewStringUTF()
3928 *p++ = (char) (ch & 0x7F);
3929 s++;
3930 matched = true; // just to avoid next if
3931 }
3932
3933 // unexpected character
3934 if (!matched) {
3935 *p++ = '?';
3936 s++;
3937 }
3938 }
3939 }
3940
DecodeUtf16(const lChar16 * s,lChar32 * p,int len)3941 static void DecodeUtf16(const lChar16 * s, lChar32 * p, int len)
3942 {
3943 lChar32 * endp = p + len;
3944 lUInt16 ch;
3945 while (p < endp) {
3946 ch = *s++;
3947 if ( (ch >=0 && ch <= 0xD7FF) || (ch >= 0xE000 && ch <= 0xFFFF) ) {
3948 *p++ = (lChar32)ch;
3949 } else if ( ch >= 0xD800 && ch < 0xDC00 ) {
3950 lUInt16 next = (lUInt16)*s;
3951 if (next >= 0xDC00 && next < 0xE000) {
3952 // convert surrogate pair into unicode code point
3953 // 110110wwwwxxxxxx, 110111xxxxxxxxxx => 000uuuuuxxxxxxxxxxxxxxxx
3954 // where uuuuu = wwww+1
3955 *p++ = ( ( ( (ch & 0x03C0) >> 6 ) + 1 ) << 16 ) | ((ch & 0x3F) << 10) | (next & 0x3FF);
3956 } else {
3957 // Invalid second word in UTF-16 sequence (including '\0')
3958 // Pass with mask 0x7F, to resolve exception around env->NewStringUTF()
3959 *p++ = (char) (ch & 0x7F);
3960 }
3961 s++;
3962 } else {
3963 // Invalid first word in UTF-16 sequence
3964 // Pass with mask 0x7F, to resolve exception around env->NewStringUTF()
3965 *p++ = (char) (ch & 0x7F);
3966 }
3967 }
3968 }
3969
// True when s[index] is a UTF-8 continuation byte, i.e. its top two bits
// are 10: original & 11000000(2) == 10000000(2).
// Expects a pointer named `s` in the enclosing scope.
#define IS_FOLLOWING(index) ((s[index] & 0xC0) == 0x80)
3972
/// Decode a UTF-8 (or WTF-8) byte buffer into UTF-32 code points.
///
/// @param src     input bytes
/// @param srclen  in: number of bytes available at `src`;
///                out: number of bytes consumed
/// @param dst     output buffer
/// @param dstlen  in: capacity of `dst` in code points;
///                out: number of code points written
///
/// Decoding stops when either buffer is exhausted, or when a lead byte
/// announces a multi-byte sequence whose remaining bytes lie beyond the end of
/// `src`; such an incomplete sequence is left unconsumed. A lead byte followed
/// by malformed continuation bytes yields '?' and consumes one byte. A byte
/// that is not a valid lead byte is stored with its top bit cleared.
void Utf8ToUnicode(const lUInt8 * src, int &srclen, lChar32 * dst, int &dstlen)
{
    const lUInt8 * s = src;
    const lUInt8 * ends = s + srclen;
    lChar32 * p = dst;
    lChar32 * endp = p + dstlen;
    lUInt32 ch;
    bool matched;   // set when the current lead byte was decoded (or deliberately passed through)
    while (p < endp && s < ends) {
        ch = *s;
        matched = false;
        if ( (ch & 0x80) == 0 ) {
            // single byte (ASCII)
            matched = true;
            *p++ = (char)ch;
            s++;
        } else if ( (ch & 0xE0) == 0xC0 ) {
            // 2-byte sequence; stop if its second byte is not in the buffer
            if (s + 2 > ends)
                break;
            if (IS_FOLLOWING(1)) {
                matched = true;
                *p++ = ((ch & 0x1F) << 6)
                        | CONT_BYTE(1,0);
                s += 2;
            }
        } else if ( (ch & 0xF0) == 0xE0 ) {
            // 3-byte sequence; stop if it is not completely in the buffer
            if (s + 3 > ends)
                break;
            if (IS_FOLLOWING(1) && IS_FOLLOWING(2)) {
                matched = true;
                *p++ = ((ch & 0x0F) << 12)
                    | CONT_BYTE(1,6)
                    | CONT_BYTE(2,0);
                s += 3;
                // Supports WTF-8 : https://en.wikipedia.org/wiki/UTF-8#WTF-8
                // a superset of UTF-8, that includes UTF-16 surrogates
                // in UTF-8 bytes (forbidden in well-formed UTF-8).
                // We may get that from bad producers or converters.
                // As these shouldn't be there in UTF-8, if we find
                // these surrogates in the right sequence, we might as well
                // convert the char they represent to the right Unicode
                // codepoint and display it instead of a '?'.
                //   Surrogates are code points from two special ranges of
                //   Unicode values, reserved for use as the leading, and
                //   trailing values of paired code units in UTF-16. Leading,
                //   also called high, surrogates are from D800 to DBFF, and
                //   trailing, or low, surrogates are from DC00 to DFFF. They
                //   are called surrogates, since they do not represent
                //   characters directly, but only as a pair.
                // If the three bytes of a possible low surrogate are not all
                // inside the buffer, the high surrogate is left as written.
                if (*(p-1) >= 0xD800 && *(p-1) <= 0xDBFF && s+2 < ends) { // what we wrote is a high surrogate,
                    lUInt32 next = *s;                                    // and there's room next for a low surrogate
                    if ( (next & 0xF0) == 0xE0 && IS_FOLLOWING(1) && IS_FOLLOWING(2)) { // is a valid 3-bytes sequence
                        next = ((next & 0x0F) << 12) | CONT_BYTE(1,6) | CONT_BYTE(2,0);
                        if (next >= 0xDC00 && next <= 0xDFFF) { // is a low surrogate: valid surrogates sequence
                            ch = 0x10000 + ((*(p-1) & 0x3FF)<<10) + (next & 0x3FF);
                            p--; // rewind to override what we wrote
                            *p++ = ch;
                            s += 3;
                        }
                    }
                }
            }
        } else if ( (ch & 0xF8) == 0xF0 ) {
            // 4-byte sequence; stop if it is not completely in the buffer
            if (s + 4 > ends)
                break;
            if (IS_FOLLOWING(1) && IS_FOLLOWING(2) && IS_FOLLOWING(3)) {
                matched = true;
                *p++ = ((ch & 0x07) << 18)
                    | CONT_BYTE(1,12)
                    | CONT_BYTE(2,6)
                    | CONT_BYTE(3,0);
                s += 4;
            }
        } else {
            // Invalid first byte in UTF-8 sequence
            // Pass with mask 0x7F, to resolve exception around env->NewStringUTF()
            *p++ = (char) (ch & 0x7F);
            s++;
            matched = true; // just to avoid next if
        }
        // unexpected character: a lead byte whose continuation bytes were
        // malformed; emit '?' and skip only the lead byte
        if (!matched) {
            *p++ = '?';
            s++;
        }
    }
    // report how much of each buffer was actually used
    srclen = (int)(s - src);
    dstlen = (int)(p - dst);
}
4061
Utf16ToUnicode(const lChar16 * src,int & srclen,lChar32 * dst,int & dstlen)4062 void Utf16ToUnicode(const lChar16 * src, int &srclen, lChar32 * dst, int &dstlen)
4063 {
4064 const lChar16 * s = src;
4065 const lChar16 * ends = s + srclen;
4066 lChar32 * p = dst;
4067 lChar32 * endp = p + dstlen;
4068 lUInt32 ch;
4069 bool matched;
4070 while (p < endp && s < ends) {
4071 ch = *s;
4072 matched = false;
4073 if ( (ch >=0 && ch <= 0xD7FF) || (ch >= 0xE000 && ch <= 0xFFFF) ) {
4074 matched = true;
4075 *p++ = (lChar32)ch;
4076 s++;
4077 } else if ( ch >= 0xD800 && ch < 0xDC00 ) {
4078 if (s + 2 > ends)
4079 break;
4080 lUInt16 next = *s;
4081 if (next >= 0xDC00 && next < 0xE000) {
4082 matched = true;
4083 // convert surrogate pair into unicode code point
4084 // 110110wwwwxxxxxx, 110111xxxxxxxxxx => 000uuuuuxxxxxxxxxxxxxxxx
4085 // where uuuuu = wwww+1
4086 *p++ = ( ( ( (ch & 0x03C0) >> 6 ) + 1 ) << 16 ) | ((ch & 0x3F) << 10) | (next & 0x3FF);
4087 s += 2;
4088 }
4089 } else {
4090 // Invalid first word in UTF-16 sequence
4091 // Pass with mask 0x7F, to resolve exception around env->NewStringUTF()
4092 *p++ = (char) (ch & 0x7F);
4093 s++;
4094 matched = true; // just to avoid next if
4095 }
4096 // unexpected character
4097 if (!matched) {
4098 *p++ = '?';
4099 s++;
4100 }
4101 }
4102 srclen = (int)(s - src);
4103 dstlen = (int)(p - dst);
4104 }
4105
Utf8ToUnicode(const char * s)4106 lString32 Utf8ToUnicode( const char * s ) {
4107 if (!s || !s[0])
4108 return lString32::empty_str;
4109 int len = Utf8CharCount( s );
4110 if (!len)
4111 return lString32::empty_str;
4112 lString32 dst;
4113 dst.append(len, (lChar32)0);
4114 lChar32 * p = dst.modify();
4115 DecodeUtf8(s, p, len);
4116 return dst;
4117 }
4118
Utf8ToUnicode(const char * s,int sz)4119 lString32 Utf8ToUnicode( const char * s, int sz ) {
4120 if (!s || !s[0] || sz <= 0)
4121 return lString32::empty_str;
4122 int len = Utf8CharCount( s, sz );
4123 if (!len)
4124 return lString32::empty_str;
4125 lString32 dst;
4126 dst.append(len, 0);
4127 lChar32 * p = dst.modify();
4128 DecodeUtf8(s, p, len);
4129 return dst;
4130 }
4131
Utf16ToUnicode(const lChar16 * s)4132 lString32 Utf16ToUnicode( const lChar16 * s )
4133 {
4134 if (!s || !s[0])
4135 return lString32::empty_str;
4136 int len = Utf16CharCount( s );
4137 if (!len)
4138 return lString32::empty_str;
4139 lString32 dst;
4140 dst.append(len, (lChar32)0);
4141 lChar32 * p = dst.modify();
4142 DecodeUtf16(s, p, len);
4143 return dst;
4144 }
4145
Utf16ToUnicode(const lChar16 * s,int sz)4146 lString32 Utf16ToUnicode( const lChar16 * s, int sz )
4147 {
4148 if (!s || !s[0] || sz <= 0)
4149 return lString32::empty_str;
4150 int len = Utf16CharCount( s, sz );
4151 if (!len)
4152 return lString32::empty_str;
4153 lString32 dst;
4154 dst.append(len, 0);
4155 lChar32 * p = dst.modify();
4156 DecodeUtf16(s, p, len);
4157 return dst;
4158 }
4159
Wtf8ToUnicode(const lString8 & str)4160 lString32 Wtf8ToUnicode( const lString8 & str )
4161 {
4162 return Wtf8ToUnicode( str.c_str() );
4163 }
4164
Wtf8ToUnicode(const char * s)4165 lString32 Wtf8ToUnicode( const char * s ) {
4166 if (!s || !s[0])
4167 return lString32::empty_str;
4168 int len = Wtf8CharCount( s );
4169 if (!len)
4170 return lString32::empty_str;
4171 lString32 dst;
4172 dst.append(len, (lChar32)0);
4173 lChar32 * p = dst.modify();
4174 DecodeWtf8(s, p, len);
4175 return dst;
4176 }
4177
Wtf8ToUnicode(const char * s,int sz)4178 lString32 Wtf8ToUnicode( const char * s, int sz ) {
4179 if (!s || !s[0] || sz <= 0)
4180 return lString32::empty_str;
4181 int len = Utf8CharCount( s, sz );
4182 if (!len)
4183 return lString32::empty_str;
4184 lString32 dst;
4185 dst.append(len, 0);
4186 lChar32 * p = dst.modify();
4187 DecodeWtf8(s, p, len);
4188 return dst;
4189 }
4190
UnicodeToUtf8(const lChar32 * s,int count)4191 lString8 UnicodeToUtf8(const lChar32 * s, int count)
4192 {
4193 if (count <= 0)
4194 return lString8::empty_str;
4195 lString8 dst;
4196 int len = Utf8ByteCount(s, count);
4197 if (len <= 0)
4198 return lString8::empty_str;
4199 dst.append( len, ' ' );
4200 lChar8 * buf = dst.modify();
4201 {
4202 lUInt32 ch;
4203 while ((count--) > 0) {
4204 ch = *s++;
4205 if (!(ch & ~0x7F)) {
4206 *buf++ = ( (lUInt8)ch );
4207 } else if (!(ch & ~0x7FF)) {
4208 *buf++ = ( (lUInt8) ( ((ch >> 6) & 0x1F) | 0xC0 ) );
4209 *buf++ = ( (lUInt8) ( ((ch ) & 0x3F) | 0x80 ) );
4210 } else if (!(ch & ~0xFFFF)) {
4211 *buf++ = ( (lUInt8) ( ((ch >> 12) & 0x0F) | 0xE0 ) );
4212 *buf++ = ( (lUInt8) ( ((ch >> 6) & 0x3F) | 0x80 ) );
4213 *buf++ = ( (lUInt8) ( ((ch ) & 0x3F) | 0x80 ) );
4214 } else if (!(ch & ~0x1FFFFF)) {
4215 *buf++ = ( (lUInt8) ( ((ch >> 18) & 0x07) | 0xF0 ) );
4216 *buf++ = ( (lUInt8) ( ((ch >> 12) & 0x3F) | 0x80 ) );
4217 *buf++ = ( (lUInt8) ( ((ch >> 6) & 0x3F) | 0x80 ) );
4218 *buf++ = ( (lUInt8) ( ((ch ) & 0x3F) | 0x80 ) );
4219 } else {
4220 // invalid codepoint
4221 // In Unicode Standard codepoint must be in range U+0000 .. U+10FFFF
4222 *buf++ = '?';
4223 }
4224 }
4225 }
4226 return dst;
4227 }
4228
UnicodeToUtf16(const lChar32 * s,int count)4229 lString16 UnicodeToUtf16(const lChar32 * s, int count)
4230 {
4231 if (count <= 0)
4232 return lString16::empty_str;
4233 lString16 dst;
4234 int len = Utf16WordCount(s, count);
4235 if (len <= 0)
4236 return lString16::empty_str;
4237 dst.append( len, ' ' );
4238 lChar16 * buf = dst.modify();
4239 {
4240 lUInt32 ch;
4241 while ((count--) > 0) {
4242 ch = *s++;
4243 if (!(ch & ~0xFFFF)) {
4244 *buf++ = (lChar16)ch;
4245 } else if (!(ch & ~0x1FFFFF)) {
4246 // put into a surrogate pair
4247 // 000uuuuuxxxxxxxxxxxxxxxx => 110110wwwwxxxxxx, 110111xxxxxxxxxx
4248 // where wwww = uuuuu - 1
4249 // first word
4250 *buf++ = (lChar16) ( 0xD800 | ( ( ( (ch >> 16) & 0x1F ) - 1 ) << 6 ) | ( (ch >> 10) & 0x3F ) );
4251 // second word
4252 *buf++ = (lChar16) ( 0xDC00 | (ch & 0x3FF) );
4253 } else {
4254 // invalid codepoint
4255 // In Unicode Standard codepoint must be in range U+0000 .. U+10FFFF
4256 *buf++ = L'?';
4257 }
4258 }
4259 }
4260 return dst;
4261 }
4262
UnicodeToUtf8(const lString32 & str)4263 lString8 UnicodeToUtf8( const lString32 & str )
4264 {
4265 return UnicodeToUtf8(str.c_str(), str.length());
4266 }
4267
UnicodeToUtf16(const lString32 & str)4268 lString16 UnicodeToUtf16( const lString32 & str )
4269 {
4270 return UnicodeToUtf16(str.c_str(), str.length());
4271 }
4272
UnicodeToWtf8(const lChar32 * s,int count)4273 lString8 UnicodeToWtf8(const lChar32 * s, int count)
4274 {
4275 if (count <= 0)
4276 return lString8::empty_str;
4277 lString8 dst;
4278 int len = Wtf8ByteCount(s, count);
4279 if (len <= 0)
4280 return lString8::empty_str;
4281 dst.append( len, ' ' );
4282 lChar8 * buf = dst.modify();
4283 {
4284 lUInt32 ch;
4285 while ((count--) > 0) {
4286 ch = *s++;
4287 if (!(ch & ~0x7F)) {
4288 *buf++ = ( (lUInt8)ch );
4289 } else if (!(ch & ~0x7FF)) {
4290 *buf++ = ( (lUInt8) ( ((ch >> 6) & 0x1F) | 0xC0 ) );
4291 *buf++ = ( (lUInt8) ( ((ch ) & 0x3F) | 0x80 ) );
4292 } else if (!(ch & ~0xFFFF)) {
4293 *buf++ = ( (lUInt8) ( ((ch >> 12) & 0x0F) | 0xE0 ) );
4294 *buf++ = ( (lUInt8) ( ((ch >> 6) & 0x3F) | 0x80 ) );
4295 *buf++ = ( (lUInt8) ( ((ch ) & 0x3F) | 0x80 ) );
4296 } else if (!(ch & ~0x1FFFFF)) {
4297 // UTF-16 Scalar Value
4298 // 000uuuuu xxxxxxxxxxxxxxxx
4299 // UTF-16
4300 // 110110wwwwxxxxxx 110111xxxxxxxxxx
4301 // wwww = uuuuu - 1
4302 lUInt16 wwww = (ch >> 16) - 1;
4303 lUInt16 low = ch & 0xFFFF;
4304 lUInt32 hiSurr = 0xD800 | (wwww << 6) | (low >> 10); // high surrogate
4305 lUInt32 lowSurr = 0xDC00 | (low & 0x3FF); // low surrogate
4306 *buf++ = ( (lUInt8) ( ((hiSurr >> 12) & 0x0F) | 0xE0 ) );
4307 *buf++ = ( (lUInt8) ( ((hiSurr >> 6) & 0x3F) | 0x80 ) );
4308 *buf++ = ( (lUInt8) ( ((hiSurr ) & 0x3F) | 0x80 ) );
4309 *buf++ = ( (lUInt8) ( ((lowSurr >> 12) & 0x0F) | 0xE0 ) );
4310 *buf++ = ( (lUInt8) ( ((lowSurr >> 6) & 0x3F) | 0x80 ) );
4311 *buf++ = ( (lUInt8) ( ((lowSurr ) & 0x3F) | 0x80 ) );
4312 } else {
4313 // invalid codepoint
4314 // In Unicode Standard codepoint must be in range U+0000 .. U+10FFFF
4315 *buf++ = '?';
4316 }
4317 }
4318 }
4319 return dst;
4320 }
4321
UnicodeToWtf8(const lString32 & str)4322 lString8 UnicodeToWtf8( const lString32 & str )
4323 {
4324 return UnicodeToWtf8(str.c_str(), str.length());
4325 }
4326
UnicodeTo8Bit(const lString32 & str,const lChar8 ** table)4327 lString8 UnicodeTo8Bit( const lString32 & str, const lChar8 * * table )
4328 {
4329 lString8 buf;
4330 buf.reserve( str.length() );
4331 for (int i=0; i < str.length(); i++) {
4332 lChar32 ch = str[i];
4333 const lChar8 * p = table[ (ch>>8) & 255 ];
4334 if ( p ) {
4335 buf += p[ ch&255 ];
4336 } else {
4337 buf += '?';
4338 }
4339 }
4340 return buf;
4341 }
4342
ByteToUnicode(const lString8 & str,const lChar32 * table)4343 lString32 ByteToUnicode( const lString8 & str, const lChar32 * table )
4344 {
4345 lString32 buf;
4346 buf.reserve( str.length() );
4347 for (int i=0; i < str.length(); i++) {
4348 lChar32 ch = (unsigned char)str[i];
4349 lChar32 ch32 = ((ch & 0x80) && table) ? table[ (ch&0x7F) ] : ch;
4350 buf += ch32;
4351 }
4352 return buf;
4353 }
4354
4355
4356 #if !defined(__SYMBIAN32__) && defined(_WIN32)
4357
UnicodeToLocal(const lString32 & str)4358 lString8 UnicodeToLocal( const lString32 & str )
4359 {
4360 lString8 dst;
4361 if (str.empty())
4362 return dst;
4363 lString16 utf16 = UnicodeToUtf16(str);
4364 CHAR def_char = '?';
4365 BOOL usedDefChar = FALSE;
4366 int len = WideCharToMultiByte(
4367 CP_ACP,
4368 WC_COMPOSITECHECK | WC_DISCARDNS
4369 | WC_SEPCHARS | WC_DEFAULTCHAR,
4370 utf16.c_str(),
4371 utf16.length(),
4372 NULL,
4373 0,
4374 &def_char,
4375 &usedDefChar
4376 );
4377 if (len)
4378 {
4379 dst.insert(0, len, ' ');
4380 WideCharToMultiByte(
4381 CP_ACP,
4382 WC_COMPOSITECHECK | WC_DISCARDNS
4383 | WC_SEPCHARS | WC_DEFAULTCHAR,
4384 utf16.c_str(),
4385 utf16.length(),
4386 dst.modify(),
4387 len,
4388 &def_char,
4389 &usedDefChar
4390 );
4391 }
4392 return dst;
4393 }
4394
LocalToUnicode(const lString8 & str)4395 lString32 LocalToUnicode( const lString8 & str )
4396 {
4397 lString16 utf16;
4398 if (str.empty())
4399 return lString32::empty_str;
4400 int len = MultiByteToWideChar(
4401 CP_ACP,
4402 0,
4403 str.c_str(),
4404 str.length(),
4405 NULL,
4406 0
4407 );
4408 if (len)
4409 {
4410 utf16.insert(0, len, ' ');
4411 MultiByteToWideChar(
4412 CP_ACP,
4413 0,
4414 str.c_str(),
4415 str.length(),
4416 utf16.modify(),
4417 len
4418 );
4419 }
4420 return Utf16ToUnicode(utf16);
4421 }
4422
4423 #else
4424
UnicodeToLocal(const lString32 & str)4425 lString8 UnicodeToLocal( const lString32 & str )
4426 {
4427 return UnicodeToUtf8( str );
4428 }
4429
LocalToUnicode(const lString8 & str)4430 lString32 LocalToUnicode( const lString8 & str )
4431 {
4432 return Utf8ToUnicode( str );
4433 }
4434
4435 #endif
4436
4437 //0x410
// Latin transcriptions for U+0410..U+042F, indexed by (code point - 0x410).
// NOTE(review): the entry for U+0419 is lowercase "j" although every other
// entry in this table is uppercase -- confirm whether that is intended.
static const char * russian_capital[32] =
{
    /* U+0410 */ "A",  "B",  "V",  "G",  "D",  "E", "ZH", "Z",
    /* U+0418 */ "I",  "j",  "K",  "L",  "M",  "N", "O",  "P",
    /* U+0420 */ "R",  "S",  "T",  "U",  "F",  "H", "TS", "CH",
    /* U+0428 */ "SH", "SH", "\'", "Y",  "\'", "E", "YU", "YA"
};
// Latin transcriptions for U+0430..U+044F, indexed by (code point - 0x430).
static const char * russian_small[32] =
{
    /* U+0430 */ "a",  "b",  "v",  "g",  "d",  "e", "zh", "z",
    /* U+0438 */ "i",  "j",  "k",  "l",  "m",  "n", "o",  "p",
    /* U+0440 */ "r",  "s",  "t",  "u",  "f",  "h", "ts", "ch",
    /* U+0448 */ "sh", "sh", "\'", "y",  "\'", "e", "yu", "ya"
};
4448
// ASCII transcriptions for the Latin-1 Supplement letters U+00C0..U+00FF,
// indexed by (code point - 0xC0).
// A/O/U with diaeresis are written "AE"/"OE"/"UE" (and lowercase likewise);
// other accented letters are reduced to their base letter.
static const char * latin_1[64] =
{
    "A", // U+00C0 LATIN CAPITAL LETTER A WITH GRAVE
    "A", // U+00C1 LATIN CAPITAL LETTER A WITH ACUTE
    "A", // U+00C2 LATIN CAPITAL LETTER A WITH CIRCUMFLEX
    "A", // U+00C3 LATIN CAPITAL LETTER A WITH TILDE
    "AE",// U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    "A", // U+00C5 LATIN CAPITAL LETTER A WITH RING ABOVE
    "AE",// U+00C6 LATIN CAPITAL LETTER AE
    "C", // U+00C7 LATIN CAPITAL LETTER C WITH CEDILLA
    "E", // U+00C8 LATIN CAPITAL LETTER E WITH GRAVE
    "E", // U+00C9 LATIN CAPITAL LETTER E WITH ACUTE
    "E", // U+00CA LATIN CAPITAL LETTER E WITH CIRCUMFLEX
    "E", // U+00CB LATIN CAPITAL LETTER E WITH DIAERESIS
    "I", // U+00CC LATIN CAPITAL LETTER I WITH GRAVE
    "I", // U+00CD LATIN CAPITAL LETTER I WITH ACUTE
    "I", // U+00CE LATIN CAPITAL LETTER I WITH CIRCUMFLEX
    "I", // U+00CF LATIN CAPITAL LETTER I WITH DIAERESIS
    "D", // U+00D0 LATIN CAPITAL LETTER ETH
    "N", // U+00D1 LATIN CAPITAL LETTER N WITH TILDE
    "O", // U+00D2 LATIN CAPITAL LETTER O WITH GRAVE
    "O", // U+00D3 LATIN CAPITAL LETTER O WITH ACUTE
    "O", // U+00D4 LATIN CAPITAL LETTER O WITH CIRCUMFLEX
    "O", // U+00D5 LATIN CAPITAL LETTER O WITH TILDE
    "OE",// U+00D6 LATIN CAPITAL LETTER O WITH DIAERESIS
    "x", // U+00D7 MULTIPLICATION SIGN
    "O", // U+00D8 LATIN CAPITAL LETTER O WITH STROKE
    "U", // U+00D9 LATIN CAPITAL LETTER U WITH GRAVE
    "U", // U+00DA LATIN CAPITAL LETTER U WITH ACUTE
    "U", // U+00DB LATIN CAPITAL LETTER U WITH CIRCUMFLEX
    "UE",// U+00DC LATIN CAPITAL LETTER U WITH DIAERESIS
    "Y", // U+00DD LATIN CAPITAL LETTER Y WITH ACUTE
    "p", // U+00DE LATIN CAPITAL LETTER THORN
    "SS",// U+00DF LATIN SMALL LETTER SHARP S
    "a", // U+00E0 LATIN SMALL LETTER A WITH GRAVE
    "a", // U+00E1 LATIN SMALL LETTER A WITH ACUTE
    "a", // U+00E2 LATIN SMALL LETTER A WITH CIRCUMFLEX
    "a", // U+00E3 LATIN SMALL LETTER A WITH TILDE
    "ae",// U+00E4 LATIN SMALL LETTER A WITH DIAERESIS
    "a", // U+00E5 LATIN SMALL LETTER A WITH RING ABOVE
    "ae",// U+00E6 LATIN SMALL LETTER AE
    "c", // U+00E7 LATIN SMALL LETTER C WITH CEDILLA
    "e", // U+00E8 LATIN SMALL LETTER E WITH GRAVE
    "e", // U+00E9 LATIN SMALL LETTER E WITH ACUTE
    "e", // U+00EA LATIN SMALL LETTER E WITH CIRCUMFLEX
    "e", // U+00EB LATIN SMALL LETTER E WITH DIAERESIS
    "i", // U+00EC LATIN SMALL LETTER I WITH GRAVE
    "i", // U+00ED LATIN SMALL LETTER I WITH ACUTE
    "i", // U+00EE LATIN SMALL LETTER I WITH CIRCUMFLEX
    "i", // U+00EF LATIN SMALL LETTER I WITH DIAERESIS
    "d", // U+00F0 LATIN SMALL LETTER ETH
    "n", // U+00F1 LATIN SMALL LETTER N WITH TILDE
    "o", // U+00F2 LATIN SMALL LETTER O WITH GRAVE
    "o", // U+00F3 LATIN SMALL LETTER O WITH ACUTE
    "o", // U+00F4 LATIN SMALL LETTER O WITH CIRCUMFLEX
    "o", // U+00F5 LATIN SMALL LETTER O WITH TILDE
    "oe",// U+00F6 LATIN SMALL LETTER O WITH DIAERESIS
    "x", // U+00F7 DIVISION SIGN
    "o", // U+00F8 LATIN SMALL LETTER O WITH STROKE
    "u", // U+00F9 LATIN SMALL LETTER U WITH GRAVE
    "u", // U+00FA LATIN SMALL LETTER U WITH ACUTE
    "u", // U+00FB LATIN SMALL LETTER U WITH CIRCUMFLEX
    "ue",// U+00FC LATIN SMALL LETTER U WITH DIAERESIS
    "y", // U+00FD LATIN SMALL LETTER Y WITH ACUTE
    "p", // U+00FE LATIN SMALL LETTER THORN
    "y", // U+00FF LATIN SMALL LETTER Y WITH DIAERESIS
};
4516
getCharTranscript(lChar32 ch)4517 static const char * getCharTranscript( lChar32 ch )
4518 {
4519 if ( ch>=0x410 && ch<0x430 )
4520 return russian_capital[ch-0x410];
4521 else if (ch>=0x430 && ch<0x450)
4522 return russian_small[ch-0x430];
4523 else if (ch>=0xC0 && ch<0xFF)
4524 return latin_1[ch-0xC0];
4525 else if (ch==0x450)
4526 return "E";
4527 else if ( ch==0x451 )
4528 return "e";
4529 return "?";
4530 }
4531
4532
UnicodeToTranslit(const lString32 & str)4533 lString8 UnicodeToTranslit( const lString32 & str )
4534 {
4535 lString8 buf;
4536 if ( str.empty() )
4537 return buf;
4538 buf.reserve( str.length()*5/4 );
4539 for ( int i=0; i<str.length(); i++ ) {
4540 lChar32 ch = str[i];
4541 if ( ch>=32 && ch<=127 ) {
4542 buf.append( 1, (lChar8)ch );
4543 } else {
4544 const char * trans = getCharTranscript(ch);
4545 buf.append( trans );
4546 }
4547 }
4548 buf.pack();
4549 return buf;
4550 }
4551
4552
// Note:
// CH_PROP_UPPER and CH_PROP_LOWER together make up CH_PROP_ALPHA, which is,
// along with CH_PROP_CONSONANT, CH_PROP_VOWEL and CH_PROP_ALPHA_SIGN,
// used only for detecting word candidates for hyphenation.
// CH_PROP_PUNCT and CH_PROP_DASH are each used once in some obscure places.
// The others do not seem to be used anywhere: CH_PROP_SIGN, CH_PROP_DIGIT, CH_PROP_SPACE
4559 static lUInt16 char_props[] = {
4560 // 0x0000:
4561 0,0,0,0, 0,0,0,0, CH_PROP_SPACE,CH_PROP_SPACE,CH_PROP_SPACE,0, CH_PROP_SPACE,CH_PROP_SPACE,0,0,
4562 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4563 // 0x0020:
4564 CH_PROP_SPACE, // ' '
4565 CH_PROP_PUNCT | CH_PROP_AVOID_WRAP_BEFORE, // '!'
4566 0, // '\"'
4567 CH_PROP_SIGN, // '#'
4568 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE | CH_PROP_AVOID_WRAP_AFTER, // '$'
4569 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE, // '%'
4570 CH_PROP_SIGN, // '&'
4571 CH_PROP_SIGN, // '\''
4572 CH_PROP_AVOID_WRAP_AFTER, // '('
4573 CH_PROP_AVOID_WRAP_BEFORE, // ')'
4574 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE | CH_PROP_AVOID_WRAP_AFTER, // '*'
4575 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE | CH_PROP_AVOID_WRAP_AFTER, // '+'
4576 CH_PROP_PUNCT | CH_PROP_AVOID_WRAP_BEFORE, // ','
4577 CH_PROP_SIGN | CH_PROP_DASH | CH_PROP_AVOID_WRAP_BEFORE, // '-'
4578 CH_PROP_PUNCT | CH_PROP_AVOID_WRAP_BEFORE, // '.'
4579 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE, // '/'
4580 // 0x0030:
4581 CH_PROP_DIGIT, // '0'
4582 CH_PROP_DIGIT, // '1'
4583 CH_PROP_DIGIT, // '2'
4584 CH_PROP_DIGIT, // '3'
4585 CH_PROP_DIGIT, // '4'
4586 CH_PROP_DIGIT, // '5'
4587 CH_PROP_DIGIT, // '6'
4588 CH_PROP_DIGIT, // '7'
4589 CH_PROP_DIGIT, // '8'
4590 CH_PROP_DIGIT, // '9'
4591 CH_PROP_PUNCT | CH_PROP_AVOID_WRAP_BEFORE, // ':'
4592 CH_PROP_PUNCT | CH_PROP_AVOID_WRAP_BEFORE, // ';'
4593 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE | CH_PROP_AVOID_WRAP_AFTER, // '<'
4594 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE | CH_PROP_AVOID_WRAP_AFTER, // '='
4595 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE | CH_PROP_AVOID_WRAP_AFTER, // '>'
4596 CH_PROP_PUNCT | CH_PROP_AVOID_WRAP_BEFORE, // '?'
4597 // 0x0040:
4598 CH_PROP_SIGN, // '@'
4599 CH_PROP_UPPER | CH_PROP_VOWEL, // 'A'
4600 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'B'
4601 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'C'
4602 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'D'
4603 CH_PROP_UPPER | CH_PROP_VOWEL, // 'E'
4604 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'F'
4605 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'G'
4606 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'H'
4607 CH_PROP_UPPER | CH_PROP_VOWEL, // 'I'
4608 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'J'
4609 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'K'
4610 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'L'
4611 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'M'
4612 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'N'
4613 CH_PROP_UPPER | CH_PROP_VOWEL, // 'O'
4614 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'P'
4615 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'Q'
4616 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'R'
4617 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'S'
4618 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'T'
4619 CH_PROP_UPPER | CH_PROP_VOWEL, // 'U'
4620 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'V'
4621 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'W'
4622 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'X'
4623 CH_PROP_UPPER | CH_PROP_VOWEL, // 'Y'
4624 CH_PROP_UPPER | CH_PROP_CONSONANT, // 'Z'
4625 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_AFTER, // '['
4626 CH_PROP_SIGN, // '\'
4627 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE, // ']'
4628 CH_PROP_SIGN, // '^'
4629 CH_PROP_SIGN, // '_'
4630 // 0x0060:
4631 CH_PROP_SIGN, // '`'
4632 CH_PROP_LOWER | CH_PROP_VOWEL, // 'a'
4633 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'b'
4634 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'c'
4635 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'd'
4636 CH_PROP_LOWER | CH_PROP_VOWEL, // 'e'
4637 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'f'
4638 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'g'
4639 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'h'
4640 CH_PROP_LOWER | CH_PROP_VOWEL, // 'i'
4641 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'j'
4642 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'k'
4643 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'l'
4644 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'm'
4645 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'n'
4646 CH_PROP_LOWER | CH_PROP_VOWEL, // 'o'
4647 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'p'
4648 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'q'
4649 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'r'
4650 CH_PROP_LOWER | CH_PROP_CONSONANT, // 's'
4651 CH_PROP_LOWER | CH_PROP_CONSONANT, // 't'
4652 CH_PROP_LOWER | CH_PROP_VOWEL, // 'u'
4653 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'v'
4654 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'w'
4655 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'x'
4656 CH_PROP_LOWER | CH_PROP_VOWEL, // 'y'
4657 CH_PROP_LOWER | CH_PROP_CONSONANT, // 'z'
4658 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_AFTER, // '{'
4659 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE | CH_PROP_AVOID_WRAP_AFTER, // '|'
4660 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE, // '}'
4661 CH_PROP_SIGN, // '~'
4662 CH_PROP_SIGN, // ' '
4663 // 0x0080:
4664 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4665 // 0x0090:
4666 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4667 // 0x00A0:
4668 CH_PROP_SPACE, // 00A0 nbsp
4669 CH_PROP_PUNCT, // 00A1 inverted !
4670 CH_PROP_SIGN, // 00A2
4671 CH_PROP_SIGN, // 00A3
4672 CH_PROP_SIGN, // 00A4
4673 CH_PROP_SIGN, // 00A5
4674 CH_PROP_SIGN, // 00A6
4675 CH_PROP_SIGN, // 00A7
4676 CH_PROP_SIGN, // 00A8
4677 CH_PROP_SIGN, // 00A9
4678 CH_PROP_SIGN, // 00AA
4679 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_AFTER, // 00AB «
4680 CH_PROP_SIGN, // 00AC
4681 CH_PROP_HYPHEN,// 00AD soft-hyphen (UNICODE_SOFT_HYPHEN_CODE)
4682 CH_PROP_SIGN, // 00AE
4683 CH_PROP_SIGN, // 00AF
4684 // 0x00A0:
4685 CH_PROP_SIGN, // 00B0 degree
4686 CH_PROP_SIGN, // 00B1
4687 CH_PROP_SIGN, // 00B2
4688 CH_PROP_SIGN, // 00B3
4689 CH_PROP_SIGN, // 00B4
4690 CH_PROP_SIGN, // 00B5
4691 CH_PROP_SIGN, // 00B6
4692 CH_PROP_SIGN, // 00B7
4693 CH_PROP_SIGN, // 00B8
4694 CH_PROP_SIGN, // 00B9
4695 CH_PROP_SIGN, // 00BA
4696 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE, // 00BB »
4697 CH_PROP_SIGN, // 00BC
4698 CH_PROP_SIGN, // 00BD
4699 CH_PROP_SIGN, // 00BE
4700 CH_PROP_PUNCT, // 00BF
4701 // 0x00C0:
4702 CH_PROP_UPPER | CH_PROP_VOWEL, // 00C0 A`
4703 CH_PROP_UPPER | CH_PROP_VOWEL, // 00C1 A'
4704 CH_PROP_UPPER | CH_PROP_VOWEL, // 00C2 A^
4705 CH_PROP_UPPER | CH_PROP_VOWEL, // 00C3 A"
4706 CH_PROP_UPPER | CH_PROP_VOWEL, // 00C4 A:
4707 CH_PROP_UPPER | CH_PROP_VOWEL, // 00C5 Ao
4708 CH_PROP_UPPER | CH_PROP_VOWEL, // 00C6 AE
4709 CH_PROP_UPPER | CH_PROP_CONSONANT, // 00C7 C~
4710 CH_PROP_UPPER | CH_PROP_VOWEL, // 00C8 E`
4711 CH_PROP_UPPER | CH_PROP_VOWEL, // 00C9 E'
4712 CH_PROP_UPPER | CH_PROP_VOWEL, // 00CA E^
4713 CH_PROP_UPPER | CH_PROP_VOWEL, // 00CB E:
4714 CH_PROP_UPPER | CH_PROP_VOWEL, // 00CC I`
4715 CH_PROP_UPPER | CH_PROP_VOWEL, // 00CD I'
4716 CH_PROP_UPPER | CH_PROP_VOWEL, // 00CE I^
4717 CH_PROP_UPPER | CH_PROP_VOWEL, // 00CF I:
4718 // 0x00D0:
4719 CH_PROP_UPPER | CH_PROP_CONSONANT, // 00D0 D-
4720 CH_PROP_UPPER | CH_PROP_CONSONANT, // 00D1 N-
4721 CH_PROP_UPPER | CH_PROP_VOWEL, // 00D2 O`
4722 CH_PROP_UPPER | CH_PROP_VOWEL, // 00D3 O'
4723 CH_PROP_UPPER | CH_PROP_VOWEL, // 00D4 O^
4724 CH_PROP_UPPER | CH_PROP_VOWEL, // 00D5 O"
4725 CH_PROP_UPPER | CH_PROP_VOWEL, // 00D6 O:
4726 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE | CH_PROP_AVOID_WRAP_AFTER, // 00D7 x (multiplication sign)
4727 CH_PROP_UPPER | CH_PROP_VOWEL, // 00D8 O/
4728 CH_PROP_UPPER | CH_PROP_VOWEL, // 00D9 U`
4729 CH_PROP_UPPER | CH_PROP_VOWEL, // 00DA U'
4730 CH_PROP_UPPER | CH_PROP_VOWEL, // 00DB U^
4731 CH_PROP_UPPER | CH_PROP_VOWEL, // 00DC U:
4732 CH_PROP_UPPER | CH_PROP_VOWEL, // 00DD Y'
4733 CH_PROP_UPPER | CH_PROP_CONSONANT, // 00DE P thorn
4734 CH_PROP_LOWER | CH_PROP_CONSONANT, // 00DF ss
4735 // 0x00E0:
4736 CH_PROP_LOWER | CH_PROP_VOWEL, // 00E0 a`
4737 CH_PROP_LOWER | CH_PROP_VOWEL, // 00E1 a'
4738 CH_PROP_LOWER | CH_PROP_VOWEL, // 00E2 a^
4739 CH_PROP_LOWER | CH_PROP_VOWEL, // 00E3 a"
4740 CH_PROP_LOWER | CH_PROP_VOWEL, // 00E4 a:
4741 CH_PROP_LOWER | CH_PROP_VOWEL, // 00E5 ao
4742 CH_PROP_LOWER | CH_PROP_VOWEL, // 00E6 ae
4743 CH_PROP_LOWER | CH_PROP_CONSONANT, // 00E7 c~
4744 CH_PROP_LOWER | CH_PROP_VOWEL, // 00E8 e`
4745 CH_PROP_LOWER | CH_PROP_VOWEL, // 00E9 e'
4746 CH_PROP_LOWER | CH_PROP_VOWEL, // 00EA e^
4747 CH_PROP_LOWER | CH_PROP_VOWEL, // 00EB e:
4748 CH_PROP_LOWER | CH_PROP_VOWEL, // 00EC i`
4749 CH_PROP_LOWER | CH_PROP_VOWEL, // 00ED i'
4750 CH_PROP_LOWER | CH_PROP_VOWEL, // 00EE i^
4751 CH_PROP_LOWER | CH_PROP_VOWEL, // 00EF i:
4752 // 0x00F0:
4753 CH_PROP_LOWER | CH_PROP_CONSONANT, // 00F0 eth
4754 CH_PROP_LOWER | CH_PROP_CONSONANT, // 00F1 n~
4755 CH_PROP_LOWER | CH_PROP_VOWEL, // 00F2 o`
4756 CH_PROP_LOWER | CH_PROP_VOWEL, // 00F3 o'
4757 CH_PROP_LOWER | CH_PROP_VOWEL, // 00F4 o^
4758 CH_PROP_LOWER | CH_PROP_VOWEL, // 00F5 o"
4759 CH_PROP_LOWER | CH_PROP_VOWEL, // 00F6 o:
4760 CH_PROP_SIGN | CH_PROP_AVOID_WRAP_BEFORE | CH_PROP_AVOID_WRAP_AFTER, // 00F7 (division sign %)
4761 CH_PROP_LOWER | CH_PROP_VOWEL, // 00F8 o/
4762 CH_PROP_LOWER | CH_PROP_VOWEL, // 00F9 u`
4763 CH_PROP_LOWER | CH_PROP_VOWEL, // 00FA u'
4764 CH_PROP_LOWER | CH_PROP_VOWEL, // 00FB u^
4765 CH_PROP_LOWER | CH_PROP_VOWEL, // 00FC u:
4766 CH_PROP_LOWER | CH_PROP_VOWEL, // 00FD y'
4767 CH_PROP_LOWER | CH_PROP_CONSONANT, // 00FE p thorn
4768 CH_PROP_LOWER | CH_PROP_VOWEL, // 00FF y:
4769 // 0x0100:
4770 CH_PROP_UPPER | CH_PROP_VOWEL, // 0100 A_
4771 CH_PROP_LOWER | CH_PROP_VOWEL, // 0101 a_
4772 CH_PROP_UPPER | CH_PROP_VOWEL, // 0102 Au
4773 CH_PROP_LOWER | CH_PROP_VOWEL, // 0103 au
4774 CH_PROP_UPPER | CH_PROP_VOWEL, // 0104 A,
4775 CH_PROP_LOWER | CH_PROP_VOWEL, // 0105 a,
4776 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0106 C'
4777 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0107 c'
4778 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0108 C^
4779 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0109 c^
4780 CH_PROP_UPPER | CH_PROP_CONSONANT, // 010A C.
4781 CH_PROP_LOWER | CH_PROP_CONSONANT, // 010B c.
4782 CH_PROP_UPPER | CH_PROP_CONSONANT, // 010C Cu
4783 CH_PROP_LOWER | CH_PROP_CONSONANT, // 010D cu
4784 CH_PROP_UPPER | CH_PROP_CONSONANT, // 010E Du
4785 CH_PROP_LOWER | CH_PROP_CONSONANT, // 010F d'
4786
4787 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0110 D-
4788 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0111 d-
4789 CH_PROP_UPPER | CH_PROP_VOWEL, // 0112 E_
4790 CH_PROP_LOWER | CH_PROP_VOWEL, // 0113 e_
4791 CH_PROP_UPPER | CH_PROP_VOWEL, // 0114 Eu
4792 CH_PROP_LOWER | CH_PROP_VOWEL, // 0115 eu
4793 CH_PROP_UPPER | CH_PROP_VOWEL, // 0116 E.
4794 CH_PROP_LOWER | CH_PROP_VOWEL, // 0117 e.
4795 CH_PROP_UPPER | CH_PROP_VOWEL, // 0118 E,
4796 CH_PROP_LOWER | CH_PROP_VOWEL, // 0119 e,
4797 CH_PROP_UPPER | CH_PROP_VOWEL, // 011A Ev
4798 CH_PROP_LOWER | CH_PROP_VOWEL, // 011B ev
4799 CH_PROP_UPPER | CH_PROP_CONSONANT, // 011C G^
4800 CH_PROP_LOWER | CH_PROP_CONSONANT, // 011D g^
4801 CH_PROP_UPPER | CH_PROP_CONSONANT, // 011E Gu
4802 CH_PROP_LOWER | CH_PROP_CONSONANT, // 011F gu
4803
4804 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0120 G.
4805 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0121 g.
4806 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0122 G,
4807 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0123 g,
4808 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0124 H^
4809 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0125 h^
4810 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0126 H-
4811 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0127 h-
4812 CH_PROP_UPPER | CH_PROP_VOWEL, // 0128 I~
4813 CH_PROP_LOWER | CH_PROP_VOWEL, // 0129 i~
4814 CH_PROP_UPPER | CH_PROP_VOWEL, // 012A I_
4815 CH_PROP_LOWER | CH_PROP_VOWEL, // 012B i_
4816 CH_PROP_UPPER | CH_PROP_VOWEL, // 012C Iu
4817 CH_PROP_LOWER | CH_PROP_VOWEL, // 012D iu
4818 CH_PROP_UPPER | CH_PROP_VOWEL, // 012E I,
4819 CH_PROP_LOWER | CH_PROP_VOWEL, // 012F i,
4820
4821 CH_PROP_UPPER | CH_PROP_VOWEL, // 0130 I.
4822 CH_PROP_LOWER | CH_PROP_VOWEL, // 0131 i-.
4823 CH_PROP_UPPER | CH_PROP_VOWEL, // 0132 IJ
4824 CH_PROP_LOWER | CH_PROP_VOWEL, // 0133 ij
4825 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0134 J^
4826 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0135 j^
4827 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0136 K,
4828 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0137 k,
4829 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0138 k (kra)
4830 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0139 L'
4831 CH_PROP_LOWER | CH_PROP_CONSONANT, // 013A l'
4832 CH_PROP_UPPER | CH_PROP_CONSONANT, // 013B L,
4833 CH_PROP_LOWER | CH_PROP_CONSONANT, // 013C l,
4834 CH_PROP_UPPER | CH_PROP_CONSONANT, // 013D L'
4835 CH_PROP_LOWER | CH_PROP_CONSONANT, // 013E l'
4836 CH_PROP_UPPER | CH_PROP_CONSONANT, // 013F L.
4837
4838 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0140 l.
4839 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0141 L/
4840 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0142 l/
4841 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0143 N'
4842 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0144 n'
4843 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0145 N,
4844 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0146 n,
4845 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0147 Nv
4846 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0148 nv
4847 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0149 `n
4848 CH_PROP_UPPER | CH_PROP_CONSONANT, // 014A Ng
4849 CH_PROP_LOWER | CH_PROP_CONSONANT, // 014B ng
4850 CH_PROP_UPPER | CH_PROP_VOWEL, // 014C O_
4851 CH_PROP_LOWER | CH_PROP_VOWEL, // 014D o-.
4852 CH_PROP_UPPER | CH_PROP_VOWEL, // 014E Ou
4853 CH_PROP_LOWER | CH_PROP_VOWEL, // 014F ou
4854
4855 CH_PROP_UPPER | CH_PROP_VOWEL, // 0150 O"
4856 CH_PROP_LOWER | CH_PROP_VOWEL, // 0151 o"
4857 CH_PROP_UPPER | CH_PROP_VOWEL, // 0152 Oe
4858 CH_PROP_LOWER | CH_PROP_VOWEL, // 0153 oe
4859 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0154 R'
4860 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0155 r'
4861 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0156 R,
4862 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0157 r,
4863 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0158 Rv
4864 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0159 rv
4865 CH_PROP_UPPER | CH_PROP_CONSONANT, // 015A S'
4866 CH_PROP_LOWER | CH_PROP_CONSONANT, // 015B s'
4867 CH_PROP_UPPER | CH_PROP_CONSONANT, // 015C S^
4868 CH_PROP_LOWER | CH_PROP_CONSONANT, // 015D s^
4869 CH_PROP_UPPER | CH_PROP_CONSONANT, // 015E S,
4870 CH_PROP_LOWER | CH_PROP_CONSONANT, // 015F s,
4871
4872 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0160 Sv
4873 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0161 sv
4874 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0162 T,
4875 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0163 t,
4876 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0164 Tv
4877 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0165 tv
4878 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0166 T-
4879 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0167 t-
4880 CH_PROP_UPPER | CH_PROP_VOWEL, // 0168 U~
4881 CH_PROP_LOWER | CH_PROP_VOWEL, // 0169 u~
4882 CH_PROP_UPPER | CH_PROP_VOWEL, // 016A U_
4883 CH_PROP_LOWER | CH_PROP_VOWEL, // 016B u_
4884 CH_PROP_UPPER | CH_PROP_VOWEL, // 016C Uu
4885 CH_PROP_LOWER | CH_PROP_VOWEL, // 016D uu
4886 CH_PROP_UPPER | CH_PROP_VOWEL, // 016E Uo
4887 CH_PROP_LOWER | CH_PROP_VOWEL, // 016F uo
4888
4889 CH_PROP_UPPER | CH_PROP_VOWEL, // 0170 U"
4890 CH_PROP_LOWER | CH_PROP_VOWEL, // 0171 u"
4891 CH_PROP_UPPER | CH_PROP_VOWEL, // 0172 U,
4892 CH_PROP_LOWER | CH_PROP_VOWEL, // 0173 u,
4893 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0174 W^
4894 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0175 w^
4895 CH_PROP_UPPER | CH_PROP_VOWEL, // 0176 Y,
4896 CH_PROP_LOWER | CH_PROP_VOWEL, // 0177 y,
4897 CH_PROP_UPPER | CH_PROP_VOWEL, // 0178 Y:
4898 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0179 Z'
4899 CH_PROP_LOWER | CH_PROP_CONSONANT, // 017A z'
4900 CH_PROP_UPPER | CH_PROP_CONSONANT, // 017B Z.
4901 CH_PROP_LOWER | CH_PROP_CONSONANT, // 017C z.
4902 CH_PROP_UPPER | CH_PROP_CONSONANT, // 017D Zv
4903 CH_PROP_LOWER | CH_PROP_CONSONANT, // 017E zv
4904 CH_PROP_LOWER | CH_PROP_CONSONANT, // 017F s long
4905 // 0x0180:
4906 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4907 // 0x0190:
4908 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4909 // 0x01A0:
4910 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4911 // 0x01B0:
4912 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4913 // 0x01C0:
4914 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4915 // 0x01D0:
4916 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4917 // 0x01E0:
4918 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4919 // 0x01F0:
4920 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4921 // 0x0200:
4922 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4923 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4924 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4925 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4926 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4927 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4928 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4929 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4930 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4931 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4932 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4933 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4934 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4935 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4936 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4937 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4938 // 0x0300:
4939 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4940 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4941 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4942 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4943 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4944 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4945 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4946 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
4947 // 0x0380:
4948 0,0,0,0,
4949 CH_PROP_VOWEL, // GREEK TONOS 0384
4950 CH_PROP_VOWEL, // GREEK DIALYTIKA TONOS 0385
4951 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH TONOS 0386
4952 CH_PROP_UPPER | CH_PROP_PUNCT, // GREEK ANO TELEIA 0387
4953 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER EPSILON WITH TONOS 0388
4954 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH TONOS 0389
4955 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH TONOS 038A
4956 0,//038b
4957 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMICRON WITH TONOS 038C
4958 0,//038d
4959 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER UPSILON WITH TONOS 038E
4960 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH TONOS 038F
4961 // 0x0390:
4962 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS 0390
4963 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA Α 0391 Α
4964 CH_PROP_UPPER | CH_PROP_CONSONANT, // GREEK CAPITAL LETTER BETA 0392 Β
4965 CH_PROP_UPPER | CH_PROP_CONSONANT, // GREEK CAPITAL LETTER GAMMA 0393 Γ
4966 CH_PROP_UPPER | CH_PROP_CONSONANT, // GREEK CAPITAL LETTER DELTA 0394 Δ
4967 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER EPSILON 0395 Ε
4968 CH_PROP_UPPER | CH_PROP_CONSONANT, // GREEK CAPITAL LETTER ZETA 0396 Ζ
4969 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA 0397 Η
4970 CH_PROP_UPPER | CH_PROP_CONSONANT, // GREEK CAPITAL LETTER THETA 0398 Θ
4971 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA 0399 Ι
4972 CH_PROP_UPPER | CH_PROP_CONSONANT, // GREEK CAPITAL LETTER KAPPA 039A Κ
4973 CH_PROP_UPPER | CH_PROP_CONSONANT, // GREEK CAPITAL LETTER LAM(B)DA 039B Λ
4974 CH_PROP_UPPER | CH_PROP_CONSONANT, // GREEK CAPITAL LETTER MU 039C Μ
4975 CH_PROP_UPPER | CH_PROP_CONSONANT, // GREEK CAPITAL LETTER NU 039D Ν
4976 CH_PROP_UPPER | CH_PROP_CONSONANT, // GREEK CAPITAL LETTER XI 039E Ξ
4977 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMICRON 039F Ο
4978 CH_PROP_UPPER | CH_PROP_CONSONANT, // GREEK CAPITAL LETTER PI 03A0 Π
4979 CH_PROP_UPPER | CH_PROP_CONSONANT, // GREEK CAPITAL LETTER RHO 03A1 Ρ
4980 0, // 03a2
4981 CH_PROP_UPPER | CH_PROP_CONSONANT, // GREEK CAPITAL LETTER SIGMA 03A3 Σ
4982 CH_PROP_UPPER | CH_PROP_CONSONANT, // GREEK CAPITAL LETTER TAU 03A4 Τ
4983 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER UPSILON 03A5 Υ
4984 CH_PROP_UPPER | CH_PROP_CONSONANT, // GREEK CAPITAL LETTER PHI 03A6 Φ
4985 CH_PROP_UPPER | CH_PROP_CONSONANT, // GREEK CAPITAL LETTER CHI 03A7 Χ
4986 CH_PROP_UPPER | CH_PROP_CONSONANT, // GREEK CAPITAL LETTER PSI 03A8 Ψ
4987 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA 03A9 Ω
4988 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH DIALYTIKA 03AA
4989 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA 03AB
4990 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH TONOS 03AC
4991 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER EPSILON WITH TONOS 03AD
4992 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH TONOS 03AE
4993 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH TONOS 03AF
4994
4995 // 03B0
4996 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS 03B0
4997 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA 03B1 α
4998 CH_PROP_LOWER | CH_PROP_CONSONANT, // GREEK SMALL LETTER BETA 03B2 β
4999 CH_PROP_LOWER | CH_PROP_CONSONANT, // GREEK SMALL LETTER GAMMA 03B3 γ
5000 CH_PROP_LOWER | CH_PROP_CONSONANT, // GREEK SMALL LETTER DELTA 03B4 δ
5001 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER EPSILON 03B5 ε
5002 CH_PROP_LOWER | CH_PROP_CONSONANT, // GREEK SMALL LETTER ZETA 03B6 ζ
5003 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA 03B7 η
5004 CH_PROP_LOWER | CH_PROP_CONSONANT, // GREEK SMALL LETTER THETA 03B8 θ
5005 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA 03B9 ι
5006 CH_PROP_LOWER | CH_PROP_CONSONANT, // GREEK SMALL LETTER KAPPA 03BA κ
5007 CH_PROP_LOWER | CH_PROP_CONSONANT, // GREEK SMALL LETTER LAM(B)DA 03BB λ
5008 CH_PROP_LOWER | CH_PROP_CONSONANT, // GREEK SMALL LETTER MU 03BC μ
5009 CH_PROP_LOWER | CH_PROP_CONSONANT, // GREEK SMALL LETTER NU 03BD ν
5010 CH_PROP_LOWER | CH_PROP_CONSONANT, // GREEK SMALL LETTER XI 03BE ξ
5011 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMICRON 03BF ο
5012
5013 CH_PROP_LOWER | CH_PROP_CONSONANT, // GREEK SMALL LETTER PI 03C0 π
5014 CH_PROP_LOWER | CH_PROP_CONSONANT, // GREEK SMALL LETTER RHO 03C1 ρ
5015 CH_PROP_LOWER | CH_PROP_CONSONANT, // GREEK SMALL LETTER FINAL SIGMA 03C2
5016 CH_PROP_LOWER | CH_PROP_CONSONANT, // GREEK SMALL LETTER SIGMA 03C3 σ
5017 CH_PROP_LOWER | CH_PROP_CONSONANT, // GREEK SMALL LETTER TAU 03C4 τ
5018 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON 03C5 υ
5019 CH_PROP_LOWER | CH_PROP_CONSONANT, // GREEK SMALL LETTER PHI 03C6 φ
5020 CH_PROP_LOWER | CH_PROP_CONSONANT, // GREEK SMALL LETTER CHI 03C7 χ
5021 CH_PROP_LOWER | CH_PROP_CONSONANT, // GREEK SMALL LETTER PSI 03C8 ψ
5022 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA 03C9 ω
5023 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH DIALYTIKA 03CA
5024 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH DIALYTIKA 03CB
5025 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMICRON WITH TONOS 03CC
5026 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH TONOS 03CD
5027 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH TONOS 03CE
5028 0, //03cf
5029 // 03d0
5030 CH_PROP_CONSONANT, // GREEK BETA SYMBOL (cursive) 03D0
5031 CH_PROP_CONSONANT, // GREEK THETA SYMBOL (cursive) 03D1
5032 CH_PROP_VOWEL, // GREEK UPSILON WITH HOOK SYMBOL 03D2
5033 CH_PROP_VOWEL, // GREEK UPSILON WITH ACUTE AND HOOK SYMBOL 03D3
5034 CH_PROP_VOWEL, // GREEK UPSILON WITH DIAERESIS AND HOOK SYMBOL 03D4
5035 CH_PROP_CONSONANT, // GREEK PHI SYMBOL (cursive) 03D5
5036 CH_PROP_CONSONANT, // GREEK PI SYMBOL 03D6
5037 CH_PROP_CONSONANT, // GREEK KAI SYMBOL 03D7
5038 0, // 03d8
5039 0, // 03d9
5040 CH_PROP_CONSONANT, // GREEK LETTER STIGMA 03DA
5041 CH_PROP_CONSONANT, // GREEK SMALL LETTER STIGMA 03DB
5042 CH_PROP_CONSONANT, // GREEK LETTER DIGAMMA (F) 03DC
5043 CH_PROP_CONSONANT, // GREEK SMALL LETTER DIGAMMA (f) 03DD
5044 CH_PROP_CONSONANT, // GREEK LETTER KOPPA 03DE
5045 CH_PROP_CONSONANT, // GREEK SMALL LETTER KOPPA 03DF
5046 // 03e0
5047 CH_PROP_CONSONANT, // GREEK LETTER SAMPI 03E0
5048 CH_PROP_CONSONANT, // GREEK SMALL LETTER SAMPI 03E1
5049 // 03e2
5050 0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
5051 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
5052 // 0x0400:
5053 0, // 0400
5054 CH_PROP_UPPER | CH_PROP_VOWEL, // 0401 cyrillic E:
5055 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0402 cyrillic Dje
5056 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0403 cyrillic Gje
5057 CH_PROP_UPPER | CH_PROP_VOWEL, // 0404 cyrillic ukr Ie
5058 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0405 cyrillic Dze
5059 CH_PROP_UPPER | CH_PROP_VOWEL, // 0406 cyrillic ukr I
5060 CH_PROP_UPPER | CH_PROP_VOWEL, // 0407 cyrillic ukr I:
5061 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0408 cyrillic J
5062 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0409 cyrillic L'
5063 CH_PROP_UPPER | CH_PROP_CONSONANT, // 040A cyrillic N'
5064 CH_PROP_UPPER | CH_PROP_CONSONANT, // 040B cyrillic Th
5065 CH_PROP_UPPER | CH_PROP_CONSONANT, // 040C cyrillic K'
5066 0, // 040D cyrillic
5067 CH_PROP_UPPER | CH_PROP_VOWEL, // 040E cyrillic short U
5068 CH_PROP_UPPER | CH_PROP_CONSONANT, // 040F cyrillic Dzhe
5069 // 0x0410:
5070 CH_PROP_UPPER | CH_PROP_VOWEL, // 0410 cyrillic A
5071 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0411 cyrillic B
5072 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0412 cyrillic V
5073 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0413 cyrillic G
5074 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0414 cyrillic D
5075 CH_PROP_UPPER | CH_PROP_VOWEL, // 0415 cyrillic E
5076 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0416 cyrillic Zh
5077 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0417 cyrillic Z
5078 CH_PROP_UPPER | CH_PROP_VOWEL, // 0418 cyrillic I
5079 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0419 cyrillic YI
5080 CH_PROP_UPPER | CH_PROP_CONSONANT, // 041A cyrillic K
5081 CH_PROP_UPPER | CH_PROP_CONSONANT, // 041B cyrillic L
5082 CH_PROP_UPPER | CH_PROP_CONSONANT, // 041C cyrillic M
5083 CH_PROP_UPPER | CH_PROP_CONSONANT, // 041D cyrillic N
5084 CH_PROP_UPPER | CH_PROP_VOWEL, // 041E cyrillic O
5085 CH_PROP_UPPER | CH_PROP_CONSONANT, // 041F cyrillic P
5086 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0420 cyrillic R
5087 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0421 cyrillic S
5088 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0422 cyrillic T
5089 CH_PROP_UPPER | CH_PROP_VOWEL, // 0423 cyrillic U
5090 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0424 cyrillic F
5091 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0425 cyrillic H
5092 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0426 cyrillic C
5093 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0427 cyrillic Ch
5094 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0428 cyrillic Sh
5095 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0429 cyrillic Sch
5096 CH_PROP_UPPER | CH_PROP_ALPHA_SIGN, // 042A cyrillic Hard sign
5097 CH_PROP_UPPER | CH_PROP_VOWEL, // 042B cyrillic Y
5098 CH_PROP_UPPER | CH_PROP_ALPHA_SIGN, // 042C cyrillic Soft sign
5099 CH_PROP_UPPER | CH_PROP_VOWEL, // 042D cyrillic EE
5100 CH_PROP_UPPER | CH_PROP_VOWEL, // 042E cyrillic Yu
5101 CH_PROP_UPPER | CH_PROP_VOWEL, // 042F cyrillic Ya
5102 // 0x0430:
5103 CH_PROP_LOWER | CH_PROP_VOWEL, // 0430 cyrillic A
5104 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0431 cyrillic B
5105 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0432 cyrillic V
5106 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0433 cyrillic G
5107 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0434 cyrillic D
5108 CH_PROP_LOWER | CH_PROP_VOWEL, // 0435 cyrillic E
5109 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0436 cyrillic Zh
5110 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0437 cyrillic Z
5111 CH_PROP_LOWER | CH_PROP_VOWEL, // 0438 cyrillic I
5112 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0439 cyrillic YI
5113 CH_PROP_LOWER | CH_PROP_CONSONANT, // 043A cyrillic K
5114 CH_PROP_LOWER | CH_PROP_CONSONANT, // 043B cyrillic L
5115 CH_PROP_LOWER | CH_PROP_CONSONANT, // 043C cyrillic M
5116 CH_PROP_LOWER | CH_PROP_CONSONANT, // 043D cyrillic N
5117 CH_PROP_LOWER | CH_PROP_VOWEL, // 043E cyrillic O
5118 CH_PROP_LOWER | CH_PROP_CONSONANT, // 043F cyrillic P
5119 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0440 cyrillic R
5120 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0441 cyrillic S
5121 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0442 cyrillic T
5122 CH_PROP_LOWER | CH_PROP_VOWEL, // 0443 cyrillic U
5123 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0444 cyrillic F
5124 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0445 cyrillic H
5125 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0446 cyrillic C
5126 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0447 cyrillic Ch
5127 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0448 cyrillic Sh
5128 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0449 cyrillic Sch
5129 CH_PROP_LOWER | CH_PROP_ALPHA_SIGN, // 044A cyrillic Hard sign
5130 CH_PROP_LOWER | CH_PROP_VOWEL, // 044B cyrillic Y
5131 CH_PROP_LOWER | CH_PROP_ALPHA_SIGN, // 044C cyrillic Soft sign
5132 CH_PROP_LOWER | CH_PROP_VOWEL, // 044D cyrillic EE
5133 CH_PROP_LOWER | CH_PROP_VOWEL, // 044E cyrillic Yu
5134 CH_PROP_LOWER | CH_PROP_VOWEL, // 044F cyrillic Ya
5135 0, // 0450 cyrillic
5136 CH_PROP_LOWER | CH_PROP_VOWEL, // 0451 cyrillic e:
5137 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0452 cyrillic Dje
5138 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0453 cyrillic Gje
5139 CH_PROP_LOWER | CH_PROP_VOWEL, // 0454 cyrillic ukr Ie
5140 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0455 cyrillic Dze
5141 CH_PROP_LOWER | CH_PROP_VOWEL, // 0456 cyrillic ukr I
5142 CH_PROP_LOWER | CH_PROP_VOWEL, // 0457 cyrillic ukr I:
5143 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0458 cyrillic J
5144 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0459 cyrillic L'
5145 CH_PROP_LOWER | CH_PROP_CONSONANT, // 045A cyrillic N'
5146 CH_PROP_LOWER | CH_PROP_CONSONANT, // 045B cyrillic Th
5147 CH_PROP_LOWER | CH_PROP_CONSONANT, // 045C cyrillic K'
5148 0, // 045D cyrillic
5149 CH_PROP_LOWER | CH_PROP_VOWEL, // 045E cyrillic short u
5150 CH_PROP_LOWER | CH_PROP_CONSONANT, // 045F cyrillic Dzhe
5151 // 0x0460:
5152 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
5153 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
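5154 // 0x0490: NOTE(review): only 32 zero entries follow the 0x0460 marker (covering 0x0460-0x047F), so a row for 0x0480-0x048F appears to be missing and the entries below would land 16 slots before their commented code points -- confirm against the lookup code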
5155 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0490 cyrillic G'
5156 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0491 cyrillic g'
5157 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0492 cyrillic G-
5158 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0493 cyrillic g-
5159 0, // 0494 cyrillic
5160 0, // 0495 cyrillic
5161 CH_PROP_UPPER | CH_PROP_CONSONANT, // 0496 cyrillic Zh,
5162 CH_PROP_LOWER | CH_PROP_CONSONANT, // 0497 cyrillic zh,
5163 0, // 0498 cyrillic
5164 0, // 0499 cyrillic
5165 CH_PROP_UPPER | CH_PROP_CONSONANT, // 049A cyrillic K,
5166 CH_PROP_LOWER | CH_PROP_CONSONANT, // 049B cyrillic k,
5167 CH_PROP_UPPER | CH_PROP_CONSONANT, // 049C cyrillic K|
5168 CH_PROP_LOWER | CH_PROP_CONSONANT, // 049D cyrillic k|
5169 0, // 049E cyrillic
5170 0, // 049F cyrillic
5171 0, // 04A0 cyrillic
5172 0, // 04A1 cyrillic
5173 CH_PROP_UPPER | CH_PROP_CONSONANT, // 04A2 cyrillic H,
5174 CH_PROP_LOWER | CH_PROP_CONSONANT, // 04A3 cyrillic n,
5175 0, // 04A4 cyrillic
5176 0, // 04A5 cyrillic
5177 0, // 04A6 cyrillic
5178 0, // 04A7 cyrillic
5179 0, // 04A8 cyrillic
5180 0, // 04A9 cyrillic
5181 0, // 04AA cyrillic
5182 0, // 04AB cyrillic
5183 0, // 04AC cyrillic
5184 0, // 04AD cyrillic
5185 CH_PROP_UPPER | CH_PROP_VOWEL, // 04AE cyrillic Y
5186 CH_PROP_LOWER | CH_PROP_VOWEL, // 04AF cyrillic y
5187 CH_PROP_UPPER | CH_PROP_VOWEL, // 04B0 cyrillic Y-
5188 CH_PROP_LOWER | CH_PROP_VOWEL, // 04B1 cyrillic y-
5189 CH_PROP_UPPER | CH_PROP_CONSONANT, // 04B2 cyrillic X,
5190 CH_PROP_LOWER | CH_PROP_CONSONANT, // 04B3 cyrillic x,
5191 };
5192
5193
5194 static lUInt16 char_props_1f00[] = {
5195 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH PSILI 1F00
5196 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH DASIA 1F01
5197 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA 1F02
5198 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA 1F03
5199 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA 1F04
5200 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA 1F05
5201 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI 1F06
5202 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI 1F07
5203 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH PSILI 1F08
5204 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH DASIA 1F09
5205 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA 1F0A
5206 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA 1F0B
5207 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA 1F0C
5208 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA 1F0D
5209 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI 1F0E
5210 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI 1F0F
5211 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER EPSILON WITH PSILI 1F10
5212 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER EPSILON WITH DASIA 1F11
5213 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER EPSILON WITH PSILI AND VARIA 1F12
5214 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER EPSILON WITH DASIA AND VARIA 1F13
5215 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER EPSILON WITH PSILI AND OXIA 1F14
5216 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA 1F15
5217 0, 0,
5218 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER EPSILON WITH PSILI 1F18
5219 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER EPSILON WITH DASIA 1F19
5220 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA 1F1A
5221 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA 1F1B
5222 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA 1F1C
5223 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA 1F1D
5224 0, 0,
5225 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH PSILI 1F20
5226 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH DASIA 1F21
5227 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH PSILI AND VARIA 1F22
5228 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH DASIA AND VARIA 1F23
5229 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH PSILI AND OXIA 1F24
5230 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH DASIA AND OXIA 1F25
5231 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI 1F26
5232 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI 1F27
5233 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH PSILI 1F28
5234 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH DASIA 1F29
5235 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA 1F2A
5236 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA 1F2B
5237 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA 1F2C
5238 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA 1F2D
5239 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI 1F2E
5240 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI 1F2F
5241 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH PSILI 1F30
5242 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH DASIA 1F31
5243 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH PSILI AND VARIA 1F32
5244 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH DASIA AND VARIA 1F33
5245 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH PSILI AND OXIA 1F34
5246 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH DASIA AND OXIA 1F35
5247 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH PSILI AND PERISPOMENI 1F36
5248 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH DASIA AND PERISPOMENI 1F37
5249 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH PSILI 1F38
5250 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH DASIA 1F39
5251 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA 1F3A
5252 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA 1F3B
5253 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA 1F3C
5254 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA 1F3D
5255 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI 1F3E
5256 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI 1F3F
5257 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMICRON WITH PSILI 1F40
5258 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMICRON WITH DASIA 1F41
5259 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMICRON WITH PSILI AND VARIA 1F42
5260 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMICRON WITH DASIA AND VARIA 1F43
5261 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMICRON WITH PSILI AND OXIA 1F44
5262 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA 1F45
5263 0, 0,
5264 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMICRON WITH PSILI 1F48
5265 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMICRON WITH DASIA 1F49
5266 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA 1F4A
5267 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA 1F4B
5268 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA 1F4C
5269 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA 1F4D
5270 0, 0,
5271 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH PSILI 1F50
5272 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH DASIA 1F51
5273 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA 1F52
5274 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH DASIA AND VARIA 1F53
5275 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA 1F54
5276 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH DASIA AND OXIA 1F55
5277 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI 1F56
5278 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI 1F57
5279 0,
5280 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER UPSILON WITH DASIA 1F59
5281 0,
5282 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA 1F5B
5283 0,
5284 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA 1F5D
5285 0,
5286 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI 1F5F
5287 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH PSILI 1F60
5288 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH DASIA 1F61
5289 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA 1F62
5290 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA 1F63
5291 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA 1F64
5292 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA 1F65
5293 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI 1F66
5294 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI 1F67
5295 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH PSILI 1F68
5296 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH DASIA 1F69
5297 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA 1F6A
5298 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA 1F6B
5299 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA 1F6C
5300 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA 1F6D
5301 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI 1F6E
5302 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI 1F6F
5303 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH VARIA 1F70
5304 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH OXIA 1F71
5305 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER EPSILON WITH VARIA 1F72
5306 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER EPSILON WITH OXIA 1F73
5307 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH VARIA 1F74
5308 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH OXIA 1F75
5309 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH VARIA 1F76
5310 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH OXIA 1F77
5311 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMICRON WITH VARIA 1F78
5312 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMICRON WITH OXIA 1F79
5313 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH VARIA 1F7A
5314 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH OXIA 1F7B
5315 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH VARIA 1F7C
5316 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH OXIA 1F7D
5317 0, 0,
5318 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI 1F80
5319 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI 1F81
5320 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI 1F82
5321 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI 1F83
5322 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI 1F84
5323 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI 1F85
5324 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 1F86
5325 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 1F87
5326 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI 1F88
5327 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI 1F89
5328 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI 1F8A
5329 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI 1F8B
5330 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI 1F8C
5331 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI 1F8D
5332 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 1F8E
5333 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 1F8F
5334 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI 1F90
5335 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI 1F91
5336 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI 1F92
5337 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI 1F93
5338 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI 1F94
5339 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI 1F95
5340 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 1F96
5341 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 1F97
5342 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI 1F98
5343 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI 1F99
5344 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI 1F9A
5345 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI 1F9B
5346 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI 1F9C
5347 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI 1F9D
5348 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 1F9E
5349 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 1F9F
5350 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI 1FA0
5351 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI 1FA1
5352 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI 1FA2
5353 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI 1FA3
5354 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI 1FA4
5355 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI 1FA5
5356 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI 1FA6
5357 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI 1FA7
5358 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI 1FA8
5359 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI 1FA9
5360 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI 1FAA
5361 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI 1FAB
5362 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI 1FAC
5363 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI 1FAD
5364 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI 1FAE
5365 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI 1FAF
5366 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH VRACHY 1FB0
5367 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH MACRON 1FB1
5368 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI 1FB2
5369 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI 1FB3
5370 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI 1FB4
5371 0,
5372 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH PERISPOMENI 1FB6
5373 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI 1FB7
5374 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH VRACHY 1FB8
5375 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH MACRON 1FB9
5376 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH VARIA 1FBA
5377 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH OXIA 1FBB
5378 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI 1FBC
5379 0, 0, 0,
5380 0, 0,
5381 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI 1FC2
5382 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI 1FC3
5383 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI 1FC4
5384 0,
5385 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH PERISPOMENI 1FC6
5386 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI 1FC7
5387 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER EPSILON WITH VARIA 1FC8
5388 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER EPSILON WITH OXIA 1FC9
5389 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH VARIA 1FCA
5390 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH OXIA 1FCB
5391 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI 1FCC
5392 0, 0, 0,
5393 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH VRACHY 1FD0
5394 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH MACRON 1FD1
5395 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA 1FD2
5396 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA 1FD3
5397 0, 0,
5398 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH PERISPOMENI 1FD6
5399 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI 1FD7
5400 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH VRACHY 1FD8
5401 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH MACRON 1FD9
5402 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH VARIA 1FDA
5403 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER IOTA WITH OXIA 1FDB
5404 0, 0, 0, 0,
5405 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH VRACHY 1FE0
5406 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH MACRON 1FE1
5407 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA 1FE2
5408 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA 1FE3
5409 CH_PROP_LOWER | CH_PROP_CONSONANT, // GREEK SMALL LETTER RHO WITH PSILI 1FE4
5410 CH_PROP_LOWER | CH_PROP_CONSONANT, // GREEK SMALL LETTER RHO WITH DASIA 1FE5
5411 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH PERISPOMENI 1FE6
5412 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI 1FE7
5413 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER UPSILON WITH VRACHY 1FE8
5414 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER UPSILON WITH MACRON 1FE9
5415 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER UPSILON WITH VARIA 1FEA
5416 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER UPSILON WITH OXIA 1FEB
5417 CH_PROP_UPPER | CH_PROP_CONSONANT, // GREEK CAPITAL LETTER RHO WITH DASIA 1FEC
5418 0, 0, 0,
5419 0, 0,
5420 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI 1FF2
5421 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI 1FF3
5422 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI 1FF4
5423 0,
5424 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH PERISPOMENI 1FF6
5425 CH_PROP_LOWER | CH_PROP_VOWEL, // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI 1FF7
5426 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMICRON WITH VARIA 1FF8
5427 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMICRON WITH OXIA 1FF9
5428 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH VARIA 1FFA
5429 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH OXIA 1FFB
5430 CH_PROP_UPPER | CH_PROP_VOWEL, // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI 1FFC
5431 0, 0, 0
5432 };
5433
getCharProp(lChar32 ch)5434 inline lUInt16 getCharProp(lChar32 ch) {
5435 static const lChar32 maxchar = sizeof(char_props) / sizeof( lUInt16 );
5436 if (ch<maxchar)
5437 return char_props[ch];
5438 else if ((ch>>8) == 0x1F)
5439 return char_props_1f00[ch & 255];
5440 else if (ch>=0x2012 && ch<=0x2015)
5441 return CH_PROP_DASH|CH_PROP_SIGN;
5442 else if (ch==0x201C) // left double quotation mark
5443 return CH_PROP_AVOID_WRAP_AFTER;
5444 else if (ch==0x201D) // right double quotation mark
5445 return CH_PROP_AVOID_WRAP_BEFORE;
5446 else if (ch>=UNICODE_CJK_IDEOGRAPHS_BEGIN && ch<=UNICODE_CJK_IDEOGRAPHS_END&&(ch<=UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_BEGIN||
5447 ch>=UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_END))
5448 return CH_PROP_CJK;
5449 else if ((ch>=UNICODE_CJK_PUNCTUATION_BEGIN && ch<=UNICODE_CJK_PUNCTUATION_END) ||
5450 (ch>=UNICODE_GENERAL_PUNCTUATION_BEGIN && ch<=UNICODE_GENERAL_PUNCTUATION_END) ||
5451 (ch>=UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_BEGIN && ch<=UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_END))
5452 return CH_PROP_PUNCT;
5453 return 0;
5454 }
5455
lStr_getCharProps(const lChar32 * str,int sz,lUInt16 * props)5456 void lStr_getCharProps( const lChar32 * str, int sz, lUInt16 * props )
5457 {
5458 for ( int i=0; i<sz; i++ ) {
5459 lChar32 ch = str[i];
5460 props[i] = getCharProp(ch);
5461 }
5462 }
5463
lStr_isWordSeparator(lChar32 ch)5464 bool lStr_isWordSeparator( lChar32 ch )
5465 {
5466 // ASCII letters and digits are NOT word separators
5467 if (ch >= 0x61 && ch <= 0x7A) return false; // lowercase ascii letters
5468 if (ch >= 0x41 && ch <= 0x5A) return false; // uppercase ascii letters
5469 if (ch >= 0x30 && ch <= 0x39) return false; // digits
5470 if (ch == 0xAD ) return false; // soft-hyphen, considered now as part of word
5471 // All other below 0xC0 are word separators:
5472 // < 0x30 space, !"#$%&'()*+,-./
5473 // < 0x41 :;<=>?@
5474 // < 0x61 [\]^_`
5475 // < 0xC0 {|}~ and control characters and other signs
5476 if (ch < 0xC0 ) return true;
5477 // 0xC0 to 0xFF, except 0xD7 and 0xF7, are latin accentuated letters.
5478 // Above 0xFF are other alphabets. Let's consider all above 0xC0 unicode
5479 // characters as letters, except the adequately named PUNCTUATION ranges.
5480 // There may be exceptions in some alphabets, that we can individually
5481 // add here :
5482 if (ch == 0xD7 ) return true; // multiplication sign
5483 if (ch == 0xF7 ) return true; // division sign
5484 // this one includes em-dash & friends, and other quotation marks
5485 if (ch>=UNICODE_GENERAL_PUNCTUATION_BEGIN && ch<=UNICODE_GENERAL_PUNCTUATION_END) return true;
5486 // CJK puncutation
5487 if (ch>=UNICODE_CJK_PUNCTUATION_BEGIN && ch<=UNICODE_CJK_PUNCTUATION_END) return true;
5488 if (ch>=UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_BEGIN && ch<=UNICODE_CJK_PUNCTUATION_HALF_AND_FULL_WIDTH_END) return true;
5489 // Some others(from https://www.cs.tut.fi/~jkorpela/chars/spaces.html)
5490 if (ch == 0x1680 ) return true; // OGHAM SPACE MARK
5491 if (ch == 0x180E ) return true; // MONGOLIAN VOWEL SEPARATOR
5492 if (ch == 0xFEFF ) return true; // ZERO WIDTH NO-BREAK SPACE
5493 // All others are considered part of a word, thus not word separators
5494 return false;
5495 }
5496
/// Find the bounds of the alpha sequence (word) located before position pos.
///
/// Characters carrying CH_PROP_ALPHA or CH_PROP_HYPHEN are treated as word
/// characters; everything else delimits the word.
///
/// @param str    character buffer to scan
/// @param sz     upper limit (exclusive) for the forward scan that finds the word end
/// @param pos    position to search from; the backward scan starts at pos-1
/// @param start  receives the index where the word starts
/// @param end    receives the index just past the last word character
///
/// When pos <= 0 the backward scan cannot run and start and end are both set to pos.
void lStr_findWordBounds( const lChar32 * str, int sz, int pos, int & start, int & end )
{
    int hwStart, hwEnd;

    // 20180615: don't split anymore on UNICODE_SOFT_HYPHEN_CODE, consider
    // it like an alpha char of zero width not drawn.
    // Only hyphenation code will care about it
    // We don't use lStr_isWordSeparator() here, but we exclusively look
    // for ALPHA chars or soft-hyphens, as this function is and should
    // only be used before calling hyphenate() to find a real word to
    // give to the hyphenation algorithms.

//    // skip spaces
//    for (hwStart=pos-1; hwStart>0; hwStart--)
//    {
//        lChar32 ch = str[hwStart];
//        if ( ch<(int)maxchar ) {
//            lUInt16 props = char_props[ch];
//            if ( !(props & CH_PROP_SPACE) )
//                break;
//        }
//    }
//    // skip punctuation signs and digits
//    for (; hwStart>0; hwStart--)
//    {
//        lChar32 ch = str[hwStart];
//        if ( ch<(int)maxchar ) {
//            lUInt16 props = char_props[ch];
//            if ( !(props & (CH_PROP_PUNCT|CH_PROP_DIGIT)) )
//                break;
//        }
//    }
    // skip until first alpha: walk backwards from pos-1 and stop on the first
    // word character. The loop condition is hwStart > 0, so index 0 is never
    // examined here and the loop leaves hwStart at 0 when nothing was found.
    for (hwStart = pos-1; hwStart > 0; hwStart--)
    {
        lChar32 ch = str[hwStart];
        lUInt16 props = getCharProp(ch);
        if ( props & CH_PROP_ALPHA || props & CH_PROP_HYPHEN )
            break;
    }
    // Because the loop above stops at 0, hwStart can only be negative here when
    // pos <= 0 (the loop never ran).
    // NOTE(review): a backward scan that finds no word character ends with
    // hwStart == 0 and does NOT take this branch - confirm this is intended.
    if ( hwStart<0 ) {
        // no alphas found
        start = end = pos;
        return;
    }
    // This value is overwritten by the initializer of the forward loop below.
    hwEnd = hwStart+1;
    // skipping while alpha: keep walking backwards over word characters; on the
    // first non-word character step forward again so hwStart points at the
    // first word character. If index 0 is reached the loop ends without
    // testing str[0], which then becomes the word start.
    for (; hwStart>0; hwStart--)
    {
        lChar32 ch = str[hwStart];
        //int lastAlpha = -1;
        if ( getCharProp(ch) & CH_PROP_ALPHA || getCharProp(ch) & CH_PROP_HYPHEN ) {
            //lastAlpha = hwStart;
        } else {
            hwStart++;
            break;
        }
    }
//    if ( lastAlpha<0 ) {
//        // no alphas found
//        start = end = pos;
//        return;
//    }
    // Extend the end forward from the character after the word start, until a
    // non-word character or sz is reached.
    for (hwEnd=hwStart+1; hwEnd<sz; hwEnd++) // 20080404
    {
        lChar32 ch = str[hwEnd];
        if (!(getCharProp(ch) & CH_PROP_ALPHA) && !(getCharProp(ch) & CH_PROP_HYPHEN))
            break;
        // Also stop when the preceding character is a space.
        // NOTE(review): this presumably covers an untested str[0] being a space - confirm.
        ch = str[hwEnd-1];
        if ( ch==' ' ) // || ch==UNICODE_SOFT_HYPHEN_CODE) )
            break;
    }
    start = hwStart;
    end = hwEnd;
    //CRLog::debug("Word bounds: '%s'", LCSTR(lString32(str+start, end-start)));
}
5574
limit(size_type sz)5575 void lString16::limit( size_type sz )
5576 {
5577 if ( length() > sz ) {
5578 modify();
5579 pchunk->len = sz;
5580 pchunk->buf16[sz] = 0;
5581 }
5582 }
5583
limit(size_type sz)5584 void lString32::limit( size_type sz )
5585 {
5586 if ( length() > sz ) {
5587 modify();
5588 pchunk->len = sz;
5589 pchunk->buf32[sz] = 0;
5590 }
5591 }
5592
lGetCharProps(lChar32 ch)5593 lUInt16 lGetCharProps( lChar32 ch )
5594 {
5595 return getCharProp(ch);
5596 }
5597
5598
5599 /// returns true if string starts with specified substring, case insensitive
startsWithNoCase(const lString32 & substring) const5600 bool lString32::startsWithNoCase ( const lString32 & substring ) const
5601 {
5602 lString32 a = *this;
5603 lString32 b = substring;
5604 a.uppercase();
5605 b.uppercase();
5606 return a.startsWith( b );
5607 }
5608
5609 /// returns true if string starts with specified substring
startsWith(const char * substring) const5610 bool lString8::startsWith( const char * substring ) const
5611 {
5612 if (!substring || !substring[0])
5613 return true;
5614 int len = (int)strlen(substring);
5615 if (length() < len)
5616 return false;
5617 const lChar8 * s1 = c_str();
5618 const lChar8 * s2 = substring;
5619 for (int i=0; i<len; i++ )
5620 if ( s1[i] != s2[i] )
5621 return false;
5622 return true;
5623 }
5624
5625 /// returns true if string starts with specified substring
startsWith(const lString8 & substring) const5626 bool lString8::startsWith( const lString8 & substring ) const
5627 {
5628 if ( substring.empty() )
5629 return true;
5630 int len = substring.length();
5631 if (length() < len)
5632 return false;
5633 const lChar8 * s1 = c_str();
5634 const lChar8 * s2 = substring.c_str();
5635 for (int i=0; i<len; i++ )
5636 if ( s1[i] != s2[i] )
5637 return false;
5638 return true;
5639 }
5640
5641 /// returns true if string ends with specified substring
endsWith(const lChar8 * substring) const5642 bool lString8::endsWith( const lChar8 * substring ) const
5643 {
5644 if ( !substring || !*substring )
5645 return true;
5646 int len = (int)strlen(substring);
5647 if ( length() < len )
5648 return false;
5649 const lChar8 * s1 = c_str() + (length()-len);
5650 const lChar8 * s2 = substring;
5651 return lStr_cmp( s1, s2 )==0;
5652 }
5653
5654 /// returns true if string ends with specified substring
endsWith(const lChar16 * substring) const5655 bool lString16::endsWith( const lChar16 * substring ) const
5656 {
5657 if ( !substring || !*substring )
5658 return true;
5659 int len = lStr_len(substring);
5660 if ( length() < len )
5661 return false;
5662 const lChar16 * s1 = c_str() + (length()-len);
5663 const lChar16 * s2 = substring;
5664 return lStr_cmp( s1, s2 )==0;
5665 }
5666
5667 /// returns true if string ends with specified substring
endsWith(const lChar8 * substring) const5668 bool lString16::endsWith( const lChar8 * substring ) const
5669 {
5670 if ( !substring || !*substring )
5671 return true;
5672 int len = lStr_len(substring);
5673 if ( length() < len )
5674 return false;
5675 const lChar16 * s1 = c_str() + (length()-len);
5676 const lChar8 * s2 = substring;
5677 return lStr_cmp( s1, s2 )==0;
5678 }
5679
5680 /// returns true if string ends with specified substring
endsWith(const lString16 & substring) const5681 bool lString16::endsWith ( const lString16 & substring ) const
5682 {
5683 if ( substring.empty() )
5684 return true;
5685 int len = substring.length();
5686 if ( length() < len )
5687 return false;
5688 const lChar16 * s1 = c_str() + (length()-len);
5689 const lChar16 * s2 = substring.c_str();
5690 return lStr_cmp( s1, s2 )==0;
5691 }
5692
5693 /// returns true if string starts with specified substring
startsWith(const lString16 & substring) const5694 bool lString16::startsWith( const lString16 & substring ) const
5695 {
5696 if ( substring.empty() )
5697 return true;
5698 int len = substring.length();
5699 if ( length() < len )
5700 return false;
5701 const lChar16 * s1 = c_str();
5702 const lChar16 * s2 = substring.c_str();
5703 for ( int i=0; i<len; i++ )
5704 if ( s1[i]!=s2[i] )
5705 return false;
5706 return true;
5707 }
5708
5709 /// returns true if string starts with specified substring
startsWith(const lChar16 * substring) const5710 bool lString16::startsWith(const lChar16 * substring) const
5711 {
5712 if (!substring || !substring[0])
5713 return true;
5714 int len = _lStr_len(substring);
5715 if ( length() < len )
5716 return false;
5717 const lChar16 * s1 = c_str();
5718 const lChar16 * s2 = substring;
5719 for ( int i=0; i<len; i++ )
5720 if ( s1[i] != s2[i] )
5721 return false;
5722 return true;
5723 }
5724
5725 /// returns true if string starts with specified substring
startsWith(const lChar8 * substring) const5726 bool lString16::startsWith(const lChar8 * substring) const
5727 {
5728 if (!substring || !substring[0])
5729 return true;
5730 int len = _lStr_len(substring);
5731 if ( length() < len )
5732 return false;
5733 const lChar16 * s1 = c_str();
5734 const lChar8 * s2 = substring;
5735 for ( int i=0; i<len; i++ )
5736 if (s1[i] != s2[i])
5737 return false;
5738 return true;
5739 }
5740
5741 /// returns true if string ends with specified substring
endsWith(const lChar32 * substring) const5742 bool lString32::endsWith( const lChar32 * substring ) const
5743 {
5744 if ( !substring || !*substring )
5745 return true;
5746 int len = lStr_len(substring);
5747 if ( length() < len )
5748 return false;
5749 const lChar32 * s1 = c_str() + (length()-len);
5750 const lChar32 * s2 = substring;
5751 return lStr_cmp( s1, s2 )==0;
5752 }
5753
5754 /// returns true if string ends with specified substring
endsWith(const lChar8 * substring) const5755 bool lString32::endsWith( const lChar8 * substring ) const
5756 {
5757 if ( !substring || !*substring )
5758 return true;
5759 int len = lStr_len(substring);
5760 if ( length() < len )
5761 return false;
5762 const lChar32 * s1 = c_str() + (length()-len);
5763 const lChar8 * s2 = substring;
5764 return lStr_cmp( s1, s2 )==0;
5765 }
5766
5767 /// returns true if string ends with specified substring
endsWith(const lString32 & substring) const5768 bool lString32::endsWith ( const lString32 & substring ) const
5769 {
5770 if ( substring.empty() )
5771 return true;
5772 int len = substring.length();
5773 if ( length() < len )
5774 return false;
5775 const lChar32 * s1 = c_str() + (length()-len);
5776 const lChar32 * s2 = substring.c_str();
5777 return lStr_cmp( s1, s2 )==0;
5778 }
5779
5780 /// returns true if string starts with specified substring
startsWith(const lString32 & substring) const5781 bool lString32::startsWith( const lString32 & substring ) const
5782 {
5783 if ( substring.empty() )
5784 return true;
5785 int len = substring.length();
5786 if ( length() < len )
5787 return false;
5788 const lChar32 * s1 = c_str();
5789 const lChar32 * s2 = substring.c_str();
5790 for ( int i=0; i<len; i++ )
5791 if ( s1[i]!=s2[i] )
5792 return false;
5793 return true;
5794 }
5795
5796 /// returns true if string starts with specified substring
startsWith(const lChar32 * substring) const5797 bool lString32::startsWith(const lChar32 * substring) const
5798 {
5799 if (!substring || !substring[0])
5800 return true;
5801 int len = _lStr_len(substring);
5802 if ( length() < len )
5803 return false;
5804 const lChar32 * s1 = c_str();
5805 const lChar32 * s2 = substring;
5806 for ( int i=0; i<len; i++ )
5807 if ( s1[i] != s2[i] )
5808 return false;
5809 return true;
5810 }
5811
5812 /// returns true if string starts with specified substring
startsWith(const lChar8 * substring) const5813 bool lString32::startsWith(const lChar8 * substring) const
5814 {
5815 if (!substring || !substring[0])
5816 return true;
5817 int len = _lStr_len(substring);
5818 if ( length() < len )
5819 return false;
5820 const lChar32 * s1 = c_str();
5821 const lChar8 * s2 = substring;
5822 for ( int i=0; i<len; i++ )
5823 if (s1[i] != s2[i])
5824 return false;
5825 return true;
5826 }
5827
split2(const lString32 & delim,lString32 & value1,lString32 & value2)5828 bool lString32::split2( const lString32 & delim, lString32 & value1, lString32 & value2 )
5829 {
5830 if ( empty() )
5831 return false;
5832 int p = pos(delim);
5833 if ( p<=0 || p>=length()-delim.length() )
5834 return false;
5835 value1 = substr(0, p);
5836 value2 = substr(p+delim.length());
5837 return true;
5838 }
5839
split2(const lChar32 * delim,lString32 & value1,lString32 & value2)5840 bool lString32::split2( const lChar32 * delim, lString32 & value1, lString32 & value2 )
5841 {
5842 if (empty())
5843 return false;
5844 int p = pos(delim);
5845 int l = lStr_len(delim);
5846 if (p<=0 || p >= length() - l)
5847 return false;
5848 value1 = substr(0, p);
5849 value2 = substr(p + l);
5850 return true;
5851 }
5852
split2(const lChar8 * delim,lString32 & value1,lString32 & value2)5853 bool lString32::split2( const lChar8 * delim, lString32 & value1, lString32 & value2 )
5854 {
5855 if (empty())
5856 return false;
5857 int p = pos(delim);
5858 int l = lStr_len(delim);
5859 if (p<=0 || p >= length() - l)
5860 return false;
5861 value1 = substr(0, p);
5862 value2 = substr(p + l);
5863 return true;
5864 }
5865
splitIntegerList(lString32 s,lString32 delim,int & value1,int & value2)5866 bool splitIntegerList( lString32 s, lString32 delim, int &value1, int &value2 )
5867 {
5868 if ( s.empty() )
5869 return false;
5870 lString32 s1, s2;
5871 if ( !s.split2( delim, s1, s2 ) )
5872 return false;
5873 int n1, n2;
5874 if ( !s1.atoi(n1) )
5875 return false;
5876 if ( !s2.atoi(n2) )
5877 return false;
5878 value1 = n1;
5879 value2 = n2;
5880 return true;
5881 }
5882
replace(size_type p0,size_type n0,const lString8 & str)5883 lString8 & lString8::replace(size_type p0, size_type n0, const lString8 & str) {
5884 lString8 s1 = substr( 0, p0 );
5885 lString8 s2 = length() - p0 - n0 > 0 ? substr( p0+n0, length()-p0-n0 ) : lString8::empty_str;
5886 *this = s1 + str + s2;
5887 return *this;
5888 }
5889
replace(size_type p0,size_type n0,const lString32 & str)5890 lString32 & lString32::replace(size_type p0, size_type n0, const lString32 & str)
5891 {
5892 lString32 s1 = substr( 0, p0 );
5893 lString32 s2 = length() - p0 - n0 > 0 ? substr( p0+n0, length()-p0-n0 ) : lString32::empty_str;
5894 *this = s1 + str + s2;
5895 return *this;
5896 }
5897
5898 /// replaces part of string, if pattern is found
replace(const lString32 & findStr,const lString32 & replaceStr)5899 bool lString32::replace(const lString32 & findStr, const lString32 & replaceStr)
5900 {
5901 int p = pos(findStr);
5902 if ( p<0 )
5903 return false;
5904 *this = replace( p, findStr.length(), replaceStr );
5905 return true;
5906 }
5907
replaceParam(int index,const lString32 & replaceStr)5908 bool lString32::replaceParam(int index, const lString32 & replaceStr)
5909 {
5910 return replace( cs32("$") + fmt::decimal(index), replaceStr );
5911 }
5912
5913 /// replaces first found occurence of "$N" pattern with itoa of integer, where N=index
replaceIntParam(int index,int replaceNumber)5914 bool lString32::replaceIntParam(int index, int replaceNumber)
5915 {
5916 return replaceParam( index, lString32::itoa(replaceNumber));
5917 }
5918
decodeHex(lChar32 ch)5919 static int decodeHex( lChar32 ch )
5920 {
5921 if ( ch>='0' && ch<='9' )
5922 return ch-'0';
5923 else if ( ch>='a' && ch<='f' )
5924 return ch-'a'+10;
5925 else if ( ch>='A' && ch<='F' )
5926 return ch-'A'+10;
5927 return -1;
5928 }
5929
decodeHTMLChar(const lChar32 * s)5930 static lChar8 decodeHTMLChar( const lChar32 * s )
5931 {
5932 if (s[0] == '%') {
5933 int d1 = decodeHex( s[1] );
5934 if (d1 >= 0) {
5935 int d2 = decodeHex( s[2] );
5936 if (d2 >= 0) {
5937 return (lChar8)(d1*16 + d2);
5938 }
5939 }
5940 }
5941 return 0;
5942 }
5943
5944 /// decodes path like "file%20name%C3%A7" to "file nameç"
DecodeHTMLUrlString(lString32 s)5945 lString32 DecodeHTMLUrlString( lString32 s )
5946 {
5947 const lChar32 * str = s.c_str();
5948 for ( int i=0; str[i]; i++ ) {
5949 if ( str[i]=='%' ) {
5950 lChar8 ch = decodeHTMLChar( str + i );
5951 if ( ch==0 ) {
5952 continue;
5953 }
5954 // HTML encoded char found
5955 lString8 res;
5956 res.reserve(s.length());
5957 res.append(UnicodeToUtf8(str, i));
5958 res.append(1, ch);
5959 i+=3;
5960
5961 // continue conversion
5962 for ( ; str[i]; i++ ) {
5963 if ( str[i]=='%' ) {
5964 ch = decodeHTMLChar( str + i );
5965 if ( ch==0 ) {
5966 res.append(1, (lChar8)str[i]);
5967 continue;
5968 }
5969 res.append(1, ch);
5970 i+=2;
5971 } else {
5972 res.append(1, (lChar8)str[i]);
5973 }
5974 }
5975 return Utf8ToUnicode(res);
5976 }
5977 }
5978 return s;
5979 }
5980
limitStringSize(lString32 & str,int maxSize)5981 void limitStringSize(lString32 & str, int maxSize) {
5982 if (str.length() < maxSize)
5983 return;
5984 int lastSpace = -1;
5985 for (int i = str.length() - 1; i > 0; i--)
5986 if (str[i] == ' ') {
5987 while (i > 0 && str[i - 1] == ' ')
5988 i--;
5989 lastSpace = i;
5990 break;
5991 }
5992 int split = lastSpace > 0 ? lastSpace : maxSize;
5993 str = str.substr(0, split);
5994 str += "...";
5995 }
5996
5997 /// remove soft-hyphens from string
removeSoftHyphens(lString32 s)5998 lString32 removeSoftHyphens( lString32 s )
5999 {
6000 lChar32 hyphen = lChar32(UNICODE_SOFT_HYPHEN_CODE);
6001 int start = 0;
6002 while (true) {
6003 int p = -1;
6004 int len = s.length();
6005 for (int i = start; i < len; i++) {
6006 if (s[i] == hyphen) {
6007 p = i;
6008 break;
6009 }
6010 }
6011 if (p == -1)
6012 break;
6013 start = p;
6014 lString32 s1 = s.substr( 0, p );
6015 lString32 s2 = p < len-1 ? s.substr( p+1, len-p-1 ) : lString32::empty_str;
6016 s = s1 + s2;
6017 }
6018 return s;
6019 }
6020