1 /********************************************************************/
2 /* */
3 /* str_rtl.c Primitive actions for the string type. */
4 /* Copyright (C) 1989 - 2021 Thomas Mertes */
5 /* */
6 /* This file is part of the Seed7 Runtime Library. */
7 /* */
8 /* The Seed7 Runtime Library is free software; you can */
9 /* redistribute it and/or modify it under the terms of the GNU */
10 /* Lesser General Public License as published by the Free Software */
11 /* Foundation; either version 2.1 of the License, or (at your */
12 /* option) any later version. */
13 /* */
14 /* The Seed7 Runtime Library is distributed in the hope that it */
15 /* will be useful, but WITHOUT ANY WARRANTY; without even the */
16 /* implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR */
17 /* PURPOSE. See the GNU Lesser General Public License for more */
18 /* details. */
19 /* */
20 /* You should have received a copy of the GNU Lesser General */
21 /* Public License along with this program; if not, write to the */
22 /* Free Software Foundation, Inc., 51 Franklin Street, */
23 /* Fifth Floor, Boston, MA 02110-1301, USA. */
24 /* */
25 /* Module: Seed7 Runtime Library */
26 /* File: seed7/src/str_rtl.c */
27 /* Changes: 1991 - 1994, 2005, 2008 - 2021 Thomas Mertes */
28 /* Content: Primitive actions for the string type. */
29 /* */
30 /********************************************************************/
31
32 #define LOG_FUNCTIONS 0
33 #define VERBOSE_EXCEPTIONS 0
34
35 #include "version.h"
36
37 #include "stdlib.h"
38 #include "stdio.h"
39 #include "string.h"
40 #include "wchar.h"
41
42 #include "common.h"
43 #include "data_rtl.h"
44 #include "heaputl.h"
45 #include "striutl.h"
46 #include "arr_rtl.h"
47 #include "rtl_err.h"
48
49 #undef EXTERN
50 #define EXTERN
51 #include "str_rtl.h"
52
53
/* Index of the charDelta[] element that search_strelem2() and      */
/* rsearch_strelem2() use for every character with a code of        */
/* CHAR_DELTA_BEYOND or above. A charDelta[] table therefore needs  */
/* at least CHAR_DELTA_BEYOND + 1 elements.                         */
#define CHAR_DELTA_BEYOND 128
/* NOTE(review): presumably the number of elements a work array     */
/* starts with; the use is not visible in this part of the file.    */
#define INITIAL_ARRAY_SIZE 256
/* Factor by which addCopiedStriToRtlArray() multiplies the size of */
/* a work array that has no free element left.                      */
#define ARRAY_SIZE_FACTOR 2
/* NOTE(review): the two thresholds below presumably decide when a  */
/* Boyer-Moore style search is used (length of the searched string  */
/* respectively of the main string); confirm at the place of use.   */
#define BOYER_MOORE_SEARCHED_STRI_THRESHOLD 2
#define BOYER_MOORE_MAIN_STRI_THRESHOLD 1400
59
60 /* memset_to_strelem is not used because it is */
61 /* only better for lengths greater than 7. */
62 #define LPAD_WITH_MEMSET_TO_STRELEM 0
63
64 #if WITH_STRI_FREELIST
65 #define RESIZE_THRESHOLD MAX_STRI_LEN_IN_FREELIST
66 #else
67 #define RESIZE_THRESHOLD 8
68 #endif
69
70
71
72 #if HAS_WMEMCMP && WCHAR_T_SIZE == 32
/* Compare 'len' string elements with wmemcmp(). This variant is    */
/* used when wchar_t has the same size as a string element. Every   */
/* macro argument is parenthesized, so an expression argument (e.g. */
/* a conditional or a pointer sum) is cast as a whole instead of    */
/* only its first operand.                                          */
#define memcmp_strelem(mem1, mem2, len) \
    wmemcmp((const wchar_t *) (mem1), (const wchar_t *) (mem2), (size_t) (len))
75 #else
76
77
78
/**
 *  Compare two arrays of string elements element by element.
 *  @param mem1 First array of string elements.
 *  @param mem2 Second array of string elements.
 *  @param len Number of elements to compare.
 *  @return -1, 0 or 1 if the first array is less than, equal to
 *          or greater than the second one. The first differing
 *          element decides the result.
 */
static inline int memcmp_strelem (register const strElemType *mem1,
    register const strElemType *mem2, memSizeType len)

  {
    memSizeType index;
    int comparison = 0;

  /* memcmp_strelem */
    for (index = 0; index < len && comparison == 0; index++) {
      if (mem1[index] < mem2[index]) {
        comparison = -1;
      } else if (mem1[index] > mem2[index]) {
        comparison = 1;
      } /* if */
    } /* for */
    return comparison;
  } /* memcmp_strelem */
90
91 #endif
92
93
94
search_strelem2(const strElemType * mem,const strElemType ch,const strElemType * const beyond,const memSizeType charDelta[])95 static inline const strElemType *search_strelem2 (const strElemType *mem,
96 const strElemType ch, const strElemType *const beyond,
97 const memSizeType charDelta[])
98
99 { /* search_strelem2 */
100 while (mem < beyond) {
101 if (*mem == ch) {
102 return mem;
103 } else if (*mem < CHAR_DELTA_BEYOND) {
104 mem += charDelta[*mem];
105 } else {
106 mem += charDelta[CHAR_DELTA_BEYOND];
107 } /* if */
108 } /* while */
109 return NULL;
110 } /* search_strelem2 */
111
112
113
/**
 *  Search backward for 'ch' in at most 'len' elements.
 *  The search starts at 'mem' and proceeds towards lower addresses.
 *  @param mem Position of the first element to examine.
 *  @param ch Element to search for.
 *  @param len Maximum number of elements to examine.
 *  @return a pointer to the found element, or NULL if none of the
 *          examined elements is equal to 'ch'.
 */
static inline const strElemType *rsearch_strelem (const strElemType *mem,
    const strElemType ch, size_t len)

  { /* rsearch_strelem */
    while (len != 0) {
      if (*mem == ch) {
        return mem;
      } /* if */
      mem--;
      len--;
    } /* while */
    return NULL;
  } /* rsearch_strelem */
125
126
127
rsearch_strelem2(const strElemType * mem,const strElemType ch,const strElemType * const beyond,const memSizeType charDelta[])128 static inline const strElemType *rsearch_strelem2 (const strElemType *mem,
129 const strElemType ch, const strElemType *const beyond,
130 const memSizeType charDelta[])
131
132 { /* rsearch_strelem2 */
133 while (mem > beyond) {
134 if (*mem == ch) {
135 return mem;
136 } else if (*mem < CHAR_DELTA_BEYOND) {
137 mem -= charDelta[*mem];
138 } else {
139 mem -= charDelta[CHAR_DELTA_BEYOND];
140 } /* if */
141 } /* while */
142 return NULL;
143 } /* rsearch_strelem2 */
144
145
146
147 /**
148 * Copy 'source' character array to 'dest' as lower case characters.
149 * The conversion uses the default Unicode case mapping,
150 * where each character is considered in isolation.
151 * Characters without case mapping are left unchanged.
152 * The mapping is independent from the locale. Individual
153 * character case mappings cannot be reversed, because some
154 * characters have multiple characters that map to them.
155 * @param source Character array to be copied and converted.
156 * @param length Length of the source character array.
157 * @param dest Destination character array for the lower case chars.
158 */
toLower(const strElemType * const source,memSizeType length,strElemType * const dest)159 void toLower (const strElemType *const source, memSizeType length,
160 strElemType *const dest)
161
162 {
163 memSizeType pos;
164 strElemType ch;
165
166 /* toLower */
167 for (pos = 0; pos < length; pos++) {
168 ch = source[pos];
169 switch (ch >> 8) {
170 case 0:
171 if (ch <= '\177') {
172 ch = (strElemType) ((unsigned char)
173 "\0\1\2\3\4\5\6\7\10\11\12\13\14\15\16\17"
174 "\20\21\22\23\24\25\26\27\30\31\32\33\34\35\36\37"
175 " !\"#$%&'()*+,-./0123456789:;<=>?"
176 "@abcdefghijklmnopqrstuvwxyz[\\]^_"
177 "`abcdefghijklmnopqrstuvwxyz{|}~\177"[ch]);
178 } else if ("\0\0\0\0\0\0\0\0\376\377\377\007\0\0\0\0"
179 "\0\0\0\0\0\0\0\0\377\377\177\177\0\0\0\0"[ch >> 3 & 31] &
180 1 << (ch & 7)) {
181 ch += 32;
182 } /* if */
183 break;
184 case 1:
185 if ("UUUUUUU\252\252TUUUUU+"
186 "\326\316\333\261\325\322\256\021\260\255\252JUU\326U"[ch >> 3 & 31] &
187 1 << (ch & 7)) {
188 switch (ch) {
189 case 0x0130: ch = 0x0069; break;
190 case 0x0178: ch = 0x00ff; break;
191 case 0x0181: ch = 0x0253; break;
192 case 0x0186: ch = 0x0254; break;
193 case 0x0189: ch = 0x0256; break;
194 case 0x018a: ch = 0x0257; break;
195 case 0x018e: ch = 0x01dd; break;
196 case 0x018f: ch = 0x0259; break;
197 case 0x0190: ch = 0x025b; break;
198 case 0x0193: ch = 0x0260; break;
199 case 0x0194: ch = 0x0263; break;
200 case 0x0196: ch = 0x0269; break;
201 case 0x0197: ch = 0x0268; break;
202 case 0x019c: ch = 0x026f; break;
203 case 0x019d: ch = 0x0272; break;
204 case 0x019f: ch = 0x0275; break;
205 case 0x01a6: ch = 0x0280; break;
206 case 0x01a9: ch = 0x0283; break;
207 case 0x01ae: ch = 0x0288; break;
208 case 0x01b1: ch = 0x028a; break;
209 case 0x01b2: ch = 0x028b; break;
210 case 0x01b7: ch = 0x0292; break;
211 case 0x01c4: ch += 2; break;
212 case 0x01c7: ch += 2; break;
213 case 0x01ca: ch += 2; break;
214 case 0x01f1: ch += 2; break;
215 case 0x01f2: ch = 0x01f3; break;
216 case 0x01f6: ch = 0x0195; break;
217 case 0x01f7: ch = 0x01bf; break;
218 case 0x01f8: ch = 0x01f9; break;
219 default: ch += 1; break;
220 } /* switch */
221 } /* if */
222 break;
223 case 2:
224 if ("UUUUUU\005lzU\0\0\0\0\0\0"
225 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"[ch >> 3 & 31] &
226 1 << (ch & 7)) {
227 switch (ch) {
228 case 0x0220: ch -= 130; break;
229 case 0x023a: ch = 0x2c65; break;
230 case 0x023d: ch -= 163; break;
231 case 0x023e: ch = 0x2c66; break;
232 case 0x0243: ch -= 195; break;
233 case 0x0244: ch += 69; break;
234 case 0x0245: ch += 71; break;
235 default: ch += 1; break;
236 } /* switch */
237 } /* if */
238 break;
239 case 3:
240 if ("\0\0\0\0\0\0\0\0\0\0\0\0\0\0E\0"
241 "@\327\376\377\373\017\0\0\0\200\0UUU\220\346"[ch >> 3 & 31] &
242 1 << (ch & 7)) {
243 switch (ch) {
244 case 0x0370: ch += 1; break;
245 case 0x0372: ch += 1; break;
246 case 0x0376: ch += 1; break;
247 case 0x0386: ch += 38; break;
248 case 0x0388: ch += 37; break;
249 case 0x0389: ch += 37; break;
250 case 0x038a: ch += 37; break;
251 case 0x038c: ch += 64; break;
252 case 0x038e: ch += 63; break;
253 case 0x038f: ch += 63; break;
254 case 0x03cf: ch += 8; break;
255 case 0x03f4: ch -= 60; break;
256 case 0x03f9: ch -= 7; break;
257 case 0x03fd: ch -= 130; break;
258 case 0x03fe: ch -= 130; break;
259 case 0x03ff: ch -= 130; break;
260 default:
261 if (ch <= 0x03ab) {
262 ch += 32;
263 } else {
264 ch += 1;
265 } /* if */
266 break;
267 } /* switch */
268 } /* if */
269 break;
270 case 4:
271 if ("\377\377\377\377\377\377\0\0\0\0\0\0UUUU"
272 "\001TUUUUUU\253*UUUUUU"[ch >> 3 & 31] &
273 1 << (ch & 7)) {
274 if (ch <= 0x040f) {
275 ch += 80;
276 } else if (ch <= 0x042f) {
277 ch += 32;
278 } else if (ch == 0x04c0) {
279 ch = 0x04cf;
280 } else {
281 ch += 1;
282 } /* if */
283 } /* if */
284 break;
285 case 5:
286 if (ch <= 0x0526 && (ch & 1) == 0) {
287 ch += 1;
288 } else if (ch >= 0x0531 && ch <= 0x0556) {
289 ch += 48;
290 } /* if */
291 break;
292 case 16:
293 if ("\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"
294 "\0\0\0\0\377\377\377\377\277 \0\0\0\0\0\0"[ch >> 3 & 31] &
295 1 << (ch & 7)) {
296 ch += 0x1c60;
297 } /* if */
298 break;
299 case 30:
300 if ("UUUUUUUUUUUUUUUUUU\025@UUUUUUUUUUUU"[ch >> 3 & 31] &
301 1 << (ch & 7)) {
302 if (ch == 0x1e9e) {
303 ch = 0x00df;
304 } else {
305 ch += 1;
306 } /* if */
307 } /* if */
308 break;
309 case 31:
310 if ("\0\377\0?\0\377\0\377\0?\0\252\0\377\0\0"
311 "\0\377\0\377\0\377\0\037\0\037\0\017\0\037\0\037"[ch >> 3 & 31] &
312 1 << (ch & 7)) {
313 if (ch >= 0x1fba) {
314 ch = (strElemType) ((unsigned char)
315 "\160\161\263\000\000\000\000\000\000\000"
316 "\000\000\000\000\162\163\164\165\303\000"
317 "\000\000\000\000\000\000\000\000\000\000"
318 "\320\321\166\167\000\000\000\000\000\000"
319 "\000\000\000\000\000\000\340\341\172\173"
320 "\345\000\000\000\000\000\000\000\000\000"
321 "\000\000\170\171\174\175\363"[ch - 0x1fba] + 0x1f00);
322 } else {
323 ch -= 8;
324 } /* if */
325 } /* if */
326 break;
327 case 33:
328 if ("\0\0\0\0@\f\004\0\0\0\0\0\377\377\0\0"
329 "\b\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"[ch >> 3 & 31] &
330 1 << (ch & 7)) {
331 switch (ch) {
332 case 0x2126: ch = 0x03c9; break;
333 case 0x212a: ch = 0x006b; break;
334 case 0x212b: ch = 0x00e5; break;
335 case 0x2132: ch += 28; break;
336 case 0x2183: ch += 1; break;
337 default: ch += 16; break;
338 } /* switch */
339 } /* if */
340 break;
341 case 36:
342 if (ch >= 0x24b6 && ch <= 0x24cf) {
343 ch += 26;
344 } /* if */
345 break;
346 case 44:
347 if ("\377\377\377\377\377\177\0\0\0\0\0\0\235\352%\300"
348 "UUUUUUUUUUUU\005(\004\0"[ch >> 3 & 31] &
349 1 << (ch & 7)) {
350 if (ch <= 0x2c2e) {
351 ch += 48;
352 } else {
353 switch (ch) {
354 case 0x2c62: ch = 0x026b; break;
355 case 0x2c63: ch = 0x1d7d; break;
356 case 0x2c64: ch = 0x027d; break;
357 case 0x2c6d: ch = 0x0251; break;
358 case 0x2c6e: ch = 0x0271; break;
359 case 0x2c6f: ch = 0x0250; break;
360 case 0x2c70: ch = 0x0252; break;
361 case 0x2c7e: ch = 0x023f; break;
362 case 0x2c7f: ch = 0x0240; break;
363 default: ch += 1; break;
364 } /* switch */
365 } /* if */
366 } /* if */
367 break;
368 case 166:
369 if ("\0\0\0\0\0\0\0\0UUUUU\025\0\0"
370 "UUU\0\0\0\0\0\0\0\0\0\0\0\0\0"[ch >> 3 & 31] &
371 1 << (ch & 7)) {
372 ch += 1;
373 } /* if */
374 break;
375 case 167:
376 if ("\0\0\0\0TUTUUUUUUU\0j"
377 "U(\005\0U\005\0\0\0\0\0\0\0\0\0\0"[ch >> 3 & 31] &
378 1 << (ch & 7)) {
379 if (ch == 0xa77d) {
380 ch = 0x1d79;
381 } else if (ch == 0xa78d) {
382 ch = 0x0265;
383 } else if (ch == 0xa7aa) {
384 ch = 0x0266;
385 } else {
386 ch += 1;
387 } /* if */
388 } /* if */
389 break;
390 case 255:
391 if (ch >= 0xff21 && ch <= 0xff3a) {
392 ch += 32;
393 } /* if */
394 break;
395 case 260:
396 if (ch >= 0x10400 && ch <= 0x10427) {
397 ch += 40;
398 } /* if */
399 break;
400 default:
401 break;
402 } /* switch */
403 dest[pos] = ch;
404 } /* for */
405 } /* toLower */
406
407
408
409 static const strElemType toUpperTable2[] = {
410 0x2c6f, 0x2c6d, 0x2c70, 0x0181, 0x0186, 0, 0x0189, 0x018a, 0, 0x018f,
411 0, 0x0190, 0, 0, 0, 0, 0x0193, 0, 0, 0x0194,
412 0, 0xa78d, 0xa7aa, 0, 0x0197, 0x0196, 0, 0x2c62, 0, 0,
413 0, 0x019c, 0, 0x2c6e, 0x019d, 0, 0, 0x019f, 0, 0,
414 0, 0, 0, 0, 0, 0x2c64, 0, 0, 0x01a6, 0,
415 0, 0x01a9, 0, 0, 0, 0, 0x01ae, 0x0244, 0x01b1, 0x01b2,
416 0x0245, 0, 0, 0, 0, 0, 0x01b7
417 };
418
419
420 /**
421 * Copy 'source' character array to 'dest' as upper case characters.
422 * The conversion uses the default Unicode case mapping,
423 * where each character is considered in isolation.
424 * Characters without case mapping are left unchanged.
425 * The mapping is independent from the locale. Individual
426 * character case mappings cannot be reversed, because some
427 * characters have multiple characters that map to them.
428 * @param source Character array to be copied and converted.
429 * @param length Length of the source character array.
430 * @param dest Destination character array for the upper case chars.
431 */
toUpper(const strElemType * const source,memSizeType length,strElemType * const dest)432 void toUpper (const strElemType *const source, memSizeType length,
433 strElemType *const dest)
434
435 {
436 memSizeType pos;
437 strElemType ch;
438
439 /* toUpper */
440 for (pos = 0; pos < length; pos++) {
441 ch = source[pos];
442 switch (ch >> 8) {
443 case 0:
444 if (ch <= '\177') {
445 ch = (strElemType) ((unsigned char)
446 "\0\1\2\3\4\5\6\7\10\11\12\13\14\15\16\17"
447 "\20\21\22\23\24\25\26\27\30\31\32\33\34\35\36\37"
448 " !\"#$%&'()*+,-./0123456789:;<=>?"
449 "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_"
450 "`ABCDEFGHIJKLMNOPQRSTUVWXYZ{|}~\177"[ch]);
451 } else if ("\0\0\0\0\0\0\0\0\0\0\0\0\376\377\377\007"
452 "\0\0\0\0\0\0 \0\0\0\0\0\377\377\177\377"[ch >> 3 & 31] &
453 1 << (ch & 7)) {
454 if (ch == 0x00b5) {
455 ch = 0x039c;
456 } else if (ch == 0x00ff) {
457 ch = 0x0178;
458 } else {
459 ch -= 32;
460 } /* if */
461 } /* if */
462 break;
463 case 1:
464 if ("\252\252\252\252\252\252\252TU\251\252\252\252\252\252\324"
465 ")\021$F*!Q\242`[U\265\252\252,\252"[ch >> 3 & 31] &
466 1 << (ch & 7)) {
467 if (ch == 0x0131) {
468 ch = 0x0049;
469 } else if (ch == 0x017f) {
470 ch = 0x0053;
471 } else if (ch >= 0x0180 && ch <= 0x019e) {
472 ch = (strElemType) ((unsigned char)
473 "\303\0\0\002\0\004\0\0\007\0"
474 "\0\0\013\0\0\0\0\0\021\0"
475 "\0\166\0\0\0\030\275\0\0\0\240"[ch - 0x0180] + 0x0180);
476 } else if (ch >= 0x01bf && ch <= 0x01cc) {
477 ch = (strElemType) ((unsigned char)
478 "\367\0\0\0\0\0\304\304\0\307"
479 "\307\0\312\312"[ch - 0x01bf] + 0x0100);
480 } else if (ch == 0x01dd) {
481 ch = 0x018e;
482 } else if (ch == 0x01f3) {
483 ch = 0x01f1;
484 } else {
485 ch -= 1;
486 } /* if */
487 } /* if */
488 break;
489 case 2:
490 if ("\252\252\252\252\250\252\n\220\205\252\337\ni\213& "
491 "\t\037\004\0\0\0\0\0\0\0\0\0\0\0\0\0"[ch >> 3 & 31] &
492 1 << (ch & 7)) {
493 if (ch <= 0x024f) {
494 if (ch == 0x023f) {
495 ch = 0x2c7e;
496 } else if (ch == 0x0240) {
497 ch = 0x2c7f;
498 } else {
499 ch -= 1;
500 } /* if */
501 } else {
502 ch = toUpperTable2[ch - 0x0250];
503 } /* if */
504 } /* if */
505 break;
506 case 3:
507 if ("\0\0\0\0\0\0\0\0 \0\0\0\0\0\212"
508 "8\0\0\0\0\0\360\376\377\377\177\343\252\252\252'\t"[ch >> 3 & 31] &
509 1 << (ch & 7)) {
510 if (ch <= 0x03af) {
511 if (ch == 0x0345) {
512 ch += 84;
513 } else if (ch <= 0x0377) {
514 ch -= 1;
515 } else if (ch <= 0x037d) {
516 ch += 130;
517 } else if (ch == 0x03ac) {
518 ch -= 38;
519 } else {
520 ch -= 37;
521 } /* if */
522 } else if (ch <= 0x03cb) {
523 if (ch == 0x03c2) {
524 ch -= 31;
525 } else {
526 ch -= 32;
527 } /* if */
528 } else if (ch <= 0x03d7) {
529 ch = (strElemType) ((unsigned char)
530 "\214\216\217\0\222\230\0\0\0\246\240\317"[ch - 0x03cc] + 0x0300);
531 } else if (ch <= 0x03ef) {
532 ch -= 1;
533 } else {
534 ch = (strElemType) ((unsigned char)
535 "\232\241\371\0\0\225\0\0\367\0\0\372"[ch - 0x03f0] + 0x0300);
536 } /* if */
537 } /* if */
538 break;
539 case 4:
540 if ("\0\0\0\0\0\0\377\377\377\377\377\377\252\252\252\252"
541 "\002\250\252\252\252\252\252\252T\325\252\252\252\252\252\252"[ch >> 3 & 31] &
542 1 << (ch & 7)) {
543 if (ch <= 0x044f) {
544 ch -= 32;
545 } else if (ch <= 0x045f) {
546 ch -= 80;
547 } else if (ch == 0x04cf) {
548 ch -= 15;
549 } else {
550 ch -= 1;
551 } /* if */
552 } /* if */
553 break;
554 case 5:
555 if ("\252\252\252\252\252\0\0\0\0\0\0\0\376\377\377\377"
556 "\177\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"[ch >> 3 & 31] &
557 1 << (ch & 7)) {
558 if (ch <= 0x0527) {
559 ch -= 1;
560 } else {
561 ch -= 48;
562 } /* if */
563 } /* if */
564 break;
565 case 29:
566 if (ch == 0x1d79) {
567 ch = 0xa77d;
568 } else if (ch == 0x1d7d) {
569 ch = 0x2c63;
570 } /* if */
571 break;
572 case 30:
573 if ("\252\252\252\252\252\252\252\252\252\252\252\252\252\252\252\252"
574 "\252\252*\b\252\252\252\252\252\252\252\252\252\252\252\252"[ch >> 3 & 31] &
575 1 << (ch & 7)) {
576 if (ch == 0x1e9b) {
577 ch = 0x1e60;
578 } else {
579 ch -= 1;
580 } /* if */
581 } /* if */
582 break;
583 case 31:
584 if ("\377\0?\0\377\0\377\0?\0\252\0\377\0\377?"
585 "\377\0\377\0\377\0\013@\b\0\003\0#\0\b\0"[ch >> 3 & 31] &
586 1 << (ch & 7)) {
587 if (ch >= 0x1f70 && ch <= 0x1f7d) {
588 ch = (strElemType) ((unsigned char)
589 "\272\273\310\311\312\313\332\333\370\371\352\353\372\373"[ch - 0x1f70] + 0x1f00);
590 } else if (ch == 0x1fb3 || ch == 0x1fc3 || ch == 0x1ff3) {
591 ch += 9;
592 } else if (ch == 0x1fe5) {
593 ch += 7;
594 } else if (ch == 0x1fbe) {
595 ch = 0x0399;
596 } else {
597 ch += 8;
598 } /* if */
599 } /* if */
600 break;
601 case 33:
602 if ("\0\0\0\0\0\0\0\0\0@\0\0\0\0\377\377"
603 "\020\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"[ch >> 3 & 31] &
604 1 << (ch & 7)) {
605 if (ch == 0x214e) {
606 ch -= 28;
607 } else if (ch == 0x2184) {
608 ch -= 1;
609 } else {
610 ch -= 16;
611 } /* if */
612 } /* if */
613 break;
614 case 36:
615 if (ch >= 0x24d0 && ch <= 0x24e9) {
616 ch -= 26;
617 } /* if */
618 break;
619 case 44:
620 if ("\0\0\0\0\0\0\377\377\377\377\377\177b\025H\0"
621 "\252\252\252\252\252\252\252\252\252\252\252\252\nP\b\0"[ch >> 3 & 31] &
622 1 << (ch & 7)) {
623 if (ch <= 0x2c5e) {
624 ch -= 48;
625 } else if (ch == 0x2c65) {
626 ch = 0x023a;
627 } else if (ch == 0x2c66) {
628 ch = 0x023e;
629 } else {
630 ch -= 1;
631 } /* if */
632 } /* if */
633 break;
634 case 45:
635 if (ch >= 0x2d00 && ch <= 0x2d2d) {
636 ch -= 0x1c60;
637 } /* if */
638 break;
639 case 166:
640 if ("\0\0\0\0\0\0\0\0\252\252\252\252\252*\0\0"
641 "\252\252\252\0\0\0\0\0\0\0\0\0\0\0\0\0"[ch >> 3 & 31] &
642 1 << (ch & 7)) {
643 ch -= 1;
644 } /* if */
645 break;
646 case 167:
647 if ("\0\0\0\0\250\252\250\252\252\252\252\252\252\252\0\224"
648 "\252\020\n\0\252\002\0\0\0\0\0\0\0\0\0\0"[ch >> 3 & 31] &
649 1 << (ch & 7)) {
650 ch -= 1;
651 } /* if */
652 break;
653 case 255:
654 if (ch >= 0xff41 && ch <= 0xff5a) {
655 ch -= 32;
656 } /* if */
657 break;
658 case 260:
659 if (ch >= 0x10428 && ch <= 0x1044f) {
660 ch -= 40;
661 } /* if */
662 break;
663 default:
664 break;
665 } /* switch */
666 dest[pos] = ch;
667 } /* for */
668 } /* toUpper */
669
670
671
/**
 *  Store a copy of a character array as new string in 'work_array'.
 *  The new string is stored at the index 'used_max_position'. If
 *  'used_max_position' has reached work_array->max_position the
 *  array is enlarged by the factor ARRAY_SIZE_FACTOR before.
 *  If something fails the strings of 'work_array' and the array
 *  itself are released with freeRtlStriArray().
 *  @param stri_elems Characters to be copied into the new string.
 *  @param length Number of characters in 'stri_elems'.
 *  @param work_array Array that receives the new string.
 *  @param used_max_position Index at which the string is stored.
 *  @return the (possibly reallocated) array, or NULL if the string
 *          could not be allocated or the array could not be enlarged.
 */
static rtlArrayType addCopiedStriToRtlArray (const strElemType *const stri_elems,
    const memSizeType length, rtlArrayType work_array, intType used_max_position)

  {
    striType copied_stri;
    rtlArrayType enlarged_array;

  /* addCopiedStriToRtlArray */
    if (unlikely(!ALLOC_STRI_SIZE_OK(copied_stri, length))) {
      freeRtlStriArray(work_array, used_max_position);
      return NULL;
    } /* if */
    copied_stri->size = length;
    memcpy(copied_stri->mem, stri_elems, length * sizeof(strElemType));
    if (used_max_position >= work_array->max_position) {
      /* The array is full: Enlarge it, if the new size is allowed. */
      if (unlikely(work_array->max_position >
                   (intType) (MAX_RTL_ARR_INDEX / ARRAY_SIZE_FACTOR))) {
        enlarged_array = NULL;
      } else {
        enlarged_array = REALLOC_RTL_ARRAY(work_array,
            (uintType) work_array->max_position,
            (uintType) work_array->max_position * ARRAY_SIZE_FACTOR);
      } /* if */
      if (unlikely(enlarged_array == NULL)) {
        FREE_STRI(copied_stri, copied_stri->size);
        freeRtlStriArray(work_array, used_max_position);
        return NULL;
      } /* if */
      work_array = enlarged_array;
      COUNT3_RTL_ARRAY((uintType) work_array->max_position,
          (uintType) work_array->max_position * ARRAY_SIZE_FACTOR);
      work_array->max_position *= ARRAY_SIZE_FACTOR;
    } /* if */
    work_array->arr[used_max_position].value.striValue = copied_stri;
    return work_array;
  } /* addCopiedStriToRtlArray */
707
708
709
/**
 *  Resize 'work_array' such that it has 'used_max_position' elements.
 *  A 'work_array' of NULL is passed through unchanged. If the
 *  reallocation fails the strings of 'work_array' and the array
 *  itself are released with freeRtlStriArray().
 *  @param work_array Array to be resized, or NULL.
 *  @param used_max_position New value of max_position.
 *  @return the resized array, or NULL if 'work_array' was NULL
 *          or the reallocation failed.
 */
static inline rtlArrayType completeRtlStriArray (rtlArrayType work_array,
    intType used_max_position)

  {
    rtlArrayType final_array;

  /* completeRtlStriArray */
    if (unlikely(work_array == NULL)) {
      return NULL;
    } /* if */
    final_array = REALLOC_RTL_ARRAY(work_array,
        (uintType) work_array->max_position, (uintType) used_max_position);
    if (unlikely(final_array == NULL)) {
      freeRtlStriArray(work_array, used_max_position);
      return NULL;
    } /* if */
    COUNT3_RTL_ARRAY((uintType) final_array->max_position,
        (uintType) used_max_position);
    final_array->max_position = used_max_position;
    return final_array;
  } /* completeRtlStriArray */
732
733
734
concatAndStraightenPath(strElemType * destination_pos,const strElemType * destination_start,const strElemType * source_pos,const strElemType * source_beyond)735 static memSizeType concatAndStraightenPath (strElemType *destination_pos,
736 const strElemType *destination_start, const strElemType *source_pos,
737 const strElemType *source_beyond)
738
739 {
740 memSizeType result_size;
741
742 /* concatAndStraightenPath */
743 while (source_pos < source_beyond) {
744 if (&source_pos[1] < source_beyond &&
745 source_pos[0] == '.' && source_pos[1] == '.' &&
746 (&source_pos[2] >= source_beyond || source_pos[2] == '/')) {
747 source_pos += 2;
748 if (destination_pos > destination_start) {
749 do {
750 destination_pos--;
751 } while (*destination_pos != '/');
752 } /* if */
753 } else if (&source_pos[0] < source_beyond &&
754 source_pos[0] == '.' &&
755 (&source_pos[1] >= source_beyond || source_pos[1] == '/')) {
756 source_pos++;
757 } else if (*source_pos == '/') {
758 source_pos++;
759 } else {
760 do {
761 destination_pos++;
762 *destination_pos = *source_pos;
763 source_pos++;
764 } while (&source_pos[0] < source_beyond && source_pos[0] != '/');
765 destination_pos++;
766 /* The line below adds a temporary slash (/) to the end */
767 /* of the intermediate result. Therefore + 2 is used to */
768 /* compute the estimated_result_size. */
769 *destination_pos = '/';
770 } /* if */
771 } /* while */
772 if (destination_pos == destination_start) {
773 destination_pos[0] = '/';
774 result_size = 1;
775 } else {
776 result_size = (memSizeType) (destination_pos - destination_start);
777 } /* if */
778 return result_size;
779 } /* concatAndStraightenPath */
780
781
782
783 /**
784 * Concatenate a relative path to an absolute path.
785 * In the relative path the special directories "." and ".." are
786 * interpreted according to their conventional meaning. A ".." which
787 * would go above the file system root ("/") is ignored.
788 * @param absolutePath Absolute path in the standard path
789 * representation.
790 * @param relativePath Relative path in the standard path
791 * representation.
792 * @return the concatenated absolute path in the standard path
793 * representation, or NULL if the memory allocation failed.
794 */
concatPath(const const_striType absolutePath,const const_striType relativePath)795 striType concatPath (const const_striType absolutePath,
796 const const_striType relativePath)
797
798 {
799 memSizeType abs_path_length;
800 memSizeType estimated_result_size;
801 memSizeType result_size;
802 striType resized_result;
803 striType result;
804
805 /* concatPath */
806 logFunction(printf("concatPath(\"%s\", ", striAsUnquotedCStri(absolutePath));
807 printf("\"%s\")\n", striAsUnquotedCStri(relativePath)););
808 /* absolutePath->mem[0] is always '/'. */
809 if (absolutePath->size == 1) {
810 abs_path_length = 0;
811 } else {
812 abs_path_length = absolutePath->size;
813 } /* if */
814 if (unlikely(abs_path_length > MAX_STRI_LEN - relativePath->size - 2)) {
815 result = NULL;
816 } else {
817 /* There is one slash (/) between the two paths. Temporarily */
818 /* there is also a slash at the end of the intermediate result. */
819 estimated_result_size = abs_path_length + relativePath->size + 2;
820 if (ALLOC_STRI_SIZE_OK(result, estimated_result_size)) {
821 memcpy(result->mem, absolutePath->mem, abs_path_length * sizeof(strElemType));
822 result->mem[abs_path_length] = '/';
823 result_size = concatAndStraightenPath(&result->mem[abs_path_length],
824 result->mem, relativePath->mem, &relativePath->mem[relativePath->size]);
825 REALLOC_STRI_SIZE_SMALLER(resized_result, result, estimated_result_size, result_size);
826 if (unlikely(resized_result == NULL)) {
827 FREE_STRI(result, estimated_result_size);
828 result = NULL;
829 } else {
830 result = resized_result;
831 COUNT3_STRI(estimated_result_size, result_size);
832 result->size = result_size;
833 } /* if */
834 } /* if */
835 } /* if */
836 logFunction(printf("concatPath --> \"%s\"\n", striAsUnquotedCStri(result)););
837 return result;
838 } /* concatPath */
839
840
841
842 /**
843 * Straighten an absolute path.
844 * The special directories "." and ".." are interpreted according to
845 * their conventional meaning. A ".." which would go above the
846 * file system root ("/") is ignored.
847 * @param absolutePath Absolute path in the standard path
848 * representation.
849 * @return the straightened absolute path in the standard path
850 * representation, or NULL if the memory allocation failed.
851 */
straightenAbsolutePath(const const_striType absolutePath)852 striType straightenAbsolutePath (const const_striType absolutePath)
853
854 {
855 memSizeType estimated_result_size;
856 memSizeType result_size;
857 striType resized_result;
858 striType result;
859
860 /* straightenAbsolutePath */
861 logFunction(printf("straightenAbsolutePath(\"%s\")\n",
862 striAsUnquotedCStri(absolutePath)););
863 /* absolutePath->mem[0] is always '/'. */
864 if (unlikely(absolutePath->size > MAX_STRI_LEN - 2)) {
865 result = NULL;
866 } else {
867 /* There is one slash (/) between the two paths. Temporarily */
868 /* there is also a slash at the end of the intermediate result. */
869 estimated_result_size = absolutePath->size + 2;
870 if (ALLOC_STRI_SIZE_OK(result, estimated_result_size)) {
871 result->mem[0] = '/';
872 result_size = concatAndStraightenPath(&result->mem[0], result->mem,
873 &absolutePath->mem[1], &absolutePath->mem[absolutePath->size]);
874 REALLOC_STRI_SIZE_SMALLER(resized_result, result, estimated_result_size, result_size);
875 if (unlikely(resized_result == NULL)) {
876 FREE_STRI(result, estimated_result_size);
877 result = NULL;
878 } else {
879 result = resized_result;
880 COUNT3_STRI(estimated_result_size, result_size);
881 result->size = result_size;
882 } /* if */
883 } /* if */
884 } /* if */
885 logFunction(printf("straightenAbsolutePath --> \"%s\"\n",
886 striAsUnquotedCStri(result)););
887 return result;
888 } /* straightenAbsolutePath */
889
890
891
892 #if ALLOW_STRITYPE_SLICES
893 /**
894 * Append the string 'extension' to 'destination'.
895 * @exception MEMORY_ERROR Not enough memory for the concatenated
896 * string.
897 */
strAppend(striType * const destination,const_striType extension)898 void strAppend (striType *const destination, const_striType extension)
899
900 {
901 memSizeType new_size;
902 striType stri_dest;
903 striType new_stri;
904 memSizeType extension_size;
905 const strElemType *extension_mem;
906 const strElemType *extension_origin;
907
908 /* strAppend */
909 logFunction(printf("strAppend(\"%s\", ", striAsUnquotedCStri(*destination));
910 printf("\"%s\")", striAsUnquotedCStri(extension));
911 fflush(stdout););
912 stri_dest = *destination;
913 extension_size = extension->size;
914 extension_mem = extension->mem;
915 if (unlikely(stri_dest->size > MAX_STRI_LEN - extension_size)) {
916 /* number of bytes does not fit into memSizeType */
917 raise_error(MEMORY_ERROR);
918 } else {
919 new_size = stri_dest->size + extension_size;
920 #if WITH_STRI_CAPACITY
921 if (new_size > stri_dest->capacity) {
922 if (SLICE_OVERLAPPING(extension, stri_dest)) {
923 extension_origin = stri_dest->mem;
924 } else {
925 extension_origin = NULL;
926 } /* if */
927 new_stri = growStri(stri_dest, new_size);
928 if (unlikely(new_stri == NULL)) {
929 raise_error(MEMORY_ERROR);
930 return;
931 } else {
932 if (unlikely(extension_origin != NULL)) {
933 /* It is possible that 'extension' is identical to */
934 /* 'stri_dest' or a slice of it. This can be checked */
935 /* with the origin. In this case 'extension_mem' must */
936 /* be corrected after realloc() enlarged 'stri_dest'. */
937 extension_mem = &new_stri->mem[extension_mem - extension_origin];
938 /* Correcting extension->mem is not necessary, since */
939 /* a slice will not be used afterwards. In case */
940 /* 'extension is identical to 'stri_dest' changing */
941 /* extension->mem is dangerous since 'extension' */
942 /* could have been released. */
943 } /* if */
944 stri_dest = new_stri;
945 *destination = stri_dest;
946 } /* if */
947 } /* if */
948 COUNT_GROW_STRI(stri_dest->size, new_size);
949 memcpy(&stri_dest->mem[stri_dest->size], extension_mem,
950 extension_size * sizeof(strElemType));
951 stri_dest->size = new_size;
952 #else
953 if (SLICE_OVERLAPPING(extension, stri_dest)) {
954 extension_origin = stri_dest->mem;
955 } else {
956 extension_origin = NULL;
957 } /* if */
958 GROW_STRI(new_stri, stri_dest, stri_dest->size, new_size);
959 if (unlikely(new_stri == NULL)) {
960 raise_error(MEMORY_ERROR);
961 } else {
962 if (unlikely(extension_origin != NULL)) {
963 /* It is possible that 'extension' is identical to */
964 /* 'stri_dest' or a slice of it. This can be checked */
965 /* with the origin. In this case 'extension_mem' must */
966 /* be corrected after realloc() enlarged 'stri_dest'. */
967 extension_mem = &new_stri->mem[extension_mem - extension_origin];
968 /* Correcting extension->mem is not necessary, since */
969 /* a slice will not be used afterwards. In case */
970 /* 'extension is identical to 'stri_dest' changing */
971 /* extension->mem is dangerous since 'extension' */
972 /* could have been released. */
973 } /* if */
974 COUNT_GROW_STRI(new_stri->size, new_size);
975 memcpy(&new_stri->mem[new_stri->size], extension_mem,
976 extension_size * sizeof(strElemType));
977 new_stri->size = new_size;
978 *destination = new_stri;
979 } /* if */
980 #endif
981 } /* if */
982 logFunctionResult(printf("\"%s\"\n", striAsUnquotedCStri(*destination)););
983 } /* strAppend */
984
985
986
987 /**
988 * Append an arbitrary number of strings.
989 * strAppendN is used by the compiler to optimize appending
990 * two or more strings.
991 * @param arraySize Number of strings in extensionArray (>= 2).
992 * @exception MEMORY_ERROR Not enough memory for the concatenated
993 * string.
994 */
strAppendN(striType * const destination,const const_striType extensionArray[],memSizeType arraySize)995 void strAppendN (striType *const destination,
996 const const_striType extensionArray[], memSizeType arraySize)
997
998 {
999 striType stri_dest;
1000 memSizeType size_limit;
1001 memSizeType pos;
1002 memSizeType new_size;
1003 const strElemType *old_dest_origin;
1004 const strElemType *old_dest_beyond;
1005 striType new_stri;
1006 strElemType *dest;
1007 memSizeType elem_size;
1008 const strElemType *extension_mem;
1009
1010 /* strAppendN */
1011 logFunction(printf("strAppendN(\"%s\", ",
1012 striAsUnquotedCStri(*destination));
1013 for (pos = 0; pos < arraySize; pos++) {
1014 printf("\"%s\", ",
1015 striAsUnquotedCStri(extensionArray[pos]));
1016 } /* for */
1017 printf(FMT_U_MEM ")", arraySize);
1018 fflush(stdout););
1019 stri_dest = *destination;
1020 size_limit = MAX_STRI_LEN - stri_dest->size;
1021 pos = arraySize;
1022 do {
1023 pos--;
1024 if (unlikely(extensionArray[pos]->size > size_limit)) {
1025 raise_error(MEMORY_ERROR);
1026 return;
1027 } else {
1028 size_limit -= extensionArray[pos]->size;
1029 } /* if */
1030 } while (pos != 0);
1031 new_size = MAX_STRI_LEN - size_limit;
1032 #if WITH_STRI_CAPACITY
1033 if (new_size > stri_dest->capacity) {
1034 if (new_size <= RESIZE_THRESHOLD) {
1035 if (unlikely(!ALLOC_STRI_SIZE_OK(new_stri, new_size))) {
1036 raise_error(MEMORY_ERROR);
1037 } else {
1038 new_stri->size = new_size;
1039 dest = new_stri->mem;
1040 memcpy(dest, stri_dest->mem, stri_dest->size * sizeof(strElemType));
1041 dest += stri_dest->size;
1042 for (pos = 0; pos < arraySize; pos++) {
1043 elem_size = extensionArray[pos]->size;
1044 memcpy(dest, extensionArray[pos]->mem, elem_size * sizeof(strElemType));
1045 dest += elem_size;
1046 } /* for */
1047 FREE_STRI(stri_dest, stri_dest->size);
1048 *destination = new_stri;
1049 } /* if */
1050 } else {
1051 old_dest_origin = GET_DESTINATION_ORIGIN(stri_dest);
1052 old_dest_beyond = GET_DESTINATION_BEYOND(stri_dest);
1053 new_stri = growStri(stri_dest, new_size);
1054 if (unlikely(new_stri == NULL)) {
1055 raise_error(MEMORY_ERROR);
1056 } else {
1057 COUNT_GROW_STRI(new_stri->size, new_size);
1058 *destination = new_stri;
1059 dest = &new_stri->mem[new_stri->size];
1060 for (pos = 0; pos < arraySize; pos++) {
1061 if (unlikely(stri_dest == extensionArray[pos])) {
1062 /* The extension (extensionArray[pos]) is identical */
1063 /* to the 'destination' (it refers to the memory area */
1064 /* of it). The resizing of the 'destination' might */
1065 /* have moved 'new_stri' to a new memory area. */
1066 /* Therefore 'extension_mem' must be corrected after */
1067 /* realloc() enlarged 'new_stri'. */
1068 elem_size = new_stri->size;
1069 extension_mem = new_stri->mem;
1070 /* Correcting extensionArray[pos]->mem is not needed, */
1071 /* since the slice will not be used afterwards. */
1072 /* Changing extensionArray[pos]->mem is dangerous, */
1073 /* since the memory could have been released. */
1074 } else if (unlikely(SLICE_OVERLAPPING2(extensionArray[pos],
1075 old_dest_origin,
1076 old_dest_beyond))) {
1077 /* The extension (extensionArray[pos]) is a slice of */
1078 /* the 'destination' (it refers to the memory area of */
1079 /* it). The resizing of the 'destination' might have */
1080 /* moved 'new_stri' to a new memory area. Therefore */
1081 /* 'extension_mem' must be corrected after realloc() */
1082 /* enlarged 'new_stri'. */
1083 elem_size = extensionArray[pos]->size;
1084 extension_mem =
1085 &new_stri->mem[extensionArray[pos]->mem - old_dest_origin];
1086 /* Correcting extensionArray[pos]->mem is not needed, */
1087 /* since the slice will not be used afterwards. */
1088 } else {
1089 elem_size = extensionArray[pos]->size;
1090 extension_mem = extensionArray[pos]->mem;
1091 } /* if */
1092 memcpy(dest, extension_mem, elem_size * sizeof(strElemType));
1093 dest += elem_size;
1094 } /* for */
1095 new_stri->size = new_size;
1096 } /* if */
1097 } /* if */
1098 } else {
1099 COUNT_GROW2_STRI(stri_dest->size, new_size);
1100 dest = &stri_dest->mem[stri_dest->size];
1101 for (pos = 0; pos < arraySize; pos++) {
1102 elem_size = extensionArray[pos]->size;
1103 memcpy(dest, extensionArray[pos]->mem,
1104 elem_size * sizeof(strElemType));
1105 dest += elem_size;
1106 } /* for */
1107 stri_dest->size = new_size;
1108 } /* if */
1109 #else
1110 old_dest_origin = GET_DESTINATION_ORIGIN(stri_dest);
1111 old_dest_beyond = GET_DESTINATION_BEYOND(stri_dest);
1112 GROW_STRI(new_stri, stri_dest, stri_dest->size, new_size);
1113 if (unlikely(new_stri == NULL)) {
1114 raise_error(MEMORY_ERROR);
1115 } else {
1116 COUNT_GROW_STRI(new_stri->size, new_size);
1117 *destination = new_stri;
1118 dest = &new_stri->mem[new_stri->size];
1119 for (pos = 0; pos < arraySize; pos++) {
1120 elem_size = extensionArray[pos]->size;
1121 if (unlikely(stri_dest == extensionArray[pos])) {
1122 /* The extension (extensionArray[pos]) is identical */
1123 /* to the 'destination' (it refers to the memory area */
1124 /* of it). The resizing of the 'destination' might */
1125 /* have moved 'new_stri' to a new memory area. */
1126 /* Therefore 'extension_mem' must be corrected after */
1127 /* realloc() enlarged 'new_stri'. */
1128 elem_size = new_stri->size;
1129 extension_mem = new_stri->mem;
1130 /* Correcting extensionArray[pos]->mem is not needed, */
1131 /* since then slice will not be used afterwards. */
1132 /* Changing extensionArray[pos]->mem is dangerous, */
1133 /* since the memory could have been released. */
1134 } else if (unlikely(SLICE_OVERLAPPING2(extensionArray[pos],
1135 old_dest_origin,
1136 old_dest_beyond))) {
1137 /* The extension (extensionArray[pos]) is a slice of */
1138 /* the 'destination' (it refers to the memory area of */
1139 /* it). The resizing of the 'destination' might have */
1140 /* moved 'new_stri' to a new memory area. Therefore */
1141 /* 'extension_mem' must be corrected after realloc() */
1142 /* enlarged 'new_stri'. */
1143 elem_size = extensionArray[pos]->size;
1144 extension_mem =
1145 &new_stri->mem[extensionArray[pos]->mem - old_dest_origin];
1146 /* Correcting extensionArray[pos]->mem is not needed, */
1147 /* since the slice will not be used afterwards. */
1148 } else {
1149 elem_size = extensionArray[pos]->size;
1150 extension_mem = extensionArray[pos]->mem;
1151 } /* if */
1152 memcpy(dest, extension_mem, elem_size * sizeof(strElemType));
1153 dest += elem_size;
1154 } /* for */
1155 new_stri->size = new_size;
1156 } /* if */
1157 #endif
1158 logFunctionResult(printf("\"%s\"\n", striAsUnquotedCStri(*destination)););
1159 } /* strAppendN */
1160
1161 #else
1162
1163
1164
1165 /**
1166 * Append the string 'extension' to 'destination'.
1167 * @exception MEMORY_ERROR Not enough memory for the concatenated
1168 * string.
1169 */
strAppend(striType * const destination,const_striType extension)1170 void strAppend (striType *const destination, const_striType extension)
1171
1172 {
1173 memSizeType new_size;
1174 striType stri_dest;
1175 striType new_stri;
1176
1177 /* strAppend */
1178 logFunction(printf("strAppend(\"%s\", ", striAsUnquotedCStri(*destination));
1179 printf("\"%s\")", striAsUnquotedCStri(extension));
1180 fflush(stdout););
1181 stri_dest = *destination;
1182 if (unlikely(stri_dest->size > MAX_STRI_LEN - extension->size)) {
1183 /* number of bytes does not fit into memSizeType */
1184 raise_error(MEMORY_ERROR);
1185 } else {
1186 new_size = stri_dest->size + extension->size;
1187 #if WITH_STRI_CAPACITY
1188 if (new_size > stri_dest->capacity) {
1189 new_stri = growStri(stri_dest, new_size);
1190 if (unlikely(new_stri == NULL)) {
1191 raise_error(MEMORY_ERROR);
1192 return;
1193 } else {
1194 if (unlikely(stri_dest == extension)) {
1195 /* It is possible that stri_dest == extension holds. */
1196 /* In this case 'extension' must be corrected */
1197 /* after realloc() enlarged 'stri_dest'. */
1198 extension = new_stri;
1199 } /* if */
1200 stri_dest = new_stri;
1201 *destination = stri_dest;
1202 } /* if */
1203 } /* if */
1204 COUNT_GROW_STRI(stri_dest->size, new_size);
1205 memcpy(&stri_dest->mem[stri_dest->size], extension->mem,
1206 extension->size * sizeof(strElemType));
1207 stri_dest->size = new_size;
1208 #else
1209 GROW_STRI(new_stri, stri_dest, stri_dest->size, new_size);
1210 if (unlikely(new_stri == NULL)) {
1211 raise_error(MEMORY_ERROR);
1212 } else {
1213 if (unlikely(stri_dest == extension)) {
1214 /* It is possible that stri_dest == extension holds. */
1215 /* In this case 'extension' must be corrected */
1216 /* after realloc() enlarged 'stri_dest'. */
1217 extension = new_stri;
1218 } /* if */
1219 COUNT_GROW_STRI(new_stri->size, new_size);
1220 memcpy(&new_stri->mem[new_stri->size], extension->mem,
1221 extension->size * sizeof(strElemType));
1222 new_stri->size = new_size;
1223 *destination = new_stri;
1224 } /* if */
1225 #endif
1226 } /* if */
1227 logFunctionResult(printf("\"%s\"\n", striAsUnquotedCStri(*destination)););
1228 } /* strAppend */
1229
1230
1231
1232 /**
1233 * Append an arbitrary number of strings.
1234 * strAppendN is used by the compiler to optimize appending
1235 * two or more strings.
1236 * @param arraySize Number of strings in extensionArray (>= 2).
1237 * @exception MEMORY_ERROR Not enough memory for the concatenated
1238 * string.
1239 */
strAppendN(striType * const destination,const const_striType extensionArray[],memSizeType arraySize)1240 void strAppendN (striType *const destination,
1241 const const_striType extensionArray[], memSizeType arraySize)
1242
1243 {
1244 striType stri_dest;
1245 memSizeType size_limit;
1246 memSizeType pos;
1247 memSizeType new_size;
1248 striType new_stri;
1249 strElemType *dest;
1250 memSizeType elem_size;
1251 const strElemType *extension_mem;
1252
1253 /* strAppendN */
1254 logFunction(printf("strAppendN(\"%s\", ",
1255 striAsUnquotedCStri(*destination));
1256 for (pos = 0; pos < arraySize; pos++) {
1257 printf("\"%s\", ",
1258 striAsUnquotedCStri(extensionArray[pos]));
1259 } /* for */
1260 printf(FMT_U_MEM ")", arraySize);
1261 fflush(stdout););
1262 stri_dest = *destination;
1263 size_limit = MAX_STRI_LEN - stri_dest->size;
1264 pos = arraySize;
1265 do {
1266 pos--;
1267 if (unlikely(extensionArray[pos]->size > size_limit)) {
1268 raise_error(MEMORY_ERROR);
1269 return;
1270 } else {
1271 size_limit -= extensionArray[pos]->size;
1272 } /* if */
1273 } while (pos != 0);
1274 new_size = MAX_STRI_LEN - size_limit;
1275 #if WITH_STRI_CAPACITY
1276 if (new_size > stri_dest->capacity) {
1277 new_stri = growStri(stri_dest, new_size);
1278 if (unlikely(new_stri == NULL)) {
1279 raise_error(MEMORY_ERROR);
1280 } else {
1281 COUNT_GROW_STRI(new_stri->size, new_size);
1282 *destination = new_stri;
1283 dest = &new_stri->mem[new_stri->size];
1284 for (pos = 0; pos < arraySize; pos++) {
1285 if (unlikely(stri_dest == extensionArray[pos])) {
1286 /* It is possible that stri_dest == extension holds. */
1287 /* In this case 'extension' must be corrected */
1288 /* after realloc() enlarged 'stri_dest'. */
1289 elem_size = new_stri->size;
1290 extension_mem = new_stri->mem;
1291 } else {
1292 elem_size = extensionArray[pos]->size;
1293 extension_mem = extensionArray[pos]->mem;
1294 } /* if */
1295 memcpy(dest, extension_mem, elem_size * sizeof(strElemType));
1296 dest += elem_size;
1297 } /* for */
1298 new_stri->size = new_size;
1299 } /* if */
1300 } else {
1301 COUNT_GROW2_STRI(stri_dest->size, new_size);
1302 dest = &stri_dest->mem[stri_dest->size];
1303 for (pos = 0; pos < arraySize; pos++) {
1304 elem_size = extensionArray[pos]->size;
1305 memcpy(dest, extensionArray[pos]->mem,
1306 elem_size * sizeof(strElemType));
1307 dest += elem_size;
1308 } /* for */
1309 stri_dest->size = new_size;
1310 } /* if */
1311 #else
1312 GROW_STRI(new_stri, stri_dest, stri_dest->size, new_size);
1313 if (unlikely(new_stri == NULL)) {
1314 raise_error(MEMORY_ERROR);
1315 } else {
1316 COUNT_GROW_STRI(new_stri->size, new_size);
1317 *destination = new_stri;
1318 dest = &new_stri->mem[new_stri->size];
1319 for (pos = 0; pos < arraySize; pos++) {
1320 if (unlikely(stri_dest == extensionArray[pos])) {
1321 /* It is possible that stri_dest == extension holds. */
1322 /* In this case 'extension' must be corrected */
1323 /* after realloc() enlarged 'stri_dest'. */
1324 elem_size = new_stri->size;
1325 extension_mem = new_stri->mem;
1326 } else {
1327 elem_size = extensionArray[pos]->size;
1328 extension_mem = extensionArray[pos]->mem;
1329 } /* if */
1330 memcpy(dest, extension_mem, elem_size * sizeof(strElemType));
1331 dest += elem_size;
1332 } /* for */
1333 new_stri->size = new_size;
1334 } /* if */
1335 #endif
1336 logFunctionResult(printf("\"%s\"\n", striAsUnquotedCStri(*destination)););
1337 } /* strAppendN */
1338
1339 #endif
1340
1341
1342
1343 /**
1344 * Append the string 'extension' to 'destination'.
1345 * StrAppendTemp is used by the compiler if 'extension' is temporary
1346 * value that can be reused.
1347 * @exception MEMORY_ERROR Not enough memory for the concatenated
1348 * string.
1349 */
strAppendTemp(striType * const destination,const striType extension)1350 void strAppendTemp (striType *const destination, const striType extension)
1351
1352 {
1353 memSizeType new_size;
1354 striType stri_dest;
1355
1356 /* strAppendTemp */
1357 logFunction(printf("strAppendTemp(\"%s\", ", striAsUnquotedCStri(*destination));
1358 printf("\"%s\")", striAsUnquotedCStri(extension));
1359 fflush(stdout););
1360 stri_dest = *destination;
1361 if (unlikely(stri_dest->size > MAX_STRI_LEN - extension->size)) {
1362 /* number of bytes does not fit into memSizeType */
1363 raise_error(MEMORY_ERROR);
1364 } else {
1365 new_size = stri_dest->size + extension->size;
1366 #if WITH_STRI_CAPACITY
1367 if (new_size <= stri_dest->capacity) {
1368 COUNT_GROW2_STRI(stri_dest->size, new_size);
1369 memcpy(&stri_dest->mem[stri_dest->size], extension->mem,
1370 extension->size * sizeof(strElemType));
1371 stri_dest->size = new_size;
1372 FREE_STRI(extension, extension->size);
1373 } else if (new_size <= extension->capacity) {
1374 if (stri_dest->size != 0) {
1375 COUNT_GROW2_STRI(extension->size, new_size);
1376 memmove(&extension->mem[stri_dest->size], extension->mem,
1377 extension->size * sizeof(strElemType));
1378 memcpy(extension->mem, stri_dest->mem,
1379 stri_dest->size * sizeof(strElemType));
1380 extension->size = new_size;
1381 } /* if */
1382 *destination = extension;
1383 FREE_STRI(stri_dest, stri_dest->size);
1384 } else {
1385 stri_dest = growStri(stri_dest, new_size);
1386 if (unlikely(stri_dest == NULL)) {
1387 FREE_STRI(extension, extension->size);
1388 raise_error(MEMORY_ERROR);
1389 } else {
1390 *destination = stri_dest;
1391 COUNT_GROW_STRI(stri_dest->size, new_size);
1392 memcpy(&stri_dest->mem[stri_dest->size], extension->mem,
1393 extension->size * sizeof(strElemType));
1394 stri_dest->size = new_size;
1395 FREE_STRI(extension, extension->size);
1396 } /* if */
1397 } /* if */
1398 #else
1399 GROW_STRI(stri_dest, stri_dest, stri_dest->size, new_size);
1400 if (unlikely(stri_dest == NULL)) {
1401 FREE_STRI(extension, extension->size);
1402 raise_error(MEMORY_ERROR);
1403 } else {
1404 *destination = stri_dest;
1405 COUNT_GROW_STRI(stri_dest->size, new_size);
1406 memcpy(&stri_dest->mem[stri_dest->size], extension->mem,
1407 extension->size * sizeof(strElemType));
1408 stri_dest->size = new_size;
1409 FREE_STRI(extension, extension->size);
1410 } /* if */
1411 #endif
1412 } /* if */
1413 logFunctionResult(printf("\"%s\"\n", striAsUnquotedCStri(*destination)););
1414 } /* strAppendTemp */
1415
1416
1417
1418 /**
1419 * Replace all occurrences of char 'searched' in 'mainStri' by 'replacement'.
1420 * @return the result of the replacement.
1421 */
strChChRepl(const const_striType mainStri,const charType searched,const charType replacement)1422 striType strChChRepl (const const_striType mainStri,
1423 const charType searched, const charType replacement)
1424
1425 {
1426 memSizeType main_size;
1427 memSizeType pos;
1428 strElemType ch;
1429 striType result;
1430
1431 /* strChChRepl */
1432 main_size = mainStri->size;
1433 if (unlikely(!ALLOC_STRI_SIZE_OK(result, main_size))) {
1434 raise_error(MEMORY_ERROR);
1435 } else {
1436 result->size = main_size;
1437 for (pos = 0; pos < main_size; pos++) {
1438 ch = mainStri->mem[pos];
1439 if (ch == searched) {
1440 ch = replacement;
1441 } /* if */
1442 result->mem[pos] = ch;
1443 } /* for */
1444 } /* if */
1445 return result;
1446 } /* strChRepl */
1447
1448
1449
#ifdef OUT_OF_ORDER
/**
 *  Unfinished function, that is excluded from compilation.
 *  The code below is only compiled if OUT_OF_ORDER is defined.
 *  Judging from the name and the parameters it is presumably
 *  intended to split 'mainStri' at 'delimiter', with 'escape'
 *  protecting the character that follows it (TODO confirm).
 *  In its current state the code cannot be compiled:
 *  - 'old_pos' is used but not declared.
 *  - 'found_pos' is read but never assigned.
 *  - 'stri_pos' is used as write position, but it is never
 *    initialized and points to const elements.
 *  - 'curr_stri', 'pos' and 'resized_result_array' are unused.
 *  - Two closing braces are missing (the braces opened before
 *    the final 'return' are not all closed).
 *  The only behavior that is complete is the check, that raises
 *  RANGE_ERROR if 'delimiter' and 'escape' are identical.
 */
rtlArrayType strChEscSplit (const const_striType mainStri, const charType delimiter,
    const charType escape)

  {
    intType used_max_position;
    const strElemType *search_start;
    const strElemType *search_end;
    const strElemType *curr_pos;
    const strElemType *found_pos;
    striType curr_stri;
    const strElemType *stri_pos;
    memSizeType pos;
    rtlArrayType resized_result_array;
    rtlArrayType result_array;

  /* strChEscSplit */
    if (unlikely(delimiter == escape)) {
      raise_error(RANGE_ERROR);
    } else {
      if (ALLOC_RTL_ARRAY(result_array, INITIAL_ARRAY_SIZE)) {
        result_array->min_position = 1;
        result_array->max_position = INITIAL_ARRAY_SIZE;
        used_max_position = 0;
        search_start = mainStri->mem;
        search_end = &mainStri->mem[mainStri->size];
        /* NOTE(review): 'old_pos' is not declared in this function. */
        old_pos = search_start;
        curr_pos = search_start;
        while (curr_pos != search_end && result_array != NULL) {
          while (curr_pos != search_end && *curr_pos != delimiter) {
            /* Skip characters that are neither delimiter nor escape. */
            while (curr_pos != search_end && *curr_pos != delimiter && *curr_pos != escape) {
              curr_pos++;
            } /* while */
            /* NOTE(review): 'stri_pos' is not initialized and the  */
            /* size is a number of elements, not a number of bytes. */
            memcpy(stri_pos, old_pos, (memSizeType) (curr_pos - old_pos));
            stri_pos += curr_pos - old_pos;
            if (curr_pos != search_end && *curr_pos == escape) {
              /* Take the character after the escape literally. */
              curr_pos++;
              if (curr_pos != search_end) {
                *stri_pos = *curr_pos;
                stri_pos++;
              } /* if */
            } /* if */
          } /* while */
          /* NOTE(review): 'found_pos' has no value at this place. */
          result_array = addCopiedStriToRtlArray(search_start,
              (memSizeType) (found_pos - search_start), result_array,
              used_max_position);
          used_max_position++;
          search_start = found_pos + 1;

        /* NOTE(review): The closing brace of the outer while loop */
        /* seems to be missing before this statement.              */
        if (result_array != NULL) {
          result_array = addCopiedStriToRtlArray(search_start,
              (memSizeType) (search_end - search_start), result_array,
              used_max_position);
          used_max_position++;
          result_array = completeRtlStriArray(result_array, used_max_position);
        } /* if */
      } /* if */
      if (unlikely(result_array == NULL)) {
        raise_error(MEMORY_ERROR);
      } /* if */
      return result_array;
    } /* strChEscSplit */
#endif
1513
1514
1515
1516 /**
1517 * Search char 'searched' in 'mainStri' at or after 'fromIndex'.
1518 * The search starts at 'fromIndex' and proceeds to the right.
1519 * The first character in a string has the position 1.
1520 * @return the position of 'searched' or 0 if 'mainStri'
1521 * does not contain 'searched' at or after 'fromIndex'.
1522 * @exception RANGE_ERROR 'fromIndex' <= 0 holds.
1523 */
1524 intType strChIPos (const const_striType mainStri, const charType searched,
1525 const intType fromIndex)
1526
1527 {
1528 uintType startIndex;
1529 const strElemType *main_mem;
1530 const strElemType *found_pos;
1531
1532 /* strChIPos */
1533 logFunction(printf("strChIPos(\"%s\", '\\" FMT_U32 ";', " FMT_D ")\n",
1534 striAsUnquotedCStri(mainStri), searched, fromIndex););
1535 startIndex = ((uintType) fromIndex) - 1;
1536 if (startIndex < mainStri->size) {
1537 main_mem = mainStri->mem;
1538 found_pos = memchr_strelem(&main_mem[startIndex], searched,
1539 mainStri->size - (memSizeType) startIndex);
1540 if (found_pos != NULL) {
1541 return ((intType) (found_pos - main_mem)) + 1;
1542 } /* if */
1543 } else if (unlikely(fromIndex <= 0)) {
1544 logError(printf("strChIPos(\"%s\", '\\" FMT_U32 ";', " FMT_D "): "
1545 "fromIndex <= 0.\n",
1546 striAsUnquotedCStri(mainStri), searched, fromIndex););
1547 raise_error(RANGE_ERROR);
1548 } /* if */
1549 return 0;
1550 } /* strChIPos */
1551
1552
1553
1554 /**
1555 * String multiplication of the character 'ch'.
1556 * The character 'ch' is concatenated to itself such that in total
1557 * 'factor' characters are concatenated.
1558 * @return the result of the string multiplication.
1559 * @exception RANGE_ERROR If the factor is negative.
1560 */
1561 striType strChMult (const charType ch, const intType factor)
1562
1563 {
1564 striType result;
1565
1566 /* strChMult */
1567 logFunction(printf("strChMult('\\" FMT_U32 ";', " FMT_D ")\n",
1568 ch, factor););
1569 if (unlikely(factor < 0)) {
1570 logError(printf("strChMult('\\" FMT_U32 ";', " FMT_D "): "
1571 "Negative factor.\n",
1572 ch, factor););
1573 raise_error(RANGE_ERROR);
1574 result = NULL;
1575 } else {
1576 if (unlikely((uintType) factor > MAX_STRI_LEN ||
1577 !ALLOC_STRI_SIZE_OK(result, (memSizeType) factor))) {
1578 raise_error(MEMORY_ERROR);
1579 result = NULL;
1580 } else {
1581 result->size = (memSizeType) factor;
1582 memset_to_strelem(result->mem, ch, (memSizeType) factor);
1583 } /* if */
1584 } /* if */
1585 return result;
1586 } /* strChMult */
1587
1588
1589
1590 /**
1591 * Determine leftmost position of char 'searched' in 'mainStri'.
1592 * The first character in a string has the position 1.
1593 * @return the position of 'searched' or 0 if 'mainStri'
1594 * does not contain 'searched'.
1595 */
1596 intType strChPos (const const_striType mainStri, const charType searched)
1597
1598 {
1599 const strElemType *main_mem;
1600 const strElemType *found_pos;
1601
1602 /* strChPos */
1603 logFunction(printf("strChPos(\"%s\", '\\" FMT_U32 ";')\n",
1604 striAsUnquotedCStri(mainStri), searched););
1605 if (mainStri->size >= 1) {
1606 main_mem = mainStri->mem;
1607 found_pos = memchr_strelem(main_mem, searched, mainStri->size);
1608 if (found_pos != NULL) {
1609 return ((intType) (found_pos - main_mem)) + 1;
1610 } /* if */
1611 } /* if */
1612 return 0;
1613 } /* strChPos */
1614
1615
1616
1617 /**
1618 * Replace all occurrences of char 'searched' in 'mainStri' by 'replacement'.
1619 * @return the result of the replacement.
1620 */
1621 striType strChRepl (const const_striType mainStri,
1622 const charType searched, const const_striType replacement)
1623
1624 {
1625 memSizeType main_size;
1626 memSizeType guessed_result_size;
1627 memSizeType result_size;
1628 const strElemType *main_mem;
1629 const strElemType *search_start;
1630 const strElemType *search_end;
1631 const strElemType *copy_start;
1632 strElemType *result_end;
1633 striType resized_result;
1634 striType result;
1635
1636 /* strChRepl */
1637 main_size = mainStri->size;
1638 /* printf("main_size=" FMT_U_MEM ", replacement->size=" FMT_U_MEM "\n",
1639 main_size, replacement->size); */
1640 if (replacement->size > 1) {
1641 if (unlikely(main_size > MAX_STRI_LEN / replacement->size)) {
1642 raise_error(MEMORY_ERROR);
1643 return NULL;
1644 } else {
1645 guessed_result_size = main_size * replacement->size;
1646 } /* if */
1647 } else {
1648 guessed_result_size = main_size;
1649 } /* if */
1650 if (unlikely(!ALLOC_STRI_SIZE_OK(result, guessed_result_size))) {
1651 raise_error(MEMORY_ERROR);
1652 } else {
1653 copy_start = mainStri->mem;
1654 result_end = result->mem;
1655 if (main_size != 0) {
1656 main_mem = mainStri->mem;
1657 search_start = main_mem;
1658 search_end = &main_mem[main_size];
1659 while (search_start < search_end &&
1660 (search_start = memchr_strelem(search_start, searched,
1661 (memSizeType) (search_end - search_start))) != NULL) {
1662 memcpy(result_end, copy_start,
1663 (memSizeType) (search_start - copy_start) * sizeof(strElemType));
1664 result_end += search_start - copy_start;
1665 memcpy(result_end, replacement->mem,
1666 replacement->size * sizeof(strElemType));
1667 result_end += replacement->size;
1668 search_start++;
1669 copy_start = search_start;
1670 } /* while */
1671 } /* if */
1672 memcpy(result_end, copy_start,
1673 (memSizeType) (&mainStri->mem[main_size] - copy_start) * sizeof(strElemType));
1674 result_end += &mainStri->mem[main_size] - copy_start;
1675 result_size = (memSizeType) (result_end - result->mem);
1676 /* printf("result=%lu, guessed_result_size=%ld, result_size=%ld\n",
1677 result, guessed_result_size, result_size); */
1678 REALLOC_STRI_SIZE_SMALLER(resized_result, result, guessed_result_size, result_size);
1679 if (unlikely(resized_result == NULL)) {
1680 FREE_STRI(result, guessed_result_size);
1681 raise_error(MEMORY_ERROR);
1682 result = NULL;
1683 } else {
1684 result = resized_result;
1685 COUNT3_STRI(guessed_result_size, result_size);
1686 result->size = result_size;
1687 } /* if */
1688 } /* if */
1689 return result;
1690 } /* strChRepl */
1691
1692
1693
1694 /**
1695 * Split 'mainStri' around matches of 'delimiter' into an array of strings.
1696 * The array returned by strSplit() contains each substring of
1697 * 'mainStri' that is terminated by another substring that is equal
1698 * to the 'delimiter' or is terminated by the end of 'mainStri'.
1699 * The substrings in the array are in the order in which they occur in
1700 * 'mainStri'. If 'delimiter' does not match any part of 'mainStri'
1701 * then the resulting array has just one element, namely 'mainStri'.
1702 * split("", ':') returns []("")
1703 * split("x", ':') returns []("x")
1704 * split(":", ':') returns []("", "")
1705 * split("x:", ':') returns []("x", "")
1706 * split(":x", ':') returns []("", "x")
1707 * split("15:30", ':') returns []("15", "30")
1708 * @return the array of strings computed by splitting 'mainStri' around
1709 * matches of the given 'delimiter'.
1710 * @exception MEMORY_ERROR Not enough memory to represent the result.
1711 */
1712 rtlArrayType strChSplit (const const_striType mainStri, const charType delimiter)
1713
1714 {
1715 intType used_max_position;
1716 const strElemType *search_start;
1717 const strElemType *search_end;
1718 const strElemType *found_pos;
1719 rtlArrayType result_array;
1720
1721 /* strChSplit */
1722 logFunction(printf("strChSplit(\"%s\", '\\" FMT_U32 ";')\n",
1723 striAsUnquotedCStri(mainStri), delimiter););
1724 if (likely(ALLOC_RTL_ARRAY(result_array, INITIAL_ARRAY_SIZE))) {
1725 result_array->min_position = 1;
1726 result_array->max_position = INITIAL_ARRAY_SIZE;
1727 used_max_position = 0;
1728 search_start = mainStri->mem;
1729 search_end = &mainStri->mem[mainStri->size];
1730 while ((found_pos = memchr_strelem(search_start, delimiter,
1731 (memSizeType) (search_end - search_start))) != NULL &&
1732 result_array != NULL) {
1733 result_array = addCopiedStriToRtlArray(search_start,
1734 (memSizeType) (found_pos - search_start), result_array,
1735 used_max_position);
1736 used_max_position++;
1737 search_start = found_pos + 1;
1738 } /* while */
1739 if (likely(result_array != NULL)) {
1740 result_array = addCopiedStriToRtlArray(search_start,
1741 (memSizeType) (search_end - search_start), result_array,
1742 used_max_position);
1743 used_max_position++;
1744 result_array = completeRtlStriArray(result_array, used_max_position);
1745 } /* if */
1746 } /* if */
1747 if (unlikely(result_array == NULL)) {
1748 raise_error(MEMORY_ERROR);
1749 } /* if */
1750 logFunction(printf("strChSplit -->\n"););
1751 return result_array;
1752 } /* strChSplit */
1753
1754
1755
1756 striType strCLit (const const_striType stri)
1757
1758 {
1759 /* A string literal starts and ends with double quotes ("): */
1760 const memSizeType numOfQuotes = 2;
1761 /* Maximum escape sequence length in C string literal: */
1762 const memSizeType escSequenceMax = STRLEN("\\255");
1763 register strElemType character;
1764 register memSizeType position;
1765 memSizeType striSize;
1766 memSizeType pos;
1767 striType resized_literal;
1768 striType literal;
1769
1770 /* strCLit */
1771 logFunction(printf("strCLit(\"%s\")\n", striAsUnquotedCStri(stri)););
1772 striSize = stri->size;
1773 if (unlikely(striSize > (MAX_STRI_LEN - numOfQuotes) / escSequenceMax ||
1774 !ALLOC_STRI_SIZE_OK(literal, escSequenceMax * striSize + numOfQuotes))) {
1775 raise_error(MEMORY_ERROR);
1776 literal = NULL;
1777 } else {
1778 literal->mem[0] = (strElemType) '"';
1779 pos = 1;
1780 for (position = 0; position < striSize; position++) {
1781 character = stri->mem[position];
1782 /* The following comparisons use int literals like 255 */
1783 /* instead of char literals like '\377'. If char literals */
1784 /* are signed (this is C implementation dependent) the */
1785 /* integral promotion (conversion to int) triggers a sign */
1786 /* extension. In this case the sign extension of '\377' */
1787 /* leads to the int value -1 instead of the desired 255. */
1788 if (character < 127) {
1789 if (character < ' ') {
1790 literal->mem[pos] = (strElemType) '\\';
1791 if (cstri_escape_sequence[character][1] == '0') {
1792 /* Always write three octal digits to avoid errors if */
1793 /* the octal representation is followed by a digit. */
1794 literal->mem[pos + 1] = (strElemType) '0';
1795 /* Write the character as two octal digits. */
1796 /* This code is much faster than sprintf(). */
1797 literal->mem[pos + 2] = (strElemType) ((character >> 3 & 0x7) + '0');
1798 literal->mem[pos + 3] = (strElemType) ((character & 0x7) + '0');
1799 pos += 4;
1800 } else {
1801 literal->mem[pos + 1] = (strElemType) cstri_escape_sequence[character][1];
1802 pos += 2;
1803 } /* if */
1804 #if TRIGRAPH_SEQUENCES_ARE_REPLACED
1805 } else if (character == '\\' || character == '\"' ||
1806 (character == '?' && position >= 1 && stri->mem[position - 1] == '?')) {
1807 #else
1808 } else if (character == '\\' || character == '\"') {
1809 #endif
1810 literal->mem[pos] = (strElemType) '\\';
1811 literal->mem[pos + 1] = character;
1812 pos += 2;
1813 } else {
1814 literal->mem[pos] = character;
1815 pos++;
1816 } /* if */
1817 } else if (character < 256) {
1818 literal->mem[pos] = (strElemType) '\\';
1819 /* Write the character as three octal digits. */
1820 /* This code is much faster than sprintf(). */
1821 literal->mem[pos + 1] = (strElemType) ((character >> 6 & 0x7) + '0');
1822 literal->mem[pos + 2] = (strElemType) ((character >> 3 & 0x7) + '0');
1823 literal->mem[pos + 3] = (strElemType) ((character & 0x7) + '0');
1824 pos += 4;
1825 } else {
1826 FREE_STRI(literal, escSequenceMax * striSize + numOfQuotes);
1827 logError(printf("strCLit(\"%s\"): Character > '\\255;' found.\n",
1828 striAsUnquotedCStri(stri)););
1829 raise_error(RANGE_ERROR);
1830 return NULL;
1831 } /* if */
1832 } /* for */
1833 literal->mem[pos] = (strElemType) '"';
1834 pos++;
1835 literal->size = pos;
1836 REALLOC_STRI_SIZE_SMALLER(resized_literal, literal,
1837 escSequenceMax * striSize + numOfQuotes, pos);
1838 if (unlikely(resized_literal == NULL)) {
1839 FREE_STRI(literal, escSequenceMax * striSize + numOfQuotes);
1840 raise_error(MEMORY_ERROR);
1841 literal = NULL;
1842 } else {
1843 literal = resized_literal;
1844 COUNT3_STRI(escSequenceMax * striSize + numOfQuotes, pos);
1845 } /* if */
1846 } /* if */
1847 return literal;
1848 } /* strCLit */
1849
1850
1851
1852 /**
1853 * Compare two strings.
1854 * @return -1, 0 or 1 if the first argument is considered to be
1855 * respectively less than, equal to, or greater than the
1856 * second.
1857 */
1858 intType strCompare (const const_striType stri1, const const_striType stri2)
1859
1860 {
1861 intType signumValue;
1862
1863 /* strCompare */
1864 #if !HAS_WMEMCMP || WCHAR_T_SIZE != 32 || WMEMCMP_RETURNS_SIGNUM
1865 if (stri1->size < stri2->size) {
1866 signumValue = memcmp_strelem(stri1->mem, stri2->mem, stri1->size);
1867 if (signumValue == 0) {
1868 signumValue = -1;
1869 } /* if */
1870 } else {
1871 signumValue = memcmp_strelem(stri1->mem, stri2->mem, stri2->size);
1872 if (signumValue == 0 && stri1->size > stri2->size) {
1873 signumValue = 1;
1874 } /* if */
1875 } /* if */
1876 #else
1877 if (stri1->size < stri2->size) {
1878 if (memcmp_strelem(stri1->mem, stri2->mem, stri1->size) <= 0) {
1879 signumValue = -1;
1880 } else {
1881 signumValue = 1;
1882 } /* if */
1883 } else {
1884 signumValue = memcmp_strelem(stri1->mem, stri2->mem, stri2->size);
1885 if (signumValue == 0) {
1886 if (stri1->size > stri2->size) {
1887 signumValue = 1;
1888 } /* if */
1889 } else if (signumValue > 0) {
1890 signumValue = 1;
1891 } else {
1892 signumValue = -1;
1893 } /* if */
1894 } /* if */
1895 #endif
1896 return signumValue;
1897 } /* strCompare */
1898
1899
1900
1901 /**
1902 * Reinterpret the generic parameters as striType and call strCompare.
1903 * Function pointers in C programs generated by the Seed7 compiler
1904 * may point to this function. This assures correct behaviour even
1905 * if sizeof(genericType) != sizeof(striType).
1906 * @return -1, 0 or 1 if the first argument is considered to be
1907 * respectively less than, equal to, or greater than the
1908 * second.
1909 */
1910 intType strCmpGeneric (const genericType value1, const genericType value2)
1911
1912 { /* strCmpGeneric */
1913 return strCompare(((const_rtlObjectType *) &value1)->value.striValue,
1914 ((const_rtlObjectType *) &value2)->value.striValue);
1915 } /* strCmpGeneric */
1916
1917
1918
1919 /**
1920 * Concatenate two strings.
1921 * @return the result of the concatenation.
1922 */
1923 striType strConcat (const const_striType stri1, const const_striType stri2)
1924
1925 {
1926 memSizeType result_size;
1927 striType result;
1928
1929 /* strConcat */
1930 logFunction(printf("strConcat(\"%s\", ", striAsUnquotedCStri(stri1));
1931 printf("\"%s\")", striAsUnquotedCStri(stri2));
1932 fflush(stdout););
1933 if (unlikely(stri1->size > MAX_STRI_LEN - stri2->size)) {
1934 /* number of bytes does not fit into memSizeType */
1935 raise_error(MEMORY_ERROR);
1936 result = NULL;
1937 } else {
1938 result_size = stri1->size + stri2->size;
1939 if (unlikely(!ALLOC_STRI_SIZE_OK(result, result_size))) {
1940 raise_error(MEMORY_ERROR);
1941 } else {
1942 result->size = result_size;
1943 memcpy(result->mem, stri1->mem,
1944 stri1->size * sizeof(strElemType));
1945 memcpy(&result->mem[stri1->size], stri2->mem,
1946 stri2->size * sizeof(strElemType));
1947 } /* if */
1948 } /* if */
1949 logFunctionResult(printf("\"%s\"\n", striAsUnquotedCStri(result)););
1950 return result;
1951 } /* strConcat */
1952
1953
1954
1955 /**
1956 * Concatenate a character to a string.
1957 * @return the result of the concatenation.
1958 */
1959 striType strConcatChar (const const_striType stri1, const charType aChar)
1960
1961 {
1962 memSizeType result_size;
1963 striType result;
1964
1965 /* strConcatChar */
1966 logFunction(printf("strConcatChar(\"%s\", '\\" FMT_U32 ";')",
1967 striAsUnquotedCStri(stri1), aChar);
1968 fflush(stdout););
1969 if (unlikely(stri1->size > MAX_STRI_LEN - 1)) {
1970 /* number of bytes does not fit into memSizeType */
1971 raise_error(MEMORY_ERROR);
1972 result = NULL;
1973 } else {
1974 result_size = stri1->size + 1;
1975 if (unlikely(!ALLOC_STRI_SIZE_OK(result, result_size))) {
1976 raise_error(MEMORY_ERROR);
1977 } else {
1978 result->size = result_size;
1979 memcpy(result->mem, stri1->mem,
1980 stri1->size * sizeof(strElemType));
1981 result->mem[stri1->size] = aChar;
1982 } /* if */
1983 } /* if */
1984 logFunctionResult(printf("\"%s\"\n", striAsUnquotedCStri(result)););
1985 return result;
1986 } /* strConcatChar */
1987
1988
1989
1990 /**
1991 * Concatenate a character to a string.
1992 * The parameter 'stri1' is resized and aChar is copied to the
1993 * enlarged area of 'stri1'. StrConcatCharTemp is used by the compiler
1994 * if 'stri1' is temporary value that can be reused.
1995 * @return the resized parameter 'stri1.
1996 */
1997 striType strConcatCharTemp (striType stri1, const charType aChar)
1998
1999 {
2000 memSizeType result_size;
2001 striType resized_stri1;
2002
2003 /* strConcatCharTemp */
2004 logFunction(printf("strConcatCharTemp(\"%s\", '\\" FMT_U32 ";')",
2005 striAsUnquotedCStri(stri1), aChar);
2006 fflush(stdout););
2007 if (unlikely(stri1->size > MAX_STRI_LEN - 1)) {
2008 /* number of bytes does not fit into memSizeType */
2009 FREE_STRI(stri1, stri1->size);
2010 raise_error(MEMORY_ERROR);
2011 stri1 = NULL;
2012 } else {
2013 result_size = stri1->size + 1;
2014 #if WITH_STRI_CAPACITY
2015 if (result_size > stri1->capacity) {
2016 resized_stri1 = growStri(stri1, result_size);
2017 if (unlikely(resized_stri1 == NULL)) {
2018 FREE_STRI(stri1, stri1->size);
2019 raise_error(MEMORY_ERROR);
2020 return NULL;
2021 } else {
2022 stri1 = resized_stri1;
2023 } /* if */
2024 } /* if */
2025 COUNT_GROW_STRI(stri1->size, result_size);
2026 stri1->mem[stri1->size] = aChar;
2027 stri1->size = result_size;
2028 #else
2029 GROW_STRI(resized_stri1, stri1, stri1->size, result_size);
2030 if (unlikely(resized_stri1 == NULL)) {
2031 FREE_STRI(stri1, stri1->size);
2032 raise_error(MEMORY_ERROR);
2033 stri1 = NULL;
2034 } else {
2035 stri1 = resized_stri1;
2036 COUNT_GROW_STRI(stri1->size, result_size);
2037 stri1->mem[stri1->size] = aChar;
2038 stri1->size = result_size;
2039 } /* if */
2040 #endif
2041 } /* if */
2042 logFunctionResult(printf("\"%s\"\n", striAsUnquotedCStri(stri1)););
2043 return stri1;
2044 } /* strConcatCharTemp */
2045
2046
2047
2048 /**
2049 * Concatenate an arbitrary number of strings.
2050 * StrConcatN is used by the compiler to optimize the concatenation of
2051 * three or more strings.
2052 * @param arraySize Number of strings in striArray (>= 3).
2053 * @return the result of the concatenation.
2054 */
2055 striType strConcatN (const const_striType striArray[], memSizeType arraySize)
2056
2057 {
2058 memSizeType pos;
2059 memSizeType result_size;
2060 memSizeType size_limit = MAX_STRI_LEN;
2061 memSizeType elem_size;
2062 strElemType *dest;
2063 striType result;
2064
2065 /* strConcatN */
2066 logFunction(printf("strConcatN(");
2067 for (pos = 0; pos < arraySize; pos++) {
2068 printf("\"%s\", ",
2069 striAsUnquotedCStri(striArray[pos]));
2070 } /* if */
2071 printf(FMT_U_MEM ")", arraySize);
2072 fflush(stdout););
2073 pos = arraySize;
2074 do {
2075 pos--;
2076 if (unlikely(striArray[pos]->size > size_limit)) {
2077 raise_error(MEMORY_ERROR);
2078 return NULL;
2079 } else {
2080 size_limit -= striArray[pos]->size;
2081 } /* if */
2082 } while (pos != 0);
2083 result_size = MAX_STRI_LEN - size_limit;
2084 /* printf("result_size=" FMT_U_MEM "\n", result_size); */
2085 if (unlikely(!ALLOC_STRI_SIZE_OK(result, result_size))) {
2086 raise_error(MEMORY_ERROR);
2087 } else {
2088 result->size = result_size;
2089 dest = result->mem;
2090 for (pos = 0; pos < arraySize; pos++) {
2091 elem_size = striArray[pos]->size;
2092 memcpy(dest, striArray[pos]->mem, elem_size * sizeof(strElemType));
2093 dest += elem_size;
2094 } /* for */
2095 } /* if */
2096 logFunctionResult(printf("\"%s\"\n", striAsUnquotedCStri(result)););
2097 return result;
2098 } /* strConcatN */
2099
2100
2101
2102 /**
2103 * Concatenate two strings.
2104 * The parameter 'stri1' is resized and 'stri2' is copied to the
2105 * enlarged area of 'stri1'. StrConcatTemp is used by the compiler
2106 * if 'stri1' is temporary value that can be reused.
2107 * @return the resized parameter 'stri1.
2108 */
2109 striType strConcatTemp (striType stri1, const const_striType stri2)
2110
2111 {
2112 memSizeType result_size;
2113 striType resized_stri1;
2114
2115 /* strConcatTemp */
2116 logFunction(printf("strConcatTemp(\"%s\", ", striAsUnquotedCStri(stri1));
2117 printf("\"%s\")", striAsUnquotedCStri(stri2));
2118 fflush(stdout););
2119 if (unlikely(stri1->size > MAX_STRI_LEN - stri2->size)) {
2120 /* number of bytes does not fit into memSizeType */
2121 FREE_STRI(stri1, stri1->size);
2122 raise_error(MEMORY_ERROR);
2123 stri1 = NULL;
2124 } else {
2125 result_size = stri1->size + stri2->size;
2126 #if WITH_STRI_CAPACITY
2127 if (result_size > stri1->capacity) {
2128 /* Because 'stri1' is a temporary string it cannot happen */
2129 /* that 'stri2' is identical to 'stri1' or a slice of it. */
2130 resized_stri1 = growStri(stri1, result_size);
2131 if (unlikely(resized_stri1 == NULL)) {
2132 FREE_STRI(stri1, stri1->size);
2133 raise_error(MEMORY_ERROR);
2134 return NULL;
2135 } else {
2136 stri1 = resized_stri1;
2137 } /* if */
2138 } /* if */
2139 COUNT_GROW_STRI(stri1->size, result_size);
2140 memcpy(&stri1->mem[stri1->size], stri2->mem,
2141 stri2->size * sizeof(strElemType));
2142 stri1->size = result_size;
2143 #else
2144 /* Because 'stri1' is a temporary string it cannot happen */
2145 /* that 'stri2' is identical to 'stri1' or a slice of it. */
2146 GROW_STRI(resized_stri1, stri1, stri1->size, result_size);
2147 if (unlikely(resized_stri1 == NULL)) {
2148 FREE_STRI(stri1, stri1->size);
2149 raise_error(MEMORY_ERROR);
2150 stri1 = NULL;
2151 } else {
2152 stri1 = resized_stri1;
2153 COUNT_GROW_STRI(stri1->size, result_size);
2154 memcpy(&stri1->mem[stri1->size], stri2->mem,
2155 stri2->size * sizeof(strElemType));
2156 stri1->size = result_size;
2157 } /* if */
2158 #endif
2159 } /* if */
2160 logFunctionResult(printf("\"%s\"\n", striAsUnquotedCStri(stri1)););
2161 return stri1;
2162 } /* strConcatTemp */
2163
2164
2165
2166 /**
2167 * Assign source to *dest.
2168 * A copy function assumes that *dest contains a legal value.
2169 * @exception MEMORY_ERROR Not enough memory to create dest.
2170 */
2171 void strCopy (striType *const dest, const const_striType source)
2172
2173 {
2174 memSizeType new_size;
2175 striType stri_dest;
2176
2177 /* strCopy */
2178 logFunction(printf("strCopy(\"%s\", ", striAsUnquotedCStri(*dest));
2179 printf("\"%s\")", striAsUnquotedCStri(source));
2180 fflush(stdout););
2181 stri_dest = *dest;
2182 new_size = source->size;
2183 if (stri_dest->size == new_size) {
2184 /* It is possible that stri_dest and source overlap. */
2185 memmove(stri_dest->mem, source->mem,
2186 new_size * sizeof(strElemType));
2187 } else {
2188 #if WITH_STRI_CAPACITY
2189 if (stri_dest->capacity >= new_size && !SHRINK_REASON(stri_dest, new_size)) {
2190 COUNT_GROW2_STRI(stri_dest->size, new_size);
2191 stri_dest->size = new_size;
2192 /* It is possible that stri_dest and source overlap. */
2193 memmove(stri_dest->mem, source->mem,
2194 new_size * sizeof(strElemType));
2195 #else
2196 if (stri_dest->size > new_size) {
2197 /* It is possible that stri_dest and source overlap. */
2198 /* The move must be done before the shrink, to avoid */
2199 /* accessing non-existing data. */
2200 memmove(stri_dest->mem, source->mem,
2201 new_size * sizeof(strElemType));
2202 SHRINK_STRI(stri_dest, stri_dest, stri_dest->size, new_size);
2203 if (unlikely(stri_dest == NULL)) {
2204 raise_error(MEMORY_ERROR);
2205 return;
2206 } else {
2207 COUNT_SHRINK_STRI(stri_dest->size, new_size);
2208 stri_dest->size = new_size;
2209 *dest = stri_dest;
2210 } /* if */
2211 #endif
2212 } else {
2213 if (unlikely(!ALLOC_STRI_SIZE_OK(stri_dest, new_size))) {
2214 raise_error(MEMORY_ERROR);
2215 return;
2216 } else {
2217 stri_dest->size = new_size;
2218 memcpy(stri_dest->mem, source->mem,
2219 new_size * sizeof(strElemType));
2220 FREE_STRI(*dest, (*dest)->size);
2221 *dest = stri_dest;
2222 } /* if */
2223 } /* if */
2224 } /* if */
2225 logFunctionResult(printf("\"%s\"\n", striAsUnquotedCStri(*dest)););
2226 } /* strCopy */
2227
2228
2229
2230 /**
2231 * Reinterpret the generic parameters as striType and call strCopy.
2232 * Function pointers in C programs generated by the Seed7 compiler
2233 * may point to this function. This assures correct behaviour even
2234 * if sizeof(genericType) != sizeof(striType).
2235 */
2236 void strCpyGeneric (genericType *const dest, const genericType source)
2237
2238 { /* strCpyGeneric */
2239 strCopy(&((rtlObjectType *) dest)->value.striValue,
2240 ((const_rtlObjectType *) &source)->value.striValue);
2241 } /* strCpyGeneric */
2242
2243
2244
2245 /**
2246 * Return a copy of source, that can be assigned to a new destination.
2247 * It is assumed that the destination of the assignment is undefined.
2248 * Create functions can be used to initialize Seed7 constants.
2249 * @return a copy of source.
2250 * @exception MEMORY_ERROR Not enough memory to represent the result.
2251 */
2252 striType strCreate (const const_striType source)
2253
2254 {
2255 memSizeType new_size;
2256 striType result;
2257
2258 /* strCreate */
2259 logFunction(printf("strCreate(\"%s\")", striAsUnquotedCStri(source));
2260 fflush(stdout););
2261 new_size = source->size;
2262 if (unlikely(!ALLOC_STRI_SIZE_OK(result, new_size))) {
2263 raise_error(MEMORY_ERROR);
2264 } else {
2265 result->size = new_size;
2266 if (new_size != 0) {
2267 memcpy(result->mem, source->mem, new_size * sizeof(strElemType));
2268 } /* if */
2269 } /* if */
2270 logFunctionResult(printf("\"%s\"\n", striAsUnquotedCStri(result)););
2271 return result;
2272 } /* strCreate */
2273
2274
2275
2276 /**
2277 * Generic Create function to be used via function pointers.
2278 * Function pointers in C programs generated by the Seed7 compiler
2279 * may point to this function. This assures correct behaviour even
2280 * if sizeof(genericType) != sizeof(striType).
2281 */
2282 genericType strCreateGeneric (const genericType source)
2283
2284 {
2285 rtlObjectType result;
2286
2287 /* strCreateGeneric */
2288 INIT_GENERIC_PTR(result.value.genericValue);
2289 result.value.striValue =
2290 strCreate(((const_rtlObjectType *) &source)->value.striValue);
2291 return result.value.genericValue;
2292 } /* strCreateGeneric */
2293
2294
2295
2296 /**
2297 * Free the memory referred by 'old_string'.
2298 * After strDestr is left 'old_string' refers to not existing memory.
2299 * The memory where 'old_string' is stored can be freed afterwards.
2300 */
2301 void strDestr (const const_striType old_string)
2302
2303 { /* strDestr */
2304 logFunction(printf("strDestr(\"%s\")\n", striAsUnquotedCStri(old_string)););
2305 if (old_string != NULL) {
2306 FREE_STRI(old_string, old_string->size);
2307 } /* if */
2308 } /* strDestr */
2309
2310
2311
2312 /**
2313 * Generic Destr function to be used via function pointers.
2314 * Function pointers in C programs generated by the Seed7 compiler
2315 * may point to this function. This assures correct behaviour even
2316 * if sizeof(genericType) != sizeof(striType).
2317 */
2318 void strDestrGeneric (const genericType old_value)
2319
2320 { /* strDestrGeneric */
2321 strDestr(((const_rtlObjectType *) &old_value)->value.striValue);
2322 } /* strDestrGeneric */
2323
2324
2325
2326 /**
2327 * Return an empty string, that can be assigned to a new destination.
2328 * This function is used as performance optimization by the compiler.
2329 * StrEmpty is used instead of strCreate, if it is known at
2330 * compile-time, that the source string is empty.
2331 * @return an empty string.
2332 * @exception MEMORY_ERROR Not enough memory to represent the result.
2333 */
2334 striType strEmpty (void)
2335
2336 {
2337 striType result;
2338
2339 /* strEmpty */
2340 logFunction(printf("strEmpty()");
2341 fflush(stdout););
2342 if (unlikely(!ALLOC_STRI_SIZE_OK(result, 0))) {
2343 raise_error(MEMORY_ERROR);
2344 } else {
2345 result->size = 0;
2346 } /* if */
2347 logFunctionResult(printf("\"%s\"\n", striAsUnquotedCStri(result)););
2348 return result;
2349 } /* strEmpty */
2350
2351
2352
2353 /**
2354 * Check if stri1 is greater than or equal to stri2.
2355 * @return TRUE if stri1 is greater than or equal to stri2,
2356 * FALSE otherwise.
2357 */
2358 boolType strGe (const const_striType stri1, const const_striType stri2)
2359
2360 { /* strGe */
2361 if (stri1->size >= stri2->size) {
2362 return memcmp_strelem(stri1->mem, stri2->mem, stri2->size) >= 0;
2363 } else {
2364 return memcmp_strelem(stri1->mem, stri2->mem, stri1->size) > 0;
2365 } /* if */
2366 } /* strGe */
2367
2368
2369
2370 /**
2371 * Check if stri1 is greater than stri2.
2372 * @return TRUE if stri1 is greater than stri2,
2373 * FALSE otherwise.
2374 */
2375 boolType strGt (const const_striType stri1, const const_striType stri2)
2376
2377 { /* strGt */
2378 if (stri1->size > stri2->size) {
2379 return memcmp_strelem(stri1->mem, stri2->mem, stri2->size) >= 0;
2380 } else {
2381 return memcmp_strelem(stri1->mem, stri2->mem, stri1->size) > 0;
2382 } /* if */
2383 } /* strGt */
2384
2385
2386
2387 /**
2388 * Compute the hash value of a string.
2389 * @return the hash value.
2390 */
2391 intType strHashCode (const const_striType stri)
2392
2393 { /* strHashCode */
2394 return hashCode(stri);
2395 } /* strHashCode */
2396
2397
2398
#if ALLOW_STRITYPE_SLICES
/**
 *  Get a substring ending at a stop position as slice.
 *  The first character in a string has the position 1.
 *  This function is used by the compiler to avoid copying string data.
 *  The 'slice' is initialized to refer to the head of 'stri'.
 *  If the head is empty the slice gets the size 0 and a NULL
 *  element pointer. If 'stop' is negative 'slice' is not changed.
 *  @exception INDEX_ERROR The stop position is negative.
 */
void strHeadSlice (const const_striType stri, const intType stop, striType slice)
{
    memSizeType length;

    logFunction(printf("strHeadSlice(\"%s\", " FMT_D ")",
                       striAsUnquotedCStri(stri), stop);
                fflush(stdout););
    length = stri->size;
    if (unlikely(stop < 0)) {
        logError(printf("strHeadSlice: Stop negative."););
        raise_error(INDEX_ERROR);
    } else if (stop >= 1 && length >= 1) {
        /* The slice shares the elements of 'stri'. */
        SET_SLICE_CAPACITY(slice, 0);
        slice->mem = stri->mem;
        /* A stop position beyond the end yields the whole string. */
        slice->size = length <= (uintType) stop ? length : (memSizeType) stop;
    } else {
        /* Empty head: 'stop' is 0 or 'stri' is empty. */
        SET_SLICE_CAPACITY(slice, 0);
        slice->mem = NULL;
        slice->size = 0;
    }
    logFunctionResult(printf("\"%s\"\n", striAsUnquotedCStri(slice)););
}

#endif
2437
2438
2439
2440 /**
2441 * Get a substring ending at a stop position.
2442 * The first character in a string has the position 1.
2443 * @return the substring ending at the stop position.
2444 * @exception INDEX_ERROR The stop position is negative.
2445 * @exception MEMORY_ERROR Not enough memory to represent the result.
2446 */
2447 striType strHead (const const_striType stri, const intType stop)
2448
2449 {
2450 memSizeType striSize;
2451 memSizeType headSize;
2452 striType head;
2453
2454 /* strHead */
2455 logFunction(printf("strHead(\"%s\", " FMT_D ")",
2456 striAsUnquotedCStri(stri), stop);
2457 fflush(stdout););
2458 striSize = stri->size;
2459 if (stop >= 1 && striSize >= 1) {
2460 if (striSize <= (uintType) stop) {
2461 headSize = striSize;
2462 } else {
2463 headSize = (memSizeType) stop;
2464 } /* if */
2465 if (unlikely(!ALLOC_STRI_SIZE_OK(head, headSize))) {
2466 raise_error(MEMORY_ERROR);
2467 } else {
2468 head->size = headSize;
2469 memcpy(head->mem, stri->mem, headSize * sizeof(strElemType));
2470 } /* if */
2471 } else if (unlikely(stop < 0)) {
2472 logError(printf("strHead: Stop negative."););
2473 raise_error(INDEX_ERROR);
2474 head = NULL;
2475 } else {
2476 if (unlikely(!ALLOC_STRI_SIZE_OK(head, (memSizeType) 0))) {
2477 raise_error(MEMORY_ERROR);
2478 } else {
2479 head->size = 0;
2480 } /* if */
2481 } /* if */
2482 logFunctionResult(printf("\"%s\"\n", striAsUnquotedCStri(head)););
2483 return head;
2484 } /* strHead */
2485
2486
2487
2488 /**
2489 * Get a substring ending at a stop position.
2490 * The first character in a string has the position 1.
2491 * StrHeadTemp is used by the compiler if 'stri' is temporary
2492 * value that can be reused.
2493 * @return the substring ending at the stop position.
2494 * @exception INDEX_ERROR The stop position is negative.
2495 * @exception MEMORY_ERROR Not enough memory to represent the result.
2496 */
2497 striType strHeadTemp (const striType stri, const intType stop)
2498
2499 {
2500 memSizeType striSize;
2501 memSizeType headSize;
2502 striType head;
2503
2504 /* strHeadTemp */
2505 striSize = stri->size;
2506 if (stop >= 1 && striSize >= 1) {
2507 if (striSize <= (uintType) stop) {
2508 return stri;
2509 } else {
2510 headSize = (memSizeType) stop;
2511 } /* if */
2512 } else if (unlikely(stop < 0)) {
2513 logError(printf("strHeadTemp: Stop negative."););
2514 FREE_STRI(stri, stri->size);
2515 raise_error(INDEX_ERROR);
2516 return NULL;
2517 } else {
2518 headSize = 0;
2519 } /* if */
2520 #if WITH_STRI_CAPACITY
2521 if (!SHRINK_REASON(stri, headSize)) {
2522 COUNT_GROW2_STRI(striSize, headSize);
2523 head = stri;
2524 head->size = headSize;
2525 } else {
2526 head = shrinkStri(stri, headSize);
2527 if (unlikely(head == NULL)) {
2528 /* Theoretical shrinking a memory area should never fail. */
2529 /* For the strange case that it fails we keep stri intact */
2530 /* to avoid a heap corruption. Consider this expression: */
2531 /* aString = strHeadTemp(aString, anIndex); */
2532 /* In compiled programs the assignment to aString would be */
2533 /* skipped because raise_error triggers a longjmp(). */
2534 /* Therefore stri would keep the old value. */
2535 /* If shrinking a memory area fails with the expression: */
2536 /* strHeadTemp(anExpression, anIndex) */
2537 /* the result of anExpression is not freed (memory leak). */
2538 /* FREE_STRI(stri, stri->size); */
2539 raise_error(MEMORY_ERROR);
2540 } else {
2541 COUNT_SHRINK_STRI(striSize, headSize);
2542 head->size = headSize;
2543 } /* if */
2544 } /* if */
2545 #else
2546 SHRINK_STRI(head, stri, striSize, headSize);
2547 if (unlikely(head == NULL)) {
2548 /* Theoretical shrinking a memory area should never fail. */
2549 /* See above for a description of this situation. */
2550 /* FREE_STRI(stri, stri->size); */
2551 raise_error(MEMORY_ERROR);
2552 } else {
2553 COUNT_SHRINK_STRI(striSize, headSize);
2554 head->size = headSize;
2555 } /* if */
2556 #endif
2557 return head;
2558 } /* strHeadTemp */
2559
2560
2561
2562 /**
2563 * Search string 'searched' in 'mainStri' at or after 'fromIndex'.
2564 * The search starts at 'fromIndex' and proceeds to the right.
2565 * The first character in a string has the position 1.
2566 * This function uses a modified Boyer–Moore string search algorithm.
2567 * @return the position of 'searched' or 0 if 'mainStri'
2568 * does not contain 'searched' at or after 'fromIndex'.
2569 */
2570 static intType strIPos2 (const const_striType mainStri, const const_striType searched,
2571 const intType fromIndex)
2572
2573 {
2574 memSizeType main_size;
2575 memSizeType searched_size;
2576 strElemType ch_n;
2577 const strElemType *ch_n_pos;
2578 memSizeType delta;
2579 memSizeType charDelta[CHAR_DELTA_BEYOND + 1];
2580 memSizeType pos;
2581 const strElemType *main_mem;
2582 const strElemType *searched_mem;
2583 const strElemType *search_start;
2584 const strElemType *search_end;
2585
2586 /* strIPos2 */
2587 main_size = mainStri->size - ((memSizeType) fromIndex - 1);
2588 searched_size = searched->size;
2589 for (ch_n = 0; ch_n <= CHAR_DELTA_BEYOND; ch_n++) {
2590 charDelta[ch_n] = searched_size;
2591 } /* for */
2592 searched_mem = searched->mem;
2593 for (pos = 0; pos < searched_size - 1; pos++) {
2594 ch_n = searched_mem[pos];
2595 if (ch_n < CHAR_DELTA_BEYOND) {
2596 charDelta[ch_n] = searched_size - pos - 1;
2597 } else {
2598 charDelta[CHAR_DELTA_BEYOND] = searched_size - pos - 1;
2599 } /* if */
2600 } /* for */
2601 ch_n = searched_mem[searched_size - 1];
2602 ch_n_pos = rsearch_strelem(&searched_mem[searched_size - 2], ch_n, searched_size - 1);
2603 if (ch_n_pos == NULL) {
2604 delta = searched_size;
2605 } else {
2606 delta = (memSizeType) (&searched_mem[searched_size - 1] - ch_n_pos);
2607 } /* if */
2608 main_mem = &mainStri->mem[fromIndex - 1];
2609 search_start = &main_mem[searched_size - 1];
2610 search_end = &main_mem[main_size];
2611 while (search_start < search_end) {
2612 search_start = search_strelem2(search_start, ch_n, search_end, charDelta);
2613 if (search_start == NULL) {
2614 return 0;
2615 } else {
2616 if (memcmp(search_start - searched_size + 1, searched_mem,
2617 (searched_size - 1) * sizeof(strElemType)) == 0) {
2618 return ((intType) (search_start - searched_size + 1 - main_mem)) + fromIndex;
2619 } else {
2620 search_start += delta;
2621 } /* if */
2622 } /* if */
2623 } /* while */
2624 return 0;
2625 } /* strIPos2 */
2626
2627
2628
2629 /**
2630 * Search string 'searched' in 'mainStri' at or after 'fromIndex'.
2631 * The search starts at 'fromIndex' and proceeds to the right.
2632 * The first character in a string has the position 1.
2633 * This function calls strIPos2 if 'mainStri' is long.
2634 * @return the position of 'searched' or 0 if 'mainStri'
2635 * does not contain 'searched' at or after 'fromIndex'.
2636 * @exception RANGE_ERROR 'fromIndex' <= 0 holds.
2637 */
2638 intType strIPos (const const_striType mainStri, const const_striType searched,
2639 const intType fromIndex)
2640
2641 {
2642 memSizeType main_size;
2643 memSizeType searched_size;
2644 strElemType ch_n;
2645 const strElemType *ch_n_pos;
2646 memSizeType delta;
2647 const strElemType *main_mem;
2648 const strElemType *searched_mem;
2649 const strElemType *search_start;
2650 const strElemType *search_end;
2651
2652 /* strIPos */
2653 logFunction(printf("strIPos(\"%s\", ",
2654 striAsUnquotedCStri(mainStri));
2655 printf("\"%s\", " FMT_D ")\n",
2656 striAsUnquotedCStri(searched), fromIndex););
2657 if (unlikely(fromIndex <= 0)) {
2658 logError(printf("strIPos(\"%s\", ",
2659 striAsUnquotedCStri(mainStri));
2660 printf("\"%s\", " FMT_D "): fromIndex <= 0.\n",
2661 striAsUnquotedCStri(searched), fromIndex););
2662 raise_error(RANGE_ERROR);
2663 } else {
2664 main_size = mainStri->size;
2665 searched_size = searched->size;
2666 if (searched_size != 0 && main_size >= searched_size &&
2667 (uintType) fromIndex - 1 <= main_size - searched_size) {
2668 main_size -= (memSizeType) fromIndex - 1;
2669 if (searched_size >= BOYER_MOORE_SEARCHED_STRI_THRESHOLD &&
2670 main_size >= BOYER_MOORE_MAIN_STRI_THRESHOLD) {
2671 return strIPos2(mainStri, searched, fromIndex);
2672 } else if (searched_size == 1) {
2673 return strChIPos(mainStri, searched->mem[0], fromIndex);
2674 } else {
2675 searched_mem = searched->mem;
2676 ch_n = searched_mem[searched_size - 1];
2677 ch_n_pos = rsearch_strelem(&searched_mem[searched_size - 2], ch_n, searched_size - 1);
2678 if (ch_n_pos == NULL) {
2679 delta = searched_size;
2680 } else {
2681 delta = (memSizeType) (&searched_mem[searched_size - 1] - ch_n_pos);
2682 } /* if */
2683 main_mem = &mainStri->mem[fromIndex - 1];
2684 search_start = &main_mem[searched_size - 1];
2685 search_end = &main_mem[main_size];
2686 while (search_start < search_end) {
2687 search_start = memchr_strelem(search_start, ch_n,
2688 (memSizeType) (search_end - search_start));
2689 if (search_start == NULL) {
2690 return 0;
2691 } else {
2692 if (memcmp(search_start - searched_size + 1, searched_mem,
2693 (searched_size - 1) * sizeof(strElemType)) == 0) {
2694 return ((intType) (search_start - searched_size + 1 - main_mem)) + fromIndex;
2695 } else {
2696 search_start += delta;
2697 } /* if */
2698 } /* if */
2699 } /* while */
2700 } /* if */
2701 } /* if */
2702 } /* if */
2703 return 0;
2704 } /* strIPos */
2705
2706
2707
2708 /**
2709 * Check if stri1 is less than or equal to stri2.
2710 * @return TRUE if stri1 is less than or equal to stri2,
2711 * FALSE otherwise.
2712 */
2713 boolType strLe (const const_striType stri1, const const_striType stri2)
2714
2715 { /* strLe */
2716 if (stri1->size <= stri2->size) {
2717 return memcmp_strelem(stri1->mem, stri2->mem, stri1->size) <= 0;
2718 } else {
2719 return memcmp_strelem(stri1->mem, stri2->mem, stri2->size) < 0;
2720 } /* if */
2721 } /* strLe */
2722
2723
2724
2725 striType strLit (const const_striType stri)
2726
2727 {
2728 /* A string literal starts and ends with double quotes ("): */
2729 const memSizeType numOfQuotes = 2;
2730 register strElemType character;
2731 register memSizeType position;
2732 memSizeType striSize;
2733 memSizeType pos;
2734 char escapeBuffer[ESC_SEQUENCE_MAX_LEN + NULL_TERMINATION_LEN];
2735 memSizeType len;
2736 striType resized_literal;
2737 striType literal;
2738
2739 /* strLit */
2740 striSize = stri->size;
2741 if (unlikely(striSize > (MAX_STRI_LEN - numOfQuotes) / ESC_SEQUENCE_MAX_LEN ||
2742 !ALLOC_STRI_SIZE_OK(literal, ESC_SEQUENCE_MAX_LEN * striSize + numOfQuotes))) {
2743 raise_error(MEMORY_ERROR);
2744 literal = NULL;
2745 } else {
2746 literal->mem[0] = (strElemType) '"';
2747 pos = 1;
2748 for (position = 0; position < striSize; position++) {
2749 character = (strElemType) stri->mem[position];
2750 if (character < 127) {
2751 if (character < ' ') {
2752 literal->mem[pos] = (strElemType) '\\';
2753 if (stri_escape_sequence[character][1] <= '9') {
2754 /* Numeric escape sequence with one or two digits. */
2755 if (character <= 9) {
2756 literal->mem[pos + 1] = character + '0';
2757 literal->mem[pos + 2] = (strElemType) ';';
2758 pos += 3;
2759 } else {
2760 literal->mem[pos + 1] = (strElemType) stri_escape_sequence[character][1];
2761 literal->mem[pos + 2] = (strElemType) stri_escape_sequence[character][2];
2762 literal->mem[pos + 3] = (strElemType) ';';
2763 pos += 4;
2764 } /* if */
2765 } else {
2766 /* Character escape sequence. */
2767 literal->mem[pos + 1] = (strElemType) stri_escape_sequence[character][1];
2768 pos += 2;
2769 } /* if */
2770 } else if (character == '\\' || character == '\"') {
2771 literal->mem[pos] = (strElemType) '\\';
2772 literal->mem[pos + 1] = character;
2773 pos += 2;
2774 } else {
2775 literal->mem[pos] = character;
2776 pos++;
2777 } /* if */
2778 } else if (character <= 160) {
2779 /* Write characters between 128 and 160 as decimal. */
2780 /* This code is much faster than sprintf(). */
2781 literal->mem[pos] = (strElemType) '\\';
2782 literal->mem[pos + 3] = character % 10 + '0';
2783 character /= 10;
2784 literal->mem[pos + 2] = character % 10 + '0';
2785 literal->mem[pos + 1] = '1';
2786 literal->mem[pos + 4] = (strElemType) ';';
2787 pos += 5;
2788 } else if (character >= 256) {
2789 len = (memSizeType) sprintf(escapeBuffer, "\\" FMT_U32 ";", character);
2790 memcpy_to_strelem(&literal->mem[pos], (const_ustriType) escapeBuffer, len);
2791 pos += len;
2792 } else {
2793 literal->mem[pos] = character;
2794 pos++;
2795 } /* if */
2796 } /* for */
2797 literal->mem[pos] = (strElemType) '"';
2798 pos++;
2799 literal->size = pos;
2800 REALLOC_STRI_SIZE_SMALLER(resized_literal, literal,
2801 ESC_SEQUENCE_MAX_LEN * striSize + numOfQuotes, pos);
2802 if (unlikely(resized_literal == NULL)) {
2803 FREE_STRI(literal, ESC_SEQUENCE_MAX_LEN * striSize + numOfQuotes);
2804 raise_error(MEMORY_ERROR);
2805 literal = NULL;
2806 } else {
2807 literal = resized_literal;
2808 COUNT3_STRI(ESC_SEQUENCE_MAX_LEN * striSize + numOfQuotes, pos);
2809 } /* if */
2810 } /* if */
2811 return literal;
2812 } /* strLit */
2813
2814
2815
2816 /**
2817 * Convert a string to lower case.
2818 * The conversion uses the default Unicode case mapping,
2819 * where each character is considered in isolation.
2820 * Characters without case mapping are left unchanged.
2821 * The mapping is independent from the locale. Individual
2822 * character case mappings cannot be reversed, because some
2823 * characters have multiple characters that map to them.
2824 * @return the string converted to lower case.
2825 */
2826 striType strLow (const const_striType stri)
2827
2828 {
2829 memSizeType striSize;
2830 striType result;
2831
2832 /* strLow */
2833 logFunction(printf("strLow(\"%s\")",
2834 striAsUnquotedCStri(stri));
2835 fflush(stdout););
2836 striSize = stri->size;
2837 if (unlikely(!ALLOC_STRI_SIZE_OK(result, striSize))) {
2838 raise_error(MEMORY_ERROR);
2839 } else {
2840 result->size = striSize;
2841 toLower(stri->mem, stri->size, result->mem);
2842 } /* if */
2843 logFunctionResult(printf("\"%s\"\n", striAsUnquotedCStri(result)););
2844 return result;
2845 } /* strLow */
2846
2847
2848
2849 /**
2850 * Convert a string to lower case.
2851 * StrLowTemp is used by the compiler if 'stri' is temporary
2852 * value that can be reused.
2853 * @return the string converted to lower case.
2854 */
2855 striType strLowTemp (const striType stri)
2856
2857 { /* strLowTemp */
2858 logFunction(printf("strLowTemp(\"%s\")",
2859 striAsUnquotedCStri(stri));
2860 fflush(stdout););
2861 toLower(stri->mem, stri->size, stri->mem);
2862 logFunctionResult(printf("\"%s\"\n", striAsUnquotedCStri(stri)););
2863 return stri;
2864 } /* strLowTemp */
2865
2866
2867
2868 /**
2869 * Pad a string with spaces at the left side up to padSize.
2870 * @return the string left padded with spaces.
2871 */
2872 striType strLpad (const const_striType stri, const intType padSize)
2873
2874 {
2875 memSizeType striSize;
2876 striType result;
2877
2878 /* strLpad */
2879 striSize = stri->size;
2880 if (padSize > 0 && (uintType) padSize > striSize) {
2881 if (unlikely((uintType) padSize > MAX_STRI_LEN ||
2882 !ALLOC_STRI_SIZE_OK(result, (memSizeType) padSize))) {
2883 raise_error(MEMORY_ERROR);
2884 result = NULL;
2885 } else {
2886 result->size = (memSizeType) padSize;
2887 #if LPAD_WITH_MEMSET_TO_STRELEM
2888 memset_to_strelem(result->mem, ' ', (memSizeType) padSize - striSize);
2889 #else
2890 {
2891 strElemType *elem = result->mem;
2892 memSizeType idx = (memSizeType) padSize - striSize - 1;
2893
2894 do {
2895 elem[idx] = (strElemType) ' ';
2896 } while (idx-- != 0);
2897 }
2898 #endif
2899 memcpy(&result->mem[(memSizeType) padSize - striSize], stri->mem,
2900 striSize * sizeof(strElemType));
2901 } /* if */
2902 } else {
2903 if (unlikely(!ALLOC_STRI_SIZE_OK(result, striSize))) {
2904 raise_error(MEMORY_ERROR);
2905 } else {
2906 result->size = striSize;
2907 memcpy(result->mem, stri->mem, striSize * sizeof(strElemType));
2908 } /* if */
2909 } /* if */
2910 return result;
2911 } /* strLpad */
2912
2913
2914
2915 /**
2916 * Pad a string with spaces at the left side up to padSize.
2917 * StrLpadTemp is used by the compiler if 'stri' is temporary
2918 * value that can be reused.
2919 * @return the string left padded with spaces.
2920 */
2921 striType strLpadTemp (const striType stri, const intType padSize)
2922
2923 {
2924 memSizeType striSize;
2925 striType result;
2926
2927 /* strLpadTemp */
2928 striSize = stri->size;
2929 if (padSize > 0 && (uintType) padSize > striSize) {
2930 if (unlikely((uintType) padSize > MAX_STRI_LEN ||
2931 !ALLOC_STRI_SIZE_OK(result, (memSizeType) padSize))) {
2932 FREE_STRI(stri, striSize);
2933 raise_error(MEMORY_ERROR);
2934 result = NULL;
2935 } else {
2936 result->size = (memSizeType) padSize;
2937 #if LPAD_WITH_MEMSET_TO_STRELEM
2938 memset_to_strelem(result->mem, ' ', (memSizeType) padSize - striSize);
2939 #else
2940 {
2941 strElemType *elem = result->mem;
2942 memSizeType idx = (memSizeType) padSize - striSize - 1;
2943
2944 do {
2945 elem[idx] = (strElemType) ' ';
2946 } while (idx-- != 0);
2947 }
2948 #endif
2949 memcpy(&result->mem[(memSizeType) padSize - striSize], stri->mem,
2950 striSize * sizeof(strElemType));
2951 FREE_STRI(stri, striSize);
2952 } /* if */
2953 } else {
2954 result = stri;
2955 } /* if */
2956 return result;
2957 } /* strLpadTemp */
2958
2959
2960
2961 /**
2962 * Pad a string with zeroes at the left side up to padSize.
2963 * @return the string left padded with zeroes.
2964 */
2965 striType strLpad0 (const const_striType stri, const intType padSize)
2966
2967 {
2968 memSizeType striSize;
2969 const strElemType *sourceElem;
2970 strElemType *destElem;
2971 memSizeType len;
2972 striType result;
2973
2974 /* strLpad0 */
2975 striSize = stri->size;
2976 if (padSize > 0 && (uintType) padSize > striSize) {
2977 if (unlikely((uintType) padSize > MAX_STRI_LEN ||
2978 !ALLOC_STRI_SIZE_OK(result, (memSizeType) padSize))) {
2979 raise_error(MEMORY_ERROR);
2980 result = NULL;
2981 } else {
2982 result->size = (memSizeType) padSize;
2983 sourceElem = stri->mem;
2984 destElem = result->mem;
2985 len = (memSizeType) padSize - striSize;
2986 if (striSize != 0 && (sourceElem[0] == '-' || sourceElem[0] == '+')) {
2987 *destElem++ = sourceElem[0];
2988 sourceElem++;
2989 striSize--;
2990 } /* if */
2991 while (len--) {
2992 *destElem++ = (strElemType) '0';
2993 } /* while */
2994 memcpy(destElem, sourceElem, striSize * sizeof(strElemType));
2995 } /* if */
2996 } else {
2997 if (unlikely(!ALLOC_STRI_SIZE_OK(result, striSize))) {
2998 raise_error(MEMORY_ERROR);
2999 } else {
3000 result->size = striSize;
3001 memcpy(result->mem, stri->mem, striSize * sizeof(strElemType));
3002 } /* if */
3003 } /* if */
3004 return result;
3005 } /* strLpad0 */
3006
3007
3008
3009 /**
3010 * Pad a string with zeroes at the left side up to padSize.
3011 * StrLpad0Temp is used by the compiler if 'stri' is temporary
3012 * value that can be reused.
3013 * @return the string left padded with zeroes.
3014 */
3015 striType strLpad0Temp (const striType stri, const intType padSize)
3016
3017 {
3018 memSizeType striSize;
3019 const strElemType *sourceElem;
3020 strElemType *destElem;
3021 memSizeType len;
3022 striType result;
3023
3024 /* strLpad0Temp */
3025 striSize = stri->size;
3026 if (padSize > 0 && (uintType) padSize > striSize) {
3027 if (unlikely((uintType) padSize > MAX_STRI_LEN ||
3028 !ALLOC_STRI_SIZE_OK(result, (memSizeType) padSize))) {
3029 FREE_STRI(stri, striSize);
3030 raise_error(MEMORY_ERROR);
3031 result = NULL;
3032 } else {
3033 result->size = (memSizeType) padSize;
3034 sourceElem = stri->mem;
3035 destElem = result->mem;
3036 len = (memSizeType) padSize - striSize;
3037 if (striSize != 0 && (sourceElem[0] == '-' || sourceElem[0] == '+')) {
3038 *destElem++ = sourceElem[0];
3039 sourceElem++;
3040 striSize--;
3041 } /* if */
3042 while (len--) {
3043 *destElem++ = (strElemType) '0';
3044 } /* while */
3045 memcpy(destElem, sourceElem, striSize * sizeof(strElemType));
3046 FREE_STRI(stri, striSize);
3047 } /* if */
3048 } else {
3049 result = stri;
3050 } /* if */
3051 return result;
3052 } /* strLpad0Temp */
3053
3054
3055
3056 /**
3057 * Check if stri1 is less than stri2.
3058 * @return TRUE if stri1 is less than stri2,
3059 * FALSE otherwise.
3060 */
3061 boolType strLt (const const_striType stri1, const const_striType stri2)
3062
3063 { /* strLt */
3064 if (stri1->size < stri2->size) {
3065 return memcmp_strelem(stri1->mem, stri2->mem, stri1->size) <= 0;
3066 } else {
3067 return memcmp_strelem(stri1->mem, stri2->mem, stri2->size) < 0;
3068 } /* if */
3069 } /* strLt */
3070
3071
3072
3073 /**
3074 * Return string with leading whitespace omitted.
3075 * All characters less than or equal to ' ' (space) count as whitespace.
3076 * @return string with leading whitespace omitted.
3077 */
3078 striType strLtrim (const const_striType stri)
3079
3080 {
3081 memSizeType start = 0;
3082 memSizeType striSize;
3083 striType result;
3084
3085 /* strLtrim */
3086 striSize = stri->size;
3087 if (striSize >= 1) {
3088 while (start < striSize && stri->mem[start] <= ' ') {
3089 start++;
3090 } /* while */
3091 striSize -= start;
3092 } /* if */
3093 if (unlikely(!ALLOC_STRI_SIZE_OK(result, striSize))) {
3094 raise_error(MEMORY_ERROR);
3095 return NULL;
3096 } else {
3097 result->size = striSize;
3098 memcpy(result->mem, &stri->mem[start], striSize * sizeof(strElemType));
3099 return result;
3100 } /* if */
3101 } /* strLtrim */
3102
3103
3104
3105 /**
3106 * String multiplication.
3107 * The string 'stri' is concatenated to itself such that in total
3108 * 'factor' strings are concatenated.
3109 * "LA" mult 3 returns "LALALA"
3110 * "WORD" mult 0 returns ""
3111 * @return the result of the string multiplication.
3112 * @exception RANGE_ERROR If the factor is negative.
3113 */
3114 striType strMult (const const_striType stri, const intType factor)
3115
3116 {
3117 memSizeType len;
3118 memSizeType numOfRepeats;
3119 memSizeType powerOfTwo;
3120 strElemType ch;
3121 memSizeType result_size;
3122 striType result;
3123
3124 /* strMult */
3125 logFunction(printf("strMult(\"%s\", " FMT_D ")\n",
3126 striAsUnquotedCStri(stri), factor););
3127 if (unlikely(factor < 0)) {
3128 logError(printf("strMult(\"%s\", " FMT_D "): Negative factor.\n",
3129 striAsUnquotedCStri(stri), factor););
3130 raise_error(RANGE_ERROR);
3131 result = NULL;
3132 } else {
3133 len = stri->size;
3134 if (unlikely(len == 0)) {
3135 if (unlikely(!ALLOC_STRI_SIZE_OK(result, 0))) {
3136 raise_error(MEMORY_ERROR);
3137 } else {
3138 result->size = 0;
3139 } /* if */
3140 } else if (unlikely((uintType) factor > MAX_STRI_LEN / len)) {
3141 raise_error(MEMORY_ERROR);
3142 result = NULL;
3143 } else {
3144 numOfRepeats = (memSizeType) factor;
3145 result_size = numOfRepeats * len;
3146 if (unlikely(!ALLOC_STRI_SIZE_OK(result, result_size))) {
3147 raise_error(MEMORY_ERROR);
3148 } else {
3149 result->size = result_size;
3150 if (len == 1) {
3151 ch = stri->mem[0];
3152 memset_to_strelem(result->mem, ch, numOfRepeats);
3153 } else if (numOfRepeats != 0) {
3154 /* Use binary method for string multiplication: */
3155 memcpy(result->mem, stri->mem, len * sizeof(strElemType));
3156 powerOfTwo = 1;
3157 while (powerOfTwo << 1 < numOfRepeats) {
3158 memcpy(&result->mem[powerOfTwo * len], result->mem,
3159 powerOfTwo * len * sizeof(strElemType));
3160 powerOfTwo <<= 1;
3161 } /* while */
3162 memcpy(&result->mem[powerOfTwo * len], result->mem,
3163 (numOfRepeats - powerOfTwo) * len * sizeof(strElemType));
3164 } /* if */
3165 } /* if */
3166 } /* if */
3167 } /* if */
3168 return result;
3169 } /* strMult */
3170
3171
3172
3173 /**
3174 * Determine leftmost position of string 'searched' in 'mainStri'.
3175 * If the string is found the position of its first character
3176 * is the result. The first character in a string has the position 1.
3177 * This function uses a modified Boyer–Moore string search algorithm.
3178 * @return the position of 'searched' or 0 if 'mainStri'
3179 * does not contain 'searched'.
3180 */
3181 static intType strPos2 (const const_striType mainStri, const const_striType searched)
3182
3183 {
3184 memSizeType main_size;
3185 memSizeType searched_size;
3186 strElemType ch_n;
3187 const strElemType *ch_n_pos;
3188 memSizeType delta;
3189 memSizeType charDelta[CHAR_DELTA_BEYOND + 1];
3190 memSizeType pos;
3191 const strElemType *main_mem;
3192 const strElemType *searched_mem;
3193 const strElemType *search_start;
3194 const strElemType *search_end;
3195
3196 /* strPos2 */
3197 main_size = mainStri->size;
3198 searched_size = searched->size;
3199 for (ch_n = 0; ch_n <= CHAR_DELTA_BEYOND; ch_n++) {
3200 charDelta[ch_n] = searched_size;
3201 } /* for */
3202 searched_mem = searched->mem;
3203 for (pos = 0; pos < searched_size - 1; pos++) {
3204 ch_n = searched_mem[pos];
3205 if (ch_n < CHAR_DELTA_BEYOND) {
3206 charDelta[ch_n] = searched_size - pos - 1;
3207 } else {
3208 charDelta[CHAR_DELTA_BEYOND] = searched_size - pos - 1;
3209 } /* if */
3210 } /* for */
3211 ch_n = searched_mem[searched_size - 1];
3212 ch_n_pos = rsearch_strelem(&searched_mem[searched_size - 2], ch_n, searched_size - 1);
3213 if (ch_n_pos == NULL) {
3214 delta = searched_size;
3215 } else {
3216 delta = (memSizeType) (&searched_mem[searched_size - 1] - ch_n_pos);
3217 } /* if */
3218 main_mem = mainStri->mem;
3219 search_start = &main_mem[searched_size - 1];
3220 search_end = &main_mem[main_size];
3221 while (search_start < search_end) {
3222 search_start = search_strelem2(search_start, ch_n, search_end, charDelta);
3223 if (search_start == NULL) {
3224 return 0;
3225 } else {
3226 if (memcmp(search_start - searched_size + 1, searched_mem,
3227 (searched_size - 1) * sizeof(strElemType)) == 0) {
3228 return ((intType) (search_start - searched_size + 1 - main_mem)) + 1;
3229 } else {
3230 search_start += delta;
3231 } /* if */
3232 } /* if */
3233 } /* while */
3234 return 0;
3235 } /* strPos2 */
3236
3237
3238
3239 /**
3240 * Determine leftmost position of string 'searched' in 'mainStri'.
3241 * If the string is found the position of its first character
3242 * is the result. The first character in a string has the position 1.
3243 * This function calls strPos2 if 'mainStri' is long.
3244 * @return the position of 'searched' or 0 if 'mainStri'
3245 * does not contain 'searched'.
3246 */
3247 intType strPos (const const_striType mainStri, const const_striType searched)
3248
3249 {
3250 memSizeType main_size;
3251 memSizeType searched_size;
3252 strElemType ch_n;
3253 const strElemType *ch_n_pos;
3254 memSizeType delta;
3255 const strElemType *main_mem;
3256 const strElemType *searched_mem;
3257 const strElemType *search_start;
3258 const strElemType *search_end;
3259
3260 /* strPos */
3261 logFunction(printf("strPos(\"%s\", ",
3262 striAsUnquotedCStri(mainStri));
3263 printf("\"%s\")\n",
3264 striAsUnquotedCStri(searched)););
3265 main_size = mainStri->size;
3266 searched_size = searched->size;
3267 if (searched_size != 0 && main_size >= searched_size) {
3268 if (searched_size >= BOYER_MOORE_SEARCHED_STRI_THRESHOLD &&
3269 main_size >= BOYER_MOORE_MAIN_STRI_THRESHOLD) {
3270 return strPos2(mainStri, searched);
3271 } else if (searched_size == 1) {
3272 return strChPos(mainStri, searched->mem[0]);
3273 } else {
3274 searched_mem = searched->mem;
3275 ch_n = searched_mem[searched_size - 1];
3276 ch_n_pos = rsearch_strelem(&searched_mem[searched_size - 2], ch_n, searched_size - 1);
3277 if (ch_n_pos == NULL) {
3278 delta = searched_size;
3279 } else {
3280 delta = (memSizeType) (&searched_mem[searched_size - 1] - ch_n_pos);
3281 } /* if */
3282 main_mem = mainStri->mem;
3283 search_start = &main_mem[searched_size - 1];
3284 search_end = &main_mem[main_size];
3285 while (search_start < search_end) {
3286 search_start = memchr_strelem(search_start, ch_n,
3287 (memSizeType) (search_end - search_start));
3288 if (search_start == NULL) {
3289 return 0;
3290 } else {
3291 if (memcmp(search_start - searched_size + 1, searched_mem,
3292 (searched_size - 1) * sizeof(strElemType)) == 0) {
3293 return ((intType) (search_start - searched_size + 1 - main_mem)) + 1;
3294 } else {
3295 search_start += delta;
3296 } /* if */
3297 } /* if */
3298 } /* while */
3299 } /* if */
3300 } /* if */
3301 return 0;
3302 } /* strPos */
3303
3304
3305
3306 /**
3307 * Append the char 'extension' to 'destination'.
3308 * @exception MEMORY_ERROR Not enough memory for the concatenated
3309 * string.
3310 */
3311 void strPush (striType *const destination, const charType extension)
3312
3313 {
3314 memSizeType new_size;
3315 striType stri_dest;
3316
3317 /* strPush */
3318 logFunction(printf("strPush(\"%s\", '\\" FMT_U32 ";')\n",
3319 striAsUnquotedCStri(*destination), extension);
3320 fflush(stdout););
3321 stri_dest = *destination;
3322 new_size = stri_dest->size + 1;
3323 #if WITH_STRI_CAPACITY
3324 if (new_size > stri_dest->capacity) {
3325 stri_dest = growStri(stri_dest, new_size);
3326 if (unlikely(stri_dest == NULL)) {
3327 raise_error(MEMORY_ERROR);
3328 return;
3329 } else {
3330 *destination = stri_dest;
3331 } /* if */
3332 } /* if */
3333 COUNT_GROW_STRI(stri_dest->size, new_size);
3334 stri_dest->mem[stri_dest->size] = extension;
3335 stri_dest->size = new_size;
3336 #else
3337 GROW_STRI(stri_dest, stri_dest, stri_dest->size, new_size);
3338 if (unlikely(stri_dest == NULL)) {
3339 raise_error(MEMORY_ERROR);
3340 } else {
3341 COUNT_GROW_STRI(stri_dest->size, new_size);
3342 stri_dest->mem[stri_dest->size] = extension;
3343 stri_dest->size = new_size;
3344 *destination = stri_dest;
3345 } /* if */
3346 #endif
3347 logFunctionResult(printf("\"%s\"\n", striAsUnquotedCStri(*destination)););
3348 } /* strPush */
3349
3350
3351
#if ALLOW_STRITYPE_SLICES
/**
 *  Get a substring from a start position to a stop position.
 *  The first character in a string has the position 1.
 *  This function is used by the compiler to avoid copying string data.
 *  The 'slice' is initialized to refer to the requested range of
 *  'stri'. A stop position beyond the end of 'stri' is treated like
 *  the length of 'stri'. If the range is empty 'slice' gets the
 *  size 0 and a NULL data pointer. If an exception is raised
 *  'slice' is not changed.
 *  @param stri String from which the range is taken.
 *  @param start Position of the first character of the range.
 *  @param stop Position of the last character of the range.
 *  @param slice Slice that is set to refer to the range.
 *  @exception INDEX_ERROR The start position is negative or zero, or
 *             the stop position is less than pred(start).
 */
void strRangeSlice (const const_striType stri, intType start, intType stop, striType slice)

  {
    const memSizeType striSize = stri->size;
    memSizeType lastPos;

  /* strRangeSlice */
    logFunction(printf("strRangeSlice(\"%s\", " FMT_D ", " FMT_D ")",
                       striAsUnquotedCStri(stri), start, stop);
                fflush(stdout););
    if (unlikely(start < 1)) {
      logError(printf("strRangeSlice: Start negative or zero."););
      raise_error(INDEX_ERROR);
    } else if (unlikely(stop < start - 1)) {
      logError(printf("strRangeSlice: Stop less then pred(start)."););
      raise_error(INDEX_ERROR);
    } else if (stop >= start && (uintType) start <= striSize) {
      /* Limit the range to the end of the string. */
      lastPos = (uintType) stop > striSize ? striSize : (memSizeType) stop;
      SET_SLICE_CAPACITY(slice, 0);
      slice->mem = &stri->mem[start - 1];
      slice->size = lastPos - (memSizeType) start + 1;
    } else {
      /* The range is empty or starts beyond the end of the string. */
      SET_SLICE_CAPACITY(slice, 0);
      slice->mem = NULL;
      slice->size = 0;
    } /* if */
    logFunctionResult(printf("\"%s\"\n", striAsUnquotedCStri(slice)););
  } /* strRangeSlice */

#endif
3394
3395
3396
3397 /**
3398 * Get a substring from a start position to a stop position.
3399 * The first character in a string has the position 1.
3400 * @return the substring from position start to stop.
3401 * @exception INDEX_ERROR The start position is negative or zero, or
3402 * the stop position is less than pred(start).
3403 * @exception MEMORY_ERROR Not enough memory to represent the result.
3404 */
3405 striType strRange (const const_striType stri, intType start, intType stop)
3406
3407 {
3408 memSizeType striSize;
3409 memSizeType result_size;
3410 striType result;
3411
3412 /* strRange */
3413 striSize = stri->size;
3414 if (unlikely(start < 1)) {
3415 logError(printf("strRange: Start negative or zero."););
3416 raise_error(INDEX_ERROR);
3417 result = NULL;
3418 } else if (stop >= start && (uintType) start <= striSize) {
3419 if ((uintType) stop > striSize) {
3420 result_size = striSize - (memSizeType) start + 1;
3421 } else {
3422 result_size = (memSizeType) stop - (memSizeType) start + 1;
3423 } /* if */
3424 if (unlikely(!ALLOC_STRI_SIZE_OK(result, result_size))) {
3425 raise_error(MEMORY_ERROR);
3426 return NULL;
3427 } /* if */
3428 /* Reversing the order of the following two statements */
3429 /* causes an "Internal Compiler Error" with MSC 6.0 */
3430 /* if using the -Ozacegilt optimisation option in the */
3431 /* large memory model (-AL). Note that the order of the */
3432 /* two statements make no difference to the logic of the */
3433 /* program. */
3434 memcpy(result->mem, &stri->mem[start - 1],
3435 result_size * sizeof(strElemType));
3436 result->size = result_size;
3437 } else if (unlikely(stop < start - 1)) {
3438 logError(printf("strRange: Stop less then pred(start)."););
3439 raise_error(INDEX_ERROR);
3440 result = NULL;
3441 } else {
3442 if (unlikely(!ALLOC_STRI_SIZE_OK(result, (memSizeType) 0))) {
3443 raise_error(MEMORY_ERROR);
3444 } else {
3445 result->size = 0;
3446 } /* if */
3447 } /* if */
3448 return result;
3449 } /* strRange */
3450
3451
3452
3453 /**
3454 * Search char 'searched' in 'mainStri' at or before 'fromIndex'.
3455 * The search starts at 'fromIndex' and proceeds to the left.
3456 * The first character in a string has the position 1.
3457 * @return the position of 'searched' or 0 if 'mainStri'
3458 * does not contain 'searched' at or before 'fromIndex'.
3459 * @exception RANGE_ERROR 'fromIndex' > length(stri) holds.
3460 */
3461 intType strRChIPos (const const_striType mainStri, const charType searched,
3462 const intType fromIndex)
3463
3464 {
3465 const strElemType *main_mem;
3466 const strElemType *found_pos;
3467
3468 /* strRChIPos */
3469 logFunction(printf("strRChIPos(\"%s\", '\\" FMT_U32 ";', " FMT_D ")\n",
3470 striAsUnquotedCStri(mainStri), searched, fromIndex););
3471 if (likely(fromIndex >= 1)) {
3472 if (unlikely((uintType) fromIndex > mainStri->size)) {
3473 logError(printf("strRChIPos(\"%s\", '\\" FMT_U32 ";', " FMT_D "): "
3474 "fromIndex <= 0.\n",
3475 striAsUnquotedCStri(mainStri), searched, fromIndex););
3476 raise_error(RANGE_ERROR);
3477 } else {
3478 if (mainStri->size >= 1) {
3479 main_mem = mainStri->mem;
3480 found_pos = rsearch_strelem(&main_mem[fromIndex - 1], searched,
3481 (memSizeType) fromIndex);
3482 if (found_pos != NULL) {
3483 return ((intType) (found_pos - main_mem)) + 1;
3484 } /* if */
3485 } /* if */
3486 } /* if */
3487 } /* if */
3488 return 0;
3489 } /* strRChIPos */
3490
3491
3492
3493 /**
3494 * Determine rightmost position of char 'searched' in 'mainStri'.
3495 * The first character in a string has the position 1.
3496 * @return the position of 'searched' or 0 if 'mainStri'
3497 * does not contain 'searched'.
3498 */
3499 intType strRChPos (const const_striType mainStri, const charType searched)
3500
3501 {
3502 const strElemType *main_mem;
3503 const strElemType *found_pos;
3504
3505 /* strRChPos */
3506 logFunction(printf("strRChPos(\"%s\", '\\" FMT_U32 ";')\n",
3507 striAsUnquotedCStri(mainStri), searched););
3508 if (mainStri->size >= 1) {
3509 main_mem = mainStri->mem;
3510 found_pos = rsearch_strelem(&main_mem[mainStri->size - 1], searched,
3511 mainStri->size);
3512 if (found_pos != NULL) {
3513 return ((intType) (found_pos - main_mem)) + 1;
3514 } /* if */
3515 } /* if */
3516 return 0;
3517 } /* strRChPos */
3518
3519
3520
3521 /**
3522 * Replace all occurrences of 'searched' in 'mainStri' by 'replacement'.
3523 * This function uses a modified Boyer–Moore string search algorithm.
3524 * @return the result of the replacement.
3525 */
3526 static memSizeType strRepl2 (const const_striType mainStri, const const_striType searched,
3527 const const_striType replacement, const striType result)
3528
3529 {
3530 memSizeType main_size;
3531 memSizeType searched_size;
3532 strElemType ch_n;
3533 const strElemType *ch_n_pos;
3534 memSizeType delta;
3535 memSizeType charDelta[CHAR_DELTA_BEYOND + 1];
3536 memSizeType pos;
3537 const strElemType *main_mem;
3538 const strElemType *searched_mem;
3539 const strElemType *search_start;
3540 const strElemType *search_end;
3541 const strElemType *copy_start;
3542 strElemType *result_end;
3543 memSizeType result_size;
3544
3545 /* strRepl2 */
3546 main_size = mainStri->size;
3547 searched_size = searched->size;
3548 for (ch_n = 0; ch_n <= CHAR_DELTA_BEYOND; ch_n++) {
3549 charDelta[ch_n] = searched_size;
3550 } /* for */
3551 searched_mem = searched->mem;
3552 for (pos = 0; pos < searched_size - 1; pos++) {
3553 ch_n = searched_mem[pos];
3554 if (ch_n < CHAR_DELTA_BEYOND) {
3555 charDelta[ch_n] = searched_size - pos - 1;
3556 } else {
3557 charDelta[CHAR_DELTA_BEYOND] = searched_size - pos - 1;
3558 } /* if */
3559 } /* for */
3560 ch_n = searched_mem[searched_size - 1];
3561 ch_n_pos = rsearch_strelem(&searched_mem[searched_size - 2], ch_n, searched_size - 1);
3562 if (ch_n_pos == NULL) {
3563 delta = searched_size;
3564 } else {
3565 delta = (memSizeType) (&searched_mem[searched_size - 1] - ch_n_pos);
3566 } /* if */
3567 main_mem = mainStri->mem;
3568 search_start = &main_mem[searched_size - 1];
3569 search_end = &main_mem[main_size];
3570 copy_start = mainStri->mem;
3571 result_end = result->mem;
3572 while (search_start != NULL && search_start < search_end) {
3573 search_start = search_strelem2(search_start, ch_n, search_end, charDelta);
3574 if (search_start != NULL) {
3575 if (memcmp(search_start - searched_size + 1, searched_mem,
3576 (searched_size - 1) * sizeof(strElemType)) == 0) {
3577 memcpy(result_end, copy_start,
3578 (memSizeType) (search_start - searched_size + 1 - copy_start) *
3579 sizeof(strElemType));
3580 result_end += search_start - searched_size + 1 - copy_start;
3581 memcpy(result_end, replacement->mem,
3582 replacement->size * sizeof(strElemType));
3583 result_end += replacement->size;
3584 copy_start = search_start + 1;
3585 search_start += searched_size;
3586 } else {
3587 search_start += delta;
3588 } /* if */
3589 } /* if */
3590 } /* while */
3591 memcpy(result_end, copy_start,
3592 (memSizeType) (&mainStri->mem[main_size] - copy_start) * sizeof(strElemType));
3593 result_end += &mainStri->mem[main_size] - copy_start;
3594 result_size = (memSizeType) (result_end - result->mem);
3595 return result_size;
3596 } /* strRepl2 */
3597
3598
3599
3600 /**
3601 * Replace all occurrences of 'searched' in 'mainStri' by 'replacement'.
3602 * This function calls strRepl2 if 'mainStri' is long.
3603 * @return the result of the replacement.
3604 */
3605 striType strRepl (const const_striType mainStri,
3606 const const_striType searched, const const_striType replacement)
3607
3608 {
3609 memSizeType main_size;
3610 memSizeType searched_size;
3611 memSizeType guessed_result_size;
3612 memSizeType result_size;
3613 strElemType ch_1;
3614 const strElemType *main_mem;
3615 const strElemType *searched_mem;
3616 const strElemType *search_start;
3617 const strElemType *search_end;
3618 const strElemType *copy_start;
3619 strElemType *result_end;
3620 striType resized_result;
3621 striType result;
3622
3623 /* strRepl */
3624 main_size = mainStri->size;
3625 searched_size = searched->size;
3626 /* printf("main_size=" FMT_U_MEM ", searched_size=" FMT_U_MEM
3627 ", replacement->size=" FMT_U_MEM "\n",
3628 main_size, searched_size, replacement->size); */
3629 if (searched_size != 0 && replacement->size > searched_size) {
3630 if (unlikely(main_size / searched_size + 1 > MAX_STRI_LEN / replacement->size)) {
3631 raise_error(MEMORY_ERROR);
3632 return NULL;
3633 } else {
3634 guessed_result_size = (main_size / searched_size + 1) * replacement->size;
3635 } /* if */
3636 } else {
3637 guessed_result_size = main_size;
3638 } /* if */
3639 if (unlikely(!ALLOC_STRI_SIZE_OK(result, guessed_result_size))) {
3640 raise_error(MEMORY_ERROR);
3641 } else {
3642 if (searched_size >= BOYER_MOORE_SEARCHED_STRI_THRESHOLD &&
3643 main_size >= BOYER_MOORE_MAIN_STRI_THRESHOLD) {
3644 result_size = strRepl2(mainStri, searched, replacement, result);
3645 } else {
3646 copy_start = mainStri->mem;
3647 result_end = result->mem;
3648 if (searched_size != 0 && searched_size <= main_size) {
3649 searched_mem = searched->mem;
3650 ch_1 = searched_mem[0];
3651 main_mem = mainStri->mem;
3652 search_start = main_mem;
3653 search_end = &main_mem[main_size - searched_size + 1];
3654 while (search_start < search_end &&
3655 (search_start = memchr_strelem(search_start, ch_1,
3656 (memSizeType) (search_end - search_start))) != NULL) {
3657 if (memcmp(search_start, searched_mem,
3658 searched_size * sizeof(strElemType)) == 0) {
3659 memcpy(result_end, copy_start,
3660 (memSizeType) (search_start - copy_start) * sizeof(strElemType));
3661 result_end += search_start - copy_start;
3662 memcpy(result_end, replacement->mem,
3663 replacement->size * sizeof(strElemType));
3664 result_end += replacement->size;
3665 search_start += searched_size;
3666 copy_start = search_start;
3667 } else {
3668 search_start++;
3669 } /* if */
3670 } /* while */
3671 } /* if */
3672 memcpy(result_end, copy_start,
3673 (memSizeType) (&mainStri->mem[main_size] - copy_start) * sizeof(strElemType));
3674 result_end += &mainStri->mem[main_size] - copy_start;
3675 result_size = (memSizeType) (result_end - result->mem);
3676 } /* if */
3677 /* printf("result=%lu, guessed_result_size=%ld, result_size=%ld\n",
3678 result, guessed_result_size, result_size); */
3679 REALLOC_STRI_SIZE_SMALLER(resized_result, result, guessed_result_size, result_size);
3680 if (unlikely(resized_result == NULL)) {
3681 FREE_STRI(result, guessed_result_size);
3682 raise_error(MEMORY_ERROR);
3683 result = NULL;
3684 } else {
3685 result = resized_result;
3686 COUNT3_STRI(guessed_result_size, result_size);
3687 result->size = result_size;
3688 } /* if */
3689 } /* if */
3690 return result;
3691 } /* strRepl */
3692
3693
3694
3695 /**
3696 * Search string 'searched' in 'mainStri' at or before 'fromIndex'.
3697 * The search starts at 'fromIndex' and proceeds to the left.
3698 * The first character in a string has the position 1.
3699 * This function uses a modified Boyer–Moore string search algorithm.
3700 * @return the position of 'searched' or 0 if 'mainStri'
3701 * does not contain 'searched' at or before 'fromIndex'.
3702 */
3703 static intType strRIPos2 (const const_striType mainStri, const const_striType searched,
3704 const intType fromIndex)
3705
3706 {
3707 memSizeType main_size;
3708 memSizeType searched_size;
3709 strElemType ch_1;
3710 const strElemType *ch_1_pos;
3711 memSizeType delta;
3712 memSizeType charDelta[CHAR_DELTA_BEYOND + 1];
3713 memSizeType pos;
3714 const strElemType *main_mem;
3715 const strElemType *searched_mem;
3716 const strElemType *search_start;
3717 const strElemType *search_end;
3718
3719 /* strRIPos2 */
3720 main_size = mainStri->size;
3721 searched_size = searched->size;
3722 for (ch_1 = 0; ch_1 <= CHAR_DELTA_BEYOND; ch_1++) {
3723 charDelta[ch_1] = searched_size;
3724 } /* for */
3725 searched_mem = searched->mem;
3726 for (pos = searched_size - 1; pos > 0; pos--) {
3727 ch_1 = searched_mem[pos];
3728 if (ch_1 < CHAR_DELTA_BEYOND) {
3729 charDelta[ch_1] = pos;
3730 } else {
3731 charDelta[CHAR_DELTA_BEYOND] = pos;
3732 } /* if */
3733 } /* for */
3734 ch_1 = searched_mem[0];
3735 ch_1_pos = memchr_strelem(&searched_mem[1], ch_1, searched_size - 1);
3736 if (ch_1_pos == NULL) {
3737 delta = searched_size;
3738 } else {
3739 delta = (memSizeType) (ch_1_pos - &searched_mem[0]);
3740 } /* if */
3741 main_mem = mainStri->mem;
3742 if ((uintType) fromIndex - 1 <= main_size - searched_size) {
3743 search_start = &main_mem[fromIndex - 1];
3744 } else {
3745 search_start = &main_mem[main_size - searched_size];
3746 } /* if */
3747 search_end = &main_mem[-1];
3748 while (search_start > search_end) {
3749 search_start = rsearch_strelem2(search_start, ch_1, search_end, charDelta);
3750 if (search_start == NULL) {
3751 return 0;
3752 } else {
3753 if (memcmp(search_start + 1, &searched_mem[1],
3754 (searched_size - 1) * sizeof(strElemType)) == 0) {
3755 return ((intType) (search_start - main_mem)) + 1;
3756 } else {
3757 search_start -= delta;
3758 } /* if */
3759 } /* if */
3760 } /* while */
3761 return 0;
3762 } /* strRIPos2 */
3763
3764
3765
3766 /**
3767 * Search string 'searched' in 'mainStri' at or before 'fromIndex'.
3768 * The search starts at 'fromIndex' and proceeds to the left.
3769 * The first character in a string has the position 1.
3770 * This function calls strRIPos2 if 'mainStri' is long.
3771 * @return the position of 'searched' or 0 if 'mainStri'
3772 * does not contain 'searched' at or before 'fromIndex'.
3773 * @exception RANGE_ERROR 'fromIndex' > length(stri) holds.
3774 */
3775 intType strRIPos (const const_striType mainStri, const const_striType searched,
3776 const intType fromIndex)
3777
3778 {
3779 memSizeType main_size;
3780 memSizeType searched_size;
3781 strElemType ch_1;
3782 const strElemType *searched_mem;
3783 const strElemType *main_mem;
3784 const strElemType *search_start;
3785 const strElemType *search_end;
3786
3787 /* strRIPos */
3788 logFunction(printf("strRIPos(\"%s\", ",
3789 striAsUnquotedCStri(mainStri));
3790 printf("\"%s\", " FMT_D ")\n",
3791 striAsUnquotedCStri(searched), fromIndex););
3792 if (likely(fromIndex >= 1)) {
3793 if (unlikely((uintType) fromIndex > mainStri->size)) {
3794 logError(printf("strRIPos(\"%s\", ",
3795 striAsUnquotedCStri(mainStri));
3796 printf("\"%s\", " FMT_D "): fromIndex <= 0.\n",
3797 striAsUnquotedCStri(searched), fromIndex););
3798 raise_error(RANGE_ERROR);
3799 } else {
3800 main_size = mainStri->size;
3801 searched_size = searched->size;
3802 if (searched_size != 0 && main_size >= searched_size) {
3803 if (searched_size >= BOYER_MOORE_SEARCHED_STRI_THRESHOLD &&
3804 main_size >= BOYER_MOORE_MAIN_STRI_THRESHOLD) {
3805 return strRIPos2(mainStri, searched, fromIndex);
3806 } else {
3807 searched_mem = searched->mem;
3808 ch_1 = searched_mem[0];
3809 main_mem = mainStri->mem;
3810 if ((uintType) fromIndex - 1 <= main_size - searched_size) {
3811 search_start = &main_mem[fromIndex - 1];
3812 } else {
3813 search_start = &main_mem[main_size - searched_size];
3814 } /* if */
3815 search_end = &main_mem[-1];
3816 while ((search_start = rsearch_strelem(search_start,
3817 ch_1, (memSizeType) (search_start - search_end))) != NULL) {
3818 if (memcmp(search_start, searched_mem,
3819 searched_size * sizeof(strElemType)) == 0) {
3820 return ((intType) (search_start - main_mem)) + 1;
3821 } else {
3822 search_start--;
3823 } /* if */
3824 } /* if */
3825 } /* if */
3826 } /* if */
3827 } /* if */
3828 } /* if */
3829 return 0;
3830 } /* strRIPos */
3831
3832
3833
3834 /**
3835 * Pad a string with spaces at the right side up to padSize.
3836 * @return the string right padded with spaces.
3837 */
3838 striType strRpad (const const_striType stri, const intType padSize)
3839
3840 {
3841 memSizeType striSize;
3842 striType result;
3843
3844 /* strRpad */
3845 striSize = stri->size;
3846 if (padSize > 0 && (uintType) padSize > striSize) {
3847 if (unlikely((uintType) padSize > MAX_STRI_LEN ||
3848 !ALLOC_STRI_SIZE_OK(result, (memSizeType) padSize))) {
3849 raise_error(MEMORY_ERROR);
3850 result = NULL;
3851 } else {
3852 result->size = (memSizeType) padSize;
3853 memcpy(result->mem, stri->mem, striSize * sizeof(strElemType));
3854 {
3855 strElemType *elem = &result->mem[striSize];
3856 memSizeType len = (memSizeType) padSize - striSize;
3857
3858 while (len--) {
3859 *elem++ = (strElemType) ' ';
3860 } /* while */
3861 }
3862 } /* if */
3863 } else {
3864 if (unlikely(!ALLOC_STRI_SIZE_OK(result, striSize))) {
3865 raise_error(MEMORY_ERROR);
3866 return NULL;
3867 } /* if */
3868 result->size = striSize;
3869 memcpy(result->mem, stri->mem, striSize * sizeof(strElemType));
3870 } /* if */
3871 return result;
3872 } /* strRpad */
3873
3874
3875
3876 /**
3877 * Determine rightmost position of string 'searched' in 'mainStri'.
3878 * If the string is found the position of its first character
3879 * is the result. The first character in a string has the position 1.
3880 * This function uses a modified Boyer–Moore string search algorithm.
3881 * @return the position of 'searched' or 0 if 'mainStri'
3882 * does not contain 'searched'.
3883 */
3884 static intType strRPos2 (const const_striType mainStri, const const_striType searched)
3885
3886 {
3887 memSizeType main_size;
3888 memSizeType searched_size;
3889 strElemType ch_1;
3890 const strElemType *ch_1_pos;
3891 memSizeType delta;
3892 memSizeType charDelta[CHAR_DELTA_BEYOND + 1];
3893 memSizeType pos;
3894 const strElemType *main_mem;
3895 const strElemType *searched_mem;
3896 const strElemType *search_start;
3897 const strElemType *search_end;
3898
3899 /* strRPos2 */
3900 main_size = mainStri->size;
3901 searched_size = searched->size;
3902 for (ch_1 = 0; ch_1 <= CHAR_DELTA_BEYOND; ch_1++) {
3903 charDelta[ch_1] = searched_size;
3904 } /* for */
3905 searched_mem = searched->mem;
3906 for (pos = searched_size - 1; pos > 0; pos--) {
3907 ch_1 = searched_mem[pos];
3908 if (ch_1 < CHAR_DELTA_BEYOND) {
3909 charDelta[ch_1] = pos;
3910 } else {
3911 charDelta[CHAR_DELTA_BEYOND] = pos;
3912 } /* if */
3913 } /* for */
3914 ch_1 = searched_mem[0];
3915 ch_1_pos = memchr_strelem(&searched_mem[1], ch_1, searched_size - 1);
3916 if (ch_1_pos == NULL) {
3917 delta = searched_size;
3918 } else {
3919 delta = (memSizeType) (ch_1_pos - &searched_mem[0]);
3920 } /* if */
3921 main_mem = mainStri->mem;
3922 search_start = &main_mem[main_size - searched_size];
3923 search_end = &main_mem[-1];
3924 while (search_start > search_end) {
3925 search_start = rsearch_strelem2(search_start, ch_1, search_end, charDelta);
3926 if (search_start == NULL) {
3927 return 0;
3928 } else {
3929 if (memcmp(search_start + 1, &searched_mem[1],
3930 (searched_size - 1) * sizeof(strElemType)) == 0) {
3931 return ((intType) (search_start - main_mem)) + 1;
3932 } else {
3933 search_start -= delta;
3934 } /* if */
3935 } /* if */
3936 } /* while */
3937 return 0;
3938 } /* strRPos2 */
3939
3940
3941
3942 /**
3943 * Determine rightmost position of string 'searched' in 'mainStri'.
3944 * If the string is found the position of its first character
3945 * is the result. The first character in a string has the position 1.
3946 * This function calls strRPos2 if 'mainStri' is long.
3947 * @return the position of 'searched' or 0 if 'mainStri'
3948 * does not contain 'searched'.
3949 */
3950 intType strRPos (const const_striType mainStri, const const_striType searched)
3951
3952 {
3953 memSizeType main_size;
3954 memSizeType searched_size;
3955 strElemType ch_1;
3956 const strElemType *main_mem;
3957 const strElemType *searched_mem;
3958 const strElemType *search_start;
3959 const strElemType *search_end;
3960
3961 /* strRPos */
3962 logFunction(printf("strRPos(\"%s\", ",
3963 striAsUnquotedCStri(mainStri));
3964 printf("\"%s\")\n",
3965 striAsUnquotedCStri(searched)););
3966 main_size = mainStri->size;
3967 searched_size = searched->size;
3968 if (searched_size != 0 && searched_size <= main_size) {
3969 if (searched_size >= BOYER_MOORE_SEARCHED_STRI_THRESHOLD &&
3970 main_size >= BOYER_MOORE_MAIN_STRI_THRESHOLD) {
3971 return strRPos2(mainStri, searched);
3972 } else {
3973 searched_mem = searched->mem;
3974 ch_1 = searched_mem[0];
3975 main_mem = mainStri->mem;
3976 search_start = &main_mem[main_size - searched_size];
3977 search_end = &main_mem[-1];
3978 while ((search_start = rsearch_strelem(search_start,
3979 ch_1, (memSizeType) (search_start - search_end))) != NULL) {
3980 if (memcmp(search_start, searched_mem,
3981 searched_size * sizeof(strElemType)) == 0) {
3982 return ((intType) (search_start - main_mem)) + 1;
3983 } else {
3984 search_start--;
3985 } /* if */
3986 } /* if */
3987 } /* if */
3988 } /* if */
3989 return 0;
3990 } /* strRPos */
3991
3992
3993
3994 /**
3995 * Return string with trailing whitespace omitted.
3996 * All characters less than or equal to ' ' (space) count as whitespace.
3997 * @return string with trailing whitespace omitted.
3998 */
3999 striType strRtrim (const const_striType stri)
4000
4001 {
4002 memSizeType striSize;
4003 striType result;
4004
4005 /* strRtrim */
4006 striSize = stri->size;
4007 while (striSize > 0 && stri->mem[striSize - 1] <= ' ') {
4008 striSize--;
4009 } /* while */
4010 if (unlikely(!ALLOC_STRI_SIZE_OK(result, striSize))) {
4011 raise_error(MEMORY_ERROR);
4012 return NULL;
4013 } else {
4014 result->size = striSize;
4015 memcpy(result->mem, stri->mem, striSize * sizeof(strElemType));
4016 return result;
4017 } /* if */
4018 } /* strRtrim */
4019
4020
4021
4022 /**
4023 * Split 'mainStri' around matches of 'delimiter' into an array of strings.
4024 * The array returned by strSplit() contains each substring of
4025 * 'mainStri' that is terminated by another substring that is equal
4026 * to the 'delimiter' or is terminated by the end of 'mainStri'.
4027 * The substrings in the array are in the order in which they occur in
4028 * 'mainStri'. If 'delimiter' does not match any part of 'mainStri'
4029 * then the resulting array has just one element, namely 'mainStri'.
4030 * split("", ":") returns []("")
4031 * split("x", ":") returns []("x")
4032 * split(":", ":") returns []("", "")
4033 * split("x:", ":") returns []("x", "")
4034 * split(":x", ":") returns []("", "x")
4035 * split("15:30", ":") returns []("15", "30")
4036 * @return the array of strings computed by splitting 'mainStri' around
4037 * matches of the given 'delimiter'.
4038 * @exception MEMORY_ERROR Not enough memory to represent the result.
4039 */
4040 rtlArrayType strSplit (const const_striType mainStri,
4041 const const_striType delimiter)
4042
4043 {
4044 memSizeType delimiter_size;
4045 const strElemType *delimiter_mem;
4046 strElemType ch_1;
4047 intType used_max_position;
4048 const strElemType *search_start;
4049 const strElemType *segment_start;
4050 const strElemType *search_end;
4051 const strElemType *found_pos;
4052 rtlArrayType result_array;
4053
4054 /* strSplit */
4055 logFunction(printf("strSplit(\"%s\", ",
4056 striAsUnquotedCStri(mainStri));
4057 printf("\"%s\")\n", striAsUnquotedCStri(delimiter)););
4058 if (likely(ALLOC_RTL_ARRAY(result_array, INITIAL_ARRAY_SIZE))) {
4059 result_array->min_position = 1;
4060 result_array->max_position = INITIAL_ARRAY_SIZE;
4061 used_max_position = 0;
4062 delimiter_size = delimiter->size;
4063 delimiter_mem = delimiter->mem;
4064 search_start = mainStri->mem;
4065 segment_start = search_start;
4066 if (delimiter_size != 0 && mainStri->size >= delimiter_size) {
4067 ch_1 = delimiter_mem[0];
4068 search_end = &mainStri->mem[mainStri->size - delimiter_size + 1];
4069 while ((found_pos = memchr_strelem(search_start, ch_1,
4070 (memSizeType) (search_end - search_start))) != NULL &&
4071 result_array != NULL) {
4072 if (memcmp(found_pos, delimiter_mem,
4073 delimiter_size * sizeof(strElemType)) == 0) {
4074 result_array = addCopiedStriToRtlArray(segment_start,
4075 (memSizeType) (found_pos - segment_start), result_array,
4076 used_max_position);
4077 used_max_position++;
4078 search_start = found_pos + delimiter_size;
4079 segment_start = search_start;
4080 if (search_start > search_end) {
4081 search_start = search_end;
4082 } /* if */
4083 } else {
4084 search_start = found_pos + 1;
4085 } /* if */
4086 } /* while */
4087 } /* if */
4088 if (likely(result_array != NULL)) {
4089 result_array = addCopiedStriToRtlArray(segment_start,
4090 (memSizeType) (&mainStri->mem[mainStri->size] - segment_start), result_array,
4091 used_max_position);
4092 used_max_position++;
4093 result_array = completeRtlStriArray(result_array, used_max_position);
4094 } /* if */
4095 } /* if */
4096 if (unlikely(result_array == NULL)) {
4097 raise_error(MEMORY_ERROR);
4098 } /* if */
4099 logFunction(printf("strSplit -->\n"););
4100 return result_array;
4101 } /* strSplit */
4102
4103
4104
#if ALLOW_STRITYPE_SLICES
/**
 *  Get a substring from a start position with a given length.
 *  The first character in a string has the position 1.
 *  This function is used by the compiler to avoid copying string data.
 *  The 'slice' is initialized to refer to the substring of 'stri'.
 *  A 'length' that reaches beyond the end of 'stri' is reduced to the
 *  available characters. If 'length' is zero or 'start' is beyond the
 *  end of 'stri' the 'slice' becomes empty.
 *  @exception INDEX_ERROR The start position is negative or zero, or
 *             the length is negative.
 */
void strSubstrSlice (const const_striType stri, intType start, intType length, striType slice)

  {
    memSizeType striSize;
    memSizeType available;

  /* strSubstrSlice */
    logFunction(printf("strSubstrSlice(\"%s\", " FMT_D ", " FMT_D ")",
                       striAsUnquotedCStri(stri), start, length);
                fflush(stdout););
    if (unlikely(start < 1 || length < 0)) {
      logError(printf("strSubstrSlice: Start negative or zero or length negative."););
      raise_error(INDEX_ERROR);
    } else {
      striSize = stri->size;
      SET_SLICE_CAPACITY(slice, 0);
      if (length == 0 || (uintType) start > striSize) {
        slice->mem = NULL;
        slice->size = 0;
      } else {
        /* Number of characters from 'start' to the end of 'stri'. */
        available = striSize - (memSizeType) start + 1;
        slice->mem = &stri->mem[start - 1];
        slice->size = (uintType) length > available ?
            available : (memSizeType) length;
      } /* if */
    } /* if */
    logFunctionResult(printf("\"%s\"\n", striAsUnquotedCStri(slice)););
  } /* strSubstrSlice */

#endif
4145
4146
4147
4148 /**
4149 * Get a substring from a start position with a given length.
4150 * The first character in a string has the position 1.
4151 * @return the substring from the start position with a given length.
4152 * @exception INDEX_ERROR The start position is negative or zero, or
4153 * the length is negative.
4154 * @exception MEMORY_ERROR Not enough memory to represent the result.
4155 */
4156 striType strSubstr (const const_striType stri, intType start, intType length)
4157
4158 {
4159 memSizeType striSize;
4160 memSizeType result_size;
4161 striType result;
4162
4163 /* strSubstr */
4164 if (unlikely(start < 1 || length < 0)) {
4165 logError(printf("strSubstr: Start negative or zero or length negative."););
4166 raise_error(INDEX_ERROR);
4167 result = NULL;
4168 } else {
4169 striSize = stri->size;
4170 if (length != 0 && (uintType) start <= striSize) {
4171 if ((uintType) length > striSize - (memSizeType) start + 1) {
4172 result_size = striSize - (memSizeType) start + 1;
4173 } else {
4174 result_size = (memSizeType) length;
4175 } /* if */
4176 if (unlikely(!ALLOC_STRI_SIZE_OK(result, result_size))) {
4177 raise_error(MEMORY_ERROR);
4178 return NULL;
4179 } /* if */
4180 memcpy(result->mem, &stri->mem[start - 1],
4181 result_size * sizeof(strElemType));
4182 result->size = result_size;
4183 } else {
4184 if (unlikely(!ALLOC_STRI_SIZE_OK(result, (memSizeType) 0))) {
4185 raise_error(MEMORY_ERROR);
4186 } else {
4187 result->size = 0;
4188 } /* if */
4189 } /* if */
4190 } /* if */
4191 return result;
4192 } /* strSubstr */
4193
4194
4195
#if ALLOW_STRITYPE_SLICES
/**
 *  Get a substring beginning at a start position.
 *  The first character in a 'string' has the position 1.
 *  This function is used by the compiler to avoid copying string data.
 *  The 'slice' is initialized to refer to the tail of 'stri'.
 *  If 'start' is beyond the end of 'stri' the 'slice' becomes empty.
 *  @exception INDEX_ERROR The start position is negative or zero.
 */
void strTailSlice (const const_striType stri, intType start, striType slice)

  {
    const memSizeType striSize = stri->size;

  /* strTailSlice */
    logFunction(printf("strTailSlice(\"%s\", " FMT_D ")",
                       striAsUnquotedCStri(stri), start);
                fflush(stdout););
    if (unlikely(start < 1)) {
      logError(printf("strTailSlice: Start negative or zero."););
      raise_error(INDEX_ERROR);
    } else {
      SET_SLICE_CAPACITY(slice, 0);
      if (striSize == 0 || (uintType) start > striSize) {
        slice->mem = NULL;
        slice->size = 0;
      } else {
        slice->mem = &stri->mem[start - 1];
        slice->size = striSize - (memSizeType) start + 1;
      } /* if */
    } /* if */
    logFunctionResult(printf("\"%s\"\n", striAsUnquotedCStri(slice)););
  } /* strTailSlice */

#endif
4230
4231
4232
4233 /**
4234 * Get a substring beginning at a start position.
4235 * The first character in a 'string' has the position 1.
4236 * @return the substring beginning at the start position.
4237 * @exception INDEX_ERROR The start position is negative or zero.
4238 * @exception MEMORY_ERROR Not enough memory to represent the result.
4239 */
4240 striType strTail (const const_striType stri, intType start)
4241
4242 {
4243 memSizeType striSize;
4244 memSizeType tailSize;
4245 striType tail;
4246
4247 /* strTail */
4248 striSize = stri->size;
4249 if (unlikely(start < 1)) {
4250 logError(printf("strTail: Start negative or zero."););
4251 raise_error(INDEX_ERROR);
4252 tail = NULL;
4253 } else if ((uintType) start <= striSize && striSize >= 1) {
4254 tailSize = striSize - (memSizeType) start + 1;
4255 if (unlikely(!ALLOC_STRI_SIZE_OK(tail, tailSize))) {
4256 raise_error(MEMORY_ERROR);
4257 return NULL;
4258 } /* if */
4259 /* Reversing the order of the following two statements */
4260 /* causes an "Internal Compiler Error" with MSC 6.0 */
4261 /* if using the -Ozacegilt optimisation option in the */
4262 /* large memory model (-AL). Note that the order of the */
4263 /* two statements make no difference to the logic of the */
4264 /* program. */
4265 memcpy(tail->mem, &stri->mem[start - 1],
4266 tailSize * sizeof(strElemType));
4267 tail->size = tailSize;
4268 } else {
4269 if (unlikely(!ALLOC_STRI_SIZE_OK(tail, (memSizeType) 0))) {
4270 raise_error(MEMORY_ERROR);
4271 } else {
4272 tail->size = 0;
4273 } /* if */
4274 } /* if */
4275 return tail;
4276 } /* strTail */
4277
4278
4279
4280 /**
4281 * Get a substring beginning at a start position.
4282 * The first character in a 'string' has the position 1.
4283 * StrTailTemp is used by the compiler if 'stri' is temporary
4284 * value that can be reused.
4285 * @return the substring beginning at the start position.
4286 * @exception INDEX_ERROR The start position is negative or zero.
4287 * @exception MEMORY_ERROR Not enough memory to represent the result.
4288 */
4289 striType strTailTemp (const striType stri, intType start)
4290
4291 {
4292 memSizeType striSize;
4293 memSizeType tailSize;
4294 striType tail;
4295
4296 /* strTailTemp */
4297 if (start <= 1) {
4298 if (unlikely(start < 1)) {
4299 logError(printf("strTailTemp: Start negative or zero."););
4300 FREE_STRI(stri, stri->size);
4301 raise_error(INDEX_ERROR);
4302 tail = NULL;
4303 } else {
4304 tail = stri;
4305 } /* if */
4306 } else {
4307 striSize = stri->size;
4308 if ((uintType) start <= striSize && striSize >= 1) {
4309 tailSize = striSize - (memSizeType) start + 1;
4310 memmove(stri->mem, &stri->mem[start - 1],
4311 tailSize * sizeof(strElemType));
4312 } else {
4313 tailSize = 0;
4314 } /* if */
4315 stri->size = tailSize;
4316 #if WITH_STRI_CAPACITY
4317 if (!SHRINK_REASON(stri, tailSize)) {
4318 COUNT_GROW2_STRI(striSize, tailSize);
4319 tail = stri;
4320 } else {
4321 tail = shrinkStri(stri, tailSize);
4322 if (unlikely(tail == NULL)) {
4323 /* Theoretical shrinking a memory area should never fail. */
4324 /* For the strange case that it fails we keep stri intact */
4325 /* with the oversized capacity. */
4326 tail = stri;
4327 } else {
4328 COUNT_SHRINK_STRI(striSize, tailSize);
4329 } /* if */
4330 } /* if */
4331 #else
4332 SHRINK_STRI(tail, stri, striSize, tailSize);
4333 if (unlikely(tail == NULL)) {
4334 /* Theoretical shrinking a memory area should never fail. */
4335 /* For the strange case that it fails we keep stri intact */
4336 /* with the oversized memory usage. */
4337 tail = stri;
4338 } else {
4339 COUNT_SHRINK_STRI(striSize, tailSize);
4340 } /* if */
4341 #endif
4342 } /* if */
4343 return tail;
4344 } /* strTailTemp */
4345
4346
4347
4348 /**
4349 * Convert a string to an UTF-8 encoded string of bytes.
4350 * This function accepts unpaired surrogate halves.
4351 * strToUtf8("\16#dc00;") returns "\16#ed;\16#b0;\16#80;" (surrogate halve)
4352 * @param stri Normal (UTF-32) string to be converted to UTF-8.
4353 * @return 'stri' converted to a string of bytes with UTF-8 encoding.
4354 */
4355 striType strToUtf8 (const const_striType stri)
4356
4357 {
4358 register strElemType *dest;
4359 register strElemType ch;
4360 register memSizeType pos;
4361 memSizeType result_size;
4362 striType resized_result;
4363 striType result;
4364
4365 /* strToUtf8 */
4366 if (unlikely(stri->size > MAX_STRI_LEN / MAX_UTF8_EXPANSION_FACTOR ||
4367 !ALLOC_STRI_SIZE_OK(result, max_utf8_size(stri->size)))) {
4368 raise_error(MEMORY_ERROR);
4369 result = NULL;
4370 } else {
4371 dest = result->mem;
4372 for (pos = 0; pos < stri->size; pos++) {
4373 ch = stri->mem[pos];
4374 if (ch <= 0x7F) {
4375 *dest++ = ch;
4376 } else if (ch <= 0x7FF) {
4377 dest[0] = 0xC0 | ( ch >> 6);
4378 dest[1] = 0x80 | ( ch & 0x3F);
4379 dest += 2;
4380 } else if (ch <= 0xFFFF) {
4381 dest[0] = 0xE0 | ( ch >> 12);
4382 dest[1] = 0x80 | ((ch >> 6) & 0x3F);
4383 dest[2] = 0x80 | ( ch & 0x3F);
4384 dest += 3;
4385 } else if (ch <= 0x1FFFFF) {
4386 dest[0] = 0xF0 | ( ch >> 18);
4387 dest[1] = 0x80 | ((ch >> 12) & 0x3F);
4388 dest[2] = 0x80 | ((ch >> 6) & 0x3F);
4389 dest[3] = 0x80 | ( ch & 0x3F);
4390 dest += 4;
4391 } else if (ch <= 0x3FFFFFF) {
4392 dest[0] = 0xF8 | ( ch >> 24);
4393 dest[1] = 0x80 | ((ch >> 18) & 0x3F);
4394 dest[2] = 0x80 | ((ch >> 12) & 0x3F);
4395 dest[3] = 0x80 | ((ch >> 6) & 0x3F);
4396 dest[4] = 0x80 | ( ch & 0x3F);
4397 dest += 5;
4398 } else {
4399 dest[0] = 0xFC | ( ch >> 30);
4400 dest[1] = 0x80 | ((ch >> 24) & 0x3F);
4401 dest[2] = 0x80 | ((ch >> 18) & 0x3F);
4402 dest[3] = 0x80 | ((ch >> 12) & 0x3F);
4403 dest[4] = 0x80 | ((ch >> 6) & 0x3F);
4404 dest[5] = 0x80 | ( ch & 0x3F);
4405 dest += 6;
4406 } /* if */
4407 } /* for */
4408 result_size = (memSizeType) (dest - result->mem);
4409 REALLOC_STRI_SIZE_SMALLER(resized_result, result, max_utf8_size(stri->size), result_size);
4410 if (unlikely(resized_result == NULL)) {
4411 FREE_STRI(result, max_utf8_size(stri->size));
4412 raise_error(MEMORY_ERROR);
4413 result = NULL;
4414 } else {
4415 result = resized_result;
4416 COUNT3_STRI(max_utf8_size(stri->size), result_size);
4417 result->size = result_size;
4418 } /* if */
4419 } /* if */
4420 return result;
4421 } /* strToUtf8 */
4422
4423
4424
4425 /**
4426 * Return string with leading and trailing whitespace omitted.
4427 * All characters less than or equal to ' ' (space) count as whitespace.
4428 * @return string with leading and trailing whitespace omitted.
4429 */
4430 striType strTrim (const const_striType stri)
4431
4432 {
4433 memSizeType start = 0;
4434 memSizeType striSize;
4435 striType result;
4436
4437 /* strTrim */
4438 striSize = stri->size;
4439 if (striSize >= 1) {
4440 while (start < striSize && stri->mem[start] <= ' ') {
4441 start++;
4442 } /* while */
4443 while (striSize > start && stri->mem[striSize - 1] <= ' ') {
4444 striSize--;
4445 } /* while */
4446 striSize -= start;
4447 } /* if */
4448 if (unlikely(!ALLOC_STRI_SIZE_OK(result, striSize))) {
4449 raise_error(MEMORY_ERROR);
4450 return NULL;
4451 } else {
4452 result->size = striSize;
4453 memcpy(result->mem, &stri->mem[start], striSize * sizeof(strElemType));
4454 return result;
4455 } /* if */
4456 } /* strTrim */
4457
4458
4459
4460 /**
4461 * Convert a string to upper case.
4462 * The conversion uses the default Unicode case mapping,
4463 * where each character is considered in isolation.
4464 * Characters without case mapping are left unchanged.
4465 * The mapping is independent from the locale. Individual
4466 * character case mappings cannot be reversed, because some
4467 * characters have multiple characters that map to them.
4468 * @return the string converted to upper case.
4469 */
4470 striType strUp (const const_striType stri)
4471
4472 {
4473 memSizeType striSize;
4474 striType result;
4475
4476 /* strUp */
4477 logFunction(printf("strUp(\"%s\")",
4478 striAsUnquotedCStri(stri));
4479 fflush(stdout););
4480 striSize = stri->size;
4481 if (unlikely(!ALLOC_STRI_SIZE_OK(result, striSize))) {
4482 raise_error(MEMORY_ERROR);
4483 } else {
4484 result->size = striSize;
4485 toUpper(stri->mem, stri->size, result->mem);
4486 } /* if */
4487 logFunctionResult(printf("\"%s\"\n", striAsUnquotedCStri(result)););
4488 return result;
4489 } /* strUp */
4490
4491
4492
4493 /**
4494 * Convert a string to upper case.
4495 * StrUpTemp is used by the compiler if 'stri' is temporary
4496 * value that can be reused.
4497 * @return the string converted to lower case.
4498 */
4499 striType strUpTemp (const striType stri)
4500
4501 { /* strUpTemp */
4502 logFunction(printf("strUpTemp(\"%s\")",
4503 striAsUnquotedCStri(stri));
4504 fflush(stdout););
4505 toUpper(stri->mem, stri->size, stri->mem);
4506 logFunctionResult(printf("\"%s\"\n", striAsUnquotedCStri(stri)););
4507 return stri;
4508 } /* strUpTemp */
4509
4510
4511
4512 /**
4513 * Convert a string with bytes in UTF-8 encoding to UTF-32.
4514 * This function accepts overlong encodings and unpaired surrogate halves.
4515 * strUtf8ToStri("\16#c0;\16#80;") returns "\0;" (overlong encoding)
4516 * strUtf8ToStri("\16#ed;\16#b0;\16#80;") returns "\16#dc00;" (surrogate halve)
4517 * @param utf8 String of bytes encoded with UTF-8.
4518 * @return 'utf8' converted to a normal (UTF-32) string.
4519 * @exception RANGE_ERROR If characters beyond '\255;' are present or
4520 * if 'utf8' is not encoded with UTF-8.
4521 */
4522 striType strUtf8ToStri (const const_striType utf8)
4523
4524 {
4525 memSizeType utf8Size;
4526 memSizeType pos;
4527 const strElemType *utf8ptr;
4528 boolType okay = TRUE;
4529 striType resized_result;
4530 striType result;
4531
4532 /* strUtf8ToStri */
4533 logFunction(printf("strUtf8ToStri(\"%s\")\n",
4534 striAsUnquotedCStri(utf8)););
4535 utf8Size = utf8->size;
4536 if (unlikely(!ALLOC_STRI_SIZE_OK(result, utf8Size))) {
4537 raise_error(MEMORY_ERROR);
4538 } else {
4539 utf8ptr = &utf8->mem[0];
4540 pos = 0;
4541 for (; utf8Size > 0; pos++, utf8Size--) {
4542 if (*utf8ptr <= 0x7F) {
4543 result->mem[pos] = *utf8ptr++;
4544 } else if (utf8ptr[0] >= 0xC0 && utf8ptr[0] <= 0xDF && utf8Size >= 2 &&
4545 utf8ptr[1] >= 0x80 && utf8ptr[1] <= 0xBF) {
4546 /* utf8ptr[0] range 192 to 223 (leading bits 110.....) */
4547 /* utf8ptr[1] range 128 to 191 (leading bits 10......) */
4548 result->mem[pos] = (utf8ptr[0] & 0x1F) << 6 |
4549 (utf8ptr[1] & 0x3F);
4550 utf8ptr += 2;
4551 utf8Size--;
4552 } else if (utf8ptr[0] >= 0xE0 && utf8ptr[0] <= 0xEF && utf8Size >= 3 &&
4553 utf8ptr[1] >= 0x80 && utf8ptr[1] <= 0xBF &&
4554 utf8ptr[2] >= 0x80 && utf8ptr[2] <= 0xBF) {
4555 /* utf8ptr[0] range 224 to 239 (leading bits 1110....) */
4556 /* utf8ptr[1..] range 128 to 191 (leading bits 10......) */
4557 result->mem[pos] = (utf8ptr[0] & 0x0F) << 12 |
4558 (utf8ptr[1] & 0x3F) << 6 |
4559 (utf8ptr[2] & 0x3F);
4560 utf8ptr += 3;
4561 utf8Size -= 2;
4562 } else if (utf8ptr[0] >= 0xF0 && utf8ptr[0] <= 0xF7 && utf8Size >= 4 &&
4563 utf8ptr[1] >= 0x80 && utf8ptr[1] <= 0xBF &&
4564 utf8ptr[2] >= 0x80 && utf8ptr[2] <= 0xBF &&
4565 utf8ptr[3] >= 0x80 && utf8ptr[3] <= 0xBF) {
4566 /* utf8ptr[0] range 240 to 247 (leading bits 11110...) */
4567 /* utf8ptr[1..] range 128 to 191 (leading bits 10......) */
4568 result->mem[pos] = (utf8ptr[0] & 0x07) << 18 |
4569 (utf8ptr[1] & 0x3F) << 12 |
4570 (utf8ptr[2] & 0x3F) << 6 |
4571 (utf8ptr[3] & 0x3F);
4572 utf8ptr += 4;
4573 utf8Size -= 3;
4574 } else if (utf8ptr[0] >= 0xF8 && utf8ptr[0] <= 0xFB && utf8Size >= 5 &&
4575 utf8ptr[1] >= 0x80 && utf8ptr[1] <= 0xBF &&
4576 utf8ptr[2] >= 0x80 && utf8ptr[2] <= 0xBF &&
4577 utf8ptr[3] >= 0x80 && utf8ptr[3] <= 0xBF &&
4578 utf8ptr[4] >= 0x80 && utf8ptr[4] <= 0xBF) {
4579 /* utf8ptr[0] range 248 to 251 (leading bits 111110..) */
4580 /* utf8ptr[1..] range 128 to 191 (leading bits 10......) */
4581 result->mem[pos] = (utf8ptr[0] & 0x03) << 24 |
4582 (utf8ptr[1] & 0x3F) << 18 |
4583 (utf8ptr[2] & 0x3F) << 12 |
4584 (utf8ptr[3] & 0x3F) << 6 |
4585 (utf8ptr[4] & 0x3F);
4586 utf8ptr += 5;
4587 utf8Size -= 4;
4588 } else if (utf8ptr[0] >= 0xFC && utf8ptr[0] <= 0xFF && utf8Size >= 6 &&
4589 utf8ptr[1] >= 0x80 && utf8ptr[1] <= 0xBF &&
4590 utf8ptr[2] >= 0x80 && utf8ptr[2] <= 0xBF &&
4591 utf8ptr[3] >= 0x80 && utf8ptr[3] <= 0xBF &&
4592 utf8ptr[4] >= 0x80 && utf8ptr[4] <= 0xBF &&
4593 utf8ptr[5] >= 0x80 && utf8ptr[5] <= 0xBF) {
4594 /* utf8ptr[0] range 252 to 255 (leading bits 111111..) */
4595 /* utf8ptr[1..] range 128 to 191 (leading bits 10......) */
4596 result->mem[pos] = (utf8ptr[0] & 0x03) << 30 |
4597 (utf8ptr[1] & 0x3F) << 24 |
4598 (utf8ptr[2] & 0x3F) << 18 |
4599 (utf8ptr[3] & 0x3F) << 12 |
4600 (utf8ptr[4] & 0x3F) << 6 |
4601 (utf8ptr[5] & 0x3F);
4602 utf8ptr += 6;
4603 utf8Size -= 5;
4604 } else {
4605 /* utf8ptr[0] not in range 0xC0 to 0xFF (192 to 255) */
4606 /* or not enough continuation bytes found. */
4607 logError(printf("strUtf8ToStri: "
4608 "Invalid byte sequence starting at position "
4609 FMT_U_MEM ": \"\\" FMT_U32 ";\\ ...\".\n",
4610 (memSizeType) (utf8ptr - &utf8->mem[0]),
4611 utf8ptr[0]););
4612 okay = FALSE;
4613 utf8Size = 1;
4614 } /* if */
4615 } /* for */
4616 if (likely(okay)) {
4617 result->size = pos;
4618 if (pos != utf8->size) {
4619 REALLOC_STRI_SIZE_SMALLER(resized_result, result, utf8->size, pos);
4620 if (unlikely(resized_result == NULL)) {
4621 FREE_STRI(result, utf8->size);
4622 raise_error(MEMORY_ERROR);
4623 result = NULL;
4624 } else {
4625 result = resized_result;
4626 COUNT3_STRI(utf8->size, pos);
4627 } /* if */
4628 } /* if */
4629 } else {
4630 FREE_STRI(result, utf8->size);
4631 raise_error(RANGE_ERROR);
4632 result = NULL;
4633 } /* if */
4634 } /* if */
4635 logFunction(printf("strUtf8ToStri --> \"%s\"\n",
4636 striAsUnquotedCStri(result)););
4637 return result;
4638 } /* strUtf8ToStri */
4639
4640
4641
4642 /**
4643 * String multiplication of the character '\0'.
4644 * The character '\0' is concatenated to itself such that in total
4645 * 'factor' characters are concatenated.
4646 * @return the result of the string multiplication.
4647 * @exception RANGE_ERROR If the factor is negative.
4648 */
4649 striType strZero (const intType factor)
4650
4651 {
4652 striType result;
4653
4654 /* strZero */
4655 logFunction(printf("strZero(" FMT_D ")\n", factor););
4656 if (unlikely(factor < 0)) {
4657 logError(printf("strZero(" FMT_D "): Negative factor.\n",
4658 factor););
4659 raise_error(RANGE_ERROR);
4660 result = NULL;
4661 } else {
4662 if (unlikely((uintType) factor > MAX_STRI_LEN ||
4663 !ALLOC_STRI_SIZE_OK(result, (memSizeType) factor))) {
4664 raise_error(MEMORY_ERROR);
4665 result = NULL;
4666 } else {
4667 result->size = (memSizeType) factor;
4668 memset(result->mem, 0, (memSizeType) factor * sizeof(strElemType));
4669 } /* if */
4670 } /* if */
4671 return result;
4672 } /* strZero */
4673