1 /*
2 Copyright (C) 2001-2014, Parrot Foundation.
3
4 =head1 NAME
5
6 src/string/api.c - Parrot Strings
7
8 =head1 DESCRIPTION
9
10 This file implements the non-ICU parts of the Parrot string subsystem.
11
12 Note that C<bufstart> and C<buflen> are used by the memory subsystem. The
13 string functions may only use C<buflen> to determine if there is some space
14 left beyond C<bufused>. This is the I<only> valid usage of these two data
15 members, beside setting C<bufstart>/C<buflen> for external strings.
16
17 =head2 Functions
18
19 =over 4
20
21 =cut
22
23 */
24
25 #include <stdio.h>
26 #include <math.h>
27
28 #include "parrot/parrot.h"
29 #include "parrot/events.h"
30 #include "private_cstring.h"
31 #include "api.str"
32
33 /* for parrot/interpreter.h */
34 STRING *STRINGNULL;
35
36 #define nonnull_encoding_name(s) (s) ? (s)->encoding->name : "null string"
37 #define ASSERT_STRING_SANITY(s) \
38 PARROT_ASSERT((s)->encoding); \
39 PARROT_ASSERT(!PObj_on_free_list_TEST(s))
40
41 /* HEADERIZER HFILE: include/parrot/string_funcs.h */
42
43 /* HEADERIZER BEGIN: static */
44 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
45
46 PARROT_WARN_UNUSED_RESULT
47 PARROT_PURE_FUNCTION
48 static INTVAL string_max_bytes(PARROT_INTERP,
49 ARGIN(const STRING *s),
50 UINTVAL nchars)
51 __attribute__nonnull__(2);
52
53 PARROT_INLINE
54 PARROT_IGNORABLE_RESULT
55 PARROT_CAN_RETURN_NULL
56 PARROT_PURE_FUNCTION
57 static const STR_VTABLE * string_rep_compatible(
58 ARGIN(const STRING *a),
59 ARGIN(const STRING *b))
60 __attribute__nonnull__(1)
61 __attribute__nonnull__(2);
62
63 PARROT_DOES_NOT_RETURN
64 PARROT_COLD
65 static void throw_illegal_escape(PARROT_INTERP, ARGIN(const STRING *s))
66 __attribute__nonnull__(1)
67 __attribute__nonnull__(2);
68
69 PARROT_DOES_NOT_RETURN
70 PARROT_COLD
71 static void throw_illegal_escape_char(PARROT_INTERP,
72 const char c,
73 ARGIN(const STRING *s))
74 __attribute__nonnull__(1)
75 __attribute__nonnull__(3);
76
77 #define ASSERT_ARGS_string_max_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
78 PARROT_ASSERT_ARG(s))
79 #define ASSERT_ARGS_string_rep_compatible __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
80 PARROT_ASSERT_ARG(a) \
81 , PARROT_ASSERT_ARG(b))
82 #define ASSERT_ARGS_throw_illegal_escape __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
83 PARROT_ASSERT_ARG(interp) \
84 , PARROT_ASSERT_ARG(s))
85 #define ASSERT_ARGS_throw_illegal_escape_char __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
86 PARROT_ASSERT_ARG(interp) \
87 , PARROT_ASSERT_ARG(s))
88 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */
89 /* HEADERIZER END: static */
90
91 /*
92 Buffer size for hexadecimal conversions:
93 Expected at most 8 characters plus room for some prefixes and suffixes.
94 */
95 #define HEX_BUF_SIZE 16
96
97 /*
98
99 =item C<INTVAL Parrot_str_is_null(PARROT_INTERP, const STRING *s)>
100
101 Tests if the given STRING is STRINGNULL.
102
103 =cut
104
105 */
106
107 PARROT_EXPORT
108 PARROT_HOT
109 PARROT_PURE_FUNCTION
110 INTVAL
Parrot_str_is_null(SHIM_INTERP,ARGIN_NULLOK (const STRING * s))111 Parrot_str_is_null(SHIM_INTERP, ARGIN_NULLOK(const STRING *s))
112 {
113 ASSERT_ARGS(Parrot_str_is_null)
114 return STRING_IS_NULL(s);
115 }
116
117
118 /*
119
120 =back
121
122 =head2 Basic String Functions
123
124 Creation, enlargement, etc.
125
126 =over 4
127
128 =item C<void Parrot_str_init(PARROT_INTERP)>
129
130 Initializes the Parrot string subsystem.
131
132 =cut
133
134 */
135
136 PARROT_EXPORT
137 void
Parrot_str_init(PARROT_INTERP)138 Parrot_str_init(PARROT_INTERP)
139 {
140 ASSERT_ARGS(Parrot_str_init)
141 Hash *const_cstring_hash;
142 size_t i;
143 const size_t n_parrot_cstrings =
144 sizeof (parrot_cstrings) / sizeof (parrot_cstrings[0]);
145
146 if (interp->parent_interpreter)
147 interp->hash_seed = interp->parent_interpreter->hash_seed;
148
149 /* interp is initialized from zeroed memory, so this is fine */
150 else if (interp->hash_seed == 0) {
151 interp->hash_seed = Parrot_get_entropy(interp);
152 }
153
154 /* initialize the constant string table */
155 if (interp->parent_interpreter) {
156 interp->const_cstring_table =
157 interp->parent_interpreter->const_cstring_table;
158 interp->const_cstring_hash =
159 interp->parent_interpreter->const_cstring_hash;
160 return;
161 }
162
163 /* Set up the cstring cache, then load the basic encodings */
164 const_cstring_hash = Parrot_hash_create_sized(interp,
165 enum_type_PMC,
166 Hash_key_type_cstring,
167 n_parrot_cstrings);
168 interp->const_cstring_hash = const_cstring_hash;
169 Parrot_encodings_init(interp);
170
171 /* initialize STRINGNULL, but not in the constant table */
172 STRINGNULL = Parrot_str_new_init(interp, NULL, 0,
173 Parrot_null_encoding_ptr,
174 PObj_constant_FLAG);
175
176 interp->const_cstring_table =
177 mem_gc_allocate_n_zeroed_typed(interp, n_parrot_cstrings, STRING *);
178 PARROT_ASSERT(interp->const_cstring_table != NULL);
179
180 for (i = 0; i < n_parrot_cstrings; ++i) {
181 DECL_CONST_CAST;
182 STRING * const s =
183 Parrot_str_new_init(interp,
184 parrot_cstrings[i].string,
185 parrot_cstrings[i].len,
186 Parrot_default_encoding_ptr,
187 PObj_external_FLAG|PObj_constant_FLAG);
188 Parrot_hash_put(interp, const_cstring_hash,
189 PARROT_const_cast(char *, parrot_cstrings[i].string), (void *)s);
190 interp->const_cstring_table[i] = s;
191 }
192 }
193
194
195 /*
196
197 =item C<void Parrot_str_finish(PARROT_INTERP)>
198
199 De-Initializes the Parrot string subsystem.
200
201 =cut
202
203 */
204
205 PARROT_EXPORT
206 void
Parrot_str_finish(PARROT_INTERP)207 Parrot_str_finish(PARROT_INTERP)
208 {
209 ASSERT_ARGS(Parrot_str_finish)
210
211 /* all are shared between interpreters */
212 if (!interp->parent_interpreter) {
213 mem_internal_free(interp->const_cstring_table);
214 interp->const_cstring_table = NULL;
215 Parrot_deinit_encodings(interp);
216 Parrot_hash_destroy(interp, interp->const_cstring_hash);
217 }
218 }
219
220
221 /*
222
223 =item C<STRING * Parrot_str_new_noinit(PARROT_INTERP, UINTVAL capacity)>
224
225 Creates and returns an empty Parrot string.
226
227 =cut
228
229 */
230
231 PARROT_EXPORT
232 PARROT_CANNOT_RETURN_NULL
233 STRING *
Parrot_str_new_noinit(PARROT_INTERP,UINTVAL capacity)234 Parrot_str_new_noinit(PARROT_INTERP, UINTVAL capacity)
235 {
236 ASSERT_ARGS(Parrot_str_new_noinit)
237 STRING * const s = Parrot_gc_new_string_header(interp, 0);
238
239 s->encoding = Parrot_default_encoding_ptr;
240
241 Parrot_gc_allocate_string_storage(interp, s,
242 (size_t)string_max_bytes(interp, s, capacity));
243
244 return s;
245 }
246
247
248 /*
249
250 =item C<static const STR_VTABLE * string_rep_compatible(const STRING *a, const
251 STRING *b)>
252
253 Find the "lowest" possible encoding for the given string. E.g.
254
255 ascii <op> utf8 => utf8
256 => ascii, B<if> C<STRING *b> has ascii chars only.
257
258 Returns NULL, if no compatible string representation can be found.
259
260 =cut
261
262 */
263
264 PARROT_INLINE
265 PARROT_IGNORABLE_RESULT
266 PARROT_CAN_RETURN_NULL
267 PARROT_PURE_FUNCTION
268 static const STR_VTABLE *
string_rep_compatible(ARGIN (const STRING * a),ARGIN (const STRING * b))269 string_rep_compatible(ARGIN(const STRING *a), ARGIN(const STRING *b))
270 {
271 ASSERT_ARGS(string_rep_compatible)
272
273 PARROT_ASSERT(a->encoding && b->encoding);
274
275 if (a->encoding == b->encoding)
276 return a->encoding;
277
278 /* a table could possibly simplify the logic */
279
280 if (STRING_max_bytes_per_codepoint(a) == 1
281 && STRING_max_bytes_per_codepoint(b) == 1) {
282 /* Return the "largest" encoding where ascii < latin1 < binary */
283
284 if (b->encoding == Parrot_ascii_encoding_ptr)
285 return a->encoding;
286 if (a->encoding == Parrot_ascii_encoding_ptr)
287 return b->encoding;
288 if (a->encoding == Parrot_binary_encoding_ptr)
289 return a->encoding;
290 if (b->encoding == Parrot_binary_encoding_ptr)
291 return b->encoding;
292 }
293 else {
294 /* UTF-8 strings are ASCII compatible if their byte length equals
295 their codepoint length. This is a nice trick but it can cause many
296 surprises when UTF-8 strings are suddenly "downgraded" to ASCII
297 strings. */
298
299 if (a->encoding == Parrot_utf8_encoding_ptr
300 && b->encoding == Parrot_ascii_encoding_ptr) {
301 if (a->strlen == a->bufused) {
302 return b->encoding;
303 }
304 return a->encoding;
305 }
306
307 if (b->encoding == Parrot_utf8_encoding_ptr
308 && a->encoding == Parrot_ascii_encoding_ptr) {
309 if (b->strlen == b->bufused) {
310 return a->encoding;
311 }
312 return b->encoding;
313 }
314 }
315
316 return NULL;
317 }
318
319 /*
320
321 =item C<const STR_VTABLE * Parrot_str_rep_compatible(PARROT_INTERP, const STRING
322 *a, const STRING *b)>
323
324 Find the "lowest" possible encoding for the given string. E.g.
325
326 ascii <op> utf8 => utf8
327 => ascii, B<if> C<STRING *b> has ascii chars only.
328
329 Returns NULL, if no compatible string representation can be found.
330
331 =cut
332
333 */
334
335 PARROT_EXPORT
336 PARROT_IGNORABLE_RESULT
337 PARROT_CAN_RETURN_NULL
338 PARROT_PURE_FUNCTION
339 const STR_VTABLE *
Parrot_str_rep_compatible(SHIM_INTERP,ARGIN (const STRING * a),ARGIN (const STRING * b))340 Parrot_str_rep_compatible(SHIM_INTERP, ARGIN(const STRING *a), ARGIN(const STRING *b))
341 {
342 ASSERT_ARGS(Parrot_str_rep_compatible)
343 return string_rep_compatible(a, b);
344 }
345
346 /*
347
348 =item C<STRING * Parrot_str_clone(PARROT_INTERP, const STRING *s)>
349
350 Helper function to clone string.
351
352 =cut
353
354 */
355
356 PARROT_WARN_UNUSED_RESULT
357 PARROT_CANNOT_RETURN_NULL
358 STRING *
Parrot_str_clone(PARROT_INTERP,ARGIN_NULLOK (const STRING * s))359 Parrot_str_clone(PARROT_INTERP, ARGIN_NULLOK(const STRING *s))
360 {
361 ASSERT_ARGS(Parrot_str_clone)
362 size_t alloc_size;
363 STRING *result;
364
365 if (STRING_IS_NULL(s))
366 return STRINGNULL;
367
368 result = Parrot_gc_new_string_header(interp, 0);
369 alloc_size = s->bufused;
370
371 if (alloc_size) {
372 /* Allocate new chunk of memory */
373 Parrot_gc_allocate_string_storage(interp, result, alloc_size);
374
375 /* and copy it over */
376 memcpy(result->strstart, s->strstart, alloc_size);
377 }
378
379 result->bufused = alloc_size;
380 result->strlen = s->strlen;
381 result->hashval = s->hashval;
382 result->encoding = s->encoding;
383
384 return result;
385 }
386
387
388 /*
389
390 =item C<STRING * Parrot_str_copy(PARROT_INTERP, const STRING *s)>
391
392 Creates and returns a shallow copy of the specified Parrot string.
393
394 =cut
395
396 */
397
398 PARROT_EXPORT
399 PARROT_CANNOT_RETURN_NULL
400 PARROT_WARN_UNUSED_RESULT
401 STRING *
Parrot_str_copy(PARROT_INTERP,ARGIN_NULLOK (const STRING * s))402 Parrot_str_copy(PARROT_INTERP, ARGIN_NULLOK(const STRING *s))
403 {
404 ASSERT_ARGS(Parrot_str_copy)
405 STRING *d;
406 int is_movable;
407
408 if (STRING_IS_NULL(s))
409 return STRINGNULL;
410
411 d = Parrot_gc_new_string_header(interp,
412 PObj_get_FLAGS(s) & ~PObj_constant_FLAG);
413
414 /* This might set the constant flag again but it is the right thing
415 * to do */
416 STRUCT_COPY(d, s);
417
418 /*
419 * FIXME. It's abstraction leak here from GC.
420 * Basically if we are copying string from older generation
421 * we have to clear flags about it.
422 */
423 d->flags &= ~PObj_GC_all_generation_FLAGS;
424
425 /* Clear live flag. It might be set on constant strings */
426 PObj_live_CLEAR(d);
427
428 /* Set the string copy flag */
429 PObj_is_string_copy_SET(d);
430
431 is_movable = PObj_is_movable_TESTALL(s);
432
433 /* Now check that buffer allocated from pool and affected by compacting */
434 if (is_movable && Buffer_bufstart(s)) {
435 /* If so, mark it as shared */
436 INTVAL * const buffer_flags = Buffer_bufflagsptr(d);
437 *buffer_flags |= Buffer_shared_FLAG;
438 }
439
440 PARROT_ASSERT(is_movable == PObj_is_movable_TESTALL(d));
441
442 return d;
443 }
444
445
446 /*
447
448 =item C<STRING * Parrot_str_concat(PARROT_INTERP, const STRING *a, const STRING
449 *b)>
450
451 Concatenates two Parrot strings. If necessary, converts the second
452 string's encoding and/or type to match those of the first string. If
453 either string is C<NULL>, then a copy of the non-C<NULL> string is
454 returned. If both strings are C<NULL>, return C<STRINGNULL>.
455
456 =cut
457
458 */
459
460 PARROT_EXPORT
461 PARROT_CANNOT_RETURN_NULL
462 STRING *
Parrot_str_concat(PARROT_INTERP,ARGIN_NULLOK (const STRING * a),ARGIN_NULLOK (const STRING * b))463 Parrot_str_concat(PARROT_INTERP, ARGIN_NULLOK(const STRING *a),
464 ARGIN_NULLOK(const STRING *b))
465 {
466 ASSERT_ARGS(Parrot_str_concat)
467 const STR_VTABLE *enc;
468 STRING *dest;
469 UINTVAL total_length;
470
471 if (STRING_IS_NULL(a) && STRING_IS_NULL(b))
472 return STRINGNULL;
473
474 if (STRING_length(a) == 0) {
475 if (STRING_length(b) == 0)
476 return CONST_STRING(interp, "");
477 else
478 return Parrot_str_copy(interp, b);
479 }
480 else {
481 if (STRING_length(b) == 0)
482 return Parrot_str_copy(interp, a);
483 }
484
485 ASSERT_STRING_SANITY(a);
486 ASSERT_STRING_SANITY(b);
487
488 enc = string_rep_compatible(a, b);
489
490 if (!enc) {
491 /* upgrade strings for concatenation */
492 if (a->encoding == Parrot_ucs4_encoding_ptr
493 || b->encoding == Parrot_ucs4_encoding_ptr)
494 enc = Parrot_ucs4_encoding_ptr;
495 else if (a->encoding == Parrot_utf16_encoding_ptr
496 || b->encoding == Parrot_utf16_encoding_ptr
497 || a->encoding == Parrot_ucs2_encoding_ptr
498 || b->encoding == Parrot_ucs2_encoding_ptr)
499 enc = Parrot_utf16_encoding_ptr;
500 else
501 enc = Parrot_utf8_encoding_ptr;
502
503 a = enc->to_encoding(interp, a);
504 b = enc->to_encoding(interp, b);
505 }
506 /* calc usable and total bytes */
507 total_length = a->bufused + b->bufused;
508
509 if (PObj_is_growable_TESTALL(a)
510 && a->strstart + total_length <=
511 (char *)Buffer_bufstart(a) + Buffer_buflen(a)) {
512 /* String a is growable and there's enough space in the buffer */
513 DECL_CONST_CAST;
514
515 dest = Parrot_str_copy(interp, a);
516
517 /* Switch string copy flags */
518 PObj_is_string_copy_SET(PARROT_const_cast(STRING *, a));
519 PObj_is_string_copy_CLEAR(dest);
520
521 /* Append b */
522 memcpy(dest->strstart + dest->bufused,
523 b->strstart, b->bufused);
524
525 dest->encoding = enc;
526 dest->hashval = 0;
527 }
528 else {
529 if (4 * b->bufused < a->bufused) {
530 /* Preallocate more memory if we're appending a short string to
531 a long string */
532 total_length += total_length >> 1;
533 }
534
535 dest = Parrot_str_new_noinit(interp, total_length);
536 PARROT_ASSERT(enc);
537 dest->encoding = enc;
538
539 /* Copy A first */
540 memcpy(dest->strstart, a->strstart, a->bufused);
541
542 /* Tack B on the end of A */
543 memcpy(dest->strstart + a->bufused,
544 b->strstart, b->bufused);
545 }
546
547 dest->bufused = a->bufused + b->bufused;
548 dest->strlen = a->strlen + b->strlen;
549
550 return dest;
551 }
552
553
554 /*
555
556 =item C<STRING * Parrot_str_new(PARROT_INTERP, const char *buffer, const UINTVAL
557 len)>
558
559 Makes a Parrot string from a specified C string.
560
561 =cut
562
563 */
564
565 PARROT_EXPORT
566 PARROT_WARN_UNUSED_RESULT
567 PARROT_MALLOC
568 PARROT_CANNOT_RETURN_NULL
569 STRING *
Parrot_str_new(PARROT_INTERP,ARGIN_NULLOK (const char * buffer),const UINTVAL len)570 Parrot_str_new(PARROT_INTERP, ARGIN_NULLOK(const char *buffer), const UINTVAL len)
571 {
572 ASSERT_ARGS(Parrot_str_new)
573 /* Force an 8-bit encoding at some point? */
574 const UINTVAL buff_length = (len > 0) ? len : buffer ? strlen(buffer) : 0;
575
576 return Parrot_str_new_init(interp, buffer, buff_length,
577 Parrot_default_encoding_ptr, 0);
578 }
579
580
581 /*
582
583 =item C<STRING * Parrot_str_new_from_buffer(PARROT_INTERP, Parrot_Buffer
584 *buffer, const UINTVAL len)>
585
586 Makes a Parrot string from a Buffer.
587
588 The Buffer is nulled afterwards, as only one PObj can point at a given string
589 pool object.
590
591 =cut
592
593 */
594
595 PARROT_EXPORT
596 PARROT_WARN_UNUSED_RESULT
597 PARROT_MALLOC
598 PARROT_CANNOT_RETURN_NULL
599 STRING *
Parrot_str_new_from_buffer(PARROT_INTERP,ARGMOD (Parrot_Buffer * buffer),const UINTVAL len)600 Parrot_str_new_from_buffer(PARROT_INTERP, ARGMOD(Parrot_Buffer *buffer), const UINTVAL len)
601 {
602 ASSERT_ARGS(Parrot_str_new_from_buffer)
603
604 STRING * const result = Parrot_gc_new_string_header(interp, 0);
605 Buffer_bufstart(result) = Buffer_bufstart(buffer);
606 Buffer_buflen(result) = Buffer_buflen(buffer);
607 result->strstart = (char *)Buffer_bufstart(result);
608 result->bufused = len;
609 result->strlen = len;
610 result->encoding = Parrot_binary_encoding_ptr;
611
612 Buffer_buflen(buffer) = 0;
613 Buffer_bufstart(buffer) = NULL;
614
615 return result;
616 }
617
618
619 /*
620
621 =item C<STRING * Parrot_str_new_constant(PARROT_INTERP, const char *buffer)>
622
623 Creates and returns a constant Parrot string.
624
625 =cut
626
627 */
628
629 PARROT_EXPORT
630 PARROT_WARN_UNUSED_RESULT
631 PARROT_CANNOT_RETURN_NULL
632 STRING *
Parrot_str_new_constant(PARROT_INTERP,ARGIN (const char * buffer))633 Parrot_str_new_constant(PARROT_INTERP, ARGIN(const char *buffer))
634 {
635 ASSERT_ARGS(Parrot_str_new_constant)
636 DECL_CONST_CAST;
637 Hash * const cstring_cache = interp->const_cstring_hash;
638 STRING *s = (STRING *)Parrot_hash_get(interp,
639 cstring_cache, buffer);
640
641 if (s)
642 return s;
643
644 s = Parrot_str_new_init(interp, buffer, strlen(buffer),
645 Parrot_default_encoding_ptr,
646 PObj_external_FLAG|PObj_constant_FLAG);
647
648 Parrot_hash_put(interp, cstring_cache,
649 PARROT_const_cast(char *, buffer), (void *)s);
650
651 return s;
652 }
653
654 /*
655
656 =item C<STRING * Parrot_str_new_init(PARROT_INTERP, const char *buffer, UINTVAL
657 len, const STR_VTABLE *encoding, UINTVAL flags)>
658
659 Given a buffer, its length, an encoding, and STRING flags, creates and returns
660 a new string. If buffer is NULL and len >= 0, allocates len bytes.
661
662 =cut
663
664 */
665
666 PARROT_EXPORT
667 PARROT_WARN_UNUSED_RESULT
668 PARROT_CANNOT_RETURN_NULL
669 STRING *
Parrot_str_new_init(PARROT_INTERP,ARGIN_NULLOK (const char * buffer),UINTVAL len,ARGIN (const STR_VTABLE * encoding),UINTVAL flags)670 Parrot_str_new_init(PARROT_INTERP, ARGIN_NULLOK(const char *buffer), UINTVAL len,
671 ARGIN(const STR_VTABLE *encoding), UINTVAL flags)
672 {
673 ASSERT_ARGS(Parrot_str_new_init)
674 DECL_CONST_CAST;
675 STRING * const s = Parrot_gc_new_string_header(interp, flags);
676 s->encoding = encoding;
677
678 if (flags & PObj_external_FLAG) {
679 /*
680 * fast path for external (constant) strings - don't allocate
681 * and copy data
682 */
683 /* The following cast discards the 'const'. That raises
684 a warning with gcc, but is ok since the caller indicated
685 it was safe by setting PObj_external_FLAG.
686 (The cast is necessary to pacify TenDRA's tcc.)
687 */
688 Buffer_bufstart(s) = s->strstart = PARROT_const_cast(char *, buffer);
689 Buffer_buflen(s) = s->bufused = len;
690
691 STRING_scan(interp, s);
692
693 return s;
694 }
695
696 Parrot_gc_allocate_string_storage(interp, s, len);
697
698 if (buffer && len) {
699 memcpy(s->strstart, buffer, len);
700 s->bufused = len;
701 STRING_scan(interp, s);
702 }
703 else
704 s->strlen = s->bufused = 0;
705
706 return s;
707 }
708
709
710 /*
711
712 =item C<STRING * Parrot_str_new_from_cstring(PARROT_INTERP, const char *buffer,
713 STRING *encodingname)>
714
715 Given a buffer and an encoding, creates and returns a new string. If buffer is
716 NULL the result is a null string. Otherwise, the buffer should be a zero
717 terminated c-style string and its content must be valid for the encoding
718 specified. If encoding is null, assume platform encoding.
719
720 =cut
721
722 */
723
724 PARROT_WARN_UNUSED_RESULT
725 PARROT_CANNOT_RETURN_NULL
726 STRING *
Parrot_str_new_from_cstring(PARROT_INTERP,ARGIN_NULLOK (const char * buffer),ARGIN_NULLOK (STRING * encodingname))727 Parrot_str_new_from_cstring(PARROT_INTERP, ARGIN_NULLOK(const char *buffer),
728 ARGIN_NULLOK(STRING *encodingname))
729 {
730 ASSERT_ARGS(Parrot_str_new_from_cstring)
731 STRING *result = STRINGNULL;
732 if (buffer) {
733 const STR_VTABLE *encoding = STRING_IS_NULL(encodingname) ?
734 Parrot_platform_encoding_ptr :
735 Parrot_find_encoding_by_string(interp, encodingname);
736 if (encoding == NULL)
737 Parrot_ex_throw_from_c_noargs(interp,
738 EXCEPTION_INVALID_ENCODING, "Invalid encoding");
739 else {
740 int size = strlen(buffer);
741 result = Parrot_str_new_init(interp, buffer, size, encoding, 0);
742 #if 0
743 /* if string is ascii and platform multi-byte we could downgrade.
744 but is is overall slower to scan here, than the faster ops later */
745 if (STRING_IS_NULL(encodingname)
746 && Parrot_platform_encoding_ptr->max_bytes_per_codepoint > 1
747 && result->strlen == result->bufused)
748 {
749 const unsigned char * const ptr = (unsigned char *)result->strstart;
750 int multi = 0;
751 UINTVAL i;
752 for (i = 0; i < result->strlen; ++i) {
753 if (ptr[i] >= 0x80)
754 multi++;
755 }
756 if (!multi)
757 result->encoding = Parrot_ascii_encoding_ptr;
758 }
759 #endif
760 }
761 }
762 return result;
763 }
764
765
766 /*
767
768 =item C<STRING * Parrot_str_from_platform_cstring(PARROT_INTERP, const char *c)>
769
770 Convert a C string, encoded in the platform's assumed encoding, to a Parrot
771 string.
772
773 =cut
774
775 */
776
777 PARROT_EXPORT
778 PARROT_CANNOT_RETURN_NULL
779 STRING *
Parrot_str_from_platform_cstring(PARROT_INTERP,ARGIN_NULLOK (const char * c))780 Parrot_str_from_platform_cstring(PARROT_INTERP, ARGIN_NULLOK(const char *c))
781 {
782 ASSERT_ARGS(Parrot_str_from_platform_cstring)
783 if (!c)
784 return STRINGNULL;
785 else {
786 STRING *retv;
787 Parrot_runloop jmp;
788
789 if (setjmp(jmp.resume)) {
790 /* catch */
791 Parrot_cx_delete_handler_local(interp);
792 retv = Parrot_str_new_init(interp, c, strlen(c),
793 Parrot_binary_encoding_ptr, 0);
794 }
795 else {
796 /* try */
797 Parrot_ex_add_c_handler(interp, &jmp);
798 retv = Parrot_str_new_init(interp, c, Parrot_str_platform_strlen(interp, c),
799 Parrot_platform_encoding_ptr, 0);
800 /* if string is ascii-only and platform multi-byte use ascii */
801 if (retv
802 && Parrot_platform_encoding_ptr->max_bytes_per_codepoint > 1
803 && retv->strlen == retv->bufused) {
804 retv = Parrot_ascii_encoding_ptr->to_encoding(interp, retv);
805 }
806 Parrot_cx_delete_handler_local(interp);
807 }
808
809 return retv;
810 }
811 }
812
813
814 /*
815
816 =item C<char * Parrot_str_to_platform_cstring(PARROT_INTERP, const STRING *s)>
817
818 Obtain a C string, encoded in the platform's assumed encoding, from a Parrot
819 string.
820
821 =cut
822
823 */
824
825 PARROT_EXPORT
826 PARROT_CAN_RETURN_NULL
827 PARROT_WARN_UNUSED_RESULT
828 char *
Parrot_str_to_platform_cstring(PARROT_INTERP,ARGIN (const STRING * s))829 Parrot_str_to_platform_cstring(PARROT_INTERP, ARGIN(const STRING *s))
830 {
831 ASSERT_ARGS(Parrot_str_to_platform_cstring)
832 if (STRING_IS_NULL(s)) {
833 return NULL;
834 }
835 else {
836 return Parrot_str_to_encoded_cstring(interp, s, Parrot_platform_encoding_ptr);
837 }
838 }
839
840
841 /*
842
843 =item C<STRING * Parrot_str_extract_chars(PARROT_INTERP, const char *buffer,
844 UINTVAL len, INTVAL chars, const STR_VTABLE *encoding)>
845
846 Extracts C<chars> characters from C<buffer> containing C<len> bytes.
847
848 =cut
849
850 */
851
852 PARROT_WARN_UNUSED_RESULT
853 PARROT_CANNOT_RETURN_NULL
854 STRING *
Parrot_str_extract_chars(PARROT_INTERP,ARGIN (const char * buffer),UINTVAL len,INTVAL chars,ARGIN (const STR_VTABLE * encoding))855 Parrot_str_extract_chars(PARROT_INTERP, ARGIN(const char *buffer),
856 UINTVAL len, INTVAL chars, ARGIN(const STR_VTABLE *encoding))
857 {
858 ASSERT_ARGS(Parrot_str_extract_chars)
859 Parrot_String_Bounds bounds;
860 STRING *result;
861
862 bounds.bytes = len;
863 bounds.chars = chars;
864 bounds.delim = -1;
865
866 encoding->partial_scan(interp, buffer, &bounds);
867
868 if (bounds.chars < chars)
869 Parrot_ex_throw_from_c_noargs(interp,
870 EXCEPTION_OUT_OF_BOUNDS,
871 "index out of bounds");
872
873 result = Parrot_str_new_noinit(interp, bounds.bytes);
874
875 result->encoding = encoding;
876 result->bufused = bounds.bytes;
877 result->strlen = bounds.chars;
878
879 memcpy(result->strstart, buffer, bounds.bytes);
880
881 return result;
882 }
883
884
885 /*
886
887 =back
888
889 =head2 Ordinary user-visible string operations
890
891 =over 4
892
893 =item C<UINTVAL Parrot_str_byte_length(PARROT_INTERP, const STRING *s)>
894
895 Returns the number of characters in the specified Parrot string.
896
897 =cut
898
899 */
900
901 PARROT_EXPORT
902 PARROT_PURE_FUNCTION
903 UINTVAL
Parrot_str_byte_length(SHIM_INTERP,ARGIN_NULLOK (const STRING * s))904 Parrot_str_byte_length(SHIM_INTERP, ARGIN_NULLOK(const STRING *s))
905 {
906 ASSERT_ARGS(Parrot_str_byte_length)
907
908 return STRING_IS_NULL(s) ? 0 : s->bufused;
909 }
910
911
912 /*
913
914 =item C<INTVAL Parrot_str_indexed(PARROT_INTERP, const STRING *s, INTVAL idx)>
915
916 Returns the codepoint at a given index into a string. Negative indexes are
917 treated as counting from the end of the string. Throws an exception if C<s>
918 is null or C<idx> is out of bounds.
919
920 Identical to the STRING_ord macro.
921
922 =cut
923
924 */
925
926 PARROT_EXPORT
927 PARROT_WARN_UNUSED_RESULT
928 INTVAL
Parrot_str_indexed(PARROT_INTERP,ARGIN (const STRING * s),INTVAL idx)929 Parrot_str_indexed(PARROT_INTERP, ARGIN(const STRING *s), INTVAL idx)
930 {
931 ASSERT_ARGS(Parrot_str_indexed)
932
933 if (s == NULL)
934 s = STRINGNULL;
935
936 return STRING_ord(interp, s, idx);
937 }
938
939
940 /*
941
942 =item C<INTVAL Parrot_str_find_index(PARROT_INTERP, const STRING *src, const
943 STRING *search, INTVAL start)>
944
945 Returns the character position of the second Parrot string in the first at or
946 after C<start>. The return value is a (0 based) offset in characters, not
947 bytes. If the search string is not found in the first string or it is null or
948 empty, returns -1. If C<start> is out of bounds, returns -1. Throws an
949 exception if C<src> is null.
950
951 Identical to the STRING_index macro.
952
953 =item C<INTVAL Parrot_str_find_reverse_index(PARROT_INTERP, const STRING *src,
954 const STRING *search, INTVAL start)>
955
956 Returns the last character position of the second Parrot string C<search> in
957 C<src>, not after C<start>. Returns the last position found, or -1 if no
958 instances are found.
959
960 Mostly identical to the STRING_rindex macro.
961
962 =cut
963
964 */
965
966 PARROT_EXPORT
967 PARROT_WARN_UNUSED_RESULT
968 INTVAL
Parrot_str_find_index(PARROT_INTERP,ARGIN (const STRING * src),ARGIN (const STRING * search),INTVAL start)969 Parrot_str_find_index(PARROT_INTERP, ARGIN(const STRING *src),
970 ARGIN(const STRING *search), INTVAL start)
971 {
972 ASSERT_ARGS(Parrot_str_find_index)
973
974 if (src == NULL)
975 src = STRINGNULL;
976
977 return STRING_index(interp, src, search, start);
978 }
979
980 PARROT_EXPORT
981 PARROT_WARN_UNUSED_RESULT
982 INTVAL
Parrot_str_find_reverse_index(PARROT_INTERP,ARGIN (const STRING * src),ARGIN (const STRING * search),INTVAL start)983 Parrot_str_find_reverse_index(PARROT_INTERP, ARGIN(const STRING *src),
984 ARGIN(const STRING *search), INTVAL start)
985 {
986 ASSERT_ARGS(Parrot_str_find_reverse_index)
987 INTVAL len = Parrot_str_length(interp, src);
988
989 if (start <= 0 || !len || start > len)
990 return -1;
991
992 if (!Parrot_str_length(interp, search))
993 return -1;
994
995 return STRING_rindex(interp, src, search, (UINTVAL)start);
996 }
997
998 /*
999
1000 =item C<STRING * Parrot_str_chr(PARROT_INTERP, UINTVAL character)>
1001
1002 Returns a single-character Parrot string.
1003
1004 =cut
1005
1006 */
1007
1008 PARROT_EXPORT
1009 PARROT_CANNOT_RETURN_NULL
1010 PARROT_WARN_UNUSED_RESULT
1011 STRING *
Parrot_str_chr(PARROT_INTERP,UINTVAL character)1012 Parrot_str_chr(PARROT_INTERP, UINTVAL character)
1013 {
1014 ASSERT_ARGS(Parrot_str_chr)
1015
1016 if (character > 0xff)
1017 return Parrot_utf8_encoding_ptr->chr(interp, character);
1018 else if (character > 0x7f)
1019 return Parrot_latin1_encoding_ptr->chr(interp, character);
1020 else
1021 return Parrot_ascii_encoding_ptr->chr(interp, character);
1022 }
1023
1024
1025 /*
1026
1027 =back
1028
1029 =head2 Vtable Dispatch Functions
1030
1031 =over 4
1032
1033 =item C<INTVAL Parrot_str_length(PARROT_INTERP, const STRING *s)>
1034
1035 Returns the number of characters in the specified Parrot string.
1036
1037 =cut
1038
1039 */
1040
1041 PARROT_EXPORT
1042 PARROT_PURE_FUNCTION
1043 PARROT_WARN_UNUSED_RESULT
1044 INTVAL
Parrot_str_length(SHIM_INTERP,ARGIN_NULLOK (const STRING * s))1045 Parrot_str_length(SHIM_INTERP, ARGIN_NULLOK(const STRING *s))
1046 {
1047 ASSERT_ARGS(Parrot_str_length)
1048
1049 return STRING_IS_NULL(s) ? 0 : s->strlen;
1050 }
1051
1052
1053 /*
1054
1055 =item C<static INTVAL string_max_bytes(PARROT_INTERP, const STRING *s, UINTVAL
1056 nchars)>
1057
1058 Returns the number of bytes required to safely contain the specified number
1059 of characters in the specified Parrot string's representation.
1060
1061 =cut
1062
1063 */
1064
1065 PARROT_WARN_UNUSED_RESULT
1066 PARROT_PURE_FUNCTION
1067 static INTVAL
string_max_bytes(SHIM_INTERP,ARGIN (const STRING * s),UINTVAL nchars)1068 string_max_bytes(SHIM_INTERP, ARGIN(const STRING *s), UINTVAL nchars)
1069 {
1070 ASSERT_ARGS(string_max_bytes)
1071 PARROT_ASSERT(s->encoding);
1072 return STRING_max_bytes_per_codepoint(s) * nchars;
1073 }
1074
1075
1076 /*
1077
1078 =item C<STRING * Parrot_str_repeat(PARROT_INTERP, const STRING *s, UINTVAL num)>
1079
1080 Repeats the specified Parrot string I<num> times and returns the result.
1081
1082 =cut
1083
1084 */
1085
1086 PARROT_EXPORT
1087 PARROT_CANNOT_RETURN_NULL
1088 STRING *
Parrot_str_repeat(PARROT_INTERP,ARGIN (const STRING * s),UINTVAL num)1089 Parrot_str_repeat(PARROT_INTERP, ARGIN(const STRING *s), UINTVAL num)
1090 {
1091 ASSERT_ARGS(Parrot_str_repeat)
1092 STRING * const dest = Parrot_str_new_init(interp, NULL,
1093 s->bufused * num,
1094 s->encoding, 0);
1095 if (num > 0) {
1096 /* copy s into dest num times */
1097 UINTVAL length = s->bufused;
1098 UINTVAL i;
1099 char * destpos = dest->strstart;
1100 const char * const srcpos = s->strstart;
1101 for (i = 0; i < num; ++i) {
1102 memcpy(destpos, srcpos, length);
1103 destpos += length;
1104 }
1105
1106 dest->strlen = s->strlen * num;
1107 dest->bufused = s->bufused * num;
1108 }
1109
1110 return dest;
1111 }
1112
1113
1114 /*
1115
1116 =item C<STRING * Parrot_str_substr(PARROT_INTERP, const STRING *src, INTVAL
1117 offset, INTVAL length)>
1118
1119 Returns substring of length C<length> from C<offset> from the specified
1120 Parrot string. If C<offset> is negative, it counts from the end of the
1121 string. Returns the empty string if C<offset> equals the length of the
1122 string. Throws an exception if C<src> is null or C<offset> is out of bounds.
1123 Truncates C<length> if it extends beyond the end of the string.
1124
1125 Identical to the STRING_substr macro.
1126
1127 =cut
1128
1129 */
1130
1131 PARROT_EXPORT
1132 PARROT_CANNOT_RETURN_NULL
1133 PARROT_WARN_UNUSED_RESULT
1134 STRING *
Parrot_str_substr(PARROT_INTERP,ARGIN_NULLOK (const STRING * src),INTVAL offset,INTVAL length)1135 Parrot_str_substr(PARROT_INTERP,
1136 ARGIN_NULLOK(const STRING *src), INTVAL offset, INTVAL length)
1137 {
1138 ASSERT_ARGS(Parrot_str_substr)
1139
1140 if (src == NULL)
1141 src = STRINGNULL;
1142
1143 return STRING_substr(interp, src, offset, length);
1144 }
1145
1146 /*
1147
1148 =item C<STRING * Parrot_str_iter_substr(PARROT_INTERP, const STRING *str, const
1149 String_iter *l, const String_iter *r)>
1150
1151 Returns the substring between iterators C<l> and C<r>.
1152
1153 =cut
1154
1155 */
1156
1157 PARROT_CANNOT_RETURN_NULL
1158 PARROT_WARN_UNUSED_RESULT
1159 STRING *
Parrot_str_iter_substr(PARROT_INTERP,ARGIN (const STRING * str),ARGIN (const String_iter * l),ARGIN_NULLOK (const String_iter * r))1160 Parrot_str_iter_substr(PARROT_INTERP,
1161 ARGIN(const STRING *str),
1162 ARGIN(const String_iter *l), ARGIN_NULLOK(const String_iter *r))
1163 {
1164 ASSERT_ARGS(Parrot_str_iter_substr)
1165 STRING * const dest = Parrot_str_copy(interp, str);
1166
1167 dest->strstart += l->bytepos;
1168
1169 if (r == NULL) {
1170 dest->bufused = str->bufused - l->bytepos;
1171 dest->strlen = str->strlen - l->charpos;
1172 }
1173 else {
1174 dest->bufused = r->bytepos - l->bytepos;
1175 dest->strlen = r->charpos - l->charpos;
1176 }
1177
1178 dest->hashval = 0;
1179
1180 return dest;
1181 }
1182
1183 /*
1184
1185 =item C<INTVAL Parrot_str_iter_index(PARROT_INTERP, const STRING *src,
1186 String_iter *start, String_iter *end, const STRING *search)>
1187
1188 Find the next occurrence of STRING C<search> in STRING C<src> starting at
1189 String_iter C<start>. If C<search> is found C<start> is modified to mark the
1190 beginning of C<search> and String_iter C<end> is set to the character after
1191 C<search> in C<src>. Returns the character position where C<search> was found
1192 or -1 if it wasn't found.
1193
1194 =cut
1195
1196 */
1197
1198 INTVAL
Parrot_str_iter_index(PARROT_INTERP,ARGIN (const STRING * src),ARGMOD (String_iter * start),ARGOUT (String_iter * end),ARGIN (const STRING * search))1199 Parrot_str_iter_index(PARROT_INTERP,
1200 ARGIN(const STRING *src),
1201 ARGMOD(String_iter *start), ARGOUT(String_iter *end),
1202 ARGIN(const STRING *search))
1203 {
1204 ASSERT_ARGS(Parrot_str_iter_index)
1205 String_iter search_iter, search_start, next_start;
1206 const UINTVAL len = search->strlen;
1207 UINTVAL c0;
1208
1209 if (len == 0) {
1210 *end = *start;
1211 return start->charpos;
1212 }
1213
1214 STRING_ITER_INIT(interp, &search_iter);
1215 c0 = STRING_iter_get_and_advance(interp, search, &search_iter);
1216 search_start = search_iter;
1217 next_start = *start;
1218
1219 while (start->charpos + len <= src->strlen) {
1220 UINTVAL c1 = STRING_iter_get_and_advance(interp, src, &next_start);
1221
1222 if (c1 == c0) {
1223 UINTVAL c2;
1224 *end = next_start;
1225
1226 do {
1227 if (search_iter.charpos >= len)
1228 return start->charpos;
1229 c1 = STRING_iter_get_and_advance(interp, src, end);
1230 c2 = STRING_iter_get_and_advance(interp, search, &search_iter);
1231 } while (c1 == c2);
1232
1233 search_iter = search_start;
1234 }
1235
1236 *start = next_start;
1237 }
1238
1239 return -1;
1240 }
1241
1242
1243 /*
1244
1245 =item C<STRING * Parrot_str_replace(PARROT_INTERP, const STRING *src, INTVAL
1246 offset, INTVAL length, const STRING *rep)>
1247
1248 Replaces a sequence of C<length> characters from C<offset> in the first
1249 Parrot string with the second Parrot string, returning what was
1250 replaced.
1251
1252 This follows the Perl semantics for:
1253
1254 substr EXPR, OFFSET, LENGTH, REPLACEMENT
1255
1256 Replacing a sequence of characters with a longer string grows the
1257 string; a shorter string shrinks it.
1258
1259 Replacing 2 past the end of the string is undefined. However replacing 1
1260 past the end of the string concatenates the two strings.
1261
1262 A negative offset is allowed to replace from the end.
1263
1264 =cut
1265
1266 */
1267
1268 PARROT_EXPORT
1269 PARROT_CANNOT_RETURN_NULL
1270 PARROT_WARN_UNUSED_RESULT
1271 STRING *
Parrot_str_replace(PARROT_INTERP,ARGIN (const STRING * src),INTVAL offset,INTVAL length,ARGIN (const STRING * rep))1272 Parrot_str_replace(PARROT_INTERP, ARGIN(const STRING *src),
1273 INTVAL offset, INTVAL length, ARGIN(const STRING *rep))
1274 {
1275 ASSERT_ARGS(Parrot_str_replace)
1276 String_iter iter;
1277 const STR_VTABLE *enc;
1278 STRING *dest = NULL;
1279 UINTVAL true_offset = (UINTVAL)offset;
1280 UINTVAL true_length = (UINTVAL)length;
1281
1282 UINTVAL start_byte, end_byte, start_char, end_char;
1283 INTVAL buf_size;
1284
1285 if (STRING_IS_NULL(src)) {
1286 Parrot_ex_throw_from_c_noargs(interp, EXCEPTION_UNEXPECTED_NULL,
1287 "Can't replace in NULL string");
1288 }
1289
1290 /* abs(-offset) may not be > strlen-1 */
1291 if (offset < 0)
1292 true_offset = src->strlen + offset;
1293
1294 /* Can replace 1 past end of string which is technically outside the string
1295 * but is same as a concat().
1296 * Only give exception if caller trys to replace end of string + 2
1297 */
1298 if (true_offset > src->strlen)
1299 Parrot_ex_throw_from_c_noargs(interp,
1300 EXCEPTION_SUBSTR_OUT_OF_STRING,
1301 "Can only replace inside string or index after end of string");
1302
1303 if (true_length > (src->strlen - true_offset))
1304 true_length = src->strlen - true_offset;
1305
1306 if (STRING_IS_NULL(rep)) {
1307 enc = src->encoding;
1308 }
1309 else {
1310 /* may have different reps..... */
1311 enc = string_rep_compatible(src, rep);
1312
1313 if (!enc) {
1314 if (src->encoding != Parrot_utf8_encoding_ptr)
1315 src = Parrot_utf8_encoding_ptr->to_encoding(interp, src);
1316 if (rep->encoding != Parrot_utf8_encoding_ptr)
1317 rep = Parrot_utf8_encoding_ptr->to_encoding(interp, rep);
1318 /* Remember selected encoding */
1319 enc = src->encoding;
1320 }
1321 }
1322
1323 /* get byte position of the part that will be replaced */
1324 STRING_ITER_INIT(interp, &iter);
1325
1326 STRING_iter_skip(interp, src, &iter, true_offset);
1327 start_byte = iter.bytepos;
1328 start_char = iter.charpos;
1329
1330 STRING_iter_skip(interp, src, &iter, true_length);
1331 end_byte = iter.bytepos;
1332 end_char = iter.charpos;
1333
1334 /* not possible.... */
1335 if (end_byte < start_byte)
1336 Parrot_ex_throw_from_c_noargs(interp,
1337 EXCEPTION_SUBSTR_OUT_OF_STRING,
1338 "replace: subend somehow is less than substart");
1339
1340 /* Now do the replacement */
1341 dest = Parrot_gc_new_string_header(interp, 0);
1342
1343 /* Set encoding to compatible */
1344 dest->encoding = enc;
1345
1346 /* Clear COW flag. We own buffer */
1347 PObj_get_FLAGS(dest) = PObj_is_string_FLAG
1348 | PObj_is_COWable_FLAG;
1349
1350 /* size removed bytes added bytes */
1351 buf_size = src->bufused - (end_byte - start_byte) + rep->bufused;
1352
1353 /* Alloctate new string size. */
1354 Parrot_gc_allocate_string_storage(interp, dest, buf_size);
1355 dest->bufused = buf_size;
1356
1357 /* Copy begin of string */
1358 memcpy(dest->strstart, src->strstart, start_byte);
1359
1360 /* Copy the replacement in */
1361 memcpy(dest->strstart + start_byte, rep->strstart,
1362 rep->bufused);
1363
1364 /* Copy the end of old string */
1365 memcpy(dest->strstart + start_byte + rep->bufused,
1366 src->strstart + end_byte,
1367 src->bufused - end_byte);
1368
1369 dest->strlen = src->strlen - (end_char - start_char) + rep->strlen;
1370 dest->hashval = 0;
1371
1372 return dest;
1373 }
1374
1375
1376 /*
1377
1378 =item C<STRING * Parrot_str_chopn(PARROT_INTERP, const STRING *s, INTVAL n)>
1379
1380 Removes the last C<n> characters of the specified Parrot string and returns the
1381 modified string. If C<n> is negative, cuts the string after C<+n> characters.
1382
1383 =cut
1384
1385 */
1386
1387 PARROT_EXPORT
1388 PARROT_CANNOT_RETURN_NULL
1389 STRING *
Parrot_str_chopn(PARROT_INTERP,ARGIN (const STRING * s),INTVAL n)1390 Parrot_str_chopn(PARROT_INTERP, ARGIN(const STRING *s), INTVAL n)
1391 {
1392 ASSERT_ARGS(Parrot_str_chopn)
1393 INTVAL end = -n;
1394
1395 if (n >= 0)
1396 end += STRING_length(s);
1397
1398 return STRING_substr(interp, s, 0, end);
1399 }
1400
1401
1402 /*
1403
1404 =item C<INTVAL Parrot_str_compare(PARROT_INTERP, const STRING *s1, const STRING
1405 *s2)>
1406
1407 Compares two strings to each other. If s1 is less than s2, returns -1. If the
1408 strings are equal, returns 0. If s1 is greater than s2, returns 2. This
1409 comparison uses the character set collation order of the strings for
1410 comparison. The null string is considered equal to the empty string.
1411
1412 Identical to the STRING_compare macro.
1413
1414 =cut
1415
1416 */
1417
1418 PARROT_EXPORT
1419 PARROT_WARN_UNUSED_RESULT
1420 INTVAL
Parrot_str_compare(PARROT_INTERP,ARGIN_NULLOK (const STRING * s1),ARGIN_NULLOK (const STRING * s2))1421 Parrot_str_compare(PARROT_INTERP, ARGIN_NULLOK(const STRING *s1), ARGIN_NULLOK(const STRING *s2))
1422 {
1423 ASSERT_ARGS(Parrot_str_compare)
1424
1425 if (s1 == NULL)
1426 s1 = STRINGNULL;
1427
1428 return STRING_compare(interp, s1, s2);
1429 }
1430
1431
1432 /*
1433
1434 =item C<INTVAL Parrot_str_not_equal(PARROT_INTERP, const STRING *s1, const
1435 STRING *s2)>
1436
1437 Compares two Parrot strings, performing type and encoding conversions if
1438 necessary. Returns 1 if the strings are not equal, and 0 otherwise.
1439
1440 =cut
1441
1442 */
1443
1444 PARROT_EXPORT
1445 PARROT_WARN_UNUSED_RESULT
1446 INTVAL
Parrot_str_not_equal(PARROT_INTERP,ARGIN_NULLOK (const STRING * s1),ARGIN_NULLOK (const STRING * s2))1447 Parrot_str_not_equal(PARROT_INTERP, ARGIN_NULLOK(const STRING *s1), ARGIN_NULLOK(const STRING *s2))
1448 {
1449 ASSERT_ARGS(Parrot_str_not_equal)
1450
1451 if (s1 == NULL)
1452 s1 = STRINGNULL;
1453
1454 return !STRING_equal(interp, s1, s2);
1455 }
1456
1457
1458 /*
1459
1460 =item C<INTVAL Parrot_str_equal(PARROT_INTERP, const STRING *s1, const STRING
1461 *s2)>
1462
1463 Compares two Parrot strings, performing type and encoding conversions if
1464 necessary. The null string is considered equal to the empty string.
1465
1466 Returns 1 if the strings are equal, and 0 otherwise.
1467
1468 Identical to the STRING_equal macro.
1469
1470 =cut
1471
1472 */
1473
1474 PARROT_EXPORT
1475 PARROT_WARN_UNUSED_RESULT
1476 INTVAL
Parrot_str_equal(PARROT_INTERP,ARGIN_NULLOK (const STRING * s1),ARGIN_NULLOK (const STRING * s2))1477 Parrot_str_equal(PARROT_INTERP, ARGIN_NULLOK(const STRING *s1), ARGIN_NULLOK(const STRING *s2))
1478 {
1479 ASSERT_ARGS(Parrot_str_equal)
1480
1481 if (s1 == NULL)
1482 s1 = STRINGNULL;
1483
1484 return STRING_equal(interp, s1, s2);
1485 }
1486
1487
1488 /*
1489
1490 =item C<STRING * Parrot_str_bitwise_and(PARROT_INTERP, const STRING *s1, const
1491 STRING *s2)>
1492
1493 Performs a bitwise C<AND> on two Parrot strings, performing type and encoding
1494 conversions if necessary. Returns the result as a new string.
1495
1496 =cut
1497
1498 */
1499
1500 PARROT_EXPORT
1501 PARROT_CANNOT_RETURN_NULL
1502 STRING *
Parrot_str_bitwise_and(PARROT_INTERP,ARGIN_NULLOK (const STRING * s1),ARGIN_NULLOK (const STRING * s2))1503 Parrot_str_bitwise_and(PARROT_INTERP, ARGIN_NULLOK(const STRING *s1),
1504 ARGIN_NULLOK(const STRING *s2))
1505 {
1506 ASSERT_ARGS(Parrot_str_bitwise_and)
1507 STRING *res;
1508 size_t minlen;
1509
1510 /* we could also trans_encoding to iso-8859-1 */
1511 if (s1 && STRING_max_bytes_per_codepoint(s1) != 1)
1512 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_INVALID_ENCODING,
1513 "string bitwise_and (%s/%s) unsupported",
1514 s1->encoding->name, nonnull_encoding_name(s2));
1515
1516 if (s2 && STRING_max_bytes_per_codepoint(s2) != 1)
1517 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_INVALID_ENCODING,
1518 "string bitwise_and (%s/%s) unsupported",
1519 nonnull_encoding_name(s1), s2->encoding->name);
1520
1521 /* think about case of dest string is one of the operands */
1522 if (!STRING_IS_NULL(s1) && !STRING_IS_NULL(s2))
1523 minlen = s1->strlen > s2->strlen ? s2->strlen : s1->strlen;
1524 else
1525 minlen = 0;
1526
1527 res = Parrot_str_new_init(interp, NULL, minlen,
1528 Parrot_binary_encoding_ptr, 0);
1529
1530 if (STRING_IS_NULL(s1) || STRING_IS_NULL(s2)) {
1531 res->bufused = 0;
1532 res->strlen = 0;
1533
1534 return res;
1535 }
1536
1537 #if ! DISABLE_GC_DEBUG
1538 /* trigger GC for debug */
1539 if (interp && GC_DEBUG(interp))
1540 Parrot_gc_mark_and_sweep(interp, GC_trace_stack_FLAG);
1541 #endif
1542
1543 { /* bitwise AND the strings */
1544 const Parrot_UInt1 *curr1 = (Parrot_UInt1 *)s1->strstart;
1545 const Parrot_UInt1 *curr2 = (Parrot_UInt1 *)s2->strstart;
1546 Parrot_UInt1 *dp = (Parrot_UInt1 *)res->strstart;
1547 size_t len = minlen;
1548
1549 while (len--)
1550 *dp++ = *curr1++ & *curr2++;
1551 }
1552
1553 res->bufused = res->strlen = minlen;
1554
1555 return res;
1556 }
1557
1558
1559 #define BITWISE_XOR_STRINGS(type1, type2, restype, s1, s2, res, maxlen) \
1560 do { \
1561 const type1 *curr1 = NULL; \
1562 const type2 *curr2 = NULL; \
1563 size_t length1 = 0; \
1564 size_t length2 = 0; \
1565 restype *dp; \
1566 size_t _index; \
1567 \
1568 if (!STRING_IS_NULL(s1)) { \
1569 curr1 = (type1 *)(s1)->strstart; \
1570 length1 = (s1)->strlen; \
1571 } \
1572 if (!STRING_IS_NULL(s2)) { \
1573 curr2 = (type2 *)(s2)->strstart; \
1574 length2 = (s2)->strlen; \
1575 } \
1576 \
1577 dp = (restype *)(res)->strstart; \
1578 _index = 0; \
1579 \
1580 for (; _index < (maxlen) ; ++curr1, ++curr2, ++dp, ++_index) { \
1581 if (_index < length1) { \
1582 if (_index < length2) \
1583 *dp = *curr1 ^ *curr2; \
1584 else \
1585 *dp = *curr1; \
1586 } \
1587 else if (_index < length2) { \
1588 *dp = *curr2; \
1589 } \
1590 } \
1591 } while (0)
1592
1593
1594 #define BITWISE_OR_STRINGS(type1, type2, restype, s1, s2, res, maxlen) \
1595 do { \
1596 const type1 *curr1 = NULL; \
1597 const type2 *curr2 = NULL; \
1598 size_t length1 = 0; \
1599 size_t length2 = 0; \
1600 restype *dp; \
1601 size_t _index; \
1602 \
1603 if (!STRING_IS_NULL(s1)) { \
1604 curr1 = (type1 *)(s1)->strstart; \
1605 length1 = (s1)->strlen; \
1606 } \
1607 if (!STRING_IS_NULL(s2)) { \
1608 curr2 = (type2 *)(s2)->strstart; \
1609 length2 = (s2)->strlen; \
1610 } \
1611 \
1612 dp = (restype *)(res)->strstart; \
1613 _index = 0; \
1614 \
1615 for (; _index < (maxlen) ; ++curr1, ++curr2, ++dp, ++_index) { \
1616 if (_index < length1) { \
1617 if (_index < length2) \
1618 *dp = *curr1 | *curr2; \
1619 else \
1620 *dp = *curr1; \
1621 } \
1622 else if (_index < length2) { \
1623 *dp = *curr2; \
1624 } \
1625 } \
1626 } while (0)
1627
1628
1629 /*
1630
1631 =item C<STRING * Parrot_str_bitwise_or(PARROT_INTERP, const STRING *s1, const
1632 STRING *s2)>
1633
1634 Performs a bitwise C<OR> on two Parrot strings, performing type and encoding
1635 conversions if necessary. Returns the result as a new string.
1636
1637 =cut
1638
1639 */
1640
1641 PARROT_EXPORT
1642 PARROT_CANNOT_RETURN_NULL
1643 STRING *
Parrot_str_bitwise_or(PARROT_INTERP,ARGIN_NULLOK (const STRING * s1),ARGIN_NULLOK (const STRING * s2))1644 Parrot_str_bitwise_or(PARROT_INTERP, ARGIN_NULLOK(const STRING *s1),
1645 ARGIN_NULLOK(const STRING *s2))
1646 {
1647 ASSERT_ARGS(Parrot_str_bitwise_or)
1648 STRING *res;
1649 size_t maxlen = 0;
1650
1651 if (!STRING_IS_NULL(s1)) {
1652 if (STRING_max_bytes_per_codepoint(s1) != 1)
1653 Parrot_ex_throw_from_c_args(interp, NULL,
1654 EXCEPTION_INVALID_ENCODING,
1655 "string bitwise_or (%s/%s) unsupported",
1656 s1->encoding->name, nonnull_encoding_name(s2));
1657
1658 maxlen = s1->bufused;
1659 }
1660
1661 if (!STRING_IS_NULL(s2)) {
1662 if (STRING_max_bytes_per_codepoint(s2) != 1)
1663 Parrot_ex_throw_from_c_args(interp, NULL,
1664 EXCEPTION_INVALID_ENCODING,
1665 "string bitwise_or (%s/%s) unsupported",
1666 nonnull_encoding_name(s1), s2->encoding->name);
1667
1668 if (s2->bufused > maxlen)
1669 maxlen = s2->bufused;
1670 }
1671
1672 res = Parrot_str_new_init(interp, NULL, maxlen,
1673 Parrot_binary_encoding_ptr, 0);
1674
1675 if (!maxlen) {
1676 res->bufused = 0;
1677 res->strlen = 0;
1678 return res;
1679 }
1680
1681 #if ! DISABLE_GC_DEBUG
1682 /* trigger GC for debug */
1683 if (interp && GC_DEBUG(interp))
1684 Parrot_gc_mark_and_sweep(interp, GC_trace_stack_FLAG);
1685 #endif
1686
1687 BITWISE_OR_STRINGS(Parrot_UInt1, Parrot_UInt1, Parrot_UInt1,
1688 s1, s2, res, maxlen);
1689 res->bufused = res->strlen = maxlen;
1690
1691 return res;
1692 }
1693
1694
1695 /*
1696
1697 =item C<STRING * Parrot_str_bitwise_xor(PARROT_INTERP, const STRING *s1, const
1698 STRING *s2)>
1699
1700 Performs a bitwise C<XOR> on two Parrot strings, performing type and encoding
1701 conversions if necessary. Returns the result as a new string.
1702
1703 =cut
1704
1705 */
1706
1707 PARROT_EXPORT
1708 PARROT_CANNOT_RETURN_NULL
1709 STRING *
Parrot_str_bitwise_xor(PARROT_INTERP,ARGIN_NULLOK (const STRING * s1),ARGIN_NULLOK (const STRING * s2))1710 Parrot_str_bitwise_xor(PARROT_INTERP, ARGIN_NULLOK(const STRING *s1),
1711 ARGIN_NULLOK(const STRING *s2))
1712 {
1713 ASSERT_ARGS(Parrot_str_bitwise_xor)
1714 STRING *res;
1715 size_t maxlen = 0;
1716
1717 if (!STRING_IS_NULL(s1)) {
1718 if (STRING_max_bytes_per_codepoint(s1) != 1)
1719 Parrot_ex_throw_from_c_args(interp, NULL,
1720 EXCEPTION_INVALID_ENCODING,
1721 "string bitwise_xor (%s/%s) unsupported",
1722 s1->encoding->name, nonnull_encoding_name(s2));
1723
1724 maxlen = s1->bufused;
1725 }
1726
1727 if (!STRING_IS_NULL(s2)) {
1728 if (STRING_max_bytes_per_codepoint(s2) != 1)
1729 Parrot_ex_throw_from_c_args(interp, NULL,
1730 EXCEPTION_INVALID_ENCODING,
1731 "string bitwise_xor (%s/%s) unsupported",
1732 nonnull_encoding_name(s1), s2->encoding->name);
1733
1734 if (s2->bufused > maxlen)
1735 maxlen = s2->bufused;
1736 }
1737
1738 res = Parrot_str_new_init(interp, NULL, maxlen,
1739 Parrot_binary_encoding_ptr, 0);
1740
1741 if (!maxlen) {
1742 res->bufused = 0;
1743 res->strlen = 0;
1744 return res;
1745 }
1746
1747 #if ! DISABLE_GC_DEBUG
1748 /* trigger GC for debug */
1749 if (interp && GC_DEBUG(interp))
1750 Parrot_gc_mark_and_sweep(interp, GC_trace_stack_FLAG);
1751 #endif
1752
1753 BITWISE_XOR_STRINGS(Parrot_UInt1, Parrot_UInt1, Parrot_UInt1,
1754 s1, s2, res, maxlen);
1755 res->bufused = res->strlen = maxlen;
1756
1757 return res;
1758 }
1759
1760
1761 #define BITWISE_NOT_STRING(type, s, res) \
1762 do { \
1763 if (!STRING_IS_NULL(s) && !STRING_IS_NULL(res)) { \
1764 const type *curr = (type *)(s)->strstart; \
1765 size_t length = (s)->strlen; \
1766 Parrot_UInt1 *dp = (Parrot_UInt1 *)(res)->strstart; \
1767 \
1768 for (; length ; --length, ++dp, ++curr) \
1769 *dp = 0xFF & ~ *curr; \
1770 } \
1771 } while (0)
1772
1773
1774 /*
1775
1776 =item C<STRING * Parrot_str_bitwise_not(PARROT_INTERP, const STRING *s)>
1777
1778 Performs a bitwise C<NOT> on a Parrot string. Returns the result as a new
1779 string.
1780
1781 =cut
1782
1783 */
1784
1785 PARROT_EXPORT
1786 PARROT_CANNOT_RETURN_NULL
1787 STRING *
Parrot_str_bitwise_not(PARROT_INTERP,ARGIN_NULLOK (const STRING * s))1788 Parrot_str_bitwise_not(PARROT_INTERP, ARGIN_NULLOK(const STRING *s))
1789 {
1790 ASSERT_ARGS(Parrot_str_bitwise_not)
1791 STRING *res;
1792 size_t len;
1793
1794 if (!STRING_IS_NULL(s)) {
1795 if (STRING_max_bytes_per_codepoint(s) != 1)
1796 Parrot_ex_throw_from_c_args(interp, NULL,
1797 EXCEPTION_INVALID_ENCODING,
1798 "string bitwise_not (%s) unsupported",
1799 s->encoding->name);
1800
1801 len = s->bufused;
1802 }
1803 else
1804 len = 0;
1805
1806 res = Parrot_str_new_init(interp, NULL, len,
1807 Parrot_binary_encoding_ptr, 0);
1808
1809 if (!len) {
1810 res->bufused = 0;
1811 res->strlen = 0;
1812 return res;
1813 }
1814
1815 #if ! DISABLE_GC_DEBUG
1816 /* trigger GC for debug */
1817 if (interp && GC_DEBUG(interp))
1818 Parrot_gc_mark_and_sweep(interp, GC_trace_stack_FLAG);
1819 #endif
1820
1821 res->strlen = res->bufused = len;
1822
1823 BITWISE_NOT_STRING(Parrot_UInt1, s, res);
1824
1825 return res;
1826 }
1827
1828
1829 /*
1830
1831 =item C<INTVAL Parrot_str_boolean(PARROT_INTERP, const STRING *s)>
1832
1833 Returns whether the specified Parrot string is true. A string is true if it is
1834 equal to anything other than C<0>, C<""> or C<"0">.
1835
1836 =cut
1837
1838 */
1839
1840 PARROT_EXPORT
1841 PARROT_WARN_UNUSED_RESULT
1842 INTVAL
Parrot_str_boolean(PARROT_INTERP,ARGIN_NULLOK (const STRING * s))1843 Parrot_str_boolean(PARROT_INTERP, ARGIN_NULLOK(const STRING *s))
1844 {
1845 ASSERT_ARGS(Parrot_str_boolean)
1846 INTVAL len;
1847
1848 if (s == NULL)
1849 return 0;
1850
1851 len = STRING_length(s);
1852 if (len == 0)
1853 return 0;
1854
1855 if (len == 1) {
1856 const UINTVAL c = STRING_ord(interp, s, 0);
1857
1858 /* relying on character literals being interpreted as ASCII--may
1859 not be correct on EBCDIC systems. use numeric value instead? */
1860 if (c == '0')
1861 /* later, accept other chars with digit value 0? or, no */
1862 return 0;
1863 }
1864
1865 /* it must be true */
1866 return 1;
1867 }
1868
1869
1870 /*
1871
1872 =item C<STRING * Parrot_str_format_data(PARROT_INTERP, ARGIN_FORMAT(const char
1873 *format), ...)>
1874
1875 Writes and returns a Parrot string.
1876
1877 =cut
1878
1879 */
1880
1881 PARROT_EXPORT
1882 PARROT_CANNOT_RETURN_NULL
1883 STRING *
Parrot_str_format_data(PARROT_INTERP,ARGIN_FORMAT (const char * format),...)1884 Parrot_str_format_data(PARROT_INTERP, ARGIN_FORMAT(const char *format), ...)
1885 {
1886 ASSERT_ARGS(Parrot_str_format_data)
1887 STRING *output;
1888 va_list args;
1889
1890 va_start(args, format);
1891 output = Parrot_vsprintf_c(interp, format, args);
1892 va_end(args);
1893
1894 return output;
1895 }
1896
1897
1898 /*
1899
1900 State of FSM during number value parsing.
1901
1902 Integer uses only parse_start, parse_before_dot and parse_end.
1903
1904 */
1905
1906 typedef enum number_parse_state {
1907 parse_start,
1908 parse_before_dot,
1909 parse_after_dot,
1910 parse_after_e,
1911 parse_after_e_sign,
1912 parse_end
1913 } number_parse_state;
1914
1915
1916 /*
1917
1918 =item C<INTVAL Parrot_str_to_int(PARROT_INTERP, const STRING *s)>
1919
1920 Converts a numeric Parrot string to an integer value.
1921
1922 A number is such that:
1923
1924 sign = '+' | '-'
1925 digit = "Any code point considered a digit by the chartype"
1926 indicator = 'e' | 'E'
1927 digits = digit [digit]...
1928 decimal-part = digits '.' [digits] | ['.'] digits
1929 exponent-part = indicator [sign] digits
1930 numeric-string = [sign] decimal-part [exponent-part]
1931
1932 The integer value is the appropriate integer representation of such a number,
1933 rounding towards zero.
1934
1935 =cut
1936
1937 */
1938
1939 PARROT_EXPORT
1940 PARROT_WARN_UNUSED_RESULT
1941 INTVAL
Parrot_str_to_int(PARROT_INTERP,ARGIN_NULLOK (const STRING * s))1942 Parrot_str_to_int(PARROT_INTERP, ARGIN_NULLOK(const STRING *s))
1943 {
1944 ASSERT_ARGS(Parrot_str_to_int)
1945 if (STRING_IS_NULL(s))
1946 return 0;
1947 else {
1948 const UINTVAL max_safe = -(UINTVAL)PARROT_INTVAL_MIN / 10;
1949 const UINTVAL last_dig = (-(UINTVAL)PARROT_INTVAL_MIN) % 10;
1950 int sign = 1;
1951 UINTVAL i = 0;
1952 String_iter iter;
1953 INTVAL count = (INTVAL)s->strlen;
1954 UINTVAL c;
1955
1956 STRING_ITER_INIT(interp, &iter);
1957
1958 c = count-- > 0 ? STRING_iter_get_and_advance(interp, s, &iter) : 0;
1959 while (c == ' ')
1960 c = count-- > 0 ? STRING_iter_get_and_advance(interp, s, &iter) : 0;
1961 switch (c) {
1962 case '-':
1963 sign = -1;
1964 /* Fall through. */
1965 case '+':
1966 c = count-- > 0 ? STRING_iter_get_and_advance(interp, s, &iter) : 0;
1967 break;
1968 default:
1969 ; /* nothing */
1970 }
1971 while (c) {
1972 const UINTVAL nextval = c - (UINTVAL)'0';
1973 if (nextval > 9)
1974 break;
1975 if (i < max_safe || (i == max_safe && nextval <= last_dig))
1976 i = i * 10 + nextval;
1977 else
1978 Parrot_ex_throw_from_c_args(interp, NULL,
1979 EXCEPTION_ERR_OVERFLOW,
1980 "Integer value of String '%S' too big", s);
1981 c = count-- > 0 ? STRING_iter_get_and_advance(interp, s, &iter) : 0;
1982 }
1983
1984 if (sign == 1 && i > (UINTVAL)PARROT_INTVAL_MAX)
1985 Parrot_ex_throw_from_c_args(interp, NULL,
1986 EXCEPTION_ERR_OVERFLOW,
1987 "Integer value of String '%S' too big", s);
1988 return sign == -1 ? -i : i;
1989 }
1990 }
1991
1992
1993 /*
1994
1995 =item C<FLOATVAL Parrot_str_to_num(PARROT_INTERP, const STRING *s)>
1996
1997 Converts a numeric Parrot STRING to a floating point number.
1998
1999 =cut
2000
2001 */
2002
2003 PARROT_EXPORT
2004 PARROT_WARN_UNUSED_RESULT
2005 FLOATVAL
Parrot_str_to_num(PARROT_INTERP,ARGIN_NULLOK (const STRING * s))2006 Parrot_str_to_num(PARROT_INTERP, ARGIN_NULLOK(const STRING *s))
2007 {
2008 ASSERT_ARGS(Parrot_str_to_num)
2009 FLOATVAL f = 0.0;
2010 FLOATVAL mantissa = 0.0;
2011 FLOATVAL sign = 1.0; /* -1 for '-' */
2012 FLOATVAL divider = 0.1;
2013 INTVAL e = 0;
2014 INTVAL e_sign = 1; /* -1 for '-' */
2015 /* How many digits it's safe to parse */
2016 const INTVAL max_safe = PARROT_INTVAL_MAX / 10;
2017 INTVAL m = 0; /* Integer mantissa */
2018 int m_is_safe = 1; /* We can use integer mantissa */
2019 INTVAL d = 0; /* Integer descriminator */
2020 int d_is_safe = 1; /* We can use integer mantissa */
2021 int d_length = 0;
2022 int check_nan = 0; /* Check for NaN and Inf after main loop */
2023 String_iter iter;
2024 number_parse_state state = parse_start;
2025
2026 if (STRING_IS_NULL(s))
2027 return 0.0;
2028
2029 STRING_ITER_INIT(interp, &iter);
2030
2031 /* Handcrafted FSM to read float value */
2032 while (state != parse_end && iter.charpos < s->strlen) {
2033 const UINTVAL c = STRING_iter_get_and_advance(interp, s, &iter);
2034 /* Check for overflow */
2035 if (c > 255)
2036 break;
2037
2038 switch (state) {
2039 case parse_start:
2040 if (isdigit((unsigned char)c)) {
2041 f = c - '0';
2042 m = c - '0';
2043 state = parse_before_dot;
2044 }
2045 else if (c == '-') {
2046 sign = -1.0;
2047 state = parse_before_dot;
2048 }
2049 else if (c == '+')
2050 state = parse_before_dot;
2051 else if (c == '.')
2052 state = parse_after_dot;
2053 else if (isspace((unsigned char)c))
2054 ; /* Do nothing */
2055 else {
2056 check_nan = 1;
2057 state = parse_end;
2058 }
2059 break;
2060
2061 case parse_before_dot:
2062 if (isdigit((unsigned char)c)) {
2063 f = f*10.0 + (c-'0');
2064 m = m*10 + (c-'0');
2065 /* Integer overflow for mantissa */
2066 if (m >= max_safe)
2067 m_is_safe = 0;
2068 }
2069 else if (c == '.') {
2070 state = parse_after_dot;
2071 /*
2072 * Throw gathered result. Recalculate from integer mantissa
2073 * to preserve precision.
2074 */
2075 if (m_is_safe)
2076 f = m;
2077 mantissa = f;
2078 }
2079 else if (c == 'e' || c == 'E') {
2080 state = parse_after_e;
2081 /* See comment above */
2082 if (m_is_safe)
2083 f = m;
2084 mantissa = f;
2085 }
2086 else {
2087 check_nan = 1;
2088 state = parse_end;
2089 }
2090 break;
2091
2092 case parse_after_dot:
2093 if (isdigit((unsigned char)c)) {
2094 f += (c-'0') * divider;
2095 divider /= 10.0;
2096 d = d*10 + (c-'0');
2097 if (d >= max_safe)
2098 d_is_safe = 0;
2099 ++d_length;
2100 }
2101 else if (c == 'e' || c == 'E')
2102 state = parse_after_e;
2103 else
2104 state = parse_end;
2105 break;
2106
2107 case parse_after_e:
2108 if (isdigit((unsigned char)c)) {
2109 e = e*10 + (c-'0');
2110 state = parse_after_e_sign;
2111 }
2112 else if (c == '-') {
2113 e_sign = -1;
2114 state = parse_after_e_sign;
2115 }
2116 else if (c == '+')
2117 state = parse_after_e_sign;
2118 else
2119 state = parse_end;
2120 break;
2121
2122 case parse_after_e_sign:
2123 if (isdigit((unsigned char)c))
2124 e = e*10 + (c-'0');
2125 else
2126 state = parse_end;
2127 break;
2128
2129 case parse_end:
2130 default:
2131 /* Pacify compiler */
2132 break;
2133 }
2134 }
2135
2136 /* Support for non-canonical NaN and Inf */
2137 /* charpos <= 2 because for "-i" iter already advanced to next char */
2138 if (check_nan && (iter.charpos <= 2)) {
2139 STRING * const t = Parrot_str_upcase(interp, s);
2140 if (STRING_equal(interp, t, CONST_STRING(interp, "NAN")))
2141 return PARROT_FLOATVAL_NAN_QUIET;
2142 else if (STRING_equal(interp, t, CONST_STRING(interp, "INF"))
2143 || STRING_equal(interp, t, CONST_STRING(interp, "INFINITY")))
2144 return PARROT_FLOATVAL_INF_POSITIVE;
2145 else if (STRING_equal(interp, t, CONST_STRING(interp, "-INF"))
2146 || STRING_equal(interp, t, CONST_STRING(interp, "-INFINITY")))
2147 return PARROT_FLOATVAL_INF_NEGATIVE;
2148 }
2149
2150 #if defined(PARROT_HAS_POWL) && !defined(__CYGWIN__)
2151 # define POW powl
2152 #else
2153 # define POW pow
2154 #endif
2155
2156 if (d && d_is_safe) {
2157 f = mantissa + (1.0 * d / POW(10.0, d_length));
2158 }
2159
2160 if (sign < 0)
2161 f = -f;
2162
2163 if (e) {
2164 if (e_sign == 1)
2165 f *= POW(10.0, e);
2166 else
2167 f /= POW(10.0, e);
2168 }
2169
2170 #undef POW
2171
2172 return f;
2173 }
2174
2175
2176 /*
2177
2178 =item C<STRING * Parrot_str_from_int(PARROT_INTERP, INTVAL i)>
2179
2180 Returns a Parrot string representation of the specified integer value.
2181
2182 =cut
2183
2184 */
2185
2186 PARROT_EXPORT
2187 PARROT_WARN_UNUSED_RESULT
2188 PARROT_CANNOT_RETURN_NULL
2189 STRING *
Parrot_str_from_int(PARROT_INTERP,INTVAL i)2190 Parrot_str_from_int(PARROT_INTERP, INTVAL i)
2191 {
2192 ASSERT_ARGS(Parrot_str_from_int)
2193 char buf[128];
2194 return Parrot_str_from_int_base(interp, buf, (HUGEINTVAL)i, 10);
2195 }
2196
2197
2198 /*
2199
2200 =item C<STRING * Parrot_str_from_num(PARROT_INTERP, FLOATVAL f)>
2201
2202 Returns a Parrot string representation of the specified floating-point value.
2203
2204 =cut
2205
2206 */
2207
2208 PARROT_EXPORT
2209 PARROT_WARN_UNUSED_RESULT
2210 PARROT_CANNOT_RETURN_NULL
2211 STRING *
Parrot_str_from_num(PARROT_INTERP,FLOATVAL f)2212 Parrot_str_from_num(PARROT_INTERP, FLOATVAL f)
2213 {
2214 ASSERT_ARGS(Parrot_str_from_num)
2215 /* Too damn hard--hand it off to Parrot_sprintf, which'll probably
2216 use the system sprintf anyway, but has gigantic buffers that are
2217 awfully hard to overflow. */
2218 return Parrot_sprintf_c(interp, FLOATVAL_FMT, f);
2219 }
2220
2221
2222 /*
2223
2224 =item C<char * Parrot_str_to_cstring(PARROT_INTERP, const STRING *s)>
2225
2226 Returns a C string for the specified Parrot string in the current
2227 representation of the string. Use C<Parrot_str_free_cstring()> to free
2228 the string. Failure to do this will result in a memory leak.
2229
2230 You usually should use Parrot_str_to_encoded_cstring instead.
2231
2232 =cut
2233
2234 */
2235
2236 PARROT_EXPORT
2237 PARROT_CANNOT_RETURN_NULL
2238 char *
Parrot_str_to_cstring(PARROT_INTERP,ARGIN (const STRING * s))2239 Parrot_str_to_cstring(PARROT_INTERP, ARGIN(const STRING *s))
2240 {
2241 ASSERT_ARGS(Parrot_str_to_cstring)
2242
2243 return Parrot_str_to_encoded_cstring(interp, s, s->encoding);
2244 }
2245
2246
2247 /*
2248
2249 =item C<char * Parrot_str_to_encoded_cstring(PARROT_INTERP, const STRING *s,
2250 const STR_VTABLE *enc)>
2251
2252 Returns a C string in the encoding C<enc> for the Parrot string C<s>. Use
2253 C<Parrot_str_free_cstring()> to free the string. Failure to do this will result
2254 in a memory leak.
2255
2256 =cut
2257
2258 */
2259
2260 PARROT_EXPORT
2261 PARROT_CANNOT_RETURN_NULL
2262 char *
Parrot_str_to_encoded_cstring(PARROT_INTERP,ARGIN (const STRING * s),ARGIN (const STR_VTABLE * enc))2263 Parrot_str_to_encoded_cstring(PARROT_INTERP, ARGIN(const STRING *s),
2264 ARGIN(const STR_VTABLE *enc))
2265 {
2266 ASSERT_ARGS(Parrot_str_to_encoded_cstring)
2267 size_t len;
2268 size_t trail;
2269 char *p;
2270
2271 if (STRING_IS_NULL(s))
2272 Parrot_ex_throw_from_c_noargs(interp, EXCEPTION_UNEXPECTED_NULL,
2273 "Can't convert NULL string");
2274
2275 if (s->encoding != enc) {
2276 /* Check for compatible encodings */
2277 if (s->encoding == Parrot_ascii_encoding_ptr) {
2278 if (enc == Parrot_latin1_encoding_ptr
2279 || enc == Parrot_utf8_encoding_ptr)
2280 goto skip;
2281 }
2282 else if (s->encoding == Parrot_ucs2_encoding_ptr) {
2283 if (enc == Parrot_utf16_encoding_ptr)
2284 goto skip;
2285 }
2286
2287 /* Convert */
2288 s = enc->to_encoding(interp, s);
2289 }
2290 skip:
2291
2292 len = s->bufused;
2293 trail = enc->bytes_per_unit;
2294
2295 p = (char*)mem_internal_allocate(len + trail);
2296
2297 memcpy(p, s->strstart, len);
2298 memset(p + len, 0, trail);
2299
2300 return p;
2301 }
2302
2303
2304 /*
2305
2306 =item C<void Parrot_str_free_cstring(char *p)>
2307
2308 Free a string created by C<Parrot_str_to_cstring()>.
2309
2310 TODO - Hopefully this can go away at some point, as it's got all
2311 sorts of leak potential otherwise.
2312
2313 =cut
2314
2315 */
2316
2317 PARROT_EXPORT
2318 void
Parrot_str_free_cstring(ARGFREE (char * p))2319 Parrot_str_free_cstring(ARGFREE(char *p))
2320 {
2321 ASSERT_ARGS(Parrot_str_free_cstring)
2322 mem_internal_free((void *)p);
2323 }
2324
2325
2326 /*
2327
2328 =item C<void Parrot_str_pin(PARROT_INTERP, STRING *s)>
2329
2330 Replaces the specified Parrot string's managed buffer memory by system memory.
2331
2332 =cut
2333
2334 */
2335
2336 PARROT_EXPORT
2337 void
Parrot_str_pin(SHIM_INTERP,ARGMOD (STRING * s))2338 Parrot_str_pin(SHIM_INTERP, ARGMOD(STRING *s))
2339 {
2340 ASSERT_ARGS(Parrot_str_pin)
2341 const size_t size = Buffer_buflen(s);
2342 char * const memory = (char *)mem_internal_allocate(size);
2343
2344 memcpy(memory, Buffer_bufstart(s), size);
2345 Buffer_bufstart(s) = memory;
2346 s->strstart = memory;
2347
2348 /* Mark the memory as both from the system and immobile */
2349 PObj_sysmem_SET(s);
2350 }
2351
2352
2353 /*
2354
2355 =item C<void Parrot_str_unpin(PARROT_INTERP, STRING *s)>
2356
2357 Undoes a C<Parrot_str_pin()> so that the string once again uses managed memory.
2358
2359 =cut
2360
2361 */
2362
2363 PARROT_EXPORT
2364 void
Parrot_str_unpin(PARROT_INTERP,ARGMOD (STRING * s))2365 Parrot_str_unpin(PARROT_INTERP, ARGMOD(STRING *s))
2366 {
2367 ASSERT_ARGS(Parrot_str_unpin)
2368 void *memory;
2369 size_t size;
2370
2371 /* If this string is not marked using system memory,
2372 * we just don't do this */
2373 if (!PObj_sysmem_TEST(s))
2374 return;
2375
2376 size = Buffer_buflen(s);
2377
2378 /* We need a handle on the fixed memory so we can get rid of it later */
2379 memory = Buffer_bufstart(s);
2380
2381 /* Reallocate it the same size
2382 * NOTE can't use Parrot_gc_reallocate_string_storage because of the LEA
2383 * allocator, where this is a noop for the same size
2384 *
2385 * We have to block GC here, as we have a pointer to bufstart
2386 */
2387 Parrot_block_GC_sweep(interp);
2388 Parrot_gc_allocate_string_storage(interp, s, size);
2389 Parrot_unblock_GC_sweep(interp);
2390 memcpy(Buffer_bufstart(s), memory, size);
2391
2392 /* Mark the memory as neither immobile nor system allocated */
2393 PObj_sysmem_CLEAR(s);
2394
2395 /* Free up the memory */
2396 mem_internal_free(memory);
2397 }
2398
2399
2400 /*
2401
2402 =item C<size_t Parrot_str_to_hashval(PARROT_INTERP, const STRING *s)>
2403
2404 Returns the hash value for the specified Parrot string, caching it in
2405 C<< s->hashval >>.
2406
2407 Identical to the STRING_hash macro.
2408
2409 =cut
2410
2411 */
2412
2413 PARROT_EXPORT
2414 PARROT_WARN_UNUSED_RESULT
2415 size_t
Parrot_str_to_hashval(PARROT_INTERP,ARGIN_NULLOK (const STRING * s))2416 Parrot_str_to_hashval(PARROT_INTERP, ARGIN_NULLOK(const STRING *s))
2417 {
2418 ASSERT_ARGS(Parrot_str_to_hashval)
2419
2420 if (s == NULL)
2421 s = STRINGNULL;
2422
2423 return STRING_hash(interp, s, interp->hash_seed);
2424 }
2425
2426 /*
2427
2428 =item C<STRING * Parrot_str_reverse(PARROT_INTERP, const STRING *src)>
2429
2430 Return the reverse of C<src>, even for non-ascii strings.
2431
2432 =cut
2433
2434 */
2435
2436 PARROT_EXPORT
2437 PARROT_CANNOT_RETURN_NULL
2438 STRING *
Parrot_str_reverse(PARROT_INTERP,ARGIN (const STRING * src))2439 Parrot_str_reverse(PARROT_INTERP, ARGIN(const STRING *src))
2440 {
2441 ASSERT_ARGS(Parrot_str_reverse)
2442 String_iter iter;
2443 INTVAL pos;
2444 PMC *sb;
2445
2446 STRING_ITER_INIT(interp, &iter);
2447 sb = Parrot_pmc_new(interp, enum_class_StringBuilder);
2448
2449 for (pos = STRING_length(src) - 1; pos >= 0; pos--) {
2450 VTABLE_push_string(interp, sb, Parrot_str_chr(interp,
2451 STRING_iter_get(interp, src, &iter, pos)));
2452 }
2453
2454 return VTABLE_get_string(interp, sb);
2455 }
2456
2457 /*
2458
2459 =item C<STRING * Parrot_str_escape(PARROT_INTERP, const STRING *src)>
2460
2461 Escapes all non-ASCII chars to backslash sequences. Control chars that
2462 C<Parrot_str_unescape> can handle are escaped as I<\x>, as well as a double
2463 quote character. Other control chars and codepoints < 0x100 are escaped as
2464 I<\xhh>, codepoints up to 0xffff, as I<\uhhhh>, and codepoints greater than
2465 this as I<\x{hh...hh}>.
2466
2467 =cut
2468
2469 */
2470
2471 PARROT_EXPORT
2472 PARROT_CANNOT_RETURN_NULL
2473 STRING *
Parrot_str_escape(PARROT_INTERP,ARGIN_NULLOK (const STRING * src))2474 Parrot_str_escape(PARROT_INTERP, ARGIN_NULLOK(const STRING *src))
2475 {
2476 ASSERT_ARGS(Parrot_str_escape)
2477 return Parrot_str_escape_truncate(interp, src, (UINTVAL) ~0);
2478 }
2479
2480
2481 /*
2482
2483 =item C<STRING * Parrot_str_escape_truncate(PARROT_INTERP, const STRING *src,
2484 UINTVAL limit)>
2485
2486 Escapes all non-ASCII characters in the given string with backslashed versions,
2487 but limits the length of the output (used for trace output of strings).
2488
2489 =cut
2490
2491 */
2492
2493 PARROT_EXPORT
2494 PARROT_CANNOT_RETURN_NULL
2495 STRING *
Parrot_str_escape_truncate(PARROT_INTERP,ARGIN_NULLOK (const STRING * src),UINTVAL limit)2496 Parrot_str_escape_truncate(PARROT_INTERP,
2497 ARGIN_NULLOK(const STRING *src), UINTVAL limit)
2498 {
2499 ASSERT_ARGS(Parrot_str_escape_truncate)
2500 STRING *result;
2501 UINTVAL i, len, charlen;
2502 String_iter iter;
2503 char hex_buf[HEX_BUF_SIZE];
2504 char *dp;
2505
2506 if (STRING_IS_NULL(src))
2507 return STRINGNULL;
2508
2509 len = src->strlen;
2510
2511 if (len > limit)
2512 len = limit;
2513
2514 /* expect around 2x the chars */
2515 charlen = 2 * len;
2516
2517 if (charlen < HEX_BUF_SIZE)
2518 charlen = HEX_BUF_SIZE;
2519
2520 /* create ascii result */
2521 result = Parrot_str_new_init(interp, NULL, charlen,
2522 Parrot_ascii_encoding_ptr, 0);
2523
2524 /* more work TODO */
2525 STRING_ITER_INIT(interp, &iter);
2526 dp = result->strstart;
2527
2528 for (i = 0; len > 0; --len) {
2529 unsigned c = STRING_iter_get_and_advance(interp, src, &iter);
2530 int hex_len;
2531
2532 if (c < 0x7f) {
2533 /* process ASCII chars */
2534 if (i >= charlen - 2) {
2535 /* resize - still len codepoints to go */
2536 charlen += len * 2 + HEX_BUF_SIZE;
2537 result->bufused = i;
2538 Parrot_gc_reallocate_string_storage(interp, result, charlen);
2539 /* start can change */
2540 dp = result->strstart;
2541 }
2542 switch (c) {
2543 case '\\':
2544 dp[i++] = '\\';
2545 break;
2546 case '\a':
2547 dp[i++] = '\\';
2548 c = 'a';
2549 break;
2550 case '\b':
2551 dp[i++] = '\\';
2552 c = 'b';
2553 break;
2554 case '\n':
2555 dp[i++] = '\\';
2556 c = 'n';
2557 break;
2558 case '\r':
2559 dp[i++] = '\\';
2560 c = 'r';
2561 break;
2562 case '\t':
2563 dp[i++] = '\\';
2564 c = 't';
2565 break;
2566 case '\f':
2567 dp[i++] = '\\';
2568 c = 'f';
2569 break;
2570 case '"':
2571 dp[i++] = '\\';
2572 c = '"';
2573 break;
2574 case 27:
2575 dp[i++] = '\\';
2576 c = 'e';
2577 break;
2578 default:
2579 break;
2580 }
2581 if (c >= 0x20) {
2582 dp[i++] = c;
2583 continue;
2584 }
2585 }
2586
2587 /* escape by appending either \uhhhh or \x{hh...} */
2588
2589 if (c < 0x0100 || c >= 0x10000)
2590 hex_len = snprintf(hex_buf, HEX_BUF_SIZE - 1, "\\x{%x}", c);
2591 else
2592 hex_len = snprintf(hex_buf, HEX_BUF_SIZE - 1, "\\u%04x", c);
2593
2594 if (hex_len < 0)
2595 hex_len = 0;
2596
2597 if (i + hex_len > charlen) {
2598 /* resize - still len codepoints to go */
2599 charlen += len * 2 + HEX_BUF_SIZE;
2600 result->bufused = i;
2601 Parrot_gc_reallocate_string_storage(interp, result, charlen);
2602 /* start can change */
2603 dp = result->strstart;
2604 }
2605
2606 memcpy(dp + i, hex_buf, hex_len);
2607
2608 /* adjust our insert idx */
2609 i += hex_len;
2610
2611 PARROT_ASSERT(i <= charlen);
2612 }
2613
2614 result->bufused = result->strlen = i;
2615 return result;
2616 }
2617
2618 /*
2619
2620 =item C<static void throw_illegal_escape(PARROT_INTERP, const STRING *s)>
2621
2622 =item C<static void throw_illegal_escape_char(PARROT_INTERP, const char c, const
2623 STRING *s)>
2624
2625 Helper functions to avoid repeated throw calls.
2626
2627 =cut
2628
2629 */
2630
2631 PARROT_DOES_NOT_RETURN
2632 PARROT_COLD
2633 static void
throw_illegal_escape(PARROT_INTERP,ARGIN (const STRING * s))2634 throw_illegal_escape(PARROT_INTERP, ARGIN(const STRING *s))
2635 {
2636 ASSERT_ARGS(throw_illegal_escape)
2637 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
2638 "Illegal escape sequence in '%Ss'", s);
2639 }
2640
2641 PARROT_DOES_NOT_RETURN
2642 PARROT_COLD
2643 static void
throw_illegal_escape_char(PARROT_INTERP,const char c,ARGIN (const STRING * s))2644 throw_illegal_escape_char(PARROT_INTERP, const char c, ARGIN(const STRING *s))
2645 {
2646 ASSERT_ARGS(throw_illegal_escape_char)
2647 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_INVALID_CHARACTER,
2648 "Illegal escape sequence \\%c in '%Ss'", c, s);
2649 }
2650
2651 /*
2652
2653 =item C<STRING * Parrot_str_unescape_string(PARROT_INTERP, const STRING *src,
2654 const STR_VTABLE *encoding, UINTVAL flags)>
2655
2656 EXPERIMENTAL, see TT #1628
2657
2658 Unescapes the src string returning a new string with the encoding specified.
2659
2660
2661 =cut
2662
2663 */
2664
2665 PARROT_EXPORT
2666 PARROT_CANNOT_RETURN_NULL
2667 STRING *
Parrot_str_unescape_string(PARROT_INTERP,ARGIN (const STRING * src),ARGIN (const STR_VTABLE * encoding),UINTVAL flags)2668 Parrot_str_unescape_string(PARROT_INTERP, ARGIN(const STRING *src),
2669 ARGIN(const STR_VTABLE *encoding),
2670 UINTVAL flags)
2671 {
2672 ASSERT_ARGS(Parrot_str_unescape_string)
2673
2674 UINTVAL srclen = Parrot_str_byte_length(interp, src);
2675 STRING *result = Parrot_gc_new_string_header(interp, flags);
2676 String_iter itersrc;
2677 String_iter iterdest;
2678 UINTVAL reserved;
2679 int digcount;
2680 char digbuf[9];
2681 int pending;
2682
2683 result->encoding = encoding;
2684 reserved = string_max_bytes(interp, result, srclen);
2685 Parrot_gc_allocate_string_storage(interp, result, reserved);
2686 result->bufused = reserved;
2687
2688 STRING_ITER_INIT(interp, &itersrc);
2689 STRING_ITER_INIT(interp, &iterdest);
2690 while (itersrc.bytepos < srclen) {
2691 INTVAL c = STRING_iter_get_and_advance(interp, src, &itersrc);
2692 INTVAL next;
2693
2694 do {
2695 pending = 0;
2696 next = c;
2697 if (c == '\\') {
2698 if (itersrc.bytepos >= srclen) break;
2699 c = STRING_iter_get_and_advance(interp, src, &itersrc);
2700 switch (c) {
2701 /* Allowed escape sequences */
2702 case 'a': next = '\a'; break; /* \x07 Alarm, beep */
2703 case 'b': next = '\b'; break; /* \x08 Backspace */
2704 case 't': next = '\t'; break; /* \x09 horizontal tab */
2705 case 'n': next = '\n'; break; /* \x0a newline */
2706 case 'v': next = '\v'; break; /* \x0b vertical tab */
2707 case 'f': next = '\f'; break; /* \x0c formfeed */
2708 case 'r': next = '\r'; break; /* \x0d carriage return */
2709 case 'e': next = '\x1B'; break; /* \x1b prefix ansi escape */
2710 /* and previously handled in the default case: */
2711 case '\\': next = c; break; /* \x5c */
2712 case '"': next = c; break; /* \x22 */
2713 case '\'': next = c; break; /* \x27 */
2714 case '?': next = c; break; /* \x3f */
2715 /* Escape character */
2716 case 'c':
2717 if (itersrc.bytepos >= srclen) break;
2718 c = STRING_iter_get_and_advance(interp, src, &itersrc);
2719 /* This assumes ascii-alike encoding */
2720 if (c < 'A' || c > 'Z')
2721 throw_illegal_escape(interp, src);
2722 next = c - 'A' + 1;
2723 break;
2724 case 'x':
2725 digcount = 0;
2726 if (itersrc.bytepos >= srclen)
2727 break;
2728 c = STRING_iter_get_and_advance(interp, src, &itersrc);
2729 if (c == '{') {
2730 /* \x{h..h} 1..8 hex digits */
2731 while (itersrc.bytepos < srclen) {
2732 c = STRING_iter_get_and_advance(interp, src, &itersrc);
2733 if (c == '}')
2734 break;
2735 if (!isxdigit(c))
2736 throw_illegal_escape(interp, src);
2737 if (digcount == 8)
2738 break;
2739 digbuf[digcount++] = c;
2740 }
2741 if (c != '}')
2742 throw_illegal_escape(interp, src);
2743 }
2744 else {
2745 /* \xhh 1..2 hex digits */
2746 pending = 1;
2747 for (digcount = 0; digcount < 2;) {
2748 if (!isxdigit(c))
2749 break;
2750 digbuf[digcount] = c;
2751 ++digcount;
2752 if (itersrc.bytepos >= srclen) {
2753 pending = 0;
2754 break;
2755 }
2756 c = STRING_iter_get_and_advance(interp, src, &itersrc);
2757 }
2758 }
2759 if (digcount == 0)
2760 throw_illegal_escape(interp, src);
2761 digbuf[digcount] = '\0';
2762 next = strtol(digbuf, NULL, 16);
2763 break;
2764 case 'u':
2765 /* \uhhhh 4 hex digits */
2766 for (digcount = 0; digcount < 4; ++digcount) {
2767 if (itersrc.bytepos >= srclen) break;
2768 c = STRING_iter_get_and_advance(interp, src, &itersrc);
2769 if (!isxdigit(c))
2770 throw_illegal_escape(interp, src);
2771 digbuf[digcount] = c;
2772 }
2773 digbuf[digcount] = '\0';
2774 next = strtol(digbuf, NULL, 16);
2775 break;
2776 case 'U':
2777 /* \Uhhhhhhhh 8 hex digits */
2778 for (digcount = 0; digcount < 8; ++digcount) {
2779 if (itersrc.bytepos >= srclen) break;
2780 c = STRING_iter_get_and_advance(interp, src, &itersrc);
2781 if (!isxdigit(c))
2782 throw_illegal_escape(interp, src);
2783 digbuf[digcount] = c;
2784 }
2785 digbuf[digcount] = '\0';
2786 next = strtol(digbuf, NULL, 16);
2787 break;
2788 case '0': case '1': case '2': case '3':
2789 case '4': case '5': case '6': case '7':
2790 /* \ooo 1..3 oct digits */
2791 digbuf[0] = c;
2792 for (digcount = 1; digcount < 3; ++digcount) {
2793 if (itersrc.bytepos >= srclen) break;
2794 c = STRING_iter_get_and_advance(interp, src, &itersrc);
2795 if (c < '0' || c > '7')
2796 break;
2797 digbuf[digcount] = c;
2798 }
2799 digbuf[digcount] = '\0';
2800 next = strtol(digbuf, NULL, 8);
2801 if (itersrc.bytepos < srclen && digcount < 3)
2802 pending = 1;
2803 break;
2804 default:
2805 /* Die with Illegal escape sequence since 6.9.0 but allow quoting of
2806 special chars. */
2807 /* The C standard requires such "invalid" escape sequences to be diagnosed
2808 (i.e., the compiler must print an error message). GH #1103 */
2809 if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
2810 /* next = c; for a deprecation cycle? */
2811 /* catch inproper use of \i, \O, \o */
2812 throw_illegal_escape_char(interp, c, src);
2813 }
2814 else {
2815 next = c; /* ignore the \ in special chars like \[, \}, ... */
2816 }
2817 }
2818 }
2819 STRING_iter_set_and_advance(interp, result, &iterdest, next);
2820 } while (pending);
2821 }
2822 result->bufused = iterdest.bytepos;
2823 result->strlen = iterdest.charpos;
2824 return result;
2825 }
2826
2827 /*
2828
2829 =item C<STRING * Parrot_str_unescape(PARROT_INTERP, const char *cstring, char
2830 delimiter, const char *enc_char)>
2831
2832 Unescapes the specified C string. These sequences are covered:
2833
2834 \xhh 1..2 hex digits
2835 \ooo 1..3 oct digits
2836 \cX control char X
2837 \x{h..h} 1..8 hex digits
2838 \uhhhh 4 hex digits
2839 \Uhhhhhhhh 8 hex digits
2840 \a, \b, \t, \n, \v, \f, \r, \e
2841
2842 These sequences are not escaped: C<\\ \" \' \?>
2843
2844 All other escape sequences within C<[a-zA-Z]> are illegal.
2845
2846 =cut
2847
2848 */
2849
2850 PARROT_EXPORT
2851 PARROT_CANNOT_RETURN_NULL
2852 STRING *
Parrot_str_unescape(PARROT_INTERP,ARGIN (const char * cstring),char delimiter,ARGIN_NULLOK (const char * enc_char))2853 Parrot_str_unescape(PARROT_INTERP,
2854 ARGIN(const char *cstring), char delimiter, ARGIN_NULLOK(const char *enc_char))
2855 {
2856 ASSERT_ARGS(Parrot_str_unescape)
2857
2858 STRING *src;
2859 const STR_VTABLE *encoding, *src_encoding;
2860 size_t clength = strlen(cstring);
2861
2862 if (delimiter && clength)
2863 --clength;
2864
2865 if (enc_char == NULL) {
2866 encoding = Parrot_default_encoding_ptr;
2867 }
2868 else {
2869 encoding = Parrot_find_encoding(interp, enc_char);
2870
2871 if (!encoding)
2872 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
2873 "Can't make '%s' encoding strings", enc_char);
2874 }
2875
2876 if (encoding->max_bytes_per_codepoint == 1)
2877 src_encoding = encoding;
2878 else
2879 src_encoding = Parrot_utf8_encoding_ptr;
2880
2881 src = Parrot_str_new_init(interp, cstring, clength, src_encoding,
2882 PObj_external_FLAG);
2883
2884 return Parrot_str_unescape_string(interp, src, encoding,
2885 PObj_constant_FLAG);
2886 }
2887
2888
2889 /*
2890
2891 =item C<STRING * Parrot_str_upcase(PARROT_INTERP, const STRING *s)>
2892
2893 Returns a copy of the specified Parrot string converted to upper case.
2894 Non-caseable characters are left unchanged.
2895
2896 =cut
2897
2898 */
2899
2900 PARROT_EXPORT
2901 PARROT_CANNOT_RETURN_NULL
2902 PARROT_MALLOC
2903 STRING *
Parrot_str_upcase(PARROT_INTERP,ARGIN_NULLOK (const STRING * s))2904 Parrot_str_upcase(PARROT_INTERP, ARGIN_NULLOK(const STRING *s))
2905 {
2906 ASSERT_ARGS(Parrot_str_upcase)
2907 if (STRING_IS_NULL(s))
2908 Parrot_ex_throw_from_c_noargs(interp, EXCEPTION_UNEXPECTED_NULL,
2909 "Can't upcase NULL string");
2910 else {
2911 STRING * const res = STRING_upcase(interp, s);
2912 res->hashval = 0;
2913 return res;
2914 }
2915 }
2916
2917
2918 /*
2919
2920 =item C<STRING * Parrot_str_downcase(PARROT_INTERP, const STRING *s)>
2921
2922 Returns a copy of the specified Parrot string converted to lower case.
2923 Non-caseable characters are left unchanged.
2924
2925 =cut
2926
2927 */
2928
2929 PARROT_EXPORT
2930 PARROT_CANNOT_RETURN_NULL
2931 PARROT_MALLOC
2932 STRING *
Parrot_str_downcase(PARROT_INTERP,ARGIN_NULLOK (const STRING * s))2933 Parrot_str_downcase(PARROT_INTERP, ARGIN_NULLOK(const STRING *s))
2934 {
2935 ASSERT_ARGS(Parrot_str_downcase)
2936
2937 if (STRING_IS_NULL(s))
2938 Parrot_ex_throw_from_c_noargs(interp, EXCEPTION_UNEXPECTED_NULL,
2939 "Can't downcase NULL string");
2940 else {
2941 STRING * const res = STRING_downcase(interp, s);
2942 res->hashval = 0;
2943 return res;
2944 }
2945 }
2946
2947
2948 /*
2949
2950 =item C<STRING * Parrot_str_titlecase(PARROT_INTERP, const STRING *s)>
2951
2952 Returns a copy of the specified Parrot string converted to title case.
2953 Non-caseable characters are left unchanged.
2954
2955 =cut
2956
2957 */
2958
2959 PARROT_EXPORT
2960 PARROT_CANNOT_RETURN_NULL
2961 PARROT_MALLOC
2962 STRING *
Parrot_str_titlecase(PARROT_INTERP,ARGIN_NULLOK (const STRING * s))2963 Parrot_str_titlecase(PARROT_INTERP, ARGIN_NULLOK(const STRING *s))
2964 {
2965 ASSERT_ARGS(Parrot_str_titlecase)
2966
2967 if (STRING_IS_NULL(s))
2968 Parrot_ex_throw_from_c_noargs(interp, EXCEPTION_UNEXPECTED_NULL,
2969 "Can't titlecase NULL string");
2970 else {
2971 STRING * const res = STRING_titlecase(interp, s);
2972 res->hashval = 0;
2973 return res;
2974 }
2975 }
2976
2977
2978 /*
2979
2980 =item C<const char * Parrot_str_cstring(PARROT_INTERP, const STRING *str)>
2981
2982 Returns a C string from a Parrot string. Both sides are treated
2983 as constants -- i.e. do not resize the result.
2984
2985 =cut
2986
2987 */
2988
2989 PARROT_EXPORT
2990 PARROT_PURE_FUNCTION
2991 PARROT_CANNOT_RETURN_NULL
2992 const char *
Parrot_str_cstring(SHIM_INTERP,ARGIN (const STRING * str))2993 Parrot_str_cstring(SHIM_INTERP, ARGIN(const STRING *str))
2994 {
2995 ASSERT_ARGS(Parrot_str_cstring)
2996 /* TODO handle NULL and friends */
2997 return str->strstart;
2998 }
2999
3000
3001 /*
3002
3003 =item C<INTVAL Parrot_str_is_cclass(PARROT_INTERP, INTVAL flags, const STRING
3004 *s, UINTVAL offset)>
3005
3006 Returns 1 if the codepoint of string C<s> at given offset is in the given
3007 character class C<flags>. See also F<include/parrot/cclass.h> for possible
3008 character classes. Returns 0 otherwise, or if the string is empty or NULL.
3009
3010 =cut
3011
3012 */
3013
3014 PARROT_EXPORT
3015 PARROT_WARN_UNUSED_RESULT
3016 INTVAL
Parrot_str_is_cclass(PARROT_INTERP,INTVAL flags,ARGIN (const STRING * s),UINTVAL offset)3017 Parrot_str_is_cclass(PARROT_INTERP, INTVAL flags,
3018 ARGIN(const STRING *s), UINTVAL offset)
3019 {
3020 ASSERT_ARGS(Parrot_str_is_cclass)
3021
3022 if (!Parrot_str_byte_length(interp, s))
3023 return 0;
3024
3025 return STRING_is_cclass(interp, flags, s, offset);
3026 }
3027
3028
3029 /*
3030
3031 =item C<INTVAL Parrot_str_find_cclass(PARROT_INTERP, INTVAL flags, const STRING
3032 *s, UINTVAL offset, UINTVAL count)>
3033
3034 Finds the first occurrence of the given character class in C<flags> in the
3035 string, and returns its glyph-wise index.
3036
3037 =cut
3038
3039 */
3040
3041 PARROT_EXPORT
3042 PARROT_WARN_UNUSED_RESULT
3043 INTVAL
Parrot_str_find_cclass(PARROT_INTERP,INTVAL flags,ARGIN_NULLOK (const STRING * s),UINTVAL offset,UINTVAL count)3044 Parrot_str_find_cclass(PARROT_INTERP, INTVAL flags, ARGIN_NULLOK(const STRING *s),
3045 UINTVAL offset, UINTVAL count)
3046 {
3047 ASSERT_ARGS(Parrot_str_find_cclass)
3048
3049 if (STRING_IS_NULL(s))
3050 return -1;
3051
3052 return STRING_find_cclass(interp, flags, s, offset, count);
3053 }
3054
3055
3056 /*
3057
3058 =item C<INTVAL Parrot_str_find_not_cclass(PARROT_INTERP, INTVAL flags, const
3059 STRING *s, UINTVAL offset, UINTVAL count)>
3060
3061 Finds the first occurrence of the a character I<not> in the given character
3062 class in C<flags> in the string starting from C<offset> and looking at C<count>
3063 positions, and returns its glyph-wise index. Returns C<offset + count>, if not
3064 found.
3065
3066 =cut
3067
3068 */
3069
3070 PARROT_EXPORT
3071 PARROT_WARN_UNUSED_RESULT
3072 INTVAL
Parrot_str_find_not_cclass(PARROT_INTERP,INTVAL flags,ARGIN_NULLOK (const STRING * s),UINTVAL offset,UINTVAL count)3073 Parrot_str_find_not_cclass(PARROT_INTERP, INTVAL flags,
3074 ARGIN_NULLOK(const STRING *s), UINTVAL offset, UINTVAL count)
3075 {
3076 ASSERT_ARGS(Parrot_str_find_not_cclass)
3077
3078 if (STRING_IS_NULL(s))
3079 return -1;
3080
3081 return STRING_find_not_cclass(interp, flags, s, offset, count);
3082 }
3083
3084
3085 /*
3086
3087 =item C<STRING* Parrot_str_change_encoding(PARROT_INTERP, STRING *src, INTVAL
3088 encoding_nr)>
3089
3090 Converts C<src> to the given encoding and returns the result as a new string.
3091
3092 =cut
3093
3094 */
3095
3096 PARROT_EXPORT
3097 PARROT_WARN_UNUSED_RESULT
3098 PARROT_CAN_RETURN_NULL
3099 STRING*
Parrot_str_change_encoding(PARROT_INTERP,ARGMOD_NULLOK (STRING * src),INTVAL encoding_nr)3100 Parrot_str_change_encoding(PARROT_INTERP, ARGMOD_NULLOK(STRING *src),
3101 INTVAL encoding_nr)
3102 {
3103 ASSERT_ARGS(Parrot_str_change_encoding)
3104 const STR_VTABLE *new_encoding;
3105
3106 if (STRING_IS_NULL(src))
3107 return STRINGNULL;
3108
3109 new_encoding = Parrot_get_encoding(interp, encoding_nr);
3110
3111 if (!new_encoding)
3112 Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_INVALID_ENCODING,
3113 "encoding #%d not found", (int) encoding_nr);
3114
3115 if (new_encoding == src->encoding)
3116 return src;
3117
3118 return new_encoding->to_encoding(interp, src);
3119 }
3120
3121
3122 /*
3123
3124 =item C<STRING * Parrot_str_compose(PARROT_INTERP, const STRING *src)>
3125
3126 Normalizes the string.
3127
3128 =cut
3129
3130 */
3131
3132 PARROT_EXPORT
3133 PARROT_WARN_UNUSED_RESULT
3134 PARROT_CANNOT_RETURN_NULL
3135 STRING *
Parrot_str_compose(PARROT_INTERP,ARGIN_NULLOK (const STRING * src))3136 Parrot_str_compose(PARROT_INTERP, ARGIN_NULLOK(const STRING *src))
3137 {
3138 ASSERT_ARGS(Parrot_str_compose)
3139
3140 if (STRING_IS_NULL(src))
3141 return STRINGNULL;
3142
3143 if (src->strlen == 0)
3144 return CONST_STRING(interp, "");
3145
3146 return STRING_compose(interp, src);
3147 }
3148
3149
3150 /*
3151
3152 =item C<STRING* Parrot_str_join(PARROT_INTERP, STRING *j, PMC *ar)>
3153
3154 Joins the elements of the array C<ar> as strings with the string C<j> between
3155 them, returning the result.
3156
3157 =cut
3158
3159 */
3160
3161 PARROT_EXPORT
3162 PARROT_WARN_UNUSED_RESULT
3163 PARROT_CANNOT_RETURN_NULL
3164 STRING*
Parrot_str_join(PARROT_INTERP,ARGIN_NULLOK (STRING * j),ARGIN (PMC * ar))3165 Parrot_str_join(PARROT_INTERP, ARGIN_NULLOK(STRING *j), ARGIN(PMC *ar))
3166 {
3167 ASSERT_ARGS(Parrot_str_join)
3168
3169 if (STRING_IS_NULL(j)) {
3170 PMC *sb = Parrot_pmc_new_init(interp, enum_class_StringBuilder, ar);
3171 return VTABLE_get_string(interp, sb);
3172 }
3173 else {
3174 PMC *sb;
3175 STRING *first;
3176 const int count = VTABLE_elements(interp, ar);
3177 INTVAL length, j_length;
3178 int i;
3179
3180 if (count == 0)
3181 return Parrot_str_new_noinit(interp, 0);
3182
3183 first = VTABLE_get_string_keyed_int(interp, ar, 0);
3184 length = Parrot_str_byte_length(interp, first);
3185 j_length = Parrot_str_byte_length(interp, j);
3186
3187 /* it's an approximation, but it doesn't hurt */
3188 sb = Parrot_pmc_new_init_int(interp, enum_class_StringBuilder,
3189 (length + j_length) * count);
3190
3191 VTABLE_push_string(interp, sb, first);
3192
3193 for (i = 1; i < count; ++i) {
3194 STRING *part = VTABLE_get_string_keyed_int(interp, ar, i);
3195 if (j_length)
3196 VTABLE_push_string(interp, sb, j);
3197
3198 if (part->strlen)
3199 VTABLE_push_string(interp, sb, part);
3200 }
3201
3202 return VTABLE_get_string(interp, sb);
3203 }
3204 }
3205
3206
3207 /*
3208
3209 =item C<PMC* Parrot_str_split(PARROT_INTERP, const STRING *delim, const STRING
3210 *str)>
3211
3212 Splits the string C<str> at the delimiter C<delim>, returning a
3213 C<ResizableStringArray>, or his mapped type in the current HLL, of results.
3214 Returns PMCNULL if the string or the delimiter is NULL.
3215
3216 =cut
3217
3218 */
3219
3220 PARROT_EXPORT
3221 PARROT_WARN_UNUSED_RESULT
3222 PARROT_CANNOT_RETURN_NULL
3223 PMC*
Parrot_str_split(PARROT_INTERP,ARGIN_NULLOK (const STRING * delim),ARGIN_NULLOK (const STRING * str))3224 Parrot_str_split(PARROT_INTERP,
3225 ARGIN_NULLOK(const STRING *delim), ARGIN_NULLOK(const STRING *str))
3226 {
3227 ASSERT_ARGS(Parrot_str_split)
3228 PMC *res;
3229 STRING *tstr;
3230 UINTVAL slen, dlen;
3231 String_iter iter;
3232
3233 if (STRING_IS_NULL(delim) || STRING_IS_NULL(str))
3234 return PMCNULL;
3235
3236 res = Parrot_pmc_new(interp,
3237 Parrot_hll_get_ctx_HLL_type(interp, enum_class_ResizableStringArray));
3238 slen = Parrot_str_length(interp, str);
3239
3240 if (!slen)
3241 return res;
3242
3243 STRING_ITER_INIT(interp, &iter);
3244 dlen = Parrot_str_length(interp, delim);
3245
3246 if (dlen == 0) {
3247 VTABLE_set_integer_native(interp, res, slen);
3248
3249 do {
3250 const String_iter old_iter = iter;
3251
3252 STRING_iter_skip(interp, str, &iter, 1);
3253 tstr = Parrot_str_iter_substr(interp, str, &old_iter, &iter);
3254 VTABLE_set_string_keyed_int(interp, res, old_iter.charpos, tstr);
3255 } while (iter.charpos < slen);
3256
3257 return res;
3258 }
3259
3260 do {
3261 String_iter start, end;
3262
3263 start = iter;
3264 if (Parrot_str_iter_index(interp, str, &start, &end, delim) < 0)
3265 break;
3266
3267 tstr = Parrot_str_iter_substr(interp, str, &iter, &start);
3268 VTABLE_push_string(interp, res, tstr);
3269 iter = end;
3270 } while (iter.charpos < slen);
3271
3272 tstr = Parrot_str_iter_substr(interp, str, &iter, NULL);
3273 VTABLE_push_string(interp, res, tstr);
3274
3275 return res;
3276 }
3277
3278
3279 /*
3280
3281 =item C<STRING* Parrot_str_from_uint(PARROT_INTERP, char *tc, UHUGEINTVAL num,
3282 unsigned int base, int minus)>
3283
3284 Returns C<num> converted to a Parrot C<STRING>.
3285
3286 Note that C<base> must be defined (a default of 10 is not assumed). The caller
3287 has to verify that C<< base >= 2 && base <= 36 >> The buffer C<tc> must be at
3288 least C<sizeof (UHUGEINTVAL)*8 + 1> chars big.
3289
3290 If C<minus> is true, then C<-> is prepended to the string representation.
3291
3292 =cut
3293
3294 */
3295
3296 PARROT_WARN_UNUSED_RESULT
3297 PARROT_CANNOT_RETURN_NULL
3298 STRING*
Parrot_str_from_uint(PARROT_INTERP,ARGOUT (char * tc),UHUGEINTVAL num,unsigned int base,int minus)3299 Parrot_str_from_uint(PARROT_INTERP, ARGOUT(char *tc), UHUGEINTVAL num,
3300 unsigned int base, int minus)
3301 {
3302 ASSERT_ARGS(Parrot_str_from_uint)
3303
3304 /* the buffer must be at least as long as this */
3305 char *p = tc + sizeof (UHUGEINTVAL)*8 + 1;
3306 const char * const tail = p;
3307
3308 PARROT_ASSERT(base >= 2 && base <= 36);
3309
3310 do {
3311 const char cur = (char)(num % base);
3312
3313 if (cur < 10)
3314 *--p = (char)('0' + cur);
3315 else
3316 *--p = (char)('a' + cur - 10);
3317
3318 } while (num /= base);
3319
3320 if (minus)
3321 *--p = '-';
3322
3323 return Parrot_str_new_init(interp, p, (UINTVAL)(tail - p),
3324 Parrot_default_encoding_ptr, 0);
3325 }
3326
3327
3328 /*
3329
3330 =item C<STRING * Parrot_str_from_int_base(PARROT_INTERP, char *tc, HUGEINTVAL
3331 num, unsigned int base)>
3332
3333 Returns C<num> converted to a Parrot C<STRING>.
3334
3335 Note that C<base> must be defined (a default of 10 is not assumed).
3336
3337 If C<< num < 0 >>, then C<-> is prepended to the string representation.
3338
3339 =cut
3340
3341 */
3342
3343 PARROT_WARN_UNUSED_RESULT
3344 PARROT_CANNOT_RETURN_NULL
3345 STRING *
Parrot_str_from_int_base(PARROT_INTERP,ARGOUT (char * tc),HUGEINTVAL num,unsigned int base)3346 Parrot_str_from_int_base(PARROT_INTERP, ARGOUT(char *tc), HUGEINTVAL num, unsigned int base)
3347 {
3348 ASSERT_ARGS(Parrot_str_from_int_base)
3349 const int is_neg = (num < 0);
3350
3351 if (is_neg)
3352 num = -num;
3353
3354 return Parrot_str_from_uint(interp, tc, (UHUGEINTVAL)num, base, is_neg);
3355 }
3356
3357 /*
3358
3359 =back
3360
3361 =head2 GC registry interface
3362
3363 =over 4
3364
3365 =item C<void Parrot_str_gc_register(PARROT_INTERP, STRING *s)>
3366
3367 Registers the STRING from the interpreter's GC registry to prevent it from
3368 being collected.
3369
3370 =cut
3371
3372 */
3373
3374
3375 PARROT_EXPORT
3376 void
Parrot_str_gc_register(PARROT_INTERP,ARGIN (STRING * s))3377 Parrot_str_gc_register(PARROT_INTERP, ARGIN(STRING *s))
3378 {
3379 ASSERT_ARGS(Parrot_str_gc_register)
3380 /* Better not trigger a GC run with a potentially unanchored PMC */
3381 Parrot_block_GC_mark(interp);
3382
3383 PARROT_ASSERT(interp->gc_registry);
3384
3385 VTABLE_set_pmc_keyed_str(interp, interp->gc_registry, s, PMCNULL);
3386 Parrot_unblock_GC_mark(interp);
3387 }
3388
3389 /*
3390
3391 =item C<void Parrot_str_gc_unregister(PARROT_INTERP, STRING *s)>
3392
3393 Unregisters the STRING from the interpreter's GC registry.
3394
3395 =cut
3396
3397 */
3398
3399 PARROT_EXPORT
3400 void
Parrot_str_gc_unregister(PARROT_INTERP,ARGIN (STRING * s))3401 Parrot_str_gc_unregister(PARROT_INTERP, ARGIN(STRING *s))
3402 {
3403 ASSERT_ARGS(Parrot_str_gc_unregister)
3404 PARROT_ASSERT(interp->gc_registry);
3405
3406 VTABLE_delete_keyed_str(interp, interp->gc_registry, s);
3407 }
3408
3409
3410 /*
3411
3412 =back
3413
3414 =head1 SEE ALSO
3415
3416 =over
3417
3418 =item F<include/parrot/string.h>
3419
3420 =item F<include/parrot/string_funcs.h>
3421
3422 =item F<docs/strings.pod>
3423
3424 =back
3425
3426 =cut
3427
3428 */
3429
3430
3431 /*
3432 * Local variables:
3433 * c-file-style: "parrot"
3434 * End:
3435 * vim: expandtab shiftwidth=4 cinoptions='\:2=2' :
3436 */
3437