1 /*
2 Copyright (C) 2001-2014, Parrot Foundation.
3 
4 =head1 NAME
5 
6 src/string/api.c - Parrot Strings
7 
8 =head1 DESCRIPTION
9 
10 This file implements the non-ICU parts of the Parrot string subsystem.
11 
12 Note that C<bufstart> and C<buflen> are used by the memory subsystem. The
13 string functions may only use C<buflen> to determine if there is some space
14 left beyond C<bufused>. This is the I<only> valid usage of these two data
15 members, beside setting C<bufstart>/C<buflen> for external strings.
16 
17 =head2 Functions
18 
19 =over 4
20 
21 =cut
22 
23 */
24 
25 #include <stdio.h>
26 #include <math.h>
27 
28 #include "parrot/parrot.h"
29 #include "parrot/events.h"
30 #include "private_cstring.h"
31 #include "api.str"
32 
33 /* for parrot/interpreter.h */
34 STRING *STRINGNULL;
35 
36 #define nonnull_encoding_name(s) (s) ? (s)->encoding->name : "null string"
37 #define ASSERT_STRING_SANITY(s) \
38     PARROT_ASSERT((s)->encoding); \
39     PARROT_ASSERT(!PObj_on_free_list_TEST(s))
40 
41 /* HEADERIZER HFILE: include/parrot/string_funcs.h */
42 
43 /* HEADERIZER BEGIN: static */
44 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
45 
46 PARROT_WARN_UNUSED_RESULT
47 PARROT_PURE_FUNCTION
48 static INTVAL string_max_bytes(PARROT_INTERP,
49     ARGIN(const STRING *s),
50     UINTVAL nchars)
51         __attribute__nonnull__(2);
52 
53 PARROT_INLINE
54 PARROT_IGNORABLE_RESULT
55 PARROT_CAN_RETURN_NULL
56 PARROT_PURE_FUNCTION
57 static const STR_VTABLE * string_rep_compatible(
58     ARGIN(const STRING *a),
59     ARGIN(const STRING *b))
60         __attribute__nonnull__(1)
61         __attribute__nonnull__(2);
62 
63 PARROT_DOES_NOT_RETURN
64 PARROT_COLD
65 static void throw_illegal_escape(PARROT_INTERP, ARGIN(const STRING *s))
66         __attribute__nonnull__(1)
67         __attribute__nonnull__(2);
68 
69 PARROT_DOES_NOT_RETURN
70 PARROT_COLD
71 static void throw_illegal_escape_char(PARROT_INTERP,
72     const char c,
73     ARGIN(const STRING *s))
74         __attribute__nonnull__(1)
75         __attribute__nonnull__(3);
76 
77 #define ASSERT_ARGS_string_max_bytes __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
78        PARROT_ASSERT_ARG(s))
79 #define ASSERT_ARGS_string_rep_compatible __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
80        PARROT_ASSERT_ARG(a) \
81     , PARROT_ASSERT_ARG(b))
82 #define ASSERT_ARGS_throw_illegal_escape __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
83        PARROT_ASSERT_ARG(interp) \
84     , PARROT_ASSERT_ARG(s))
85 #define ASSERT_ARGS_throw_illegal_escape_char __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
86        PARROT_ASSERT_ARG(interp) \
87     , PARROT_ASSERT_ARG(s))
88 /* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
89 /* HEADERIZER END: static */
90 
91 /*
92   Buffer size for hexadecimal conversions:
93   Expected at most 8 characters plus room for some prefixes and suffixes.
94 */
95 #define HEX_BUF_SIZE 16
96 
97 /*
98 
99 =item C<INTVAL Parrot_str_is_null(PARROT_INTERP, const STRING *s)>
100 
101 Tests if the given STRING is STRINGNULL.
102 
103 =cut
104 
105 */
106 
107 PARROT_EXPORT
108 PARROT_HOT
109 PARROT_PURE_FUNCTION
110 INTVAL
Parrot_str_is_null(SHIM_INTERP,ARGIN_NULLOK (const STRING * s))111 Parrot_str_is_null(SHIM_INTERP, ARGIN_NULLOK(const STRING *s))
112 {
113     ASSERT_ARGS(Parrot_str_is_null)
114     return STRING_IS_NULL(s);
115 }
116 
117 
118 /*
119 
120 =back
121 
122 =head2 Basic String Functions
123 
124 Creation, enlargement, etc.
125 
126 =over 4
127 
128 =item C<void Parrot_str_init(PARROT_INTERP)>
129 
130 Initializes the Parrot string subsystem.
131 
132 =cut
133 
134 */
135 
136 PARROT_EXPORT
137 void
Parrot_str_init(PARROT_INTERP)138 Parrot_str_init(PARROT_INTERP)
139 {
140     ASSERT_ARGS(Parrot_str_init)
141     Hash        *const_cstring_hash;
142     size_t       i;
143     const size_t n_parrot_cstrings =
144         sizeof (parrot_cstrings) / sizeof (parrot_cstrings[0]);
145 
146     if (interp->parent_interpreter)
147         interp->hash_seed = interp->parent_interpreter->hash_seed;
148 
149     /* interp is initialized from zeroed memory, so this is fine */
150     else if (interp->hash_seed == 0) {
151         interp->hash_seed = Parrot_get_entropy(interp);
152     }
153 
154     /* initialize the constant string table */
155     if (interp->parent_interpreter) {
156         interp->const_cstring_table =
157             interp->parent_interpreter->const_cstring_table;
158         interp->const_cstring_hash  =
159             interp->parent_interpreter->const_cstring_hash;
160         return;
161     }
162 
163     /* Set up the cstring cache, then load the basic encodings */
164     const_cstring_hash          = Parrot_hash_create_sized(interp,
165                                         enum_type_PMC,
166                                         Hash_key_type_cstring,
167                                         n_parrot_cstrings);
168     interp->const_cstring_hash  = const_cstring_hash;
169     Parrot_encodings_init(interp);
170 
171     /* initialize STRINGNULL, but not in the constant table */
172     STRINGNULL = Parrot_str_new_init(interp, NULL, 0,
173                        Parrot_null_encoding_ptr,
174                        PObj_constant_FLAG);
175 
176     interp->const_cstring_table =
177         mem_gc_allocate_n_zeroed_typed(interp, n_parrot_cstrings, STRING *);
178     PARROT_ASSERT(interp->const_cstring_table != NULL);
179 
180     for (i = 0; i < n_parrot_cstrings; ++i) {
181         DECL_CONST_CAST;
182         STRING * const s =
183             Parrot_str_new_init(interp,
184                 parrot_cstrings[i].string,
185                 parrot_cstrings[i].len,
186                 Parrot_default_encoding_ptr,
187                 PObj_external_FLAG|PObj_constant_FLAG);
188         Parrot_hash_put(interp, const_cstring_hash,
189             PARROT_const_cast(char *, parrot_cstrings[i].string), (void *)s);
190         interp->const_cstring_table[i] = s;
191     }
192 }
193 
194 
195 /*
196 
197 =item C<void Parrot_str_finish(PARROT_INTERP)>
198 
199 De-Initializes the Parrot string subsystem.
200 
201 =cut
202 
203 */
204 
205 PARROT_EXPORT
206 void
Parrot_str_finish(PARROT_INTERP)207 Parrot_str_finish(PARROT_INTERP)
208 {
209     ASSERT_ARGS(Parrot_str_finish)
210 
211     /* all are shared between interpreters */
212     if (!interp->parent_interpreter) {
213         mem_internal_free(interp->const_cstring_table);
214         interp->const_cstring_table = NULL;
215         Parrot_deinit_encodings(interp);
216         Parrot_hash_destroy(interp, interp->const_cstring_hash);
217     }
218 }
219 
220 
221 /*
222 
223 =item C<STRING * Parrot_str_new_noinit(PARROT_INTERP, UINTVAL capacity)>
224 
225 Creates and returns an empty Parrot string.
226 
227 =cut
228 
229 */
230 
231 PARROT_EXPORT
232 PARROT_CANNOT_RETURN_NULL
233 STRING *
Parrot_str_new_noinit(PARROT_INTERP,UINTVAL capacity)234 Parrot_str_new_noinit(PARROT_INTERP, UINTVAL capacity)
235 {
236     ASSERT_ARGS(Parrot_str_new_noinit)
237     STRING * const s = Parrot_gc_new_string_header(interp, 0);
238 
239     s->encoding = Parrot_default_encoding_ptr;
240 
241     Parrot_gc_allocate_string_storage(interp, s,
242         (size_t)string_max_bytes(interp, s, capacity));
243 
244     return s;
245 }
246 
247 
248 /*
249 
250 =item C<static const STR_VTABLE * string_rep_compatible(const STRING *a, const
251 STRING *b)>
252 
253 Find the "lowest" possible encoding for the given string. E.g.
254 
255   ascii <op> utf8 => utf8
256                   => ascii, B<if> C<STRING *b> has ascii chars only.
257 
258 Returns NULL, if no compatible string representation can be found.
259 
260 =cut
261 
262 */
263 
264 PARROT_INLINE
265 PARROT_IGNORABLE_RESULT
266 PARROT_CAN_RETURN_NULL
267 PARROT_PURE_FUNCTION
268 static const STR_VTABLE *
string_rep_compatible(ARGIN (const STRING * a),ARGIN (const STRING * b))269 string_rep_compatible(ARGIN(const STRING *a), ARGIN(const STRING *b))
270 {
271     ASSERT_ARGS(string_rep_compatible)
272 
273     PARROT_ASSERT(a->encoding && b->encoding);
274 
275     if (a->encoding == b->encoding)
276         return a->encoding;
277 
278     /* a table could possibly simplify the logic */
279 
280     if (STRING_max_bytes_per_codepoint(a) == 1
281     &&  STRING_max_bytes_per_codepoint(b) == 1) {
282         /* Return the "largest" encoding where ascii < latin1 < binary */
283 
284         if (b->encoding == Parrot_ascii_encoding_ptr)
285             return a->encoding;
286         if (a->encoding == Parrot_ascii_encoding_ptr)
287             return b->encoding;
288         if (a->encoding == Parrot_binary_encoding_ptr)
289             return a->encoding;
290         if (b->encoding == Parrot_binary_encoding_ptr)
291             return b->encoding;
292     }
293     else {
294         /* UTF-8 strings are ASCII compatible if their byte length equals
295            their codepoint length. This is a nice trick but it can cause many
296            surprises when UTF-8 strings are suddenly "downgraded" to ASCII
297            strings. */
298 
299         if (a->encoding == Parrot_utf8_encoding_ptr
300         &&  b->encoding == Parrot_ascii_encoding_ptr) {
301             if (a->strlen == a->bufused) {
302                 return b->encoding;
303             }
304             return a->encoding;
305         }
306 
307         if (b->encoding == Parrot_utf8_encoding_ptr
308         &&  a->encoding == Parrot_ascii_encoding_ptr) {
309             if (b->strlen == b->bufused) {
310                 return a->encoding;
311             }
312             return b->encoding;
313         }
314     }
315 
316     return NULL;
317 }
318 
319 /*
320 
321 =item C<const STR_VTABLE * Parrot_str_rep_compatible(PARROT_INTERP, const STRING
322 *a, const STRING *b)>
323 
324 Find the "lowest" possible encoding for the given string. E.g.
325 
326   ascii <op> utf8 => utf8
327                   => ascii, B<if> C<STRING *b> has ascii chars only.
328 
329 Returns NULL, if no compatible string representation can be found.
330 
331 =cut
332 
333 */
334 
335 PARROT_EXPORT
336 PARROT_IGNORABLE_RESULT
337 PARROT_CAN_RETURN_NULL
338 PARROT_PURE_FUNCTION
339 const STR_VTABLE *
Parrot_str_rep_compatible(SHIM_INTERP,ARGIN (const STRING * a),ARGIN (const STRING * b))340 Parrot_str_rep_compatible(SHIM_INTERP, ARGIN(const STRING *a), ARGIN(const STRING *b))
341 {
342     ASSERT_ARGS(Parrot_str_rep_compatible)
343     return string_rep_compatible(a, b);
344 }
345 
346 /*
347 
348 =item C<STRING * Parrot_str_clone(PARROT_INTERP, const STRING *s)>
349 
350 Helper function to clone string.
351 
352 =cut
353 
354 */
355 
356 PARROT_WARN_UNUSED_RESULT
357 PARROT_CANNOT_RETURN_NULL
358 STRING *
Parrot_str_clone(PARROT_INTERP,ARGIN_NULLOK (const STRING * s))359 Parrot_str_clone(PARROT_INTERP, ARGIN_NULLOK(const STRING *s))
360 {
361     ASSERT_ARGS(Parrot_str_clone)
362     size_t  alloc_size;
363     STRING *result;
364 
365     if (STRING_IS_NULL(s))
366         return STRINGNULL;
367 
368     result     = Parrot_gc_new_string_header(interp, 0);
369     alloc_size = s->bufused;
370 
371     if (alloc_size) {
372         /* Allocate new chunk of memory */
373         Parrot_gc_allocate_string_storage(interp, result, alloc_size);
374 
375         /* and copy it over */
376         memcpy(result->strstart, s->strstart, alloc_size);
377     }
378 
379     result->bufused  = alloc_size;
380     result->strlen   = s->strlen;
381     result->hashval  = s->hashval;
382     result->encoding = s->encoding;
383 
384     return result;
385 }
386 
387 
388 /*
389 
390 =item C<STRING * Parrot_str_copy(PARROT_INTERP, const STRING *s)>
391 
392 Creates and returns a shallow copy of the specified Parrot string.
393 
394 =cut
395 
396 */
397 
398 PARROT_EXPORT
399 PARROT_CANNOT_RETURN_NULL
400 PARROT_WARN_UNUSED_RESULT
401 STRING *
Parrot_str_copy(PARROT_INTERP,ARGIN_NULLOK (const STRING * s))402 Parrot_str_copy(PARROT_INTERP, ARGIN_NULLOK(const STRING *s))
403 {
404     ASSERT_ARGS(Parrot_str_copy)
405     STRING *d;
406     int     is_movable;
407 
408     if (STRING_IS_NULL(s))
409         return STRINGNULL;
410 
411     d = Parrot_gc_new_string_header(interp,
412           PObj_get_FLAGS(s) & ~PObj_constant_FLAG);
413 
414     /* This might set the constant flag again but it is the right thing
415      * to do */
416     STRUCT_COPY(d, s);
417 
418     /*
419      * FIXME. It's abstraction leak here from GC.
420      * Basically if we are copying string from older generation
421      * we have to clear flags about it.
422      */
423     d->flags &= ~PObj_GC_all_generation_FLAGS;
424 
425     /* Clear live flag. It might be set on constant strings */
426     PObj_live_CLEAR(d);
427 
428     /* Set the string copy flag */
429     PObj_is_string_copy_SET(d);
430 
431     is_movable = PObj_is_movable_TESTALL(s);
432 
433     /* Now check that buffer allocated from pool and affected by compacting */
434     if (is_movable && Buffer_bufstart(s)) {
435         /* If so, mark it as shared */
436         INTVAL * const buffer_flags = Buffer_bufflagsptr(d);
437         *buffer_flags |= Buffer_shared_FLAG;
438     }
439 
440     PARROT_ASSERT(is_movable == PObj_is_movable_TESTALL(d));
441 
442     return d;
443 }
444 
445 
446 /*
447 
448 =item C<STRING * Parrot_str_concat(PARROT_INTERP, const STRING *a, const STRING
449 *b)>
450 
451 Concatenates two Parrot strings. If necessary, converts the second
452 string's encoding and/or type to match those of the first string. If
453 either string is C<NULL>, then a copy of the non-C<NULL> string is
454 returned. If both strings are C<NULL>, return C<STRINGNULL>.
455 
456 =cut
457 
458 */
459 
460 PARROT_EXPORT
461 PARROT_CANNOT_RETURN_NULL
462 STRING *
Parrot_str_concat(PARROT_INTERP,ARGIN_NULLOK (const STRING * a),ARGIN_NULLOK (const STRING * b))463 Parrot_str_concat(PARROT_INTERP, ARGIN_NULLOK(const STRING *a),
464             ARGIN_NULLOK(const STRING *b))
465 {
466     ASSERT_ARGS(Parrot_str_concat)
467     const STR_VTABLE *enc;
468     STRING           *dest;
469     UINTVAL           total_length;
470 
471     if (STRING_IS_NULL(a) && STRING_IS_NULL(b))
472         return STRINGNULL;
473 
474     if (STRING_length(a) == 0) {
475         if (STRING_length(b) == 0)
476             return CONST_STRING(interp, "");
477         else
478             return Parrot_str_copy(interp, b);
479     }
480     else {
481         if (STRING_length(b) == 0)
482             return Parrot_str_copy(interp, a);
483     }
484 
485     ASSERT_STRING_SANITY(a);
486     ASSERT_STRING_SANITY(b);
487 
488     enc = string_rep_compatible(a, b);
489 
490     if (!enc) {
491         /* upgrade strings for concatenation */
492         if (a->encoding == Parrot_ucs4_encoding_ptr
493             || b->encoding == Parrot_ucs4_encoding_ptr)
494             enc = Parrot_ucs4_encoding_ptr;
495         else if (a->encoding == Parrot_utf16_encoding_ptr
496             ||  b->encoding == Parrot_utf16_encoding_ptr
497             ||  a->encoding == Parrot_ucs2_encoding_ptr
498             ||  b->encoding == Parrot_ucs2_encoding_ptr)
499             enc = Parrot_utf16_encoding_ptr;
500         else
501             enc = Parrot_utf8_encoding_ptr;
502 
503         a = enc->to_encoding(interp, a);
504         b = enc->to_encoding(interp, b);
505     }
506     /* calc usable and total bytes */
507     total_length = a->bufused + b->bufused;
508 
509     if (PObj_is_growable_TESTALL(a)
510     &&  a->strstart + total_length <=
511         (char *)Buffer_bufstart(a) + Buffer_buflen(a)) {
512         /* String a is growable and there's enough space in the buffer */
513         DECL_CONST_CAST;
514 
515         dest = Parrot_str_copy(interp, a);
516 
517         /* Switch string copy flags */
518         PObj_is_string_copy_SET(PARROT_const_cast(STRING *, a));
519         PObj_is_string_copy_CLEAR(dest);
520 
521         /* Append b */
522         memcpy(dest->strstart + dest->bufused,
523                 b->strstart, b->bufused);
524 
525         dest->encoding = enc;
526         dest->hashval = 0;
527     }
528     else {
529         if (4 * b->bufused < a->bufused) {
530             /* Preallocate more memory if we're appending a short string to
531                a long string */
532             total_length += total_length >> 1;
533         }
534 
535         dest = Parrot_str_new_noinit(interp, total_length);
536         PARROT_ASSERT(enc);
537         dest->encoding = enc;
538 
539         /* Copy A first */
540         memcpy(dest->strstart, a->strstart, a->bufused);
541 
542         /* Tack B on the end of A */
543         memcpy(dest->strstart + a->bufused,
544                 b->strstart, b->bufused);
545     }
546 
547     dest->bufused = a->bufused + b->bufused;
548     dest->strlen  = a->strlen + b->strlen;
549 
550     return dest;
551 }
552 
553 
554 /*
555 
556 =item C<STRING * Parrot_str_new(PARROT_INTERP, const char *buffer, const UINTVAL
557 len)>
558 
559 Makes a Parrot string from a specified C string.
560 
561 =cut
562 
563 */
564 
565 PARROT_EXPORT
566 PARROT_WARN_UNUSED_RESULT
567 PARROT_MALLOC
568 PARROT_CANNOT_RETURN_NULL
569 STRING *
Parrot_str_new(PARROT_INTERP,ARGIN_NULLOK (const char * buffer),const UINTVAL len)570 Parrot_str_new(PARROT_INTERP, ARGIN_NULLOK(const char *buffer), const UINTVAL len)
571 {
572     ASSERT_ARGS(Parrot_str_new)
573     /* Force an 8-bit encoding at some point? */
574     const UINTVAL buff_length = (len > 0) ? len : buffer ? strlen(buffer) : 0;
575 
576     return Parrot_str_new_init(interp, buffer, buff_length,
577         Parrot_default_encoding_ptr, 0);
578 }
579 
580 
581 /*
582 
583 =item C<STRING * Parrot_str_new_from_buffer(PARROT_INTERP, Parrot_Buffer
584 *buffer, const UINTVAL len)>
585 
586 Makes a Parrot string from a Buffer.
587 
588 The Buffer is nulled afterwards, as only one PObj can point at a given string
589 pool object.
590 
591 =cut
592 
593 */
594 
595 PARROT_EXPORT
596 PARROT_WARN_UNUSED_RESULT
597 PARROT_MALLOC
598 PARROT_CANNOT_RETURN_NULL
599 STRING *
Parrot_str_new_from_buffer(PARROT_INTERP,ARGMOD (Parrot_Buffer * buffer),const UINTVAL len)600 Parrot_str_new_from_buffer(PARROT_INTERP, ARGMOD(Parrot_Buffer *buffer), const UINTVAL len)
601 {
602     ASSERT_ARGS(Parrot_str_new_from_buffer)
603 
604     STRING * const result   = Parrot_gc_new_string_header(interp, 0);
605     Buffer_bufstart(result) = Buffer_bufstart(buffer);
606     Buffer_buflen(result)   = Buffer_buflen(buffer);
607     result->strstart        = (char *)Buffer_bufstart(result);
608     result->bufused         = len;
609     result->strlen          = len;
610     result->encoding        = Parrot_binary_encoding_ptr;
611 
612     Buffer_buflen(buffer)   = 0;
613     Buffer_bufstart(buffer) = NULL;
614 
615     return result;
616 }
617 
618 
619 /*
620 
621 =item C<STRING * Parrot_str_new_constant(PARROT_INTERP, const char *buffer)>
622 
623 Creates and returns a constant Parrot string.
624 
625 =cut
626 
627 */
628 
629 PARROT_EXPORT
630 PARROT_WARN_UNUSED_RESULT
631 PARROT_CANNOT_RETURN_NULL
632 STRING *
Parrot_str_new_constant(PARROT_INTERP,ARGIN (const char * buffer))633 Parrot_str_new_constant(PARROT_INTERP, ARGIN(const char *buffer))
634 {
635     ASSERT_ARGS(Parrot_str_new_constant)
636     DECL_CONST_CAST;
637     Hash   * const cstring_cache = interp->const_cstring_hash;
638     STRING *s                    = (STRING *)Parrot_hash_get(interp,
639                                         cstring_cache, buffer);
640 
641     if (s)
642         return s;
643 
644     s = Parrot_str_new_init(interp, buffer, strlen(buffer),
645                        Parrot_default_encoding_ptr,
646                        PObj_external_FLAG|PObj_constant_FLAG);
647 
648     Parrot_hash_put(interp, cstring_cache,
649         PARROT_const_cast(char *, buffer), (void *)s);
650 
651     return s;
652 }
653 
654 /*
655 
656 =item C<STRING * Parrot_str_new_init(PARROT_INTERP, const char *buffer, UINTVAL
657 len, const STR_VTABLE *encoding, UINTVAL flags)>
658 
659 Given a buffer, its length, an encoding, and STRING flags, creates and returns
660 a new string. If buffer is NULL and len >= 0, allocates len bytes.
661 
662 =cut
663 
664 */
665 
666 PARROT_EXPORT
667 PARROT_WARN_UNUSED_RESULT
668 PARROT_CANNOT_RETURN_NULL
669 STRING *
Parrot_str_new_init(PARROT_INTERP,ARGIN_NULLOK (const char * buffer),UINTVAL len,ARGIN (const STR_VTABLE * encoding),UINTVAL flags)670 Parrot_str_new_init(PARROT_INTERP, ARGIN_NULLOK(const char *buffer), UINTVAL len,
671         ARGIN(const STR_VTABLE *encoding), UINTVAL flags)
672 {
673     ASSERT_ARGS(Parrot_str_new_init)
674     DECL_CONST_CAST;
675     STRING * const s = Parrot_gc_new_string_header(interp, flags);
676     s->encoding      = encoding;
677 
678     if (flags & PObj_external_FLAG) {
679         /*
680          * fast path for external (constant) strings - don't allocate
681          * and copy data
682          */
683         /* The following cast discards the 'const'.  That raises
684            a warning with gcc, but is ok since the caller indicated
685            it was safe by setting PObj_external_FLAG.
686            (The cast is necessary to pacify TenDRA's tcc.)
687            */
688         Buffer_bufstart(s) = s->strstart = PARROT_const_cast(char *, buffer);
689         Buffer_buflen(s)   = s->bufused  = len;
690 
691         STRING_scan(interp, s);
692 
693         return s;
694     }
695 
696     Parrot_gc_allocate_string_storage(interp, s, len);
697 
698     if (buffer && len) {
699         memcpy(s->strstart, buffer, len);
700         s->bufused = len;
701         STRING_scan(interp, s);
702     }
703     else
704         s->strlen = s->bufused = 0;
705 
706     return s;
707 }
708 
709 
710 /*
711 
712 =item C<STRING * Parrot_str_new_from_cstring(PARROT_INTERP, const char *buffer,
713 STRING *encodingname)>
714 
715 Given a buffer and an encoding, creates and returns a new string. If buffer is
716 NULL the result is a null string. Otherwise, the buffer should be a zero
717 terminated c-style string and its content must be valid for the encoding
718 specified. If encoding is null, assume platform encoding.
719 
720 =cut
721 
722 */
723 
724 PARROT_WARN_UNUSED_RESULT
725 PARROT_CANNOT_RETURN_NULL
726 STRING *
Parrot_str_new_from_cstring(PARROT_INTERP,ARGIN_NULLOK (const char * buffer),ARGIN_NULLOK (STRING * encodingname))727 Parrot_str_new_from_cstring(PARROT_INTERP, ARGIN_NULLOK(const char *buffer),
728         ARGIN_NULLOK(STRING *encodingname))
729 {
730     ASSERT_ARGS(Parrot_str_new_from_cstring)
731     STRING *result = STRINGNULL;
732     if (buffer) {
733         const STR_VTABLE *encoding = STRING_IS_NULL(encodingname) ?
734                 Parrot_platform_encoding_ptr :
735                 Parrot_find_encoding_by_string(interp, encodingname);
736         if (encoding == NULL)
737             Parrot_ex_throw_from_c_noargs(interp,
738                     EXCEPTION_INVALID_ENCODING, "Invalid encoding");
739         else {
740             int size = strlen(buffer);
741             result = Parrot_str_new_init(interp, buffer, size, encoding, 0);
742 #if 0
743             /* if string is ascii and platform multi-byte we could downgrade.
744                but is is overall slower to scan here, than the faster ops later */
745             if (STRING_IS_NULL(encodingname)
746                 && Parrot_platform_encoding_ptr->max_bytes_per_codepoint > 1
747                 && result->strlen == result->bufused)
748             {
749                 const unsigned char * const ptr = (unsigned char *)result->strstart;
750                 int multi = 0;
751                 UINTVAL i;
752                 for (i = 0; i < result->strlen; ++i) {
753                     if (ptr[i] >= 0x80)
754                         multi++;
755                 }
756                 if (!multi)
757                     result->encoding = Parrot_ascii_encoding_ptr;
758             }
759 #endif
760         }
761     }
762     return result;
763 }
764 
765 
766 /*
767 
768 =item C<STRING * Parrot_str_from_platform_cstring(PARROT_INTERP, const char *c)>
769 
770 Convert a C string, encoded in the platform's assumed encoding, to a Parrot
771 string.
772 
773 =cut
774 
775 */
776 
777 PARROT_EXPORT
778 PARROT_CANNOT_RETURN_NULL
779 STRING *
Parrot_str_from_platform_cstring(PARROT_INTERP,ARGIN_NULLOK (const char * c))780 Parrot_str_from_platform_cstring(PARROT_INTERP, ARGIN_NULLOK(const char *c))
781 {
782     ASSERT_ARGS(Parrot_str_from_platform_cstring)
783     if (!c)
784         return STRINGNULL;
785     else {
786         STRING *retv;
787         Parrot_runloop jmp;
788 
789         if (setjmp(jmp.resume)) {
790             /* catch */
791             Parrot_cx_delete_handler_local(interp);
792             retv =  Parrot_str_new_init(interp, c, strlen(c),
793                                         Parrot_binary_encoding_ptr, 0);
794         }
795         else {
796             /* try */
797             Parrot_ex_add_c_handler(interp, &jmp);
798             retv = Parrot_str_new_init(interp, c, Parrot_str_platform_strlen(interp, c),
799                                         Parrot_platform_encoding_ptr, 0);
800             /* if string is ascii-only and platform multi-byte use ascii */
801             if (retv
802                 && Parrot_platform_encoding_ptr->max_bytes_per_codepoint > 1
803                 && retv->strlen == retv->bufused) {
804                 retv = Parrot_ascii_encoding_ptr->to_encoding(interp, retv);
805             }
806             Parrot_cx_delete_handler_local(interp);
807         }
808 
809         return retv;
810     }
811 }
812 
813 
814 /*
815 
816 =item C<char * Parrot_str_to_platform_cstring(PARROT_INTERP, const STRING *s)>
817 
818 Obtain a C string, encoded in the platform's assumed encoding, from a Parrot
819 string.
820 
821 =cut
822 
823 */
824 
825 PARROT_EXPORT
826 PARROT_CAN_RETURN_NULL
827 PARROT_WARN_UNUSED_RESULT
828 char *
Parrot_str_to_platform_cstring(PARROT_INTERP,ARGIN (const STRING * s))829 Parrot_str_to_platform_cstring(PARROT_INTERP, ARGIN(const STRING *s))
830 {
831     ASSERT_ARGS(Parrot_str_to_platform_cstring)
832     if (STRING_IS_NULL(s)) {
833         return NULL;
834     }
835     else {
836         return Parrot_str_to_encoded_cstring(interp, s, Parrot_platform_encoding_ptr);
837     }
838 }
839 
840 
841 /*
842 
843 =item C<STRING * Parrot_str_extract_chars(PARROT_INTERP, const char *buffer,
844 UINTVAL len, INTVAL chars, const STR_VTABLE *encoding)>
845 
846 Extracts C<chars> characters from C<buffer> containing C<len> bytes.
847 
848 =cut
849 
850 */
851 
852 PARROT_WARN_UNUSED_RESULT
853 PARROT_CANNOT_RETURN_NULL
854 STRING *
Parrot_str_extract_chars(PARROT_INTERP,ARGIN (const char * buffer),UINTVAL len,INTVAL chars,ARGIN (const STR_VTABLE * encoding))855 Parrot_str_extract_chars(PARROT_INTERP, ARGIN(const char *buffer),
856         UINTVAL len, INTVAL chars, ARGIN(const STR_VTABLE *encoding))
857 {
858     ASSERT_ARGS(Parrot_str_extract_chars)
859     Parrot_String_Bounds  bounds;
860     STRING               *result;
861 
862     bounds.bytes = len;
863     bounds.chars = chars;
864     bounds.delim = -1;
865 
866     encoding->partial_scan(interp, buffer, &bounds);
867 
868     if (bounds.chars < chars)
869         Parrot_ex_throw_from_c_noargs(interp,
870                 EXCEPTION_OUT_OF_BOUNDS,
871                 "index out of bounds");
872 
873     result = Parrot_str_new_noinit(interp, bounds.bytes);
874 
875     result->encoding = encoding;
876     result->bufused  = bounds.bytes;
877     result->strlen   = bounds.chars;
878 
879     memcpy(result->strstart, buffer, bounds.bytes);
880 
881     return result;
882 }
883 
884 
885 /*
886 
887 =back
888 
889 =head2 Ordinary user-visible string operations
890 
891 =over 4
892 
893 =item C<UINTVAL Parrot_str_byte_length(PARROT_INTERP, const STRING *s)>
894 
895 Returns the number of characters in the specified Parrot string.
896 
897 =cut
898 
899 */
900 
901 PARROT_EXPORT
902 PARROT_PURE_FUNCTION
903 UINTVAL
Parrot_str_byte_length(SHIM_INTERP,ARGIN_NULLOK (const STRING * s))904 Parrot_str_byte_length(SHIM_INTERP, ARGIN_NULLOK(const STRING *s))
905 {
906     ASSERT_ARGS(Parrot_str_byte_length)
907 
908     return STRING_IS_NULL(s) ? 0 : s->bufused;
909 }
910 
911 
912 /*
913 
914 =item C<INTVAL Parrot_str_indexed(PARROT_INTERP, const STRING *s, INTVAL idx)>
915 
916 Returns the codepoint at a given index into a string. Negative indexes are
917 treated as counting from the end of the string. Throws an exception if C<s>
918 is null or C<idx> is out of bounds.
919 
920 Identical to the STRING_ord macro.
921 
922 =cut
923 
924 */
925 
926 PARROT_EXPORT
927 PARROT_WARN_UNUSED_RESULT
928 INTVAL
Parrot_str_indexed(PARROT_INTERP,ARGIN (const STRING * s),INTVAL idx)929 Parrot_str_indexed(PARROT_INTERP, ARGIN(const STRING *s), INTVAL idx)
930 {
931     ASSERT_ARGS(Parrot_str_indexed)
932 
933     if (s == NULL)
934         s = STRINGNULL;
935 
936     return STRING_ord(interp, s, idx);
937 }
938 
939 
940 /*
941 
942 =item C<INTVAL Parrot_str_find_index(PARROT_INTERP, const STRING *src, const
943 STRING *search, INTVAL start)>
944 
945 Returns the character position of the second Parrot string in the first at or
946 after C<start>. The return value is a (0 based) offset in characters, not
947 bytes. If the search string is not found in the first string or it is null or
948 empty, returns -1. If C<start> is out of bounds, returns -1. Throws an
949 exception if C<src> is null.
950 
951 Identical to the STRING_index macro.
952 
953 =item C<INTVAL Parrot_str_find_reverse_index(PARROT_INTERP, const STRING *src,
954 const STRING *search, INTVAL start)>
955 
956 Returns the last character position of the second Parrot string C<search> in
957 C<src>, not after C<start>. Returns the last position found, or -1 if no
958 instances are found.
959 
960 Mostly identical to the STRING_rindex macro.
961 
962 =cut
963 
964 */
965 
966 PARROT_EXPORT
967 PARROT_WARN_UNUSED_RESULT
968 INTVAL
Parrot_str_find_index(PARROT_INTERP,ARGIN (const STRING * src),ARGIN (const STRING * search),INTVAL start)969 Parrot_str_find_index(PARROT_INTERP, ARGIN(const STRING *src),
970         ARGIN(const STRING *search), INTVAL start)
971 {
972     ASSERT_ARGS(Parrot_str_find_index)
973 
974     if (src == NULL)
975         src = STRINGNULL;
976 
977     return STRING_index(interp, src, search, start);
978 }
979 
980 PARROT_EXPORT
981 PARROT_WARN_UNUSED_RESULT
982 INTVAL
Parrot_str_find_reverse_index(PARROT_INTERP,ARGIN (const STRING * src),ARGIN (const STRING * search),INTVAL start)983 Parrot_str_find_reverse_index(PARROT_INTERP, ARGIN(const STRING *src),
984     ARGIN(const STRING *search), INTVAL start)
985 {
986     ASSERT_ARGS(Parrot_str_find_reverse_index)
987     INTVAL len = Parrot_str_length(interp, src);
988 
989     if (start <= 0 || !len || start > len)
990         return -1;
991 
992     if (!Parrot_str_length(interp, search))
993         return -1;
994 
995     return STRING_rindex(interp, src, search, (UINTVAL)start);
996 }
997 
998 /*
999 
1000 =item C<STRING * Parrot_str_chr(PARROT_INTERP, UINTVAL character)>
1001 
1002 Returns a single-character Parrot string.
1003 
1004 =cut
1005 
1006 */
1007 
1008 PARROT_EXPORT
1009 PARROT_CANNOT_RETURN_NULL
1010 PARROT_WARN_UNUSED_RESULT
1011 STRING *
Parrot_str_chr(PARROT_INTERP,UINTVAL character)1012 Parrot_str_chr(PARROT_INTERP, UINTVAL character)
1013 {
1014     ASSERT_ARGS(Parrot_str_chr)
1015 
1016     if (character > 0xff)
1017         return Parrot_utf8_encoding_ptr->chr(interp, character);
1018     else if (character > 0x7f)
1019         return Parrot_latin1_encoding_ptr->chr(interp, character);
1020     else
1021         return Parrot_ascii_encoding_ptr->chr(interp, character);
1022 }
1023 
1024 
1025 /*
1026 
1027 =back
1028 
1029 =head2 Vtable Dispatch Functions
1030 
1031 =over 4
1032 
1033 =item C<INTVAL Parrot_str_length(PARROT_INTERP, const STRING *s)>
1034 
1035 Returns the number of characters in the specified Parrot string.
1036 
1037 =cut
1038 
1039 */
1040 
1041 PARROT_EXPORT
1042 PARROT_PURE_FUNCTION
1043 PARROT_WARN_UNUSED_RESULT
1044 INTVAL
Parrot_str_length(SHIM_INTERP,ARGIN_NULLOK (const STRING * s))1045 Parrot_str_length(SHIM_INTERP, ARGIN_NULLOK(const STRING *s))
1046 {
1047     ASSERT_ARGS(Parrot_str_length)
1048 
1049     return STRING_IS_NULL(s) ? 0 : s->strlen;
1050 }
1051 
1052 
1053 /*
1054 
1055 =item C<static INTVAL string_max_bytes(PARROT_INTERP, const STRING *s, UINTVAL
1056 nchars)>
1057 
1058 Returns the number of bytes required to safely contain the specified number
1059 of characters in the specified Parrot string's representation.
1060 
1061 =cut
1062 
1063 */
1064 
1065 PARROT_WARN_UNUSED_RESULT
1066 PARROT_PURE_FUNCTION
1067 static INTVAL
string_max_bytes(SHIM_INTERP,ARGIN (const STRING * s),UINTVAL nchars)1068 string_max_bytes(SHIM_INTERP, ARGIN(const STRING *s), UINTVAL nchars)
1069 {
1070     ASSERT_ARGS(string_max_bytes)
1071     PARROT_ASSERT(s->encoding);
1072     return STRING_max_bytes_per_codepoint(s) * nchars;
1073 }
1074 
1075 
1076 /*
1077 
1078 =item C<STRING * Parrot_str_repeat(PARROT_INTERP, const STRING *s, UINTVAL num)>
1079 
1080 Repeats the specified Parrot string I<num> times and returns the result.
1081 
1082 =cut
1083 
1084 */
1085 
1086 PARROT_EXPORT
1087 PARROT_CANNOT_RETURN_NULL
1088 STRING *
Parrot_str_repeat(PARROT_INTERP,ARGIN (const STRING * s),UINTVAL num)1089 Parrot_str_repeat(PARROT_INTERP, ARGIN(const STRING *s), UINTVAL num)
1090 {
1091     ASSERT_ARGS(Parrot_str_repeat)
1092     STRING * const dest = Parrot_str_new_init(interp, NULL,
1093                         s->bufused * num,
1094                         s->encoding, 0);
1095     if (num > 0) {
1096         /* copy s into dest num times */
1097         UINTVAL length = s->bufused;
1098         UINTVAL i;
1099         char *             destpos = dest->strstart;
1100         const char * const srcpos  = s->strstart;
1101         for (i = 0; i < num; ++i) {
1102             memcpy(destpos, srcpos, length);
1103             destpos += length;
1104         }
1105 
1106         dest->strlen  = s->strlen  * num;
1107         dest->bufused = s->bufused * num;
1108     }
1109 
1110     return dest;
1111 }
1112 
1113 
1114 /*
1115 
1116 =item C<STRING * Parrot_str_substr(PARROT_INTERP, const STRING *src, INTVAL
1117 offset, INTVAL length)>
1118 
1119 Returns substring of length C<length> from C<offset> from the specified
1120 Parrot string. If C<offset> is negative, it counts from the end of the
1121 string. Returns the empty string if C<offset> equals the length of the
1122 string. Throws an exception if C<src> is null or C<offset> is out of bounds.
1123 Truncates C<length> if it extends beyond the end of the string.
1124 
1125 Identical to the STRING_substr macro.
1126 
1127 =cut
1128 
1129 */
1130 
1131 PARROT_EXPORT
1132 PARROT_CANNOT_RETURN_NULL
1133 PARROT_WARN_UNUSED_RESULT
1134 STRING *
Parrot_str_substr(PARROT_INTERP,ARGIN_NULLOK (const STRING * src),INTVAL offset,INTVAL length)1135 Parrot_str_substr(PARROT_INTERP,
1136         ARGIN_NULLOK(const STRING *src), INTVAL offset, INTVAL length)
1137 {
1138     ASSERT_ARGS(Parrot_str_substr)
1139 
1140     if (src == NULL)
1141         src = STRINGNULL;
1142 
1143     return STRING_substr(interp, src, offset, length);
1144 }
1145 
1146 /*
1147 
1148 =item C<STRING * Parrot_str_iter_substr(PARROT_INTERP, const STRING *str, const
1149 String_iter *l, const String_iter *r)>
1150 
1151 Returns the substring between iterators C<l> and C<r>.
1152 
1153 =cut
1154 
1155 */
1156 
1157 PARROT_CANNOT_RETURN_NULL
1158 PARROT_WARN_UNUSED_RESULT
1159 STRING *
Parrot_str_iter_substr(PARROT_INTERP,ARGIN (const STRING * str),ARGIN (const String_iter * l),ARGIN_NULLOK (const String_iter * r))1160 Parrot_str_iter_substr(PARROT_INTERP,
1161     ARGIN(const STRING *str),
1162     ARGIN(const String_iter *l), ARGIN_NULLOK(const String_iter *r))
1163 {
1164     ASSERT_ARGS(Parrot_str_iter_substr)
1165     STRING * const dest = Parrot_str_copy(interp, str);
1166 
1167     dest->strstart += l->bytepos;
1168 
1169     if (r == NULL) {
1170         dest->bufused = str->bufused - l->bytepos;
1171         dest->strlen  = str->strlen  - l->charpos;
1172     }
1173     else {
1174         dest->bufused = r->bytepos - l->bytepos;
1175         dest->strlen  = r->charpos - l->charpos;
1176     }
1177 
1178     dest->hashval = 0;
1179 
1180     return dest;
1181 }
1182 
1183 /*
1184 
1185 =item C<INTVAL Parrot_str_iter_index(PARROT_INTERP, const STRING *src,
1186 String_iter *start, String_iter *end, const STRING *search)>
1187 
1188 Find the next occurrence of STRING C<search> in STRING C<src> starting at
1189 String_iter C<start>. If C<search> is found C<start> is modified to mark the
1190 beginning of C<search> and String_iter C<end> is set to the character after
1191 C<search> in C<src>.  Returns the character position where C<search> was found
1192 or -1 if it wasn't found.
1193 
1194 =cut
1195 
1196 */
1197 
1198 INTVAL
Parrot_str_iter_index(PARROT_INTERP,ARGIN (const STRING * src),ARGMOD (String_iter * start),ARGOUT (String_iter * end),ARGIN (const STRING * search))1199 Parrot_str_iter_index(PARROT_INTERP,
1200     ARGIN(const STRING *src),
1201     ARGMOD(String_iter *start), ARGOUT(String_iter *end),
1202     ARGIN(const STRING *search))
1203 {
1204     ASSERT_ARGS(Parrot_str_iter_index)
1205     String_iter search_iter, search_start, next_start;
1206     const UINTVAL len = search->strlen;
1207     UINTVAL c0;
1208 
1209     if (len == 0) {
1210         *end = *start;
1211         return start->charpos;
1212     }
1213 
1214     STRING_ITER_INIT(interp, &search_iter);
1215     c0 = STRING_iter_get_and_advance(interp, search, &search_iter);
1216     search_start = search_iter;
1217     next_start = *start;
1218 
1219     while (start->charpos + len <= src->strlen) {
1220         UINTVAL c1 = STRING_iter_get_and_advance(interp, src, &next_start);
1221 
1222         if (c1 == c0) {
1223             UINTVAL c2;
1224             *end = next_start;
1225 
1226             do {
1227                 if (search_iter.charpos >= len)
1228                     return start->charpos;
1229                 c1 = STRING_iter_get_and_advance(interp, src, end);
1230                 c2 = STRING_iter_get_and_advance(interp, search, &search_iter);
1231             } while (c1 == c2);
1232 
1233             search_iter = search_start;
1234         }
1235 
1236         *start = next_start;
1237     }
1238 
1239     return -1;
1240 }
1241 
1242 
1243 /*
1244 
1245 =item C<STRING * Parrot_str_replace(PARROT_INTERP, const STRING *src, INTVAL
1246 offset, INTVAL length, const STRING *rep)>
1247 
1248 Replaces a sequence of C<length> characters from C<offset> in the first
1249 Parrot string with the second Parrot string, returning what was
1250 replaced.
1251 
1252 This follows the Perl semantics for:
1253 
1254     substr EXPR, OFFSET, LENGTH, REPLACEMENT
1255 
1256 Replacing a sequence of characters with a longer string grows the
1257 string; a shorter string shrinks it.
1258 
1259 Replacing 2 past the end of the string is undefined. However replacing 1
1260 past the end of the string concatenates the two strings.
1261 
1262 A negative offset is allowed to replace from the end.
1263 
1264 =cut
1265 
1266 */
1267 
1268 PARROT_EXPORT
1269 PARROT_CANNOT_RETURN_NULL
1270 PARROT_WARN_UNUSED_RESULT
1271 STRING *
Parrot_str_replace(PARROT_INTERP,ARGIN (const STRING * src),INTVAL offset,INTVAL length,ARGIN (const STRING * rep))1272 Parrot_str_replace(PARROT_INTERP, ARGIN(const STRING *src),
1273     INTVAL offset, INTVAL length, ARGIN(const STRING *rep))
1274 {
1275     ASSERT_ARGS(Parrot_str_replace)
1276     String_iter       iter;
1277     const STR_VTABLE *enc;
1278     STRING           *dest        = NULL;
1279     UINTVAL           true_offset = (UINTVAL)offset;
1280     UINTVAL           true_length = (UINTVAL)length;
1281 
1282     UINTVAL         start_byte, end_byte, start_char, end_char;
1283     INTVAL          buf_size;
1284 
1285     if (STRING_IS_NULL(src)) {
1286         Parrot_ex_throw_from_c_noargs(interp, EXCEPTION_UNEXPECTED_NULL,
1287             "Can't replace in NULL string");
1288     }
1289 
1290     /* abs(-offset) may not be > strlen-1 */
1291     if (offset < 0)
1292         true_offset = src->strlen + offset;
1293 
1294     /* Can replace 1 past end of string which is technically outside the string
1295      * but is same as a concat().
1296      * Only give exception if caller trys to replace end of string + 2
1297      */
1298     if (true_offset > src->strlen)
1299         Parrot_ex_throw_from_c_noargs(interp,
1300             EXCEPTION_SUBSTR_OUT_OF_STRING,
1301             "Can only replace inside string or index after end of string");
1302 
1303     if (true_length > (src->strlen - true_offset))
1304         true_length = src->strlen - true_offset;
1305 
1306     if (STRING_IS_NULL(rep)) {
1307         enc = src->encoding;
1308     }
1309     else {
1310         /* may have different reps..... */
1311         enc = string_rep_compatible(src, rep);
1312 
1313         if (!enc) {
1314             if (src->encoding != Parrot_utf8_encoding_ptr)
1315                 src = Parrot_utf8_encoding_ptr->to_encoding(interp, src);
1316             if (rep->encoding != Parrot_utf8_encoding_ptr)
1317                 rep = Parrot_utf8_encoding_ptr->to_encoding(interp, rep);
1318             /* Remember selected encoding */
1319             enc = src->encoding;
1320         }
1321     }
1322 
1323     /* get byte position of the part that will be replaced */
1324     STRING_ITER_INIT(interp, &iter);
1325 
1326     STRING_iter_skip(interp, src, &iter, true_offset);
1327     start_byte = iter.bytepos;
1328     start_char = iter.charpos;
1329 
1330     STRING_iter_skip(interp, src, &iter, true_length);
1331     end_byte   = iter.bytepos;
1332     end_char   = iter.charpos;
1333 
1334     /* not possible.... */
1335     if (end_byte < start_byte)
1336         Parrot_ex_throw_from_c_noargs(interp,
1337             EXCEPTION_SUBSTR_OUT_OF_STRING,
1338             "replace: subend somehow is less than substart");
1339 
1340     /* Now do the replacement */
1341     dest = Parrot_gc_new_string_header(interp, 0);
1342 
1343     /* Set encoding to compatible */
1344     dest->encoding = enc;
1345 
1346     /* Clear COW flag. We own buffer */
1347     PObj_get_FLAGS(dest) = PObj_is_string_FLAG
1348                          | PObj_is_COWable_FLAG;
1349 
1350             /* size            removed bytes            added bytes */
1351     buf_size = src->bufused - (end_byte - start_byte) + rep->bufused;
1352 
1353     /* Alloctate new string size. */
1354     Parrot_gc_allocate_string_storage(interp, dest, buf_size);
1355     dest->bufused = buf_size;
1356 
1357     /* Copy begin of string */
1358     memcpy(dest->strstart, src->strstart, start_byte);
1359 
1360     /* Copy the replacement in */
1361     memcpy(dest->strstart + start_byte, rep->strstart,
1362             rep->bufused);
1363 
1364     /* Copy the end of old string */
1365     memcpy(dest->strstart + start_byte + rep->bufused,
1366             src->strstart + end_byte,
1367             src->bufused - end_byte);
1368 
1369     dest->strlen  = src->strlen - (end_char - start_char) + rep->strlen;
1370     dest->hashval = 0;
1371 
1372     return dest;
1373 }
1374 
1375 
1376 /*
1377 
1378 =item C<STRING * Parrot_str_chopn(PARROT_INTERP, const STRING *s, INTVAL n)>
1379 
1380 Removes the last C<n> characters of the specified Parrot string and returns the
1381 modified string. If C<n> is negative, cuts the string after C<+n> characters.
1382 
1383 =cut
1384 
1385 */
1386 
1387 PARROT_EXPORT
1388 PARROT_CANNOT_RETURN_NULL
1389 STRING *
Parrot_str_chopn(PARROT_INTERP,ARGIN (const STRING * s),INTVAL n)1390 Parrot_str_chopn(PARROT_INTERP, ARGIN(const STRING *s), INTVAL n)
1391 {
1392     ASSERT_ARGS(Parrot_str_chopn)
1393     INTVAL end = -n;
1394 
1395     if (n >= 0)
1396         end += STRING_length(s);
1397 
1398     return STRING_substr(interp, s, 0, end);
1399 }
1400 
1401 
1402 /*
1403 
1404 =item C<INTVAL Parrot_str_compare(PARROT_INTERP, const STRING *s1, const STRING
1405 *s2)>
1406 
1407 Compares two strings to each other.  If s1 is less than s2, returns -1.  If the
1408 strings are equal, returns 0.  If s1 is greater than s2, returns 2.  This
1409 comparison uses the character set collation order of the strings for
1410 comparison. The null string is considered equal to the empty string.
1411 
1412 Identical to the STRING_compare macro.
1413 
1414 =cut
1415 
1416 */
1417 
1418 PARROT_EXPORT
1419 PARROT_WARN_UNUSED_RESULT
1420 INTVAL
Parrot_str_compare(PARROT_INTERP,ARGIN_NULLOK (const STRING * s1),ARGIN_NULLOK (const STRING * s2))1421 Parrot_str_compare(PARROT_INTERP, ARGIN_NULLOK(const STRING *s1), ARGIN_NULLOK(const STRING *s2))
1422 {
1423     ASSERT_ARGS(Parrot_str_compare)
1424 
1425     if (s1 == NULL)
1426         s1 = STRINGNULL;
1427 
1428     return STRING_compare(interp, s1, s2);
1429 }
1430 
1431 
1432 /*
1433 
1434 =item C<INTVAL Parrot_str_not_equal(PARROT_INTERP, const STRING *s1, const
1435 STRING *s2)>
1436 
1437 Compares two Parrot strings, performing type and encoding conversions if
1438 necessary. Returns 1 if the strings are not equal, and 0 otherwise.
1439 
1440 =cut
1441 
1442 */
1443 
1444 PARROT_EXPORT
1445 PARROT_WARN_UNUSED_RESULT
1446 INTVAL
Parrot_str_not_equal(PARROT_INTERP,ARGIN_NULLOK (const STRING * s1),ARGIN_NULLOK (const STRING * s2))1447 Parrot_str_not_equal(PARROT_INTERP, ARGIN_NULLOK(const STRING *s1), ARGIN_NULLOK(const STRING *s2))
1448 {
1449     ASSERT_ARGS(Parrot_str_not_equal)
1450 
1451     if (s1 == NULL)
1452         s1 = STRINGNULL;
1453 
1454     return !STRING_equal(interp, s1, s2);
1455 }
1456 
1457 
1458 /*
1459 
1460 =item C<INTVAL Parrot_str_equal(PARROT_INTERP, const STRING *s1, const STRING
1461 *s2)>
1462 
1463 Compares two Parrot strings, performing type and encoding conversions if
1464 necessary. The null string is considered equal to the empty string.
1465 
1466 Returns 1 if the strings are equal, and 0 otherwise.
1467 
1468 Identical to the STRING_equal macro.
1469 
1470 =cut
1471 
1472 */
1473 
1474 PARROT_EXPORT
1475 PARROT_WARN_UNUSED_RESULT
1476 INTVAL
Parrot_str_equal(PARROT_INTERP,ARGIN_NULLOK (const STRING * s1),ARGIN_NULLOK (const STRING * s2))1477 Parrot_str_equal(PARROT_INTERP, ARGIN_NULLOK(const STRING *s1), ARGIN_NULLOK(const STRING *s2))
1478 {
1479     ASSERT_ARGS(Parrot_str_equal)
1480 
1481     if (s1 == NULL)
1482         s1 = STRINGNULL;
1483 
1484     return STRING_equal(interp, s1, s2);
1485 }
1486 
1487 
1488 /*
1489 
1490 =item C<STRING * Parrot_str_bitwise_and(PARROT_INTERP, const STRING *s1, const
1491 STRING *s2)>
1492 
1493 Performs a bitwise C<AND> on two Parrot strings, performing type and encoding
1494 conversions if necessary. Returns the result as a new string.
1495 
1496 =cut
1497 
1498 */
1499 
1500 PARROT_EXPORT
1501 PARROT_CANNOT_RETURN_NULL
1502 STRING *
Parrot_str_bitwise_and(PARROT_INTERP,ARGIN_NULLOK (const STRING * s1),ARGIN_NULLOK (const STRING * s2))1503 Parrot_str_bitwise_and(PARROT_INTERP, ARGIN_NULLOK(const STRING *s1),
1504         ARGIN_NULLOK(const STRING *s2))
1505 {
1506     ASSERT_ARGS(Parrot_str_bitwise_and)
1507     STRING *res;
1508     size_t  minlen;
1509 
1510     /* we could also trans_encoding to iso-8859-1 */
1511     if (s1 && STRING_max_bytes_per_codepoint(s1) != 1)
1512         Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_INVALID_ENCODING,
1513             "string bitwise_and (%s/%s) unsupported",
1514             s1->encoding->name, nonnull_encoding_name(s2));
1515 
1516     if (s2 && STRING_max_bytes_per_codepoint(s2) != 1)
1517         Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_INVALID_ENCODING,
1518             "string bitwise_and (%s/%s) unsupported",
1519             nonnull_encoding_name(s1), s2->encoding->name);
1520 
1521     /* think about case of dest string is one of the operands */
1522     if (!STRING_IS_NULL(s1) && !STRING_IS_NULL(s2))
1523         minlen = s1->strlen > s2->strlen ? s2->strlen : s1->strlen;
1524     else
1525         minlen = 0;
1526 
1527     res = Parrot_str_new_init(interp, NULL, minlen,
1528             Parrot_binary_encoding_ptr, 0);
1529 
1530     if (STRING_IS_NULL(s1) || STRING_IS_NULL(s2)) {
1531         res->bufused = 0;
1532         res->strlen  = 0;
1533 
1534         return res;
1535     }
1536 
1537 #if ! DISABLE_GC_DEBUG
1538     /* trigger GC for debug */
1539     if (interp && GC_DEBUG(interp))
1540         Parrot_gc_mark_and_sweep(interp, GC_trace_stack_FLAG);
1541 #endif
1542 
1543     { /* bitwise AND the strings */
1544         const Parrot_UInt1 *curr1 = (Parrot_UInt1 *)s1->strstart;
1545         const Parrot_UInt1 *curr2 = (Parrot_UInt1 *)s2->strstart;
1546         Parrot_UInt1       *dp    = (Parrot_UInt1 *)res->strstart;
1547         size_t              len   = minlen;
1548 
1549         while (len--)
1550             *dp++ = *curr1++ & *curr2++;
1551     }
1552 
1553     res->bufused = res->strlen = minlen;
1554 
1555     return res;
1556 }
1557 
1558 
1559 #define BITWISE_XOR_STRINGS(type1, type2, restype, s1, s2, res, maxlen) \
1560 do { \
1561     const type1 *curr1   = NULL; \
1562     const type2 *curr2   = NULL; \
1563     size_t       length1 = 0; \
1564     size_t       length2 = 0; \
1565     restype     *dp; \
1566     size_t       _index; \
1567  \
1568     if (!STRING_IS_NULL(s1)) { \
1569         curr1   = (type1 *)(s1)->strstart; \
1570         length1 = (s1)->strlen; \
1571     } \
1572     if (!STRING_IS_NULL(s2)) { \
1573         curr2   = (type2 *)(s2)->strstart; \
1574         length2 = (s2)->strlen; \
1575     } \
1576  \
1577     dp = (restype *)(res)->strstart; \
1578     _index = 0; \
1579  \
1580     for (; _index < (maxlen) ; ++curr1, ++curr2, ++dp, ++_index) { \
1581         if (_index < length1) { \
1582             if (_index < length2) \
1583                 *dp = *curr1 ^ *curr2; \
1584             else \
1585                 *dp = *curr1; \
1586         } \
1587         else if (_index < length2) { \
1588             *dp = *curr2; \
1589         } \
1590     } \
1591 } while (0)
1592 
1593 
1594 #define BITWISE_OR_STRINGS(type1, type2, restype, s1, s2, res, maxlen) \
1595 do { \
1596     const type1 *curr1   = NULL; \
1597     const type2 *curr2   = NULL; \
1598     size_t       length1 = 0; \
1599     size_t       length2 = 0; \
1600     restype     *dp; \
1601     size_t       _index; \
1602  \
1603     if (!STRING_IS_NULL(s1)) { \
1604         curr1   = (type1 *)(s1)->strstart; \
1605         length1 = (s1)->strlen; \
1606     } \
1607     if (!STRING_IS_NULL(s2)) { \
1608         curr2   = (type2 *)(s2)->strstart; \
1609         length2 = (s2)->strlen; \
1610     } \
1611  \
1612     dp = (restype *)(res)->strstart; \
1613     _index = 0; \
1614  \
1615     for (; _index < (maxlen) ; ++curr1, ++curr2, ++dp, ++_index) { \
1616         if (_index < length1) { \
1617             if (_index < length2) \
1618                 *dp = *curr1 | *curr2; \
1619             else \
1620                 *dp = *curr1; \
1621         } \
1622         else if (_index < length2) { \
1623             *dp = *curr2; \
1624         } \
1625     } \
1626 } while (0)
1627 
1628 
1629 /*
1630 
1631 =item C<STRING * Parrot_str_bitwise_or(PARROT_INTERP, const STRING *s1, const
1632 STRING *s2)>
1633 
1634 Performs a bitwise C<OR> on two Parrot strings, performing type and encoding
1635 conversions if necessary.  Returns the result as a new string.
1636 
1637 =cut
1638 
1639 */
1640 
1641 PARROT_EXPORT
1642 PARROT_CANNOT_RETURN_NULL
1643 STRING *
Parrot_str_bitwise_or(PARROT_INTERP,ARGIN_NULLOK (const STRING * s1),ARGIN_NULLOK (const STRING * s2))1644 Parrot_str_bitwise_or(PARROT_INTERP, ARGIN_NULLOK(const STRING *s1),
1645         ARGIN_NULLOK(const STRING *s2))
1646 {
1647     ASSERT_ARGS(Parrot_str_bitwise_or)
1648     STRING *res;
1649     size_t  maxlen = 0;
1650 
1651     if (!STRING_IS_NULL(s1)) {
1652         if (STRING_max_bytes_per_codepoint(s1) != 1)
1653             Parrot_ex_throw_from_c_args(interp, NULL,
1654                 EXCEPTION_INVALID_ENCODING,
1655                 "string bitwise_or (%s/%s) unsupported",
1656                 s1->encoding->name, nonnull_encoding_name(s2));
1657 
1658         maxlen = s1->bufused;
1659     }
1660 
1661     if (!STRING_IS_NULL(s2)) {
1662         if (STRING_max_bytes_per_codepoint(s2) != 1)
1663             Parrot_ex_throw_from_c_args(interp, NULL,
1664                 EXCEPTION_INVALID_ENCODING,
1665                 "string bitwise_or (%s/%s) unsupported",
1666                 nonnull_encoding_name(s1), s2->encoding->name);
1667 
1668         if (s2->bufused > maxlen)
1669             maxlen = s2->bufused;
1670     }
1671 
1672     res = Parrot_str_new_init(interp, NULL, maxlen,
1673             Parrot_binary_encoding_ptr, 0);
1674 
1675     if (!maxlen) {
1676         res->bufused = 0;
1677         res->strlen  = 0;
1678         return res;
1679     }
1680 
1681 #if ! DISABLE_GC_DEBUG
1682     /* trigger GC for debug */
1683     if (interp && GC_DEBUG(interp))
1684         Parrot_gc_mark_and_sweep(interp, GC_trace_stack_FLAG);
1685 #endif
1686 
1687     BITWISE_OR_STRINGS(Parrot_UInt1, Parrot_UInt1, Parrot_UInt1,
1688             s1, s2, res, maxlen);
1689     res->bufused = res->strlen = maxlen;
1690 
1691     return res;
1692 }
1693 
1694 
1695 /*
1696 
1697 =item C<STRING * Parrot_str_bitwise_xor(PARROT_INTERP, const STRING *s1, const
1698 STRING *s2)>
1699 
1700 Performs a bitwise C<XOR> on two Parrot strings, performing type and encoding
1701 conversions if necessary.  Returns the result as a new string.
1702 
1703 =cut
1704 
1705 */
1706 
1707 PARROT_EXPORT
1708 PARROT_CANNOT_RETURN_NULL
1709 STRING *
Parrot_str_bitwise_xor(PARROT_INTERP,ARGIN_NULLOK (const STRING * s1),ARGIN_NULLOK (const STRING * s2))1710 Parrot_str_bitwise_xor(PARROT_INTERP, ARGIN_NULLOK(const STRING *s1),
1711         ARGIN_NULLOK(const STRING *s2))
1712 {
1713     ASSERT_ARGS(Parrot_str_bitwise_xor)
1714     STRING *res;
1715     size_t  maxlen = 0;
1716 
1717     if (!STRING_IS_NULL(s1)) {
1718         if (STRING_max_bytes_per_codepoint(s1) != 1)
1719             Parrot_ex_throw_from_c_args(interp, NULL,
1720                 EXCEPTION_INVALID_ENCODING,
1721                 "string bitwise_xor (%s/%s) unsupported",
1722                 s1->encoding->name, nonnull_encoding_name(s2));
1723 
1724         maxlen = s1->bufused;
1725     }
1726 
1727     if (!STRING_IS_NULL(s2)) {
1728         if (STRING_max_bytes_per_codepoint(s2) != 1)
1729             Parrot_ex_throw_from_c_args(interp, NULL,
1730                 EXCEPTION_INVALID_ENCODING,
1731                 "string bitwise_xor (%s/%s) unsupported",
1732                 nonnull_encoding_name(s1), s2->encoding->name);
1733 
1734         if (s2->bufused > maxlen)
1735             maxlen = s2->bufused;
1736     }
1737 
1738     res = Parrot_str_new_init(interp, NULL, maxlen,
1739             Parrot_binary_encoding_ptr, 0);
1740 
1741     if (!maxlen) {
1742         res->bufused = 0;
1743         res->strlen  = 0;
1744         return res;
1745     }
1746 
1747 #if ! DISABLE_GC_DEBUG
1748     /* trigger GC for debug */
1749     if (interp && GC_DEBUG(interp))
1750         Parrot_gc_mark_and_sweep(interp, GC_trace_stack_FLAG);
1751 #endif
1752 
1753     BITWISE_XOR_STRINGS(Parrot_UInt1, Parrot_UInt1, Parrot_UInt1,
1754             s1, s2, res, maxlen);
1755     res->bufused = res->strlen = maxlen;
1756 
1757     return res;
1758 }
1759 
1760 
1761 #define BITWISE_NOT_STRING(type, s, res) \
1762 do { \
1763     if (!STRING_IS_NULL(s) && !STRING_IS_NULL(res)) { \
1764         const type   *curr   = (type *)(s)->strstart; \
1765         size_t        length = (s)->strlen; \
1766         Parrot_UInt1 *dp     = (Parrot_UInt1 *)(res)->strstart; \
1767  \
1768         for (; length ; --length, ++dp, ++curr) \
1769             *dp = 0xFF & ~ *curr; \
1770     } \
1771 } while (0)
1772 
1773 
1774 /*
1775 
1776 =item C<STRING * Parrot_str_bitwise_not(PARROT_INTERP, const STRING *s)>
1777 
1778 Performs a bitwise C<NOT> on a Parrot string.  Returns the result as a new
1779 string.
1780 
1781 =cut
1782 
1783 */
1784 
1785 PARROT_EXPORT
1786 PARROT_CANNOT_RETURN_NULL
1787 STRING *
Parrot_str_bitwise_not(PARROT_INTERP,ARGIN_NULLOK (const STRING * s))1788 Parrot_str_bitwise_not(PARROT_INTERP, ARGIN_NULLOK(const STRING *s))
1789 {
1790     ASSERT_ARGS(Parrot_str_bitwise_not)
1791     STRING *res;
1792     size_t  len;
1793 
1794     if (!STRING_IS_NULL(s)) {
1795         if (STRING_max_bytes_per_codepoint(s) != 1)
1796             Parrot_ex_throw_from_c_args(interp, NULL,
1797                 EXCEPTION_INVALID_ENCODING,
1798                 "string bitwise_not (%s) unsupported",
1799                 s->encoding->name);
1800 
1801         len = s->bufused;
1802     }
1803     else
1804         len = 0;
1805 
1806     res = Parrot_str_new_init(interp, NULL, len,
1807             Parrot_binary_encoding_ptr, 0);
1808 
1809     if (!len) {
1810         res->bufused = 0;
1811         res->strlen  = 0;
1812         return res;
1813     }
1814 
1815 #if ! DISABLE_GC_DEBUG
1816     /* trigger GC for debug */
1817     if (interp && GC_DEBUG(interp))
1818         Parrot_gc_mark_and_sweep(interp, GC_trace_stack_FLAG);
1819 #endif
1820 
1821     res->strlen = res->bufused = len;
1822 
1823     BITWISE_NOT_STRING(Parrot_UInt1, s, res);
1824 
1825     return res;
1826 }
1827 
1828 
1829 /*
1830 
1831 =item C<INTVAL Parrot_str_boolean(PARROT_INTERP, const STRING *s)>
1832 
1833 Returns whether the specified Parrot string is true. A string is true if it is
1834 equal to anything other than C<0>, C<""> or C<"0">.
1835 
1836 =cut
1837 
1838 */
1839 
1840 PARROT_EXPORT
1841 PARROT_WARN_UNUSED_RESULT
1842 INTVAL
Parrot_str_boolean(PARROT_INTERP,ARGIN_NULLOK (const STRING * s))1843 Parrot_str_boolean(PARROT_INTERP, ARGIN_NULLOK(const STRING *s))
1844 {
1845     ASSERT_ARGS(Parrot_str_boolean)
1846     INTVAL len;
1847 
1848     if (s == NULL)
1849         return 0;
1850 
1851     len = STRING_length(s);
1852     if (len == 0)
1853         return 0;
1854 
1855     if (len == 1) {
1856         const UINTVAL c = STRING_ord(interp, s, 0);
1857 
1858         /* relying on character literals being interpreted as ASCII--may
1859         not be correct on EBCDIC systems. use numeric value instead? */
1860         if (c == '0')
1861             /* later, accept other chars with digit value 0? or, no */
1862             return 0;
1863     }
1864 
1865     /* it must be true */
1866     return 1;
1867 }
1868 
1869 
1870 /*
1871 
1872 =item C<STRING * Parrot_str_format_data(PARROT_INTERP, ARGIN_FORMAT(const char
1873 *format), ...)>
1874 
1875 Writes and returns a Parrot string.
1876 
1877 =cut
1878 
1879 */
1880 
1881 PARROT_EXPORT
1882 PARROT_CANNOT_RETURN_NULL
1883 STRING *
Parrot_str_format_data(PARROT_INTERP,ARGIN_FORMAT (const char * format),...)1884 Parrot_str_format_data(PARROT_INTERP, ARGIN_FORMAT(const char *format), ...)
1885 {
1886     ASSERT_ARGS(Parrot_str_format_data)
1887     STRING *output;
1888     va_list args;
1889 
1890     va_start(args, format);
1891     output = Parrot_vsprintf_c(interp, format, args);
1892     va_end(args);
1893 
1894     return output;
1895 }
1896 
1897 
1898 /*
1899 
1900 State of FSM during number value parsing.
1901 
1902 Integer uses only parse_start, parse_before_dot and parse_end.
1903 
1904 */
1905 
1906 typedef enum number_parse_state {
1907     parse_start,
1908     parse_before_dot,
1909     parse_after_dot,
1910     parse_after_e,
1911     parse_after_e_sign,
1912     parse_end
1913 } number_parse_state;
1914 
1915 
1916 /*
1917 
1918 =item C<INTVAL Parrot_str_to_int(PARROT_INTERP, const STRING *s)>
1919 
1920 Converts a numeric Parrot string to an integer value.
1921 
1922 A number is such that:
1923 
1924     sign            =  '+' | '-'
1925     digit           =  "Any code point considered a digit by the chartype"
1926     indicator       =  'e' | 'E'
1927     digits          =  digit [digit]...
1928     decimal-part    =  digits '.' [digits] | ['.'] digits
1929     exponent-part   =  indicator [sign] digits
1930     numeric-string  =  [sign] decimal-part [exponent-part]
1931 
1932 The integer value is the appropriate integer representation of such a number,
1933 rounding towards zero.
1934 
1935 =cut
1936 
1937 */
1938 
1939 PARROT_EXPORT
1940 PARROT_WARN_UNUSED_RESULT
1941 INTVAL
Parrot_str_to_int(PARROT_INTERP,ARGIN_NULLOK (const STRING * s))1942 Parrot_str_to_int(PARROT_INTERP, ARGIN_NULLOK(const STRING *s))
1943 {
1944     ASSERT_ARGS(Parrot_str_to_int)
1945     if (STRING_IS_NULL(s))
1946         return 0;
1947     else {
1948         const UINTVAL       max_safe  = -(UINTVAL)PARROT_INTVAL_MIN / 10;
1949         const UINTVAL       last_dig  = (-(UINTVAL)PARROT_INTVAL_MIN) % 10;
1950         int                 sign      = 1;
1951         UINTVAL             i         = 0;
1952         String_iter         iter;
1953         INTVAL              count = (INTVAL)s->strlen;
1954         UINTVAL             c;
1955 
1956         STRING_ITER_INIT(interp, &iter);
1957 
1958         c = count-- > 0 ? STRING_iter_get_and_advance(interp, s, &iter) : 0;
1959         while (c == ' ')
1960             c = count-- > 0 ? STRING_iter_get_and_advance(interp, s, &iter) : 0;
1961         switch (c) {
1962           case '-':
1963             sign = -1;
1964             /* Fall through. */
1965           case '+':
1966             c = count-- > 0 ? STRING_iter_get_and_advance(interp, s, &iter) : 0;
1967             break;
1968           default:
1969             ; /* nothing */
1970         }
1971         while (c) {
1972             const UINTVAL nextval = c - (UINTVAL)'0';
1973             if (nextval > 9)
1974                 break;
1975             if (i < max_safe || (i == max_safe && nextval <= last_dig))
1976                 i = i * 10 + nextval;
1977             else
1978                 Parrot_ex_throw_from_c_args(interp, NULL,
1979                     EXCEPTION_ERR_OVERFLOW,
1980                     "Integer value of String '%S' too big", s);
1981             c = count-- > 0 ? STRING_iter_get_and_advance(interp, s, &iter) : 0;
1982         }
1983 
1984         if (sign == 1 && i > (UINTVAL)PARROT_INTVAL_MAX)
1985             Parrot_ex_throw_from_c_args(interp, NULL,
1986                     EXCEPTION_ERR_OVERFLOW,
1987                     "Integer value of String '%S' too big", s);
1988         return sign == -1 ? -i : i;
1989     }
1990 }
1991 
1992 
1993 /*
1994 
1995 =item C<FLOATVAL Parrot_str_to_num(PARROT_INTERP, const STRING *s)>
1996 
1997 Converts a numeric Parrot STRING to a floating point number.
1998 
1999 =cut
2000 
2001 */
2002 
2003 PARROT_EXPORT
2004 PARROT_WARN_UNUSED_RESULT
2005 FLOATVAL
Parrot_str_to_num(PARROT_INTERP,ARGIN_NULLOK (const STRING * s))2006 Parrot_str_to_num(PARROT_INTERP, ARGIN_NULLOK(const STRING *s))
2007 {
2008     ASSERT_ARGS(Parrot_str_to_num)
2009     FLOATVAL      f         = 0.0;
2010     FLOATVAL      mantissa  = 0.0;
2011     FLOATVAL      sign      = 1.0; /* -1 for '-' */
2012     FLOATVAL      divider   = 0.1;
2013     INTVAL        e         = 0;
2014     INTVAL        e_sign    = 1; /* -1 for '-' */
2015     /* How many digits it's safe to parse */
2016     const INTVAL  max_safe  = PARROT_INTVAL_MAX / 10;
2017     INTVAL        m         = 0;    /* Integer mantissa */
2018     int           m_is_safe = 1;    /* We can use integer mantissa */
2019     INTVAL        d         = 0;    /* Integer descriminator */
2020     int           d_is_safe = 1;    /* We can use integer mantissa */
2021     int           d_length  = 0;
2022     int           check_nan = 0;    /* Check for NaN and Inf after main loop */
2023     String_iter iter;
2024     number_parse_state state = parse_start;
2025 
2026     if (STRING_IS_NULL(s))
2027         return 0.0;
2028 
2029     STRING_ITER_INIT(interp, &iter);
2030 
2031     /* Handcrafted FSM to read float value */
2032     while (state != parse_end && iter.charpos < s->strlen) {
2033         const UINTVAL c = STRING_iter_get_and_advance(interp, s, &iter);
2034         /* Check for overflow */
2035         if (c > 255)
2036             break;
2037 
2038         switch (state) {
2039           case parse_start:
2040             if (isdigit((unsigned char)c)) {
2041                 f = c - '0';
2042                 m = c - '0';
2043                 state = parse_before_dot;
2044             }
2045             else if (c == '-') {
2046                 sign = -1.0;
2047                 state = parse_before_dot;
2048             }
2049             else if (c == '+')
2050                 state = parse_before_dot;
2051             else if (c == '.')
2052                 state = parse_after_dot;
2053             else if (isspace((unsigned char)c))
2054                 ; /* Do nothing */
2055             else {
2056                 check_nan = 1;
2057                 state     = parse_end;
2058             }
2059             break;
2060 
2061           case parse_before_dot:
2062             if (isdigit((unsigned char)c)) {
2063                 f = f*10.0 + (c-'0');
2064                 m = m*10 + (c-'0');
2065                 /* Integer overflow for mantissa */
2066                 if (m >= max_safe)
2067                     m_is_safe = 0;
2068             }
2069             else if (c == '.') {
2070                 state = parse_after_dot;
2071                 /*
2072                  * Throw gathered result. Recalculate from integer mantissa
2073                  * to preserve precision.
2074                  */
2075                 if (m_is_safe)
2076                     f = m;
2077                 mantissa = f;
2078             }
2079             else if (c == 'e' || c == 'E') {
2080                 state = parse_after_e;
2081                 /* See comment above */
2082                 if (m_is_safe)
2083                     f = m;
2084                 mantissa = f;
2085             }
2086             else {
2087                 check_nan = 1;
2088                 state     = parse_end;
2089             }
2090             break;
2091 
2092           case parse_after_dot:
2093             if (isdigit((unsigned char)c)) {
2094                 f += (c-'0') * divider;
2095                 divider /= 10.0;
2096                 d = d*10 + (c-'0');
2097                 if (d >= max_safe)
2098                     d_is_safe = 0;
2099                 ++d_length;
2100             }
2101             else if (c == 'e' || c == 'E')
2102                 state = parse_after_e;
2103             else
2104                 state = parse_end;
2105             break;
2106 
2107           case parse_after_e:
2108             if (isdigit((unsigned char)c)) {
2109                 e = e*10 + (c-'0');
2110                 state = parse_after_e_sign;
2111             }
2112             else if (c == '-') {
2113                 e_sign = -1;
2114                 state = parse_after_e_sign;
2115             }
2116             else if (c == '+')
2117                 state = parse_after_e_sign;
2118             else
2119                 state = parse_end;
2120             break;
2121 
2122           case parse_after_e_sign:
2123             if (isdigit((unsigned char)c))
2124                 e = e*10 + (c-'0');
2125             else
2126                 state = parse_end;
2127             break;
2128 
2129           case parse_end:
2130           default:
2131             /* Pacify compiler */
2132             break;
2133         }
2134     }
2135 
2136     /* Support for non-canonical NaN and Inf */
2137     /* charpos <= 2 because for "-i" iter already advanced to next char */
2138     if (check_nan && (iter.charpos <= 2)) {
2139         STRING * const t = Parrot_str_upcase(interp, s);
2140         if (STRING_equal(interp, t, CONST_STRING(interp, "NAN")))
2141             return PARROT_FLOATVAL_NAN_QUIET;
2142         else if (STRING_equal(interp, t, CONST_STRING(interp, "INF"))
2143              ||  STRING_equal(interp, t, CONST_STRING(interp, "INFINITY")))
2144             return PARROT_FLOATVAL_INF_POSITIVE;
2145         else if (STRING_equal(interp, t, CONST_STRING(interp, "-INF"))
2146              ||  STRING_equal(interp, t, CONST_STRING(interp, "-INFINITY")))
2147             return PARROT_FLOATVAL_INF_NEGATIVE;
2148     }
2149 
2150 #if defined(PARROT_HAS_POWL) && !defined(__CYGWIN__)
2151 #  define POW powl
2152 #else
2153 #  define POW pow
2154 #endif
2155 
2156      if (d && d_is_safe) {
2157         f = mantissa + (1.0 * d / POW(10.0, d_length));
2158      }
2159 
2160     if (sign < 0)
2161         f = -f;
2162 
2163     if (e) {
2164         if (e_sign == 1)
2165             f *= POW(10.0, e);
2166         else
2167             f /= POW(10.0, e);
2168     }
2169 
2170 #undef POW
2171 
2172     return f;
2173 }
2174 
2175 
2176 /*
2177 
2178 =item C<STRING * Parrot_str_from_int(PARROT_INTERP, INTVAL i)>
2179 
2180 Returns a Parrot string representation of the specified integer value.
2181 
2182 =cut
2183 
2184 */
2185 
2186 PARROT_EXPORT
2187 PARROT_WARN_UNUSED_RESULT
2188 PARROT_CANNOT_RETURN_NULL
2189 STRING *
Parrot_str_from_int(PARROT_INTERP,INTVAL i)2190 Parrot_str_from_int(PARROT_INTERP, INTVAL i)
2191 {
2192     ASSERT_ARGS(Parrot_str_from_int)
2193     char buf[128];
2194     return Parrot_str_from_int_base(interp, buf, (HUGEINTVAL)i, 10);
2195 }
2196 
2197 
2198 /*
2199 
2200 =item C<STRING * Parrot_str_from_num(PARROT_INTERP, FLOATVAL f)>
2201 
2202 Returns a Parrot string representation of the specified floating-point value.
2203 
2204 =cut
2205 
2206 */
2207 
2208 PARROT_EXPORT
2209 PARROT_WARN_UNUSED_RESULT
2210 PARROT_CANNOT_RETURN_NULL
2211 STRING *
Parrot_str_from_num(PARROT_INTERP,FLOATVAL f)2212 Parrot_str_from_num(PARROT_INTERP, FLOATVAL f)
2213 {
2214     ASSERT_ARGS(Parrot_str_from_num)
2215     /* Too damn hard--hand it off to Parrot_sprintf, which'll probably
2216        use the system sprintf anyway, but has gigantic buffers that are
2217        awfully hard to overflow. */
2218     return Parrot_sprintf_c(interp, FLOATVAL_FMT, f);
2219 }
2220 
2221 
2222 /*
2223 
2224 =item C<char * Parrot_str_to_cstring(PARROT_INTERP, const STRING *s)>
2225 
2226 Returns a C string for the specified Parrot string in the current
2227 representation of the string. Use C<Parrot_str_free_cstring()> to free
2228 the string. Failure to do this will result in a memory leak.
2229 
2230 You usually should use Parrot_str_to_encoded_cstring instead.
2231 
2232 =cut
2233 
2234 */
2235 
2236 PARROT_EXPORT
2237 PARROT_CANNOT_RETURN_NULL
2238 char *
Parrot_str_to_cstring(PARROT_INTERP,ARGIN (const STRING * s))2239 Parrot_str_to_cstring(PARROT_INTERP, ARGIN(const STRING *s))
2240 {
2241     ASSERT_ARGS(Parrot_str_to_cstring)
2242 
2243     return Parrot_str_to_encoded_cstring(interp, s, s->encoding);
2244 }
2245 
2246 
2247 /*
2248 
2249 =item C<char * Parrot_str_to_encoded_cstring(PARROT_INTERP, const STRING *s,
2250 const STR_VTABLE *enc)>
2251 
2252 Returns a C string in the encoding C<enc> for the Parrot string C<s>. Use
2253 C<Parrot_str_free_cstring()> to free the string. Failure to do this will result
2254 in a memory leak.
2255 
2256 =cut
2257 
2258 */
2259 
2260 PARROT_EXPORT
2261 PARROT_CANNOT_RETURN_NULL
2262 char *
Parrot_str_to_encoded_cstring(PARROT_INTERP,ARGIN (const STRING * s),ARGIN (const STR_VTABLE * enc))2263 Parrot_str_to_encoded_cstring(PARROT_INTERP, ARGIN(const STRING *s),
2264         ARGIN(const STR_VTABLE *enc))
2265 {
2266     ASSERT_ARGS(Parrot_str_to_encoded_cstring)
2267     size_t  len;
2268     size_t  trail;
2269     char   *p;
2270 
2271     if (STRING_IS_NULL(s))
2272         Parrot_ex_throw_from_c_noargs(interp, EXCEPTION_UNEXPECTED_NULL,
2273             "Can't convert NULL string");
2274 
2275     if (s->encoding != enc) {
2276         /* Check for compatible encodings */
2277         if (s->encoding == Parrot_ascii_encoding_ptr) {
2278             if (enc == Parrot_latin1_encoding_ptr
2279             ||  enc == Parrot_utf8_encoding_ptr)
2280                     goto skip;
2281         }
2282         else if (s->encoding == Parrot_ucs2_encoding_ptr) {
2283             if (enc == Parrot_utf16_encoding_ptr)
2284                 goto skip;
2285         }
2286 
2287         /* Convert */
2288         s = enc->to_encoding(interp, s);
2289     }
2290 skip:
2291 
2292     len   = s->bufused;
2293     trail = enc->bytes_per_unit;
2294 
2295     p = (char*)mem_internal_allocate(len + trail);
2296 
2297     memcpy(p, s->strstart, len);
2298     memset(p + len, 0, trail);
2299 
2300     return p;
2301 }
2302 
2303 
2304 /*
2305 
2306 =item C<void Parrot_str_free_cstring(char *p)>
2307 
2308 Free a string created by C<Parrot_str_to_cstring()>.
2309 
2310 TODO - Hopefully this can go away at some point, as it's got all
2311 sorts of leak potential otherwise.
2312 
2313 =cut
2314 
2315 */
2316 
2317 PARROT_EXPORT
2318 void
Parrot_str_free_cstring(ARGFREE (char * p))2319 Parrot_str_free_cstring(ARGFREE(char *p))
2320 {
2321     ASSERT_ARGS(Parrot_str_free_cstring)
2322     mem_internal_free((void *)p);
2323 }
2324 
2325 
2326 /*
2327 
2328 =item C<void Parrot_str_pin(PARROT_INTERP, STRING *s)>
2329 
2330 Replaces the specified Parrot string's managed buffer memory by system memory.
2331 
2332 =cut
2333 
2334 */
2335 
2336 PARROT_EXPORT
2337 void
Parrot_str_pin(SHIM_INTERP,ARGMOD (STRING * s))2338 Parrot_str_pin(SHIM_INTERP, ARGMOD(STRING *s))
2339 {
2340     ASSERT_ARGS(Parrot_str_pin)
2341     const size_t size = Buffer_buflen(s);
2342     char * const memory = (char *)mem_internal_allocate(size);
2343 
2344     memcpy(memory, Buffer_bufstart(s), size);
2345     Buffer_bufstart(s) = memory;
2346     s->strstart        = memory;
2347 
2348     /* Mark the memory as both from the system and immobile */
2349     PObj_sysmem_SET(s);
2350 }
2351 
2352 
2353 /*
2354 
2355 =item C<void Parrot_str_unpin(PARROT_INTERP, STRING *s)>
2356 
2357 Undoes a C<Parrot_str_pin()> so that the string once again uses managed memory.
2358 
2359 =cut
2360 
2361 */
2362 
2363 PARROT_EXPORT
2364 void
Parrot_str_unpin(PARROT_INTERP,ARGMOD (STRING * s))2365 Parrot_str_unpin(PARROT_INTERP, ARGMOD(STRING *s))
2366 {
2367     ASSERT_ARGS(Parrot_str_unpin)
2368     void  *memory;
2369     size_t size;
2370 
2371     /* If this string is not marked using system memory,
2372      * we just don't do this */
2373     if (!PObj_sysmem_TEST(s))
2374         return;
2375 
2376     size = Buffer_buflen(s);
2377 
2378     /* We need a handle on the fixed memory so we can get rid of it later */
2379     memory = Buffer_bufstart(s);
2380 
2381     /* Reallocate it the same size
2382      * NOTE can't use Parrot_gc_reallocate_string_storage because of the LEA
2383      * allocator, where this is a noop for the same size
2384      *
2385      * We have to block GC here, as we have a pointer to bufstart
2386      */
2387     Parrot_block_GC_sweep(interp);
2388     Parrot_gc_allocate_string_storage(interp, s, size);
2389     Parrot_unblock_GC_sweep(interp);
2390     memcpy(Buffer_bufstart(s), memory, size);
2391 
2392     /* Mark the memory as neither immobile nor system allocated */
2393     PObj_sysmem_CLEAR(s);
2394 
2395     /* Free up the memory */
2396     mem_internal_free(memory);
2397 }
2398 
2399 
2400 /*
2401 
2402 =item C<size_t Parrot_str_to_hashval(PARROT_INTERP, const STRING *s)>
2403 
2404 Returns the hash value for the specified Parrot string, caching it in
2405 C<< s->hashval >>.
2406 
2407 Identical to the STRING_hash macro.
2408 
2409 =cut
2410 
2411 */
2412 
2413 PARROT_EXPORT
2414 PARROT_WARN_UNUSED_RESULT
2415 size_t
Parrot_str_to_hashval(PARROT_INTERP,ARGIN_NULLOK (const STRING * s))2416 Parrot_str_to_hashval(PARROT_INTERP, ARGIN_NULLOK(const STRING *s))
2417 {
2418     ASSERT_ARGS(Parrot_str_to_hashval)
2419 
2420     if (s == NULL)
2421         s = STRINGNULL;
2422 
2423     return STRING_hash(interp, s, interp->hash_seed);
2424 }
2425 
2426 /*
2427 
2428 =item C<STRING * Parrot_str_reverse(PARROT_INTERP, const STRING *src)>
2429 
2430 Return the reverse of C<src>, even for non-ascii strings.
2431 
2432 =cut
2433 
2434 */
2435 
2436 PARROT_EXPORT
2437 PARROT_CANNOT_RETURN_NULL
2438 STRING *
Parrot_str_reverse(PARROT_INTERP,ARGIN (const STRING * src))2439 Parrot_str_reverse(PARROT_INTERP, ARGIN(const STRING *src))
2440 {
2441     ASSERT_ARGS(Parrot_str_reverse)
2442     String_iter  iter;
2443     INTVAL       pos;
2444     PMC         *sb;
2445 
2446     STRING_ITER_INIT(interp, &iter);
2447     sb = Parrot_pmc_new(interp, enum_class_StringBuilder);
2448 
2449     for (pos = STRING_length(src) - 1; pos >= 0; pos--) {
2450         VTABLE_push_string(interp, sb, Parrot_str_chr(interp,
2451             STRING_iter_get(interp, src, &iter, pos)));
2452     }
2453 
2454     return VTABLE_get_string(interp, sb);
2455 }
2456 
2457 /*
2458 
2459 =item C<STRING * Parrot_str_escape(PARROT_INTERP, const STRING *src)>
2460 
2461 Escapes all non-ASCII chars to backslash sequences. Control chars that
2462 C<Parrot_str_unescape> can handle are escaped as I<\x>, as well as a double
2463 quote character. Other control chars and codepoints < 0x100 are escaped as
2464 I<\xhh>, codepoints up to 0xffff, as I<\uhhhh>, and codepoints greater than
2465 this as I<\x{hh...hh}>.
2466 
2467 =cut
2468 
2469 */
2470 
2471 PARROT_EXPORT
2472 PARROT_CANNOT_RETURN_NULL
2473 STRING *
Parrot_str_escape(PARROT_INTERP,ARGIN_NULLOK (const STRING * src))2474 Parrot_str_escape(PARROT_INTERP, ARGIN_NULLOK(const STRING *src))
2475 {
2476     ASSERT_ARGS(Parrot_str_escape)
2477     return Parrot_str_escape_truncate(interp, src, (UINTVAL) ~0);
2478 }
2479 
2480 
2481 /*
2482 
2483 =item C<STRING * Parrot_str_escape_truncate(PARROT_INTERP, const STRING *src,
2484 UINTVAL limit)>
2485 
2486 Escapes all non-ASCII characters in the given string with backslashed versions,
2487 but limits the length of the output (used for trace output of strings).
2488 
2489 =cut
2490 
2491 */
2492 
2493 PARROT_EXPORT
2494 PARROT_CANNOT_RETURN_NULL
2495 STRING *
Parrot_str_escape_truncate(PARROT_INTERP,ARGIN_NULLOK (const STRING * src),UINTVAL limit)2496 Parrot_str_escape_truncate(PARROT_INTERP,
2497         ARGIN_NULLOK(const STRING *src), UINTVAL limit)
2498 {
2499     ASSERT_ARGS(Parrot_str_escape_truncate)
2500     STRING      *result;
2501     UINTVAL      i, len, charlen;
2502     String_iter  iter;
2503     char         hex_buf[HEX_BUF_SIZE];
2504     char        *dp;
2505 
2506     if (STRING_IS_NULL(src))
2507         return STRINGNULL;
2508 
2509     len = src->strlen;
2510 
2511     if (len > limit)
2512         len = limit;
2513 
2514     /* expect around 2x the chars */
2515     charlen = 2 * len;
2516 
2517     if (charlen < HEX_BUF_SIZE)
2518         charlen = HEX_BUF_SIZE;
2519 
2520     /* create ascii result */
2521     result = Parrot_str_new_init(interp, NULL, charlen,
2522             Parrot_ascii_encoding_ptr, 0);
2523 
2524     /* more work TODO */
2525     STRING_ITER_INIT(interp, &iter);
2526     dp = result->strstart;
2527 
2528     for (i = 0; len > 0; --len) {
2529         unsigned c = STRING_iter_get_and_advance(interp, src, &iter);
2530         int hex_len;
2531 
2532         if (c < 0x7f) {
2533             /* process ASCII chars */
2534             if (i >= charlen - 2) {
2535                 /* resize - still len codepoints to go */
2536                 charlen += len * 2 + HEX_BUF_SIZE;
2537                 result->bufused = i;
2538                 Parrot_gc_reallocate_string_storage(interp, result, charlen);
2539                 /* start can change */
2540                 dp = result->strstart;
2541             }
2542             switch (c) {
2543               case '\\':
2544                 dp[i++] = '\\';
2545                 break;
2546               case '\a':
2547                 dp[i++] = '\\';
2548                 c = 'a';
2549                 break;
2550               case '\b':
2551                 dp[i++] = '\\';
2552                 c = 'b';
2553                 break;
2554               case '\n':
2555                 dp[i++] = '\\';
2556                 c = 'n';
2557                 break;
2558               case '\r':
2559                 dp[i++] = '\\';
2560                 c = 'r';
2561                 break;
2562               case '\t':
2563                 dp[i++] = '\\';
2564                 c = 't';
2565                 break;
2566               case '\f':
2567                 dp[i++] = '\\';
2568                 c = 'f';
2569                 break;
2570               case '"':
2571                 dp[i++] = '\\';
2572                 c = '"';
2573                 break;
2574               case 27:
2575                 dp[i++] = '\\';
2576                 c = 'e';
2577                 break;
2578               default:
2579                 break;
2580             }
2581             if (c >= 0x20) {
2582                 dp[i++] = c;
2583                 continue;
2584             }
2585         }
2586 
2587         /* escape by appending either \uhhhh or \x{hh...} */
2588 
2589         if (c < 0x0100 || c >= 0x10000)
2590             hex_len = snprintf(hex_buf, HEX_BUF_SIZE - 1, "\\x{%x}", c);
2591         else
2592             hex_len = snprintf(hex_buf, HEX_BUF_SIZE - 1, "\\u%04x", c);
2593 
2594         if (hex_len < 0)
2595             hex_len = 0;
2596 
2597         if (i + hex_len > charlen) {
2598             /* resize - still len codepoints to go */
2599             charlen += len * 2 + HEX_BUF_SIZE;
2600             result->bufused = i;
2601             Parrot_gc_reallocate_string_storage(interp, result, charlen);
2602             /* start can change */
2603             dp = result->strstart;
2604         }
2605 
2606         memcpy(dp + i, hex_buf, hex_len);
2607 
2608         /* adjust our insert idx */
2609         i += hex_len;
2610 
2611         PARROT_ASSERT(i <= charlen);
2612     }
2613 
2614     result->bufused = result->strlen = i;
2615     return result;
2616 }
2617 
2618 /*
2619 
2620 =item C<static void throw_illegal_escape(PARROT_INTERP, const STRING *s)>
2621 
2622 =item C<static void throw_illegal_escape_char(PARROT_INTERP, const char c, const
2623 STRING *s)>
2624 
2625 Helper functions to avoid repeated throw calls.
2626 
2627 =cut
2628 
2629 */
2630 
2631 PARROT_DOES_NOT_RETURN
2632 PARROT_COLD
2633 static void
throw_illegal_escape(PARROT_INTERP,ARGIN (const STRING * s))2634 throw_illegal_escape(PARROT_INTERP, ARGIN(const STRING *s))
2635 {
2636     ASSERT_ARGS(throw_illegal_escape)
2637     Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
2638             "Illegal escape sequence in '%Ss'", s);
2639 }
2640 
2641 PARROT_DOES_NOT_RETURN
2642 PARROT_COLD
2643 static void
throw_illegal_escape_char(PARROT_INTERP,const char c,ARGIN (const STRING * s))2644 throw_illegal_escape_char(PARROT_INTERP, const char c, ARGIN(const STRING *s))
2645 {
2646     ASSERT_ARGS(throw_illegal_escape_char)
2647     Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_INVALID_CHARACTER,
2648             "Illegal escape sequence \\%c in '%Ss'", c, s);
2649 }
2650 
2651 /*
2652 
2653 =item C<STRING * Parrot_str_unescape_string(PARROT_INTERP, const STRING *src,
2654 const STR_VTABLE *encoding, UINTVAL flags)>
2655 
2656 EXPERIMENTAL, see TT #1628
2657 
2658 Unescapes the src string returning a new string with the encoding specified.
2659 
2660 
2661 =cut
2662 
2663 */
2664 
2665 PARROT_EXPORT
2666 PARROT_CANNOT_RETURN_NULL
2667 STRING *
Parrot_str_unescape_string(PARROT_INTERP,ARGIN (const STRING * src),ARGIN (const STR_VTABLE * encoding),UINTVAL flags)2668 Parrot_str_unescape_string(PARROT_INTERP, ARGIN(const STRING *src),
2669         ARGIN(const STR_VTABLE *encoding),
2670         UINTVAL flags)
2671 {
2672     ASSERT_ARGS(Parrot_str_unescape_string)
2673 
2674     UINTVAL srclen = Parrot_str_byte_length(interp, src);
2675     STRING *result = Parrot_gc_new_string_header(interp, flags);
2676     String_iter itersrc;
2677     String_iter iterdest;
2678     UINTVAL reserved;
2679     int digcount;
2680     char digbuf[9];
2681     int pending;
2682 
2683     result->encoding = encoding;
2684     reserved = string_max_bytes(interp, result, srclen);
2685     Parrot_gc_allocate_string_storage(interp, result, reserved);
2686     result->bufused = reserved;
2687 
2688     STRING_ITER_INIT(interp, &itersrc);
2689     STRING_ITER_INIT(interp, &iterdest);
2690     while (itersrc.bytepos < srclen) {
2691         INTVAL c = STRING_iter_get_and_advance(interp, src, &itersrc);
2692         INTVAL next;
2693 
2694         do {
2695             pending = 0;
2696             next = c;
2697             if (c == '\\') {
2698                 if (itersrc.bytepos >= srclen) break;
2699                 c = STRING_iter_get_and_advance(interp, src, &itersrc);
2700                 switch (c) {
2701                   /* Allowed escape sequences */
2702                   case 'a': next = '\a'; break; /* \x07 Alarm, beep */
2703                   case 'b': next = '\b'; break; /* \x08 Backspace */
2704                   case 't': next = '\t'; break; /* \x09 horizontal tab */
2705                   case 'n': next = '\n'; break; /* \x0a newline */
2706                   case 'v': next = '\v'; break; /* \x0b vertical tab */
2707                   case 'f': next = '\f'; break; /* \x0c formfeed */
2708                   case 'r': next = '\r'; break; /* \x0d carriage return */
2709                   case 'e': next = '\x1B'; break; /* \x1b prefix ansi escape */
2710                   /* and previously handled in the default case: */
2711                   case '\\': next = c; break;   /* \x5c */
2712                   case '"':  next = c; break;   /* \x22 */
2713                   case '\'': next = c; break;   /* \x27 */
2714                   case '?':  next = c; break;   /* \x3f */
2715                   /* Escape character */
2716                   case 'c':
2717                     if (itersrc.bytepos >= srclen) break;
2718                     c = STRING_iter_get_and_advance(interp, src, &itersrc);
2719                     /* This assumes ascii-alike encoding */
2720                     if (c < 'A' || c > 'Z')
2721                         throw_illegal_escape(interp, src);
2722                     next = c - 'A' + 1;
2723                     break;
2724                 case 'x':
2725                     digcount = 0;
2726                     if (itersrc.bytepos >= srclen)
2727                         break;
2728                     c = STRING_iter_get_and_advance(interp, src, &itersrc);
2729                     if (c == '{') {
2730                         /* \x{h..h} 1..8 hex digits */
2731                         while (itersrc.bytepos < srclen) {
2732                             c = STRING_iter_get_and_advance(interp, src, &itersrc);
2733                             if (c == '}')
2734                                 break;
2735                             if (!isxdigit(c))
2736                                 throw_illegal_escape(interp, src);
2737                             if (digcount == 8)
2738                                 break;
2739                             digbuf[digcount++] = c;
2740                         }
2741                         if (c != '}')
2742                             throw_illegal_escape(interp, src);
2743                     }
2744                     else {
2745                         /* \xhh 1..2 hex digits */
2746                         pending = 1;
2747                         for (digcount = 0; digcount < 2;) {
2748                             if (!isxdigit(c))
2749                                 break;
2750                             digbuf[digcount] = c;
2751                             ++digcount;
2752                             if (itersrc.bytepos >= srclen) {
2753                                 pending = 0;
2754                                 break;
2755                             }
2756                             c = STRING_iter_get_and_advance(interp, src, &itersrc);
2757                         }
2758                     }
2759                     if (digcount == 0)
2760                         throw_illegal_escape(interp, src);
2761                     digbuf[digcount] = '\0';
2762                     next = strtol(digbuf, NULL, 16);
2763                     break;
2764                 case 'u':
2765                     /* \uhhhh 4 hex digits */
2766                     for (digcount = 0; digcount < 4; ++digcount) {
2767                         if (itersrc.bytepos >= srclen) break;
2768                         c = STRING_iter_get_and_advance(interp, src, &itersrc);
2769                         if (!isxdigit(c))
2770                             throw_illegal_escape(interp, src);
2771                         digbuf[digcount] = c;
2772                     }
2773                     digbuf[digcount] = '\0';
2774                     next = strtol(digbuf, NULL, 16);
2775                     break;
2776                 case 'U':
2777                     /* \Uhhhhhhhh 8 hex digits */
2778                     for (digcount = 0; digcount < 8; ++digcount) {
2779                         if (itersrc.bytepos >= srclen) break;
2780                         c = STRING_iter_get_and_advance(interp, src, &itersrc);
2781                         if (!isxdigit(c))
2782                             throw_illegal_escape(interp, src);
2783                         digbuf[digcount] = c;
2784                     }
2785                     digbuf[digcount] = '\0';
2786                     next = strtol(digbuf, NULL, 16);
2787                     break;
2788                 case '0': case '1': case '2': case '3':
2789                 case '4': case '5': case '6': case '7':
2790                     /* \ooo 1..3 oct digits */
2791                     digbuf[0] = c;
2792                     for (digcount = 1; digcount < 3; ++digcount) {
2793                         if (itersrc.bytepos >= srclen) break;
2794                         c = STRING_iter_get_and_advance(interp, src, &itersrc);
2795                         if (c < '0' || c > '7')
2796                             break;
2797                         digbuf[digcount] = c;
2798                     }
2799                     digbuf[digcount] = '\0';
2800                     next = strtol(digbuf, NULL, 8);
2801                     if (itersrc.bytepos < srclen && digcount < 3)
2802                         pending = 1;
2803                     break;
2804                 default:
2805                     /* Die with Illegal escape sequence since 6.9.0 but allow quoting of
2806                        special chars. */
2807                     /* The C standard requires such "invalid" escape sequences to be diagnosed
2808                        (i.e., the compiler must print an error message). GH #1103 */
2809                     if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) {
2810                         /* next = c; for a deprecation cycle? */
2811                         /* catch inproper use of \i, \O, \o */
2812                         throw_illegal_escape_char(interp, c, src);
2813                     }
2814                     else {
2815                         next = c; /* ignore the \ in special chars like \[, \}, ... */
2816                     }
2817                 }
2818             }
2819             STRING_iter_set_and_advance(interp, result, &iterdest, next);
2820         } while (pending);
2821     }
2822     result->bufused = iterdest.bytepos;
2823     result->strlen = iterdest.charpos;
2824     return result;
2825 }
2826 
2827 /*
2828 
2829 =item C<STRING * Parrot_str_unescape(PARROT_INTERP, const char *cstring, char
2830 delimiter, const char *enc_char)>
2831 
2832 Unescapes the specified C string. These sequences are covered:
2833 
2834   \xhh        1..2 hex digits
2835   \ooo        1..3 oct digits
2836   \cX         control char X
2837   \x{h..h}    1..8 hex digits
2838   \uhhhh      4 hex digits
2839   \Uhhhhhhhh  8 hex digits
2840   \a, \b, \t, \n, \v, \f, \r, \e
2841 
2842 These sequences are not escaped: C<\\ \" \' \?>
2843 
2844 All other escape sequences within C<[a-zA-Z]> are illegal.
2845 
2846 =cut
2847 
2848 */
2849 
2850 PARROT_EXPORT
2851 PARROT_CANNOT_RETURN_NULL
2852 STRING *
Parrot_str_unescape(PARROT_INTERP,ARGIN (const char * cstring),char delimiter,ARGIN_NULLOK (const char * enc_char))2853 Parrot_str_unescape(PARROT_INTERP,
2854     ARGIN(const char *cstring), char delimiter, ARGIN_NULLOK(const char *enc_char))
2855 {
2856     ASSERT_ARGS(Parrot_str_unescape)
2857 
2858     STRING           *src;
2859     const STR_VTABLE *encoding, *src_encoding;
2860     size_t            clength = strlen(cstring);
2861 
2862     if (delimiter && clength)
2863         --clength;
2864 
2865     if (enc_char == NULL) {
2866         encoding = Parrot_default_encoding_ptr;
2867     }
2868     else {
2869         encoding = Parrot_find_encoding(interp, enc_char);
2870 
2871         if (!encoding)
2872             Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_UNIMPLEMENTED,
2873                 "Can't make '%s' encoding strings", enc_char);
2874     }
2875 
2876     if (encoding->max_bytes_per_codepoint == 1)
2877         src_encoding = encoding;
2878     else
2879         src_encoding = Parrot_utf8_encoding_ptr;
2880 
2881     src = Parrot_str_new_init(interp, cstring, clength, src_encoding,
2882             PObj_external_FLAG);
2883 
2884     return Parrot_str_unescape_string(interp, src, encoding,
2885             PObj_constant_FLAG);
2886 }
2887 
2888 
2889 /*
2890 
2891 =item C<STRING * Parrot_str_upcase(PARROT_INTERP, const STRING *s)>
2892 
2893 Returns a copy of the specified Parrot string converted to upper case.
2894 Non-caseable characters are left unchanged.
2895 
2896 =cut
2897 
2898 */
2899 
2900 PARROT_EXPORT
2901 PARROT_CANNOT_RETURN_NULL
2902 PARROT_MALLOC
2903 STRING *
Parrot_str_upcase(PARROT_INTERP,ARGIN_NULLOK (const STRING * s))2904 Parrot_str_upcase(PARROT_INTERP, ARGIN_NULLOK(const STRING *s))
2905 {
2906     ASSERT_ARGS(Parrot_str_upcase)
2907     if (STRING_IS_NULL(s))
2908         Parrot_ex_throw_from_c_noargs(interp, EXCEPTION_UNEXPECTED_NULL,
2909             "Can't upcase NULL string");
2910     else {
2911         STRING * const res = STRING_upcase(interp, s);
2912         res->hashval = 0;
2913         return res;
2914     }
2915 }
2916 
2917 
2918 /*
2919 
2920 =item C<STRING * Parrot_str_downcase(PARROT_INTERP, const STRING *s)>
2921 
2922 Returns a copy of the specified Parrot string converted to lower case.
2923 Non-caseable characters are left unchanged.
2924 
2925 =cut
2926 
2927 */
2928 
2929 PARROT_EXPORT
2930 PARROT_CANNOT_RETURN_NULL
2931 PARROT_MALLOC
2932 STRING *
Parrot_str_downcase(PARROT_INTERP,ARGIN_NULLOK (const STRING * s))2933 Parrot_str_downcase(PARROT_INTERP, ARGIN_NULLOK(const STRING *s))
2934 {
2935     ASSERT_ARGS(Parrot_str_downcase)
2936 
2937     if (STRING_IS_NULL(s))
2938         Parrot_ex_throw_from_c_noargs(interp, EXCEPTION_UNEXPECTED_NULL,
2939             "Can't downcase NULL string");
2940     else {
2941         STRING * const res = STRING_downcase(interp, s);
2942         res->hashval = 0;
2943         return res;
2944     }
2945 }
2946 
2947 
2948 /*
2949 
2950 =item C<STRING * Parrot_str_titlecase(PARROT_INTERP, const STRING *s)>
2951 
2952 Returns a copy of the specified Parrot string converted to title case.
2953 Non-caseable characters are left unchanged.
2954 
2955 =cut
2956 
2957 */
2958 
2959 PARROT_EXPORT
2960 PARROT_CANNOT_RETURN_NULL
2961 PARROT_MALLOC
2962 STRING *
Parrot_str_titlecase(PARROT_INTERP,ARGIN_NULLOK (const STRING * s))2963 Parrot_str_titlecase(PARROT_INTERP, ARGIN_NULLOK(const STRING *s))
2964 {
2965     ASSERT_ARGS(Parrot_str_titlecase)
2966 
2967     if (STRING_IS_NULL(s))
2968         Parrot_ex_throw_from_c_noargs(interp, EXCEPTION_UNEXPECTED_NULL,
2969             "Can't titlecase NULL string");
2970     else {
2971         STRING * const res = STRING_titlecase(interp, s);
2972         res->hashval = 0;
2973         return res;
2974     }
2975 }
2976 
2977 
2978 /*
2979 
2980 =item C<const char * Parrot_str_cstring(PARROT_INTERP, const STRING *str)>
2981 
2982 Returns a C string from a Parrot string.  Both sides are treated
2983 as constants -- i.e. do not resize the result.
2984 
2985 =cut
2986 
2987 */
2988 
2989 PARROT_EXPORT
2990 PARROT_PURE_FUNCTION
2991 PARROT_CANNOT_RETURN_NULL
2992 const char *
Parrot_str_cstring(SHIM_INTERP,ARGIN (const STRING * str))2993 Parrot_str_cstring(SHIM_INTERP, ARGIN(const STRING *str))
2994 {
2995     ASSERT_ARGS(Parrot_str_cstring)
2996     /* TODO handle NULL and friends */
2997     return str->strstart;
2998 }
2999 
3000 
3001 /*
3002 
3003 =item C<INTVAL Parrot_str_is_cclass(PARROT_INTERP, INTVAL flags, const STRING
3004 *s, UINTVAL offset)>
3005 
3006 Returns 1 if the codepoint of string C<s> at given offset is in the given
3007 character class C<flags>. See also F<include/parrot/cclass.h> for possible
3008 character classes. Returns 0 otherwise, or if the string is empty or NULL.
3009 
3010 =cut
3011 
3012 */
3013 
3014 PARROT_EXPORT
3015 PARROT_WARN_UNUSED_RESULT
3016 INTVAL
Parrot_str_is_cclass(PARROT_INTERP,INTVAL flags,ARGIN (const STRING * s),UINTVAL offset)3017 Parrot_str_is_cclass(PARROT_INTERP, INTVAL flags,
3018         ARGIN(const STRING *s), UINTVAL offset)
3019 {
3020     ASSERT_ARGS(Parrot_str_is_cclass)
3021 
3022     if (!Parrot_str_byte_length(interp, s))
3023         return 0;
3024 
3025     return STRING_is_cclass(interp, flags, s, offset);
3026 }
3027 
3028 
3029 /*
3030 
3031 =item C<INTVAL Parrot_str_find_cclass(PARROT_INTERP, INTVAL flags, const STRING
3032 *s, UINTVAL offset, UINTVAL count)>
3033 
3034 Finds the first occurrence of the given character class in C<flags> in the
3035 string, and returns its glyph-wise index.
3036 
3037 =cut
3038 
3039 */
3040 
3041 PARROT_EXPORT
3042 PARROT_WARN_UNUSED_RESULT
3043 INTVAL
Parrot_str_find_cclass(PARROT_INTERP,INTVAL flags,ARGIN_NULLOK (const STRING * s),UINTVAL offset,UINTVAL count)3044 Parrot_str_find_cclass(PARROT_INTERP, INTVAL flags, ARGIN_NULLOK(const STRING *s),
3045                           UINTVAL offset, UINTVAL count)
3046 {
3047     ASSERT_ARGS(Parrot_str_find_cclass)
3048 
3049     if (STRING_IS_NULL(s))
3050         return -1;
3051 
3052     return STRING_find_cclass(interp, flags, s, offset, count);
3053 }
3054 
3055 
3056 /*
3057 
3058 =item C<INTVAL Parrot_str_find_not_cclass(PARROT_INTERP, INTVAL flags, const
3059 STRING *s, UINTVAL offset, UINTVAL count)>
3060 
3061 Finds the first occurrence of the a character I<not> in the given character
3062 class in C<flags> in the string starting from C<offset> and looking at C<count>
3063 positions, and returns its glyph-wise index.  Returns C<offset + count>, if not
3064 found.
3065 
3066 =cut
3067 
3068 */
3069 
3070 PARROT_EXPORT
3071 PARROT_WARN_UNUSED_RESULT
3072 INTVAL
Parrot_str_find_not_cclass(PARROT_INTERP,INTVAL flags,ARGIN_NULLOK (const STRING * s),UINTVAL offset,UINTVAL count)3073 Parrot_str_find_not_cclass(PARROT_INTERP, INTVAL flags,
3074     ARGIN_NULLOK(const STRING *s), UINTVAL offset, UINTVAL count)
3075 {
3076     ASSERT_ARGS(Parrot_str_find_not_cclass)
3077 
3078     if (STRING_IS_NULL(s))
3079         return -1;
3080 
3081     return STRING_find_not_cclass(interp, flags, s, offset, count);
3082 }
3083 
3084 
3085 /*
3086 
3087 =item C<STRING* Parrot_str_change_encoding(PARROT_INTERP, STRING *src, INTVAL
3088 encoding_nr)>
3089 
3090 Converts C<src> to the given encoding and returns the result as a new string.
3091 
3092 =cut
3093 
3094 */
3095 
3096 PARROT_EXPORT
3097 PARROT_WARN_UNUSED_RESULT
3098 PARROT_CAN_RETURN_NULL
3099 STRING*
Parrot_str_change_encoding(PARROT_INTERP,ARGMOD_NULLOK (STRING * src),INTVAL encoding_nr)3100 Parrot_str_change_encoding(PARROT_INTERP, ARGMOD_NULLOK(STRING *src),
3101         INTVAL encoding_nr)
3102 {
3103     ASSERT_ARGS(Parrot_str_change_encoding)
3104     const STR_VTABLE *new_encoding;
3105 
3106     if (STRING_IS_NULL(src))
3107         return STRINGNULL;
3108 
3109     new_encoding = Parrot_get_encoding(interp, encoding_nr);
3110 
3111     if (!new_encoding)
3112         Parrot_ex_throw_from_c_args(interp, NULL, EXCEPTION_INVALID_ENCODING,
3113             "encoding #%d not found", (int) encoding_nr);
3114 
3115     if (new_encoding == src->encoding)
3116         return src;
3117 
3118     return new_encoding->to_encoding(interp, src);
3119 }
3120 
3121 
3122 /*
3123 
3124 =item C<STRING * Parrot_str_compose(PARROT_INTERP, const STRING *src)>
3125 
3126 Normalizes the string.
3127 
3128 =cut
3129 
3130 */
3131 
3132 PARROT_EXPORT
3133 PARROT_WARN_UNUSED_RESULT
3134 PARROT_CANNOT_RETURN_NULL
3135 STRING *
Parrot_str_compose(PARROT_INTERP,ARGIN_NULLOK (const STRING * src))3136 Parrot_str_compose(PARROT_INTERP, ARGIN_NULLOK(const STRING *src))
3137 {
3138     ASSERT_ARGS(Parrot_str_compose)
3139 
3140     if (STRING_IS_NULL(src))
3141         return STRINGNULL;
3142 
3143     if (src->strlen == 0)
3144         return CONST_STRING(interp, "");
3145 
3146     return STRING_compose(interp, src);
3147 }
3148 
3149 
3150 /*
3151 
3152 =item C<STRING* Parrot_str_join(PARROT_INTERP, STRING *j, PMC *ar)>
3153 
3154 Joins the elements of the array C<ar> as strings with the string C<j> between
3155 them, returning the result.
3156 
3157 =cut
3158 
3159 */
3160 
3161 PARROT_EXPORT
3162 PARROT_WARN_UNUSED_RESULT
3163 PARROT_CANNOT_RETURN_NULL
3164 STRING*
Parrot_str_join(PARROT_INTERP,ARGIN_NULLOK (STRING * j),ARGIN (PMC * ar))3165 Parrot_str_join(PARROT_INTERP, ARGIN_NULLOK(STRING *j), ARGIN(PMC *ar))
3166 {
3167     ASSERT_ARGS(Parrot_str_join)
3168 
3169     if (STRING_IS_NULL(j)) {
3170         PMC *sb = Parrot_pmc_new_init(interp, enum_class_StringBuilder, ar);
3171         return VTABLE_get_string(interp, sb);
3172     }
3173     else {
3174         PMC      *sb;
3175         STRING   *first;
3176         const int count = VTABLE_elements(interp, ar);
3177         INTVAL    length, j_length;
3178         int       i;
3179 
3180         if (count == 0)
3181             return Parrot_str_new_noinit(interp, 0);
3182 
3183         first    = VTABLE_get_string_keyed_int(interp, ar, 0);
3184         length   = Parrot_str_byte_length(interp, first);
3185         j_length = Parrot_str_byte_length(interp, j);
3186 
3187         /* it's an approximation, but it doesn't hurt */
3188         sb       = Parrot_pmc_new_init_int(interp, enum_class_StringBuilder,
3189                     (length + j_length) * count);
3190 
3191         VTABLE_push_string(interp, sb, first);
3192 
3193         for (i = 1; i < count; ++i) {
3194             STRING *part = VTABLE_get_string_keyed_int(interp, ar, i);
3195             if (j_length)
3196                 VTABLE_push_string(interp, sb, j);
3197 
3198             if (part->strlen)
3199                 VTABLE_push_string(interp, sb, part);
3200         }
3201 
3202         return VTABLE_get_string(interp, sb);
3203     }
3204 }
3205 
3206 
3207 /*
3208 
3209 =item C<PMC* Parrot_str_split(PARROT_INTERP, const STRING *delim, const STRING
3210 *str)>
3211 
3212 Splits the string C<str> at the delimiter C<delim>, returning a
3213 C<ResizableStringArray>, or his mapped type in the current HLL, of results.
3214 Returns PMCNULL if the string or the delimiter is NULL.
3215 
3216 =cut
3217 
3218 */
3219 
3220 PARROT_EXPORT
3221 PARROT_WARN_UNUSED_RESULT
3222 PARROT_CANNOT_RETURN_NULL
3223 PMC*
Parrot_str_split(PARROT_INTERP,ARGIN_NULLOK (const STRING * delim),ARGIN_NULLOK (const STRING * str))3224 Parrot_str_split(PARROT_INTERP,
3225     ARGIN_NULLOK(const STRING *delim), ARGIN_NULLOK(const STRING *str))
3226 {
3227     ASSERT_ARGS(Parrot_str_split)
3228     PMC     *res;
3229     STRING  *tstr;
3230     UINTVAL  slen, dlen;
3231     String_iter iter;
3232 
3233     if (STRING_IS_NULL(delim) || STRING_IS_NULL(str))
3234         return PMCNULL;
3235 
3236     res  = Parrot_pmc_new(interp,
3237             Parrot_hll_get_ctx_HLL_type(interp, enum_class_ResizableStringArray));
3238     slen = Parrot_str_length(interp, str);
3239 
3240     if (!slen)
3241         return res;
3242 
3243     STRING_ITER_INIT(interp, &iter);
3244     dlen = Parrot_str_length(interp, delim);
3245 
3246     if (dlen == 0) {
3247         VTABLE_set_integer_native(interp, res, slen);
3248 
3249         do {
3250             const String_iter old_iter = iter;
3251 
3252             STRING_iter_skip(interp, str, &iter, 1);
3253             tstr = Parrot_str_iter_substr(interp, str, &old_iter, &iter);
3254             VTABLE_set_string_keyed_int(interp, res, old_iter.charpos, tstr);
3255         } while (iter.charpos < slen);
3256 
3257         return res;
3258     }
3259 
3260     do {
3261         String_iter start, end;
3262 
3263         start = iter;
3264         if (Parrot_str_iter_index(interp, str, &start, &end, delim) < 0)
3265             break;
3266 
3267         tstr = Parrot_str_iter_substr(interp, str, &iter, &start);
3268         VTABLE_push_string(interp, res, tstr);
3269         iter = end;
3270     } while (iter.charpos < slen);
3271 
3272     tstr = Parrot_str_iter_substr(interp, str, &iter, NULL);
3273     VTABLE_push_string(interp, res, tstr);
3274 
3275     return res;
3276 }
3277 
3278 
3279 /*
3280 
3281 =item C<STRING* Parrot_str_from_uint(PARROT_INTERP, char *tc, UHUGEINTVAL num,
3282 unsigned int base, int minus)>
3283 
3284 Returns C<num> converted to a Parrot C<STRING>.
3285 
3286 Note that C<base> must be defined (a default of 10 is not assumed). The caller
3287 has to verify that C<< base >= 2 && base <= 36 >> The buffer C<tc> must be at
3288 least C<sizeof (UHUGEINTVAL)*8 + 1> chars big.
3289 
3290 If C<minus> is true, then C<-> is prepended to the string representation.
3291 
3292 =cut
3293 
3294 */
3295 
3296 PARROT_WARN_UNUSED_RESULT
3297 PARROT_CANNOT_RETURN_NULL
3298 STRING*
Parrot_str_from_uint(PARROT_INTERP,ARGOUT (char * tc),UHUGEINTVAL num,unsigned int base,int minus)3299 Parrot_str_from_uint(PARROT_INTERP, ARGOUT(char *tc), UHUGEINTVAL num,
3300     unsigned int base, int minus)
3301 {
3302     ASSERT_ARGS(Parrot_str_from_uint)
3303 
3304     /* the buffer must be at least as long as this */
3305     char               *p    = tc + sizeof (UHUGEINTVAL)*8 + 1;
3306     const char * const  tail = p;
3307 
3308     PARROT_ASSERT(base >= 2 && base <= 36);
3309 
3310     do {
3311         const char cur = (char)(num % base);
3312 
3313         if (cur < 10)
3314             *--p = (char)('0' + cur);
3315         else
3316             *--p = (char)('a' + cur - 10);
3317 
3318     } while (num /= base);
3319 
3320     if (minus)
3321         *--p = '-';
3322 
3323     return Parrot_str_new_init(interp, p, (UINTVAL)(tail - p),
3324             Parrot_default_encoding_ptr, 0);
3325 }
3326 
3327 
3328 /*
3329 
3330 =item C<STRING * Parrot_str_from_int_base(PARROT_INTERP, char *tc, HUGEINTVAL
3331 num, unsigned int base)>
3332 
3333 Returns C<num> converted to a Parrot C<STRING>.
3334 
3335 Note that C<base> must be defined (a default of 10 is not assumed).
3336 
3337 If C<< num < 0 >>, then C<-> is prepended to the string representation.
3338 
3339 =cut
3340 
3341 */
3342 
3343 PARROT_WARN_UNUSED_RESULT
3344 PARROT_CANNOT_RETURN_NULL
3345 STRING *
Parrot_str_from_int_base(PARROT_INTERP,ARGOUT (char * tc),HUGEINTVAL num,unsigned int base)3346 Parrot_str_from_int_base(PARROT_INTERP, ARGOUT(char *tc), HUGEINTVAL num, unsigned int base)
3347 {
3348     ASSERT_ARGS(Parrot_str_from_int_base)
3349     const int is_neg = (num < 0);
3350 
3351     if (is_neg)
3352         num = -num;
3353 
3354     return Parrot_str_from_uint(interp, tc, (UHUGEINTVAL)num, base, is_neg);
3355 }
3356 
3357 /*
3358 
3359 =back
3360 
3361 =head2 GC registry interface
3362 
3363 =over 4
3364 
3365 =item C<void Parrot_str_gc_register(PARROT_INTERP, STRING *s)>
3366 
3367 Registers the STRING from the interpreter's GC registry to prevent it from
3368 being collected.
3369 
3370 =cut
3371 
3372 */
3373 
3374 
3375 PARROT_EXPORT
3376 void
Parrot_str_gc_register(PARROT_INTERP,ARGIN (STRING * s))3377 Parrot_str_gc_register(PARROT_INTERP, ARGIN(STRING *s))
3378 {
3379     ASSERT_ARGS(Parrot_str_gc_register)
3380     /* Better not trigger a GC run with a potentially unanchored PMC */
3381     Parrot_block_GC_mark(interp);
3382 
3383     PARROT_ASSERT(interp->gc_registry);
3384 
3385     VTABLE_set_pmc_keyed_str(interp, interp->gc_registry, s, PMCNULL);
3386     Parrot_unblock_GC_mark(interp);
3387 }
3388 
3389 /*
3390 
3391 =item C<void Parrot_str_gc_unregister(PARROT_INTERP, STRING *s)>
3392 
3393 Unregisters the STRING from the interpreter's GC registry.
3394 
3395 =cut
3396 
3397 */
3398 
3399 PARROT_EXPORT
3400 void
Parrot_str_gc_unregister(PARROT_INTERP,ARGIN (STRING * s))3401 Parrot_str_gc_unregister(PARROT_INTERP, ARGIN(STRING *s))
3402 {
3403     ASSERT_ARGS(Parrot_str_gc_unregister)
3404     PARROT_ASSERT(interp->gc_registry);
3405 
3406     VTABLE_delete_keyed_str(interp, interp->gc_registry, s);
3407 }
3408 
3409 
3410 /*
3411 
3412 =back
3413 
3414 =head1 SEE ALSO
3415 
3416 =over
3417 
3418 =item F<include/parrot/string.h>
3419 
3420 =item F<include/parrot/string_funcs.h>
3421 
3422 =item F<docs/strings.pod>
3423 
3424 =back
3425 
3426 =cut
3427 
3428 */
3429 
3430 
3431 /*
3432  * Local variables:
3433  *   c-file-style: "parrot"
3434  * End:
3435  * vim: expandtab shiftwidth=4 cinoptions='\:2=2' :
3436  */
3437