1/*
2Copyright (C) 2010-2015, Parrot Foundation.
3
4=head1 NAME
5
6src/pmc/stringbuilder.pmc - StringBuilder PMC
7
8=head1 DESCRIPTION
9
Helper class to create and manipulate strings. Outside of StringBuilder,
strings are immutable.
12
13=head2 Methods
14
15=over 4
16
17=cut
18
19*/
20
21#include "parrot/string_funcs.h"
22
23/* HEADERIZER HFILE: none */
24/* HEADERIZER BEGIN: static */
25/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
26
27PARROT_WARN_UNUSED_RESULT
28PARROT_CONST_FUNCTION
29static size_t calculate_capacity(PARROT_INTERP, size_t needed);
30
31static void convert_encoding(PARROT_INTERP,
32    ARGIN(STR_VTABLE *dest_encoding),
33    ARGMOD(STRING *buffer),
34    size_t size_to_add)
35        __attribute__nonnull__(1)
36        __attribute__nonnull__(2)
37        __attribute__nonnull__(3)
38        FUNC_MODIFIES(*buffer);
39
40#define ASSERT_ARGS_calculate_capacity __attribute__unused__ int _ASSERT_ARGS_CHECK = (0)
41#define ASSERT_ARGS_convert_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\
42       PARROT_ASSERT_ARG(interp) \
43    , PARROT_ASSERT_ARG(dest_encoding) \
44    , PARROT_ASSERT_ARG(buffer))
45/* Don't modify between HEADERIZER BEGIN / HEADERIZER END.  Your changes will be lost. */
46/* HEADERIZER END: static */
47
48#define INITIAL_STRING_CAPACITY 128
49
50pmclass StringBuilder provides string auto_attrs {
51    ATTR STRING *buffer;    /* Mutable string to gather results */
52
53
54/*
55
56=item C<void init()>
57
58Initializes the StringBuilder.
59
60=cut
61
62*/
63
64    VTABLE void init() :manual_wb {
65        STATICSELF.init_int(INITIAL_STRING_CAPACITY);
66    }
67
68
69/*
70
=item C<void init_int(INTVAL initial_size)>
72
73Initializes the StringBuilder with initial size of buffer.
74
75=cut
76
77*/
78
79    VTABLE void init_int(INTVAL initial_size) {
80        STRING * const buffer = Parrot_gc_new_string_header(INTERP, 0);
81
82        if (initial_size < INITIAL_STRING_CAPACITY)
83            initial_size = INITIAL_STRING_CAPACITY;
84
85        Parrot_gc_allocate_string_storage(INTERP, buffer, initial_size);
86        buffer->encoding = Parrot_default_encoding_ptr;
87
88        SET_ATTR_buffer(INTERP, SELF, buffer);
89
90        PObj_custom_mark_SET(SELF);
91    }
92
93
94/*
95
96=item C<void init_pmc()>
97
98Initializes the StringBuilder with an array of STRINGs.
99
100=cut
101
102*/
103
104    VTABLE void init_pmc(PMC *ar) :manual_wb {
105        const INTVAL count = VTABLE_elements(INTERP, ar);
106
107        if (!count)
108            STATICSELF.init_int(INITIAL_STRING_CAPACITY);
109        else {
110            STRING * const first = VTABLE_get_string_keyed_int(INTERP, ar, 0);
111            const INTVAL   size  = Parrot_str_byte_length(INTERP, first);
112            INTVAL         i;
113
114            /* it's just an estimate, but estimates help */
115            STATICSELF.init_int(size * count);
116            SELF.push_string(first);
117
118            for (i = 1; i < count; ++i)
119                SELF.push_string(VTABLE_get_string_keyed_int(INTERP, ar, i));
120        }
121    }
122
123/*
124
125=item C<void mark()>
126
127Mark the buffer.
128
129=cut
130
131*/
132
133    VTABLE void mark() :no_wb {
134        if (PMC_data(SELF)) {
135            STRING *buffer;
136            GET_ATTR_buffer(INTERP, SELF, buffer);
137            Parrot_gc_mark_STRING_alive(INTERP, buffer);
138        }
139    }
140
141/*
142
143=item C<STRING *get_string()>
144
145Returns created string.
146
147=cut
148
149*/
150
151    VTABLE STRING *get_string() :no_wb {
152        STRING *buffer;
153        GET_ATTR_buffer(INTERP, SELF, buffer);
154        /* We need to build a new string because outside of StringBuilder
155         * strings are immutable. */
156        return Parrot_str_clone(INTERP, buffer);
157    }
158
159/*
160
=item C<void push_string(STRING *s)>
162
163Append string to current buffer.
164
165=cut
166
167*/
168
169    VTABLE void push_string(STRING *s) {
170        STRING           *buffer;
171        size_t            total_size;
172
173        /* Early return on NULL strings */
174        if (STRING_IS_NULL(s) || s->strlen == 0)
175            return;
176
177        GET_ATTR_buffer(INTERP, SELF, buffer);
178
179        if (buffer->bufused == 0) {
180            /* Always copy the encoding of the first string. The IO functions
181               assume that the concatenation of utf8 strings doesn't change
182               the encoding. */
183            buffer->encoding = s->encoding;
184        }
185        else {
186            const STR_VTABLE * const enc =
187                  buffer->encoding == s->encoding
188                ? buffer->encoding
189                : Parrot_str_rep_compatible(interp, buffer, s);
190
191            if (enc) {
192                buffer->encoding = enc;
193            }
194            else {
195                /* If strings are incompatible - convert them to utf8 */
196
197                if (s->encoding != Parrot_utf8_encoding_ptr)
198                    s = Parrot_utf8_encoding_ptr->to_encoding(interp, s);
199
200                if (buffer->encoding != Parrot_utf8_encoding_ptr)
201                    convert_encoding(INTERP, Parrot_utf8_encoding_ptr, buffer, s->bufused);
202            }
203        }
204
205        total_size = buffer->bufused + s->bufused;
206
207        /* Reallocate if necessary */
208        if (total_size > buffer->_buflen) {
209            /* Calculate (possibly new) total size */
210            total_size = calculate_capacity(INTERP, total_size);
211
212            Parrot_gc_reallocate_string_storage(INTERP, buffer, total_size);
213            buffer->_buflen = total_size;
214        }
215
216        /* Tack s on the end of buffer */
217        memcpy((char *)buffer->_bufstart + buffer->bufused,
218                s->strstart, s->bufused);
219
220        /* Update buffer */
221        buffer->bufused += s->bufused;
222        buffer->strstart = (char *)buffer->_bufstart;
223        buffer->strlen  += s->strlen;
224        buffer->hashval  = 0; /* hash is invalid */
225
226        PARROT_ASSERT(buffer->bufused <= Buffer_buflen(buffer));
227    }
228
229    VTABLE void push_integer(INTVAL value) {
230        STRING * s = PARROT_STRINGBUILDER(SELF)->buffer;
231        String_iter iter;
232        size_t total_size;
233
234        if (s->encoding != Parrot_utf8_encoding_ptr && value > 0x7F) {
235            if (s->strlen == 0)
236                s->encoding = Parrot_utf8_encoding_ptr;
237            else
238                convert_encoding(INTERP, Parrot_utf8_encoding_ptr, s, sizeof (INTVAL));
239        }
240
241        total_size = s->bufused + sizeof (INTVAL);
242        if (total_size > s->_buflen) {
243            total_size = calculate_capacity(INTERP, total_size);
244            Parrot_gc_reallocate_string_storage(INTERP, s, total_size);
245        }
246
247        STRING_ITER_INIT(INTERP, &iter);
248        iter.charpos = s->strlen;
249        iter.bytepos = s->bufused;
250        STRING_iter_set_and_advance(INTERP, s, &iter, value);
251        s->strlen = iter.charpos;
252        s->bufused = iter.bytepos;
253    }
254
255/*
256
257=item C<VTABLE i_concatenate_str()>
258
259=item C<VTABLE i_concatenate()>
260
261Append string. Synonym for push_string
262
263=cut
264
265*/
266
267    VTABLE void i_concatenate_str(STRING *s) :manual_wb {
268        STATICSELF.push_string(s);
269    }
270
271    VTABLE void i_concatenate(PMC *p) :manual_wb {
272        if (p->vtable->base_type == enum_class_StringBuilder) {
273            STRING *buffer;
274            GET_ATTR_buffer(INTERP, p, buffer);
275            STATICSELF.push_string(buffer);
276        }
277        else {
278            STATICSELF.push_string(VTABLE_get_string(INTERP, p));
279        }
280    }
281
282    VTABLE void push_pmc(PMC *p) :manual_wb {
283        if (p->vtable->base_type == enum_class_StringBuilder) {
284            STRING *buffer;
285            GET_ATTR_buffer(INTERP, p, buffer);
286            STATICSELF.push_string(buffer);
287        }
288        else {
289            STATICSELF.push_string(VTABLE_get_string(INTERP, p));
290        }
291    }
292
293/*
294
295=item C<VTABLE set_string_native(STRING)>
296
297=item C<VTABLE set_pmc(PMC)>
298
299Set content of buffer to passed string or PMC
300
301=cut
302
303*/
304    VTABLE void set_string_native(STRING *s) {
305        STRING * buffer;
306
307        /* Calculate (possibly new) total size */
308        const size_t total_size = calculate_capacity(INTERP, s->bufused);
309
310        GET_ATTR_buffer(INTERP, SELF, buffer);
311
312        /* Reallocate if necessary */
313        if (total_size > Buffer_buflen(buffer)) {
314            Parrot_gc_reallocate_string_storage(INTERP, buffer, total_size);
315            buffer->strstart = (char*)buffer->_bufstart;
316        }
317
318        /* Tack s on the buffer */
319        memcpy((void *)((char*)buffer->_bufstart),
320                s->strstart, s->bufused);
321
322        /* Update buffer */
323        buffer->bufused  = s->bufused;
324        buffer->strlen   = Parrot_str_length(INTERP, s);
325        buffer->encoding = s->encoding;
326    }
327
328    VTABLE void set_pmc(PMC *s) :manual_wb {
329        if (s->vtable->base_type == enum_class_StringBuilder) {
330            STRING *buffer;
331            GET_ATTR_buffer(INTERP, s, buffer);
332            STATICSELF.push_string(buffer);
333        }
334        else {
335            STATICSELF.set_string_native(VTABLE_get_string(INTERP, s));
336        }
337    }
338
339
340/*
341
342=item C<VTABLE get_integer()>
343
344Returns current capacity of allocated buffer.
345
346For testing purpose only?
347
348=cut
349
350*/
351
352    INTVAL get_integer() :no_wb {
353        STRING *buffer;
354        GET_ATTR_buffer(INTERP, SELF, buffer);
355        return Buffer_buflen(buffer);
356    }
357
358/*
359
360=item C<VTABLE substr()>
361
362Returns a copied substring of the STRING.
363
364=cut
365
366*/
367
368    VTABLE STRING *substr(INTVAL offset, INTVAL length) :no_wb {
369        STRING *buffer;
370        GET_ATTR_buffer(INTERP, SELF, buffer);
371        /* STRING_substr already creates a copy for us, GH #1123 */
372        return STRING_substr(INTERP, buffer, offset, length);
373    }
374
375/*
376
377=item C<append_format(string fmt [, pmc args ] [, pmc hash ])>
378
379Add a line to a C<StringBuilder> object according to C<fmt>.
380The C<fmt> string can contain any number of "%-replacements"
381which are replaced by the corresponding values from C<args>
382or C<hash> prior to being appended to the string.  (Here
383C<args> is a slurpy array, and C<hash> is a slurpy hash.)
384
385The currently defined replacements include:
386
387    %0 %1 ... %9     the value from the args array at index 0..9
388    %,               the values of the args array separated by commas
389    %%               a percent sign
390
391A percent-sign followed by any other character that is a hash
392key receives the value of the hash element.
393
394=cut
395
396*/
397
398    METHOD append_format(STRING *fmt, PMC *args :slurpy, PMC *hash :slurpy :named) {
399        STRING * const percent     = CONST_STRING(INTERP, "%");
400        STRING * const comma       = CONST_STRING(INTERP, ",");
401        STRING * const comma_space = CONST_STRING(INTERP, ", ");
402        PMC    *stringbuilder = SELF;
403        INTVAL pos = 0;
404
405        /* Loop over the format string, splitting it into chunks
406         * for the string builder. */
407        while (pos >= 0) {
408            /* Find the next % */
409            const INTVAL percentPos = STRING_index(INTERP, fmt, percent, pos);
410            STRING *key;
411
412            if (percentPos < 0) {
413                if (pos == 0) {
414                    VTABLE_push_string(INTERP, stringbuilder, fmt);
415                }
416                else {
417                    /* remaining string can be added as is. */
418                    VTABLE_push_string(INTERP, stringbuilder,
419                        STRING_substr(INTERP, fmt, pos,
420                            Parrot_str_length(INTERP, fmt) -pos));
421                    }
422                break;
423            }
424            else {
425                /* slurp up to just before the % sign... */
426                VTABLE_push_string(INTERP, stringbuilder,
427                    STRING_substr(INTERP, fmt, pos, percentPos - pos));
428                /* skip the % sign */
429                pos = percentPos + 1 ;
430            }
431
432            /* key is always a single character */
433            key = STRING_substr(INTERP, fmt, pos++, 1);
434
435            if (VTABLE_exists_keyed_str(INTERP, hash, key)) {
436                VTABLE_push_string(INTERP, stringbuilder,
437                        VTABLE_get_string_keyed_str(INTERP, hash, key));
438            }
439            else if (Parrot_str_is_cclass(INTERP, enum_cclass_numeric, key, 0)) {
440                VTABLE_push_string(INTERP, stringbuilder,
441                    VTABLE_get_string_keyed_int(INTERP, args,
442                        Parrot_str_to_int(INTERP, key)));
443            }
444            else if (STRING_equal(INTERP, key, comma)) {
445                INTVAL num_args = VTABLE_elements(INTERP, args);
446                INTVAL pos_args;
447
448                for (pos_args = 0; pos_args < num_args; ++pos_args) {
449                    if (pos_args > 0)
450                        VTABLE_push_string(INTERP, stringbuilder, comma_space);
451                    VTABLE_push_string(INTERP, stringbuilder,
452                        VTABLE_get_string_keyed_int(INTERP, args, pos_args));
453                }
454            }
455            else if (STRING_equal(INTERP, key, percent)) {
456                VTABLE_push_string(INTERP, stringbuilder, percent);
457            }
458            else {
459                /* %foo has no special meaning, pass it through unchanged */
460                VTABLE_push_string(INTERP, stringbuilder,
461                    STRING_substr(INTERP, fmt, pos-2, 2));
462            }
463        }
464
465        RETURN(PMC *SELF);
466    }
467
468/*
469
470=item C<INTVAL get_string_length()>
471
472Returns length of currently built string.
473
474=cut
475
476*/
477
478    METHOD get_string_length() :no_wb {
479        STRING *buffer;
480        INTVAL  length;
481        GET_ATTR_buffer(INTERP, SELF, buffer);
482        length = Parrot_str_length(INTERP, buffer);
483        RETURN(INTVAL length);
484    }
485
486
487/*
488
489=back
490
491=cut
492
493*/
494
495}
496
497
498/*
499
500=head2 Helper functions.
501
502=over 4
503
504=cut
505
506*/
507
508/*
509
510=item C<static size_t calculate_capacity(PARROT_INTERP, size_t needed)>
511
Calculate capacity for string. Requests below 8192 bytes are overallocated by
a factor of 1.5 and rounded up to a multiple of 16; larger requests are
rounded onto the next 4096-byte block.
514
515=cut
516
517*/
518
PARROT_WARN_UNUSED_RESULT
PARROT_CONST_FUNCTION
static size_t
calculate_capacity(SHIM_INTERP, size_t needed)
{
    ASSERT_ARGS(calculate_capacity)

    if (needed >= 8192) {
        /* Large request: clear the low 12 bits, then add one 4096-byte
         * block.  A request that is already a multiple of 4096 still
         * gains a full extra block. */
        return (needed & ~0xfff) + 4096;
    }

    /* Small request: overallocate by 1.5, then round up to a multiple
     * of 16. */
    needed += needed / 2;

    return (needed + 15) & ~15;
}
541
542/*
543
544=item C<static void convert_encoding(PARROT_INTERP, STR_VTABLE *dest_encoding,
545STRING *buffer, size_t size_to_add)>
546
547Convert buffer content to the encoding specified and increase its size,
548reallocating it if needed.
549
550=back
551
552=cut
553
554*/
555
556static void
557convert_encoding(PARROT_INTERP, ARGIN(STR_VTABLE *dest_encoding),
558        ARGMOD(STRING *buffer), size_t size_to_add)
559{
560    ASSERT_ARGS(convert_encoding)
561    STRING * new_buffer;
562    size_t total_size;
563
564    new_buffer = dest_encoding->to_encoding(interp, buffer);
565    total_size = new_buffer->bufused + size_to_add;
566
567    if (total_size > buffer->_buflen) {
568        /* Reallocate */
569        total_size = calculate_capacity(interp, total_size);
570        Parrot_gc_reallocate_string_storage(interp, buffer, total_size);
571    }
572    buffer->bufused  = new_buffer->bufused;
573    buffer->encoding = new_buffer->encoding;
574
575    memcpy(buffer->strstart, new_buffer->strstart,
576            new_buffer->bufused);
577}
578
579/*
580 * Local variables:
581 *   c-file-style: "parrot"
582 * End:
583 * vim: expandtab shiftwidth=4 cinoptions='\:2=2' :
584 */
585