1/* 2Copyright (C) 2010-2015, Parrot Foundation. 3 4=head1 NAME 5 6src/pmc/stringbuilder.pmc - StringBuilder PMC 7 8=head1 DESCRIPTION 9 10Helper class to create and manipulate strings, which are outside of 11StringBuilder immutable. 12 13=head2 Methods 14 15=over 4 16 17=cut 18 19*/ 20 21#include "parrot/string_funcs.h" 22 23/* HEADERIZER HFILE: none */ 24/* HEADERIZER BEGIN: static */ 25/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */ 26 27PARROT_WARN_UNUSED_RESULT 28PARROT_CONST_FUNCTION 29static size_t calculate_capacity(PARROT_INTERP, size_t needed); 30 31static void convert_encoding(PARROT_INTERP, 32 ARGIN(STR_VTABLE *dest_encoding), 33 ARGMOD(STRING *buffer), 34 size_t size_to_add) 35 __attribute__nonnull__(1) 36 __attribute__nonnull__(2) 37 __attribute__nonnull__(3) 38 FUNC_MODIFIES(*buffer); 39 40#define ASSERT_ARGS_calculate_capacity __attribute__unused__ int _ASSERT_ARGS_CHECK = (0) 41#define ASSERT_ARGS_convert_encoding __attribute__unused__ int _ASSERT_ARGS_CHECK = (\ 42 PARROT_ASSERT_ARG(interp) \ 43 , PARROT_ASSERT_ARG(dest_encoding) \ 44 , PARROT_ASSERT_ARG(buffer)) 45/* Don't modify between HEADERIZER BEGIN / HEADERIZER END. Your changes will be lost. */ 46/* HEADERIZER END: static */ 47 48#define INITIAL_STRING_CAPACITY 128 49 50pmclass StringBuilder provides string auto_attrs { 51 ATTR STRING *buffer; /* Mutable string to gather results */ 52 53 54/* 55 56=item C<void init()> 57 58Initializes the StringBuilder. 59 60=cut 61 62*/ 63 64 VTABLE void init() :manual_wb { 65 STATICSELF.init_int(INITIAL_STRING_CAPACITY); 66 } 67 68 69/* 70 71=item C<void init_int()> 72 73Initializes the StringBuilder with initial size of buffer. 74 75=cut 76 77*/ 78 79 VTABLE void init_int(INTVAL initial_size) { 80 STRING * const buffer = Parrot_gc_new_string_header(INTERP, 0); 81 82 if (initial_size < INITIAL_STRING_CAPACITY) 83 initial_size = INITIAL_STRING_CAPACITY; 84 85 Parrot_gc_allocate_string_storage(INTERP, buffer, initial_size); 86 buffer->encoding = Parrot_default_encoding_ptr; 87 88 SET_ATTR_buffer(INTERP, SELF, buffer); 89 90 PObj_custom_mark_SET(SELF); 91 } 92 93 94/* 95 96=item C<void init_pmc()> 97 98Initializes the StringBuilder with an array of STRINGs. 99 100=cut 101 102*/ 103 104 VTABLE void init_pmc(PMC *ar) :manual_wb { 105 const INTVAL count = VTABLE_elements(INTERP, ar); 106 107 if (!count) 108 STATICSELF.init_int(INITIAL_STRING_CAPACITY); 109 else { 110 STRING * const first = VTABLE_get_string_keyed_int(INTERP, ar, 0); 111 const INTVAL size = Parrot_str_byte_length(INTERP, first); 112 INTVAL i; 113 114 /* it's just an estimate, but estimates help */ 115 STATICSELF.init_int(size * count); 116 SELF.push_string(first); 117 118 for (i = 1; i < count; ++i) 119 SELF.push_string(VTABLE_get_string_keyed_int(INTERP, ar, i)); 120 } 121 } 122 123/* 124 125=item C<void mark()> 126 127Mark the buffer. 128 129=cut 130 131*/ 132 133 VTABLE void mark() :no_wb { 134 if (PMC_data(SELF)) { 135 STRING *buffer; 136 GET_ATTR_buffer(INTERP, SELF, buffer); 137 Parrot_gc_mark_STRING_alive(INTERP, buffer); 138 } 139 } 140 141/* 142 143=item C<STRING *get_string()> 144 145Returns created string. 146 147=cut 148 149*/ 150 151 VTABLE STRING *get_string() :no_wb { 152 STRING *buffer; 153 GET_ATTR_buffer(INTERP, SELF, buffer); 154 /* We need to build a new string because outside of StringBuilder 155 * strings are immutable. */ 156 return Parrot_str_clone(INTERP, buffer); 157 } 158 159/* 160 161=item C<STRING *push_string()> 162 163Append string to current buffer. 164 165=cut 166 167*/ 168 169 VTABLE void push_string(STRING *s) { 170 STRING *buffer; 171 size_t total_size; 172 173 /* Early return on NULL strings */ 174 if (STRING_IS_NULL(s) || s->strlen == 0) 175 return; 176 177 GET_ATTR_buffer(INTERP, SELF, buffer); 178 179 if (buffer->bufused == 0) { 180 /* Always copy the encoding of the first string. The IO functions 181 assume that the concatenation of utf8 strings doesn't change 182 the encoding. */ 183 buffer->encoding = s->encoding; 184 } 185 else { 186 const STR_VTABLE * const enc = 187 buffer->encoding == s->encoding 188 ? buffer->encoding 189 : Parrot_str_rep_compatible(interp, buffer, s); 190 191 if (enc) { 192 buffer->encoding = enc; 193 } 194 else { 195 /* If strings are incompatible - convert them to utf8 */ 196 197 if (s->encoding != Parrot_utf8_encoding_ptr) 198 s = Parrot_utf8_encoding_ptr->to_encoding(interp, s); 199 200 if (buffer->encoding != Parrot_utf8_encoding_ptr) 201 convert_encoding(INTERP, Parrot_utf8_encoding_ptr, buffer, s->bufused); 202 } 203 } 204 205 total_size = buffer->bufused + s->bufused; 206 207 /* Reallocate if necessary */ 208 if (total_size > buffer->_buflen) { 209 /* Calculate (possibly new) total size */ 210 total_size = calculate_capacity(INTERP, total_size); 211 212 Parrot_gc_reallocate_string_storage(INTERP, buffer, total_size); 213 buffer->_buflen = total_size; 214 } 215 216 /* Tack s on the end of buffer */ 217 memcpy((char *)buffer->_bufstart + buffer->bufused, 218 s->strstart, s->bufused); 219 220 /* Update buffer */ 221 buffer->bufused += s->bufused; 222 buffer->strstart = (char *)buffer->_bufstart; 223 buffer->strlen += s->strlen; 224 buffer->hashval = 0; /* hash is invalid */ 225 226 PARROT_ASSERT(buffer->bufused <= Buffer_buflen(buffer)); 227 } 228 229 VTABLE void push_integer(INTVAL value) { 230 STRING * s = PARROT_STRINGBUILDER(SELF)->buffer; 231 String_iter iter; 232 size_t total_size; 233 234 if (s->encoding != Parrot_utf8_encoding_ptr && value > 0x7F) { 235 if (s->strlen == 0) 236 s->encoding = Parrot_utf8_encoding_ptr; 237 else 238 convert_encoding(INTERP, Parrot_utf8_encoding_ptr, s, sizeof (INTVAL)); 239 } 240 241 total_size = s->bufused + sizeof (INTVAL); 242 if (total_size > s->_buflen) { 243 total_size = calculate_capacity(INTERP, total_size); 244 Parrot_gc_reallocate_string_storage(INTERP, s, total_size); 245 } 246 247 STRING_ITER_INIT(INTERP, &iter); 248 iter.charpos = s->strlen; 249 iter.bytepos = s->bufused; 250 STRING_iter_set_and_advance(INTERP, s, &iter, value); 251 s->strlen = iter.charpos; 252 s->bufused = iter.bytepos; 253 } 254 255/* 256 257=item C<VTABLE i_concatenate_str()> 258 259=item C<VTABLE i_concatenate()> 260 261Append string. Synonym for push_string 262 263=cut 264 265*/ 266 267 VTABLE void i_concatenate_str(STRING *s) :manual_wb { 268 STATICSELF.push_string(s); 269 } 270 271 VTABLE void i_concatenate(PMC *p) :manual_wb { 272 if (p->vtable->base_type == enum_class_StringBuilder) { 273 STRING *buffer; 274 GET_ATTR_buffer(INTERP, p, buffer); 275 STATICSELF.push_string(buffer); 276 } 277 else { 278 STATICSELF.push_string(VTABLE_get_string(INTERP, p)); 279 } 280 } 281 282 VTABLE void push_pmc(PMC *p) :manual_wb { 283 if (p->vtable->base_type == enum_class_StringBuilder) { 284 STRING *buffer; 285 GET_ATTR_buffer(INTERP, p, buffer); 286 STATICSELF.push_string(buffer); 287 } 288 else { 289 STATICSELF.push_string(VTABLE_get_string(INTERP, p)); 290 } 291 } 292 293/* 294 295=item C<VTABLE set_string_native(STRING)> 296 297=item C<VTABLE set_pmc(PMC)> 298 299Set content of buffer to passed string or PMC 300 301=cut 302 303*/ 304 VTABLE void set_string_native(STRING *s) { 305 STRING * buffer; 306 307 /* Calculate (possibly new) total size */ 308 const size_t total_size = calculate_capacity(INTERP, s->bufused); 309 310 GET_ATTR_buffer(INTERP, SELF, buffer); 311 312 /* Reallocate if necessary */ 313 if (total_size > Buffer_buflen(buffer)) { 314 Parrot_gc_reallocate_string_storage(INTERP, buffer, total_size); 315 buffer->strstart = (char*)buffer->_bufstart; 316 } 317 318 /* Tack s on the buffer */ 319 memcpy((void *)((char*)buffer->_bufstart), 320 s->strstart, s->bufused); 321 322 /* Update buffer */ 323 buffer->bufused = s->bufused; 324 buffer->strlen = Parrot_str_length(INTERP, s); 325 buffer->encoding = s->encoding; 326 } 327 328 VTABLE void set_pmc(PMC *s) :manual_wb { 329 if (s->vtable->base_type == enum_class_StringBuilder) { 330 STRING *buffer; 331 GET_ATTR_buffer(INTERP, s, buffer); 332 STATICSELF.push_string(buffer); 333 } 334 else { 335 STATICSELF.set_string_native(VTABLE_get_string(INTERP, s)); 336 } 337 } 338 339 340/* 341 342=item C<VTABLE get_integer()> 343 344Returns current capacity of allocated buffer. 345 346For testing purpose only? 347 348=cut 349 350*/ 351 352 INTVAL get_integer() :no_wb { 353 STRING *buffer; 354 GET_ATTR_buffer(INTERP, SELF, buffer); 355 return Buffer_buflen(buffer); 356 } 357 358/* 359 360=item C<VTABLE substr()> 361 362Returns a copied substring of the STRING. 363 364=cut 365 366*/ 367 368 VTABLE STRING *substr(INTVAL offset, INTVAL length) :no_wb { 369 STRING *buffer; 370 GET_ATTR_buffer(INTERP, SELF, buffer); 371 /* STRING_substr already creates a copy for us, GH #1123 */ 372 return STRING_substr(INTERP, buffer, offset, length); 373 } 374 375/* 376 377=item C<append_format(string fmt [, pmc args ] [, pmc hash ])> 378 379Add a line to a C<StringBuilder> object according to C<fmt>. 380The C<fmt> string can contain any number of "%-replacements" 381which are replaced by the corresponding values from C<args> 382or C<hash> prior to being appended to the string. (Here 383C<args> is a slurpy array, and C<hash> is a slurpy hash.) 384 385The currently defined replacements include: 386 387 %0 %1 ... %9 the value from the args array at index 0..9 388 %, the values of the args array separated by commas 389 %% a percent sign 390 391A percent-sign followed by any other character that is a hash 392key receives the value of the hash element. 393 394=cut 395 396*/ 397 398 METHOD append_format(STRING *fmt, PMC *args :slurpy, PMC *hash :slurpy :named) { 399 STRING * const percent = CONST_STRING(INTERP, "%"); 400 STRING * const comma = CONST_STRING(INTERP, ","); 401 STRING * const comma_space = CONST_STRING(INTERP, ", "); 402 PMC *stringbuilder = SELF; 403 INTVAL pos = 0; 404 405 /* Loop over the format string, splitting it into chunks 406 * for the string builder. */ 407 while (pos >= 0) { 408 /* Find the next % */ 409 const INTVAL percentPos = STRING_index(INTERP, fmt, percent, pos); 410 STRING *key; 411 412 if (percentPos < 0) { 413 if (pos == 0) { 414 VTABLE_push_string(INTERP, stringbuilder, fmt); 415 } 416 else { 417 /* remaining string can be added as is. */ 418 VTABLE_push_string(INTERP, stringbuilder, 419 STRING_substr(INTERP, fmt, pos, 420 Parrot_str_length(INTERP, fmt) -pos)); 421 } 422 break; 423 } 424 else { 425 /* slurp up to just before the % sign... */ 426 VTABLE_push_string(INTERP, stringbuilder, 427 STRING_substr(INTERP, fmt, pos, percentPos - pos)); 428 /* skip the % sign */ 429 pos = percentPos + 1 ; 430 } 431 432 /* key is always a single character */ 433 key = STRING_substr(INTERP, fmt, pos++, 1); 434 435 if (VTABLE_exists_keyed_str(INTERP, hash, key)) { 436 VTABLE_push_string(INTERP, stringbuilder, 437 VTABLE_get_string_keyed_str(INTERP, hash, key)); 438 } 439 else if (Parrot_str_is_cclass(INTERP, enum_cclass_numeric, key, 0)) { 440 VTABLE_push_string(INTERP, stringbuilder, 441 VTABLE_get_string_keyed_int(INTERP, args, 442 Parrot_str_to_int(INTERP, key))); 443 } 444 else if (STRING_equal(INTERP, key, comma)) { 445 INTVAL num_args = VTABLE_elements(INTERP, args); 446 INTVAL pos_args; 447 448 for (pos_args = 0; pos_args < num_args; ++pos_args) { 449 if (pos_args > 0) 450 VTABLE_push_string(INTERP, stringbuilder, comma_space); 451 VTABLE_push_string(INTERP, stringbuilder, 452 VTABLE_get_string_keyed_int(INTERP, args, pos_args)); 453 } 454 } 455 else if (STRING_equal(INTERP, key, percent)) { 456 VTABLE_push_string(INTERP, stringbuilder, percent); 457 } 458 else { 459 /* %foo has no special meaning, pass it through unchanged */ 460 VTABLE_push_string(INTERP, stringbuilder, 461 STRING_substr(INTERP, fmt, pos-2, 2)); 462 } 463 } 464 465 RETURN(PMC *SELF); 466 } 467 468/* 469 470=item C<INTVAL get_string_length()> 471 472Returns length of currently built string. 473 474=cut 475 476*/ 477 478 METHOD get_string_length() :no_wb { 479 STRING *buffer; 480 INTVAL length; 481 GET_ATTR_buffer(INTERP, SELF, buffer); 482 length = Parrot_str_length(INTERP, buffer); 483 RETURN(INTVAL length); 484 } 485 486 487/* 488 489=back 490 491=cut 492 493*/ 494 495} 496 497 498/* 499 500=head2 Helper functions. 501 502=over 4 503 504=cut 505 506*/ 507 508/* 509 510=item C<static size_t calculate_capacity(PARROT_INTERP, size_t needed)> 511 512Calculate capacity for string. We overallocate by 2 for smaller buffers 513resp. onto the next block size. 514 515=cut 516 517*/ 518 519PARROT_WARN_UNUSED_RESULT 520PARROT_CONST_FUNCTION 521static size_t 522calculate_capacity(SHIM_INTERP, size_t needed) 523{ 524 ASSERT_ARGS(calculate_capacity) 525 526 if (needed < 8192) { 527#if 1 528 needed += needed/2; /* overallocate by 1.5 */ 529#else 530 needed *= 2; /* overallocate by 2 */ 531#endif 532 needed = (needed + 15) & ~15; /* round up to 16 */ 533 } 534 else { 535 needed &= ~0xfff; 536 needed += 4096; /* next block */ 537 } 538 539 return needed; 540} 541 542/* 543 544=item C<static void convert_encoding(PARROT_INTERP, STR_VTABLE *dest_encoding, 545STRING *buffer, size_t size_to_add)> 546 547Convert buffer content to the encoding specified and increase its size, 548reallocating it if needed. 549 550=back 551 552=cut 553 554*/ 555 556static void 557convert_encoding(PARROT_INTERP, ARGIN(STR_VTABLE *dest_encoding), 558 ARGMOD(STRING *buffer), size_t size_to_add) 559{ 560 ASSERT_ARGS(convert_encoding) 561 STRING * new_buffer; 562 size_t total_size; 563 564 new_buffer = dest_encoding->to_encoding(interp, buffer); 565 total_size = new_buffer->bufused + size_to_add; 566 567 if (total_size > buffer->_buflen) { 568 /* Reallocate */ 569 total_size = calculate_capacity(interp, total_size); 570 Parrot_gc_reallocate_string_storage(interp, buffer, total_size); 571 } 572 buffer->bufused = new_buffer->bufused; 573 buffer->encoding = new_buffer->encoding; 574 575 memcpy(buffer->strstart, new_buffer->strstart, 576 new_buffer->bufused); 577} 578 579/* 580 * Local variables: 581 * c-file-style: "parrot" 582 * End: 583 * vim: expandtab shiftwidth=4 cinoptions='\:2=2' : 584 */ 585