1 /* doop.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2004, 2005, 2006, 2007, 2008, 2009 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * 'So that was the job I felt I had to do when I started,' thought Sam. 13 * 14 * [p.934 of _The Lord of the Rings_, VI/iii: "Mount Doom"] 15 */ 16 17 /* This file contains some common functions needed to carry out certain 18 * ops. For example, both pp_sprintf() and pp_prtf() call the function 19 * do_sprintf() found in this file. 20 */ 21 22 #include "EXTERN.h" 23 #define PERL_IN_DOOP_C 24 #include "perl.h" 25 #include "invlist_inline.h" 26 27 #ifndef PERL_MICRO 28 #include <signal.h> 29 #endif 30 31 32 /* Helper function for do_trans(). 33 * Handles cases where the search and replacement charlists aren't UTF-8, 34 * aren't identical, and neither the /d nor /s flag is present. 35 * 36 * sv may or may not be utf8. Note that no code point above 255 can possibly 37 * be in the to-translate set 38 */ 39 40 STATIC Size_t 41 S_do_trans_simple(pTHX_ SV * const sv, const OPtrans_map * const tbl) 42 { 43 Size_t matches = 0; 44 STRLEN len; 45 U8 *s = (U8*)SvPV_nomg(sv,len); 46 U8 * const send = s+len; 47 48 PERL_ARGS_ASSERT_DO_TRANS_SIMPLE; 49 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: entering do_trans_simple:" 50 " input sv:\n", 51 __FILE__, __LINE__)); 52 DEBUG_y(sv_dump(sv)); 53 54 /* First, take care of non-UTF-8 input strings, because they're easy */ 55 if (!SvUTF8(sv)) { 56 while (s < send) { 57 const short ch = tbl->map[*s]; 58 if (ch >= 0) { 59 matches++; 60 *s = (U8)ch; 61 } 62 s++; 63 } 64 SvSETMAGIC(sv); 65 } 66 else { 67 const bool grows = cBOOL(PL_op->op_private & OPpTRANS_GROWS); 68 U8 *d; 69 U8 *dstart; 70 71 /* Allow for worst-case expansion: Each input byte can become 2. For a 72 * given input character, this happens when it occupies a single byte 73 * under UTF-8, but is to be translated to something that occupies two: 74 * $_="a".chr(400); tr/a/\xFE/, FE needs encoding. */ 75 if (grows) 76 Newx(d, len*2+1, U8); 77 else 78 d = s; 79 dstart = d; 80 while (s < send) { 81 STRLEN ulen; 82 short ch; 83 84 /* Need to check this, otherwise 128..255 won't match */ 85 const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT); 86 if (c < 0x100 && (ch = tbl->map[c]) >= 0) { 87 matches++; 88 d = uvchr_to_utf8(d, (UV)ch); 89 s += ulen; 90 } 91 else { /* No match -> copy */ 92 Move(s, d, ulen, U8); 93 d += ulen; 94 s += ulen; 95 } 96 } 97 if (grows) { 98 sv_setpvn(sv, (char*)dstart, d - dstart); 99 Safefree(dstart); 100 } 101 else { 102 *d = '\0'; 103 SvCUR_set(sv, d - dstart); 104 } 105 SvUTF8_on(sv); 106 SvSETMAGIC(sv); 107 } 108 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: returning %zu\n", 109 __FILE__, __LINE__, matches)); 110 DEBUG_y(sv_dump(sv)); 111 return matches; 112 } 113 114 115 /* Helper function for do_trans(). 116 * Handles cases where the search and replacement charlists are identical and 117 * non-utf8: so the string isn't modified, and only a count of modifiable 118 * chars is needed. 119 * 120 * Note that it doesn't handle /d or /s, since these modify the string even if 121 * the replacement list is empty. 122 * 123 * sv may or may not be utf8. Note that no code point above 255 can possibly 124 * be in the to-translate set 125 */ 126 127 STATIC Size_t 128 S_do_trans_count(pTHX_ SV * const sv, const OPtrans_map * const tbl) 129 { 130 STRLEN len; 131 const U8 *s = (const U8*)SvPV_nomg_const(sv, len); 132 const U8 * const send = s + len; 133 Size_t matches = 0; 134 135 PERL_ARGS_ASSERT_DO_TRANS_COUNT; 136 137 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: entering do_trans_count:" 138 " input sv:\n", 139 __FILE__, __LINE__)); 140 DEBUG_y(sv_dump(sv)); 141 142 if (!SvUTF8(sv)) { 143 while (s < send) { 144 if (tbl->map[*s++] >= 0) 145 matches++; 146 } 147 } 148 else { 149 const bool complement = cBOOL(PL_op->op_private & OPpTRANS_COMPLEMENT); 150 while (s < send) { 151 STRLEN ulen; 152 const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT); 153 if (c < 0x100) { 154 if (tbl->map[c] >= 0) 155 matches++; 156 } else if (complement) 157 matches++; 158 s += ulen; 159 } 160 } 161 162 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: count returning %zu\n", 163 __FILE__, __LINE__, matches)); 164 return matches; 165 } 166 167 168 /* Helper function for do_trans(). 169 * Handles cases where the search and replacement charlists aren't identical 170 * and both are non-utf8, and one or both of /d, /s is specified. 171 * 172 * sv may or may not be utf8. Note that no code point above 255 can possibly 173 * be in the to-translate set 174 */ 175 176 STATIC Size_t 177 S_do_trans_complex(pTHX_ SV * const sv, const OPtrans_map * const tbl) 178 { 179 STRLEN len; 180 U8 *s = (U8*)SvPV_nomg(sv, len); 181 U8 * const send = s+len; 182 Size_t matches = 0; 183 const bool complement = cBOOL(PL_op->op_private & OPpTRANS_COMPLEMENT); 184 185 PERL_ARGS_ASSERT_DO_TRANS_COMPLEX; 186 187 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: entering do_trans_complex:" 188 " input sv:\n", 189 __FILE__, __LINE__)); 190 DEBUG_y(sv_dump(sv)); 191 192 if (!SvUTF8(sv)) { 193 U8 *d = s; 194 U8 * const dstart = d; 195 196 if (PL_op->op_private & OPpTRANS_SQUASH) { 197 198 /* What the mapping of the previous character was to. If the new 199 * character has the same mapping, it is squashed from the output 200 * (but still is included in the count) */ 201 short previous_map = (short) TR_OOB; 202 203 while (s < send) { 204 const short this_map = tbl->map[*s]; 205 if (this_map >= 0) { 206 matches++; 207 if (this_map != previous_map) { 208 *d++ = (U8)this_map; 209 previous_map = this_map; 210 } 211 } 212 else { 213 if (this_map == (short) TR_UNMAPPED) { 214 *d++ = *s; 215 previous_map = (short) TR_OOB; 216 } 217 else { 218 assert(this_map == (short) TR_DELETE); 219 matches++; 220 } 221 } 222 223 s++; 224 } 225 } 226 else { /* Not to squash */ 227 while (s < send) { 228 const short this_map = tbl->map[*s]; 229 if (this_map >= 0) { 230 matches++; 231 *d++ = (U8)this_map; 232 } 233 else if (this_map == (short) TR_UNMAPPED) 234 *d++ = *s; 235 else if (this_map == (short) TR_DELETE) 236 matches++; 237 s++; 238 } 239 } 240 *d = '\0'; 241 SvCUR_set(sv, d - dstart); 242 } 243 else { /* is utf8 */ 244 const bool squash = cBOOL(PL_op->op_private & OPpTRANS_SQUASH); 245 const bool grows = cBOOL(PL_op->op_private & OPpTRANS_GROWS); 246 U8 *d; 247 U8 *dstart; 248 Size_t size = tbl->size; 249 250 /* What the mapping of the previous character was to. If the new 251 * character has the same mapping, it is squashed from the output (but 252 * still is included in the count) */ 253 UV pch = TR_OOB; 254 255 if (grows) 256 /* Allow for worst-case expansion: Each input byte can become 2. 257 * For a given input character, this happens when it occupies a 258 * single byte under UTF-8, but is to be translated to something 259 * that occupies two: */ 260 Newx(d, len*2+1, U8); 261 else 262 d = s; 263 dstart = d; 264 265 while (s < send) { 266 STRLEN len; 267 const UV comp = utf8n_to_uvchr(s, send - s, &len, 268 UTF8_ALLOW_DEFAULT); 269 UV ch; 270 short sch; 271 272 sch = (comp < size) 273 ? tbl->map[comp] 274 : (! complement) 275 ? (short) TR_UNMAPPED 276 : tbl->map[size]; 277 278 if (sch >= 0) { 279 ch = (UV)sch; 280 replace: 281 matches++; 282 if (LIKELY(!squash || ch != pch)) { 283 d = uvchr_to_utf8(d, ch); 284 pch = ch; 285 } 286 s += len; 287 continue; 288 } 289 else if (sch == (short) TR_UNMAPPED) { 290 Move(s, d, len, U8); 291 d += len; 292 pch = TR_OOB; 293 } 294 else if (sch == (short) TR_DELETE) 295 matches++; 296 else { 297 assert(sch == (short) TR_R_EMPTY); /* empty replacement */ 298 ch = comp; 299 goto replace; 300 } 301 302 s += len; 303 } 304 305 if (grows) { 306 sv_setpvn(sv, (char*)dstart, d - dstart); 307 Safefree(dstart); 308 } 309 else { 310 *d = '\0'; 311 SvCUR_set(sv, d - dstart); 312 } 313 SvUTF8_on(sv); 314 } 315 SvSETMAGIC(sv); 316 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: returning %zu\n", 317 __FILE__, __LINE__, matches)); 318 DEBUG_y(sv_dump(sv)); 319 return matches; 320 } 321 322 323 /* Helper function for do_trans(). 324 * Handles cases where an inversion map implementation is to be used and the 325 * search and replacement charlists are identical: so the string isn't 326 * modified, and only a count of modifiable chars is needed. 327 * 328 * Note that it doesn't handle /d nor /s, since these modify the string 329 * even if the replacement charlist is empty. 330 * 331 * sv may or may not be utf8. 332 */ 333 334 STATIC Size_t 335 S_do_trans_count_invmap(pTHX_ SV * const sv, AV * const invmap) 336 { 337 U8 *s; 338 U8 *send; 339 Size_t matches = 0; 340 STRLEN len; 341 SV** const from_invlist_ptr = av_fetch(invmap, 0, TRUE); 342 SV** const to_invmap_ptr = av_fetch(invmap, 1, TRUE); 343 SV* from_invlist = *from_invlist_ptr; 344 SV* to_invmap_sv = *to_invmap_ptr; 345 UV* map = (UV *) SvPVX(to_invmap_sv); 346 347 PERL_ARGS_ASSERT_DO_TRANS_COUNT_INVMAP; 348 349 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d:" 350 "entering do_trans_count_invmap:" 351 " input sv:\n", 352 __FILE__, __LINE__)); 353 DEBUG_y(sv_dump(sv)); 354 DEBUG_y(PerlIO_printf(Perl_debug_log, "mapping:\n")); 355 DEBUG_y(invmap_dump(from_invlist, (UV *) SvPVX(to_invmap_sv))); 356 357 s = (U8*)SvPV_nomg(sv, len); 358 359 send = s + len; 360 361 while (s < send) { 362 UV from; 363 SSize_t i; 364 STRLEN s_len; 365 366 /* Get the code point of the next character in the string */ 367 if (! SvUTF8(sv) || UTF8_IS_INVARIANT(*s)) { 368 from = *s; 369 s_len = 1; 370 } 371 else { 372 from = utf8_to_uvchr_buf(s, send, &s_len); 373 if (from == 0 && *s != '\0') { 374 _force_out_malformed_utf8_message(s, send, 0, /*die*/TRUE); 375 } 376 } 377 378 /* Look the code point up in the data structure for this tr/// to get 379 * what it maps to */ 380 i = _invlist_search(from_invlist, from); 381 assert(i >= 0); 382 383 if (map[i] != (UV) TR_UNLISTED) { 384 matches++; 385 } 386 387 s += s_len; 388 } 389 390 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: returning %zu\n", 391 __FILE__, __LINE__, matches)); 392 return matches; 393 } 394 395 /* Helper function for do_trans(). 396 * Handles cases where an inversion map implementation is to be used and the 397 * search and replacement charlists are either not identical or flags are 398 * present. 399 * 400 * sv may or may not be utf8. 401 */ 402 403 STATIC Size_t 404 S_do_trans_invmap(pTHX_ SV * const sv, AV * const invmap) 405 { 406 U8 *s; 407 U8 *send; 408 U8 *d; 409 U8 *s0; 410 U8 *d0; 411 Size_t matches = 0; 412 STRLEN len; 413 SV** const from_invlist_ptr = av_fetch(invmap, 0, TRUE); 414 SV** const to_invmap_ptr = av_fetch(invmap, 1, TRUE); 415 SV** const to_expansion_ptr = av_fetch(invmap, 2, TRUE); 416 NV max_expansion = SvNV(*to_expansion_ptr); 417 SV* from_invlist = *from_invlist_ptr; 418 SV* to_invmap_sv = *to_invmap_ptr; 419 UV* map = (UV *) SvPVX(to_invmap_sv); 420 UV previous_map = TR_OOB; 421 const bool squash = cBOOL(PL_op->op_private & OPpTRANS_SQUASH); 422 const bool delete_unfound = cBOOL(PL_op->op_private & OPpTRANS_DELETE); 423 bool inplace = ! cBOOL(PL_op->op_private & OPpTRANS_GROWS); 424 const UV* from_array = invlist_array(from_invlist); 425 UV final_map = TR_OOB; 426 bool out_is_utf8 = cBOOL(SvUTF8(sv)); 427 STRLEN s_len; 428 429 PERL_ARGS_ASSERT_DO_TRANS_INVMAP; 430 431 /* A third element in the array indicates that the replacement list was 432 * shorter than the search list, and this element contains the value to use 433 * for the items that don't correspond */ 434 if (av_top_index(invmap) >= 3) { 435 SV** const final_map_ptr = av_fetch(invmap, 3, TRUE); 436 SV* const final_map_sv = *final_map_ptr; 437 final_map = SvUV(final_map_sv); 438 } 439 440 /* If there is something in the transliteration that could force the input 441 * to be changed to UTF-8, we don't know if we can do it in place, so 442 * assume cannot */ 443 if (! out_is_utf8 && (PL_op->op_private & OPpTRANS_CAN_FORCE_UTF8)) { 444 inplace = FALSE; 445 } 446 447 s = (U8*)SvPV_nomg(sv, len); 448 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: entering do_trans_invmap:" 449 " input sv:\n", 450 __FILE__, __LINE__)); 451 DEBUG_y(sv_dump(sv)); 452 DEBUG_y(PerlIO_printf(Perl_debug_log, "mapping:\n")); 453 DEBUG_y(invmap_dump(from_invlist, map)); 454 455 send = s + len; 456 s0 = s; 457 458 /* We know by now if there are some possible input strings whose 459 * transliterations are longer than the input. If none can, we just edit 460 * in place. */ 461 if (inplace) { 462 d0 = d = s; 463 } 464 else { 465 /* Here, we can't edit in place. We have no idea how much, if any, 466 * this particular input string will grow. However, the compilation 467 * calculated the maximum expansion possible. Use that to allocate 468 * based on the worst case scenario. (First +1 is to round up; 2nd is 469 * for \0) */ 470 Newx(d, (STRLEN) (len * max_expansion + 1 + 1), U8); 471 d0 = d; 472 } 473 474 restart: 475 476 /* Do the actual transliteration */ 477 while (s < send) { 478 UV from; 479 UV to; 480 SSize_t i; 481 STRLEN s_len; 482 483 /* Get the code point of the next character in the string */ 484 if (! SvUTF8(sv) || UTF8_IS_INVARIANT(*s)) { 485 from = *s; 486 s_len = 1; 487 } 488 else { 489 from = utf8_to_uvchr_buf(s, send, &s_len); 490 if (from == 0 && *s != '\0') { 491 _force_out_malformed_utf8_message(s, send, 0, /*die*/TRUE); 492 } 493 } 494 495 /* Look the code point up in the data structure for this tr/// to get 496 * what it maps to */ 497 i = _invlist_search(from_invlist, from); 498 assert(i >= 0); 499 500 to = map[i]; 501 502 if (to == (UV) TR_UNLISTED) { /* Just copy the unreplaced character */ 503 if (UVCHR_IS_INVARIANT(from) || ! out_is_utf8) { 504 *d++ = (U8) from; 505 } 506 else if (SvUTF8(sv)) { 507 Move(s, d, s_len, U8); 508 d += s_len; 509 } 510 else { /* Convert to UTF-8 */ 511 append_utf8_from_native_byte(*s, &d); 512 } 513 514 previous_map = to; 515 s += s_len; 516 continue; 517 } 518 519 /* Everything else is counted as a match */ 520 matches++; 521 522 if (to == (UV) TR_SPECIAL_HANDLING) { 523 if (delete_unfound) { 524 s += s_len; 525 continue; 526 } 527 528 /* Use the final character in the replacement list */ 529 to = final_map; 530 } 531 else { /* Here the input code point is to be remapped. The actual 532 value is offset from the base of this entry */ 533 to += from - from_array[i]; 534 } 535 536 /* If copying all occurrences, or this is the first occurrence, copy it 537 * to the output */ 538 if (! squash || to != previous_map) { 539 if (out_is_utf8) { 540 d = uvchr_to_utf8(d, to); 541 } 542 else { 543 if (to >= 256) { /* If need to convert to UTF-8, restart */ 544 out_is_utf8 = TRUE; 545 s = s0; 546 d = d0; 547 matches = 0; 548 goto restart; 549 } 550 *d++ = (U8) to; 551 } 552 } 553 554 previous_map = to; 555 s += s_len; 556 } 557 558 s_len = 0; 559 s += s_len; 560 if (! inplace) { 561 sv_setpvn(sv, (char*)d0, d - d0); 562 Safefree(d0); 563 } 564 else { 565 *d = '\0'; 566 SvCUR_set(sv, d - d0); 567 } 568 569 if (! SvUTF8(sv) && out_is_utf8) { 570 SvUTF8_on(sv); 571 } 572 SvSETMAGIC(sv); 573 574 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: returning %zu\n", 575 __FILE__, __LINE__, matches)); 576 DEBUG_y(sv_dump(sv)); 577 return matches; 578 } 579 580 /* Execute a tr//. sv is the value to be translated, while PL_op 581 * should be an OP_TRANS or OP_TRANSR op, whose op_pv field contains a 582 * translation table or whose op_sv field contains an inversion map. 583 * 584 * Returns a count of number of characters translated 585 */ 586 587 Size_t 588 Perl_do_trans(pTHX_ SV *sv) 589 { 590 STRLEN len; 591 const U8 flags = PL_op->op_private; 592 bool use_utf8_fcns = cBOOL(flags & OPpTRANS_USE_SVOP); 593 bool identical = cBOOL(flags & OPpTRANS_IDENTICAL); 594 595 PERL_ARGS_ASSERT_DO_TRANS; 596 597 if (SvREADONLY(sv) && ! identical) { 598 Perl_croak_no_modify(); 599 } 600 (void)SvPV_const(sv, len); 601 if (!len) 602 return 0; 603 if (! identical) { 604 if (!SvPOKp(sv) || SvTHINKFIRST(sv)) 605 (void)SvPV_force_nomg(sv, len); 606 (void)SvPOK_only_UTF8(sv); 607 } 608 609 if (use_utf8_fcns) { 610 SV* const map = 611 #ifdef USE_ITHREADS 612 PAD_SVl(cPADOP->op_padix); 613 #else 614 MUTABLE_SV(cSVOP->op_sv); 615 #endif 616 617 if (identical) { 618 return do_trans_count_invmap(sv, (AV *) map); 619 } 620 else { 621 return do_trans_invmap(sv, (AV *) map); 622 } 623 } 624 else { 625 const OPtrans_map * const map = (OPtrans_map*)cPVOP->op_pv; 626 627 if (identical) { 628 return do_trans_count(sv, map); 629 } 630 else if (flags & (OPpTRANS_SQUASH|OPpTRANS_DELETE|OPpTRANS_COMPLEMENT)) { 631 return do_trans_complex(sv, map); 632 } 633 else 634 return do_trans_simple(sv, map); 635 } 636 } 637 638 /* 639 =for apidoc_section $string 640 =for apidoc do_join 641 642 This performs a Perl L<C<join>|perlfunc/join>, placing the joined output 643 into C<sv>. 644 645 The elements to join are in SVs, stored in a C array of pointers to SVs, from 646 C<**mark> to S<C<**sp - 1>>. Hence C<*mark> is a reference to the first SV. 647 Each SV will be coerced into a PV if not one already. 648 649 C<delim> contains the string (or coerced into a string) that is to separate 650 each of the joined elements. 651 652 If any component is in UTF-8, the result will be as well, and all non-UTF-8 653 components will be converted to UTF-8 as necessary. 654 655 Magic and tainting are handled. 656 657 =cut 658 */ 659 660 void 661 Perl_do_join(pTHX_ SV *sv, SV *delim, SV **mark, SV **sp) 662 { 663 SV ** const oldmark = mark; 664 I32 items = sp - mark; 665 STRLEN len; 666 STRLEN delimlen; 667 const char * const delims = SvPV_const(delim, delimlen); 668 669 PERL_ARGS_ASSERT_DO_JOIN; 670 671 mark++; 672 len = (items > 0 ? (delimlen * (items - 1) ) : 0); 673 SvUPGRADE(sv, SVt_PV); 674 if (SvLEN(sv) < len + items) { /* current length is way too short */ 675 while (items-- > 0) { 676 if (*mark && !SvGAMAGIC(*mark) && SvOK(*mark)) { 677 STRLEN tmplen; 678 SvPV_const(*mark, tmplen); 679 len += tmplen; 680 } 681 mark++; 682 } 683 SvGROW(sv, len + 1); /* so try to pre-extend */ 684 685 mark = oldmark; 686 items = sp - mark; 687 ++mark; 688 } 689 690 SvPVCLEAR(sv); 691 /* sv_setpv retains old UTF8ness [perl #24846] */ 692 SvUTF8_off(sv); 693 694 if (TAINTING_get && SvMAGICAL(sv)) 695 SvTAINTED_off(sv); 696 697 if (items-- > 0) { 698 if (*mark) 699 sv_catsv(sv, *mark); 700 mark++; 701 } 702 703 if (delimlen) { 704 const U32 delimflag = DO_UTF8(delim) ? SV_CATUTF8 : SV_CATBYTES; 705 for (; items > 0; items--,mark++) { 706 STRLEN len; 707 const char *s; 708 sv_catpvn_flags(sv,delims,delimlen,delimflag); 709 s = SvPV_const(*mark,len); 710 sv_catpvn_flags(sv,s,len, 711 DO_UTF8(*mark) ? SV_CATUTF8 : SV_CATBYTES); 712 } 713 } 714 else { 715 for (; items > 0; items--,mark++) 716 { 717 STRLEN len; 718 const char *s = SvPV_const(*mark,len); 719 sv_catpvn_flags(sv,s,len, 720 DO_UTF8(*mark) ? SV_CATUTF8 : SV_CATBYTES); 721 } 722 } 723 SvSETMAGIC(sv); 724 } 725 726 /* 727 =for apidoc_section $string 728 =for apidoc do_sprintf 729 730 This performs a Perl L<C<sprintf>|perlfunc/sprintf> placing the string output 731 into C<sv>. 732 733 The elements to format are in SVs, stored in a C array of pointers to SVs of 734 length C<len>> and beginning at C<**sarg>. The element referenced by C<*sarg> 735 is the format. 736 737 Magic and tainting are handled. 738 739 =cut 740 */ 741 742 void 743 Perl_do_sprintf(pTHX_ SV *sv, SSize_t len, SV **sarg) 744 { 745 STRLEN patlen; 746 const char * const pat = SvPV_const(*sarg, patlen); 747 bool do_taint = FALSE; 748 749 PERL_ARGS_ASSERT_DO_SPRINTF; 750 assert(len >= 1); 751 752 if (SvTAINTED(*sarg)) 753 TAINT_PROPER( 754 (PL_op && PL_op->op_type < OP_max) 755 ? (PL_op->op_type == OP_PRTF) 756 ? "printf" 757 : PL_op_name[PL_op->op_type] 758 : "(unknown)" 759 ); 760 SvUTF8_off(sv); 761 if (DO_UTF8(*sarg)) 762 SvUTF8_on(sv); 763 sv_vsetpvfn(sv, pat, patlen, NULL, sarg + 1, (Size_t)(len - 1), &do_taint); 764 SvSETMAGIC(sv); 765 if (do_taint) 766 SvTAINTED_on(sv); 767 } 768 769 UV 770 Perl_do_vecget(pTHX_ SV *sv, STRLEN offset, int size) 771 { 772 STRLEN srclen; 773 const I32 svpv_flags = ((PL_op->op_flags & OPf_MOD || LVRET) 774 ? SV_UNDEF_RETURNS_NULL : 0); 775 unsigned char *s = (unsigned char *) 776 SvPV_flags(sv, srclen, (svpv_flags|SV_GMAGIC)); 777 UV retnum = 0; 778 779 if (!s) { 780 s = (unsigned char *)""; 781 } 782 783 PERL_ARGS_ASSERT_DO_VECGET; 784 785 if (size < 1 || ! isPOWER_OF_2(size)) 786 Perl_croak(aTHX_ "Illegal number of bits in vec"); 787 788 if (SvUTF8(sv)) { 789 if (Perl_sv_utf8_downgrade_flags(aTHX_ sv, TRUE, 0)) { 790 /* PVX may have changed */ 791 s = (unsigned char *) SvPV_flags(sv, srclen, svpv_flags); 792 } 793 else { 794 Perl_croak(aTHX_ "Use of strings with code points over 0xFF" 795 " as arguments to vec is forbidden"); 796 } 797 } 798 799 if (size <= 8) { 800 STRLEN bitoffs = ((offset % 8) * size) % 8; 801 STRLEN uoffset = offset / (8 / size); 802 803 if (uoffset >= srclen) 804 return 0; 805 806 retnum = (s[uoffset] >> bitoffs) & nBIT_MASK(size); 807 } 808 else { 809 int n = size / 8; /* required number of bytes */ 810 SSize_t uoffset; 811 812 #ifdef UV_IS_QUAD 813 814 if (size == 64) { 815 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 816 "Bit vector size > 32 non-portable"); 817 } 818 #endif 819 if (offset > Size_t_MAX / n - 1) /* would overflow */ 820 return 0; 821 822 uoffset = offset * n; 823 824 /* Switch on the number of bytes available, but no more than the number 825 * required */ 826 switch (MIN(n, (SSize_t) srclen - uoffset)) { 827 828 #ifdef UV_IS_QUAD 829 830 case 8: 831 retnum += ((UV) s[uoffset + 7]); 832 /* FALLTHROUGH */ 833 case 7: 834 retnum += ((UV) s[uoffset + 6] << 8); /* = size - 56 */ 835 /* FALLTHROUGH */ 836 case 6: 837 retnum += ((UV) s[uoffset + 5] << 16); /* = size - 48 */ 838 /* FALLTHROUGH */ 839 case 5: 840 retnum += ((UV) s[uoffset + 4] << 24); /* = size - 40 */ 841 #endif 842 /* FALLTHROUGH */ 843 case 4: 844 retnum += ((UV) s[uoffset + 3] << (size - 32)); 845 /* FALLTHROUGH */ 846 case 3: 847 retnum += ((UV) s[uoffset + 2] << (size - 24)); 848 /* FALLTHROUGH */ 849 case 2: 850 retnum += ((UV) s[uoffset + 1] << (size - 16)); 851 /* FALLTHROUGH */ 852 case 1: 853 retnum += ((UV) s[uoffset ] << (size - 8)); 854 break; 855 856 default: 857 return 0; 858 } 859 } 860 861 return retnum; 862 } 863 864 /* currently converts input to bytes if possible but doesn't sweat failures, 865 * although it does ensure that the string it clobbers is not marked as 866 * utf8-valid any more 867 */ 868 void 869 Perl_do_vecset(pTHX_ SV *sv) 870 { 871 STRLEN offset, bitoffs = 0; 872 int size; 873 unsigned char *s; 874 UV lval; 875 I32 mask; 876 STRLEN targlen; 877 STRLEN len; 878 SV * const targ = LvTARG(sv); 879 char errflags = LvFLAGS(sv); 880 881 PERL_ARGS_ASSERT_DO_VECSET; 882 883 /* some out-of-range errors have been deferred if/until the LV is 884 * actually written to: f(vec($s,-1,8)) is not always fatal */ 885 if (errflags) { 886 assert(!(errflags & ~(LVf_NEG_OFF|LVf_OUT_OF_RANGE))); 887 if (errflags & LVf_NEG_OFF) 888 Perl_croak_nocontext("Negative offset to vec in lvalue context"); 889 Perl_croak_nocontext("Out of memory!"); 890 } 891 892 if (!targ) 893 return; 894 s = (unsigned char*)SvPV_force_flags(targ, targlen, 895 SV_GMAGIC | SV_UNDEF_RETURNS_NULL); 896 if (SvUTF8(targ)) { 897 /* This is handled by the SvPOK_only below... 898 if (!Perl_sv_utf8_downgrade_flags(aTHX_ targ, TRUE, 0)) 899 SvUTF8_off(targ); 900 */ 901 (void) Perl_sv_utf8_downgrade_flags(aTHX_ targ, TRUE, 0); 902 } 903 904 (void)SvPOK_only(targ); 905 lval = SvUV(sv); 906 offset = LvTARGOFF(sv); 907 size = LvTARGLEN(sv); 908 909 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */ 910 Perl_croak(aTHX_ "Illegal number of bits in vec"); 911 912 if (size < 8) { 913 bitoffs = ((offset%8)*size)%8; 914 offset /= 8/size; 915 } 916 else if (size > 8) { 917 int n = size/8; 918 if (offset > Size_t_MAX / n - 1) /* would overflow */ 919 Perl_croak_nocontext("Out of memory!"); 920 offset *= n; 921 } 922 923 len = (bitoffs + size + 7)/8; /* required number of bytes */ 924 if (targlen < offset || targlen - offset < len) { 925 STRLEN newlen = offset > Size_t_MAX - len - 1 ? /* avoid overflow */ 926 Size_t_MAX : offset + len + 1; 927 s = (unsigned char*)SvGROW(targ, newlen); 928 (void)memzero((char *)(s + targlen), newlen - targlen); 929 SvCUR_set(targ, newlen - 1); 930 } 931 932 if (size < 8) { 933 mask = nBIT_MASK(size); 934 lval &= mask; 935 s[offset] &= ~(mask << bitoffs); 936 s[offset] |= lval << bitoffs; 937 } 938 else switch (size) { 939 940 #ifdef UV_IS_QUAD 941 942 case 64: 943 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 944 "Bit vector size > 32 non-portable"); 945 s[offset+7] = (U8)( lval ); /* = size - 64 */ 946 s[offset+6] = (U8)( lval >> 8); /* = size - 56 */ 947 s[offset+5] = (U8)( lval >> 16); /* = size - 48 */ 948 s[offset+4] = (U8)( lval >> 24); /* = size - 40 */ 949 #endif 950 /* FALLTHROUGH */ 951 case 32: 952 s[offset+3] = (U8)( lval >> (size - 32)); 953 s[offset+2] = (U8)( lval >> (size - 24)); 954 /* FALLTHROUGH */ 955 case 16: 956 s[offset+1] = (U8)( lval >> (size - 16)); 957 /* FALLTHROUGH */ 958 case 8: 959 s[offset ] = (U8)( lval >> (size - 8)); 960 } 961 SvSETMAGIC(targ); 962 } 963 964 void 965 Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) 966 { 967 long *dl; 968 long *ll; 969 long *rl; 970 char *dc; 971 STRLEN leftlen; 972 STRLEN rightlen; 973 const char *lc; 974 const char *rc; 975 STRLEN len = 0; 976 STRLEN lensave; 977 const char *lsave; 978 const char *rsave; 979 STRLEN needlen = 0; 980 bool result_needs_to_be_utf8 = FALSE; 981 bool left_utf8 = FALSE; 982 bool right_utf8 = FALSE; 983 U8 * left_non_downgraded = NULL; 984 U8 * right_non_downgraded = NULL; 985 Size_t left_non_downgraded_len = 0; 986 Size_t right_non_downgraded_len = 0; 987 char * non_downgraded = NULL; 988 Size_t non_downgraded_len = 0; 989 990 PERL_ARGS_ASSERT_DO_VOP; 991 992 if (sv != left || (optype != OP_BIT_AND && !SvOK(sv))) 993 SvPVCLEAR(sv); /* avoid undef warning on |= and ^= */ 994 if (sv == left) { 995 lc = SvPV_force_nomg(left, leftlen); 996 } 997 else { 998 lc = SvPV_nomg_const(left, leftlen); 999 SvPV_force_nomg_nolen(sv); 1000 } 1001 rc = SvPV_nomg_const(right, rightlen); 1002 1003 /* This needs to come after SvPV to ensure that string overloading has 1004 fired off. */ 1005 1006 /* Create downgraded temporaries of any UTF-8 encoded operands */ 1007 if (DO_UTF8(left)) { 1008 const U8 * save_lc = (U8 *) lc; 1009 1010 left_utf8 = TRUE; 1011 result_needs_to_be_utf8 = TRUE; 1012 1013 left_non_downgraded_len = leftlen; 1014 lc = (char *) bytes_from_utf8_loc((const U8 *) lc, &leftlen, 1015 &left_utf8, 1016 (const U8 **) &left_non_downgraded); 1017 /* Calculate the number of trailing unconvertible bytes. This quantity 1018 * is the original length minus the length of the converted portion. */ 1019 left_non_downgraded_len -= left_non_downgraded - save_lc; 1020 SAVEFREEPV(lc); 1021 } 1022 if (DO_UTF8(right)) { 1023 const U8 * save_rc = (U8 *) rc; 1024 1025 right_utf8 = TRUE; 1026 result_needs_to_be_utf8 = TRUE; 1027 1028 right_non_downgraded_len = rightlen; 1029 rc = (char *) bytes_from_utf8_loc((const U8 *) rc, &rightlen, 1030 &right_utf8, 1031 (const U8 **) &right_non_downgraded); 1032 right_non_downgraded_len -= right_non_downgraded - save_rc; 1033 SAVEFREEPV(rc); 1034 } 1035 1036 /* We set 'len' to the length that the operation actually operates on. The 1037 * dangling part of the longer operand doesn't actually participate in the 1038 * operation. What happens is that we pretend that the shorter operand has 1039 * been extended to the right by enough imaginary zeros to match the length 1040 * of the longer one. But we know in advance the result of the operation 1041 * on zeros without having to do it. In the case of '&', the result is 1042 * zero, and the dangling portion is simply discarded. For '|' and '^', the 1043 * result is the same as the other operand, so the dangling part is just 1044 * appended to the final result, unchanged. As of perl-5.32, we no longer 1045 * accept above-FF code points in the dangling portion. 1046 */ 1047 if (left_utf8 || right_utf8) { 1048 Perl_croak(aTHX_ FATAL_ABOVE_FF_MSG, PL_op_desc[optype]); 1049 } 1050 else { /* Neither is UTF-8 */ 1051 len = MIN(leftlen, rightlen); 1052 } 1053 1054 lensave = len; 1055 lsave = lc; 1056 rsave = rc; 1057 1058 (void)SvPOK_only(sv); 1059 if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) { 1060 dc = SvPV_force_nomg_nolen(sv); 1061 if (SvLEN(sv) < len + 1) { 1062 dc = SvGROW(sv, len + 1); 1063 (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1); 1064 } 1065 } 1066 else { 1067 needlen = optype == OP_BIT_AND 1068 ? len : (leftlen > rightlen ? leftlen : rightlen); 1069 Newxz(dc, needlen + 1, char); 1070 sv_usepvn_flags(sv, dc, needlen, SV_HAS_TRAILING_NUL); 1071 dc = SvPVX(sv); /* sv_usepvn() calls Renew() */ 1072 } 1073 SvCUR_set(sv, len); 1074 1075 if (len >= sizeof(long)*4 && 1076 !(PTR2nat(dc) % sizeof(long)) && 1077 !(PTR2nat(lc) % sizeof(long)) && 1078 !(PTR2nat(rc) % sizeof(long))) /* It's almost always aligned... */ 1079 { 1080 const STRLEN remainder = len % (sizeof(long)*4); 1081 len /= (sizeof(long)*4); 1082 1083 dl = (long*)dc; 1084 ll = (long*)lc; 1085 rl = (long*)rc; 1086 1087 switch (optype) { 1088 case OP_BIT_AND: 1089 while (len--) { 1090 *dl++ = *ll++ & *rl++; 1091 *dl++ = *ll++ & *rl++; 1092 *dl++ = *ll++ & *rl++; 1093 *dl++ = *ll++ & *rl++; 1094 } 1095 break; 1096 case OP_BIT_XOR: 1097 while (len--) { 1098 *dl++ = *ll++ ^ *rl++; 1099 *dl++ = *ll++ ^ *rl++; 1100 *dl++ = *ll++ ^ *rl++; 1101 *dl++ = *ll++ ^ *rl++; 1102 } 1103 break; 1104 case OP_BIT_OR: 1105 while (len--) { 1106 *dl++ = *ll++ | *rl++; 1107 *dl++ = *ll++ | *rl++; 1108 *dl++ = *ll++ | *rl++; 1109 *dl++ = *ll++ | *rl++; 1110 } 1111 } 1112 1113 dc = (char*)dl; 1114 lc = (char*)ll; 1115 rc = (char*)rl; 1116 1117 len = remainder; 1118 } 1119 1120 switch (optype) { 1121 case OP_BIT_AND: 1122 while (len--) 1123 *dc++ = *lc++ & *rc++; 1124 *dc = '\0'; 1125 break; 1126 case OP_BIT_XOR: 1127 while (len--) 1128 *dc++ = *lc++ ^ *rc++; 1129 goto mop_up; 1130 case OP_BIT_OR: 1131 while (len--) 1132 *dc++ = *lc++ | *rc++; 1133 mop_up: 1134 len = lensave; 1135 if (rightlen > len) { 1136 if (dc == rc) 1137 SvCUR_set(sv, rightlen); 1138 else 1139 sv_catpvn_nomg(sv, rsave + len, rightlen - len); 1140 } 1141 else if (leftlen > len) { 1142 if (dc == lc) 1143 SvCUR_set(sv, leftlen); 1144 else 1145 sv_catpvn_nomg(sv, lsave + len, leftlen - len); 1146 } 1147 *SvEND(sv) = '\0'; 1148 1149 /* If there is trailing stuff that couldn't be converted from UTF-8, it 1150 * is appended as-is for the ^ and | operators. This preserves 1151 * backwards compatibility */ 1152 if (right_non_downgraded) { 1153 non_downgraded = (char *) right_non_downgraded; 1154 non_downgraded_len = right_non_downgraded_len; 1155 } 1156 else if (left_non_downgraded) { 1157 non_downgraded = (char *) left_non_downgraded; 1158 non_downgraded_len = left_non_downgraded_len; 1159 } 1160 1161 break; 1162 } 1163 1164 if (result_needs_to_be_utf8) { 1165 sv_utf8_upgrade_nomg(sv); 1166 1167 /* Append any trailing UTF-8 as-is. */ 1168 if (non_downgraded) { 1169 sv_catpvn_nomg(sv, non_downgraded, non_downgraded_len); 1170 } 1171 } 1172 1173 SvTAINT(sv); 1174 } 1175 1176 1177 /* Perl_do_kv() may be: 1178 * * called directly as the pp function for pp_keys() and pp_values(); 1179 * * It may also be called directly when the op is OP_AVHVSWITCH, to 1180 * implement CORE::keys(), CORE::values(). 1181 * 1182 * In all cases it expects an HV on the stack and returns a list of keys, 1183 * values, or key-value pairs, depending on PL_op. 1184 */ 1185 1186 PP(do_kv) 1187 { 1188 dSP; 1189 HV * const keys = MUTABLE_HV(POPs); 1190 const U8 gimme = GIMME_V; 1191 1192 const I32 dokeys = (PL_op->op_type == OP_KEYS) 1193 || ( PL_op->op_type == OP_AVHVSWITCH 1194 && (PL_op->op_private & OPpAVHVSWITCH_MASK) 1195 + OP_EACH == OP_KEYS); 1196 1197 const I32 dovalues = (PL_op->op_type == OP_VALUES) 1198 || ( PL_op->op_type == OP_AVHVSWITCH 1199 && (PL_op->op_private & OPpAVHVSWITCH_MASK) 1200 + OP_EACH == OP_VALUES); 1201 1202 assert( PL_op->op_type == OP_KEYS 1203 || PL_op->op_type == OP_VALUES 1204 || PL_op->op_type == OP_AVHVSWITCH); 1205 1206 assert(!( PL_op->op_type == OP_VALUES 1207 && (PL_op->op_private & OPpMAYBE_LVSUB))); 1208 1209 (void)hv_iterinit(keys); /* always reset iterator regardless */ 1210 1211 if (gimme == G_VOID) 1212 RETURN; 1213 1214 if (gimme == G_SCALAR) { 1215 if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */ 1216 SV * const ret = newSV_type_mortal(SVt_PVLV); /* Not TARG RT#67838 */ 1217 sv_magic(ret, NULL, PERL_MAGIC_nkeys, NULL, 0); 1218 LvTYPE(ret) = 'k'; 1219 LvTARG(ret) = SvREFCNT_inc_simple(keys); 1220 PUSHs(ret); 1221 } 1222 else { 1223 IV i; 1224 dTARGET; 1225 1226 /* note that in 'scalar(keys %h)' the OP_KEYS is usually 1227 * optimised away and the action is performed directly by the 1228 * padhv or rv2hv op. We now only get here via OP_AVHVSWITCH 1229 * and \&CORE::keys 1230 */ 1231 if (! SvTIED_mg((const SV *)keys, PERL_MAGIC_tied) ) { 1232 i = HvUSEDKEYS(keys); 1233 } 1234 else { 1235 i = 0; 1236 while (hv_iternext(keys)) i++; 1237 } 1238 PUSHi( i ); 1239 } 1240 RETURN; 1241 } 1242 1243 if (UNLIKELY(PL_op->op_private & OPpMAYBE_LVSUB)) { 1244 const I32 flags = is_lvalue_sub(); 1245 if (flags && !(flags & OPpENTERSUB_INARGS)) 1246 /* diag_listed_as: Can't modify %s in %s */ 1247 Perl_croak(aTHX_ "Can't modify keys in list assignment"); 1248 } 1249 1250 PUTBACK; 1251 hv_pushkv(keys, (dokeys | (dovalues << 1))); 1252 return NORMAL; 1253 } 1254 1255 /* 1256 * ex: set ts=8 sts=4 sw=4 et: 1257 */ 1258