1 /* doop.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2004, 2005, 2006, 2007, 2008, 2009 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * 'So that was the job I felt I had to do when I started,' thought Sam. 13 * 14 * [p.934 of _The Lord of the Rings_, VI/iii: "Mount Doom"] 15 */ 16 17 /* This file contains some common functions needed to carry out certain 18 * ops. For example, both pp_sprintf() and pp_prtf() call the function 19 * do_sprintf() found in this file. 20 */ 21 22 #include "EXTERN.h" 23 #define PERL_IN_DOOP_C 24 #include "perl.h" 25 #include "invlist_inline.h" 26 27 #ifndef PERL_MICRO 28 #include <signal.h> 29 #endif 30 31 32 /* Helper function for do_trans(). 33 * Handles cases where the search and replacement charlists aren't UTF-8, 34 * aren't identical, and neither the /d nor /s flag is present. 35 * 36 * sv may or may not be utf8. Note that no code point above 255 can possibly 37 * be in the to-translate set 38 */ 39 40 STATIC Size_t 41 S_do_trans_simple(pTHX_ SV * const sv, const OPtrans_map * const tbl) 42 { 43 Size_t matches = 0; 44 STRLEN len; 45 U8 *s = (U8*)SvPV_nomg(sv,len); 46 U8 * const send = s+len; 47 48 PERL_ARGS_ASSERT_DO_TRANS_SIMPLE; 49 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: entering do_trans_simple:" 50 " input sv:\n", 51 __FILE__, __LINE__)); 52 DEBUG_y(sv_dump(sv)); 53 54 /* First, take care of non-UTF-8 input strings, because they're easy */ 55 if (!SvUTF8(sv)) { 56 while (s < send) { 57 const short ch = tbl->map[*s]; 58 if (ch >= 0) { 59 matches++; 60 *s = (U8)ch; 61 } 62 s++; 63 } 64 SvSETMAGIC(sv); 65 } 66 else { 67 const bool grows = cBOOL(PL_op->op_private & OPpTRANS_GROWS); 68 U8 *d; 69 U8 *dstart; 70 71 /* Allow for worst-case expansion: Each input byte can become 2. For a 72 * given input character, this happens when it occupies a single byte 73 * under UTF-8, but is to be translated to something that occupies two: 74 * $_="a".chr(400); tr/a/\xFE/, FE needs encoding. */ 75 if (grows) 76 Newx(d, len*2+1, U8); 77 else 78 d = s; 79 dstart = d; 80 while (s < send) { 81 STRLEN ulen; 82 short ch; 83 84 /* Need to check this, otherwise 128..255 won't match */ 85 const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT); 86 if (c < 0x100 && (ch = tbl->map[c]) >= 0) { 87 matches++; 88 d = uvchr_to_utf8(d, (UV)ch); 89 s += ulen; 90 } 91 else { /* No match -> copy */ 92 Move(s, d, ulen, U8); 93 d += ulen; 94 s += ulen; 95 } 96 } 97 if (grows) { 98 sv_setpvn(sv, (char*)dstart, d - dstart); 99 Safefree(dstart); 100 } 101 else { 102 *d = '\0'; 103 SvCUR_set(sv, d - dstart); 104 } 105 SvUTF8_on(sv); 106 SvSETMAGIC(sv); 107 } 108 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: returning %zu\n", 109 __FILE__, __LINE__, matches)); 110 DEBUG_y(sv_dump(sv)); 111 return matches; 112 } 113 114 115 /* Helper function for do_trans(). 116 * Handles cases where the search and replacement charlists are identical and 117 * non-utf8: so the string isn't modified, and only a count of modifiable 118 * chars is needed. 119 * 120 * Note that it doesn't handle /d or /s, since these modify the string even if 121 * the replacement list is empty. 122 * 123 * sv may or may not be utf8. Note that no code point above 255 can possibly 124 * be in the to-translate set 125 */ 126 127 STATIC Size_t 128 S_do_trans_count(pTHX_ SV * const sv, const OPtrans_map * const tbl) 129 { 130 STRLEN len; 131 const U8 *s = (const U8*)SvPV_nomg_const(sv, len); 132 const U8 * const send = s + len; 133 Size_t matches = 0; 134 135 PERL_ARGS_ASSERT_DO_TRANS_COUNT; 136 137 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: entering do_trans_count:" 138 " input sv:\n", 139 __FILE__, __LINE__)); 140 DEBUG_y(sv_dump(sv)); 141 142 if (!SvUTF8(sv)) { 143 while (s < send) { 144 if (tbl->map[*s++] >= 0) 145 matches++; 146 } 147 } 148 else { 149 const bool complement = cBOOL(PL_op->op_private & OPpTRANS_COMPLEMENT); 150 while (s < send) { 151 STRLEN ulen; 152 const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT); 153 if (c < 0x100) { 154 if (tbl->map[c] >= 0) 155 matches++; 156 } else if (complement) 157 matches++; 158 s += ulen; 159 } 160 } 161 162 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: count returning %zu\n", 163 __FILE__, __LINE__, matches)); 164 return matches; 165 } 166 167 168 /* Helper function for do_trans(). 169 * Handles cases where the search and replacement charlists aren't identical 170 * and both are non-utf8, and one or both of /d, /s is specified. 171 * 172 * sv may or may not be utf8. Note that no code point above 255 can possibly 173 * be in the to-translate set 174 */ 175 176 STATIC Size_t 177 S_do_trans_complex(pTHX_ SV * const sv, const OPtrans_map * const tbl) 178 { 179 STRLEN len; 180 U8 *s = (U8*)SvPV_nomg(sv, len); 181 U8 * const send = s+len; 182 Size_t matches = 0; 183 const bool complement = cBOOL(PL_op->op_private & OPpTRANS_COMPLEMENT); 184 185 PERL_ARGS_ASSERT_DO_TRANS_COMPLEX; 186 187 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: entering do_trans_complex:" 188 " input sv:\n", 189 __FILE__, __LINE__)); 190 DEBUG_y(sv_dump(sv)); 191 192 if (!SvUTF8(sv)) { 193 U8 *d = s; 194 U8 * const dstart = d; 195 196 if (PL_op->op_private & OPpTRANS_SQUASH) { 197 198 /* What the mapping of the previous character was to. If the new 199 * character has the same mapping, it is squashed from the output 200 * (but still is included in the count) */ 201 short previous_map = (short) TR_OOB; 202 203 while (s < send) { 204 const short this_map = tbl->map[*s]; 205 if (this_map >= 0) { 206 matches++; 207 if (this_map != previous_map) { 208 *d++ = (U8)this_map; 209 previous_map = this_map; 210 } 211 } 212 else { 213 if (this_map == (short) TR_UNMAPPED) { 214 *d++ = *s; 215 previous_map = (short) TR_OOB; 216 } 217 else { 218 assert(this_map == (short) TR_DELETE); 219 matches++; 220 } 221 } 222 223 s++; 224 } 225 } 226 else { /* Not to squash */ 227 while (s < send) { 228 const short this_map = tbl->map[*s]; 229 if (this_map >= 0) { 230 matches++; 231 *d++ = (U8)this_map; 232 } 233 else if (this_map == (short) TR_UNMAPPED) 234 *d++ = *s; 235 else if (this_map == (short) TR_DELETE) 236 matches++; 237 s++; 238 } 239 } 240 *d = '\0'; 241 SvCUR_set(sv, d - dstart); 242 } 243 else { /* is utf8 */ 244 const bool squash = cBOOL(PL_op->op_private & OPpTRANS_SQUASH); 245 const bool grows = cBOOL(PL_op->op_private & OPpTRANS_GROWS); 246 U8 *d; 247 U8 *dstart; 248 Size_t size = tbl->size; 249 250 /* What the mapping of the previous character was to. If the new 251 * character has the same mapping, it is squashed from the output (but 252 * still is included in the count) */ 253 UV pch = TR_OOB; 254 255 if (grows) 256 /* Allow for worst-case expansion: Each input byte can become 2. 257 * For a given input character, this happens when it occupies a 258 * single byte under UTF-8, but is to be translated to something 259 * that occupies two: */ 260 Newx(d, len*2+1, U8); 261 else 262 d = s; 263 dstart = d; 264 265 while (s < send) { 266 STRLEN len; 267 const UV comp = utf8n_to_uvchr(s, send - s, &len, 268 UTF8_ALLOW_DEFAULT); 269 UV ch; 270 short sch; 271 272 sch = (comp < size) 273 ? tbl->map[comp] 274 : (! complement) 275 ? (short) TR_UNMAPPED 276 : tbl->map[size]; 277 278 if (sch >= 0) { 279 ch = (UV)sch; 280 replace: 281 matches++; 282 if (LIKELY(!squash || ch != pch)) { 283 d = uvchr_to_utf8(d, ch); 284 pch = ch; 285 } 286 s += len; 287 continue; 288 } 289 else if (sch == (short) TR_UNMAPPED) { 290 Move(s, d, len, U8); 291 d += len; 292 pch = TR_OOB; 293 } 294 else if (sch == (short) TR_DELETE) 295 matches++; 296 else { 297 assert(sch == (short) TR_R_EMPTY); /* empty replacement */ 298 ch = comp; 299 goto replace; 300 } 301 302 s += len; 303 } 304 305 if (grows) { 306 sv_setpvn(sv, (char*)dstart, d - dstart); 307 Safefree(dstart); 308 } 309 else { 310 *d = '\0'; 311 SvCUR_set(sv, d - dstart); 312 } 313 SvUTF8_on(sv); 314 } 315 SvSETMAGIC(sv); 316 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: returning %zu\n", 317 __FILE__, __LINE__, matches)); 318 DEBUG_y(sv_dump(sv)); 319 return matches; 320 } 321 322 323 /* Helper function for do_trans(). 324 * Handles cases where an inversion map implementation is to be used and the 325 * search and replacement charlists are identical: so the string isn't 326 * modified, and only a count of modifiable chars is needed. 327 * 328 * Note that it doesn't handle /d nor /s, since these modify the string 329 * even if the replacement charlist is empty. 330 * 331 * sv may or may not be utf8. 332 */ 333 334 STATIC Size_t 335 S_do_trans_count_invmap(pTHX_ SV * const sv, AV * const invmap) 336 { 337 U8 *s; 338 U8 *send; 339 Size_t matches = 0; 340 STRLEN len; 341 SV** const from_invlist_ptr = av_fetch(invmap, 0, TRUE); 342 SV** const to_invmap_ptr = av_fetch(invmap, 1, TRUE); 343 SV* from_invlist = *from_invlist_ptr; 344 SV* to_invmap_sv = *to_invmap_ptr; 345 UV* map = (UV *) SvPVX(to_invmap_sv); 346 347 PERL_ARGS_ASSERT_DO_TRANS_COUNT_INVMAP; 348 349 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d:" 350 "entering do_trans_count_invmap:" 351 " input sv:\n", 352 __FILE__, __LINE__)); 353 DEBUG_y(sv_dump(sv)); 354 DEBUG_y(PerlIO_printf(Perl_debug_log, "mapping:\n")); 355 DEBUG_y(invmap_dump(from_invlist, (UV *) SvPVX(to_invmap_sv))); 356 357 s = (U8*)SvPV_nomg(sv, len); 358 359 send = s + len; 360 361 while (s < send) { 362 UV from; 363 SSize_t i; 364 STRLEN s_len; 365 366 /* Get the code point of the next character in the string */ 367 if (! SvUTF8(sv) || UTF8_IS_INVARIANT(*s)) { 368 from = *s; 369 s_len = 1; 370 } 371 else { 372 from = utf8_to_uvchr_buf(s, send, &s_len); 373 if (from == 0 && *s != '\0') { 374 _force_out_malformed_utf8_message(s, send, 0, /*die*/TRUE); 375 } 376 } 377 378 /* Look the code point up in the data structure for this tr/// to get 379 * what it maps to */ 380 i = _invlist_search(from_invlist, from); 381 assert(i >= 0); 382 383 if (map[i] != (UV) TR_UNLISTED) { 384 matches++; 385 } 386 387 s += s_len; 388 } 389 390 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: returning %zu\n", 391 __FILE__, __LINE__, matches)); 392 return matches; 393 } 394 395 /* Helper function for do_trans(). 396 * Handles cases where an inversion map implementation is to be used and the 397 * search and replacement charlists are either not identical or flags are 398 * present. 399 * 400 * sv may or may not be utf8. 401 */ 402 403 STATIC Size_t 404 S_do_trans_invmap(pTHX_ SV * const sv, AV * const invmap) 405 { 406 U8 *s; 407 U8 *send; 408 U8 *d; 409 U8 *s0; 410 U8 *d0; 411 Size_t matches = 0; 412 STRLEN len; 413 SV** const from_invlist_ptr = av_fetch(invmap, 0, TRUE); 414 SV** const to_invmap_ptr = av_fetch(invmap, 1, TRUE); 415 SV** const to_expansion_ptr = av_fetch(invmap, 2, TRUE); 416 NV max_expansion = SvNV(*to_expansion_ptr); 417 SV* from_invlist = *from_invlist_ptr; 418 SV* to_invmap_sv = *to_invmap_ptr; 419 UV* map = (UV *) SvPVX(to_invmap_sv); 420 UV previous_map = TR_OOB; 421 const bool squash = cBOOL(PL_op->op_private & OPpTRANS_SQUASH); 422 const bool delete_unfound = cBOOL(PL_op->op_private & OPpTRANS_DELETE); 423 bool inplace = ! cBOOL(PL_op->op_private & OPpTRANS_GROWS); 424 const UV* from_array = invlist_array(from_invlist); 425 UV final_map = TR_OOB; 426 bool out_is_utf8 = cBOOL(SvUTF8(sv)); 427 STRLEN s_len; 428 429 PERL_ARGS_ASSERT_DO_TRANS_INVMAP; 430 431 /* A third element in the array indicates that the replacement list was 432 * shorter than the search list, and this element contains the value to use 433 * for the items that don't correspond */ 434 if (av_top_index(invmap) >= 3) { 435 SV** const final_map_ptr = av_fetch(invmap, 3, TRUE); 436 SV* const final_map_sv = *final_map_ptr; 437 final_map = SvUV(final_map_sv); 438 } 439 440 /* If there is something in the transliteration that could force the input 441 * to be changed to UTF-8, we don't know if we can do it in place, so 442 * assume cannot */ 443 if (! out_is_utf8 && (PL_op->op_private & OPpTRANS_CAN_FORCE_UTF8)) { 444 inplace = FALSE; 445 if (max_expansion < 2) { 446 max_expansion = 2; 447 } 448 } 449 450 s = (U8*)SvPV_nomg(sv, len); 451 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: entering do_trans_invmap:" 452 " input sv:\n", 453 __FILE__, __LINE__)); 454 DEBUG_y(sv_dump(sv)); 455 DEBUG_y(PerlIO_printf(Perl_debug_log, "mapping:\n")); 456 DEBUG_y(invmap_dump(from_invlist, map)); 457 458 send = s + len; 459 s0 = s; 460 461 /* We know by now if there are some possible input strings whose 462 * transliterations are longer than the input. If none can, we just edit 463 * in place. */ 464 if (inplace) { 465 d0 = d = s; 466 } 467 else { 468 /* Here, we can't edit in place. We have no idea how much, if any, 469 * this particular input string will grow. However, the compilation 470 * calculated the maximum expansion possible. Use that to allocate 471 * based on the worst case scenario. (First +1 is to round up; 2nd is 472 * for \0) */ 473 Newx(d, (STRLEN) (len * max_expansion + 1 + 1), U8); 474 d0 = d; 475 } 476 477 restart: 478 479 /* Do the actual transliteration */ 480 while (s < send) { 481 UV from; 482 UV to; 483 SSize_t i; 484 STRLEN s_len; 485 486 /* Get the code point of the next character in the string */ 487 if (! SvUTF8(sv) || UTF8_IS_INVARIANT(*s)) { 488 from = *s; 489 s_len = 1; 490 } 491 else { 492 from = utf8_to_uvchr_buf(s, send, &s_len); 493 if (from == 0 && *s != '\0') { 494 _force_out_malformed_utf8_message(s, send, 0, /*die*/TRUE); 495 } 496 } 497 498 /* Look the code point up in the data structure for this tr/// to get 499 * what it maps to */ 500 i = _invlist_search(from_invlist, from); 501 assert(i >= 0); 502 503 to = map[i]; 504 505 if (to == (UV) TR_UNLISTED) { /* Just copy the unreplaced character */ 506 if (UVCHR_IS_INVARIANT(from) || ! out_is_utf8) { 507 *d++ = (U8) from; 508 } 509 else if (SvUTF8(sv)) { 510 Move(s, d, s_len, U8); 511 d += s_len; 512 } 513 else { /* Convert to UTF-8 */ 514 append_utf8_from_native_byte(*s, &d); 515 } 516 517 previous_map = to; 518 s += s_len; 519 continue; 520 } 521 522 /* Everything else is counted as a match */ 523 matches++; 524 525 if (to == (UV) TR_SPECIAL_HANDLING) { 526 if (delete_unfound) { 527 s += s_len; 528 continue; 529 } 530 531 /* Use the final character in the replacement list */ 532 to = final_map; 533 } 534 else { /* Here the input code point is to be remapped. The actual 535 value is offset from the base of this entry */ 536 to += from - from_array[i]; 537 } 538 539 /* If copying all occurrences, or this is the first occurrence, copy it 540 * to the output */ 541 if (! squash || to != previous_map) { 542 if (out_is_utf8) { 543 d = uvchr_to_utf8(d, to); 544 } 545 else { 546 if (to >= 256) { /* If need to convert to UTF-8, restart */ 547 out_is_utf8 = TRUE; 548 s = s0; 549 d = d0; 550 matches = 0; 551 goto restart; 552 } 553 *d++ = (U8) to; 554 } 555 } 556 557 previous_map = to; 558 s += s_len; 559 } 560 561 s_len = 0; 562 s += s_len; 563 if (! inplace) { 564 sv_setpvn(sv, (char*)d0, d - d0); 565 Safefree(d0); 566 } 567 else { 568 *d = '\0'; 569 SvCUR_set(sv, d - d0); 570 } 571 572 if (! SvUTF8(sv) && out_is_utf8) { 573 SvUTF8_on(sv); 574 } 575 SvSETMAGIC(sv); 576 577 DEBUG_y(PerlIO_printf(Perl_debug_log, "%s: %d: returning %zu\n", 578 __FILE__, __LINE__, matches)); 579 DEBUG_y(sv_dump(sv)); 580 return matches; 581 } 582 583 /* Execute a tr//. sv is the value to be translated, while PL_op 584 * should be an OP_TRANS or OP_TRANSR op, whose op_pv field contains a 585 * translation table or whose op_sv field contains an inversion map. 586 * 587 * Returns a count of number of characters translated 588 */ 589 590 Size_t 591 Perl_do_trans(pTHX_ SV *sv) 592 { 593 STRLEN len; 594 const U8 flags = PL_op->op_private; 595 bool use_utf8_fcns = cBOOL(flags & OPpTRANS_USE_SVOP); 596 bool identical = cBOOL(flags & OPpTRANS_IDENTICAL); 597 598 PERL_ARGS_ASSERT_DO_TRANS; 599 600 if (SvREADONLY(sv) && ! identical) { 601 Perl_croak_no_modify(); 602 } 603 (void)SvPV_const(sv, len); 604 if (!len) 605 return 0; 606 if (! identical) { 607 if (!SvPOKp(sv) || SvTHINKFIRST(sv)) 608 (void)SvPV_force_nomg(sv, len); 609 (void)SvPOK_only_UTF8(sv); 610 } 611 612 if (use_utf8_fcns) { 613 SV* const map = 614 #ifdef USE_ITHREADS 615 PAD_SVl(cPADOP->op_padix); 616 #else 617 MUTABLE_SV(cSVOP->op_sv); 618 #endif 619 620 if (identical) { 621 return do_trans_count_invmap(sv, (AV *) map); 622 } 623 else { 624 return do_trans_invmap(sv, (AV *) map); 625 } 626 } 627 else { 628 const OPtrans_map * const map = (OPtrans_map*)cPVOP->op_pv; 629 630 if (identical) { 631 return do_trans_count(sv, map); 632 } 633 else if (flags & (OPpTRANS_SQUASH|OPpTRANS_DELETE|OPpTRANS_COMPLEMENT)) { 634 return do_trans_complex(sv, map); 635 } 636 else 637 return do_trans_simple(sv, map); 638 } 639 } 640 641 void 642 Perl_do_join(pTHX_ SV *sv, SV *delim, SV **mark, SV **sp) 643 { 644 SV ** const oldmark = mark; 645 I32 items = sp - mark; 646 STRLEN len; 647 STRLEN delimlen; 648 const char * const delims = SvPV_const(delim, delimlen); 649 650 PERL_ARGS_ASSERT_DO_JOIN; 651 652 mark++; 653 len = (items > 0 ? (delimlen * (items - 1) ) : 0); 654 SvUPGRADE(sv, SVt_PV); 655 if (SvLEN(sv) < len + items) { /* current length is way too short */ 656 while (items-- > 0) { 657 if (*mark && !SvGAMAGIC(*mark) && SvOK(*mark)) { 658 STRLEN tmplen; 659 SvPV_const(*mark, tmplen); 660 len += tmplen; 661 } 662 mark++; 663 } 664 SvGROW(sv, len + 1); /* so try to pre-extend */ 665 666 mark = oldmark; 667 items = sp - mark; 668 ++mark; 669 } 670 671 SvPVCLEAR(sv); 672 /* sv_setpv retains old UTF8ness [perl #24846] */ 673 SvUTF8_off(sv); 674 675 if (TAINTING_get && SvMAGICAL(sv)) 676 SvTAINTED_off(sv); 677 678 if (items-- > 0) { 679 if (*mark) 680 sv_catsv(sv, *mark); 681 mark++; 682 } 683 684 if (delimlen) { 685 const U32 delimflag = DO_UTF8(delim) ? SV_CATUTF8 : SV_CATBYTES; 686 for (; items > 0; items--,mark++) { 687 STRLEN len; 688 const char *s; 689 sv_catpvn_flags(sv,delims,delimlen,delimflag); 690 s = SvPV_const(*mark,len); 691 sv_catpvn_flags(sv,s,len, 692 DO_UTF8(*mark) ? SV_CATUTF8 : SV_CATBYTES); 693 } 694 } 695 else { 696 for (; items > 0; items--,mark++) 697 { 698 STRLEN len; 699 const char *s = SvPV_const(*mark,len); 700 sv_catpvn_flags(sv,s,len, 701 DO_UTF8(*mark) ? SV_CATUTF8 : SV_CATBYTES); 702 } 703 } 704 SvSETMAGIC(sv); 705 } 706 707 void 708 Perl_do_sprintf(pTHX_ SV *sv, SSize_t len, SV **sarg) 709 { 710 STRLEN patlen; 711 const char * const pat = SvPV_const(*sarg, patlen); 712 bool do_taint = FALSE; 713 714 PERL_ARGS_ASSERT_DO_SPRINTF; 715 assert(len >= 1); 716 717 if (SvTAINTED(*sarg)) 718 TAINT_PROPER( 719 (PL_op && PL_op->op_type < OP_max) 720 ? (PL_op->op_type == OP_PRTF) 721 ? "printf" 722 : PL_op_name[PL_op->op_type] 723 : "(unknown)" 724 ); 725 SvUTF8_off(sv); 726 if (DO_UTF8(*sarg)) 727 SvUTF8_on(sv); 728 sv_vsetpvfn(sv, pat, patlen, NULL, sarg + 1, (Size_t)(len - 1), &do_taint); 729 SvSETMAGIC(sv); 730 if (do_taint) 731 SvTAINTED_on(sv); 732 } 733 734 /* currently converts input to bytes if possible, but doesn't sweat failure */ 735 UV 736 Perl_do_vecget(pTHX_ SV *sv, STRLEN offset, int size) 737 { 738 STRLEN srclen, len, avail, uoffset, bitoffs = 0; 739 const I32 svpv_flags = ((PL_op->op_flags & OPf_MOD || LVRET) 740 ? SV_UNDEF_RETURNS_NULL : 0); 741 unsigned char *s = (unsigned char *) 742 SvPV_flags(sv, srclen, (svpv_flags|SV_GMAGIC)); 743 UV retnum = 0; 744 745 if (!s) { 746 s = (unsigned char *)""; 747 } 748 749 PERL_ARGS_ASSERT_DO_VECGET; 750 751 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */ 752 Perl_croak(aTHX_ "Illegal number of bits in vec"); 753 754 if (SvUTF8(sv)) { 755 if (Perl_sv_utf8_downgrade_flags(aTHX_ sv, TRUE, 0)) { 756 /* PVX may have changed */ 757 s = (unsigned char *) SvPV_flags(sv, srclen, svpv_flags); 758 } 759 else { 760 Perl_croak(aTHX_ "Use of strings with code points over 0xFF as arguments to vec is forbidden"); 761 } 762 } 763 764 if (size < 8) { 765 bitoffs = ((offset%8)*size)%8; 766 uoffset = offset/(8/size); 767 } 768 else if (size > 8) { 769 int n = size/8; 770 if (offset > Size_t_MAX / n - 1) /* would overflow */ 771 return 0; 772 uoffset = offset*n; 773 } 774 else 775 uoffset = offset; 776 777 if (uoffset >= srclen) 778 return 0; 779 780 len = (bitoffs + size + 7)/8; /* required number of bytes */ 781 avail = srclen - uoffset; /* available number of bytes */ 782 783 /* Does the byte range overlap the end of the string? If so, 784 * handle specially. */ 785 if (avail < len) { 786 if (size <= 8) 787 retnum = 0; 788 else { 789 if (size == 16) { 790 assert(avail == 1); 791 retnum = (UV) s[uoffset] << 8; 792 } 793 else if (size == 32) { 794 assert(avail >= 1 && avail <= 3); 795 if (avail == 1) 796 retnum = 797 ((UV) s[uoffset ] << 24); 798 else if (avail == 2) 799 retnum = 800 ((UV) s[uoffset ] << 24) + 801 ((UV) s[uoffset + 1] << 16); 802 else 803 retnum = 804 ((UV) s[uoffset ] << 24) + 805 ((UV) s[uoffset + 1] << 16) + 806 ( s[uoffset + 2] << 8); 807 } 808 #ifdef UV_IS_QUAD 809 else if (size == 64) { 810 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 811 "Bit vector size > 32 non-portable"); 812 assert(avail >= 1 && avail <= 7); 813 if (avail == 1) 814 retnum = 815 (UV) s[uoffset ] << 56; 816 else if (avail == 2) 817 retnum = 818 ((UV) s[uoffset ] << 56) + 819 ((UV) s[uoffset + 1] << 48); 820 else if (avail == 3) 821 retnum = 822 ((UV) s[uoffset ] << 56) + 823 ((UV) s[uoffset + 1] << 48) + 824 ((UV) s[uoffset + 2] << 40); 825 else if (avail == 4) 826 retnum = 827 ((UV) s[uoffset ] << 56) + 828 ((UV) s[uoffset + 1] << 48) + 829 ((UV) s[uoffset + 2] << 40) + 830 ((UV) s[uoffset + 3] << 32); 831 else if (avail == 5) 832 retnum = 833 ((UV) s[uoffset ] << 56) + 834 ((UV) s[uoffset + 1] << 48) + 835 ((UV) s[uoffset + 2] << 40) + 836 ((UV) s[uoffset + 3] << 32) + 837 ((UV) s[uoffset + 4] << 24); 838 else if (avail == 6) 839 retnum = 840 ((UV) s[uoffset ] << 56) + 841 ((UV) s[uoffset + 1] << 48) + 842 ((UV) s[uoffset + 2] << 40) + 843 ((UV) s[uoffset + 3] << 32) + 844 ((UV) s[uoffset + 4] << 24) + 845 ((UV) s[uoffset + 5] << 16); 846 else 847 retnum = 848 ((UV) s[uoffset ] << 56) + 849 ((UV) s[uoffset + 1] << 48) + 850 ((UV) s[uoffset + 2] << 40) + 851 ((UV) s[uoffset + 3] << 32) + 852 ((UV) s[uoffset + 4] << 24) + 853 ((UV) s[uoffset + 5] << 16) + 854 ((UV) s[uoffset + 6] << 8); 855 } 856 #endif 857 } 858 } 859 else if (size < 8) 860 retnum = (s[uoffset] >> bitoffs) & ((1 << size) - 1); 861 else { 862 if (size == 8) 863 retnum = s[uoffset]; 864 else if (size == 16) 865 retnum = 866 ((UV) s[uoffset] << 8) + 867 s[uoffset + 1]; 868 else if (size == 32) 869 retnum = 870 ((UV) s[uoffset ] << 24) + 871 ((UV) s[uoffset + 1] << 16) + 872 ( s[uoffset + 2] << 8) + 873 s[uoffset + 3]; 874 #ifdef UV_IS_QUAD 875 else if (size == 64) { 876 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 877 "Bit vector size > 32 non-portable"); 878 retnum = 879 ((UV) s[uoffset ] << 56) + 880 ((UV) s[uoffset + 1] << 48) + 881 ((UV) s[uoffset + 2] << 40) + 882 ((UV) s[uoffset + 3] << 32) + 883 ((UV) s[uoffset + 4] << 24) + 884 ((UV) s[uoffset + 5] << 16) + 885 ( s[uoffset + 6] << 8) + 886 s[uoffset + 7]; 887 } 888 #endif 889 } 890 891 return retnum; 892 } 893 894 /* currently converts input to bytes if possible but doesn't sweat failures, 895 * although it does ensure that the string it clobbers is not marked as 896 * utf8-valid any more 897 */ 898 void 899 Perl_do_vecset(pTHX_ SV *sv) 900 { 901 STRLEN offset, bitoffs = 0; 902 int size; 903 unsigned char *s; 904 UV lval; 905 I32 mask; 906 STRLEN targlen; 907 STRLEN len; 908 SV * const targ = LvTARG(sv); 909 char errflags = LvFLAGS(sv); 910 911 PERL_ARGS_ASSERT_DO_VECSET; 912 913 /* some out-of-range errors have been deferred if/until the LV is 914 * actually written to: f(vec($s,-1,8)) is not always fatal */ 915 if (errflags) { 916 assert(!(errflags & ~(LVf_NEG_OFF|LVf_OUT_OF_RANGE))); 917 if (errflags & LVf_NEG_OFF) 918 Perl_croak_nocontext("Negative offset to vec in lvalue context"); 919 Perl_croak_nocontext("Out of memory!"); 920 } 921 922 if (!targ) 923 return; 924 s = (unsigned char*)SvPV_force_flags(targ, targlen, 925 SV_GMAGIC | SV_UNDEF_RETURNS_NULL); 926 if (SvUTF8(targ)) { 927 /* This is handled by the SvPOK_only below... 928 if (!Perl_sv_utf8_downgrade_flags(aTHX_ targ, TRUE, 0)) 929 SvUTF8_off(targ); 930 */ 931 (void) Perl_sv_utf8_downgrade_flags(aTHX_ targ, TRUE, 0); 932 } 933 934 (void)SvPOK_only(targ); 935 lval = SvUV(sv); 936 offset = LvTARGOFF(sv); 937 size = LvTARGLEN(sv); 938 939 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */ 940 Perl_croak(aTHX_ "Illegal number of bits in vec"); 941 942 if (size < 8) { 943 bitoffs = ((offset%8)*size)%8; 944 offset /= 8/size; 945 } 946 else if (size > 8) { 947 int n = size/8; 948 if (offset > Size_t_MAX / n - 1) /* would overflow */ 949 Perl_croak_nocontext("Out of memory!"); 950 offset *= n; 951 } 952 953 len = (bitoffs + size + 7)/8; /* required number of bytes */ 954 if (targlen < offset || targlen - offset < len) { 955 STRLEN newlen = offset > Size_t_MAX - len - 1 ? /* avoid overflow */ 956 Size_t_MAX : offset + len + 1; 957 s = (unsigned char*)SvGROW(targ, newlen); 958 (void)memzero((char *)(s + targlen), newlen - targlen); 959 SvCUR_set(targ, newlen - 1); 960 } 961 962 if (size < 8) { 963 mask = (1 << size) - 1; 964 lval &= mask; 965 s[offset] &= ~(mask << bitoffs); 966 s[offset] |= lval << bitoffs; 967 } 968 else { 969 if (size == 8) 970 s[offset ] = (U8)( lval & 0xff); 971 else if (size == 16) { 972 s[offset ] = (U8)((lval >> 8) & 0xff); 973 s[offset+1] = (U8)( lval & 0xff); 974 } 975 else if (size == 32) { 976 s[offset ] = (U8)((lval >> 24) & 0xff); 977 s[offset+1] = (U8)((lval >> 16) & 0xff); 978 s[offset+2] = (U8)((lval >> 8) & 0xff); 979 s[offset+3] = (U8)( lval & 0xff); 980 } 981 #ifdef UV_IS_QUAD 982 else if (size == 64) { 983 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 984 "Bit vector size > 32 non-portable"); 985 s[offset ] = (U8)((lval >> 56) & 0xff); 986 s[offset+1] = (U8)((lval >> 48) & 0xff); 987 s[offset+2] = (U8)((lval >> 40) & 0xff); 988 s[offset+3] = (U8)((lval >> 32) & 0xff); 989 s[offset+4] = (U8)((lval >> 24) & 0xff); 990 s[offset+5] = (U8)((lval >> 16) & 0xff); 991 s[offset+6] = (U8)((lval >> 8) & 0xff); 992 s[offset+7] = (U8)( lval & 0xff); 993 } 994 #endif 995 } 996 SvSETMAGIC(targ); 997 } 998 999 void 1000 Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) 1001 { 1002 long *dl; 1003 long *ll; 1004 long *rl; 1005 char *dc; 1006 STRLEN leftlen; 1007 STRLEN rightlen; 1008 const char *lc; 1009 const char *rc; 1010 STRLEN len = 0; 1011 STRLEN lensave; 1012 const char *lsave; 1013 const char *rsave; 1014 STRLEN needlen = 0; 1015 bool result_needs_to_be_utf8 = FALSE; 1016 bool left_utf8 = FALSE; 1017 bool right_utf8 = FALSE; 1018 U8 * left_non_downgraded = NULL; 1019 U8 * right_non_downgraded = NULL; 1020 Size_t left_non_downgraded_len = 0; 1021 Size_t right_non_downgraded_len = 0; 1022 char * non_downgraded = NULL; 1023 Size_t non_downgraded_len = 0; 1024 1025 PERL_ARGS_ASSERT_DO_VOP; 1026 1027 if (sv != left || (optype != OP_BIT_AND && !SvOK(sv))) 1028 SvPVCLEAR(sv); /* avoid undef warning on |= and ^= */ 1029 if (sv == left) { 1030 lc = SvPV_force_nomg(left, leftlen); 1031 } 1032 else { 1033 lc = SvPV_nomg_const(left, leftlen); 1034 SvPV_force_nomg_nolen(sv); 1035 } 1036 rc = SvPV_nomg_const(right, rightlen); 1037 1038 /* This needs to come after SvPV to ensure that string overloading has 1039 fired off. */ 1040 1041 /* Create downgraded temporaries of any UTF-8 encoded operands */ 1042 if (DO_UTF8(left)) { 1043 const U8 * save_lc = (U8 *) lc; 1044 1045 left_utf8 = TRUE; 1046 result_needs_to_be_utf8 = TRUE; 1047 1048 left_non_downgraded_len = leftlen; 1049 lc = (char *) bytes_from_utf8_loc((const U8 *) lc, &leftlen, 1050 &left_utf8, 1051 (const U8 **) &left_non_downgraded); 1052 /* Calculate the number of trailing unconvertible bytes. This quantity 1053 * is the original length minus the length of the converted portion. */ 1054 left_non_downgraded_len -= left_non_downgraded - save_lc; 1055 SAVEFREEPV(lc); 1056 } 1057 if (DO_UTF8(right)) { 1058 const U8 * save_rc = (U8 *) rc; 1059 1060 right_utf8 = TRUE; 1061 result_needs_to_be_utf8 = TRUE; 1062 1063 right_non_downgraded_len = rightlen; 1064 rc = (char *) bytes_from_utf8_loc((const U8 *) rc, &rightlen, 1065 &right_utf8, 1066 (const U8 **) &right_non_downgraded); 1067 right_non_downgraded_len -= right_non_downgraded - save_rc; 1068 SAVEFREEPV(rc); 1069 } 1070 1071 /* We set 'len' to the length that the operation actually operates on. The 1072 * dangling part of the longer operand doesn't actually participate in the 1073 * operation. What happens is that we pretend that the shorter operand has 1074 * been extended to the right by enough imaginary zeros to match the length 1075 * of the longer one. But we know in advance the result of the operation 1076 * on zeros without having to do it. In the case of '&', the result is 1077 * zero, and the dangling portion is simply discarded. For '|' and '^', the 1078 * result is the same as the other operand, so the dangling part is just 1079 * appended to the final result, unchanged. As of perl-5.32, we no longer 1080 * accept above-FF code points in the dangling portion. 1081 */ 1082 if (left_utf8 || right_utf8) { 1083 Perl_croak(aTHX_ FATAL_ABOVE_FF_MSG, PL_op_desc[optype]); 1084 } 1085 else { /* Neither is UTF-8 */ 1086 len = MIN(leftlen, rightlen); 1087 } 1088 1089 lensave = len; 1090 lsave = lc; 1091 rsave = rc; 1092 1093 (void)SvPOK_only(sv); 1094 if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) { 1095 dc = SvPV_force_nomg_nolen(sv); 1096 if (SvLEN(sv) < len + 1) { 1097 dc = SvGROW(sv, len + 1); 1098 (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1); 1099 } 1100 } 1101 else { 1102 needlen = optype == OP_BIT_AND 1103 ? len : (leftlen > rightlen ? leftlen : rightlen); 1104 Newxz(dc, needlen + 1, char); 1105 sv_usepvn_flags(sv, dc, needlen, SV_HAS_TRAILING_NUL); 1106 dc = SvPVX(sv); /* sv_usepvn() calls Renew() */ 1107 } 1108 SvCUR_set(sv, len); 1109 1110 if (len >= sizeof(long)*4 && 1111 !(PTR2nat(dc) % sizeof(long)) && 1112 !(PTR2nat(lc) % sizeof(long)) && 1113 !(PTR2nat(rc) % sizeof(long))) /* It's almost always aligned... */ 1114 { 1115 const STRLEN remainder = len % (sizeof(long)*4); 1116 len /= (sizeof(long)*4); 1117 1118 dl = (long*)dc; 1119 ll = (long*)lc; 1120 rl = (long*)rc; 1121 1122 switch (optype) { 1123 case OP_BIT_AND: 1124 while (len--) { 1125 *dl++ = *ll++ & *rl++; 1126 *dl++ = *ll++ & *rl++; 1127 *dl++ = *ll++ & *rl++; 1128 *dl++ = *ll++ & *rl++; 1129 } 1130 break; 1131 case OP_BIT_XOR: 1132 while (len--) { 1133 *dl++ = *ll++ ^ *rl++; 1134 *dl++ = *ll++ ^ *rl++; 1135 *dl++ = *ll++ ^ *rl++; 1136 *dl++ = *ll++ ^ *rl++; 1137 } 1138 break; 1139 case OP_BIT_OR: 1140 while (len--) { 1141 *dl++ = *ll++ | *rl++; 1142 *dl++ = *ll++ | *rl++; 1143 *dl++ = *ll++ | *rl++; 1144 *dl++ = *ll++ | *rl++; 1145 } 1146 } 1147 1148 dc = (char*)dl; 1149 lc = (char*)ll; 1150 rc = (char*)rl; 1151 1152 len = remainder; 1153 } 1154 1155 switch (optype) { 1156 case OP_BIT_AND: 1157 while (len--) 1158 *dc++ = *lc++ & *rc++; 1159 *dc = '\0'; 1160 break; 1161 case OP_BIT_XOR: 1162 while (len--) 1163 *dc++ = *lc++ ^ *rc++; 1164 goto mop_up; 1165 case OP_BIT_OR: 1166 while (len--) 1167 *dc++ = *lc++ | *rc++; 1168 mop_up: 1169 len = lensave; 1170 if (rightlen > len) { 1171 if (dc == rc) 1172 SvCUR_set(sv, rightlen); 1173 else 1174 sv_catpvn_nomg(sv, rsave + len, rightlen - len); 1175 } 1176 else if (leftlen > len) { 1177 if (dc == lc) 1178 SvCUR_set(sv, leftlen); 1179 else 1180 sv_catpvn_nomg(sv, lsave + len, leftlen - len); 1181 } 1182 *SvEND(sv) = '\0'; 1183 1184 /* If there is trailing stuff that couldn't be converted from UTF-8, it 1185 * is appended as-is for the ^ and | operators. This preserves 1186 * backwards compatibility */ 1187 if (right_non_downgraded) { 1188 non_downgraded = (char *) right_non_downgraded; 1189 non_downgraded_len = right_non_downgraded_len; 1190 } 1191 else if (left_non_downgraded) { 1192 non_downgraded = (char *) left_non_downgraded; 1193 non_downgraded_len = left_non_downgraded_len; 1194 } 1195 1196 break; 1197 } 1198 1199 if (result_needs_to_be_utf8) { 1200 sv_utf8_upgrade_nomg(sv); 1201 1202 /* Append any trailing UTF-8 as-is. */ 1203 if (non_downgraded) { 1204 sv_catpvn_nomg(sv, non_downgraded, non_downgraded_len); 1205 } 1206 } 1207 1208 SvTAINT(sv); 1209 } 1210 1211 1212 /* Perl_do_kv() may be: 1213 * * called directly as the pp function for pp_keys() and pp_values(); 1214 * * It may also be called directly when the op is OP_AVHVSWITCH, to 1215 * implement CORE::keys(), CORE::values(). 1216 * 1217 * In all cases it expects an HV on the stack and returns a list of keys, 1218 * values, or key-value pairs, depending on PL_op. 1219 */ 1220 1221 OP * 1222 Perl_do_kv(pTHX) 1223 { 1224 dSP; 1225 HV * const keys = MUTABLE_HV(POPs); 1226 const U8 gimme = GIMME_V; 1227 1228 const I32 dokeys = (PL_op->op_type == OP_KEYS) 1229 || ( PL_op->op_type == OP_AVHVSWITCH 1230 && (PL_op->op_private & OPpAVHVSWITCH_MASK) 1231 + OP_EACH == OP_KEYS); 1232 1233 const I32 dovalues = (PL_op->op_type == OP_VALUES) 1234 || ( PL_op->op_type == OP_AVHVSWITCH 1235 && (PL_op->op_private & OPpAVHVSWITCH_MASK) 1236 + OP_EACH == OP_VALUES); 1237 1238 assert( PL_op->op_type == OP_KEYS 1239 || PL_op->op_type == OP_VALUES 1240 || PL_op->op_type == OP_AVHVSWITCH); 1241 1242 assert(!( PL_op->op_type == OP_VALUES 1243 && (PL_op->op_private & OPpMAYBE_LVSUB))); 1244 1245 (void)hv_iterinit(keys); /* always reset iterator regardless */ 1246 1247 if (gimme == G_VOID) 1248 RETURN; 1249 1250 if (gimme == G_SCALAR) { 1251 if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */ 1252 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */ 1253 sv_magic(ret, NULL, PERL_MAGIC_nkeys, NULL, 0); 1254 LvTYPE(ret) = 'k'; 1255 LvTARG(ret) = SvREFCNT_inc_simple(keys); 1256 PUSHs(ret); 1257 } 1258 else { 1259 IV i; 1260 dTARGET; 1261 1262 /* note that in 'scalar(keys %h)' the OP_KEYS is usually 1263 * optimised away and the action is performed directly by the 1264 * padhv or rv2hv op. We now only get here via OP_AVHVSWITCH 1265 * and \&CORE::keys 1266 */ 1267 if (! SvTIED_mg((const SV *)keys, PERL_MAGIC_tied) ) { 1268 i = HvUSEDKEYS(keys); 1269 } 1270 else { 1271 i = 0; 1272 while (hv_iternext(keys)) i++; 1273 } 1274 PUSHi( i ); 1275 } 1276 RETURN; 1277 } 1278 1279 if (UNLIKELY(PL_op->op_private & OPpMAYBE_LVSUB)) { 1280 const I32 flags = is_lvalue_sub(); 1281 if (flags && !(flags & OPpENTERSUB_INARGS)) 1282 /* diag_listed_as: Can't modify %s in %s */ 1283 Perl_croak(aTHX_ "Can't modify keys in list assignment"); 1284 } 1285 1286 PUTBACK; 1287 hv_pushkv(keys, (dokeys | (dovalues << 1))); 1288 return NORMAL; 1289 } 1290 1291 /* 1292 * ex: set ts=8 sts=4 sw=4 et: 1293 */ 1294