1 /* doop.c 2 * 3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 4 * 2001, 2002, 2004, 2005, 2006, 2007, 2008, 2009 by Larry Wall and others 5 * 6 * You may distribute under the terms of either the GNU General Public 7 * License or the Artistic License, as specified in the README file. 8 * 9 */ 10 11 /* 12 * 'So that was the job I felt I had to do when I started,' thought Sam. 13 * 14 * [p.934 of _The Lord of the Rings_, VI/iii: "Mount Doom"] 15 */ 16 17 /* This file contains some common functions needed to carry out certain 18 * ops. For example, both pp_sprintf() and pp_prtf() call the function 19 * do_printf() found in this file. 20 */ 21 22 #include "EXTERN.h" 23 #define PERL_IN_DOOP_C 24 #include "perl.h" 25 26 #ifndef PERL_MICRO 27 #include <signal.h> 28 #endif 29 30 STATIC I32 31 S_do_trans_simple(pTHX_ SV * const sv) 32 { 33 dVAR; 34 I32 matches = 0; 35 STRLEN len; 36 U8 *s = (U8*)SvPV_nomg(sv,len); 37 U8 * const send = s+len; 38 const short * const tbl = (short*)cPVOP->op_pv; 39 40 PERL_ARGS_ASSERT_DO_TRANS_SIMPLE; 41 42 if (!tbl) 43 Perl_croak(aTHX_ "panic: do_trans_simple line %d",__LINE__); 44 45 /* First, take care of non-UTF-8 input strings, because they're easy */ 46 if (!SvUTF8(sv)) { 47 while (s < send) { 48 const I32 ch = tbl[*s]; 49 if (ch >= 0) { 50 matches++; 51 *s = (U8)ch; 52 } 53 s++; 54 } 55 SvSETMAGIC(sv); 56 } 57 else { 58 const I32 grows = PL_op->op_private & OPpTRANS_GROWS; 59 U8 *d; 60 U8 *dstart; 61 62 /* Allow for expansion: $_="a".chr(400); tr/a/\xFE/, FE needs encoding */ 63 if (grows) 64 Newx(d, len*2+1, U8); 65 else 66 d = s; 67 dstart = d; 68 while (s < send) { 69 STRLEN ulen; 70 I32 ch; 71 72 /* Need to check this, otherwise 128..255 won't match */ 73 const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT); 74 if (c < 0x100 && (ch = tbl[c]) >= 0) { 75 matches++; 76 d = uvchr_to_utf8(d, ch); 77 s += ulen; 78 } 79 else { /* No match -> copy */ 80 Move(s, d, ulen, U8); 81 d += ulen; 82 s += ulen; 83 } 84 } 85 if (grows) { 86 sv_setpvn(sv, (char*)dstart, d - dstart); 87 Safefree(dstart); 88 } 89 else { 90 *d = '\0'; 91 SvCUR_set(sv, d - dstart); 92 } 93 SvUTF8_on(sv); 94 SvSETMAGIC(sv); 95 } 96 return matches; 97 } 98 99 STATIC I32 100 S_do_trans_count(pTHX_ SV * const sv) 101 { 102 dVAR; 103 STRLEN len; 104 const U8 *s = (const U8*)SvPV_nomg_const(sv, len); 105 const U8 * const send = s + len; 106 I32 matches = 0; 107 const short * const tbl = (short*)cPVOP->op_pv; 108 109 PERL_ARGS_ASSERT_DO_TRANS_COUNT; 110 111 if (!tbl) 112 Perl_croak(aTHX_ "panic: do_trans_count line %d",__LINE__); 113 114 if (!SvUTF8(sv)) { 115 while (s < send) { 116 if (tbl[*s++] >= 0) 117 matches++; 118 } 119 } 120 else { 121 const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT; 122 while (s < send) { 123 STRLEN ulen; 124 const UV c = utf8n_to_uvchr(s, send - s, &ulen, UTF8_ALLOW_DEFAULT); 125 if (c < 0x100) { 126 if (tbl[c] >= 0) 127 matches++; 128 } else if (complement) 129 matches++; 130 s += ulen; 131 } 132 } 133 134 return matches; 135 } 136 137 STATIC I32 138 S_do_trans_complex(pTHX_ SV * const sv) 139 { 140 dVAR; 141 STRLEN len; 142 U8 *s = (U8*)SvPV_nomg(sv, len); 143 U8 * const send = s+len; 144 I32 matches = 0; 145 const short * const tbl = (short*)cPVOP->op_pv; 146 147 PERL_ARGS_ASSERT_DO_TRANS_COMPLEX; 148 149 if (!tbl) 150 Perl_croak(aTHX_ "panic: do_trans_complex line %d",__LINE__); 151 152 if (!SvUTF8(sv)) { 153 U8 *d = s; 154 U8 * const dstart = d; 155 156 if (PL_op->op_private & OPpTRANS_SQUASH) { 157 const U8* p = send; 158 while (s < send) { 159 const I32 ch = tbl[*s]; 160 if (ch >= 0) { 161 *d = (U8)ch; 162 matches++; 163 if (p != d - 1 || *p != *d) 164 p = d++; 165 } 166 else if (ch == -1) /* -1 is unmapped character */ 167 *d++ = *s; 168 else if (ch == -2) /* -2 is delete character */ 169 matches++; 170 s++; 171 } 172 } 173 else { 174 while (s < send) { 175 const I32 ch = tbl[*s]; 176 if (ch >= 0) { 177 matches++; 178 *d++ = (U8)ch; 179 } 180 else if (ch == -1) /* -1 is unmapped character */ 181 *d++ = *s; 182 else if (ch == -2) /* -2 is delete character */ 183 matches++; 184 s++; 185 } 186 } 187 *d = '\0'; 188 SvCUR_set(sv, d - dstart); 189 } 190 else { /* is utf8 */ 191 const I32 complement = PL_op->op_private & OPpTRANS_COMPLEMENT; 192 const I32 grows = PL_op->op_private & OPpTRANS_GROWS; 193 const I32 del = PL_op->op_private & OPpTRANS_DELETE; 194 U8 *d; 195 U8 *dstart; 196 STRLEN rlen = 0; 197 198 if (grows) 199 Newx(d, len*2+1, U8); 200 else 201 d = s; 202 dstart = d; 203 if (complement && !del) 204 rlen = tbl[0x100]; 205 206 if (PL_op->op_private & OPpTRANS_SQUASH) { 207 UV pch = 0xfeedface; 208 while (s < send) { 209 STRLEN len; 210 const UV comp = utf8n_to_uvchr(s, send - s, &len, 211 UTF8_ALLOW_DEFAULT); 212 I32 ch; 213 214 if (comp > 0xff) { 215 if (!complement) { 216 Move(s, d, len, U8); 217 d += len; 218 } 219 else { 220 matches++; 221 if (!del) { 222 ch = (rlen == 0) ? (I32)comp : 223 (comp - 0x100 < rlen) ? 224 tbl[comp+1] : tbl[0x100+rlen]; 225 if ((UV)ch != pch) { 226 d = uvchr_to_utf8(d, ch); 227 pch = (UV)ch; 228 } 229 s += len; 230 continue; 231 } 232 } 233 } 234 else if ((ch = tbl[comp]) >= 0) { 235 matches++; 236 if ((UV)ch != pch) { 237 d = uvchr_to_utf8(d, ch); 238 pch = (UV)ch; 239 } 240 s += len; 241 continue; 242 } 243 else if (ch == -1) { /* -1 is unmapped character */ 244 Move(s, d, len, U8); 245 d += len; 246 } 247 else if (ch == -2) /* -2 is delete character */ 248 matches++; 249 s += len; 250 pch = 0xfeedface; 251 } 252 } 253 else { 254 while (s < send) { 255 STRLEN len; 256 const UV comp = utf8n_to_uvchr(s, send - s, &len, 257 UTF8_ALLOW_DEFAULT); 258 I32 ch; 259 if (comp > 0xff) { 260 if (!complement) { 261 Move(s, d, len, U8); 262 d += len; 263 } 264 else { 265 matches++; 266 if (!del) { 267 if (comp - 0x100 < rlen) 268 d = uvchr_to_utf8(d, tbl[comp+1]); 269 else 270 d = uvchr_to_utf8(d, tbl[0x100+rlen]); 271 } 272 } 273 } 274 else if ((ch = tbl[comp]) >= 0) { 275 d = uvchr_to_utf8(d, ch); 276 matches++; 277 } 278 else if (ch == -1) { /* -1 is unmapped character */ 279 Move(s, d, len, U8); 280 d += len; 281 } 282 else if (ch == -2) /* -2 is delete character */ 283 matches++; 284 s += len; 285 } 286 } 287 if (grows) { 288 sv_setpvn(sv, (char*)dstart, d - dstart); 289 Safefree(dstart); 290 } 291 else { 292 *d = '\0'; 293 SvCUR_set(sv, d - dstart); 294 } 295 SvUTF8_on(sv); 296 } 297 SvSETMAGIC(sv); 298 return matches; 299 } 300 301 STATIC I32 302 S_do_trans_simple_utf8(pTHX_ SV * const sv) 303 { 304 dVAR; 305 U8 *s; 306 U8 *send; 307 U8 *d; 308 U8 *start; 309 U8 *dstart, *dend; 310 I32 matches = 0; 311 const I32 grows = PL_op->op_private & OPpTRANS_GROWS; 312 STRLEN len; 313 SV* const rv = 314 #ifdef USE_ITHREADS 315 PAD_SVl(cPADOP->op_padix); 316 #else 317 MUTABLE_SV(cSVOP->op_sv); 318 #endif 319 HV* const hv = MUTABLE_HV(SvRV(rv)); 320 SV* const * svp = hv_fetchs(hv, "NONE", FALSE); 321 const UV none = svp ? SvUV(*svp) : 0x7fffffff; 322 const UV extra = none + 1; 323 UV final = 0; 324 U8 hibit = 0; 325 326 PERL_ARGS_ASSERT_DO_TRANS_SIMPLE_UTF8; 327 328 s = (U8*)SvPV_nomg(sv, len); 329 if (!SvUTF8(sv)) { 330 const U8 *t = s; 331 const U8 * const e = s + len; 332 while (t < e) { 333 const U8 ch = *t++; 334 hibit = !NATIVE_BYTE_IS_INVARIANT(ch); 335 if (hibit) { 336 s = bytes_to_utf8(s, &len); 337 break; 338 } 339 } 340 } 341 send = s + len; 342 start = s; 343 344 svp = hv_fetchs(hv, "FINAL", FALSE); 345 if (svp) 346 final = SvUV(*svp); 347 348 if (grows) { 349 /* d needs to be bigger than s, in case e.g. upgrading is required */ 350 Newx(d, len * 3 + UTF8_MAXBYTES, U8); 351 dend = d + len * 3; 352 dstart = d; 353 } 354 else { 355 dstart = d = s; 356 dend = d + len; 357 } 358 359 while (s < send) { 360 const UV uv = swash_fetch(rv, s, TRUE); 361 if (uv < none) { 362 s += UTF8SKIP(s); 363 matches++; 364 d = uvchr_to_utf8(d, uv); 365 } 366 else if (uv == none) { 367 const int i = UTF8SKIP(s); 368 Move(s, d, i, U8); 369 d += i; 370 s += i; 371 } 372 else if (uv == extra) { 373 s += UTF8SKIP(s); 374 matches++; 375 d = uvchr_to_utf8(d, final); 376 } 377 else 378 s += UTF8SKIP(s); 379 380 if (d > dend) { 381 const STRLEN clen = d - dstart; 382 const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES; 383 if (!grows) 384 Perl_croak(aTHX_ "panic: do_trans_simple_utf8 line %d",__LINE__); 385 Renew(dstart, nlen + UTF8_MAXBYTES, U8); 386 d = dstart + clen; 387 dend = dstart + nlen; 388 } 389 } 390 if (grows || hibit) { 391 sv_setpvn(sv, (char*)dstart, d - dstart); 392 Safefree(dstart); 393 if (grows && hibit) 394 Safefree(start); 395 } 396 else { 397 *d = '\0'; 398 SvCUR_set(sv, d - dstart); 399 } 400 SvSETMAGIC(sv); 401 SvUTF8_on(sv); 402 403 return matches; 404 } 405 406 STATIC I32 407 S_do_trans_count_utf8(pTHX_ SV * const sv) 408 { 409 dVAR; 410 const U8 *s; 411 const U8 *start = NULL; 412 const U8 *send; 413 I32 matches = 0; 414 STRLEN len; 415 SV* const rv = 416 #ifdef USE_ITHREADS 417 PAD_SVl(cPADOP->op_padix); 418 #else 419 MUTABLE_SV(cSVOP->op_sv); 420 #endif 421 HV* const hv = MUTABLE_HV(SvRV(rv)); 422 SV* const * const svp = hv_fetchs(hv, "NONE", FALSE); 423 const UV none = svp ? SvUV(*svp) : 0x7fffffff; 424 const UV extra = none + 1; 425 U8 hibit = 0; 426 427 PERL_ARGS_ASSERT_DO_TRANS_COUNT_UTF8; 428 429 s = (const U8*)SvPV_nomg_const(sv, len); 430 if (!SvUTF8(sv)) { 431 const U8 *t = s; 432 const U8 * const e = s + len; 433 while (t < e) { 434 const U8 ch = *t++; 435 hibit = !NATIVE_BYTE_IS_INVARIANT(ch); 436 if (hibit) { 437 start = s = bytes_to_utf8(s, &len); 438 break; 439 } 440 } 441 } 442 send = s + len; 443 444 while (s < send) { 445 const UV uv = swash_fetch(rv, s, TRUE); 446 if (uv < none || uv == extra) 447 matches++; 448 s += UTF8SKIP(s); 449 } 450 if (hibit) 451 Safefree(start); 452 453 return matches; 454 } 455 456 STATIC I32 457 S_do_trans_complex_utf8(pTHX_ SV * const sv) 458 { 459 dVAR; 460 U8 *start, *send; 461 U8 *d; 462 I32 matches = 0; 463 const I32 squash = PL_op->op_private & OPpTRANS_SQUASH; 464 const I32 del = PL_op->op_private & OPpTRANS_DELETE; 465 const I32 grows = PL_op->op_private & OPpTRANS_GROWS; 466 SV* const rv = 467 #ifdef USE_ITHREADS 468 PAD_SVl(cPADOP->op_padix); 469 #else 470 MUTABLE_SV(cSVOP->op_sv); 471 #endif 472 HV * const hv = MUTABLE_HV(SvRV(rv)); 473 SV * const *svp = hv_fetchs(hv, "NONE", FALSE); 474 const UV none = svp ? SvUV(*svp) : 0x7fffffff; 475 const UV extra = none + 1; 476 UV final = 0; 477 bool havefinal = FALSE; 478 STRLEN len; 479 U8 *dstart, *dend; 480 U8 hibit = 0; 481 U8 *s = (U8*)SvPV_nomg(sv, len); 482 483 PERL_ARGS_ASSERT_DO_TRANS_COMPLEX_UTF8; 484 485 if (!SvUTF8(sv)) { 486 const U8 *t = s; 487 const U8 * const e = s + len; 488 while (t < e) { 489 const U8 ch = *t++; 490 hibit = !NATIVE_BYTE_IS_INVARIANT(ch); 491 if (hibit) { 492 s = bytes_to_utf8(s, &len); 493 break; 494 } 495 } 496 } 497 send = s + len; 498 start = s; 499 500 svp = hv_fetchs(hv, "FINAL", FALSE); 501 if (svp) { 502 final = SvUV(*svp); 503 havefinal = TRUE; 504 } 505 506 if (grows) { 507 /* d needs to be bigger than s, in case e.g. upgrading is required */ 508 Newx(d, len * 3 + UTF8_MAXBYTES, U8); 509 dend = d + len * 3; 510 dstart = d; 511 } 512 else { 513 dstart = d = s; 514 dend = d + len; 515 } 516 517 if (squash) { 518 UV puv = 0xfeedface; 519 while (s < send) { 520 UV uv = swash_fetch(rv, s, TRUE); 521 522 if (d > dend) { 523 const STRLEN clen = d - dstart; 524 const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES; 525 if (!grows) 526 Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__); 527 Renew(dstart, nlen + UTF8_MAXBYTES, U8); 528 d = dstart + clen; 529 dend = dstart + nlen; 530 } 531 if (uv < none) { 532 matches++; 533 s += UTF8SKIP(s); 534 if (uv != puv) { 535 d = uvchr_to_utf8(d, uv); 536 puv = uv; 537 } 538 continue; 539 } 540 else if (uv == none) { /* "none" is unmapped character */ 541 const int i = UTF8SKIP(s); 542 Move(s, d, i, U8); 543 d += i; 544 s += i; 545 puv = 0xfeedface; 546 continue; 547 } 548 else if (uv == extra && !del) { 549 matches++; 550 if (havefinal) { 551 s += UTF8SKIP(s); 552 if (puv != final) { 553 d = uvchr_to_utf8(d, final); 554 puv = final; 555 } 556 } 557 else { 558 STRLEN len; 559 uv = utf8n_to_uvchr(s, send - s, &len, UTF8_ALLOW_DEFAULT); 560 if (uv != puv) { 561 Move(s, d, len, U8); 562 d += len; 563 puv = uv; 564 } 565 s += len; 566 } 567 continue; 568 } 569 matches++; /* "none+1" is delete character */ 570 s += UTF8SKIP(s); 571 } 572 } 573 else { 574 while (s < send) { 575 const UV uv = swash_fetch(rv, s, TRUE); 576 if (d > dend) { 577 const STRLEN clen = d - dstart; 578 const STRLEN nlen = dend - dstart + len + UTF8_MAXBYTES; 579 if (!grows) 580 Perl_croak(aTHX_ "panic: do_trans_complex_utf8 line %d",__LINE__); 581 Renew(dstart, nlen + UTF8_MAXBYTES, U8); 582 d = dstart + clen; 583 dend = dstart + nlen; 584 } 585 if (uv < none) { 586 matches++; 587 s += UTF8SKIP(s); 588 d = uvchr_to_utf8(d, uv); 589 continue; 590 } 591 else if (uv == none) { /* "none" is unmapped character */ 592 const int i = UTF8SKIP(s); 593 Move(s, d, i, U8); 594 d += i; 595 s += i; 596 continue; 597 } 598 else if (uv == extra && !del) { 599 matches++; 600 s += UTF8SKIP(s); 601 d = uvchr_to_utf8(d, final); 602 continue; 603 } 604 matches++; /* "none+1" is delete character */ 605 s += UTF8SKIP(s); 606 } 607 } 608 if (grows || hibit) { 609 sv_setpvn(sv, (char*)dstart, d - dstart); 610 Safefree(dstart); 611 if (grows && hibit) 612 Safefree(start); 613 } 614 else { 615 *d = '\0'; 616 SvCUR_set(sv, d - dstart); 617 } 618 SvUTF8_on(sv); 619 SvSETMAGIC(sv); 620 621 return matches; 622 } 623 624 I32 625 Perl_do_trans(pTHX_ SV *sv) 626 { 627 dVAR; 628 STRLEN len; 629 const I32 hasutf = (PL_op->op_private & 630 (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF)); 631 632 PERL_ARGS_ASSERT_DO_TRANS; 633 634 if (SvREADONLY(sv) && !(PL_op->op_private & OPpTRANS_IDENTICAL)) { 635 Perl_croak_no_modify(); 636 } 637 (void)SvPV_const(sv, len); 638 if (!len) 639 return 0; 640 if (!(PL_op->op_private & OPpTRANS_IDENTICAL)) { 641 if (!SvPOKp(sv) || SvTHINKFIRST(sv)) 642 (void)SvPV_force_nomg(sv, len); 643 (void)SvPOK_only_UTF8(sv); 644 } 645 646 DEBUG_t( Perl_deb(aTHX_ "2.TBL\n")); 647 648 switch (PL_op->op_private & ~hasutf & ( 649 OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF|OPpTRANS_IDENTICAL| 650 OPpTRANS_SQUASH|OPpTRANS_DELETE|OPpTRANS_COMPLEMENT)) { 651 case 0: 652 if (hasutf) 653 return do_trans_simple_utf8(sv); 654 else 655 return do_trans_simple(sv); 656 657 case OPpTRANS_IDENTICAL: 658 case OPpTRANS_IDENTICAL|OPpTRANS_COMPLEMENT: 659 if (hasutf) 660 return do_trans_count_utf8(sv); 661 else 662 return do_trans_count(sv); 663 664 default: 665 if (hasutf) 666 return do_trans_complex_utf8(sv); 667 else 668 return do_trans_complex(sv); 669 } 670 } 671 672 void 673 Perl_do_join(pTHX_ SV *sv, SV *delim, SV **mark, SV **sp) 674 { 675 dVAR; 676 SV ** const oldmark = mark; 677 I32 items = sp - mark; 678 STRLEN len; 679 STRLEN delimlen; 680 681 PERL_ARGS_ASSERT_DO_JOIN; 682 683 (void) SvPV_const(delim, delimlen); /* stringify and get the delimlen */ 684 /* SvCUR assumes it's SvPOK() and woe betide you if it's not. */ 685 686 mark++; 687 len = (items > 0 ? (delimlen * (items - 1) ) : 0); 688 SvUPGRADE(sv, SVt_PV); 689 if (SvLEN(sv) < len + items) { /* current length is way too short */ 690 while (items-- > 0) { 691 if (*mark && !SvGAMAGIC(*mark) && SvOK(*mark)) { 692 STRLEN tmplen; 693 SvPV_const(*mark, tmplen); 694 len += tmplen; 695 } 696 mark++; 697 } 698 SvGROW(sv, len + 1); /* so try to pre-extend */ 699 700 mark = oldmark; 701 items = sp - mark; 702 ++mark; 703 } 704 705 sv_setpvs(sv, ""); 706 /* sv_setpv retains old UTF8ness [perl #24846] */ 707 SvUTF8_off(sv); 708 709 if (TAINTING_get && SvMAGICAL(sv)) 710 SvTAINTED_off(sv); 711 712 if (items-- > 0) { 713 if (*mark) 714 sv_catsv(sv, *mark); 715 mark++; 716 } 717 718 if (delimlen) { 719 for (; items > 0; items--,mark++) { 720 sv_catsv_nomg(sv,delim); 721 sv_catsv(sv,*mark); 722 } 723 } 724 else { 725 for (; items > 0; items--,mark++) 726 sv_catsv(sv,*mark); 727 } 728 SvSETMAGIC(sv); 729 } 730 731 void 732 Perl_do_sprintf(pTHX_ SV *sv, I32 len, SV **sarg) 733 { 734 dVAR; 735 STRLEN patlen; 736 const char * const pat = SvPV_const(*sarg, patlen); 737 bool do_taint = FALSE; 738 739 PERL_ARGS_ASSERT_DO_SPRINTF; 740 741 if (SvTAINTED(*sarg)) 742 TAINT_PROPER( 743 (PL_op && PL_op->op_type < OP_max) 744 ? (PL_op->op_type == OP_PRTF) 745 ? "printf" 746 : PL_op_name[PL_op->op_type] 747 : "(unknown)" 748 ); 749 SvUTF8_off(sv); 750 if (DO_UTF8(*sarg)) 751 SvUTF8_on(sv); 752 sv_vsetpvfn(sv, pat, patlen, NULL, sarg + 1, len - 1, &do_taint); 753 SvSETMAGIC(sv); 754 if (do_taint) 755 SvTAINTED_on(sv); 756 } 757 758 /* currently converts input to bytes if possible, but doesn't sweat failure */ 759 UV 760 Perl_do_vecget(pTHX_ SV *sv, SSize_t offset, int size) 761 { 762 dVAR; 763 STRLEN srclen, len, uoffset, bitoffs = 0; 764 const I32 svpv_flags = ((PL_op->op_flags & OPf_MOD || LVRET) 765 ? SV_UNDEF_RETURNS_NULL : 0); 766 unsigned char *s = (unsigned char *) 767 SvPV_flags(sv, srclen, (svpv_flags|SV_GMAGIC)); 768 UV retnum = 0; 769 770 if (!s) { 771 s = (unsigned char *)""; 772 } 773 774 PERL_ARGS_ASSERT_DO_VECGET; 775 776 if (offset < 0) 777 return 0; 778 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */ 779 Perl_croak(aTHX_ "Illegal number of bits in vec"); 780 781 if (SvUTF8(sv)) { 782 (void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE); 783 /* PVX may have changed */ 784 s = (unsigned char *) SvPV_flags(sv, srclen, svpv_flags); 785 } 786 787 if (size < 8) { 788 bitoffs = ((offset%8)*size)%8; 789 uoffset = offset/(8/size); 790 } 791 else if (size > 8) 792 uoffset = offset*(size/8); 793 else 794 uoffset = offset; 795 796 len = uoffset + (bitoffs + size + 7)/8; /* required number of bytes */ 797 if (len > srclen) { 798 if (size <= 8) 799 retnum = 0; 800 else { 801 if (size == 16) { 802 if (uoffset >= srclen) 803 retnum = 0; 804 else 805 retnum = (UV) s[uoffset] << 8; 806 } 807 else if (size == 32) { 808 if (uoffset >= srclen) 809 retnum = 0; 810 else if (uoffset + 1 >= srclen) 811 retnum = 812 ((UV) s[uoffset ] << 24); 813 else if (uoffset + 2 >= srclen) 814 retnum = 815 ((UV) s[uoffset ] << 24) + 816 ((UV) s[uoffset + 1] << 16); 817 else 818 retnum = 819 ((UV) s[uoffset ] << 24) + 820 ((UV) s[uoffset + 1] << 16) + 821 ( s[uoffset + 2] << 8); 822 } 823 #ifdef UV_IS_QUAD 824 else if (size == 64) { 825 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 826 "Bit vector size > 32 non-portable"); 827 if (uoffset >= srclen) 828 retnum = 0; 829 else if (uoffset + 1 >= srclen) 830 retnum = 831 (UV) s[uoffset ] << 56; 832 else if (uoffset + 2 >= srclen) 833 retnum = 834 ((UV) s[uoffset ] << 56) + 835 ((UV) s[uoffset + 1] << 48); 836 else if (uoffset + 3 >= srclen) 837 retnum = 838 ((UV) s[uoffset ] << 56) + 839 ((UV) s[uoffset + 1] << 48) + 840 ((UV) s[uoffset + 2] << 40); 841 else if (uoffset + 4 >= srclen) 842 retnum = 843 ((UV) s[uoffset ] << 56) + 844 ((UV) s[uoffset + 1] << 48) + 845 ((UV) s[uoffset + 2] << 40) + 846 ((UV) s[uoffset + 3] << 32); 847 else if (uoffset + 5 >= srclen) 848 retnum = 849 ((UV) s[uoffset ] << 56) + 850 ((UV) s[uoffset + 1] << 48) + 851 ((UV) s[uoffset + 2] << 40) + 852 ((UV) s[uoffset + 3] << 32) + 853 ( s[uoffset + 4] << 24); 854 else if (uoffset + 6 >= srclen) 855 retnum = 856 ((UV) s[uoffset ] << 56) + 857 ((UV) s[uoffset + 1] << 48) + 858 ((UV) s[uoffset + 2] << 40) + 859 ((UV) s[uoffset + 3] << 32) + 860 ((UV) s[uoffset + 4] << 24) + 861 ((UV) s[uoffset + 5] << 16); 862 else 863 retnum = 864 ((UV) s[uoffset ] << 56) + 865 ((UV) s[uoffset + 1] << 48) + 866 ((UV) s[uoffset + 2] << 40) + 867 ((UV) s[uoffset + 3] << 32) + 868 ((UV) s[uoffset + 4] << 24) + 869 ((UV) s[uoffset + 5] << 16) + 870 ( s[uoffset + 6] << 8); 871 } 872 #endif 873 } 874 } 875 else if (size < 8) 876 retnum = (s[uoffset] >> bitoffs) & ((1 << size) - 1); 877 else { 878 if (size == 8) 879 retnum = s[uoffset]; 880 else if (size == 16) 881 retnum = 882 ((UV) s[uoffset] << 8) + 883 s[uoffset + 1]; 884 else if (size == 32) 885 retnum = 886 ((UV) s[uoffset ] << 24) + 887 ((UV) s[uoffset + 1] << 16) + 888 ( s[uoffset + 2] << 8) + 889 s[uoffset + 3]; 890 #ifdef UV_IS_QUAD 891 else if (size == 64) { 892 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 893 "Bit vector size > 32 non-portable"); 894 retnum = 895 ((UV) s[uoffset ] << 56) + 896 ((UV) s[uoffset + 1] << 48) + 897 ((UV) s[uoffset + 2] << 40) + 898 ((UV) s[uoffset + 3] << 32) + 899 ((UV) s[uoffset + 4] << 24) + 900 ((UV) s[uoffset + 5] << 16) + 901 ( s[uoffset + 6] << 8) + 902 s[uoffset + 7]; 903 } 904 #endif 905 } 906 907 return retnum; 908 } 909 910 /* currently converts input to bytes if possible but doesn't sweat failures, 911 * although it does ensure that the string it clobbers is not marked as 912 * utf8-valid any more 913 */ 914 void 915 Perl_do_vecset(pTHX_ SV *sv) 916 { 917 dVAR; 918 SSize_t offset, bitoffs = 0; 919 int size; 920 unsigned char *s; 921 UV lval; 922 I32 mask; 923 STRLEN targlen; 924 STRLEN len; 925 SV * const targ = LvTARG(sv); 926 927 PERL_ARGS_ASSERT_DO_VECSET; 928 929 if (!targ) 930 return; 931 s = (unsigned char*)SvPV_force_flags(targ, targlen, 932 SV_GMAGIC | SV_UNDEF_RETURNS_NULL); 933 if (SvUTF8(targ)) { 934 /* This is handled by the SvPOK_only below... 935 if (!Perl_sv_utf8_downgrade(aTHX_ targ, TRUE)) 936 SvUTF8_off(targ); 937 */ 938 (void) Perl_sv_utf8_downgrade(aTHX_ targ, TRUE); 939 } 940 941 (void)SvPOK_only(targ); 942 lval = SvUV(sv); 943 offset = LvTARGOFF(sv); 944 if (offset < 0) 945 Perl_croak(aTHX_ "Negative offset to vec in lvalue context"); 946 size = LvTARGLEN(sv); 947 if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */ 948 Perl_croak(aTHX_ "Illegal number of bits in vec"); 949 950 if (size < 8) { 951 bitoffs = ((offset%8)*size)%8; 952 offset /= 8/size; 953 } 954 else if (size > 8) 955 offset *= size/8; 956 957 len = offset + (bitoffs + size + 7)/8; /* required number of bytes */ 958 if (len > targlen) { 959 s = (unsigned char*)SvGROW(targ, len + 1); 960 (void)memzero((char *)(s + targlen), len - targlen + 1); 961 SvCUR_set(targ, len); 962 } 963 964 if (size < 8) { 965 mask = (1 << size) - 1; 966 lval &= mask; 967 s[offset] &= ~(mask << bitoffs); 968 s[offset] |= lval << bitoffs; 969 } 970 else { 971 if (size == 8) 972 s[offset ] = (U8)( lval & 0xff); 973 else if (size == 16) { 974 s[offset ] = (U8)((lval >> 8) & 0xff); 975 s[offset+1] = (U8)( lval & 0xff); 976 } 977 else if (size == 32) { 978 s[offset ] = (U8)((lval >> 24) & 0xff); 979 s[offset+1] = (U8)((lval >> 16) & 0xff); 980 s[offset+2] = (U8)((lval >> 8) & 0xff); 981 s[offset+3] = (U8)( lval & 0xff); 982 } 983 #ifdef UV_IS_QUAD 984 else if (size == 64) { 985 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE), 986 "Bit vector size > 32 non-portable"); 987 s[offset ] = (U8)((lval >> 56) & 0xff); 988 s[offset+1] = (U8)((lval >> 48) & 0xff); 989 s[offset+2] = (U8)((lval >> 40) & 0xff); 990 s[offset+3] = (U8)((lval >> 32) & 0xff); 991 s[offset+4] = (U8)((lval >> 24) & 0xff); 992 s[offset+5] = (U8)((lval >> 16) & 0xff); 993 s[offset+6] = (U8)((lval >> 8) & 0xff); 994 s[offset+7] = (U8)( lval & 0xff); 995 } 996 #endif 997 } 998 SvSETMAGIC(targ); 999 } 1000 1001 void 1002 Perl_do_vop(pTHX_ I32 optype, SV *sv, SV *left, SV *right) 1003 { 1004 dVAR; 1005 #ifdef LIBERAL 1006 long *dl; 1007 long *ll; 1008 long *rl; 1009 #endif 1010 char *dc; 1011 STRLEN leftlen; 1012 STRLEN rightlen; 1013 const char *lc; 1014 const char *rc; 1015 STRLEN len; 1016 STRLEN lensave; 1017 const char *lsave; 1018 const char *rsave; 1019 bool left_utf; 1020 bool right_utf; 1021 STRLEN needlen = 0; 1022 1023 PERL_ARGS_ASSERT_DO_VOP; 1024 1025 if (sv != left || (optype != OP_BIT_AND && !SvOK(sv))) 1026 sv_setpvs(sv, ""); /* avoid undef warning on |= and ^= */ 1027 if (sv == left) { 1028 lsave = lc = SvPV_force_nomg(left, leftlen); 1029 } 1030 else { 1031 lsave = lc = SvPV_nomg_const(left, leftlen); 1032 SvPV_force_nomg_nolen(sv); 1033 } 1034 rsave = rc = SvPV_nomg_const(right, rightlen); 1035 1036 /* This need to come after SvPV to ensure that string overloading has 1037 fired off. */ 1038 1039 left_utf = DO_UTF8(left); 1040 right_utf = DO_UTF8(right); 1041 1042 if (left_utf && !right_utf) { 1043 /* Avoid triggering overloading again by using temporaries. 1044 Maybe there should be a variant of sv_utf8_upgrade that takes pvn 1045 */ 1046 right = newSVpvn_flags(rsave, rightlen, SVs_TEMP); 1047 sv_utf8_upgrade(right); 1048 rsave = rc = SvPV_nomg_const(right, rightlen); 1049 right_utf = TRUE; 1050 } 1051 else if (!left_utf && right_utf) { 1052 left = newSVpvn_flags(lsave, leftlen, SVs_TEMP); 1053 sv_utf8_upgrade(left); 1054 lsave = lc = SvPV_nomg_const(left, leftlen); 1055 left_utf = TRUE; 1056 } 1057 1058 len = leftlen < rightlen ? leftlen : rightlen; 1059 lensave = len; 1060 SvCUR_set(sv, len); 1061 (void)SvPOK_only(sv); 1062 if ((left_utf || right_utf) && (sv == left || sv == right)) { 1063 needlen = optype == OP_BIT_AND ? len : leftlen + rightlen; 1064 Newxz(dc, needlen + 1, char); 1065 } 1066 else if (SvOK(sv) || SvTYPE(sv) > SVt_PVMG) { 1067 dc = SvPV_force_nomg_nolen(sv); 1068 if (SvLEN(sv) < len + 1) { 1069 dc = SvGROW(sv, len + 1); 1070 (void)memzero(dc + SvCUR(sv), len - SvCUR(sv) + 1); 1071 } 1072 if (optype != OP_BIT_AND && (left_utf || right_utf)) 1073 dc = SvGROW(sv, leftlen + rightlen + 1); 1074 } 1075 else { 1076 needlen = optype == OP_BIT_AND 1077 ? len : (leftlen > rightlen ? leftlen : rightlen); 1078 Newxz(dc, needlen + 1, char); 1079 sv_usepvn_flags(sv, dc, needlen, SV_HAS_TRAILING_NUL); 1080 dc = SvPVX(sv); /* sv_usepvn() calls Renew() */ 1081 } 1082 if (left_utf || right_utf) { 1083 UV duc, luc, ruc; 1084 char *dcorig = dc; 1085 char *dcsave = NULL; 1086 STRLEN lulen = leftlen; 1087 STRLEN rulen = rightlen; 1088 STRLEN ulen; 1089 1090 switch (optype) { 1091 case OP_BIT_AND: 1092 while (lulen && rulen) { 1093 luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); 1094 lc += ulen; 1095 lulen -= ulen; 1096 ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); 1097 rc += ulen; 1098 rulen -= ulen; 1099 duc = luc & ruc; 1100 dc = (char*)uvchr_to_utf8((U8*)dc, duc); 1101 } 1102 if (sv == left || sv == right) 1103 (void)sv_usepvn(sv, dcorig, needlen); 1104 SvCUR_set(sv, dc - dcorig); 1105 break; 1106 case OP_BIT_XOR: 1107 while (lulen && rulen) { 1108 luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); 1109 lc += ulen; 1110 lulen -= ulen; 1111 ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); 1112 rc += ulen; 1113 rulen -= ulen; 1114 duc = luc ^ ruc; 1115 dc = (char*)uvchr_to_utf8((U8*)dc, duc); 1116 } 1117 goto mop_up_utf; 1118 case OP_BIT_OR: 1119 while (lulen && rulen) { 1120 luc = utf8n_to_uvchr((U8*)lc, lulen, &ulen, UTF8_ALLOW_ANYUV); 1121 lc += ulen; 1122 lulen -= ulen; 1123 ruc = utf8n_to_uvchr((U8*)rc, rulen, &ulen, UTF8_ALLOW_ANYUV); 1124 rc += ulen; 1125 rulen -= ulen; 1126 duc = luc | ruc; 1127 dc = (char*)uvchr_to_utf8((U8*)dc, duc); 1128 } 1129 mop_up_utf: 1130 if (rulen) 1131 dcsave = savepvn(rc, rulen); 1132 else if (lulen) 1133 dcsave = savepvn(lc, lulen); 1134 if (sv == left || sv == right) 1135 (void)sv_usepvn(sv, dcorig, needlen); /* uses Renew(); defaults to nomg */ 1136 SvCUR_set(sv, dc - dcorig); 1137 if (rulen) 1138 sv_catpvn_nomg(sv, dcsave, rulen); 1139 else if (lulen) 1140 sv_catpvn_nomg(sv, dcsave, lulen); 1141 else 1142 *SvEND(sv) = '\0'; 1143 Safefree(dcsave); 1144 break; 1145 default: 1146 if (sv == left || sv == right) 1147 Safefree(dcorig); 1148 Perl_croak(aTHX_ "panic: do_vop called for op %u (%s)", 1149 (unsigned)optype, PL_op_name[optype]); 1150 } 1151 SvUTF8_on(sv); 1152 goto finish; 1153 } 1154 else 1155 #ifdef LIBERAL 1156 if (len >= sizeof(long)*4 && 1157 !((unsigned long)dc % sizeof(long)) && 1158 !((unsigned long)lc % sizeof(long)) && 1159 !((unsigned long)rc % sizeof(long))) /* It's almost always aligned... */ 1160 { 1161 const STRLEN remainder = len % (sizeof(long)*4); 1162 len /= (sizeof(long)*4); 1163 1164 dl = (long*)dc; 1165 ll = (long*)lc; 1166 rl = (long*)rc; 1167 1168 switch (optype) { 1169 case OP_BIT_AND: 1170 while (len--) { 1171 *dl++ = *ll++ & *rl++; 1172 *dl++ = *ll++ & *rl++; 1173 *dl++ = *ll++ & *rl++; 1174 *dl++ = *ll++ & *rl++; 1175 } 1176 break; 1177 case OP_BIT_XOR: 1178 while (len--) { 1179 *dl++ = *ll++ ^ *rl++; 1180 *dl++ = *ll++ ^ *rl++; 1181 *dl++ = *ll++ ^ *rl++; 1182 *dl++ = *ll++ ^ *rl++; 1183 } 1184 break; 1185 case OP_BIT_OR: 1186 while (len--) { 1187 *dl++ = *ll++ | *rl++; 1188 *dl++ = *ll++ | *rl++; 1189 *dl++ = *ll++ | *rl++; 1190 *dl++ = *ll++ | *rl++; 1191 } 1192 } 1193 1194 dc = (char*)dl; 1195 lc = (char*)ll; 1196 rc = (char*)rl; 1197 1198 len = remainder; 1199 } 1200 #endif 1201 { 1202 switch (optype) { 1203 case OP_BIT_AND: 1204 while (len--) 1205 *dc++ = *lc++ & *rc++; 1206 *dc = '\0'; 1207 break; 1208 case OP_BIT_XOR: 1209 while (len--) 1210 *dc++ = *lc++ ^ *rc++; 1211 goto mop_up; 1212 case OP_BIT_OR: 1213 while (len--) 1214 *dc++ = *lc++ | *rc++; 1215 mop_up: 1216 len = lensave; 1217 if (rightlen > len) 1218 sv_catpvn_nomg(sv, rsave + len, rightlen - len); 1219 else if (leftlen > (STRLEN)len) 1220 sv_catpvn_nomg(sv, lsave + len, leftlen - len); 1221 else 1222 *SvEND(sv) = '\0'; 1223 break; 1224 } 1225 } 1226 finish: 1227 SvTAINT(sv); 1228 } 1229 1230 OP * 1231 Perl_do_kv(pTHX) 1232 { 1233 dVAR; 1234 dSP; 1235 HV * const keys = MUTABLE_HV(POPs); 1236 HE *entry; 1237 const I32 gimme = GIMME_V; 1238 const I32 dokv = (PL_op->op_type == OP_RV2HV || PL_op->op_type == OP_PADHV); 1239 /* op_type is OP_RKEYS/OP_RVALUES if pp_rkeys delegated to here */ 1240 const I32 dokeys = dokv || (PL_op->op_type == OP_KEYS || PL_op->op_type == OP_RKEYS); 1241 const I32 dovalues = dokv || (PL_op->op_type == OP_VALUES || PL_op->op_type == OP_RVALUES); 1242 1243 (void)hv_iterinit(keys); /* always reset iterator regardless */ 1244 1245 if (gimme == G_VOID) 1246 RETURN; 1247 1248 if (gimme == G_SCALAR) { 1249 if (PL_op->op_flags & OPf_MOD || LVRET) { /* lvalue */ 1250 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */ 1251 sv_magic(ret, NULL, PERL_MAGIC_nkeys, NULL, 0); 1252 LvTYPE(ret) = 'k'; 1253 LvTARG(ret) = SvREFCNT_inc_simple(keys); 1254 PUSHs(ret); 1255 } 1256 else { 1257 IV i; 1258 dTARGET; 1259 1260 if (! SvTIED_mg((const SV *)keys, PERL_MAGIC_tied) ) { 1261 i = HvUSEDKEYS(keys); 1262 } 1263 else { 1264 i = 0; 1265 while (hv_iternext(keys)) i++; 1266 } 1267 PUSHi( i ); 1268 } 1269 RETURN; 1270 } 1271 1272 EXTEND(SP, HvUSEDKEYS(keys) * (dokeys + dovalues)); 1273 1274 PUTBACK; /* hv_iternext and hv_iterval might clobber stack_sp */ 1275 while ((entry = hv_iternext(keys))) { 1276 SPAGAIN; 1277 if (dokeys) { 1278 SV* const sv = hv_iterkeysv(entry); 1279 XPUSHs(sv); /* won't clobber stack_sp */ 1280 } 1281 if (dovalues) { 1282 SV *tmpstr; 1283 PUTBACK; 1284 tmpstr = hv_iterval(keys,entry); 1285 DEBUG_H(Perl_sv_setpvf(aTHX_ tmpstr, "%lu%%%d=%lu", 1286 (unsigned long)HeHASH(entry), 1287 (int)HvMAX(keys)+1, 1288 (unsigned long)(HeHASH(entry) & HvMAX(keys)))); 1289 SPAGAIN; 1290 XPUSHs(tmpstr); 1291 } 1292 PUTBACK; 1293 } 1294 return NORMAL; 1295 } 1296 1297 /* 1298 * Local variables: 1299 * c-indentation-style: bsd 1300 * c-basic-offset: 4 1301 * indent-tabs-mode: nil 1302 * End: 1303 * 1304 * ex: set ts=8 sts=4 sw=4 et: 1305 */ 1306