1 /* pp.c
2 *
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
5 *
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
8 *
9 */
10
11 /*
12 * 'It's a big house this, and very peculiar. Always a bit more
13 * to discover, and no knowing what you'll find round a corner.
14 * And Elves, sir!' --Samwise Gamgee
15 *
16 * [p.225 of _The Lord of the Rings_, II/i: "Many Meetings"]
17 */
18
19 /* This file contains general pp ("push/pop") functions that execute the
20 * opcodes that make up a perl program. A typical pp function expects to
21 * find its arguments on the stack, and usually pushes its results onto
22 * the stack, hence the 'pp' terminology. Each OP structure contains
23 * a pointer to the relevant pp_foo() function.
24 */
25
26 #include "EXTERN.h"
27 #define PERL_IN_PP_C
28 #include "perl.h"
29 #include "keywords.h"
30
31 #include "invlist_inline.h"
32 #include "reentr.h"
33 #include "regcharclass.h"
34
35 /* variations on pp_null */
36
PP(pp_stub)37 PP(pp_stub)
38 {
39 dSP;
40 if (GIMME_V == G_SCALAR)
41 XPUSHs(&PL_sv_undef);
42 RETURN;
43 }
44
45 /* Pushy stuff. */
46
47
48
PP(pp_padcv)49 PP(pp_padcv)
50 {
51 dSP; dTARGET;
52 assert(SvTYPE(TARG) == SVt_PVCV);
53 XPUSHs(TARG);
54 RETURN;
55 }
56
PP(pp_introcv)57 PP(pp_introcv)
58 {
59 dTARGET;
60 SvPADSTALE_off(TARG);
61 return NORMAL;
62 }
63
PP(pp_clonecv)64 PP(pp_clonecv)
65 {
66 dTARGET;
67 CV * const protocv = PadnamePROTOCV(
68 PadlistNAMESARRAY(CvPADLIST(find_runcv(NULL)))[ARGTARG]
69 );
70 assert(SvTYPE(TARG) == SVt_PVCV);
71 assert(protocv);
72 if (CvISXSUB(protocv)) { /* constant */
73 /* XXX Should we clone it here? */
74 /* If this changes to use SAVECLEARSV, we can move the SAVECLEARSV
75 to introcv and remove the SvPADSTALE_off. */
76 SAVEPADSVANDMORTALIZE(ARGTARG);
77 PAD_SVl(ARGTARG) = SvREFCNT_inc_simple_NN(protocv);
78 }
79 else {
80 if (CvROOT(protocv)) {
81 assert(CvCLONE(protocv));
82 assert(!CvCLONED(protocv));
83 }
84 cv_clone_into(protocv,(CV *)TARG);
85 SAVECLEARSV(PAD_SVl(ARGTARG));
86 }
87 return NORMAL;
88 }
89
90 /* Translations. */
91
92 /* In some cases this function inspects PL_op. If this function is called
93 for new op types, more bool parameters may need to be added in place of
94 the checks.
95
96 When noinit is true, the absence of a gv will cause a retval of undef.
97 This is unrelated to the cv-to-gv assignment case.
98 */
99
100 static SV *
S_rv2gv(pTHX_ SV * sv,const bool vivify_sv,const bool strict,const bool noinit)101 S_rv2gv(pTHX_ SV *sv, const bool vivify_sv, const bool strict,
102 const bool noinit)
103 {
104 if (!isGV(sv) || SvFAKE(sv)) SvGETMAGIC(sv);
105 if (SvROK(sv)) {
106 if (SvAMAGIC(sv)) {
107 sv = amagic_deref_call(sv, to_gv_amg);
108 }
109 wasref:
110 sv = SvRV(sv);
111 if (SvTYPE(sv) == SVt_PVIO) {
112 GV * const gv = MUTABLE_GV(sv_newmortal());
113 gv_init(gv, 0, "__ANONIO__", 10, 0);
114 GvIOp(gv) = MUTABLE_IO(sv);
115 SvREFCNT_inc_void_NN(sv);
116 sv = MUTABLE_SV(gv);
117 }
118 else if (!isGV_with_GP(sv)) {
119 Perl_die(aTHX_ "Not a GLOB reference");
120 }
121 }
122 else {
123 if (!isGV_with_GP(sv)) {
124 if (!SvOK(sv)) {
125 /* If this is a 'my' scalar and flag is set then vivify
126 * NI-S 1999/05/07
127 */
128 if (vivify_sv && sv != &PL_sv_undef) {
129 GV *gv;
130 HV *stash;
131 if (SvREADONLY(sv))
132 Perl_croak_no_modify();
133 gv = MUTABLE_GV(newSV(0));
134 stash = CopSTASH(PL_curcop);
135 if (SvTYPE(stash) != SVt_PVHV) stash = NULL;
136 if (cUNOP->op_targ) {
137 SV * const namesv = PAD_SV(cUNOP->op_targ);
138 gv_init_sv(gv, stash, namesv, 0);
139 }
140 else {
141 gv_init_pv(gv, stash, "__ANONIO__", 0);
142 }
143 sv_setrv_noinc_mg(sv, MUTABLE_SV(gv));
144 goto wasref;
145 }
146 if (PL_op->op_flags & OPf_REF || strict) {
147 Perl_die(aTHX_ PL_no_usym, "a symbol");
148 }
149 if (ckWARN(WARN_UNINITIALIZED))
150 report_uninit(sv);
151 return &PL_sv_undef;
152 }
153 if (noinit)
154 {
155 if (!(sv = MUTABLE_SV(gv_fetchsv_nomg(
156 sv, GV_ADDMG, SVt_PVGV
157 ))))
158 return &PL_sv_undef;
159 }
160 else {
161 if (strict) {
162 Perl_die(aTHX_
163 PL_no_symref_sv,
164 sv,
165 (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""),
166 "a symbol"
167 );
168 }
169 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpDONT_INIT_GV))
170 == OPpDONT_INIT_GV) {
171 /* We are the target of a coderef assignment. Return
172 the scalar unchanged, and let pp_sasssign deal with
173 things. */
174 return sv;
175 }
176 sv = MUTABLE_SV(gv_fetchsv_nomg(sv, GV_ADD, SVt_PVGV));
177 }
178 /* FAKE globs in the symbol table cause weird bugs (#77810) */
179 SvFAKE_off(sv);
180 }
181 }
182 if (SvFAKE(sv) && !(PL_op->op_private & OPpALLOW_FAKE)) {
183 SV *newsv = sv_newmortal();
184 sv_setsv_flags(newsv, sv, 0);
185 SvFAKE_off(newsv);
186 sv = newsv;
187 }
188 return sv;
189 }
190
PP(pp_rv2gv)191 PP(pp_rv2gv)
192 {
193 dSP; dTOPss;
194
195 sv = S_rv2gv(aTHX_
196 sv, PL_op->op_private & OPpDEREF,
197 PL_op->op_private & HINT_STRICT_REFS,
198 ((PL_op->op_flags & OPf_SPECIAL) && !(PL_op->op_flags & OPf_MOD))
199 || PL_op->op_type == OP_READLINE
200 );
201 if (PL_op->op_private & OPpLVAL_INTRO)
202 save_gp(MUTABLE_GV(sv), !(PL_op->op_flags & OPf_SPECIAL));
203 SETs(sv);
204 RETURN;
205 }
206
207 /* Helper function for pp_rv2sv and pp_rv2av */
208 GV *
Perl_softref2xv(pTHX_ SV * const sv,const char * const what,const svtype type,SV *** spp)209 Perl_softref2xv(pTHX_ SV *const sv, const char *const what,
210 const svtype type, SV ***spp)
211 {
212 GV *gv;
213
214 PERL_ARGS_ASSERT_SOFTREF2XV;
215
216 if (PL_op->op_private & HINT_STRICT_REFS) {
217 if (SvOK(sv))
218 Perl_die(aTHX_ PL_no_symref_sv, sv,
219 (SvPOKp(sv) && SvCUR(sv)>32 ? "..." : ""), what);
220 else
221 Perl_die(aTHX_ PL_no_usym, what);
222 }
223 if (!SvOK(sv)) {
224 if (
225 PL_op->op_flags & OPf_REF
226 )
227 Perl_die(aTHX_ PL_no_usym, what);
228 if (ckWARN(WARN_UNINITIALIZED))
229 report_uninit(sv);
230 if (type != SVt_PV && GIMME_V == G_LIST) {
231 (*spp)--;
232 return NULL;
233 }
234 **spp = &PL_sv_undef;
235 return NULL;
236 }
237 if ((PL_op->op_flags & OPf_SPECIAL) &&
238 !(PL_op->op_flags & OPf_MOD))
239 {
240 if (!(gv = gv_fetchsv_nomg(sv, GV_ADDMG, type)))
241 {
242 **spp = &PL_sv_undef;
243 return NULL;
244 }
245 }
246 else {
247 gv = gv_fetchsv_nomg(sv, GV_ADD, type);
248 }
249 return gv;
250 }
251
PP(pp_rv2sv)252 PP(pp_rv2sv)
253 {
254 dSP; dTOPss;
255 GV *gv = NULL;
256
257 SvGETMAGIC(sv);
258 if (SvROK(sv)) {
259 if (SvAMAGIC(sv)) {
260 sv = amagic_deref_call(sv, to_sv_amg);
261 }
262
263 sv = SvRV(sv);
264 if (SvTYPE(sv) >= SVt_PVAV)
265 DIE(aTHX_ "Not a SCALAR reference");
266 }
267 else {
268 gv = MUTABLE_GV(sv);
269
270 if (!isGV_with_GP(gv)) {
271 gv = Perl_softref2xv(aTHX_ sv, "a SCALAR", SVt_PV, &sp);
272 if (!gv)
273 RETURN;
274 }
275 sv = GvSVn(gv);
276 }
277 if (PL_op->op_flags & OPf_MOD) {
278 if (PL_op->op_private & OPpLVAL_INTRO) {
279 if (cUNOP->op_first->op_type == OP_NULL)
280 sv = save_scalar(MUTABLE_GV(TOPs));
281 else if (gv)
282 sv = save_scalar(gv);
283 else
284 Perl_croak(aTHX_ "%s", PL_no_localize_ref);
285 }
286 else if (PL_op->op_private & OPpDEREF)
287 sv = vivify_ref(sv, PL_op->op_private & OPpDEREF);
288 }
289 SPAGAIN; /* in case chasing soft refs reallocated the stack */
290 SETs(sv);
291 RETURN;
292 }
293
PP(pp_av2arylen)294 PP(pp_av2arylen)
295 {
296 dSP;
297 AV * const av = MUTABLE_AV(TOPs);
298 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
299 if (lvalue) {
300 SV ** const svp = Perl_av_arylen_p(aTHX_ MUTABLE_AV(av));
301 if (!*svp) {
302 *svp = newSV_type(SVt_PVMG);
303 sv_magic(*svp, MUTABLE_SV(av), PERL_MAGIC_arylen, NULL, 0);
304 }
305 SETs(*svp);
306 } else {
307 SETs(sv_2mortal(newSViv(AvFILL(MUTABLE_AV(av)))));
308 }
309 RETURN;
310 }
311
PP(pp_pos)312 PP(pp_pos)
313 {
314 dSP; dTOPss;
315
316 if (PL_op->op_flags & OPf_MOD || LVRET) {
317 SV * const ret = sv_2mortal(newSV_type(SVt_PVLV));/* Not TARG RT#67838 */
318 sv_magic(ret, NULL, PERL_MAGIC_pos, NULL, 0);
319 LvTYPE(ret) = '.';
320 LvTARG(ret) = SvREFCNT_inc_simple(sv);
321 SETs(ret); /* no SvSETMAGIC */
322 }
323 else {
324 const MAGIC * const mg = mg_find_mglob(sv);
325 if (mg && mg->mg_len != -1) {
326 STRLEN i = mg->mg_len;
327 if (PL_op->op_private & OPpTRUEBOOL)
328 SETs(i ? &PL_sv_yes : &PL_sv_zero);
329 else {
330 dTARGET;
331 if (mg->mg_flags & MGf_BYTES && DO_UTF8(sv))
332 i = sv_pos_b2u_flags(sv, i, SV_GMAGIC|SV_CONST_RETURN);
333 SETu(i);
334 }
335 return NORMAL;
336 }
337 SETs(&PL_sv_undef);
338 }
339 return NORMAL;
340 }
341
PP(pp_rv2cv)342 PP(pp_rv2cv)
343 {
344 dSP;
345 GV *gv;
346 HV *stash_unused;
347 const I32 flags = (PL_op->op_flags & OPf_SPECIAL)
348 ? GV_ADDMG
349 : ((PL_op->op_private & (OPpLVAL_INTRO|OPpMAY_RETURN_CONSTANT))
350 == OPpMAY_RETURN_CONSTANT)
351 ? GV_ADD|GV_NOEXPAND
352 : GV_ADD;
353 /* We usually try to add a non-existent subroutine in case of AUTOLOAD. */
354 /* (But not in defined().) */
355
356 CV *cv = sv_2cv(TOPs, &stash_unused, &gv, flags);
357 if (cv) NOOP;
358 else if ((flags == (GV_ADD|GV_NOEXPAND)) && gv && SvROK(gv)) {
359 cv = SvTYPE(SvRV(gv)) == SVt_PVCV
360 ? MUTABLE_CV(SvRV(gv))
361 : MUTABLE_CV(gv);
362 }
363 else
364 cv = MUTABLE_CV(&PL_sv_undef);
365 SETs(MUTABLE_SV(cv));
366 return NORMAL;
367 }
368
PP(pp_prototype)369 PP(pp_prototype)
370 {
371 dSP;
372 CV *cv;
373 HV *stash;
374 GV *gv;
375 SV *ret = &PL_sv_undef;
376
377 if (SvGMAGICAL(TOPs)) SETs(sv_mortalcopy(TOPs));
378 if (SvPOK(TOPs) && SvCUR(TOPs) >= 7) {
379 const char * s = SvPVX_const(TOPs);
380 if (memBEGINs(s, SvCUR(TOPs), "CORE::")) {
381 const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
382 if (!code)
383 DIE(aTHX_ "Can't find an opnumber for \"%" UTF8f "\"",
384 UTF8fARG(SvFLAGS(TOPs) & SVf_UTF8, SvCUR(TOPs)-6, s+6));
385 {
386 SV * const sv = core_prototype(NULL, s + 6, code, NULL);
387 if (sv) ret = sv;
388 }
389 goto set;
390 }
391 }
392 cv = sv_2cv(TOPs, &stash, &gv, 0);
393 if (cv && SvPOK(cv))
394 ret = newSVpvn_flags(
395 CvPROTO(cv), CvPROTOLEN(cv), SVs_TEMP | SvUTF8(cv)
396 );
397 set:
398 SETs(ret);
399 RETURN;
400 }
401
PP(pp_anoncode)402 PP(pp_anoncode)
403 {
404 dSP;
405 CV *cv = MUTABLE_CV(PAD_SV(PL_op->op_targ));
406 if (CvCLONE(cv))
407 cv = MUTABLE_CV(sv_2mortal(MUTABLE_SV(cv_clone(cv))));
408 EXTEND(SP,1);
409 PUSHs(MUTABLE_SV(cv));
410 RETURN;
411 }
412
PP(pp_srefgen)413 PP(pp_srefgen)
414 {
415 dSP;
416 *SP = refto(*SP);
417 return NORMAL;
418 }
419
PP(pp_refgen)420 PP(pp_refgen)
421 {
422 dSP; dMARK;
423 if (GIMME_V != G_LIST) {
424 if (++MARK <= SP)
425 *MARK = *SP;
426 else
427 {
428 MEXTEND(SP, 1);
429 *MARK = &PL_sv_undef;
430 }
431 *MARK = refto(*MARK);
432 SP = MARK;
433 RETURN;
434 }
435 EXTEND_MORTAL(SP - MARK);
436 while (++MARK <= SP)
437 *MARK = refto(*MARK);
438 RETURN;
439 }
440
441 STATIC SV*
S_refto(pTHX_ SV * sv)442 S_refto(pTHX_ SV *sv)
443 {
444 SV* rv;
445
446 PERL_ARGS_ASSERT_REFTO;
447
448 if (SvTYPE(sv) == SVt_PVLV && LvTYPE(sv) == 'y') {
449 if (LvTARGLEN(sv))
450 vivify_defelem(sv);
451 if (!(sv = LvTARG(sv)))
452 sv = &PL_sv_undef;
453 else
454 SvREFCNT_inc_void_NN(sv);
455 }
456 else if (SvTYPE(sv) == SVt_PVAV) {
457 if (!AvREAL((const AV *)sv) && AvREIFY((const AV *)sv))
458 av_reify(MUTABLE_AV(sv));
459 SvTEMP_off(sv);
460 SvREFCNT_inc_void_NN(sv);
461 }
462 else if (SvPADTMP(sv)) {
463 sv = newSVsv(sv);
464 }
465 else if (UNLIKELY(SvSMAGICAL(sv) && mg_find(sv, PERL_MAGIC_nonelem)))
466 sv_unmagic(SvREFCNT_inc_simple_NN(sv), PERL_MAGIC_nonelem);
467 else {
468 SvTEMP_off(sv);
469 SvREFCNT_inc_void_NN(sv);
470 }
471 rv = sv_newmortal();
472 sv_setrv_noinc(rv, sv);
473 return rv;
474 }
475
PP(pp_ref)476 PP(pp_ref)
477 {
478 dSP;
479 SV * const sv = TOPs;
480
481 SvGETMAGIC(sv);
482 if (!SvROK(sv)) {
483 SETs(&PL_sv_no);
484 return NORMAL;
485 }
486
487 /* op is in boolean context? */
488 if ( (PL_op->op_private & OPpTRUEBOOL)
489 || ( (PL_op->op_private & OPpMAYBE_TRUEBOOL)
490 && block_gimme() == G_VOID))
491 {
492 /* refs are always true - unless it's to an object blessed into a
493 * class with a false name, i.e. "0". So we have to check for
494 * that remote possibility. The following is is basically an
495 * unrolled SvTRUE(sv_reftype(rv)) */
496 SV * const rv = SvRV(sv);
497 if (SvOBJECT(rv)) {
498 HV *stash = SvSTASH(rv);
499 HEK *hek = HvNAME_HEK(stash);
500 if (hek) {
501 I32 len = HEK_LEN(hek);
502 /* bail out and do it the hard way? */
503 if (UNLIKELY(
504 len == HEf_SVKEY
505 || (len == 1 && HEK_KEY(hek)[0] == '0')
506 ))
507 goto do_sv_ref;
508 }
509 }
510 SETs(&PL_sv_yes);
511 return NORMAL;
512 }
513
514 do_sv_ref:
515 {
516 dTARGET;
517 SETs(TARG);
518 sv_ref(TARG, SvRV(sv), TRUE);
519 SvSETMAGIC(TARG);
520 return NORMAL;
521 }
522
523 }
524
525
PP(pp_bless)526 PP(pp_bless)
527 {
528 dSP;
529 HV *stash;
530
531 if (MAXARG == 1)
532 {
533 curstash:
534 stash = CopSTASH(PL_curcop);
535 if (SvTYPE(stash) != SVt_PVHV)
536 Perl_croak(aTHX_ "Attempt to bless into a freed package");
537 }
538 else {
539 SV * const ssv = POPs;
540 STRLEN len;
541 const char *ptr;
542
543 if (!ssv) goto curstash;
544 SvGETMAGIC(ssv);
545 if (SvROK(ssv)) {
546 if (!SvAMAGIC(ssv)) {
547 frog:
548 Perl_croak(aTHX_ "Attempt to bless into a reference");
549 }
550 /* SvAMAGIC is on here, but it only means potentially overloaded,
551 so after stringification: */
552 ptr = SvPV_nomg_const(ssv,len);
553 /* We need to check the flag again: */
554 if (!SvAMAGIC(ssv)) goto frog;
555 }
556 else ptr = SvPV_nomg_const(ssv,len);
557 if (len == 0)
558 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
559 "Explicit blessing to '' (assuming package main)");
560 stash = gv_stashpvn(ptr, len, GV_ADD|SvUTF8(ssv));
561 }
562
563 (void)sv_bless(TOPs, stash);
564 RETURN;
565 }
566
PP(pp_gelem)567 PP(pp_gelem)
568 {
569 dSP;
570
571 SV *sv = POPs;
572 STRLEN len;
573 const char * const elem = SvPV_const(sv, len);
574 GV * const gv = MUTABLE_GV(TOPs);
575 SV * tmpRef = NULL;
576
577 sv = NULL;
578 if (elem) {
579 /* elem will always be NUL terminated. */
580 switch (*elem) {
581 case 'A':
582 if (memEQs(elem, len, "ARRAY"))
583 {
584 tmpRef = MUTABLE_SV(GvAV(gv));
585 if (tmpRef && !AvREAL((const AV *)tmpRef)
586 && AvREIFY((const AV *)tmpRef))
587 av_reify(MUTABLE_AV(tmpRef));
588 }
589 break;
590 case 'C':
591 if (memEQs(elem, len, "CODE"))
592 tmpRef = MUTABLE_SV(GvCVu(gv));
593 break;
594 case 'F':
595 if (memEQs(elem, len, "FILEHANDLE")) {
596 tmpRef = MUTABLE_SV(GvIOp(gv));
597 }
598 else
599 if (memEQs(elem, len, "FORMAT"))
600 tmpRef = MUTABLE_SV(GvFORM(gv));
601 break;
602 case 'G':
603 if (memEQs(elem, len, "GLOB"))
604 tmpRef = MUTABLE_SV(gv);
605 break;
606 case 'H':
607 if (memEQs(elem, len, "HASH"))
608 tmpRef = MUTABLE_SV(GvHV(gv));
609 break;
610 case 'I':
611 if (memEQs(elem, len, "IO"))
612 tmpRef = MUTABLE_SV(GvIOp(gv));
613 break;
614 case 'N':
615 if (memEQs(elem, len, "NAME"))
616 sv = newSVhek(GvNAME_HEK(gv));
617 break;
618 case 'P':
619 if (memEQs(elem, len, "PACKAGE")) {
620 const HV * const stash = GvSTASH(gv);
621 const HEK * const hek = stash ? HvNAME_HEK(stash) : NULL;
622 sv = hek ? newSVhek(hek) : newSVpvs("__ANON__");
623 }
624 break;
625 case 'S':
626 if (memEQs(elem, len, "SCALAR"))
627 tmpRef = GvSVn(gv);
628 break;
629 }
630 }
631 if (tmpRef)
632 sv = newRV(tmpRef);
633 if (sv)
634 sv_2mortal(sv);
635 else
636 sv = &PL_sv_undef;
637 SETs(sv);
638 RETURN;
639 }
640
641 /* Pattern matching */
642
PP(pp_study)643 PP(pp_study)
644 {
645 dSP; dTOPss;
646 STRLEN len;
647
648 (void)SvPV(sv, len);
649 if (len == 0 || len > I32_MAX || !SvPOK(sv) || SvUTF8(sv) || SvVALID(sv)) {
650 /* Historically, study was skipped in these cases. */
651 SETs(&PL_sv_no);
652 return NORMAL;
653 }
654
655 /* Make study a no-op. It's no longer useful and its existence
656 complicates matters elsewhere. */
657 SETs(&PL_sv_yes);
658 return NORMAL;
659 }
660
661
662 /* also used for: pp_transr() */
663
PP(pp_trans)664 PP(pp_trans)
665 {
666 dSP;
667 SV *sv;
668
669 if (PL_op->op_flags & OPf_STACKED)
670 sv = POPs;
671 else {
672 EXTEND(SP,1);
673 if (ARGTARG)
674 sv = PAD_SV(ARGTARG);
675 else {
676 sv = DEFSV;
677 }
678 }
679 if(PL_op->op_type == OP_TRANSR) {
680 STRLEN len;
681 const char * const pv = SvPV(sv,len);
682 SV * const newsv = newSVpvn_flags(pv, len, SVs_TEMP|SvUTF8(sv));
683 do_trans(newsv);
684 PUSHs(newsv);
685 }
686 else {
687 Size_t i = do_trans(sv);
688 mPUSHi((UV)i);
689 }
690 RETURN;
691 }
692
693 /* Lvalue operators. */
694
695 static size_t
S_do_chomp(pTHX_ SV * retval,SV * sv,bool chomping)696 S_do_chomp(pTHX_ SV *retval, SV *sv, bool chomping)
697 {
698 STRLEN len;
699 char *s;
700 size_t count = 0;
701
702 PERL_ARGS_ASSERT_DO_CHOMP;
703
704 if (chomping && (RsSNARF(PL_rs) || RsRECORD(PL_rs)))
705 return 0;
706 if (SvTYPE(sv) == SVt_PVAV) {
707 I32 i;
708 AV *const av = MUTABLE_AV(sv);
709 const I32 max = AvFILL(av);
710
711 for (i = 0; i <= max; i++) {
712 sv = MUTABLE_SV(av_fetch(av, i, FALSE));
713 if (sv && ((sv = *(SV**)sv), sv != &PL_sv_undef))
714 count += do_chomp(retval, sv, chomping);
715 }
716 return count;
717 }
718 else if (SvTYPE(sv) == SVt_PVHV) {
719 HV* const hv = MUTABLE_HV(sv);
720 HE* entry;
721 (void)hv_iterinit(hv);
722 while ((entry = hv_iternext(hv)))
723 count += do_chomp(retval, hv_iterval(hv,entry), chomping);
724 return count;
725 }
726 else if (SvREADONLY(sv)) {
727 Perl_croak_no_modify();
728 }
729
730 s = SvPV(sv, len);
731 if (chomping) {
732 if (s && len) {
733 char *temp_buffer = NULL;
734 SV *svrecode = NULL;
735 s += --len;
736 if (RsPARA(PL_rs)) {
737 if (*s != '\n')
738 goto nope_free_nothing;
739 ++count;
740 while (len && s[-1] == '\n') {
741 --len;
742 --s;
743 ++count;
744 }
745 }
746 else {
747 STRLEN rslen, rs_charlen;
748 const char *rsptr = SvPV_const(PL_rs, rslen);
749
750 rs_charlen = SvUTF8(PL_rs)
751 ? sv_len_utf8(PL_rs)
752 : rslen;
753
754 if (SvUTF8(PL_rs) != SvUTF8(sv)) {
755 /* Assumption is that rs is shorter than the scalar. */
756 if (SvUTF8(PL_rs)) {
757 /* RS is utf8, scalar is 8 bit. */
758 bool is_utf8 = TRUE;
759 temp_buffer = (char*)bytes_from_utf8((U8*)rsptr,
760 &rslen, &is_utf8);
761 if (is_utf8) {
762 /* Cannot downgrade, therefore cannot possibly match.
763 At this point, temp_buffer is not alloced, and
764 is the buffer inside PL_rs, so dont free it.
765 */
766 assert (temp_buffer == rsptr);
767 goto nope_free_sv;
768 }
769 rsptr = temp_buffer;
770 }
771 else {
772 /* RS is 8 bit, scalar is utf8. */
773 temp_buffer = (char*)bytes_to_utf8((U8*)rsptr, &rslen);
774 rsptr = temp_buffer;
775 }
776 }
777 if (rslen == 1) {
778 if (*s != *rsptr)
779 goto nope_free_all;
780 ++count;
781 }
782 else {
783 if (len < rslen - 1)
784 goto nope_free_all;
785 len -= rslen - 1;
786 s -= rslen - 1;
787 if (memNE(s, rsptr, rslen))
788 goto nope_free_all;
789 count += rs_charlen;
790 }
791 }
792 SvPV_force_nomg_nolen(sv);
793 SvCUR_set(sv, len);
794 *SvEND(sv) = '\0';
795 SvNIOK_off(sv);
796 SvSETMAGIC(sv);
797
798 nope_free_all:
799 Safefree(temp_buffer);
800 nope_free_sv:
801 SvREFCNT_dec(svrecode);
802 nope_free_nothing: ;
803 }
804 } else {
805 if (len && (!SvPOK(sv) || SvIsCOW(sv)))
806 s = SvPV_force_nomg(sv, len);
807 if (DO_UTF8(sv)) {
808 if (s && len) {
809 char * const send = s + len;
810 char * const start = s;
811 s = send - 1;
812 while (s > start && UTF8_IS_CONTINUATION(*s))
813 s--;
814 if (is_utf8_string((U8*)s, send - s)) {
815 sv_setpvn(retval, s, send - s);
816 *s = '\0';
817 SvCUR_set(sv, s - start);
818 SvNIOK_off(sv);
819 SvUTF8_on(retval);
820 }
821 }
822 else
823 SvPVCLEAR(retval);
824 }
825 else if (s && len) {
826 s += --len;
827 sv_setpvn(retval, s, 1);
828 *s = '\0';
829 SvCUR_set(sv, len);
830 SvUTF8_off(sv);
831 SvNIOK_off(sv);
832 }
833 else
834 SvPVCLEAR(retval);
835 SvSETMAGIC(sv);
836 }
837 return count;
838 }
839
840
841 /* also used for: pp_schomp() */
842
PP(pp_schop)843 PP(pp_schop)
844 {
845 dSP; dTARGET;
846 const bool chomping = PL_op->op_type == OP_SCHOMP;
847
848 const size_t count = do_chomp(TARG, TOPs, chomping);
849 if (chomping)
850 sv_setiv(TARG, count);
851 SETTARG;
852 return NORMAL;
853 }
854
855
856 /* also used for: pp_chomp() */
857
PP(pp_chop)858 PP(pp_chop)
859 {
860 dSP; dMARK; dTARGET; dORIGMARK;
861 const bool chomping = PL_op->op_type == OP_CHOMP;
862 size_t count = 0;
863
864 while (MARK < SP)
865 count += do_chomp(TARG, *++MARK, chomping);
866 if (chomping)
867 sv_setiv(TARG, count);
868 SP = ORIGMARK;
869 XPUSHTARG;
870 RETURN;
871 }
872
PP(pp_undef)873 PP(pp_undef)
874 {
875 dSP;
876 SV *sv;
877
878 if (!PL_op->op_private) {
879 EXTEND(SP, 1);
880 RETPUSHUNDEF;
881 }
882
883 sv = TOPs;
884 if (!sv)
885 {
886 SETs(&PL_sv_undef);
887 return NORMAL;
888 }
889
890 if (SvTHINKFIRST(sv))
891 sv_force_normal_flags(sv, SV_COW_DROP_PV|SV_IMMEDIATE_UNREF);
892
893 switch (SvTYPE(sv)) {
894 case SVt_NULL:
895 break;
896 case SVt_PVAV:
897 av_undef(MUTABLE_AV(sv));
898 break;
899 case SVt_PVHV:
900 hv_undef(MUTABLE_HV(sv));
901 break;
902 case SVt_PVCV:
903 if (cv_const_sv((const CV *)sv))
904 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
905 "Constant subroutine %" SVf " undefined",
906 SVfARG(CvANON((const CV *)sv)
907 ? newSVpvs_flags("(anonymous)", SVs_TEMP)
908 : sv_2mortal(newSVhek(
909 CvNAMED(sv)
910 ? CvNAME_HEK((CV *)sv)
911 : GvENAME_HEK(CvGV((const CV *)sv))
912 ))
913 ));
914 /* FALLTHROUGH */
915 case SVt_PVFM:
916 /* let user-undef'd sub keep its identity */
917 cv_undef_flags(MUTABLE_CV(sv), CV_UNDEF_KEEP_NAME);
918 break;
919 case SVt_PVGV:
920 assert(isGV_with_GP(sv));
921 assert(!SvFAKE(sv));
922 {
923 GP *gp;
924 HV *stash;
925
926 /* undef *Pkg::meth_name ... */
927 bool method_changed
928 = GvCVu((const GV *)sv) && (stash = GvSTASH((const GV *)sv))
929 && HvENAME_get(stash);
930 /* undef *Foo:: */
931 if((stash = GvHV((const GV *)sv))) {
932 if(HvENAME_get(stash))
933 SvREFCNT_inc_simple_void_NN(sv_2mortal((SV *)stash));
934 else stash = NULL;
935 }
936
937 SvREFCNT_inc_simple_void_NN(sv_2mortal(sv));
938 gp_free(MUTABLE_GV(sv));
939 Newxz(gp, 1, GP);
940 GvGP_set(sv, gp_ref(gp));
941 #ifndef PERL_DONT_CREATE_GVSV
942 GvSV(sv) = newSV(0);
943 #endif
944 GvLINE(sv) = CopLINE(PL_curcop);
945 GvEGV(sv) = MUTABLE_GV(sv);
946 GvMULTI_on(sv);
947
948 if(stash)
949 mro_package_moved(NULL, stash, (const GV *)sv, 0);
950 stash = NULL;
951 /* undef *Foo::ISA */
952 if( strEQ(GvNAME((const GV *)sv), "ISA")
953 && (stash = GvSTASH((const GV *)sv))
954 && (method_changed || HvENAME(stash)) )
955 mro_isa_changed_in(stash);
956 else if(method_changed)
957 mro_method_changed_in(
958 GvSTASH((const GV *)sv)
959 );
960
961 break;
962 }
963 default:
964 if (SvTYPE(sv) >= SVt_PV && SvPVX_const(sv) && SvLEN(sv)) {
965 SvPV_free(sv);
966 SvPV_set(sv, NULL);
967 SvLEN_set(sv, 0);
968 }
969 SvOK_off(sv);
970 SvSETMAGIC(sv);
971 }
972
973 SETs(&PL_sv_undef);
974 return NORMAL;
975 }
976
977
978 /* common "slow" code for pp_postinc and pp_postdec */
979
980 static OP *
S_postincdec_common(pTHX_ SV * sv,SV * targ)981 S_postincdec_common(pTHX_ SV *sv, SV *targ)
982 {
983 dSP;
984 const bool inc =
985 PL_op->op_type == OP_POSTINC || PL_op->op_type == OP_I_POSTINC;
986
987 if (SvROK(sv))
988 TARG = sv_newmortal();
989 sv_setsv(TARG, sv);
990 if (inc)
991 sv_inc_nomg(sv);
992 else
993 sv_dec_nomg(sv);
994 SvSETMAGIC(sv);
995 /* special case for undef: see thread at 2003-03/msg00536.html in archive */
996 if (inc && !SvOK(TARG))
997 sv_setiv(TARG, 0);
998 SETTARG;
999 return NORMAL;
1000 }
1001
1002
1003 /* also used for: pp_i_postinc() */
1004
PP(pp_postinc)1005 PP(pp_postinc)
1006 {
1007 dSP; dTARGET;
1008 SV *sv = TOPs;
1009
1010 /* special-case sv being a simple integer */
1011 if (LIKELY(((sv->sv_flags &
1012 (SVf_THINKFIRST|SVs_GMG|SVf_IVisUV|
1013 SVf_IOK|SVf_NOK|SVf_POK|SVp_NOK|SVp_POK|SVf_ROK))
1014 == SVf_IOK))
1015 && SvIVX(sv) != IV_MAX)
1016 {
1017 IV iv = SvIVX(sv);
1018 SvIV_set(sv, iv + 1);
1019 TARGi(iv, 0); /* arg not GMG, so can't be tainted */
1020 SETs(TARG);
1021 return NORMAL;
1022 }
1023
1024 return S_postincdec_common(aTHX_ sv, TARG);
1025 }
1026
1027
1028 /* also used for: pp_i_postdec() */
1029
PP(pp_postdec)1030 PP(pp_postdec)
1031 {
1032 dSP; dTARGET;
1033 SV *sv = TOPs;
1034
1035 /* special-case sv being a simple integer */
1036 if (LIKELY(((sv->sv_flags &
1037 (SVf_THINKFIRST|SVs_GMG|SVf_IVisUV|
1038 SVf_IOK|SVf_NOK|SVf_POK|SVp_NOK|SVp_POK|SVf_ROK))
1039 == SVf_IOK))
1040 && SvIVX(sv) != IV_MIN)
1041 {
1042 IV iv = SvIVX(sv);
1043 SvIV_set(sv, iv - 1);
1044 TARGi(iv, 0); /* arg not GMG, so can't be tainted */
1045 SETs(TARG);
1046 return NORMAL;
1047 }
1048
1049 return S_postincdec_common(aTHX_ sv, TARG);
1050 }
1051
1052
1053 /* Ordinary operators. */
1054
PP(pp_pow)1055 PP(pp_pow)
1056 {
1057 dSP; dATARGET; SV *svl, *svr;
1058 #ifdef PERL_PRESERVE_IVUV
1059 bool is_int = 0;
1060 #endif
1061 tryAMAGICbin_MG(pow_amg, AMGf_assign|AMGf_numeric);
1062 svr = TOPs;
1063 svl = TOPm1s;
1064 #ifdef PERL_PRESERVE_IVUV
1065 /* For integer to integer power, we do the calculation by hand wherever
1066 we're sure it is safe; otherwise we call pow() and try to convert to
1067 integer afterwards. */
1068 if (SvIV_please_nomg(svr) && SvIV_please_nomg(svl)) {
1069 UV power;
1070 bool baseuok;
1071 UV baseuv;
1072
1073 if (SvUOK(svr)) {
1074 power = SvUVX(svr);
1075 } else {
1076 const IV iv = SvIVX(svr);
1077 if (iv >= 0) {
1078 power = iv;
1079 } else {
1080 goto float_it; /* Can't do negative powers this way. */
1081 }
1082 }
1083
1084 baseuok = SvUOK(svl);
1085 if (baseuok) {
1086 baseuv = SvUVX(svl);
1087 } else {
1088 const IV iv = SvIVX(svl);
1089 if (iv >= 0) {
1090 baseuv = iv;
1091 baseuok = TRUE; /* effectively it's a UV now */
1092 } else {
1093 baseuv = -iv; /* abs, baseuok == false records sign */
1094 }
1095 }
1096 /* now we have integer ** positive integer. */
1097 is_int = 1;
1098
1099 /* foo & (foo - 1) is zero only for a power of 2. */
1100 if (!(baseuv & (baseuv - 1))) {
1101 /* We are raising power-of-2 to a positive integer.
1102 The logic here will work for any base (even non-integer
1103 bases) but it can be less accurate than
1104 pow (base,power) or exp (power * log (base)) when the
1105 intermediate values start to spill out of the mantissa.
1106 With powers of 2 we know this can't happen.
1107 And powers of 2 are the favourite thing for perl
1108 programmers to notice ** not doing what they mean. */
1109 NV result = 1.0;
1110 NV base = baseuok ? baseuv : -(NV)baseuv;
1111
1112 if (power & 1) {
1113 result *= base;
1114 }
1115 while (power >>= 1) {
1116 base *= base;
1117 if (power & 1) {
1118 result *= base;
1119 }
1120 }
1121 SP--;
1122 SETn( result );
1123 SvIV_please_nomg(svr);
1124 RETURN;
1125 } else {
1126 unsigned int highbit = 8 * sizeof(UV);
1127 unsigned int diff = 8 * sizeof(UV);
1128 while (diff >>= 1) {
1129 highbit -= diff;
1130 if (baseuv >> highbit) {
1131 highbit += diff;
1132 }
1133 }
1134 /* we now have baseuv < 2 ** highbit */
1135 if (power * highbit <= 8 * sizeof(UV)) {
1136 /* result will definitely fit in UV, so use UV math
1137 on same algorithm as above */
1138 UV result = 1;
1139 UV base = baseuv;
1140 const bool odd_power = cBOOL(power & 1);
1141 if (odd_power) {
1142 result *= base;
1143 }
1144 while (power >>= 1) {
1145 base *= base;
1146 if (power & 1) {
1147 result *= base;
1148 }
1149 }
1150 SP--;
1151 if (baseuok || !odd_power)
1152 /* answer is positive */
1153 SETu( result );
1154 else if (result <= (UV)IV_MAX)
1155 /* answer negative, fits in IV */
1156 SETi( -(IV)result );
1157 else if (result == (UV)IV_MIN)
1158 /* 2's complement assumption: special case IV_MIN */
1159 SETi( IV_MIN );
1160 else
1161 /* answer negative, doesn't fit */
1162 SETn( -(NV)result );
1163 RETURN;
1164 }
1165 }
1166 }
1167 float_it:
1168 #endif
1169 {
1170 NV right = SvNV_nomg(svr);
1171 NV left = SvNV_nomg(svl);
1172 (void)POPs;
1173
1174 #if defined(USE_LONG_DOUBLE) && defined(HAS_AIX_POWL_NEG_BASE_BUG)
1175 /*
1176 We are building perl with long double support and are on an AIX OS
1177 afflicted with a powl() function that wrongly returns NaNQ for any
1178 negative base. This was reported to IBM as PMR #23047-379 on
1179 03/06/2006. The problem exists in at least the following versions
1180 of AIX and the libm fileset, and no doubt others as well:
1181
1182 AIX 4.3.3-ML10 bos.adt.libm 4.3.3.50
1183 AIX 5.1.0-ML04 bos.adt.libm 5.1.0.29
1184 AIX 5.2.0 bos.adt.libm 5.2.0.85
1185
1186 So, until IBM fixes powl(), we provide the following workaround to
1187 handle the problem ourselves. Our logic is as follows: for
1188 negative bases (left), we use fmod(right, 2) to check if the
1189 exponent is an odd or even integer:
1190
1191 - if odd, powl(left, right) == -powl(-left, right)
1192 - if even, powl(left, right) == powl(-left, right)
1193
1194 If the exponent is not an integer, the result is rightly NaNQ, so
1195 we just return that (as NV_NAN).
1196 */
1197
1198 if (left < 0.0) {
1199 NV mod2 = Perl_fmod( right, 2.0 );
1200 if (mod2 == 1.0 || mod2 == -1.0) { /* odd integer */
1201 SETn( -Perl_pow( -left, right) );
1202 } else if (mod2 == 0.0) { /* even integer */
1203 SETn( Perl_pow( -left, right) );
1204 } else { /* fractional power */
1205 SETn( NV_NAN );
1206 }
1207 } else {
1208 SETn( Perl_pow( left, right) );
1209 }
1210 #else
1211 SETn( Perl_pow( left, right) );
1212 #endif /* HAS_AIX_POWL_NEG_BASE_BUG */
1213
1214 #ifdef PERL_PRESERVE_IVUV
1215 if (is_int)
1216 SvIV_please_nomg(svr);
1217 #endif
1218 RETURN;
1219 }
1220 }
1221
PP(pp_multiply)1222 PP(pp_multiply)
1223 {
1224 dSP; dATARGET; SV *svl, *svr;
1225 tryAMAGICbin_MG(mult_amg, AMGf_assign|AMGf_numeric);
1226 svr = TOPs;
1227 svl = TOPm1s;
1228
1229 #ifdef PERL_PRESERVE_IVUV
1230
1231 /* special-case some simple common cases */
1232 if (!((svl->sv_flags|svr->sv_flags) & (SVf_IVisUV|SVs_GMG))) {
1233 IV il, ir;
1234 U32 flags = (svl->sv_flags & svr->sv_flags);
1235 if (flags & SVf_IOK) {
1236 /* both args are simple IVs */
1237 UV topl, topr;
1238 il = SvIVX(svl);
1239 ir = SvIVX(svr);
1240 do_iv:
1241 topl = ((UV)il) >> (UVSIZE * 4 - 1);
1242 topr = ((UV)ir) >> (UVSIZE * 4 - 1);
1243
1244 /* if both are in a range that can't under/overflow, do a
1245 * simple integer multiply: if the top halves(*) of both numbers
1246 * are 00...00 or 11...11, then it's safe.
1247 * (*) for 32-bits, the "top half" is the top 17 bits,
1248 * for 64-bits, its 33 bits */
1249 if (!(
1250 ((topl+1) | (topr+1))
1251 & ( (((UV)1) << (UVSIZE * 4 + 1)) - 2) /* 11..110 */
1252 )) {
1253 SP--;
1254 TARGi(il * ir, 0); /* args not GMG, so can't be tainted */
1255 SETs(TARG);
1256 RETURN;
1257 }
1258 goto generic;
1259 }
1260 else if (flags & SVf_NOK) {
1261 /* both args are NVs */
1262 NV nl = SvNVX(svl);
1263 NV nr = SvNVX(svr);
1264 NV result;
1265
1266 if (lossless_NV_to_IV(nl, &il) && lossless_NV_to_IV(nr, &ir)) {
1267 /* nothing was lost by converting to IVs */
1268 goto do_iv;
1269 }
1270 SP--;
1271 result = nl * nr;
1272 # if defined(__sgi) && defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE && NVSIZE == 16
1273 if (Perl_isinf(result)) {
1274 Zero((U8*)&result + 8, 8, U8);
1275 }
1276 # endif
1277 TARGn(result, 0); /* args not GMG, so can't be tainted */
1278 SETs(TARG);
1279 RETURN;
1280 }
1281 }
1282
1283 generic:
1284
1285 if (SvIV_please_nomg(svr)) {
1286 /* Unless the left argument is integer in range we are going to have to
1287 use NV maths. Hence only attempt to coerce the right argument if
1288 we know the left is integer. */
1289 /* Left operand is defined, so is it IV? */
1290 if (SvIV_please_nomg(svl)) {
1291 bool auvok = SvUOK(svl);
1292 bool buvok = SvUOK(svr);
1293 const UV topmask = (~ (UV)0) << (4 * sizeof (UV));
1294 const UV botmask = ~((~ (UV)0) << (4 * sizeof (UV)));
1295 UV alow;
1296 UV ahigh;
1297 UV blow;
1298 UV bhigh;
1299
1300 if (auvok) {
1301 alow = SvUVX(svl);
1302 } else {
1303 const IV aiv = SvIVX(svl);
1304 if (aiv >= 0) {
1305 alow = aiv;
1306 auvok = TRUE; /* effectively it's a UV now */
1307 } else {
1308 /* abs, auvok == false records sign; Using 0- here and
1309 * later to silence bogus warning from MS VC */
1310 alow = (UV) (0 - (UV) aiv);
1311 }
1312 }
1313 if (buvok) {
1314 blow = SvUVX(svr);
1315 } else {
1316 const IV biv = SvIVX(svr);
1317 if (biv >= 0) {
1318 blow = biv;
1319 buvok = TRUE; /* effectively it's a UV now */
1320 } else {
1321 /* abs, buvok == false records sign */
1322 blow = (UV) (0 - (UV) biv);
1323 }
1324 }
1325
1326 /* If this does sign extension on unsigned it's time for plan B */
1327 ahigh = alow >> (4 * sizeof (UV));
1328 alow &= botmask;
1329 bhigh = blow >> (4 * sizeof (UV));
1330 blow &= botmask;
1331 if (ahigh && bhigh) {
1332 NOOP;
1333 /* eg 32 bit is at least 0x10000 * 0x10000 == 0x100000000
1334 which is overflow. Drop to NVs below. */
1335 } else if (!ahigh && !bhigh) {
1336 /* eg 32 bit is at most 0xFFFF * 0xFFFF == 0xFFFE0001
1337 so the unsigned multiply cannot overflow. */
1338 const UV product = alow * blow;
1339 if (auvok == buvok) {
1340 /* -ve * -ve or +ve * +ve gives a +ve result. */
1341 SP--;
1342 SETu( product );
1343 RETURN;
1344 } else if (product <= (UV)IV_MIN) {
1345 /* 2s complement assumption that (UV)-IV_MIN is correct. */
1346 /* -ve result, which could overflow an IV */
1347 SP--;
1348 /* can't negate IV_MIN, but there are aren't two
1349 * integers such that !ahigh && !bhigh, where the
1350 * product equals 0x800....000 */
1351 assert(product != (UV)IV_MIN);
1352 SETi( -(IV)product );
1353 RETURN;
1354 } /* else drop to NVs below. */
1355 } else {
1356 /* One operand is large, 1 small */
1357 UV product_middle;
1358 if (bhigh) {
1359 /* swap the operands */
1360 ahigh = bhigh;
1361 bhigh = blow; /* bhigh now the temp var for the swap */
1362 blow = alow;
1363 alow = bhigh;
1364 }
1365 /* now, ((ahigh * blow) << half_UV_len) + (alow * blow)
1366 multiplies can't overflow. shift can, add can, -ve can. */
1367 product_middle = ahigh * blow;
1368 if (!(product_middle & topmask)) {
1369 /* OK, (ahigh * blow) won't lose bits when we shift it. */
1370 UV product_low;
1371 product_middle <<= (4 * sizeof (UV));
1372 product_low = alow * blow;
1373
1374 /* as for pp_add, UV + something mustn't get smaller.
1375 IIRC ANSI mandates this wrapping *behaviour* for
1376 unsigned whatever the actual representation*/
1377 product_low += product_middle;
1378 if (product_low >= product_middle) {
1379 /* didn't overflow */
1380 if (auvok == buvok) {
1381 /* -ve * -ve or +ve * +ve gives a +ve result. */
1382 SP--;
1383 SETu( product_low );
1384 RETURN;
1385 } else if (product_low <= (UV)IV_MIN) {
1386 /* 2s complement assumption again */
1387 /* -ve result, which could overflow an IV */
1388 SP--;
1389 SETi(product_low == (UV)IV_MIN
1390 ? IV_MIN : -(IV)product_low);
1391 RETURN;
1392 } /* else drop to NVs below. */
1393 }
1394 } /* product_middle too large */
1395 } /* ahigh && bhigh */
1396 } /* SvIOK(svl) */
1397 } /* SvIOK(svr) */
1398 #endif
1399 {
1400 NV right = SvNV_nomg(svr);
1401 NV left = SvNV_nomg(svl);
1402 NV result = left * right;
1403
1404 (void)POPs;
1405 #if defined(__sgi) && defined(USE_LONG_DOUBLE) && LONG_DOUBLEKIND == LONG_DOUBLE_IS_DOUBLEDOUBLE_128_BIT_BE_BE && NVSIZE == 16
1406 if (Perl_isinf(result)) {
1407 Zero((U8*)&result + 8, 8, U8);
1408 }
1409 #endif
1410 SETn(result);
1411 RETURN;
1412 }
1413 }
1414
PP(pp_divide)1415 PP(pp_divide)
1416 {
1417 dSP; dATARGET; SV *svl, *svr;
1418 tryAMAGICbin_MG(div_amg, AMGf_assign|AMGf_numeric);
1419 svr = TOPs;
1420 svl = TOPm1s;
1421 /* Only try to do UV divide first
1422 if ((SLOPPYDIVIDE is true) or
1423 (PERL_PRESERVE_IVUV is true and one or both SV is a UV too large
1424 to preserve))
1425 The assumption is that it is better to use floating point divide
1426 whenever possible, only doing integer divide first if we can't be sure.
1427 If NV_PRESERVES_UV is true then we know at compile time that no UV
1428 can be too large to preserve, so don't need to compile the code to
1429 test the size of UVs. */
1430
1431 #if defined(SLOPPYDIVIDE) || (defined(PERL_PRESERVE_IVUV) && !defined(NV_PRESERVES_UV))
1432 # define PERL_TRY_UV_DIVIDE
1433 /* ensure that 20./5. == 4. */
1434 #endif
1435
1436 #ifdef PERL_TRY_UV_DIVIDE
1437 if (SvIV_please_nomg(svr) && SvIV_please_nomg(svl)) {
1438 bool left_non_neg = SvUOK(svl);
1439 bool right_non_neg = SvUOK(svr);
1440 UV left;
1441 UV right;
1442
1443 if (right_non_neg) {
1444 right = SvUVX(svr);
1445 }
1446 else {
1447 const IV biv = SvIVX(svr);
1448 if (biv >= 0) {
1449 right = biv;
1450 right_non_neg = TRUE; /* effectively it's a UV now */
1451 }
1452 else {
1453 right = -(UV)biv;
1454 }
1455 }
1456 /* historically undef()/0 gives a "Use of uninitialized value"
1457 warning before dieing, hence this test goes here.
1458 If it were immediately before the second SvIV_please, then
1459 DIE() would be invoked before left was even inspected, so
1460 no inspection would give no warning. */
1461 if (right == 0)
1462 DIE(aTHX_ "Illegal division by zero");
1463
1464 if (left_non_neg) {
1465 left = SvUVX(svl);
1466 }
1467 else {
1468 const IV aiv = SvIVX(svl);
1469 if (aiv >= 0) {
1470 left = aiv;
1471 left_non_neg = TRUE; /* effectively it's a UV now */
1472 }
1473 else {
1474 left = -(UV)aiv;
1475 }
1476 }
1477
1478 if (left >= right
1479 #ifdef SLOPPYDIVIDE
1480 /* For sloppy divide we always attempt integer division. */
1481 #else
1482 /* Otherwise we only attempt it if either or both operands
1483 would not be preserved by an NV. If both fit in NVs
1484 we fall through to the NV divide code below. However,
1485 as left >= right to ensure integer result here, we know that
1486 we can skip the test on the right operand - right big
1487 enough not to be preserved can't get here unless left is
1488 also too big. */
1489
1490 && (left > ((UV)1 << NV_PRESERVES_UV_BITS))
1491 #endif
1492 ) {
1493 /* Integer division can't overflow, but it can be imprecise. */
1494
1495 /* Modern compilers optimize division followed by
1496 * modulo into a single div instruction */
1497 const UV result = left / right;
1498 if (left % right == 0) {
1499 SP--; /* result is valid */
1500 if (left_non_neg == right_non_neg) {
1501 /* signs identical, result is positive. */
1502 SETu( result );
1503 RETURN;
1504 }
1505 /* 2s complement assumption */
1506 if (result <= (UV)IV_MIN)
1507 SETi(result == (UV)IV_MIN ? IV_MIN : -(IV)result);
1508 else {
1509 /* It's exact but too negative for IV. */
1510 SETn( -(NV)result );
1511 }
1512 RETURN;
1513 } /* tried integer divide but it was not an integer result */
1514 } /* else (PERL_ABS(result) < 1.0) or (both UVs in range for NV) */
1515 } /* one operand wasn't SvIOK */
1516 #endif /* PERL_TRY_UV_DIVIDE */
1517 {
1518 NV right = SvNV_nomg(svr);
1519 NV left = SvNV_nomg(svl);
1520 (void)POPs;(void)POPs;
1521 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
1522 if (! Perl_isnan(right) && right == 0.0)
1523 #else
1524 if (right == 0.0)
1525 #endif
1526 DIE(aTHX_ "Illegal division by zero");
1527 PUSHn( left / right );
1528 RETURN;
1529 }
1530 }
1531
PP(pp_modulo)1532 PP(pp_modulo)
1533 {
1534 dSP; dATARGET;
1535 tryAMAGICbin_MG(modulo_amg, AMGf_assign|AMGf_numeric);
1536 {
1537 UV left = 0;
1538 UV right = 0;
1539 bool left_neg = FALSE;
1540 bool right_neg = FALSE;
1541 bool use_double = FALSE;
1542 bool dright_valid = FALSE;
1543 NV dright = 0.0;
1544 NV dleft = 0.0;
1545 SV * const svr = TOPs;
1546 SV * const svl = TOPm1s;
1547 if (SvIV_please_nomg(svr)) {
1548 right_neg = !SvUOK(svr);
1549 if (!right_neg) {
1550 right = SvUVX(svr);
1551 } else {
1552 const IV biv = SvIVX(svr);
1553 if (biv >= 0) {
1554 right = biv;
1555 right_neg = FALSE; /* effectively it's a UV now */
1556 } else {
1557 right = (UV) (0 - (UV) biv);
1558 }
1559 }
1560 }
1561 else {
1562 dright = SvNV_nomg(svr);
1563 right_neg = dright < 0;
1564 if (right_neg)
1565 dright = -dright;
1566 if (dright < UV_MAX_P1) {
1567 right = U_V(dright);
1568 dright_valid = TRUE; /* In case we need to use double below. */
1569 } else {
1570 use_double = TRUE;
1571 }
1572 }
1573
1574 /* At this point use_double is only true if right is out of range for
1575 a UV. In range NV has been rounded down to nearest UV and
1576 use_double false. */
1577 if (!use_double && SvIV_please_nomg(svl)) {
1578 left_neg = !SvUOK(svl);
1579 if (!left_neg) {
1580 left = SvUVX(svl);
1581 } else {
1582 const IV aiv = SvIVX(svl);
1583 if (aiv >= 0) {
1584 left = aiv;
1585 left_neg = FALSE; /* effectively it's a UV now */
1586 } else {
1587 left = (UV) (0 - (UV) aiv);
1588 }
1589 }
1590 }
1591 else {
1592 dleft = SvNV_nomg(svl);
1593 left_neg = dleft < 0;
1594 if (left_neg)
1595 dleft = -dleft;
1596
1597 /* This should be exactly the 5.6 behaviour - if left and right are
1598 both in range for UV then use U_V() rather than floor. */
1599 if (!use_double) {
1600 if (dleft < UV_MAX_P1) {
1601 /* right was in range, so is dleft, so use UVs not double.
1602 */
1603 left = U_V(dleft);
1604 }
1605 /* left is out of range for UV, right was in range, so promote
1606 right (back) to double. */
1607 else {
1608 /* The +0.5 is used in 5.6 even though it is not strictly
1609 consistent with the implicit +0 floor in the U_V()
1610 inside the #if 1. */
1611 dleft = Perl_floor(dleft + 0.5);
1612 use_double = TRUE;
1613 if (dright_valid)
1614 dright = Perl_floor(dright + 0.5);
1615 else
1616 dright = right;
1617 }
1618 }
1619 }
1620 sp -= 2;
1621 if (use_double) {
1622 NV dans;
1623
1624 if (!dright)
1625 DIE(aTHX_ "Illegal modulus zero");
1626
1627 dans = Perl_fmod(dleft, dright);
1628 if ((left_neg != right_neg) && dans)
1629 dans = dright - dans;
1630 if (right_neg)
1631 dans = -dans;
1632 sv_setnv(TARG, dans);
1633 }
1634 else {
1635 UV ans;
1636
1637 if (!right)
1638 DIE(aTHX_ "Illegal modulus zero");
1639
1640 ans = left % right;
1641 if ((left_neg != right_neg) && ans)
1642 ans = right - ans;
1643 if (right_neg) {
1644 /* XXX may warn: unary minus operator applied to unsigned type */
1645 /* could change -foo to be (~foo)+1 instead */
1646 if (ans <= ~((UV)IV_MAX)+1)
1647 sv_setiv(TARG, ~ans+1);
1648 else
1649 sv_setnv(TARG, -(NV)ans);
1650 }
1651 else
1652 sv_setuv(TARG, ans);
1653 }
1654 PUSHTARG;
1655 RETURN;
1656 }
1657 }
1658
PP(pp_repeat)1659 PP(pp_repeat)
1660 {
1661 dSP; dATARGET;
1662 IV count;
1663 SV *sv;
1664 bool infnan = FALSE;
1665 const U8 gimme = GIMME_V;
1666
1667 if (gimme == G_LIST && PL_op->op_private & OPpREPEAT_DOLIST) {
1668 /* TODO: think of some way of doing list-repeat overloading ??? */
1669 sv = POPs;
1670 SvGETMAGIC(sv);
1671 }
1672 else {
1673 if (UNLIKELY(PL_op->op_private & OPpREPEAT_DOLIST)) {
1674 /* The parser saw this as a list repeat, and there
1675 are probably several items on the stack. But we're
1676 in scalar/void context, and there's no pp_list to save us
1677 now. So drop the rest of the items -- robin@kitsite.com
1678 */
1679 dMARK;
1680 if (MARK + 1 < SP) {
1681 MARK[1] = TOPm1s;
1682 MARK[2] = TOPs;
1683 }
1684 else {
1685 dTOPss;
1686 ASSUME(MARK + 1 == SP);
1687 MEXTEND(SP, 1);
1688 PUSHs(sv);
1689 MARK[1] = &PL_sv_undef;
1690 }
1691 SP = MARK + 2;
1692 }
1693 tryAMAGICbin_MG(repeat_amg, AMGf_assign);
1694 sv = POPs;
1695 }
1696
1697 if (SvIOKp(sv)) {
1698 if (SvUOK(sv)) {
1699 const UV uv = SvUV_nomg(sv);
1700 if (uv > IV_MAX)
1701 count = IV_MAX; /* The best we can do? */
1702 else
1703 count = uv;
1704 } else {
1705 count = SvIV_nomg(sv);
1706 }
1707 }
1708 else if (SvNOKp(sv)) {
1709 const NV nv = SvNV_nomg(sv);
1710 infnan = Perl_isinfnan(nv);
1711 if (UNLIKELY(infnan)) {
1712 count = 0;
1713 } else {
1714 if (nv < 0.0)
1715 count = -1; /* An arbitrary negative integer */
1716 else
1717 count = (IV)nv;
1718 }
1719 }
1720 else
1721 count = SvIV_nomg(sv);
1722
1723 if (infnan) {
1724 Perl_ck_warner(aTHX_ packWARN(WARN_NUMERIC),
1725 "Non-finite repeat count does nothing");
1726 } else if (count < 0) {
1727 count = 0;
1728 Perl_ck_warner(aTHX_ packWARN(WARN_NUMERIC),
1729 "Negative repeat count does nothing");
1730 }
1731
1732 if (gimme == G_LIST && PL_op->op_private & OPpREPEAT_DOLIST) {
1733 dMARK;
1734 const SSize_t items = SP - MARK;
1735 const U8 mod = PL_op->op_flags & OPf_MOD;
1736
1737 if (count > 1) {
1738 SSize_t max;
1739
1740 if ( items > SSize_t_MAX / count /* max would overflow */
1741 /* repeatcpy would overflow */
1742 || items > I32_MAX / (I32)sizeof(SV *)
1743 )
1744 Perl_croak(aTHX_ "%s","Out of memory during list extend");
1745 max = items * count;
1746 MEXTEND(MARK, max);
1747
1748 while (SP > MARK) {
1749 if (*SP) {
1750 if (mod && SvPADTMP(*SP)) {
1751 *SP = sv_mortalcopy(*SP);
1752 }
1753 SvTEMP_off((*SP));
1754 }
1755 SP--;
1756 }
1757 MARK++;
1758 repeatcpy((char*)(MARK + items), (char*)MARK,
1759 items * sizeof(const SV *), count - 1);
1760 SP += max;
1761 }
1762 else if (count <= 0)
1763 SP = MARK;
1764 }
1765 else { /* Note: mark already snarfed by pp_list */
1766 SV * const tmpstr = POPs;
1767 STRLEN len;
1768 bool isutf;
1769
1770 if (TARG != tmpstr)
1771 sv_setsv_nomg(TARG, tmpstr);
1772 SvPV_force_nomg(TARG, len);
1773 isutf = DO_UTF8(TARG);
1774 if (count != 1) {
1775 if (count < 1)
1776 SvCUR_set(TARG, 0);
1777 else {
1778 STRLEN max;
1779
1780 if ( len > (MEM_SIZE_MAX-1) / (UV)count /* max would overflow */
1781 || len > (U32)I32_MAX /* repeatcpy would overflow */
1782 )
1783 Perl_croak(aTHX_ "%s",
1784 "Out of memory during string extend");
1785 max = (UV)count * len + 1;
1786 SvGROW(TARG, max);
1787
1788 repeatcpy(SvPVX(TARG) + len, SvPVX(TARG), len, count - 1);
1789 SvCUR_set(TARG, SvCUR(TARG) * count);
1790 }
1791 *SvEND(TARG) = '\0';
1792 }
1793 if (isutf)
1794 (void)SvPOK_only_UTF8(TARG);
1795 else
1796 (void)SvPOK_only(TARG);
1797
1798 PUSHTARG;
1799 }
1800 RETURN;
1801 }
1802
PP(pp_subtract)1803 PP(pp_subtract)
1804 {
1805 dSP; dATARGET; bool useleft; SV *svl, *svr;
1806 tryAMAGICbin_MG(subtr_amg, AMGf_assign|AMGf_numeric);
1807 svr = TOPs;
1808 svl = TOPm1s;
1809
1810 #ifdef PERL_PRESERVE_IVUV
1811
1812 /* special-case some simple common cases */
1813 if (!((svl->sv_flags|svr->sv_flags) & (SVf_IVisUV|SVs_GMG))) {
1814 IV il, ir;
1815 U32 flags = (svl->sv_flags & svr->sv_flags);
1816 if (flags & SVf_IOK) {
1817 /* both args are simple IVs */
1818 UV topl, topr;
1819 il = SvIVX(svl);
1820 ir = SvIVX(svr);
1821 do_iv:
1822 topl = ((UV)il) >> (UVSIZE * 8 - 2);
1823 topr = ((UV)ir) >> (UVSIZE * 8 - 2);
1824
1825 /* if both are in a range that can't under/overflow, do a
1826 * simple integer subtract: if the top of both numbers
1827 * are 00 or 11, then it's safe */
1828 if (!( ((topl+1) | (topr+1)) & 2)) {
1829 SP--;
1830 TARGi(il - ir, 0); /* args not GMG, so can't be tainted */
1831 SETs(TARG);
1832 RETURN;
1833 }
1834 goto generic;
1835 }
1836 else if (flags & SVf_NOK) {
1837 /* both args are NVs */
1838 NV nl = SvNVX(svl);
1839 NV nr = SvNVX(svr);
1840
1841 if (lossless_NV_to_IV(nl, &il) && lossless_NV_to_IV(nr, &ir)) {
1842 /* nothing was lost by converting to IVs */
1843 goto do_iv;
1844 }
1845 SP--;
1846 TARGn(nl - nr, 0); /* args not GMG, so can't be tainted */
1847 SETs(TARG);
1848 RETURN;
1849 }
1850 }
1851
1852 generic:
1853
1854 useleft = USE_LEFT(svl);
1855 /* See comments in pp_add (in pp_hot.c) about Overflow, and how
1856 "bad things" happen if you rely on signed integers wrapping. */
1857 if (SvIV_please_nomg(svr)) {
1858 /* Unless the left argument is integer in range we are going to have to
1859 use NV maths. Hence only attempt to coerce the right argument if
1860 we know the left is integer. */
1861 UV auv = 0;
1862 bool auvok = FALSE;
1863 bool a_valid = 0;
1864
1865 if (!useleft) {
1866 auv = 0;
1867 a_valid = auvok = 1;
1868 /* left operand is undef, treat as zero. */
1869 } else {
1870 /* Left operand is defined, so is it IV? */
1871 if (SvIV_please_nomg(svl)) {
1872 if ((auvok = SvUOK(svl)))
1873 auv = SvUVX(svl);
1874 else {
1875 const IV aiv = SvIVX(svl);
1876 if (aiv >= 0) {
1877 auv = aiv;
1878 auvok = 1; /* Now acting as a sign flag. */
1879 } else {
1880 auv = (UV) (0 - (UV) aiv);
1881 }
1882 }
1883 a_valid = 1;
1884 }
1885 }
1886 if (a_valid) {
1887 bool result_good = 0;
1888 UV result;
1889 UV buv;
1890 bool buvok = SvUOK(svr);
1891
1892 if (buvok)
1893 buv = SvUVX(svr);
1894 else {
1895 const IV biv = SvIVX(svr);
1896 if (biv >= 0) {
1897 buv = biv;
1898 buvok = 1;
1899 } else
1900 buv = (UV) (0 - (UV) biv);
1901 }
1902 /* ?uvok if value is >= 0. basically, flagged as UV if it's +ve,
1903 else "IV" now, independent of how it came in.
1904 if a, b represents positive, A, B negative, a maps to -A etc
1905 a - b => (a - b)
1906 A - b => -(a + b)
1907 a - B => (a + b)
1908 A - B => -(a - b)
1909 all UV maths. negate result if A negative.
1910 subtract if signs same, add if signs differ. */
1911
1912 if (auvok ^ buvok) {
1913 /* Signs differ. */
1914 result = auv + buv;
1915 if (result >= auv)
1916 result_good = 1;
1917 } else {
1918 /* Signs same */
1919 if (auv >= buv) {
1920 result = auv - buv;
1921 /* Must get smaller */
1922 if (result <= auv)
1923 result_good = 1;
1924 } else {
1925 result = buv - auv;
1926 if (result <= buv) {
1927 /* result really should be -(auv-buv). as its negation
1928 of true value, need to swap our result flag */
1929 auvok = !auvok;
1930 result_good = 1;
1931 }
1932 }
1933 }
1934 if (result_good) {
1935 SP--;
1936 if (auvok)
1937 SETu( result );
1938 else {
1939 /* Negate result */
1940 if (result <= (UV)IV_MIN)
1941 SETi(result == (UV)IV_MIN
1942 ? IV_MIN : -(IV)result);
1943 else {
1944 /* result valid, but out of range for IV. */
1945 SETn( -(NV)result );
1946 }
1947 }
1948 RETURN;
1949 } /* Overflow, drop through to NVs. */
1950 }
1951 }
1952 #else
1953 useleft = USE_LEFT(svl);
1954 #endif
1955 {
1956 NV value = SvNV_nomg(svr);
1957 (void)POPs;
1958
1959 if (!useleft) {
1960 /* left operand is undef, treat as zero - value */
1961 SETn(-value);
1962 RETURN;
1963 }
1964 SETn( SvNV_nomg(svl) - value );
1965 RETURN;
1966 }
1967 }
1968
1969 #define IV_BITS (IVSIZE * 8)
1970
1971 /* Taking the right operand of bitwise shift operators, returns an int
1972 * indicating the shift amount clipped to the range [-IV_BITS, +IV_BITS].
1973 */
1974 static int
S_shift_amount(pTHX_ SV * const svr)1975 S_shift_amount(pTHX_ SV *const svr)
1976 {
1977 const IV iv = SvIV_nomg(svr);
1978
1979 /* Note that [INT_MIN, INT_MAX] cannot be used as the clipping bound;
1980 * INT_MIN will cause overflow in "shift = -shift;" in S_{iv,uv}_shift.
1981 */
1982 if (SvIsUV(svr))
1983 return SvUVX(svr) > IV_BITS ? IV_BITS : (int)SvUVX(svr);
1984 return iv < -IV_BITS ? -IV_BITS : iv > IV_BITS ? IV_BITS : (int)iv;
1985 }
1986
S_uv_shift(UV uv,int shift,bool left)1987 static UV S_uv_shift(UV uv, int shift, bool left)
1988 {
1989 if (shift < 0) {
1990 shift = -shift;
1991 left = !left;
1992 }
1993 if (UNLIKELY(shift >= IV_BITS)) {
1994 return 0;
1995 }
1996 return left ? uv << shift : uv >> shift;
1997 }
1998
S_iv_shift(IV iv,int shift,bool left)1999 static IV S_iv_shift(IV iv, int shift, bool left)
2000 {
2001 if (shift < 0) {
2002 shift = -shift;
2003 left = !left;
2004 }
2005
2006 if (UNLIKELY(shift >= IV_BITS)) {
2007 return iv < 0 && !left ? -1 : 0;
2008 }
2009
2010 /* For left shifts, perl 5 has chosen to treat the value as unsigned for
2011 * the purposes of shifting, then cast back to signed. This is very
2012 * different from Raku:
2013 *
2014 * $ raku -e 'say -2 +< 5'
2015 * -64
2016 *
2017 * $ ./perl -le 'print -2 << 5'
2018 * 18446744073709551552
2019 * */
2020 if (left) {
2021 return (IV) (((UV) iv) << shift);
2022 }
2023
2024 /* Here is right shift */
2025 return iv >> shift;
2026 }
2027
2028 #define UV_LEFT_SHIFT(uv, shift) S_uv_shift(uv, shift, TRUE)
2029 #define UV_RIGHT_SHIFT(uv, shift) S_uv_shift(uv, shift, FALSE)
2030 #define IV_LEFT_SHIFT(iv, shift) S_iv_shift(iv, shift, TRUE)
2031 #define IV_RIGHT_SHIFT(iv, shift) S_iv_shift(iv, shift, FALSE)
2032
PP(pp_left_shift)2033 PP(pp_left_shift)
2034 {
2035 dSP; dATARGET; SV *svl, *svr;
2036 tryAMAGICbin_MG(lshift_amg, AMGf_assign|AMGf_numeric);
2037 svr = POPs;
2038 svl = TOPs;
2039 {
2040 const int shift = S_shift_amount(aTHX_ svr);
2041 if (PL_op->op_private & OPpUSEINT) {
2042 SETi(IV_LEFT_SHIFT(SvIV_nomg(svl), shift));
2043 }
2044 else {
2045 SETu(UV_LEFT_SHIFT(SvUV_nomg(svl), shift));
2046 }
2047 RETURN;
2048 }
2049 }
2050
PP(pp_right_shift)2051 PP(pp_right_shift)
2052 {
2053 dSP; dATARGET; SV *svl, *svr;
2054 tryAMAGICbin_MG(rshift_amg, AMGf_assign|AMGf_numeric);
2055 svr = POPs;
2056 svl = TOPs;
2057 {
2058 const int shift = S_shift_amount(aTHX_ svr);
2059 if (PL_op->op_private & OPpUSEINT) {
2060 SETi(IV_RIGHT_SHIFT(SvIV_nomg(svl), shift));
2061 }
2062 else {
2063 SETu(UV_RIGHT_SHIFT(SvUV_nomg(svl), shift));
2064 }
2065 RETURN;
2066 }
2067 }
2068
PP(pp_lt)2069 PP(pp_lt)
2070 {
2071 dSP;
2072 SV *left, *right;
2073 U32 flags_and, flags_or;
2074
2075 tryAMAGICbin_MG(lt_amg, AMGf_numeric);
2076 right = POPs;
2077 left = TOPs;
2078 flags_and = SvFLAGS(left) & SvFLAGS(right);
2079 flags_or = SvFLAGS(left) | SvFLAGS(right);
2080
2081 SETs(boolSV(
2082 ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) )
2083 ? (SvIVX(left) < SvIVX(right))
2084 : (flags_and & SVf_NOK)
2085 ? (SvNVX(left) < SvNVX(right))
2086 : (do_ncmp(left, right) == -1)
2087 ));
2088 RETURN;
2089 }
2090
PP(pp_gt)2091 PP(pp_gt)
2092 {
2093 dSP;
2094 SV *left, *right;
2095 U32 flags_and, flags_or;
2096
2097 tryAMAGICbin_MG(gt_amg, AMGf_numeric);
2098 right = POPs;
2099 left = TOPs;
2100 flags_and = SvFLAGS(left) & SvFLAGS(right);
2101 flags_or = SvFLAGS(left) | SvFLAGS(right);
2102
2103 SETs(boolSV(
2104 ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) )
2105 ? (SvIVX(left) > SvIVX(right))
2106 : (flags_and & SVf_NOK)
2107 ? (SvNVX(left) > SvNVX(right))
2108 : (do_ncmp(left, right) == 1)
2109 ));
2110 RETURN;
2111 }
2112
PP(pp_le)2113 PP(pp_le)
2114 {
2115 dSP;
2116 SV *left, *right;
2117 U32 flags_and, flags_or;
2118
2119 tryAMAGICbin_MG(le_amg, AMGf_numeric);
2120 right = POPs;
2121 left = TOPs;
2122 flags_and = SvFLAGS(left) & SvFLAGS(right);
2123 flags_or = SvFLAGS(left) | SvFLAGS(right);
2124
2125 SETs(boolSV(
2126 ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) )
2127 ? (SvIVX(left) <= SvIVX(right))
2128 : (flags_and & SVf_NOK)
2129 ? (SvNVX(left) <= SvNVX(right))
2130 : (do_ncmp(left, right) <= 0)
2131 ));
2132 RETURN;
2133 }
2134
PP(pp_ge)2135 PP(pp_ge)
2136 {
2137 dSP;
2138 SV *left, *right;
2139 U32 flags_and, flags_or;
2140
2141 tryAMAGICbin_MG(ge_amg, AMGf_numeric);
2142 right = POPs;
2143 left = TOPs;
2144 flags_and = SvFLAGS(left) & SvFLAGS(right);
2145 flags_or = SvFLAGS(left) | SvFLAGS(right);
2146
2147 SETs(boolSV(
2148 ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) )
2149 ? (SvIVX(left) >= SvIVX(right))
2150 : (flags_and & SVf_NOK)
2151 ? (SvNVX(left) >= SvNVX(right))
2152 : ( (do_ncmp(left, right) & 2) == 0)
2153 ));
2154 RETURN;
2155 }
2156
PP(pp_ne)2157 PP(pp_ne)
2158 {
2159 dSP;
2160 SV *left, *right;
2161 U32 flags_and, flags_or;
2162
2163 tryAMAGICbin_MG(ne_amg, AMGf_numeric);
2164 right = POPs;
2165 left = TOPs;
2166 flags_and = SvFLAGS(left) & SvFLAGS(right);
2167 flags_or = SvFLAGS(left) | SvFLAGS(right);
2168
2169 SETs(boolSV(
2170 ( (flags_and & SVf_IOK) && ((flags_or & SVf_IVisUV) ==0 ) )
2171 ? (SvIVX(left) != SvIVX(right))
2172 : (flags_and & SVf_NOK)
2173 ? (SvNVX(left) != SvNVX(right))
2174 : (do_ncmp(left, right) != 0)
2175 ));
2176 RETURN;
2177 }
2178
2179 /* compare left and right SVs. Returns:
2180 * -1: <
2181 * 0: ==
2182 * 1: >
2183 * 2: left or right was a NaN
2184 */
2185 I32
Perl_do_ncmp(pTHX_ SV * const left,SV * const right)2186 Perl_do_ncmp(pTHX_ SV* const left, SV * const right)
2187 {
2188 PERL_ARGS_ASSERT_DO_NCMP;
2189 #ifdef PERL_PRESERVE_IVUV
2190 /* Fortunately it seems NaN isn't IOK */
2191 if (SvIV_please_nomg(right) && SvIV_please_nomg(left)) {
2192 if (!SvUOK(left)) {
2193 const IV leftiv = SvIVX(left);
2194 if (!SvUOK(right)) {
2195 /* ## IV <=> IV ## */
2196 const IV rightiv = SvIVX(right);
2197 return (leftiv > rightiv) - (leftiv < rightiv);
2198 }
2199 /* ## IV <=> UV ## */
2200 if (leftiv < 0)
2201 /* As (b) is a UV, it's >=0, so it must be < */
2202 return -1;
2203 {
2204 const UV rightuv = SvUVX(right);
2205 return ((UV)leftiv > rightuv) - ((UV)leftiv < rightuv);
2206 }
2207 }
2208
2209 if (SvUOK(right)) {
2210 /* ## UV <=> UV ## */
2211 const UV leftuv = SvUVX(left);
2212 const UV rightuv = SvUVX(right);
2213 return (leftuv > rightuv) - (leftuv < rightuv);
2214 }
2215 /* ## UV <=> IV ## */
2216 {
2217 const IV rightiv = SvIVX(right);
2218 if (rightiv < 0)
2219 /* As (a) is a UV, it's >=0, so it cannot be < */
2220 return 1;
2221 {
2222 const UV leftuv = SvUVX(left);
2223 return (leftuv > (UV)rightiv) - (leftuv < (UV)rightiv);
2224 }
2225 }
2226 NOT_REACHED; /* NOTREACHED */
2227 }
2228 #endif
2229 {
2230 NV const rnv = SvNV_nomg(right);
2231 NV const lnv = SvNV_nomg(left);
2232
2233 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2234 if (Perl_isnan(lnv) || Perl_isnan(rnv)) {
2235 return 2;
2236 }
2237 return (lnv > rnv) - (lnv < rnv);
2238 #else
2239 if (lnv < rnv)
2240 return -1;
2241 if (lnv > rnv)
2242 return 1;
2243 if (lnv == rnv)
2244 return 0;
2245 return 2;
2246 #endif
2247 }
2248 }
2249
2250
PP(pp_ncmp)2251 PP(pp_ncmp)
2252 {
2253 dSP;
2254 SV *left, *right;
2255 I32 value;
2256 tryAMAGICbin_MG(ncmp_amg, AMGf_numeric);
2257 right = POPs;
2258 left = TOPs;
2259 value = do_ncmp(left, right);
2260 if (value == 2) {
2261 SETs(&PL_sv_undef);
2262 }
2263 else {
2264 dTARGET;
2265 SETi(value);
2266 }
2267 RETURN;
2268 }
2269
2270
2271 /* also used for: pp_sge() pp_sgt() pp_slt() */
2272
PP(pp_sle)2273 PP(pp_sle)
2274 {
2275 dSP;
2276
2277 int amg_type = sle_amg;
2278 int multiplier = 1;
2279 int rhs = 1;
2280
2281 switch (PL_op->op_type) {
2282 case OP_SLT:
2283 amg_type = slt_amg;
2284 /* cmp < 0 */
2285 rhs = 0;
2286 break;
2287 case OP_SGT:
2288 amg_type = sgt_amg;
2289 /* cmp > 0 */
2290 multiplier = -1;
2291 rhs = 0;
2292 break;
2293 case OP_SGE:
2294 amg_type = sge_amg;
2295 /* cmp >= 0 */
2296 multiplier = -1;
2297 break;
2298 }
2299
2300 tryAMAGICbin_MG(amg_type, 0);
2301 {
2302 dPOPTOPssrl;
2303 const int cmp =
2304 #ifdef USE_LOCALE_COLLATE
2305 (IN_LC_RUNTIME(LC_COLLATE))
2306 ? sv_cmp_locale_flags(left, right, 0)
2307 :
2308 #endif
2309 sv_cmp_flags(left, right, 0);
2310 SETs(boolSV(cmp * multiplier < rhs));
2311 RETURN;
2312 }
2313 }
2314
PP(pp_seq)2315 PP(pp_seq)
2316 {
2317 dSP;
2318 tryAMAGICbin_MG(seq_amg, 0);
2319 {
2320 dPOPTOPssrl;
2321 SETs(boolSV(sv_eq_flags(left, right, 0)));
2322 RETURN;
2323 }
2324 }
2325
PP(pp_sne)2326 PP(pp_sne)
2327 {
2328 dSP;
2329 tryAMAGICbin_MG(sne_amg, 0);
2330 {
2331 dPOPTOPssrl;
2332 SETs(boolSV(!sv_eq_flags(left, right, 0)));
2333 RETURN;
2334 }
2335 }
2336
PP(pp_scmp)2337 PP(pp_scmp)
2338 {
2339 dSP; dTARGET;
2340 tryAMAGICbin_MG(scmp_amg, 0);
2341 {
2342 dPOPTOPssrl;
2343 const int cmp =
2344 #ifdef USE_LOCALE_COLLATE
2345 (IN_LC_RUNTIME(LC_COLLATE))
2346 ? sv_cmp_locale_flags(left, right, 0)
2347 :
2348 #endif
2349 sv_cmp_flags(left, right, 0);
2350 SETi( cmp );
2351 RETURN;
2352 }
2353 }
2354
PP(pp_bit_and)2355 PP(pp_bit_and)
2356 {
2357 dSP; dATARGET;
2358 tryAMAGICbin_MG(band_amg, AMGf_assign);
2359 {
2360 dPOPTOPssrl;
2361 if (SvNIOKp(left) || SvNIOKp(right)) {
2362 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2363 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2364 if (PL_op->op_private & OPpUSEINT) {
2365 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2366 SETi(i);
2367 }
2368 else {
2369 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2370 SETu(u);
2371 }
2372 if (left_ro_nonnum && left != TARG) SvNIOK_off(left);
2373 if (right_ro_nonnum) SvNIOK_off(right);
2374 }
2375 else {
2376 do_vop(PL_op->op_type, TARG, left, right);
2377 SETTARG;
2378 }
2379 RETURN;
2380 }
2381 }
2382
PP(pp_nbit_and)2383 PP(pp_nbit_and)
2384 {
2385 dSP;
2386 tryAMAGICbin_MG(band_amg, AMGf_assign|AMGf_numarg);
2387 {
2388 dATARGET; dPOPTOPssrl;
2389 if (PL_op->op_private & OPpUSEINT) {
2390 const IV i = SvIV_nomg(left) & SvIV_nomg(right);
2391 SETi(i);
2392 }
2393 else {
2394 const UV u = SvUV_nomg(left) & SvUV_nomg(right);
2395 SETu(u);
2396 }
2397 }
2398 RETURN;
2399 }
2400
PP(pp_sbit_and)2401 PP(pp_sbit_and)
2402 {
2403 dSP;
2404 tryAMAGICbin_MG(sband_amg, AMGf_assign);
2405 {
2406 dATARGET; dPOPTOPssrl;
2407 do_vop(OP_BIT_AND, TARG, left, right);
2408 RETSETTARG;
2409 }
2410 }
2411
2412 /* also used for: pp_bit_xor() */
2413
PP(pp_bit_or)2414 PP(pp_bit_or)
2415 {
2416 dSP; dATARGET;
2417 const int op_type = PL_op->op_type;
2418
2419 tryAMAGICbin_MG((op_type == OP_BIT_OR ? bor_amg : bxor_amg), AMGf_assign);
2420 {
2421 dPOPTOPssrl;
2422 if (SvNIOKp(left) || SvNIOKp(right)) {
2423 const bool left_ro_nonnum = !SvNIOKp(left) && SvREADONLY(left);
2424 const bool right_ro_nonnum = !SvNIOKp(right) && SvREADONLY(right);
2425 if (PL_op->op_private & OPpUSEINT) {
2426 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2427 const IV r = SvIV_nomg(right);
2428 const IV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2429 SETi(result);
2430 }
2431 else {
2432 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2433 const UV r = SvUV_nomg(right);
2434 const UV result = op_type == OP_BIT_OR ? (l | r) : (l ^ r);
2435 SETu(result);
2436 }
2437 if (left_ro_nonnum && left != TARG) SvNIOK_off(left);
2438 if (right_ro_nonnum) SvNIOK_off(right);
2439 }
2440 else {
2441 do_vop(op_type, TARG, left, right);
2442 SETTARG;
2443 }
2444 RETURN;
2445 }
2446 }
2447
2448 /* also used for: pp_nbit_xor() */
2449
PP(pp_nbit_or)2450 PP(pp_nbit_or)
2451 {
2452 dSP;
2453 const int op_type = PL_op->op_type;
2454
2455 tryAMAGICbin_MG((op_type == OP_NBIT_OR ? bor_amg : bxor_amg),
2456 AMGf_assign|AMGf_numarg);
2457 {
2458 dATARGET; dPOPTOPssrl;
2459 if (PL_op->op_private & OPpUSEINT) {
2460 const IV l = (USE_LEFT(left) ? SvIV_nomg(left) : 0);
2461 const IV r = SvIV_nomg(right);
2462 const IV result = op_type == OP_NBIT_OR ? (l | r) : (l ^ r);
2463 SETi(result);
2464 }
2465 else {
2466 const UV l = (USE_LEFT(left) ? SvUV_nomg(left) : 0);
2467 const UV r = SvUV_nomg(right);
2468 const UV result = op_type == OP_NBIT_OR ? (l | r) : (l ^ r);
2469 SETu(result);
2470 }
2471 }
2472 RETURN;
2473 }
2474
2475 /* also used for: pp_sbit_xor() */
2476
PP(pp_sbit_or)2477 PP(pp_sbit_or)
2478 {
2479 dSP;
2480 const int op_type = PL_op->op_type;
2481
2482 tryAMAGICbin_MG((op_type == OP_SBIT_OR ? sbor_amg : sbxor_amg),
2483 AMGf_assign);
2484 {
2485 dATARGET; dPOPTOPssrl;
2486 do_vop(op_type == OP_SBIT_OR ? OP_BIT_OR : OP_BIT_XOR, TARG, left,
2487 right);
2488 RETSETTARG;
2489 }
2490 }
2491
2492 PERL_STATIC_INLINE bool
S_negate_string(pTHX)2493 S_negate_string(pTHX)
2494 {
2495 dTARGET; dSP;
2496 STRLEN len;
2497 const char *s;
2498 SV * const sv = TOPs;
2499 if (!SvPOKp(sv) || SvNIOK(sv) || (!SvPOK(sv) && SvNIOKp(sv)))
2500 return FALSE;
2501 s = SvPV_nomg_const(sv, len);
2502 if (isIDFIRST(*s)) {
2503 sv_setpvs(TARG, "-");
2504 sv_catsv(TARG, sv);
2505 }
2506 else if (*s == '+' || (*s == '-' && !looks_like_number(sv))) {
2507 sv_setsv_nomg(TARG, sv);
2508 *SvPV_force_nomg(TARG, len) = *s == '-' ? '+' : '-';
2509 }
2510 else return FALSE;
2511 SETTARG;
2512 return TRUE;
2513 }
2514
PP(pp_negate)2515 PP(pp_negate)
2516 {
2517 dSP; dTARGET;
2518 tryAMAGICun_MG(neg_amg, AMGf_numeric);
2519 if (S_negate_string(aTHX)) return NORMAL;
2520 {
2521 SV * const sv = TOPs;
2522
2523 if (SvIOK(sv)) {
2524 /* It's publicly an integer */
2525 oops_its_an_int:
2526 if (SvIsUV(sv)) {
2527 if (SvIVX(sv) == IV_MIN) {
2528 /* 2s complement assumption. */
2529 SETi(SvIVX(sv)); /* special case: -((UV)IV_MAX+1) ==
2530 IV_MIN */
2531 return NORMAL;
2532 }
2533 else if (SvUVX(sv) <= IV_MAX) {
2534 SETi(-SvIVX(sv));
2535 return NORMAL;
2536 }
2537 }
2538 else if (SvIVX(sv) != IV_MIN) {
2539 SETi(-SvIVX(sv));
2540 return NORMAL;
2541 }
2542 #ifdef PERL_PRESERVE_IVUV
2543 else {
2544 SETu((UV)IV_MIN);
2545 return NORMAL;
2546 }
2547 #endif
2548 }
2549 if (SvNIOKp(sv) && (SvNIOK(sv) || !SvPOK(sv)))
2550 SETn(-SvNV_nomg(sv));
2551 else if (SvPOKp(sv) && SvIV_please_nomg(sv))
2552 goto oops_its_an_int;
2553 else
2554 SETn(-SvNV_nomg(sv));
2555 }
2556 return NORMAL;
2557 }
2558
PP(pp_not)2559 PP(pp_not)
2560 {
2561 dSP;
2562 SV *sv;
2563
2564 tryAMAGICun_MG(not_amg, 0);
2565 sv = *PL_stack_sp;
2566 *PL_stack_sp = boolSV(!SvTRUE_nomg_NN(sv));
2567 return NORMAL;
2568 }
2569
2570 static void
S_scomplement(pTHX_ SV * targ,SV * sv)2571 S_scomplement(pTHX_ SV *targ, SV *sv)
2572 {
2573 U8 *tmps;
2574 I32 anum;
2575 STRLEN len;
2576
2577 sv_copypv_nomg(TARG, sv);
2578 tmps = (U8*)SvPV_nomg(TARG, len);
2579
2580 if (SvUTF8(TARG)) {
2581 if (len && ! utf8_to_bytes(tmps, &len)) {
2582 Perl_croak(aTHX_ FATAL_ABOVE_FF_MSG, PL_op_desc[PL_op->op_type]);
2583 }
2584 SvCUR_set(TARG, len);
2585 SvUTF8_off(TARG);
2586 }
2587
2588 anum = len;
2589
2590 {
2591 long *tmpl;
2592 for ( ; anum && PTR2nat(tmps) % sizeof(long); anum--, tmps++)
2593 *tmps = ~*tmps;
2594 tmpl = (long*)tmps;
2595 for ( ; anum >= (I32)sizeof(long); anum -= (I32)sizeof(long), tmpl++)
2596 *tmpl = ~*tmpl;
2597 tmps = (U8*)tmpl;
2598 }
2599
2600 for ( ; anum > 0; anum--, tmps++)
2601 *tmps = ~*tmps;
2602 }
2603
PP(pp_complement)2604 PP(pp_complement)
2605 {
2606 dSP; dTARGET;
2607 tryAMAGICun_MG(compl_amg, AMGf_numeric);
2608 {
2609 dTOPss;
2610 if (SvNIOKp(sv)) {
2611 if (PL_op->op_private & OPpUSEINT) {
2612 const IV i = ~SvIV_nomg(sv);
2613 SETi(i);
2614 }
2615 else {
2616 const UV u = ~SvUV_nomg(sv);
2617 SETu(u);
2618 }
2619 }
2620 else {
2621 S_scomplement(aTHX_ TARG, sv);
2622 SETTARG;
2623 }
2624 return NORMAL;
2625 }
2626 }
2627
PP(pp_ncomplement)2628 PP(pp_ncomplement)
2629 {
2630 dSP;
2631 tryAMAGICun_MG(compl_amg, AMGf_numeric|AMGf_numarg);
2632 {
2633 dTARGET; dTOPss;
2634 if (PL_op->op_private & OPpUSEINT) {
2635 const IV i = ~SvIV_nomg(sv);
2636 SETi(i);
2637 }
2638 else {
2639 const UV u = ~SvUV_nomg(sv);
2640 SETu(u);
2641 }
2642 }
2643 return NORMAL;
2644 }
2645
PP(pp_scomplement)2646 PP(pp_scomplement)
2647 {
2648 dSP;
2649 tryAMAGICun_MG(scompl_amg, AMGf_numeric);
2650 {
2651 dTARGET; dTOPss;
2652 S_scomplement(aTHX_ TARG, sv);
2653 SETTARG;
2654 return NORMAL;
2655 }
2656 }
2657
2658 /* integer versions of some of the above */
2659
PP(pp_i_multiply)2660 PP(pp_i_multiply)
2661 {
2662 dSP; dATARGET;
2663 tryAMAGICbin_MG(mult_amg, AMGf_assign);
2664 {
2665 dPOPTOPiirl_nomg;
2666 SETi( left * right );
2667 RETURN;
2668 }
2669 }
2670
PP(pp_i_divide)2671 PP(pp_i_divide)
2672 {
2673 IV num;
2674 dSP; dATARGET;
2675 tryAMAGICbin_MG(div_amg, AMGf_assign);
2676 {
2677 dPOPTOPssrl;
2678 IV value = SvIV_nomg(right);
2679 if (value == 0)
2680 DIE(aTHX_ "Illegal division by zero");
2681 num = SvIV_nomg(left);
2682
2683 /* avoid FPE_INTOVF on some platforms when num is IV_MIN */
2684 if (value == -1)
2685 value = - num;
2686 else
2687 value = num / value;
2688 SETi(value);
2689 RETURN;
2690 }
2691 }
2692
PP(pp_i_modulo)2693 PP(pp_i_modulo)
2694 {
2695 dSP; dATARGET;
2696 tryAMAGICbin_MG(modulo_amg, AMGf_assign);
2697 {
2698 dPOPTOPiirl_nomg;
2699 if (!right)
2700 DIE(aTHX_ "Illegal modulus zero");
2701 /* avoid FPE_INTOVF on some platforms when left is IV_MIN */
2702 if (right == -1)
2703 SETi( 0 );
2704 else
2705 SETi( left % right );
2706 RETURN;
2707 }
2708 }
2709
PP(pp_i_add)2710 PP(pp_i_add)
2711 {
2712 dSP; dATARGET;
2713 tryAMAGICbin_MG(add_amg, AMGf_assign);
2714 {
2715 dPOPTOPiirl_ul_nomg;
2716 SETi( left + right );
2717 RETURN;
2718 }
2719 }
2720
PP(pp_i_subtract)2721 PP(pp_i_subtract)
2722 {
2723 dSP; dATARGET;
2724 tryAMAGICbin_MG(subtr_amg, AMGf_assign);
2725 {
2726 dPOPTOPiirl_ul_nomg;
2727 SETi( left - right );
2728 RETURN;
2729 }
2730 }
2731
PP(pp_i_lt)2732 PP(pp_i_lt)
2733 {
2734 dSP;
2735 tryAMAGICbin_MG(lt_amg, 0);
2736 {
2737 dPOPTOPiirl_nomg;
2738 SETs(boolSV(left < right));
2739 RETURN;
2740 }
2741 }
2742
PP(pp_i_gt)2743 PP(pp_i_gt)
2744 {
2745 dSP;
2746 tryAMAGICbin_MG(gt_amg, 0);
2747 {
2748 dPOPTOPiirl_nomg;
2749 SETs(boolSV(left > right));
2750 RETURN;
2751 }
2752 }
2753
PP(pp_i_le)2754 PP(pp_i_le)
2755 {
2756 dSP;
2757 tryAMAGICbin_MG(le_amg, 0);
2758 {
2759 dPOPTOPiirl_nomg;
2760 SETs(boolSV(left <= right));
2761 RETURN;
2762 }
2763 }
2764
PP(pp_i_ge)2765 PP(pp_i_ge)
2766 {
2767 dSP;
2768 tryAMAGICbin_MG(ge_amg, 0);
2769 {
2770 dPOPTOPiirl_nomg;
2771 SETs(boolSV(left >= right));
2772 RETURN;
2773 }
2774 }
2775
PP(pp_i_eq)2776 PP(pp_i_eq)
2777 {
2778 dSP;
2779 tryAMAGICbin_MG(eq_amg, 0);
2780 {
2781 dPOPTOPiirl_nomg;
2782 SETs(boolSV(left == right));
2783 RETURN;
2784 }
2785 }
2786
PP(pp_i_ne)2787 PP(pp_i_ne)
2788 {
2789 dSP;
2790 tryAMAGICbin_MG(ne_amg, 0);
2791 {
2792 dPOPTOPiirl_nomg;
2793 SETs(boolSV(left != right));
2794 RETURN;
2795 }
2796 }
2797
PP(pp_i_ncmp)2798 PP(pp_i_ncmp)
2799 {
2800 dSP; dTARGET;
2801 tryAMAGICbin_MG(ncmp_amg, 0);
2802 {
2803 dPOPTOPiirl_nomg;
2804 I32 value;
2805
2806 if (left > right)
2807 value = 1;
2808 else if (left < right)
2809 value = -1;
2810 else
2811 value = 0;
2812 SETi(value);
2813 RETURN;
2814 }
2815 }
2816
PP(pp_i_negate)2817 PP(pp_i_negate)
2818 {
2819 dSP; dTARGET;
2820 tryAMAGICun_MG(neg_amg, 0);
2821 if (S_negate_string(aTHX)) return NORMAL;
2822 {
2823 SV * const sv = TOPs;
2824 IV const i = SvIV_nomg(sv);
2825 SETi(-i);
2826 return NORMAL;
2827 }
2828 }
2829
2830 /* High falutin' math. */
2831
PP(pp_atan2)2832 PP(pp_atan2)
2833 {
2834 dSP; dTARGET;
2835 tryAMAGICbin_MG(atan2_amg, 0);
2836 {
2837 dPOPTOPnnrl_nomg;
2838 SETn(Perl_atan2(left, right));
2839 RETURN;
2840 }
2841 }
2842
2843
2844 /* also used for: pp_cos() pp_exp() pp_log() pp_sqrt() */
2845
PP(pp_sin)2846 PP(pp_sin)
2847 {
2848 dSP; dTARGET;
2849 int amg_type = fallback_amg;
2850 const char *neg_report = NULL;
2851 const int op_type = PL_op->op_type;
2852
2853 switch (op_type) {
2854 case OP_SIN: amg_type = sin_amg; break;
2855 case OP_COS: amg_type = cos_amg; break;
2856 case OP_EXP: amg_type = exp_amg; break;
2857 case OP_LOG: amg_type = log_amg; neg_report = "log"; break;
2858 case OP_SQRT: amg_type = sqrt_amg; neg_report = "sqrt"; break;
2859 }
2860
2861 assert(amg_type != fallback_amg);
2862
2863 tryAMAGICun_MG(amg_type, 0);
2864 {
2865 SV * const arg = TOPs;
2866 const NV value = SvNV_nomg(arg);
2867 #ifdef NV_NAN
2868 NV result = NV_NAN;
2869 #else
2870 NV result = 0.0;
2871 #endif
2872 if (neg_report) { /* log or sqrt */
2873 if (
2874 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2875 ! Perl_isnan(value) &&
2876 #endif
2877 (op_type == OP_LOG ? (value <= 0.0) : (value < 0.0))) {
2878 SET_NUMERIC_STANDARD();
2879 /* diag_listed_as: Can't take log of %g */
2880 DIE(aTHX_ "Can't take %s of %" NVgf, neg_report, value);
2881 }
2882 }
2883 switch (op_type) {
2884 default:
2885 case OP_SIN: result = Perl_sin(value); break;
2886 case OP_COS: result = Perl_cos(value); break;
2887 case OP_EXP: result = Perl_exp(value); break;
2888 case OP_LOG: result = Perl_log(value); break;
2889 case OP_SQRT: result = Perl_sqrt(value); break;
2890 }
2891 SETn(result);
2892 return NORMAL;
2893 }
2894 }
2895
2896 /* Support Configure command-line overrides for rand() functions.
2897 After 5.005, perhaps we should replace this by Configure support
2898 for drand48(), random(), or rand(). For 5.005, though, maintain
2899 compatibility by calling rand() but allow the user to override it.
2900 See INSTALL for details. --Andy Dougherty 15 July 1998
2901 */
2902 /* Now it's after 5.005, and Configure supports drand48() and random(),
2903 in addition to rand(). So the overrides should not be needed any more.
2904 --Jarkko Hietaniemi 27 September 1998
2905 */
2906
PP(pp_rand)2907 PP(pp_rand)
2908 {
2909 if (!PL_srand_called) {
2910 (void)seedDrand01((Rand_seed_t)seed());
2911 PL_srand_called = TRUE;
2912 }
2913 {
2914 dSP;
2915 NV value;
2916
2917 if (MAXARG < 1)
2918 {
2919 EXTEND(SP, 1);
2920 value = 1.0;
2921 }
2922 else {
2923 SV * const sv = POPs;
2924 if(!sv)
2925 value = 1.0;
2926 else
2927 value = SvNV(sv);
2928 }
2929 /* 1 of 2 things can be carried through SvNV, SP or TARG, SP was carried */
2930 #if defined(NAN_COMPARE_BROKEN) && defined(Perl_isnan)
2931 if (! Perl_isnan(value) && value == 0.0)
2932 #else
2933 if (value == 0.0)
2934 #endif
2935 value = 1.0;
2936 {
2937 dTARGET;
2938 PUSHs(TARG);
2939 PUTBACK;
2940 value *= Drand01();
2941 sv_setnv_mg(TARG, value);
2942 }
2943 }
2944 return NORMAL;
2945 }
2946
PP(pp_srand)2947 PP(pp_srand)
2948 {
2949 dSP; dTARGET;
2950 UV anum;
2951
2952 if (MAXARG >= 1 && (TOPs || POPs)) {
2953 SV *top;
2954 char *pv;
2955 STRLEN len;
2956 int flags;
2957
2958 top = POPs;
2959 pv = SvPV(top, len);
2960 flags = grok_number(pv, len, &anum);
2961
2962 if (!(flags & IS_NUMBER_IN_UV)) {
2963 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
2964 "Integer overflow in srand");
2965 anum = UV_MAX;
2966 }
2967 }
2968 else {
2969 anum = seed();
2970 }
2971
2972 (void)seedDrand01((Rand_seed_t)anum);
2973 PL_srand_called = TRUE;
2974 if (anum)
2975 XPUSHu(anum);
2976 else {
2977 /* Historically srand always returned true. We can avoid breaking
2978 that like this: */
2979 sv_setpvs(TARG, "0 but true");
2980 XPUSHTARG;
2981 }
2982 RETURN;
2983 }
2984
PP(pp_int)2985 PP(pp_int)
2986 {
2987 dSP; dTARGET;
2988 tryAMAGICun_MG(int_amg, AMGf_numeric);
2989 {
2990 SV * const sv = TOPs;
2991 const IV iv = SvIV_nomg(sv);
2992 /* XXX it's arguable that compiler casting to IV might be subtly
2993 different from modf (for numbers inside (IV_MIN,UV_MAX)) in which
2994 else preferring IV has introduced a subtle behaviour change bug. OTOH
2995 relying on floating point to be accurate is a bug. */
2996
2997 if (!SvOK(sv)) {
2998 SETu(0);
2999 }
3000 else if (SvIOK(sv)) {
3001 if (SvIsUV(sv))
3002 SETu(SvUV_nomg(sv));
3003 else
3004 SETi(iv);
3005 }
3006 else {
3007 const NV value = SvNV_nomg(sv);
3008 if (UNLIKELY(Perl_isinfnan(value)))
3009 SETn(value);
3010 else if (value >= 0.0) {
3011 if (value < (NV)UV_MAX + 0.5) {
3012 SETu(U_V(value));
3013 } else {
3014 SETn(Perl_floor(value));
3015 }
3016 }
3017 else {
3018 if (value > (NV)IV_MIN - 0.5) {
3019 SETi(I_V(value));
3020 } else {
3021 SETn(Perl_ceil(value));
3022 }
3023 }
3024 }
3025 }
3026 return NORMAL;
3027 }
3028
PP(pp_abs)3029 PP(pp_abs)
3030 {
3031 dSP; dTARGET;
3032 tryAMAGICun_MG(abs_amg, AMGf_numeric);
3033 {
3034 SV * const sv = TOPs;
3035 /* This will cache the NV value if string isn't actually integer */
3036 const IV iv = SvIV_nomg(sv);
3037
3038 if (!SvOK(sv)) {
3039 SETu(0);
3040 }
3041 else if (SvIOK(sv)) {
3042 /* IVX is precise */
3043 if (SvIsUV(sv)) {
3044 SETu(SvUV_nomg(sv)); /* force it to be numeric only */
3045 } else {
3046 if (iv >= 0) {
3047 SETi(iv);
3048 } else {
3049 if (iv != IV_MIN) {
3050 SETi(-iv);
3051 } else {
3052 /* 2s complement assumption. Also, not really needed as
3053 IV_MIN and -IV_MIN should both be %100...00 and NV-able */
3054 SETu((UV)IV_MIN);
3055 }
3056 }
3057 }
3058 } else{
3059 const NV value = SvNV_nomg(sv);
3060 SETn(Perl_fabs(value));
3061 }
3062 }
3063 return NORMAL;
3064 }
3065
3066
3067 /* also used for: pp_hex() */
3068
PP(pp_oct)3069 PP(pp_oct)
3070 {
3071 dSP; dTARGET;
3072 const char *tmps;
3073 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES;
3074 STRLEN len;
3075 NV result_nv;
3076 UV result_uv;
3077 SV* const sv = TOPs;
3078
3079 tmps = (SvPV_const(sv, len));
3080 if (DO_UTF8(sv)) {
3081 /* If Unicode, try to downgrade
3082 * If not possible, croak. */
3083 SV* const tsv = sv_2mortal(newSVsv(sv));
3084
3085 SvUTF8_on(tsv);
3086 sv_utf8_downgrade(tsv, FALSE);
3087 tmps = SvPV_const(tsv, len);
3088 }
3089 if (PL_op->op_type == OP_HEX)
3090 goto hex;
3091
3092 while (*tmps && len && isSPACE(*tmps))
3093 tmps++, len--;
3094 if (*tmps == '0')
3095 tmps++, len--;
3096 if (isALPHA_FOLD_EQ(*tmps, 'x')) {
3097 tmps++, len--;
3098 flags |= PERL_SCAN_DISALLOW_PREFIX;
3099 hex:
3100 result_uv = grok_hex (tmps, &len, &flags, &result_nv);
3101 }
3102 else if (isALPHA_FOLD_EQ(*tmps, 'b')) {
3103 tmps++, len--;
3104 flags |= PERL_SCAN_DISALLOW_PREFIX;
3105 result_uv = grok_bin (tmps, &len, &flags, &result_nv);
3106 }
3107 else {
3108 if (isALPHA_FOLD_EQ(*tmps, 'o')) {
3109 tmps++, len--;
3110 }
3111 result_uv = grok_oct (tmps, &len, &flags, &result_nv);
3112 }
3113
3114 if (flags & PERL_SCAN_GREATER_THAN_UV_MAX) {
3115 SETn(result_nv);
3116 }
3117 else {
3118 SETu(result_uv);
3119 }
3120 return NORMAL;
3121 }
3122
3123 /* String stuff. */
3124
3125
PP(pp_length)3126 PP(pp_length)
3127 {
3128 dSP; dTARGET;
3129 SV * const sv = TOPs;
3130
3131 U32 in_bytes = IN_BYTES;
3132 /* Simplest case shortcut:
3133 * set svflags to just the SVf_POK|SVs_GMG|SVf_UTF8 from the SV,
3134 * with the SVf_UTF8 flag inverted if under 'use bytes' (HINT_BYTES
3135 * set)
3136 */
3137 U32 svflags = (SvFLAGS(sv) ^ (in_bytes << 26)) & (SVf_POK|SVs_GMG|SVf_UTF8);
3138
3139 STATIC_ASSERT_STMT(SVf_UTF8 == (HINT_BYTES << 26));
3140 SETs(TARG);
3141
3142 if (LIKELY(svflags == SVf_POK))
3143 goto simple_pv;
3144
3145 if (svflags & SVs_GMG)
3146 mg_get(sv);
3147
3148 if (SvOK(sv)) {
3149 STRLEN len;
3150 if (!IN_BYTES) { /* reread to avoid using an C auto/register */
3151 if ((SvFLAGS(sv) & (SVf_POK|SVf_UTF8)) == SVf_POK)
3152 goto simple_pv;
3153 if ( SvPOK(sv) && (PL_op->op_private & OPpTRUEBOOL)) {
3154 /* no need to convert from bytes to chars */
3155 len = SvCUR(sv);
3156 goto return_bool;
3157 }
3158 len = sv_len_utf8_nomg(sv);
3159 }
3160 else {
3161 /* unrolled SvPV_nomg_const(sv,len) */
3162 if (SvPOK_nog(sv)) {
3163 simple_pv:
3164 len = SvCUR(sv);
3165 if (PL_op->op_private & OPpTRUEBOOL) {
3166 return_bool:
3167 SETs(len ? &PL_sv_yes : &PL_sv_zero);
3168 return NORMAL;
3169 }
3170 }
3171 else {
3172 (void)sv_2pv_flags(sv, &len, 0|SV_CONST_RETURN);
3173 }
3174 }
3175 TARGi((IV)(len), 1);
3176 }
3177 else {
3178 if (!SvPADTMP(TARG)) {
3179 /* OPpTARGET_MY: targ is var in '$lex = length()' */
3180 sv_set_undef(TARG);
3181 SvSETMAGIC(TARG);
3182 }
3183 else
3184 /* TARG is on stack at this point and is overwriten by SETs.
3185 * This branch is the odd one out, so put TARG by default on
3186 * stack earlier to let local SP go out of liveness sooner */
3187 SETs(&PL_sv_undef);
3188 }
3189 return NORMAL; /* no putback, SP didn't move in this opcode */
3190 }
3191
3192
3193 /* Returns false if substring is completely outside original string.
3194 No length is indicated by len_iv = 0 and len_is_uv = 0. len_is_uv must
3195 always be true for an explicit 0.
3196 */
3197 bool
Perl_translate_substr_offsets(STRLEN curlen,IV pos1_iv,bool pos1_is_uv,IV len_iv,bool len_is_uv,STRLEN * posp,STRLEN * lenp)3198 Perl_translate_substr_offsets( STRLEN curlen, IV pos1_iv,
3199 bool pos1_is_uv, IV len_iv,
3200 bool len_is_uv, STRLEN *posp,
3201 STRLEN *lenp)
3202 {
3203 IV pos2_iv;
3204 int pos2_is_uv;
3205
3206 PERL_ARGS_ASSERT_TRANSLATE_SUBSTR_OFFSETS;
3207
3208 if (!pos1_is_uv && pos1_iv < 0 && curlen) {
3209 pos1_is_uv = curlen-1 > ~(UV)pos1_iv;
3210 pos1_iv += curlen;
3211 }
3212 if ((pos1_is_uv || pos1_iv > 0) && (UV)pos1_iv > curlen)
3213 return FALSE;
3214
3215 if (len_iv || len_is_uv) {
3216 if (!len_is_uv && len_iv < 0) {
3217 pos2_iv = curlen + len_iv;
3218 if (curlen)
3219 pos2_is_uv = curlen-1 > ~(UV)len_iv;
3220 else
3221 pos2_is_uv = 0;
3222 } else { /* len_iv >= 0 */
3223 if (!pos1_is_uv && pos1_iv < 0) {
3224 pos2_iv = pos1_iv + len_iv;
3225 pos2_is_uv = (UV)len_iv > (UV)IV_MAX;
3226 } else {
3227 if ((UV)len_iv > curlen-(UV)pos1_iv)
3228 pos2_iv = curlen;
3229 else
3230 pos2_iv = pos1_iv+len_iv;
3231 pos2_is_uv = 1;
3232 }
3233 }
3234 }
3235 else {
3236 pos2_iv = curlen;
3237 pos2_is_uv = 1;
3238 }
3239
3240 if (!pos2_is_uv && pos2_iv < 0) {
3241 if (!pos1_is_uv && pos1_iv < 0)
3242 return FALSE;
3243 pos2_iv = 0;
3244 }
3245 else if (!pos1_is_uv && pos1_iv < 0)
3246 pos1_iv = 0;
3247
3248 if ((UV)pos2_iv < (UV)pos1_iv)
3249 pos2_iv = pos1_iv;
3250 if ((UV)pos2_iv > curlen)
3251 pos2_iv = curlen;
3252
3253 /* pos1_iv and pos2_iv both in 0..curlen, so the cast is safe */
3254 *posp = (STRLEN)( (UV)pos1_iv );
3255 *lenp = (STRLEN)( (UV)pos2_iv - (UV)pos1_iv );
3256
3257 return TRUE;
3258 }
3259
PP(pp_substr)3260 PP(pp_substr)
3261 {
3262 dSP; dTARGET;
3263 SV *sv;
3264 STRLEN curlen;
3265 STRLEN utf8_curlen;
3266 SV * pos_sv;
3267 IV pos1_iv;
3268 int pos1_is_uv;
3269 SV * len_sv;
3270 IV len_iv = 0;
3271 int len_is_uv = 0;
3272 I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3273 const bool rvalue = (GIMME_V != G_VOID);
3274 const char *tmps;
3275 SV *repl_sv = NULL;
3276 const char *repl = NULL;
3277 STRLEN repl_len;
3278 int num_args = PL_op->op_private & 7;
3279 bool repl_need_utf8_upgrade = FALSE;
3280
3281 if (num_args > 2) {
3282 if (num_args > 3) {
3283 if(!(repl_sv = POPs)) num_args--;
3284 }
3285 if ((len_sv = POPs)) {
3286 len_iv = SvIV(len_sv);
3287 len_is_uv = len_iv ? SvIOK_UV(len_sv) : 1;
3288 }
3289 else num_args--;
3290 }
3291 pos_sv = POPs;
3292 pos1_iv = SvIV(pos_sv);
3293 pos1_is_uv = SvIOK_UV(pos_sv);
3294 sv = POPs;
3295 if (PL_op->op_private & OPpSUBSTR_REPL_FIRST) {
3296 assert(!repl_sv);
3297 repl_sv = POPs;
3298 }
3299 if (lvalue && !repl_sv) {
3300 SV * ret;
3301 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3302 sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0);
3303 LvTYPE(ret) = 'x';
3304 LvTARG(ret) = SvREFCNT_inc_simple(sv);
3305 LvTARGOFF(ret) =
3306 pos1_is_uv || pos1_iv >= 0
3307 ? (STRLEN)(UV)pos1_iv
3308 : (LvFLAGS(ret) |= LVf_NEG_OFF, (STRLEN)(UV)-pos1_iv);
3309 LvTARGLEN(ret) =
3310 len_is_uv || len_iv > 0
3311 ? (STRLEN)(UV)len_iv
3312 : (LvFLAGS(ret) |= LVf_NEG_LEN, (STRLEN)(UV)-len_iv);
3313
3314 PUSHs(ret); /* avoid SvSETMAGIC here */
3315 RETURN;
3316 }
3317 if (repl_sv) {
3318 repl = SvPV_const(repl_sv, repl_len);
3319 SvGETMAGIC(sv);
3320 if (SvROK(sv))
3321 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR),
3322 "Attempt to use reference as lvalue in substr"
3323 );
3324 tmps = SvPV_force_nomg(sv, curlen);
3325 if (DO_UTF8(repl_sv) && repl_len) {
3326 if (!DO_UTF8(sv)) {
3327 /* Upgrade the dest, and recalculate tmps in case the buffer
3328 * got reallocated; curlen may also have been changed */
3329 sv_utf8_upgrade_nomg(sv);
3330 tmps = SvPV_nomg(sv, curlen);
3331 }
3332 }
3333 else if (DO_UTF8(sv))
3334 repl_need_utf8_upgrade = TRUE;
3335 }
3336 else tmps = SvPV_const(sv, curlen);
3337 if (DO_UTF8(sv)) {
3338 utf8_curlen = sv_or_pv_len_utf8(sv, tmps, curlen);
3339 if (utf8_curlen == curlen)
3340 utf8_curlen = 0;
3341 else
3342 curlen = utf8_curlen;
3343 }
3344 else
3345 utf8_curlen = 0;
3346
3347 {
3348 STRLEN pos, len, byte_len, byte_pos;
3349
3350 if (!translate_substr_offsets(
3351 curlen, pos1_iv, pos1_is_uv, len_iv, len_is_uv, &pos, &len
3352 )) goto bound_fail;
3353
3354 byte_len = len;
3355 byte_pos = utf8_curlen
3356 ? sv_or_pv_pos_u2b(sv, tmps, pos, &byte_len) : pos;
3357
3358 tmps += byte_pos;
3359
3360 if (rvalue) {
3361 SvTAINTED_off(TARG); /* decontaminate */
3362 SvUTF8_off(TARG); /* decontaminate */
3363 sv_setpvn(TARG, tmps, byte_len);
3364 #ifdef USE_LOCALE_COLLATE
3365 sv_unmagic(TARG, PERL_MAGIC_collxfrm);
3366 #endif
3367 if (utf8_curlen)
3368 SvUTF8_on(TARG);
3369 }
3370
3371 if (repl) {
3372 SV* repl_sv_copy = NULL;
3373
3374 if (repl_need_utf8_upgrade) {
3375 repl_sv_copy = newSVsv(repl_sv);
3376 sv_utf8_upgrade(repl_sv_copy);
3377 repl = SvPV_const(repl_sv_copy, repl_len);
3378 }
3379 if (!SvOK(sv))
3380 SvPVCLEAR(sv);
3381 sv_insert_flags(sv, byte_pos, byte_len, repl, repl_len, 0);
3382 SvREFCNT_dec(repl_sv_copy);
3383 }
3384 }
3385 if (PL_op->op_private & OPpSUBSTR_REPL_FIRST)
3386 SP++;
3387 else if (rvalue) {
3388 SvSETMAGIC(TARG);
3389 PUSHs(TARG);
3390 }
3391 RETURN;
3392
3393 bound_fail:
3394 if (repl)
3395 Perl_croak(aTHX_ "substr outside of string");
3396 Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), "substr outside of string");
3397 RETPUSHUNDEF;
3398 }
3399
PP(pp_vec)3400 PP(pp_vec)
3401 {
3402 dSP;
3403 const IV size = POPi;
3404 SV* offsetsv = POPs;
3405 SV * const src = POPs;
3406 const I32 lvalue = PL_op->op_flags & OPf_MOD || LVRET;
3407 SV * ret;
3408 UV retuv;
3409 STRLEN offset = 0;
3410 char errflags = 0;
3411
3412 /* extract a STRLEN-ranged integer value from offsetsv into offset,
3413 * or flag that its out of range */
3414 {
3415 IV iv = SvIV(offsetsv);
3416
3417 /* avoid a large UV being wrapped to a negative value */
3418 if (SvIOK_UV(offsetsv) && SvUVX(offsetsv) > (UV)IV_MAX)
3419 errflags = LVf_OUT_OF_RANGE;
3420 else if (iv < 0)
3421 errflags = (LVf_NEG_OFF|LVf_OUT_OF_RANGE);
3422 #if PTRSIZE < IVSIZE
3423 else if (iv > Size_t_MAX)
3424 errflags = LVf_OUT_OF_RANGE;
3425 #endif
3426 else
3427 offset = (STRLEN)iv;
3428 }
3429
3430 retuv = errflags ? 0 : do_vecget(src, offset, size);
3431
3432 if (lvalue) { /* it's an lvalue! */
3433 ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */
3434 sv_magic(ret, NULL, PERL_MAGIC_vec, NULL, 0);
3435 LvTYPE(ret) = 'v';
3436 LvTARG(ret) = SvREFCNT_inc_simple(src);
3437 LvTARGOFF(ret) = offset;
3438 LvTARGLEN(ret) = size;
3439 LvFLAGS(ret) = errflags;
3440 }
3441 else {
3442 dTARGET;
3443 SvTAINTED_off(TARG); /* decontaminate */
3444 ret = TARG;
3445 }
3446
3447 sv_setuv(ret, retuv);
3448 if (!lvalue)
3449 SvSETMAGIC(ret);
3450 PUSHs(ret);
3451 RETURN;
3452 }
3453
3454
3455 /* also used for: pp_rindex() */
3456
PP(pp_index)3457 PP(pp_index)
3458 {
3459 dSP; dTARGET;
3460 SV *big;
3461 SV *little;
3462 SV *temp = NULL;
3463 STRLEN biglen;
3464 STRLEN llen = 0;
3465 SSize_t offset = 0;
3466 SSize_t retval;
3467 const char *big_p;
3468 const char *little_p;
3469 bool big_utf8;
3470 bool little_utf8;
3471 const bool is_index = PL_op->op_type == OP_INDEX;
3472 const bool threeargs = MAXARG >= 3 && (TOPs || ((void)POPs,0));
3473
3474 if (threeargs)
3475 offset = POPi;
3476 little = POPs;
3477 big = POPs;
3478 big_p = SvPV_const(big, biglen);
3479 little_p = SvPV_const(little, llen);
3480
3481 big_utf8 = DO_UTF8(big);
3482 little_utf8 = DO_UTF8(little);
3483 if (big_utf8 ^ little_utf8) {
3484 /* One needs to be upgraded. */
3485 if (little_utf8) {
3486 /* Well, maybe instead we might be able to downgrade the small
3487 string? */
3488 char * const pv = (char*)bytes_from_utf8((U8 *)little_p, &llen,
3489 &little_utf8);
3490 if (little_utf8) {
3491 /* If the large string is ISO-8859-1, and it's not possible to
3492 convert the small string to ISO-8859-1, then there is no
3493 way that it could be found anywhere by index. */
3494 retval = -1;
3495 goto push_result;
3496 }
3497
3498 /* At this point, pv is a malloc()ed string. So donate it to temp
3499 to ensure it will get free()d */
3500 little = temp = newSV(0);
3501 sv_usepvn(temp, pv, llen);
3502 little_p = SvPVX(little);
3503 } else {
3504 temp = newSVpvn(little_p, llen);
3505
3506 sv_utf8_upgrade(temp);
3507 little = temp;
3508 little_p = SvPV_const(little, llen);
3509 }
3510 }
3511 if (SvGAMAGIC(big)) {
3512 /* Life just becomes a lot easier if I use a temporary here.
3513 Otherwise I need to avoid calls to sv_pos_u2b(), which (dangerously)
3514 will trigger magic and overloading again, as will fbm_instr()
3515 */
3516 big = newSVpvn_flags(big_p, biglen,
3517 SVs_TEMP | (big_utf8 ? SVf_UTF8 : 0));
3518 big_p = SvPVX(big);
3519 }
3520 if (SvGAMAGIC(little) || (is_index && !SvOK(little))) {
3521 /* index && SvOK() is a hack. fbm_instr() calls SvPV_const, which will
3522 warn on undef, and we've already triggered a warning with the
3523 SvPV_const some lines above. We can't remove that, as we need to
3524 call some SvPV to trigger overloading early and find out if the
3525 string is UTF-8.
3526 This is all getting too messy. The API isn't quite clean enough,
3527 because data access has side effects.
3528 */
3529 little = newSVpvn_flags(little_p, llen,
3530 SVs_TEMP | (little_utf8 ? SVf_UTF8 : 0));
3531 little_p = SvPVX(little);
3532 }
3533
3534 if (!threeargs)
3535 offset = is_index ? 0 : biglen;
3536 else {
3537 if (big_utf8 && offset > 0)
3538 offset = sv_pos_u2b_flags(big, offset, 0, SV_CONST_RETURN);
3539 if (!is_index)
3540 offset += llen;
3541 }
3542 if (offset < 0)
3543 offset = 0;
3544 else if (offset > (SSize_t)biglen)
3545 offset = biglen;
3546 if (!(little_p = is_index
3547 ? fbm_instr((unsigned char*)big_p + offset,
3548 (unsigned char*)big_p + biglen, little, 0)
3549 : rninstr(big_p, big_p + offset,
3550 little_p, little_p + llen)))
3551 retval = -1;
3552 else {
3553 retval = little_p - big_p;
3554 if (retval > 1 && big_utf8)
3555 retval = sv_pos_b2u_flags(big, retval, SV_CONST_RETURN);
3556 }
3557 SvREFCNT_dec(temp);
3558
3559 push_result:
3560 /* OPpTRUEBOOL indicates an '== -1' has been optimised away */
3561 if (PL_op->op_private & OPpTRUEBOOL) {
3562 SV *result = ((retval != -1) ^ cBOOL(PL_op->op_private & OPpINDEX_BOOLNEG))
3563 ? &PL_sv_yes : &PL_sv_no;
3564 if (PL_op->op_private & OPpTARGET_MY) {
3565 /* $lex = (index() == -1) */
3566 sv_setsv_mg(TARG, result);
3567 PUSHs(TARG);
3568 }
3569 else {
3570 PUSHs(result);
3571 }
3572 }
3573 else
3574 PUSHi(retval);
3575 RETURN;
3576 }
3577
PP(pp_sprintf)3578 PP(pp_sprintf)
3579 {
3580 dSP; dMARK; dORIGMARK; dTARGET;
3581 SvTAINTED_off(TARG);
3582 do_sprintf(TARG, SP-MARK, MARK+1);
3583 TAINT_IF(SvTAINTED(TARG));
3584 SP = ORIGMARK;
3585 PUSHTARG;
3586 RETURN;
3587 }
3588
PP(pp_ord)3589 PP(pp_ord)
3590 {
3591 dSP; dTARGET;
3592
3593 SV *argsv = TOPs;
3594 STRLEN len;
3595 const U8 *s = (U8*)SvPV_const(argsv, len);
3596
3597 SETu(DO_UTF8(argsv)
3598 ? (len ? utf8n_to_uvchr(s, len, 0, UTF8_ALLOW_ANYUV) : 0)
3599 : (UV)(*s));
3600
3601 return NORMAL;
3602 }
3603
PP(pp_chr)3604 PP(pp_chr)
3605 {
3606 dSP; dTARGET;
3607 char *tmps;
3608 UV value;
3609 SV *top = TOPs;
3610
3611 SvGETMAGIC(top);
3612 if (UNLIKELY(SvAMAGIC(top)))
3613 top = sv_2num(top);
3614 if (UNLIKELY(isinfnansv(top)))
3615 Perl_croak(aTHX_ "Cannot chr %" NVgf, SvNV(top));
3616 else {
3617 if (!IN_BYTES /* under bytes, chr(-1) eq chr(0xff), etc. */
3618 && ((SvIOKp(top) && !SvIsUV(top) && SvIV_nomg(top) < 0)
3619 ||
3620 ((SvNOKp(top) || (SvOK(top) && !SvIsUV(top)))
3621 && SvNV_nomg(top) < 0.0)))
3622 {
3623 if (ckWARN(WARN_UTF8)) {
3624 if (SvGMAGICAL(top)) {
3625 SV *top2 = sv_newmortal();
3626 sv_setsv_nomg(top2, top);
3627 top = top2;
3628 }
3629 Perl_warner(aTHX_ packWARN(WARN_UTF8),
3630 "Invalid negative number (%" SVf ") in chr", SVfARG(top));
3631 }
3632 value = UNICODE_REPLACEMENT;
3633 } else {
3634 value = SvUV_nomg(top);
3635 }
3636 }
3637
3638 SvUPGRADE(TARG,SVt_PV);
3639
3640 if (value > 255 && !IN_BYTES) {
3641 SvGROW(TARG, (STRLEN)UVCHR_SKIP(value)+1);
3642 tmps = (char*)uvchr_to_utf8_flags((U8*)SvPVX(TARG), value, 0);
3643 SvCUR_set(TARG, tmps - SvPVX_const(TARG));
3644 *tmps = '\0';
3645 (void)SvPOK_only(TARG);
3646 SvUTF8_on(TARG);
3647 SETTARG;
3648 return NORMAL;
3649 }
3650
3651 SvGROW(TARG,2);
3652 SvCUR_set(TARG, 1);
3653 tmps = SvPVX(TARG);
3654 *tmps++ = (char)value;
3655 *tmps = '\0';
3656 (void)SvPOK_only(TARG);
3657
3658 SETTARG;
3659 return NORMAL;
3660 }
3661
PP(pp_crypt)3662 PP(pp_crypt)
3663 {
3664 #ifdef HAS_CRYPT
3665 dSP; dTARGET;
3666 dPOPTOPssrl;
3667 STRLEN len;
3668 const char *tmps = SvPV_const(left, len);
3669
3670 if (DO_UTF8(left)) {
3671 /* If Unicode, try to downgrade.
3672 * If not possible, croak.
3673 * Yes, we made this up. */
3674 SV* const tsv = newSVpvn_flags(tmps, len, SVf_UTF8|SVs_TEMP);
3675
3676 sv_utf8_downgrade(tsv, FALSE);
3677 tmps = SvPV_const(tsv, len);
3678 }
3679 # ifdef USE_ITHREADS
3680 # ifdef HAS_CRYPT_R
3681 if (!PL_reentrant_buffer->_crypt_struct_buffer) {
3682 /* This should be threadsafe because in ithreads there is only
3683 * one thread per interpreter. If this would not be true,
3684 * we would need a mutex to protect this malloc. */
3685 PL_reentrant_buffer->_crypt_struct_buffer =
3686 (struct crypt_data *)safemalloc(sizeof(struct crypt_data));
3687 # if defined(__GLIBC__) || defined(__EMX__)
3688 if (PL_reentrant_buffer->_crypt_struct_buffer) {
3689 PL_reentrant_buffer->_crypt_struct_buffer->initialized = 0;
3690 }
3691 # endif
3692 }
3693 # endif /* HAS_CRYPT_R */
3694 # endif /* USE_ITHREADS */
3695
3696 sv_setpv(TARG, PerlProc_crypt(tmps, SvPV_nolen_const(right)));
3697
3698 SvUTF8_off(TARG);
3699 SETTARG;
3700 RETURN;
3701 #else
3702 DIE(aTHX_
3703 "The crypt() function is unimplemented due to excessive paranoia.");
3704 #endif
3705 }
3706
3707 /* Generally UTF-8 and UTF-EBCDIC are indistinguishable at this level. So
3708 * most comments below say UTF-8, when in fact they mean UTF-EBCDIC as well */
3709
3710
3711 /* also used for: pp_lcfirst() */
3712
PP(pp_ucfirst)3713 PP(pp_ucfirst)
3714 {
3715 /* Actually is both lcfirst() and ucfirst(). Only the first character
3716 * changes. This means that possibly we can change in-place, ie., just
3717 * take the source and change that one character and store it back, but not
3718 * if read-only etc, or if the length changes */
3719
3720 dSP;
3721 SV *source = TOPs;
3722 STRLEN slen; /* slen is the byte length of the whole SV. */
3723 STRLEN need;
3724 SV *dest;
3725 bool inplace; /* ? Convert first char only, in-place */
3726 bool doing_utf8 = FALSE; /* ? using utf8 */
3727 bool convert_source_to_utf8 = FALSE; /* ? need to convert */
3728 const int op_type = PL_op->op_type;
3729 const U8 *s;
3730 U8 *d;
3731 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
3732 STRLEN ulen; /* ulen is the byte length of the original Unicode character
3733 * stored as UTF-8 at s. */
3734 STRLEN tculen; /* tculen is the byte length of the freshly titlecased (or
3735 * lowercased) character stored in tmpbuf. May be either
3736 * UTF-8 or not, but in either case is the number of bytes */
3737 bool remove_dot_above = FALSE;
3738
3739 s = (const U8*)SvPV_const(source, slen);
3740
3741 /* We may be able to get away with changing only the first character, in
3742 * place, but not if read-only, etc. Later we may discover more reasons to
3743 * not convert in-place. */
3744 inplace = !SvREADONLY(source) && SvPADTMP(source);
3745
3746 #ifdef USE_LOCALE_CTYPE
3747
3748 if (IN_LC_RUNTIME(LC_CTYPE)) {
3749 _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
3750 }
3751
3752 #endif
3753
3754 /* First calculate what the changed first character should be. This affects
3755 * whether we can just swap it out, leaving the rest of the string unchanged,
3756 * or even if have to convert the dest to UTF-8 when the source isn't */
3757
3758 if (! slen) { /* If empty */
3759 need = 1; /* still need a trailing NUL */
3760 ulen = 0;
3761 *tmpbuf = '\0';
3762 }
3763 else if (DO_UTF8(source)) { /* Is the source utf8? */
3764 doing_utf8 = TRUE;
3765 ulen = UTF8SKIP(s);
3766
3767 if (op_type == OP_UCFIRST) {
3768 #ifdef USE_LOCALE_CTYPE
3769 _toTITLE_utf8_flags(s, s +slen, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE));
3770 #else
3771 _toTITLE_utf8_flags(s, s +slen, tmpbuf, &tculen, 0);
3772 #endif
3773 }
3774 else {
3775
3776 #ifdef USE_LOCALE_CTYPE
3777
3778 _toLOWER_utf8_flags(s, s + slen, tmpbuf, &tculen, IN_LC_RUNTIME(LC_CTYPE));
3779
3780 /* In turkic locales, lower casing an 'I' normally yields U+0131,
3781 * LATIN SMALL LETTER DOTLESS I, but not if the grapheme also
3782 * contains a COMBINING DOT ABOVE. Instead it is treated like
3783 * LATIN CAPITAL LETTER I WITH DOT ABOVE lowercased to 'i'. The
3784 * call to lowercase above has handled this. But SpecialCasing.txt
3785 * says we are supposed to remove the COMBINING DOT ABOVE. We can
3786 * tell if we have this situation if I ==> i in a turkic locale. */
3787 if ( UNLIKELY(PL_in_utf8_turkic_locale)
3788 && IN_LC_RUNTIME(LC_CTYPE)
3789 && (UNLIKELY(*s == 'I' && tmpbuf[0] == 'i')))
3790 {
3791 /* Here, we know there was a COMBINING DOT ABOVE. We won't be
3792 * able to handle this in-place. */
3793 inplace = FALSE;
3794
3795 /* It seems likely that the DOT will immediately follow the
3796 * 'I'. If so, we can remove it simply by indicating to the
3797 * code below to start copying the source just beyond the DOT.
3798 * We know its length is 2 */
3799 if (LIKELY(memBEGINs(s + 1, s + slen, COMBINING_DOT_ABOVE_UTF8))) {
3800 ulen += 2;
3801 }
3802 else { /* But if it doesn't follow immediately, set a flag for
3803 the code below */
3804 remove_dot_above = TRUE;
3805 }
3806 }
3807 #else
3808 PERL_UNUSED_VAR(remove_dot_above);
3809
3810 _toLOWER_utf8_flags(s, s + slen, tmpbuf, &tculen, 0);
3811 #endif
3812
3813 }
3814
3815 /* we can't do in-place if the length changes. */
3816 if (ulen != tculen) inplace = FALSE;
3817 need = slen + 1 - ulen + tculen;
3818 }
3819 else { /* Non-zero length, non-UTF-8, Need to consider locale and if
3820 * latin1 is treated as caseless. Note that a locale takes
3821 * precedence */
3822 ulen = 1; /* Original character is 1 byte */
3823 tculen = 1; /* Most characters will require one byte, but this will
3824 * need to be overridden for the tricky ones */
3825 need = slen + 1;
3826
3827
3828 #ifdef USE_LOCALE_CTYPE
3829
3830 if (IN_LC_RUNTIME(LC_CTYPE)) {
3831 if ( UNLIKELY(PL_in_utf8_turkic_locale)
3832 && ( (op_type == OP_LCFIRST && UNLIKELY(*s == 'I'))
3833 || (op_type == OP_UCFIRST && UNLIKELY(*s == 'i'))))
3834 {
3835 if (*s == 'I') { /* lcfirst('I') */
3836 tmpbuf[0] = UTF8_TWO_BYTE_HI(LATIN_SMALL_LETTER_DOTLESS_I);
3837 tmpbuf[1] = UTF8_TWO_BYTE_LO(LATIN_SMALL_LETTER_DOTLESS_I);
3838 }
3839 else { /* ucfirst('i') */
3840 tmpbuf[0] = UTF8_TWO_BYTE_HI(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
3841 tmpbuf[1] = UTF8_TWO_BYTE_LO(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
3842 }
3843 tculen = 2;
3844 inplace = FALSE;
3845 doing_utf8 = TRUE;
3846 convert_source_to_utf8 = TRUE;
3847 need += variant_under_utf8_count(s, s + slen);
3848 }
3849 else if (op_type == OP_LCFIRST) {
3850
3851 /* For lc, there are no gotchas for UTF-8 locales (other than
3852 * the turkish ones already handled above) */
3853 *tmpbuf = toLOWER_LC(*s);
3854 }
3855 else { /* ucfirst */
3856
3857 /* But for uc, some characters require special handling */
3858 if (IN_UTF8_CTYPE_LOCALE) {
3859 goto do_uni_rules;
3860 }
3861
3862 /* This would be a bug if any locales have upper and title case
3863 * different */
3864 *tmpbuf = (U8) toUPPER_LC(*s);
3865 }
3866 }
3867 else
3868 #endif
3869 /* Here, not in locale. If not using Unicode rules, is a simple
3870 * lower/upper, depending */
3871 if (! IN_UNI_8_BIT) {
3872 *tmpbuf = (op_type == OP_LCFIRST)
3873 ? toLOWER(*s)
3874 : toUPPER(*s);
3875 }
3876 else if (op_type == OP_LCFIRST) {
3877 /* lower case the first letter: no trickiness for any character */
3878 *tmpbuf = toLOWER_LATIN1(*s);
3879 }
3880 else {
3881 /* Here, is ucfirst non-UTF-8, not in locale (unless that locale is
3882 * non-turkic UTF-8, which we treat as not in locale), and cased
3883 * latin1 */
3884 UV title_ord;
3885 #ifdef USE_LOCALE_CTYPE
3886 do_uni_rules:
3887 #endif
3888
3889 title_ord = _to_upper_title_latin1(*s, tmpbuf, &tculen, 's');
3890 if (tculen > 1) {
3891 assert(tculen == 2);
3892
3893 /* If the result is an upper Latin1-range character, it can
3894 * still be represented in one byte, which is its ordinal */
3895 if (UTF8_IS_DOWNGRADEABLE_START(*tmpbuf)) {
3896 *tmpbuf = (U8) title_ord;
3897 tculen = 1;
3898 }
3899 else {
3900 /* Otherwise it became more than one ASCII character (in
3901 * the case of LATIN_SMALL_LETTER_SHARP_S) or changed to
3902 * beyond Latin1, so the number of bytes changed, so can't
3903 * replace just the first character in place. */
3904 inplace = FALSE;
3905
3906 /* If the result won't fit in a byte, the entire result
3907 * will have to be in UTF-8. Allocate enough space for the
3908 * expanded first byte, and if UTF-8, the rest of the input
3909 * string, some or all of which may also expand to two
3910 * bytes, plus the terminating NUL. */
3911 if (title_ord > 255) {
3912 doing_utf8 = TRUE;
3913 convert_source_to_utf8 = TRUE;
3914 need = slen
3915 + variant_under_utf8_count(s, s + slen)
3916 + 1;
3917
3918 /* The (converted) UTF-8 and UTF-EBCDIC lengths of all
3919 * characters whose title case is above 255 is
3920 * 2. */
3921 ulen = 2;
3922 }
3923 else { /* LATIN_SMALL_LETTER_SHARP_S expands by 1 byte */
3924 need = slen + 1 + 1;
3925 }
3926 }
3927 }
3928 } /* End of use Unicode (Latin1) semantics */
3929 } /* End of changing the case of the first character */
3930
3931 /* Here, have the first character's changed case stored in tmpbuf. Ready to
3932 * generate the result */
3933 if (inplace) {
3934
3935 /* We can convert in place. This means we change just the first
3936 * character without disturbing the rest; no need to grow */
3937 dest = source;
3938 s = d = (U8*)SvPV_force_nomg(source, slen);
3939 } else {
3940 dTARGET;
3941
3942 dest = TARG;
3943
3944 /* Here, we can't convert in place; we earlier calculated how much
3945 * space we will need, so grow to accommodate that */
3946 SvUPGRADE(dest, SVt_PV);
3947 d = (U8*)SvGROW(dest, need);
3948 (void)SvPOK_only(dest);
3949
3950 SETs(dest);
3951 }
3952
3953 if (doing_utf8) {
3954 if (! inplace) {
3955 if (! convert_source_to_utf8) {
3956
3957 /* Here both source and dest are in UTF-8, but have to create
3958 * the entire output. We initialize the result to be the
3959 * title/lower cased first character, and then append the rest
3960 * of the string. */
3961 sv_setpvn(dest, (char*)tmpbuf, tculen);
3962 if (slen > ulen) {
3963
3964 /* But this boolean being set means we are in a turkic
3965 * locale, and there is a DOT character that needs to be
3966 * removed, and it isn't immediately after the current
3967 * character. Keep concatenating characters to the output
3968 * one at a time, until we find the DOT, which we simply
3969 * skip */
3970 if (UNLIKELY(remove_dot_above)) {
3971 do {
3972 Size_t this_len = UTF8SKIP(s + ulen);
3973
3974 sv_catpvn(dest, (char*)(s + ulen), this_len);
3975
3976 ulen += this_len;
3977 if (memBEGINs(s + ulen, s + slen, COMBINING_DOT_ABOVE_UTF8)) {
3978 ulen += 2;
3979 break;
3980 }
3981 } while (s + ulen < s + slen);
3982 }
3983
3984 /* The rest of the string can be concatenated unchanged,
3985 * all at once */
3986 sv_catpvn(dest, (char*)(s + ulen), slen - ulen);
3987 }
3988 }
3989 else {
3990 const U8 *const send = s + slen;
3991
3992 /* Here the dest needs to be in UTF-8, but the source isn't,
3993 * except we earlier UTF-8'd the first character of the source
3994 * into tmpbuf. First put that into dest, and then append the
3995 * rest of the source, converting it to UTF-8 as we go. */
3996
3997 /* Assert tculen is 2 here because the only characters that
3998 * get to this part of the code have 2-byte UTF-8 equivalents */
3999 assert(tculen == 2);
4000 *d++ = *tmpbuf;
4001 *d++ = *(tmpbuf + 1);
4002 s++; /* We have just processed the 1st char */
4003
4004 while (s < send) {
4005 append_utf8_from_native_byte(*s, &d);
4006 s++;
4007 }
4008
4009 *d = '\0';
4010 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4011 }
4012 SvUTF8_on(dest);
4013 }
4014 else { /* in-place UTF-8. Just overwrite the first character */
4015 Copy(tmpbuf, d, tculen, U8);
4016 SvCUR_set(dest, need - 1);
4017 }
4018
4019 }
4020 else { /* Neither source nor dest are, nor need to be UTF-8 */
4021 if (slen) {
4022 if (inplace) { /* in-place, only need to change the 1st char */
4023 *d = *tmpbuf;
4024 }
4025 else { /* Not in-place */
4026
4027 /* Copy the case-changed character(s) from tmpbuf */
4028 Copy(tmpbuf, d, tculen, U8);
4029 d += tculen - 1; /* Code below expects d to point to final
4030 * character stored */
4031 }
4032 }
4033 else { /* empty source */
4034 /* See bug #39028: Don't taint if empty */
4035 *d = *s;
4036 }
4037
4038 /* In a "use bytes" we don't treat the source as UTF-8, but, still want
4039 * the destination to retain that flag */
4040 if (DO_UTF8(source))
4041 SvUTF8_on(dest);
4042
4043 if (!inplace) { /* Finish the rest of the string, unchanged */
4044 /* This will copy the trailing NUL */
4045 Copy(s + 1, d + 1, slen, U8);
4046 SvCUR_set(dest, need - 1);
4047 }
4048 }
4049 #ifdef USE_LOCALE_CTYPE
4050 if (IN_LC_RUNTIME(LC_CTYPE)) {
4051 TAINT;
4052 SvTAINTED_on(dest);
4053 }
4054 #endif
4055 if (dest != source && SvTAINTED(source))
4056 SvTAINT(dest);
4057 SvSETMAGIC(dest);
4058 return NORMAL;
4059 }
4060
PP(pp_uc)4061 PP(pp_uc)
4062 {
4063 dSP;
4064 SV *source = TOPs;
4065 STRLEN len;
4066 STRLEN min;
4067 SV *dest;
4068 const U8 *s;
4069 U8 *d;
4070
4071 SvGETMAGIC(source);
4072
4073 if ( SvPADTMP(source)
4074 && !SvREADONLY(source) && SvPOK(source)
4075 && !DO_UTF8(source)
4076 && (
4077 #ifdef USE_LOCALE_CTYPE
4078 (IN_LC_RUNTIME(LC_CTYPE))
4079 ? ! IN_UTF8_CTYPE_LOCALE
4080 :
4081 #endif
4082 ! IN_UNI_8_BIT))
4083 {
4084
4085 /* We can convert in place. The reason we can't if in UNI_8_BIT is to
4086 * make the loop tight, so we overwrite the source with the dest before
4087 * looking at it, and we need to look at the original source
4088 * afterwards. There would also need to be code added to handle
4089 * switching to not in-place in midstream if we run into characters
4090 * that change the length. Since being in locale overrides UNI_8_BIT,
4091 * that latter becomes irrelevant in the above test; instead for
4092 * locale, the size can't normally change, except if the locale is a
4093 * UTF-8 one */
4094 dest = source;
4095 s = d = (U8*)SvPV_force_nomg(source, len);
4096 min = len + 1;
4097 } else {
4098 dTARGET;
4099
4100 dest = TARG;
4101
4102 s = (const U8*)SvPV_nomg_const(source, len);
4103 min = len + 1;
4104
4105 SvUPGRADE(dest, SVt_PV);
4106 d = (U8*)SvGROW(dest, min);
4107 (void)SvPOK_only(dest);
4108
4109 SETs(dest);
4110 }
4111
4112 #ifdef USE_LOCALE_CTYPE
4113
4114 if (IN_LC_RUNTIME(LC_CTYPE)) {
4115 _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
4116 }
4117
4118 #endif
4119
4120 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4121 to check DO_UTF8 again here. */
4122
4123 if (DO_UTF8(source)) {
4124 const U8 *const send = s + len;
4125 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4126
4127 #define GREEK_CAPITAL_LETTER_IOTA 0x0399
4128 #define COMBINING_GREEK_YPOGEGRAMMENI 0x0345
4129 /* All occurrences of these are to be moved to follow any other marks.
4130 * This is context-dependent. We may not be passed enough context to
4131 * move the iota subscript beyond all of them, but we do the best we can
4132 * with what we're given. The result is always better than if we
4133 * hadn't done this. And, the problem would only arise if we are
4134 * passed a character without all its combining marks, which would be
4135 * the caller's mistake. The information this is based on comes from a
4136 * comment in Unicode SpecialCasing.txt, (and the Standard's text
4137 * itself) and so can't be checked properly to see if it ever gets
4138 * revised. But the likelihood of it changing is remote */
4139 bool in_iota_subscript = FALSE;
4140
4141 while (s < send) {
4142 STRLEN u;
4143 STRLEN ulen;
4144 UV uv;
4145 if (UNLIKELY(in_iota_subscript)) {
4146 UV cp = utf8_to_uvchr_buf(s, send, NULL);
4147
4148 if (! _invlist_contains_cp(PL_utf8_mark, cp)) {
4149
4150 /* A non-mark. Time to output the iota subscript */
4151 *d++ = UTF8_TWO_BYTE_HI(GREEK_CAPITAL_LETTER_IOTA);
4152 *d++ = UTF8_TWO_BYTE_LO(GREEK_CAPITAL_LETTER_IOTA);
4153 in_iota_subscript = FALSE;
4154 }
4155 }
4156
4157 /* Then handle the current character. Get the changed case value
4158 * and copy it to the output buffer */
4159
4160 u = UTF8SKIP(s);
4161 #ifdef USE_LOCALE_CTYPE
4162 uv = _toUPPER_utf8_flags(s, send, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE));
4163 #else
4164 uv = _toUPPER_utf8_flags(s, send, tmpbuf, &ulen, 0);
4165 #endif
4166 if (uv == GREEK_CAPITAL_LETTER_IOTA
4167 && utf8_to_uvchr_buf(s, send, 0) == COMBINING_GREEK_YPOGEGRAMMENI)
4168 {
4169 in_iota_subscript = TRUE;
4170 }
4171 else {
4172 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4173 /* If the eventually required minimum size outgrows the
4174 * available space, we need to grow. */
4175 const UV o = d - (U8*)SvPVX_const(dest);
4176
4177 /* If someone uppercases one million U+03B0s we SvGROW()
4178 * one million times. Or we could try guessing how much to
4179 * allocate without allocating too much. But we can't
4180 * really guess without examining the rest of the string.
4181 * Such is life. See corresponding comment in lc code for
4182 * another option */
4183 d = o + (U8*) SvGROW(dest, min);
4184 }
4185 Copy(tmpbuf, d, ulen, U8);
4186 d += ulen;
4187 }
4188 s += u;
4189 }
4190 if (in_iota_subscript) {
4191 *d++ = UTF8_TWO_BYTE_HI(GREEK_CAPITAL_LETTER_IOTA);
4192 *d++ = UTF8_TWO_BYTE_LO(GREEK_CAPITAL_LETTER_IOTA);
4193 }
4194 SvUTF8_on(dest);
4195 *d = '\0';
4196
4197 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4198 }
4199 else { /* Not UTF-8 */
4200 if (len) {
4201 const U8 *const send = s + len;
4202
4203 /* Use locale casing if in locale; regular style if not treating
4204 * latin1 as having case; otherwise the latin1 casing. Do the
4205 * whole thing in a tight loop, for speed, */
4206 #ifdef USE_LOCALE_CTYPE
4207 if (IN_LC_RUNTIME(LC_CTYPE)) {
4208 if (IN_UTF8_CTYPE_LOCALE) {
4209 goto do_uni_rules;
4210 }
4211 for (; s < send; d++, s++)
4212 *d = (U8) toUPPER_LC(*s);
4213 }
4214 else
4215 #endif
4216 if (! IN_UNI_8_BIT) {
4217 for (; s < send; d++, s++) {
4218 *d = toUPPER(*s);
4219 }
4220 }
4221 else {
4222 #ifdef USE_LOCALE_CTYPE
4223 do_uni_rules:
4224 #endif
4225 for (; s < send; d++, s++) {
4226 Size_t extra;
4227
4228 *d = toUPPER_LATIN1_MOD(*s);
4229 if ( LIKELY(*d != LATIN_SMALL_LETTER_Y_WITH_DIAERESIS)
4230
4231 #ifdef USE_LOCALE_CTYPE
4232
4233 && (LIKELY( ! PL_in_utf8_turkic_locale
4234 || ! IN_LC_RUNTIME(LC_CTYPE))
4235 || *s != 'i')
4236 #endif
4237
4238 ) {
4239 continue;
4240 }
4241
4242 /* The mainstream case is the tight loop above. To avoid
4243 * extra tests in that, all three characters that always
4244 * require special handling are mapped by the MOD to the
4245 * one tested just above. Use the source to distinguish
4246 * between those cases */
4247
4248 #if UNICODE_MAJOR_VERSION > 2 \
4249 || (UNICODE_MAJOR_VERSION == 2 && UNICODE_DOT_VERSION >= 1 \
4250 && UNICODE_DOT_DOT_VERSION >= 8)
4251 if (*s == LATIN_SMALL_LETTER_SHARP_S) {
4252
4253 /* uc() of this requires 2 characters, but they are
4254 * ASCII. If not enough room, grow the string */
4255 if (SvLEN(dest) < ++min) {
4256 const UV o = d - (U8*)SvPVX_const(dest);
4257 d = o + (U8*) SvGROW(dest, min);
4258 }
4259 *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
4260 continue; /* Back to the tight loop; still in ASCII */
4261 }
4262 #endif
4263
4264 /* The other special handling characters have their
4265 * upper cases outside the latin1 range, hence need to be
4266 * in UTF-8, so the whole result needs to be in UTF-8.
4267 *
4268 * So, here we are somewhere in the middle of processing a
4269 * non-UTF-8 string, and realize that we will have to
4270 * convert the whole thing to UTF-8. What to do? There
4271 * are several possibilities. The simplest to code is to
4272 * convert what we have so far, set a flag, and continue on
4273 * in the loop. The flag would be tested each time through
4274 * the loop, and if set, the next character would be
4275 * converted to UTF-8 and stored. But, I (khw) didn't want
4276 * to slow down the mainstream case at all for this fairly
4277 * rare case, so I didn't want to add a test that didn't
4278 * absolutely have to be there in the loop, besides the
4279 * possibility that it would get too complicated for
4280 * optimizers to deal with. Another possibility is to just
4281 * give up, convert the source to UTF-8, and restart the
4282 * function that way. Another possibility is to convert
4283 * both what has already been processed and what is yet to
4284 * come separately to UTF-8, then jump into the loop that
4285 * handles UTF-8. But the most efficient time-wise of the
4286 * ones I could think of is what follows, and turned out to
4287 * not require much extra code.
4288 *
4289 * First, calculate the extra space needed for the
4290 * remainder of the source needing to be in UTF-8. Except
4291 * for the 'i' in Turkic locales, in UTF-8 strings, the
4292 * uppercase of a character below 256 occupies the same
4293 * number of bytes as the original. Therefore, the space
4294 * needed is the that number plus the number of characters
4295 * that become two bytes when converted to UTF-8, plus, in
4296 * turkish locales, the number of 'i's. */
4297
4298 extra = send - s + variant_under_utf8_count(s, send);
4299
4300 #ifdef USE_LOCALE_CTYPE
4301
4302 if (UNLIKELY(*s == 'i')) { /* We wouldn't get an 'i' here
4303 unless are in a Turkic
4304 locale */
4305 const U8 * s_peek = s;
4306
4307 do {
4308 extra++;
4309
4310 s_peek = (U8 *) memchr(s_peek + 1, 'i',
4311 send - (s_peek + 1));
4312 } while (s_peek != NULL);
4313 }
4314 #endif
4315
4316 /* Convert what we have so far into UTF-8, telling the
4317 * function that we know it should be converted, and to
4318 * allow extra space for what we haven't processed yet.
4319 *
4320 * This may cause the string pointer to move, so need to
4321 * save and re-find it. */
4322
4323 len = d - (U8*)SvPVX_const(dest);
4324 SvCUR_set(dest, len);
4325 len = sv_utf8_upgrade_flags_grow(dest,
4326 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4327 extra
4328 + 1 /* trailing NUL */ );
4329 d = (U8*)SvPVX(dest) + len;
4330
4331 /* Now process the remainder of the source, simultaneously
4332 * converting to upper and UTF-8.
4333 *
4334 * To avoid extra tests in the loop body, and since the
4335 * loop is so simple, split out the rare Turkic case into
4336 * its own loop */
4337
4338 #ifdef USE_LOCALE_CTYPE
4339 if ( UNLIKELY(PL_in_utf8_turkic_locale)
4340 && UNLIKELY(IN_LC_RUNTIME(LC_CTYPE)))
4341 {
4342 for (; s < send; s++) {
4343 if (*s == 'i') {
4344 *d++ = UTF8_TWO_BYTE_HI(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
4345 *d++ = UTF8_TWO_BYTE_LO(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
4346 }
4347 else {
4348 (void) _to_upper_title_latin1(*s, d, &len, 'S');
4349 d += len;
4350 }
4351 }
4352 }
4353 else
4354 #endif
4355 for (; s < send; s++) {
4356 (void) _to_upper_title_latin1(*s, d, &len, 'S');
4357 d += len;
4358 }
4359
4360 /* Here have processed the whole source; no need to
4361 * continue with the outer loop. Each character has been
4362 * converted to upper case and converted to UTF-8. */
4363 break;
4364 } /* End of processing all latin1-style chars */
4365 } /* End of processing all chars */
4366 } /* End of source is not empty */
4367
4368 if (source != dest) {
4369 *d = '\0'; /* Here d points to 1 after last char, add NUL */
4370 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4371 }
4372 } /* End of isn't utf8 */
4373 #ifdef USE_LOCALE_CTYPE
4374 if (IN_LC_RUNTIME(LC_CTYPE)) {
4375 TAINT;
4376 SvTAINTED_on(dest);
4377 }
4378 #endif
4379 if (dest != source && SvTAINTED(source))
4380 SvTAINT(dest);
4381 SvSETMAGIC(dest);
4382 return NORMAL;
4383 }
4384
PP(pp_lc)4385 PP(pp_lc)
4386 {
4387 dSP;
4388 SV *source = TOPs;
4389 STRLEN len;
4390 STRLEN min;
4391 SV *dest;
4392 const U8 *s;
4393 U8 *d;
4394 bool has_turkic_I = FALSE;
4395
4396 SvGETMAGIC(source);
4397
4398 if ( SvPADTMP(source)
4399 && !SvREADONLY(source) && SvPOK(source)
4400 && !DO_UTF8(source)
4401
4402 #ifdef USE_LOCALE_CTYPE
4403
4404 && ( LIKELY(! IN_LC_RUNTIME(LC_CTYPE))
4405 || LIKELY(! PL_in_utf8_turkic_locale))
4406
4407 #endif
4408
4409 ) {
4410
4411 /* We can convert in place, as, outside of Turkic UTF-8 locales,
4412 * lowercasing anything in the latin1 range (or else DO_UTF8 would have
4413 * been on) doesn't lengthen it. */
4414 dest = source;
4415 s = d = (U8*)SvPV_force_nomg(source, len);
4416 min = len + 1;
4417 } else {
4418 dTARGET;
4419
4420 dest = TARG;
4421
4422 s = (const U8*)SvPV_nomg_const(source, len);
4423 min = len + 1;
4424
4425 SvUPGRADE(dest, SVt_PV);
4426 d = (U8*)SvGROW(dest, min);
4427 (void)SvPOK_only(dest);
4428
4429 SETs(dest);
4430 }
4431
4432 #ifdef USE_LOCALE_CTYPE
4433
4434 if (IN_LC_RUNTIME(LC_CTYPE)) {
4435 const U8 * next_I;
4436
4437 _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
4438
4439 /* Lowercasing in a Turkic locale can cause non-UTF-8 to need to become
4440 * UTF-8 for the single case of the character 'I' */
4441 if ( UNLIKELY(PL_in_utf8_turkic_locale)
4442 && ! DO_UTF8(source)
4443 && (next_I = (U8 *) memchr(s, 'I', len)))
4444 {
4445 Size_t I_count = 0;
4446 const U8 *const send = s + len;
4447
4448 do {
4449 I_count++;
4450
4451 next_I = (U8 *) memchr(next_I + 1, 'I',
4452 send - (next_I + 1));
4453 } while (next_I != NULL);
4454
4455 /* Except for the 'I', in UTF-8 strings, the lower case of a
4456 * character below 256 occupies the same number of bytes as the
4457 * original. Therefore, the space needed is the original length
4458 * plus I_count plus the number of characters that become two bytes
4459 * when converted to UTF-8 */
4460 sv_utf8_upgrade_flags_grow(dest, 0, len
4461 + I_count
4462 + variant_under_utf8_count(s, send)
4463 + 1 /* Trailing NUL */ );
4464 d = (U8*)SvPVX(dest);
4465 has_turkic_I = TRUE;
4466 }
4467 }
4468
4469 #else
4470 PERL_UNUSED_VAR(has_turkic_I);
4471 #endif
4472
4473 /* Overloaded values may have toggled the UTF-8 flag on source, so we need
4474 to check DO_UTF8 again here. */
4475
4476 if (DO_UTF8(source)) {
4477 const U8 *const send = s + len;
4478 U8 tmpbuf[UTF8_MAXBYTES_CASE+1];
4479 bool remove_dot_above = FALSE;
4480
4481 while (s < send) {
4482 const STRLEN u = UTF8SKIP(s);
4483 STRLEN ulen;
4484
4485 #ifdef USE_LOCALE_CTYPE
4486
4487 _toLOWER_utf8_flags(s, send, tmpbuf, &ulen, IN_LC_RUNTIME(LC_CTYPE));
4488
4489 /* If we are in a Turkic locale, we have to do more work. As noted
4490 * in the comments for lcfirst, there is a special case if a 'I'
4491 * is in a grapheme with COMBINING DOT ABOVE UTF8. It turns into a
4492 * 'i', and the DOT must be removed. We check for that situation,
4493 * and set a flag if the DOT is there. Then each time through the
4494 * loop, we have to see if we need to remove the next DOT above,
4495 * and if so, do it. We know that there is a DOT because
4496 * _toLOWER_utf8_flags() wouldn't have returned 'i' unless there
4497 * was one in a proper position. */
4498 if ( UNLIKELY(PL_in_utf8_turkic_locale)
4499 && IN_LC_RUNTIME(LC_CTYPE))
4500 {
4501 if ( UNLIKELY(remove_dot_above)
4502 && memBEGINs(tmpbuf, sizeof(tmpbuf), COMBINING_DOT_ABOVE_UTF8))
4503 {
4504 s += u;
4505 remove_dot_above = FALSE;
4506 continue;
4507 }
4508 else if (UNLIKELY(*s == 'I' && tmpbuf[0] == 'i')) {
4509 remove_dot_above = TRUE;
4510 }
4511 }
4512 #else
4513 PERL_UNUSED_VAR(remove_dot_above);
4514
4515 _toLOWER_utf8_flags(s, send, tmpbuf, &ulen, 0);
4516 #endif
4517
4518 /* Here is where we would do context-sensitive actions for the
4519 * Greek final sigma. See the commit message for 86510fb15 for why
4520 * there isn't any */
4521
4522 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4523
4524 /* If the eventually required minimum size outgrows the
4525 * available space, we need to grow. */
4526 const UV o = d - (U8*)SvPVX_const(dest);
4527
4528 /* If someone lowercases one million U+0130s we SvGROW() one
4529 * million times. Or we could try guessing how much to
4530 * allocate without allocating too much. Such is life.
4531 * Another option would be to grow an extra byte or two more
4532 * each time we need to grow, which would cut down the million
4533 * to 500K, with little waste */
4534 d = o + (U8*) SvGROW(dest, min);
4535 }
4536
4537 /* Copy the newly lowercased letter to the output buffer we're
4538 * building */
4539 Copy(tmpbuf, d, ulen, U8);
4540 d += ulen;
4541 s += u;
4542 } /* End of looping through the source string */
4543 SvUTF8_on(dest);
4544 *d = '\0';
4545 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4546 } else { /* 'source' not utf8 */
4547 if (len) {
4548 const U8 *const send = s + len;
4549
4550 /* Use locale casing if in locale; regular style if not treating
4551 * latin1 as having case; otherwise the latin1 casing. Do the
4552 * whole thing in a tight loop, for speed, */
4553 #ifdef USE_LOCALE_CTYPE
4554 if (IN_LC_RUNTIME(LC_CTYPE)) {
4555 if (LIKELY( ! has_turkic_I)) {
4556 for (; s < send; d++, s++)
4557 *d = toLOWER_LC(*s);
4558 }
4559 else { /* This is the only case where lc() converts 'dest'
4560 into UTF-8 from a non-UTF-8 'source' */
4561 for (; s < send; s++) {
4562 if (*s == 'I') {
4563 *d++ = UTF8_TWO_BYTE_HI(LATIN_SMALL_LETTER_DOTLESS_I);
4564 *d++ = UTF8_TWO_BYTE_LO(LATIN_SMALL_LETTER_DOTLESS_I);
4565 }
4566 else {
4567 append_utf8_from_native_byte(toLOWER_LATIN1(*s), &d);
4568 }
4569 }
4570 }
4571 }
4572 else
4573 #endif
4574 if (! IN_UNI_8_BIT) {
4575 for (; s < send; d++, s++) {
4576 *d = toLOWER(*s);
4577 }
4578 }
4579 else {
4580 for (; s < send; d++, s++) {
4581 *d = toLOWER_LATIN1(*s);
4582 }
4583 }
4584 }
4585 if (source != dest) {
4586 *d = '\0';
4587 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4588 }
4589 }
4590 #ifdef USE_LOCALE_CTYPE
4591 if (IN_LC_RUNTIME(LC_CTYPE)) {
4592 TAINT;
4593 SvTAINTED_on(dest);
4594 }
4595 #endif
4596 if (dest != source && SvTAINTED(source))
4597 SvTAINT(dest);
4598 SvSETMAGIC(dest);
4599 return NORMAL;
4600 }
4601
PP(pp_quotemeta)4602 PP(pp_quotemeta)
4603 {
4604 dSP; dTARGET;
4605 SV * const sv = TOPs;
4606 STRLEN len;
4607 const char *s = SvPV_const(sv,len);
4608
4609 SvUTF8_off(TARG); /* decontaminate */
4610 if (len) {
4611 char *d;
4612 SvUPGRADE(TARG, SVt_PV);
4613 SvGROW(TARG, (len * 2) + 1);
4614 d = SvPVX(TARG);
4615 if (DO_UTF8(sv)) {
4616 while (len) {
4617 STRLEN ulen = UTF8SKIP(s);
4618 bool to_quote = FALSE;
4619
4620 if (UTF8_IS_INVARIANT(*s)) {
4621 if (_isQUOTEMETA(*s)) {
4622 to_quote = TRUE;
4623 }
4624 }
4625 else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(s, s + len)) {
4626 if (
4627 #ifdef USE_LOCALE_CTYPE
4628 /* In locale, we quote all non-ASCII Latin1 chars.
4629 * Otherwise use the quoting rules */
4630
4631 IN_LC_RUNTIME(LC_CTYPE)
4632 ||
4633 #endif
4634 _isQUOTEMETA(EIGHT_BIT_UTF8_TO_NATIVE(*s, *(s + 1))))
4635 {
4636 to_quote = TRUE;
4637 }
4638 }
4639 else if (is_QUOTEMETA_high(s)) {
4640 to_quote = TRUE;
4641 }
4642
4643 if (to_quote) {
4644 *d++ = '\\';
4645 }
4646 if (ulen > len)
4647 ulen = len;
4648 len -= ulen;
4649 while (ulen--)
4650 *d++ = *s++;
4651 }
4652 SvUTF8_on(TARG);
4653 }
4654 else if (IN_UNI_8_BIT) {
4655 while (len--) {
4656 if (_isQUOTEMETA(*s))
4657 *d++ = '\\';
4658 *d++ = *s++;
4659 }
4660 }
4661 else {
4662 /* For non UNI_8_BIT (and hence in locale) just quote all \W
4663 * including everything above ASCII */
4664 while (len--) {
4665 if (!isWORDCHAR_A(*s))
4666 *d++ = '\\';
4667 *d++ = *s++;
4668 }
4669 }
4670 *d = '\0';
4671 SvCUR_set(TARG, d - SvPVX_const(TARG));
4672 (void)SvPOK_only_UTF8(TARG);
4673 }
4674 else
4675 sv_setpvn(TARG, s, len);
4676 SETTARG;
4677 return NORMAL;
4678 }
4679
PP(pp_fc)4680 PP(pp_fc)
4681 {
4682 dTARGET;
4683 dSP;
4684 SV *source = TOPs;
4685 STRLEN len;
4686 STRLEN min;
4687 SV *dest;
4688 const U8 *s;
4689 const U8 *send;
4690 U8 *d;
4691 U8 tmpbuf[UTF8_MAXBYTES_CASE + 1];
4692 #if UNICODE_MAJOR_VERSION > 3 /* no multifolds in early Unicode */ \
4693 || (UNICODE_MAJOR_VERSION == 3 && ( UNICODE_DOT_VERSION > 0) \
4694 || UNICODE_DOT_DOT_VERSION > 0)
4695 const bool full_folding = TRUE; /* This variable is here so we can easily
4696 move to more generality later */
4697 #else
4698 const bool full_folding = FALSE;
4699 #endif
4700 const U8 flags = ( full_folding ? FOLD_FLAGS_FULL : 0 )
4701 #ifdef USE_LOCALE_CTYPE
4702 | ( IN_LC_RUNTIME(LC_CTYPE) ? FOLD_FLAGS_LOCALE : 0 )
4703 #endif
4704 ;
4705
4706 /* This is a facsimile of pp_lc, but with a thousand bugs thanks to me.
4707 * You are welcome(?) -Hugmeir
4708 */
4709
4710 SvGETMAGIC(source);
4711
4712 dest = TARG;
4713
4714 if (SvOK(source)) {
4715 s = (const U8*)SvPV_nomg_const(source, len);
4716 } else {
4717 if (ckWARN(WARN_UNINITIALIZED))
4718 report_uninit(source);
4719 s = (const U8*)"";
4720 len = 0;
4721 }
4722
4723 min = len + 1;
4724
4725 SvUPGRADE(dest, SVt_PV);
4726 d = (U8*)SvGROW(dest, min);
4727 (void)SvPOK_only(dest);
4728
4729 SETs(dest);
4730
4731 send = s + len;
4732
4733 #ifdef USE_LOCALE_CTYPE
4734
4735 if ( IN_LC_RUNTIME(LC_CTYPE) ) { /* Under locale */
4736 _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
4737 }
4738
4739 #endif
4740
4741 if (DO_UTF8(source)) { /* UTF-8 flagged string. */
4742 while (s < send) {
4743 const STRLEN u = UTF8SKIP(s);
4744 STRLEN ulen;
4745
4746 _toFOLD_utf8_flags(s, send, tmpbuf, &ulen, flags);
4747
4748 if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
4749 const UV o = d - (U8*)SvPVX_const(dest);
4750 d = o + (U8*) SvGROW(dest, min);
4751 }
4752
4753 Copy(tmpbuf, d, ulen, U8);
4754 d += ulen;
4755 s += u;
4756 }
4757 SvUTF8_on(dest);
4758 } /* Unflagged string */
4759 else if (len) {
4760 #ifdef USE_LOCALE_CTYPE
4761 if ( IN_LC_RUNTIME(LC_CTYPE) ) { /* Under locale */
4762 if (IN_UTF8_CTYPE_LOCALE) {
4763 goto do_uni_folding;
4764 }
4765 for (; s < send; d++, s++)
4766 *d = (U8) toFOLD_LC(*s);
4767 }
4768 else
4769 #endif
4770 if ( !IN_UNI_8_BIT ) { /* Under nothing, or bytes */
4771 for (; s < send; d++, s++)
4772 *d = toFOLD(*s);
4773 }
4774 else {
4775 #ifdef USE_LOCALE_CTYPE
4776 do_uni_folding:
4777 #endif
4778 /* For ASCII and the Latin-1 range, there's potentially three
4779 * troublesome folds:
4780 * \x{DF} (\N{LATIN SMALL LETTER SHARP S}), which under full
4781 * casefolding becomes 'ss';
4782 * \x{B5} (\N{MICRO SIGN}), which under any fold becomes
4783 * \x{3BC} (\N{GREEK SMALL LETTER MU})
4784 * I only in Turkic locales, this folds to \x{131}
4785 * \N{LATIN SMALL LETTER DOTLESS I}
4786 * For the rest, the casefold is their lowercase. */
4787 for (; s < send; d++, s++) {
4788 if ( UNLIKELY(*s == MICRO_SIGN)
4789 #ifdef USE_LOCALE_CTYPE
4790 || ( UNLIKELY(PL_in_utf8_turkic_locale)
4791 && UNLIKELY(IN_LC_RUNTIME(LC_CTYPE))
4792 && UNLIKELY(*s == 'I'))
4793 #endif
4794 ) {
4795 Size_t extra = send - s
4796 + variant_under_utf8_count(s, send);
4797
4798 /* \N{MICRO SIGN}'s casefold is \N{GREEK SMALL LETTER MU},
4799 * and 'I' in Turkic locales is \N{LATIN SMALL LETTER
4800 * DOTLESS I} both of which are outside of the latin-1
4801 * range. There's a couple of ways to deal with this -- khw
4802 * discusses them in pp_lc/uc, so go there :) What we do
4803 * here is upgrade what we had already casefolded, then
4804 * enter an inner loop that appends the rest of the
4805 * characters as UTF-8.
4806 *
4807 * First we calculate the needed size of the upgraded dest
4808 * beyond what's been processed already (the upgrade
4809 * function figures that out). Except for the 'I' in
4810 * Turkic locales, in UTF-8 strings, the fold case of a
4811 * character below 256 occupies the same number of bytes as
4812 * the original (even the Sharp S). Therefore, the space
4813 * needed is the number of bytes remaining plus the number
4814 * of characters that become two bytes when converted to
4815 * UTF-8 plus, in turkish locales, the number of 'I's */
4816
4817 if (UNLIKELY(*s == 'I')) {
4818 const U8 * s_peek = s;
4819
4820 do {
4821 extra++;
4822
4823 s_peek = (U8 *) memchr(s_peek + 1, 'I',
4824 send - (s_peek + 1));
4825 } while (s_peek != NULL);
4826 }
4827
4828 /* Growing may move things, so have to save and recalculate
4829 * 'd' */
4830 len = d - (U8*)SvPVX_const(dest);
4831 SvCUR_set(dest, len);
4832 len = sv_utf8_upgrade_flags_grow(dest,
4833 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
4834 extra
4835 + 1 /* Trailing NUL */ );
4836 d = (U8*)SvPVX(dest) + len;
4837
4838 if (*s == 'I') {
4839 *d++ = UTF8_TWO_BYTE_HI(LATIN_SMALL_LETTER_DOTLESS_I);
4840 *d++ = UTF8_TWO_BYTE_LO(LATIN_SMALL_LETTER_DOTLESS_I);
4841 }
4842 else {
4843 *d++ = UTF8_TWO_BYTE_HI(GREEK_SMALL_LETTER_MU);
4844 *d++ = UTF8_TWO_BYTE_LO(GREEK_SMALL_LETTER_MU);
4845 }
4846 s++;
4847
4848 for (; s < send; s++) {
4849 STRLEN ulen;
4850 _to_uni_fold_flags(*s, d, &ulen, flags);
4851 d += ulen;
4852 }
4853 break;
4854 }
4855 else if ( UNLIKELY(*s == LATIN_SMALL_LETTER_SHARP_S)
4856 && full_folding)
4857 {
4858 /* Under full casefolding, LATIN SMALL LETTER SHARP S
4859 * becomes "ss", which may require growing the SV. */
4860 if (SvLEN(dest) < ++min) {
4861 const UV o = d - (U8*)SvPVX_const(dest);
4862 d = o + (U8*) SvGROW(dest, min);
4863 }
4864 *(d)++ = 's';
4865 *d = 's';
4866 }
4867 else { /* Else, the fold is the lower case */
4868 *d = toLOWER_LATIN1(*s);
4869 }
4870 }
4871 }
4872 }
4873 *d = '\0';
4874 SvCUR_set(dest, d - (U8*)SvPVX_const(dest));
4875
4876 #ifdef USE_LOCALE_CTYPE
4877 if (IN_LC_RUNTIME(LC_CTYPE)) {
4878 TAINT;
4879 SvTAINTED_on(dest);
4880 }
4881 #endif
4882 if (SvTAINTED(source))
4883 SvTAINT(dest);
4884 SvSETMAGIC(dest);
4885 RETURN;
4886 }
4887
4888 /* Arrays. */
4889
PP(pp_aslice)4890 PP(pp_aslice)
4891 {
4892 dSP; dMARK; dORIGMARK;
4893 AV *const av = MUTABLE_AV(POPs);
4894 const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
4895
4896 if (SvTYPE(av) == SVt_PVAV) {
4897 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
4898 bool can_preserve = FALSE;
4899
4900 if (localizing) {
4901 MAGIC *mg;
4902 HV *stash;
4903
4904 can_preserve = SvCANEXISTDELETE(av);
4905 }
4906
4907 if (lval && localizing) {
4908 SV **svp;
4909 SSize_t max = -1;
4910 for (svp = MARK + 1; svp <= SP; svp++) {
4911 const SSize_t elem = SvIV(*svp);
4912 if (elem > max)
4913 max = elem;
4914 }
4915 if (max > AvMAX(av))
4916 av_extend(av, max);
4917 }
4918
4919 while (++MARK <= SP) {
4920 SV **svp;
4921 SSize_t elem = SvIV(*MARK);
4922 bool preeminent = TRUE;
4923
4924 if (localizing && can_preserve) {
4925 /* If we can determine whether the element exist,
4926 * Try to preserve the existenceness of a tied array
4927 * element by using EXISTS and DELETE if possible.
4928 * Fallback to FETCH and STORE otherwise. */
4929 preeminent = av_exists(av, elem);
4930 }
4931
4932 svp = av_fetch(av, elem, lval);
4933 if (lval) {
4934 if (!svp || !*svp)
4935 DIE(aTHX_ PL_no_aelem, elem);
4936 if (localizing) {
4937 if (preeminent)
4938 save_aelem(av, elem, svp);
4939 else
4940 SAVEADELETE(av, elem);
4941 }
4942 }
4943 *MARK = svp ? *svp : &PL_sv_undef;
4944 }
4945 }
4946 if (GIMME_V != G_LIST) {
4947 MARK = ORIGMARK;
4948 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
4949 SP = MARK;
4950 }
4951 RETURN;
4952 }
4953
PP(pp_kvaslice)4954 PP(pp_kvaslice)
4955 {
4956 dSP; dMARK;
4957 AV *const av = MUTABLE_AV(POPs);
4958 I32 lval = (PL_op->op_flags & OPf_MOD);
4959 SSize_t items = SP - MARK;
4960
4961 if (PL_op->op_private & OPpMAYBE_LVSUB) {
4962 const I32 flags = is_lvalue_sub();
4963 if (flags) {
4964 if (!(flags & OPpENTERSUB_INARGS))
4965 /* diag_listed_as: Can't modify %s in %s */
4966 Perl_croak(aTHX_ "Can't modify index/value array slice in list assignment");
4967 lval = flags;
4968 }
4969 }
4970
4971 MEXTEND(SP,items);
4972 while (items > 1) {
4973 *(MARK+items*2-1) = *(MARK+items);
4974 items--;
4975 }
4976 items = SP-MARK;
4977 SP += items;
4978
4979 while (++MARK <= SP) {
4980 SV **svp;
4981
4982 svp = av_fetch(av, SvIV(*MARK), lval);
4983 if (lval) {
4984 if (!svp || !*svp || *svp == &PL_sv_undef) {
4985 DIE(aTHX_ PL_no_aelem, SvIV(*MARK));
4986 }
4987 *MARK = sv_mortalcopy(*MARK);
4988 }
4989 *++MARK = svp ? *svp : &PL_sv_undef;
4990 }
4991 if (GIMME_V != G_LIST) {
4992 MARK = SP - items*2;
4993 *++MARK = items > 0 ? *SP : &PL_sv_undef;
4994 SP = MARK;
4995 }
4996 RETURN;
4997 }
4998
4999
PP(pp_aeach)5000 PP(pp_aeach)
5001 {
5002 dSP;
5003 AV *array = MUTABLE_AV(POPs);
5004 const U8 gimme = GIMME_V;
5005 IV *iterp = Perl_av_iter_p(aTHX_ array);
5006 const IV current = (*iterp)++;
5007
5008 if (current > av_top_index(array)) {
5009 *iterp = 0;
5010 if (gimme == G_SCALAR)
5011 RETPUSHUNDEF;
5012 else
5013 RETURN;
5014 }
5015
5016 EXTEND(SP, 2);
5017 mPUSHi(current);
5018 if (gimme == G_LIST) {
5019 SV **const element = av_fetch(array, current, 0);
5020 PUSHs(element ? *element : &PL_sv_undef);
5021 }
5022 RETURN;
5023 }
5024
5025 /* also used for: pp_avalues()*/
PP(pp_akeys)5026 PP(pp_akeys)
5027 {
5028 dSP;
5029 AV *array = MUTABLE_AV(POPs);
5030 const U8 gimme = GIMME_V;
5031
5032 *Perl_av_iter_p(aTHX_ array) = 0;
5033
5034 if (gimme == G_SCALAR) {
5035 dTARGET;
5036 PUSHi(av_count(array));
5037 }
5038 else if (gimme == G_LIST) {
5039 if (UNLIKELY(PL_op->op_private & OPpMAYBE_LVSUB)) {
5040 const I32 flags = is_lvalue_sub();
5041 if (flags && !(flags & OPpENTERSUB_INARGS))
5042 /* diag_listed_as: Can't modify %s in %s */
5043 Perl_croak(aTHX_
5044 "Can't modify keys on array in list assignment");
5045 }
5046 {
5047 IV n = av_top_index(array);
5048 IV i;
5049
5050 EXTEND(SP, n + 1);
5051
5052 if ( PL_op->op_type == OP_AKEYS
5053 || ( PL_op->op_type == OP_AVHVSWITCH
5054 && (PL_op->op_private & 3) + OP_AEACH == OP_AKEYS ))
5055 {
5056 for (i = 0; i <= n; i++) {
5057 mPUSHi(i);
5058 }
5059 }
5060 else {
5061 for (i = 0; i <= n; i++) {
5062 SV *const *const elem = Perl_av_fetch(aTHX_ array, i, 0);
5063 PUSHs(elem ? *elem : &PL_sv_undef);
5064 }
5065 }
5066 }
5067 }
5068 RETURN;
5069 }
5070
5071 /* Associative arrays. */
5072
PP(pp_each)5073 PP(pp_each)
5074 {
5075 dSP;
5076 HV * hash = MUTABLE_HV(POPs);
5077 HE *entry;
5078 const U8 gimme = GIMME_V;
5079
5080 entry = hv_iternext(hash);
5081
5082 EXTEND(SP, 2);
5083 if (entry) {
5084 SV* const sv = hv_iterkeysv(entry);
5085 PUSHs(sv);
5086 if (gimme == G_LIST) {
5087 SV *val;
5088 val = hv_iterval(hash, entry);
5089 PUSHs(val);
5090 }
5091 }
5092 else if (gimme == G_SCALAR)
5093 RETPUSHUNDEF;
5094
5095 RETURN;
5096 }
5097
5098 STATIC OP *
S_do_delete_local(pTHX)5099 S_do_delete_local(pTHX)
5100 {
5101 dSP;
5102 const U8 gimme = GIMME_V;
5103 const MAGIC *mg;
5104 HV *stash;
5105 const bool sliced = !!(PL_op->op_private & OPpSLICE);
5106 SV **unsliced_keysv = sliced ? NULL : sp--;
5107 SV * const osv = POPs;
5108 SV **mark = sliced ? PL_stack_base + POPMARK : unsliced_keysv-1;
5109 dORIGMARK;
5110 const bool tied = SvRMAGICAL(osv)
5111 && mg_find((const SV *)osv, PERL_MAGIC_tied);
5112 const bool can_preserve = SvCANEXISTDELETE(osv);
5113 const U32 type = SvTYPE(osv);
5114 SV ** const end = sliced ? SP : unsliced_keysv;
5115
5116 if (type == SVt_PVHV) { /* hash element */
5117 HV * const hv = MUTABLE_HV(osv);
5118 while (++MARK <= end) {
5119 SV * const keysv = *MARK;
5120 SV *sv = NULL;
5121 bool preeminent = TRUE;
5122 if (can_preserve)
5123 preeminent = hv_exists_ent(hv, keysv, 0);
5124 if (tied) {
5125 HE *he = hv_fetch_ent(hv, keysv, 1, 0);
5126 if (he)
5127 sv = HeVAL(he);
5128 else
5129 preeminent = FALSE;
5130 }
5131 else {
5132 sv = hv_delete_ent(hv, keysv, 0, 0);
5133 if (preeminent)
5134 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
5135 }
5136 if (preeminent) {
5137 if (!sv) DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv));
5138 save_helem_flags(hv, keysv, &sv, SAVEf_KEEPOLDELEM);
5139 if (tied) {
5140 *MARK = sv_mortalcopy(sv);
5141 mg_clear(sv);
5142 } else
5143 *MARK = sv;
5144 }
5145 else {
5146 SAVEHDELETE(hv, keysv);
5147 *MARK = &PL_sv_undef;
5148 }
5149 }
5150 }
5151 else if (type == SVt_PVAV) { /* array element */
5152 if (PL_op->op_flags & OPf_SPECIAL) {
5153 AV * const av = MUTABLE_AV(osv);
5154 while (++MARK <= end) {
5155 SSize_t idx = SvIV(*MARK);
5156 SV *sv = NULL;
5157 bool preeminent = TRUE;
5158 if (can_preserve)
5159 preeminent = av_exists(av, idx);
5160 if (tied) {
5161 SV **svp = av_fetch(av, idx, 1);
5162 if (svp)
5163 sv = *svp;
5164 else
5165 preeminent = FALSE;
5166 }
5167 else {
5168 sv = av_delete(av, idx, 0);
5169 if (preeminent)
5170 SvREFCNT_inc_simple_void(sv); /* De-mortalize */
5171 }
5172 if (preeminent) {
5173 save_aelem_flags(av, idx, &sv, SAVEf_KEEPOLDELEM);
5174 if (tied) {
5175 *MARK = sv_mortalcopy(sv);
5176 mg_clear(sv);
5177 } else
5178 *MARK = sv;
5179 }
5180 else {
5181 SAVEADELETE(av, idx);
5182 *MARK = &PL_sv_undef;
5183 }
5184 }
5185 }
5186 else
5187 DIE(aTHX_ "panic: avhv_delete no longer supported");
5188 }
5189 else
5190 DIE(aTHX_ "Not a HASH reference");
5191 if (sliced) {
5192 if (gimme == G_VOID)
5193 SP = ORIGMARK;
5194 else if (gimme == G_SCALAR) {
5195 MARK = ORIGMARK;
5196 if (SP > MARK)
5197 *++MARK = *SP;
5198 else
5199 *++MARK = &PL_sv_undef;
5200 SP = MARK;
5201 }
5202 }
5203 else if (gimme != G_VOID)
5204 PUSHs(*unsliced_keysv);
5205
5206 RETURN;
5207 }
5208
PP(pp_delete)5209 PP(pp_delete)
5210 {
5211 dSP;
5212 U8 gimme;
5213 I32 discard;
5214
5215 if (PL_op->op_private & OPpLVAL_INTRO)
5216 return do_delete_local();
5217
5218 gimme = GIMME_V;
5219 discard = (gimme == G_VOID) ? G_DISCARD : 0;
5220
5221 if (PL_op->op_private & (OPpSLICE|OPpKVSLICE)) {
5222 dMARK; dORIGMARK;
5223 HV * const hv = MUTABLE_HV(POPs);
5224 const U32 hvtype = SvTYPE(hv);
5225 int skip = 0;
5226 if (PL_op->op_private & OPpKVSLICE) {
5227 SSize_t items = SP - MARK;
5228
5229 MEXTEND(SP,items);
5230 while (items > 1) {
5231 *(MARK+items*2-1) = *(MARK+items);
5232 items--;
5233 }
5234 items = SP - MARK;
5235 SP += items;
5236 skip = 1;
5237 }
5238 if (hvtype == SVt_PVHV) { /* hash element */
5239 while ((MARK += (1+skip)) <= SP) {
5240 SV * const sv = hv_delete_ent(hv, *(MARK-skip), discard, 0);
5241 *MARK = sv ? sv : &PL_sv_undef;
5242 }
5243 }
5244 else if (hvtype == SVt_PVAV) { /* array element */
5245 if (PL_op->op_flags & OPf_SPECIAL) {
5246 while ((MARK += (1+skip)) <= SP) {
5247 SV * const sv = av_delete(MUTABLE_AV(hv), SvIV(*(MARK-skip)), discard);
5248 *MARK = sv ? sv : &PL_sv_undef;
5249 }
5250 }
5251 }
5252 else
5253 DIE(aTHX_ "Not a HASH reference");
5254 if (discard)
5255 SP = ORIGMARK;
5256 else if (gimme == G_SCALAR) {
5257 MARK = ORIGMARK;
5258 if (SP > MARK)
5259 *++MARK = *SP;
5260 else
5261 *++MARK = &PL_sv_undef;
5262 SP = MARK;
5263 }
5264 }
5265 else {
5266 SV *keysv = POPs;
5267 HV * const hv = MUTABLE_HV(POPs);
5268 SV *sv = NULL;
5269 if (SvTYPE(hv) == SVt_PVHV)
5270 sv = hv_delete_ent(hv, keysv, discard, 0);
5271 else if (SvTYPE(hv) == SVt_PVAV) {
5272 if (PL_op->op_flags & OPf_SPECIAL)
5273 sv = av_delete(MUTABLE_AV(hv), SvIV(keysv), discard);
5274 else
5275 DIE(aTHX_ "panic: avhv_delete no longer supported");
5276 }
5277 else
5278 DIE(aTHX_ "Not a HASH reference");
5279 if (!sv)
5280 sv = &PL_sv_undef;
5281 if (!discard)
5282 PUSHs(sv);
5283 }
5284 RETURN;
5285 }
5286
PP(pp_exists)5287 PP(pp_exists)
5288 {
5289 dSP;
5290 SV *tmpsv;
5291 HV *hv;
5292
5293 if (UNLIKELY( PL_op->op_private & OPpEXISTS_SUB )) {
5294 GV *gv;
5295 SV * const sv = POPs;
5296 CV * const cv = sv_2cv(sv, &hv, &gv, 0);
5297 if (cv)
5298 RETPUSHYES;
5299 if (gv && isGV(gv) && GvCV(gv) && !GvCVGEN(gv))
5300 RETPUSHYES;
5301 RETPUSHNO;
5302 }
5303 tmpsv = POPs;
5304 hv = MUTABLE_HV(POPs);
5305 if (LIKELY( SvTYPE(hv) == SVt_PVHV )) {
5306 if (hv_exists_ent(hv, tmpsv, 0))
5307 RETPUSHYES;
5308 }
5309 else if (SvTYPE(hv) == SVt_PVAV) {
5310 if (PL_op->op_flags & OPf_SPECIAL) { /* array element */
5311 if (av_exists(MUTABLE_AV(hv), SvIV(tmpsv)))
5312 RETPUSHYES;
5313 }
5314 }
5315 else {
5316 DIE(aTHX_ "Not a HASH reference");
5317 }
5318 RETPUSHNO;
5319 }
5320
PP(pp_hslice)5321 PP(pp_hslice)
5322 {
5323 dSP; dMARK; dORIGMARK;
5324 HV * const hv = MUTABLE_HV(POPs);
5325 const I32 lval = (PL_op->op_flags & OPf_MOD || LVRET);
5326 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
5327 bool can_preserve = FALSE;
5328
5329 if (localizing) {
5330 MAGIC *mg;
5331 HV *stash;
5332
5333 if (SvCANEXISTDELETE(hv))
5334 can_preserve = TRUE;
5335 }
5336
5337 while (++MARK <= SP) {
5338 SV * const keysv = *MARK;
5339 SV **svp;
5340 HE *he;
5341 bool preeminent = TRUE;
5342
5343 if (localizing && can_preserve) {
5344 /* If we can determine whether the element exist,
5345 * try to preserve the existenceness of a tied hash
5346 * element by using EXISTS and DELETE if possible.
5347 * Fallback to FETCH and STORE otherwise. */
5348 preeminent = hv_exists_ent(hv, keysv, 0);
5349 }
5350
5351 he = hv_fetch_ent(hv, keysv, lval, 0);
5352 svp = he ? &HeVAL(he) : NULL;
5353
5354 if (lval) {
5355 if (!svp || !*svp || *svp == &PL_sv_undef) {
5356 DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv));
5357 }
5358 if (localizing) {
5359 if (HvNAME_get(hv) && isGV_or_RVCV(*svp))
5360 save_gp(MUTABLE_GV(*svp), !(PL_op->op_flags & OPf_SPECIAL));
5361 else if (preeminent)
5362 save_helem_flags(hv, keysv, svp,
5363 (PL_op->op_flags & OPf_SPECIAL) ? 0 : SAVEf_SETMAGIC);
5364 else
5365 SAVEHDELETE(hv, keysv);
5366 }
5367 }
5368 *MARK = svp && *svp ? *svp : &PL_sv_undef;
5369 }
5370 if (GIMME_V != G_LIST) {
5371 MARK = ORIGMARK;
5372 *++MARK = SP > ORIGMARK ? *SP : &PL_sv_undef;
5373 SP = MARK;
5374 }
5375 RETURN;
5376 }
5377
PP(pp_kvhslice)5378 PP(pp_kvhslice)
5379 {
5380 dSP; dMARK;
5381 HV * const hv = MUTABLE_HV(POPs);
5382 I32 lval = (PL_op->op_flags & OPf_MOD);
5383 SSize_t items = SP - MARK;
5384
5385 if (PL_op->op_private & OPpMAYBE_LVSUB) {
5386 const I32 flags = is_lvalue_sub();
5387 if (flags) {
5388 if (!(flags & OPpENTERSUB_INARGS))
5389 /* diag_listed_as: Can't modify %s in %s */
5390 Perl_croak(aTHX_ "Can't modify key/value hash slice in %s assignment",
5391 GIMME_V == G_LIST ? "list" : "scalar");
5392 lval = flags;
5393 }
5394 }
5395
5396 MEXTEND(SP,items);
5397 while (items > 1) {
5398 *(MARK+items*2-1) = *(MARK+items);
5399 items--;
5400 }
5401 items = SP-MARK;
5402 SP += items;
5403
5404 while (++MARK <= SP) {
5405 SV * const keysv = *MARK;
5406 SV **svp;
5407 HE *he;
5408
5409 he = hv_fetch_ent(hv, keysv, lval, 0);
5410 svp = he ? &HeVAL(he) : NULL;
5411
5412 if (lval) {
5413 if (!svp || !*svp || *svp == &PL_sv_undef) {
5414 DIE(aTHX_ PL_no_helem_sv, SVfARG(keysv));
5415 }
5416 *MARK = sv_mortalcopy(*MARK);
5417 }
5418 *++MARK = svp && *svp ? *svp : &PL_sv_undef;
5419 }
5420 if (GIMME_V != G_LIST) {
5421 MARK = SP - items*2;
5422 *++MARK = items > 0 ? *SP : &PL_sv_undef;
5423 SP = MARK;
5424 }
5425 RETURN;
5426 }
5427
5428 /* List operators. */
5429
PP(pp_list)5430 PP(pp_list)
5431 {
5432 I32 markidx = POPMARK;
5433 if (GIMME_V != G_LIST) {
5434 /* don't initialize mark here, EXTEND() may move the stack */
5435 SV **mark;
5436 dSP;
5437 EXTEND(SP, 1); /* in case no arguments, as in @empty */
5438 mark = PL_stack_base + markidx;
5439 if (++MARK <= SP)
5440 *MARK = *SP; /* unwanted list, return last item */
5441 else
5442 *MARK = &PL_sv_undef;
5443 SP = MARK;
5444 PUTBACK;
5445 }
5446 return NORMAL;
5447 }
5448
PP(pp_lslice)5449 PP(pp_lslice)
5450 {
5451 dSP;
5452 SV ** const lastrelem = PL_stack_sp;
5453 SV ** const lastlelem = PL_stack_base + POPMARK;
5454 SV ** const firstlelem = PL_stack_base + POPMARK + 1;
5455 SV ** const firstrelem = lastlelem + 1;
5456 const U8 mod = PL_op->op_flags & OPf_MOD;
5457
5458 const I32 max = lastrelem - lastlelem;
5459 SV **lelem;
5460
5461 if (GIMME_V != G_LIST) {
5462 if (lastlelem < firstlelem) {
5463 EXTEND(SP, 1);
5464 *firstlelem = &PL_sv_undef;
5465 }
5466 else {
5467 I32 ix = SvIV(*lastlelem);
5468 if (ix < 0)
5469 ix += max;
5470 if (ix < 0 || ix >= max)
5471 *firstlelem = &PL_sv_undef;
5472 else
5473 *firstlelem = firstrelem[ix];
5474 }
5475 SP = firstlelem;
5476 RETURN;
5477 }
5478
5479 if (max == 0) {
5480 SP = firstlelem - 1;
5481 RETURN;
5482 }
5483
5484 for (lelem = firstlelem; lelem <= lastlelem; lelem++) {
5485 I32 ix = SvIV(*lelem);
5486 if (ix < 0)
5487 ix += max;
5488 if (ix < 0 || ix >= max)
5489 *lelem = &PL_sv_undef;
5490 else {
5491 if (!(*lelem = firstrelem[ix]))
5492 *lelem = &PL_sv_undef;
5493 else if (mod && SvPADTMP(*lelem)) {
5494 *lelem = firstrelem[ix] = sv_mortalcopy(*lelem);
5495 }
5496 }
5497 }
5498 SP = lastlelem;
5499 RETURN;
5500 }
5501
PP(pp_anonlist)5502 PP(pp_anonlist)
5503 {
5504 dSP; dMARK;
5505 const I32 items = SP - MARK;
5506 SV * const av = MUTABLE_SV(av_make(items, MARK+1));
5507 SP = MARK;
5508 mXPUSHs((PL_op->op_flags & OPf_SPECIAL)
5509 ? newRV_noinc(av) : av);
5510 RETURN;
5511 }
5512
PP(pp_anonhash)5513 PP(pp_anonhash)
5514 {
5515 dSP; dMARK; dORIGMARK;
5516 HV* const hv = newHV();
5517 SV* const retval = sv_2mortal( PL_op->op_flags & OPf_SPECIAL
5518 ? newRV_noinc(MUTABLE_SV(hv))
5519 : MUTABLE_SV(hv) );
5520 /* This isn't quite true for an odd sized list (it's one too few) but it's
5521 not worth the runtime +1 just to optimise for the warning case. */
5522 SSize_t pairs = (SP - MARK) >> 1;
5523 if (pairs > PERL_HASH_DEFAULT_HvMAX) {
5524 hv_ksplit(hv, pairs);
5525 }
5526
5527 while (MARK < SP) {
5528 SV * const key =
5529 (MARK++, SvGMAGICAL(*MARK) ? sv_mortalcopy(*MARK) : *MARK);
5530 SV *val;
5531 if (MARK < SP)
5532 {
5533 MARK++;
5534 SvGETMAGIC(*MARK);
5535 val = newSV(0);
5536 sv_setsv_nomg(val, *MARK);
5537 }
5538 else
5539 {
5540 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "Odd number of elements in anonymous hash");
5541 val = newSV(0);
5542 }
5543 (void)hv_store_ent(hv,key,val,0);
5544 }
5545 SP = ORIGMARK;
5546 XPUSHs(retval);
5547 RETURN;
5548 }
5549
PP(pp_splice)5550 PP(pp_splice)
5551 {
5552 dSP; dMARK; dORIGMARK;
5553 int num_args = (SP - MARK);
5554 AV *ary = MUTABLE_AV(*++MARK);
5555 SV **src;
5556 SV **dst;
5557 SSize_t i;
5558 SSize_t offset;
5559 SSize_t length;
5560 SSize_t newlen;
5561 SSize_t after;
5562 SSize_t diff;
5563 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5564
5565 if (mg) {
5566 return Perl_tied_method(aTHX_ SV_CONST(SPLICE), mark - 1, MUTABLE_SV(ary), mg,
5567 GIMME_V | TIED_METHOD_ARGUMENTS_ON_STACK,
5568 sp - mark);
5569 }
5570
5571 if (SvREADONLY(ary))
5572 Perl_croak_no_modify();
5573
5574 SP++;
5575
5576 if (++MARK < SP) {
5577 offset = i = SvIV(*MARK);
5578 if (offset < 0)
5579 offset += AvFILLp(ary) + 1;
5580 if (offset < 0)
5581 DIE(aTHX_ PL_no_aelem, i);
5582 if (++MARK < SP) {
5583 length = SvIVx(*MARK++);
5584 if (length < 0) {
5585 length += AvFILLp(ary) - offset + 1;
5586 if (length < 0)
5587 length = 0;
5588 }
5589 }
5590 else
5591 length = AvMAX(ary) + 1; /* close enough to infinity */
5592 }
5593 else {
5594 offset = 0;
5595 length = AvMAX(ary) + 1;
5596 }
5597 if (offset > AvFILLp(ary) + 1) {
5598 if (num_args > 2)
5599 Perl_ck_warner(aTHX_ packWARN(WARN_MISC), "splice() offset past end of array" );
5600 offset = AvFILLp(ary) + 1;
5601 }
5602 after = AvFILLp(ary) + 1 - (offset + length);
5603 if (after < 0) { /* not that much array */
5604 length += after; /* offset+length now in array */
5605 after = 0;
5606 if (!AvALLOC(ary))
5607 av_extend(ary, 0);
5608 }
5609
5610 /* At this point, MARK .. SP-1 is our new LIST */
5611
5612 newlen = SP - MARK;
5613 diff = newlen - length;
5614 if (newlen && !AvREAL(ary) && AvREIFY(ary))
5615 av_reify(ary);
5616
5617 /* make new elements SVs now: avoid problems if they're from the array */
5618 for (dst = MARK, i = newlen; i; i--) {
5619 SV * const h = *dst;
5620 *dst++ = newSVsv(h);
5621 }
5622
5623 if (diff < 0) { /* shrinking the area */
5624 SV **tmparyval = NULL;
5625 if (newlen) {
5626 Newx(tmparyval, newlen, SV*); /* so remember insertion */
5627 Copy(MARK, tmparyval, newlen, SV*);
5628 }
5629
5630 MARK = ORIGMARK + 1;
5631 if (GIMME_V == G_LIST) { /* copy return vals to stack */
5632 const bool real = cBOOL(AvREAL(ary));
5633 MEXTEND(MARK, length);
5634 if (real)
5635 EXTEND_MORTAL(length);
5636 for (i = 0, dst = MARK; i < length; i++) {
5637 if ((*dst = AvARRAY(ary)[i+offset])) {
5638 if (real)
5639 sv_2mortal(*dst); /* free them eventually */
5640 }
5641 else
5642 *dst = &PL_sv_undef;
5643 dst++;
5644 }
5645 MARK += length - 1;
5646 }
5647 else {
5648 *MARK = AvARRAY(ary)[offset+length-1];
5649 if (AvREAL(ary)) {
5650 sv_2mortal(*MARK);
5651 for (i = length - 1, dst = &AvARRAY(ary)[offset]; i > 0; i--)
5652 SvREFCNT_dec(*dst++); /* free them now */
5653 }
5654 if (!*MARK)
5655 *MARK = &PL_sv_undef;
5656 }
5657 AvFILLp(ary) += diff;
5658
5659 /* pull up or down? */
5660
5661 if (offset < after) { /* easier to pull up */
5662 if (offset) { /* esp. if nothing to pull */
5663 src = &AvARRAY(ary)[offset-1];
5664 dst = src - diff; /* diff is negative */
5665 for (i = offset; i > 0; i--) /* can't trust Copy */
5666 *dst-- = *src--;
5667 }
5668 dst = AvARRAY(ary);
5669 AvARRAY(ary) = AvARRAY(ary) - diff; /* diff is negative */
5670 AvMAX(ary) += diff;
5671 }
5672 else {
5673 if (after) { /* anything to pull down? */
5674 src = AvARRAY(ary) + offset + length;
5675 dst = src + diff; /* diff is negative */
5676 Move(src, dst, after, SV*);
5677 }
5678 dst = &AvARRAY(ary)[AvFILLp(ary)+1];
5679 /* avoid later double free */
5680 }
5681 i = -diff;
5682 while (i)
5683 dst[--i] = NULL;
5684
5685 if (newlen) {
5686 Copy( tmparyval, AvARRAY(ary) + offset, newlen, SV* );
5687 Safefree(tmparyval);
5688 }
5689 }
5690 else { /* no, expanding (or same) */
5691 SV** tmparyval = NULL;
5692 if (length) {
5693 Newx(tmparyval, length, SV*); /* so remember deletion */
5694 Copy(AvARRAY(ary)+offset, tmparyval, length, SV*);
5695 }
5696
5697 if (diff > 0) { /* expanding */
5698 /* push up or down? */
5699 if (offset < after && diff <= AvARRAY(ary) - AvALLOC(ary)) {
5700 if (offset) {
5701 src = AvARRAY(ary);
5702 dst = src - diff;
5703 Move(src, dst, offset, SV*);
5704 }
5705 AvARRAY(ary) = AvARRAY(ary) - diff;/* diff is positive */
5706 AvMAX(ary) += diff;
5707 AvFILLp(ary) += diff;
5708 }
5709 else {
5710 if (AvFILLp(ary) + diff >= AvMAX(ary)) /* oh, well */
5711 av_extend(ary, AvFILLp(ary) + diff);
5712 AvFILLp(ary) += diff;
5713
5714 if (after) {
5715 dst = AvARRAY(ary) + AvFILLp(ary);
5716 src = dst - diff;
5717 for (i = after; i; i--) {
5718 *dst-- = *src--;
5719 }
5720 }
5721 }
5722 }
5723
5724 if (newlen) {
5725 Copy( MARK, AvARRAY(ary) + offset, newlen, SV* );
5726 }
5727
5728 MARK = ORIGMARK + 1;
5729 if (GIMME_V == G_LIST) { /* copy return vals to stack */
5730 if (length) {
5731 const bool real = cBOOL(AvREAL(ary));
5732 if (real)
5733 EXTEND_MORTAL(length);
5734 for (i = 0, dst = MARK; i < length; i++) {
5735 if ((*dst = tmparyval[i])) {
5736 if (real)
5737 sv_2mortal(*dst); /* free them eventually */
5738 }
5739 else *dst = &PL_sv_undef;
5740 dst++;
5741 }
5742 }
5743 MARK += length - 1;
5744 }
5745 else if (length--) {
5746 *MARK = tmparyval[length];
5747 if (AvREAL(ary)) {
5748 sv_2mortal(*MARK);
5749 while (length-- > 0)
5750 SvREFCNT_dec(tmparyval[length]);
5751 }
5752 if (!*MARK)
5753 *MARK = &PL_sv_undef;
5754 }
5755 else
5756 *MARK = &PL_sv_undef;
5757 Safefree(tmparyval);
5758 }
5759
5760 if (SvMAGICAL(ary))
5761 mg_set(MUTABLE_SV(ary));
5762
5763 SP = MARK;
5764 RETURN;
5765 }
5766
PP(pp_push)5767 PP(pp_push)
5768 {
5769 dSP; dMARK; dORIGMARK; dTARGET;
5770 AV * const ary = MUTABLE_AV(*++MARK);
5771 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5772
5773 if (mg) {
5774 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
5775 PUSHMARK(MARK);
5776 PUTBACK;
5777 ENTER_with_name("call_PUSH");
5778 call_sv(SV_CONST(PUSH),G_SCALAR|G_DISCARD|G_METHOD_NAMED);
5779 LEAVE_with_name("call_PUSH");
5780 /* SPAGAIN; not needed: SP is assigned to immediately below */
5781 }
5782 else {
5783 /* PL_delaymagic is restored by JUMPENV_POP on dieing, so we
5784 * only need to save locally, not on the save stack */
5785 U16 old_delaymagic = PL_delaymagic;
5786
5787 if (SvREADONLY(ary) && MARK < SP) Perl_croak_no_modify();
5788 PL_delaymagic = DM_DELAY;
5789 for (++MARK; MARK <= SP; MARK++) {
5790 SV *sv;
5791 if (*MARK) SvGETMAGIC(*MARK);
5792 sv = newSV(0);
5793 if (*MARK)
5794 sv_setsv_nomg(sv, *MARK);
5795 av_store(ary, AvFILLp(ary)+1, sv);
5796 }
5797 if (PL_delaymagic & DM_ARRAY_ISA)
5798 mg_set(MUTABLE_SV(ary));
5799 PL_delaymagic = old_delaymagic;
5800 }
5801 SP = ORIGMARK;
5802 if (OP_GIMME(PL_op, 0) != G_VOID) {
5803 PUSHi( AvFILL(ary) + 1 );
5804 }
5805 RETURN;
5806 }
5807
5808 /* also used for: pp_pop()*/
PP(pp_shift)5809 PP(pp_shift)
5810 {
5811 dSP;
5812 AV * const av = PL_op->op_flags & OPf_SPECIAL
5813 ? MUTABLE_AV(GvAVn(PL_defgv)) : MUTABLE_AV(POPs);
5814 SV * const sv = PL_op->op_type == OP_SHIFT ? av_shift(av) : av_pop(av);
5815 EXTEND(SP, 1);
5816 assert (sv);
5817 if (AvREAL(av))
5818 (void)sv_2mortal(sv);
5819 PUSHs(sv);
5820 RETURN;
5821 }
5822
PP(pp_unshift)5823 PP(pp_unshift)
5824 {
5825 dSP; dMARK; dORIGMARK; dTARGET;
5826 AV *ary = MUTABLE_AV(*++MARK);
5827 const MAGIC * const mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied);
5828
5829 if (mg) {
5830 *MARK-- = SvTIED_obj(MUTABLE_SV(ary), mg);
5831 PUSHMARK(MARK);
5832 PUTBACK;
5833 ENTER_with_name("call_UNSHIFT");
5834 call_sv(SV_CONST(UNSHIFT),G_SCALAR|G_DISCARD|G_METHOD_NAMED);
5835 LEAVE_with_name("call_UNSHIFT");
5836 /* SPAGAIN; not needed: SP is assigned to immediately below */
5837 }
5838 else {
5839 /* PL_delaymagic is restored by JUMPENV_POP on dieing, so we
5840 * only need to save locally, not on the save stack */
5841 U16 old_delaymagic = PL_delaymagic;
5842 SSize_t i = 0;
5843
5844 av_unshift(ary, SP - MARK);
5845 PL_delaymagic = DM_DELAY;
5846 while (MARK < SP) {
5847 SV * const sv = newSVsv(*++MARK);
5848 (void)av_store(ary, i++, sv);
5849 }
5850 if (PL_delaymagic & DM_ARRAY_ISA)
5851 mg_set(MUTABLE_SV(ary));
5852 PL_delaymagic = old_delaymagic;
5853 }
5854 SP = ORIGMARK;
5855 if (OP_GIMME(PL_op, 0) != G_VOID) {
5856 PUSHi( AvFILL(ary) + 1 );
5857 }
5858 RETURN;
5859 }
5860
PP(pp_reverse)5861 PP(pp_reverse)
5862 {
5863 dSP; dMARK;
5864
5865 if (GIMME_V == G_LIST) {
5866 if (PL_op->op_private & OPpREVERSE_INPLACE) {
5867 AV *av;
5868
5869 /* See pp_sort() */
5870 assert( MARK+1 == SP && *SP && SvTYPE(*SP) == SVt_PVAV);
5871 (void)POPMARK; /* remove mark associated with ex-OP_AASSIGN */
5872 av = MUTABLE_AV((*SP));
5873 /* In-place reversing only happens in void context for the array
5874 * assignment. We don't need to push anything on the stack. */
5875 SP = MARK;
5876
5877 if (SvMAGICAL(av)) {
5878 SSize_t i, j;
5879 SV *tmp = sv_newmortal();
5880 /* For SvCANEXISTDELETE */
5881 HV *stash;
5882 const MAGIC *mg;
5883 bool can_preserve = SvCANEXISTDELETE(av);
5884
5885 for (i = 0, j = av_top_index(av); i < j; ++i, --j) {
5886 SV *begin, *end;
5887
5888 if (can_preserve) {
5889 if (!av_exists(av, i)) {
5890 if (av_exists(av, j)) {
5891 SV *sv = av_delete(av, j, 0);
5892 begin = *av_fetch(av, i, TRUE);
5893 sv_setsv_mg(begin, sv);
5894 }
5895 continue;
5896 }
5897 else if (!av_exists(av, j)) {
5898 SV *sv = av_delete(av, i, 0);
5899 end = *av_fetch(av, j, TRUE);
5900 sv_setsv_mg(end, sv);
5901 continue;
5902 }
5903 }
5904
5905 begin = *av_fetch(av, i, TRUE);
5906 end = *av_fetch(av, j, TRUE);
5907 sv_setsv(tmp, begin);
5908 sv_setsv_mg(begin, end);
5909 sv_setsv_mg(end, tmp);
5910 }
5911 }
5912 else {
5913 SV **begin = AvARRAY(av);
5914
5915 if (begin) {
5916 SV **end = begin + AvFILLp(av);
5917
5918 while (begin < end) {
5919 SV * const tmp = *begin;
5920 *begin++ = *end;
5921 *end-- = tmp;
5922 }
5923 }
5924 }
5925 }
5926 else {
5927 SV **oldsp = SP;
5928 MARK++;
5929 while (MARK < SP) {
5930 SV * const tmp = *MARK;
5931 *MARK++ = *SP;
5932 *SP-- = tmp;
5933 }
5934 /* safe as long as stack cannot get extended in the above */
5935 SP = oldsp;
5936 }
5937 }
5938 else {
5939 char *up;
5940 dTARGET;
5941 STRLEN len;
5942
5943 SvUTF8_off(TARG); /* decontaminate */
5944 if (SP - MARK > 1) {
5945 do_join(TARG, &PL_sv_no, MARK, SP);
5946 SP = MARK + 1;
5947 SETs(TARG);
5948 } else if (SP > MARK) {
5949 sv_setsv(TARG, *SP);
5950 SETs(TARG);
5951 } else {
5952 sv_setsv(TARG, DEFSV);
5953 XPUSHs(TARG);
5954 }
5955 SvSETMAGIC(TARG); /* remove any utf8 length magic */
5956
5957 up = SvPV_force(TARG, len);
5958 if (len > 1) {
5959 char *down;
5960 if (DO_UTF8(TARG)) { /* first reverse each character */
5961 U8* s = (U8*)SvPVX(TARG);
5962 const U8* send = (U8*)(s + len);
5963 while (s < send) {
5964 if (UTF8_IS_INVARIANT(*s)) {
5965 s++;
5966 continue;
5967 }
5968 else {
5969 if (!utf8_to_uvchr_buf(s, send, 0))
5970 break;
5971 up = (char*)s;
5972 s += UTF8SKIP(s);
5973 down = (char*)(s - 1);
5974 /* reverse this character */
5975 while (down > up) {
5976 const char tmp = *up;
5977 *up++ = *down;
5978 *down-- = tmp;
5979 }
5980 }
5981 }
5982 up = SvPVX(TARG);
5983 }
5984 down = SvPVX(TARG) + len - 1;
5985 while (down > up) {
5986 const char tmp = *up;
5987 *up++ = *down;
5988 *down-- = tmp;
5989 }
5990 (void)SvPOK_only_UTF8(TARG);
5991 }
5992 }
5993 RETURN;
5994 }
5995
PP(pp_split)5996 PP(pp_split)
5997 {
5998 dSP; dTARG;
5999 AV *ary = ( (PL_op->op_private & OPpSPLIT_ASSIGN) /* @a = split */
6000 && (PL_op->op_flags & OPf_STACKED)) /* @{expr} = split */
6001 ? (AV *)POPs : NULL;
6002 IV limit = POPi; /* note, negative is forever */
6003 SV * const sv = POPs;
6004 STRLEN len;
6005 const char *s = SvPV_const(sv, len);
6006 const bool do_utf8 = DO_UTF8(sv);
6007 const bool in_uni_8_bit = IN_UNI_8_BIT;
6008 const char *strend = s + len;
6009 PMOP *pm = cPMOPx(PL_op);
6010 REGEXP *rx;
6011 SV *dstr;
6012 const char *m;
6013 SSize_t iters = 0;
6014 const STRLEN slen = do_utf8
6015 ? utf8_length((U8*)s, (U8*)strend)
6016 : (STRLEN)(strend - s);
6017 SSize_t maxiters = slen + 10;
6018 I32 trailing_empty = 0;
6019 const char *orig;
6020 const IV origlimit = limit;
6021 bool realarray = 0;
6022 I32 base;
6023 const U8 gimme = GIMME_V;
6024 bool gimme_scalar;
6025 I32 oldsave = PL_savestack_ix;
6026 U32 flags = (do_utf8 ? SVf_UTF8 : 0) |
6027 SVs_TEMP; /* Make mortal SVs by default */
6028 MAGIC *mg = NULL;
6029
6030 rx = PM_GETRE(pm);
6031
6032 TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET &&
6033 (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE)));
6034
6035 /* handle @ary = split(...) optimisation */
6036 if (PL_op->op_private & OPpSPLIT_ASSIGN) {
6037 realarray = 1;
6038 if (!(PL_op->op_flags & OPf_STACKED)) {
6039 if (PL_op->op_private & OPpSPLIT_LEX) {
6040 if (PL_op->op_private & OPpLVAL_INTRO)
6041 SAVECLEARSV(PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff));
6042 ary = (AV *)PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff);
6043 }
6044 else {
6045 GV *gv =
6046 #ifdef USE_ITHREADS
6047 MUTABLE_GV(PAD_SVl(pm->op_pmreplrootu.op_pmtargetoff));
6048 #else
6049 pm->op_pmreplrootu.op_pmtargetgv;
6050 #endif
6051 if (PL_op->op_private & OPpLVAL_INTRO)
6052 ary = save_ary(gv);
6053 else
6054 ary = GvAVn(gv);
6055 }
6056 /* skip anything pushed by OPpLVAL_INTRO above */
6057 oldsave = PL_savestack_ix;
6058 }
6059
6060 /* Some defence against stack-not-refcounted bugs */
6061 (void)sv_2mortal(SvREFCNT_inc_simple_NN(ary));
6062
6063 if ((mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied))) {
6064 PUSHMARK(SP);
6065 XPUSHs(SvTIED_obj(MUTABLE_SV(ary), mg));
6066 } else {
6067 flags &= ~SVs_TEMP; /* SVs will not be mortal */
6068 }
6069 }
6070
6071 base = SP - PL_stack_base;
6072 orig = s;
6073 if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) {
6074 if (do_utf8) {
6075 while (s < strend && isSPACE_utf8_safe(s, strend))
6076 s += UTF8SKIP(s);
6077 }
6078 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
6079 while (s < strend && isSPACE_LC(*s))
6080 s++;
6081 }
6082 else if (in_uni_8_bit) {
6083 while (s < strend && isSPACE_L1(*s))
6084 s++;
6085 }
6086 else {
6087 while (s < strend && isSPACE(*s))
6088 s++;
6089 }
6090 }
6091
6092 gimme_scalar = gimme == G_SCALAR && !ary;
6093
6094 if (!limit)
6095 limit = maxiters + 2;
6096 if (RX_EXTFLAGS(rx) & RXf_WHITE) {
6097 while (--limit) {
6098 m = s;
6099 /* this one uses 'm' and is a negative test */
6100 if (do_utf8) {
6101 while (m < strend && ! isSPACE_utf8_safe(m, strend) ) {
6102 const int t = UTF8SKIP(m);
6103 /* isSPACE_utf8_safe returns FALSE for malform utf8 */
6104 if (strend - m < t)
6105 m = strend;
6106 else
6107 m += t;
6108 }
6109 }
6110 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET)
6111 {
6112 while (m < strend && !isSPACE_LC(*m))
6113 ++m;
6114 }
6115 else if (in_uni_8_bit) {
6116 while (m < strend && !isSPACE_L1(*m))
6117 ++m;
6118 } else {
6119 while (m < strend && !isSPACE(*m))
6120 ++m;
6121 }
6122 if (m >= strend)
6123 break;
6124
6125 if (gimme_scalar) {
6126 iters++;
6127 if (m-s == 0)
6128 trailing_empty++;
6129 else
6130 trailing_empty = 0;
6131 } else {
6132 dstr = newSVpvn_flags(s, m-s, flags);
6133 XPUSHs(dstr);
6134 }
6135
6136 /* skip the whitespace found last */
6137 if (do_utf8)
6138 s = m + UTF8SKIP(m);
6139 else
6140 s = m + 1;
6141
6142 /* this one uses 's' and is a positive test */
6143 if (do_utf8) {
6144 while (s < strend && isSPACE_utf8_safe(s, strend) )
6145 s += UTF8SKIP(s);
6146 }
6147 else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET)
6148 {
6149 while (s < strend && isSPACE_LC(*s))
6150 ++s;
6151 }
6152 else if (in_uni_8_bit) {
6153 while (s < strend && isSPACE_L1(*s))
6154 ++s;
6155 } else {
6156 while (s < strend && isSPACE(*s))
6157 ++s;
6158 }
6159 }
6160 }
6161 else if (RX_EXTFLAGS(rx) & RXf_START_ONLY) {
6162 while (--limit) {
6163 for (m = s; m < strend && *m != '\n'; m++)
6164 ;
6165 m++;
6166 if (m >= strend)
6167 break;
6168
6169 if (gimme_scalar) {
6170 iters++;
6171 if (m-s == 0)
6172 trailing_empty++;
6173 else
6174 trailing_empty = 0;
6175 } else {
6176 dstr = newSVpvn_flags(s, m-s, flags);
6177 XPUSHs(dstr);
6178 }
6179 s = m;
6180 }
6181 }
6182 else if (RX_EXTFLAGS(rx) & RXf_NULL && !(s >= strend)) {
6183 /* This case boils down to deciding which is the smaller of:
6184 * limit - effectively a number of characters
6185 * slen - which already contains the number of characters in s
6186 *
6187 * The resulting number is the number of iters (for gimme_scalar)
6188 * or the number of SVs to create (!gimme_scalar). */
6189
6190 /* setting it to -1 will trigger a panic in EXTEND() */
6191 const SSize_t sslen = slen > SSize_t_MAX ? -1 : (SSize_t)slen;
6192 const IV items = limit - 1;
6193 if (sslen < items || items < 0) {
6194 iters = slen -1;
6195 limit = slen + 1;
6196 /* Note: The same result is returned if the following block
6197 * is removed, because of the "keep field after final delim?"
6198 * adjustment, but having the following makes the "correct"
6199 * behaviour more apparent. */
6200 if (gimme_scalar) {
6201 s = strend;
6202 iters++;
6203 }
6204 } else {
6205 iters = items;
6206 }
6207 if (!gimme_scalar) {
6208 /*
6209 Pre-extend the stack, either the number of bytes or
6210 characters in the string or a limited amount, triggered by:
6211 my ($x, $y) = split //, $str;
6212 or
6213 split //, $str, $i;
6214 */
6215 EXTEND(SP, limit);
6216 if (do_utf8) {
6217 while (--limit) {
6218 m = s;
6219 s += UTF8SKIP(s);
6220 dstr = newSVpvn_flags(m, s-m, flags);
6221 PUSHs(dstr);
6222 }
6223 } else {
6224 while (--limit) {
6225 dstr = newSVpvn_flags(s, 1, flags);
6226 PUSHs(dstr);
6227 s++;
6228 }
6229 }
6230 }
6231 }
6232 else if (do_utf8 == (RX_UTF8(rx) != 0) &&
6233 (RX_EXTFLAGS(rx) & RXf_USE_INTUIT) && !RX_NPARENS(rx)
6234 && (RX_EXTFLAGS(rx) & RXf_CHECK_ALL)
6235 && !(RX_EXTFLAGS(rx) & RXf_IS_ANCHORED)) {
6236 const int tail = (RX_EXTFLAGS(rx) & RXf_INTUIT_TAIL);
6237 SV * const csv = CALLREG_INTUIT_STRING(rx);
6238
6239 len = RX_MINLENRET(rx);
6240 if (len == 1 && !RX_UTF8(rx) && !tail) {
6241 const char c = *SvPV_nolen_const(csv);
6242 while (--limit) {
6243 for (m = s; m < strend && *m != c; m++)
6244 ;
6245 if (m >= strend)
6246 break;
6247 if (gimme_scalar) {
6248 iters++;
6249 if (m-s == 0)
6250 trailing_empty++;
6251 else
6252 trailing_empty = 0;
6253 } else {
6254 dstr = newSVpvn_flags(s, m-s, flags);
6255 XPUSHs(dstr);
6256 }
6257 /* The rx->minlen is in characters but we want to step
6258 * s ahead by bytes. */
6259 if (do_utf8)
6260 s = (char*)utf8_hop_forward((U8*) m, len, (U8*) strend);
6261 else
6262 s = m + len; /* Fake \n at the end */
6263 }
6264 }
6265 else {
6266 const bool multiline = (RX_EXTFLAGS(rx) & RXf_PMf_MULTILINE) ? 1 : 0;
6267
6268 while (s < strend && --limit &&
6269 (m = fbm_instr((unsigned char*)s, (unsigned char*)strend,
6270 csv, multiline ? FBMrf_MULTILINE : 0)) )
6271 {
6272 if (gimme_scalar) {
6273 iters++;
6274 if (m-s == 0)
6275 trailing_empty++;
6276 else
6277 trailing_empty = 0;
6278 } else {
6279 dstr = newSVpvn_flags(s, m-s, flags);
6280 XPUSHs(dstr);
6281 }
6282 /* The rx->minlen is in characters but we want to step
6283 * s ahead by bytes. */
6284 if (do_utf8)
6285 s = (char*)utf8_hop_forward((U8*)m, len, (U8 *) strend);
6286 else
6287 s = m + len; /* Fake \n at the end */
6288 }
6289 }
6290 }
6291 else {
6292 maxiters += slen * RX_NPARENS(rx);
6293 while (s < strend && --limit)
6294 {
6295 I32 rex_return;
6296 PUTBACK;
6297 rex_return = CALLREGEXEC(rx, (char*)s, (char*)strend, (char*)orig, 1,
6298 sv, NULL, 0);
6299 SPAGAIN;
6300 if (rex_return == 0)
6301 break;
6302 TAINT_IF(RX_MATCH_TAINTED(rx));
6303 /* we never pass the REXEC_COPY_STR flag, so it should
6304 * never get copied */
6305 assert(!RX_MATCH_COPIED(rx));
6306 m = RX_OFFS(rx)[0].start + orig;
6307
6308 if (gimme_scalar) {
6309 iters++;
6310 if (m-s == 0)
6311 trailing_empty++;
6312 else
6313 trailing_empty = 0;
6314 } else {
6315 dstr = newSVpvn_flags(s, m-s, flags);
6316 XPUSHs(dstr);
6317 }
6318 if (RX_NPARENS(rx)) {
6319 I32 i;
6320 for (i = 1; i <= (I32)RX_NPARENS(rx); i++) {
6321 s = RX_OFFS(rx)[i].start + orig;
6322 m = RX_OFFS(rx)[i].end + orig;
6323
6324 /* japhy (07/27/01) -- the (m && s) test doesn't catch
6325 parens that didn't match -- they should be set to
6326 undef, not the empty string */
6327 if (gimme_scalar) {
6328 iters++;
6329 if (m-s == 0)
6330 trailing_empty++;
6331 else
6332 trailing_empty = 0;
6333 } else {
6334 if (m >= orig && s >= orig) {
6335 dstr = newSVpvn_flags(s, m-s, flags);
6336 }
6337 else
6338 dstr = &PL_sv_undef; /* undef, not "" */
6339 XPUSHs(dstr);
6340 }
6341
6342 }
6343 }
6344 s = RX_OFFS(rx)[0].end + orig;
6345 }
6346 }
6347
6348 if (!gimme_scalar) {
6349 iters = (SP - PL_stack_base) - base;
6350 }
6351 if (iters > maxiters)
6352 DIE(aTHX_ "Split loop");
6353
6354 /* keep field after final delim? */
6355 if (s < strend || (iters && origlimit)) {
6356 if (!gimme_scalar) {
6357 const STRLEN l = strend - s;
6358 dstr = newSVpvn_flags(s, l, flags);
6359 XPUSHs(dstr);
6360 }
6361 iters++;
6362 }
6363 else if (!origlimit) {
6364 if (gimme_scalar) {
6365 iters -= trailing_empty;
6366 } else {
6367 while (iters > 0 && (!TOPs || !SvANY(TOPs) || SvCUR(TOPs) == 0)) {
6368 if (TOPs && !(flags & SVs_TEMP))
6369 sv_2mortal(TOPs);
6370 *SP-- = NULL;
6371 iters--;
6372 }
6373 }
6374 }
6375
6376 PUTBACK;
6377 LEAVE_SCOPE(oldsave);
6378 SPAGAIN;
6379 if (realarray) {
6380 if (!mg) {
6381 PUTBACK;
6382 if(AvREAL(ary)) {
6383 if (av_count(ary) > 0)
6384 av_clear(ary);
6385 } else {
6386 AvREAL_on(ary);
6387 AvREIFY_off(ary);
6388
6389 if (AvMAX(ary) > -1) {
6390 /* don't free mere refs */
6391 Zero(AvARRAY(ary), AvMAX(ary), SV*);
6392 }
6393 }
6394 if(AvMAX(ary) < iters)
6395 av_extend(ary,iters);
6396 SPAGAIN;
6397
6398 /* Need to copy the SV*s from the stack into ary */
6399 Copy(SP + 1 - iters, AvARRAY(ary), iters, SV*);
6400 AvFILLp(ary) = iters - 1;
6401
6402 if (SvSMAGICAL(ary)) {
6403 PUTBACK;
6404 mg_set(MUTABLE_SV(ary));
6405 SPAGAIN;
6406 }
6407
6408 if (gimme != G_LIST) {
6409 /* SP points to the final SV* pushed to the stack. But the SV* */
6410 /* are not going to be used from the stack. Point SP to below */
6411 /* the first of these SV*. */
6412 SP -= iters;
6413 PUTBACK;
6414 }
6415 }
6416 else {
6417 PUTBACK;
6418 av_extend(ary,iters);
6419 av_clear(ary);
6420
6421 ENTER_with_name("call_PUSH");
6422 call_sv(SV_CONST(PUSH),G_SCALAR|G_DISCARD|G_METHOD_NAMED);
6423 LEAVE_with_name("call_PUSH");
6424 SPAGAIN;
6425
6426 if (gimme == G_LIST) {
6427 SSize_t i;
6428 /* EXTEND should not be needed - we just popped them */
6429 EXTEND_SKIP(SP, iters);
6430 for (i=0; i < iters; i++) {
6431 SV **svp = av_fetch(ary, i, FALSE);
6432 PUSHs((svp) ? *svp : &PL_sv_undef);
6433 }
6434 RETURN;
6435 }
6436 }
6437 }
6438
6439 if (gimme != G_LIST) {
6440 GETTARGET;
6441 XPUSHi(iters);
6442 }
6443
6444 RETURN;
6445 }
6446
PP(pp_once)6447 PP(pp_once)
6448 {
6449 dSP;
6450 SV *const sv = PAD_SVl(PL_op->op_targ);
6451
6452 if (SvPADSTALE(sv)) {
6453 /* First time. */
6454 SvPADSTALE_off(sv);
6455 RETURNOP(cLOGOP->op_other);
6456 }
6457 RETURNOP(cLOGOP->op_next);
6458 }
6459
PP(pp_lock)6460 PP(pp_lock)
6461 {
6462 dSP;
6463 dTOPss;
6464 SV *retsv = sv;
6465 SvLOCK(sv);
6466 if (SvTYPE(retsv) == SVt_PVAV || SvTYPE(retsv) == SVt_PVHV
6467 || SvTYPE(retsv) == SVt_PVCV) {
6468 retsv = refto(retsv);
6469 }
6470 SETs(retsv);
6471 RETURN;
6472 }
6473
6474
6475 /* used for: pp_padany(), pp_custom(); plus any system ops
6476 * that aren't implemented on a particular platform */
6477
PP(unimplemented_op)6478 PP(unimplemented_op)
6479 {
6480 const Optype op_type = PL_op->op_type;
6481 /* Using OP_NAME() isn't going to be helpful here. Firstly, it doesn't cope
6482 with out of range op numbers - it only "special" cases op_custom.
6483 Secondly, as the three ops we "panic" on are padmy, mapstart and custom,
6484 if we get here for a custom op then that means that the custom op didn't
6485 have an implementation. Given that OP_NAME() looks up the custom op
6486 by its op_ppaddr, likely it will return NULL, unless someone (unhelpfully)
6487 registers &Perl_unimplemented_op as the address of their custom op.
6488 NULL doesn't generate a useful error message. "custom" does. */
6489 const char *const name = op_type >= OP_max
6490 ? "[out of range]" : PL_op_name[op_type];
6491 if(OP_IS_SOCKET(op_type))
6492 DIE(aTHX_ PL_no_sock_func, name);
6493 DIE(aTHX_ "panic: unimplemented op %s (#%d) called", name, op_type);
6494 }
6495
6496 static void
S_maybe_unwind_defav(pTHX)6497 S_maybe_unwind_defav(pTHX)
6498 {
6499 if (CX_CUR()->cx_type & CXp_HASARGS) {
6500 PERL_CONTEXT *cx = CX_CUR();
6501
6502 assert(CxHASARGS(cx));
6503 cx_popsub_args(cx);
6504 cx->cx_type &= ~CXp_HASARGS;
6505 }
6506 }
6507
6508 /* For sorting out arguments passed to a &CORE:: subroutine */
PP(pp_coreargs)6509 PP(pp_coreargs)
6510 {
6511 dSP;
6512 int opnum = SvIOK(cSVOP_sv) ? (int)SvUV(cSVOP_sv) : 0;
6513 int defgv = PL_opargs[opnum] & OA_DEFGV ||opnum==OP_GLOB, whicharg = 0;
6514 AV * const at_ = GvAV(PL_defgv);
6515 SV **svp = at_ ? AvARRAY(at_) : NULL;
6516 I32 minargs = 0, maxargs = 0, numargs = at_ ? AvFILLp(at_)+1 : 0;
6517 I32 oa = opnum ? PL_opargs[opnum] >> OASHIFT : 0;
6518 bool seen_question = 0;
6519 const char *err = NULL;
6520 const bool pushmark = PL_op->op_private & OPpCOREARGS_PUSHMARK;
6521
6522 /* Count how many args there are first, to get some idea how far to
6523 extend the stack. */
6524 while (oa) {
6525 if ((oa & 7) == OA_LIST) { maxargs = I32_MAX; break; }
6526 maxargs++;
6527 if (oa & OA_OPTIONAL) seen_question = 1;
6528 if (!seen_question) minargs++;
6529 oa >>= 4;
6530 }
6531
6532 if(numargs < minargs) err = "Not enough";
6533 else if(numargs > maxargs) err = "Too many";
6534 if (err)
6535 /* diag_listed_as: Too many arguments for %s */
6536 Perl_croak(aTHX_
6537 "%s arguments for %s", err,
6538 opnum ? PL_op_desc[opnum] : SvPV_nolen_const(cSVOP_sv)
6539 );
6540
6541 /* Reset the stack pointer. Without this, we end up returning our own
6542 arguments in list context, in addition to the values we are supposed
6543 to return. nextstate usually does this on sub entry, but we need
6544 to run the next op with the caller's hints, so we cannot have a
6545 nextstate. */
6546 SP = PL_stack_base + CX_CUR()->blk_oldsp;
6547
6548 if(!maxargs) RETURN;
6549
6550 /* We do this here, rather than with a separate pushmark op, as it has
6551 to come in between two things this function does (stack reset and
6552 arg pushing). This seems the easiest way to do it. */
6553 if (pushmark) {
6554 PUTBACK;
6555 (void)Perl_pp_pushmark(aTHX);
6556 }
6557
6558 EXTEND(SP, maxargs == I32_MAX ? numargs : maxargs);
6559 PUTBACK; /* The code below can die in various places. */
6560
6561 oa = PL_opargs[opnum] >> OASHIFT;
6562 for (; oa&&(numargs||!pushmark); (void)(numargs&&(++svp,--numargs))) {
6563 whicharg++;
6564 switch (oa & 7) {
6565 case OA_SCALAR:
6566 try_defsv:
6567 if (!numargs && defgv && whicharg == minargs + 1) {
6568 PUSHs(DEFSV);
6569 }
6570 else PUSHs(numargs ? svp && *svp ? *svp : &PL_sv_undef : NULL);
6571 break;
6572 case OA_LIST:
6573 while (numargs--) {
6574 PUSHs(svp && *svp ? *svp : &PL_sv_undef);
6575 svp++;
6576 }
6577 RETURN;
6578 case OA_AVREF:
6579 if (!numargs) {
6580 GV *gv;
6581 if (CvUNIQUE(find_runcv_where(FIND_RUNCV_level_eq,1,NULL)))
6582 gv = PL_argvgv;
6583 else {
6584 S_maybe_unwind_defav(aTHX);
6585 gv = PL_defgv;
6586 }
6587 PUSHs((SV *)GvAVn(gv));
6588 break;
6589 }
6590 if (!svp || !*svp || !SvROK(*svp)
6591 || SvTYPE(SvRV(*svp)) != SVt_PVAV)
6592 DIE(aTHX_
6593 /* diag_listed_as: Type of arg %d to &CORE::%s must be %s*/
6594 "Type of arg %d to &CORE::%s must be array reference",
6595 whicharg, PL_op_desc[opnum]
6596 );
6597 PUSHs(SvRV(*svp));
6598 break;
6599 case OA_HVREF:
6600 if (!svp || !*svp || !SvROK(*svp)
6601 || ( SvTYPE(SvRV(*svp)) != SVt_PVHV
6602 && ( opnum == OP_DBMCLOSE || opnum == OP_DBMOPEN
6603 || SvTYPE(SvRV(*svp)) != SVt_PVAV )))
6604 DIE(aTHX_
6605 /* diag_listed_as: Type of arg %d to &CORE::%s must be %s*/
6606 "Type of arg %d to &CORE::%s must be hash%s reference",
6607 whicharg, PL_op_desc[opnum],
6608 opnum == OP_DBMCLOSE || opnum == OP_DBMOPEN
6609 ? ""
6610 : " or array"
6611 );
6612 PUSHs(SvRV(*svp));
6613 break;
6614 case OA_FILEREF:
6615 if (!numargs) PUSHs(NULL);
6616 else if(svp && *svp && SvROK(*svp) && isGV_with_GP(SvRV(*svp)))
6617 /* no magic here, as the prototype will have added an extra
6618 refgen and we just want what was there before that */
6619 PUSHs(SvRV(*svp));
6620 else {
6621 const bool constr = PL_op->op_private & whicharg;
6622 PUSHs(S_rv2gv(aTHX_
6623 svp && *svp ? *svp : &PL_sv_undef,
6624 constr, cBOOL(CopHINTS_get(PL_curcop) & HINT_STRICT_REFS),
6625 !constr
6626 ));
6627 }
6628 break;
6629 case OA_SCALARREF:
6630 if (!numargs) goto try_defsv;
6631 else {
6632 const bool wantscalar =
6633 PL_op->op_private & OPpCOREARGS_SCALARMOD;
6634 if (!svp || !*svp || !SvROK(*svp)
6635 /* We have to permit globrefs even for the \$ proto, as
6636 *foo is indistinguishable from ${\*foo}, and the proto-
6637 type permits the latter. */
6638 || SvTYPE(SvRV(*svp)) > (
6639 wantscalar ? SVt_PVLV
6640 : opnum == OP_LOCK || opnum == OP_UNDEF
6641 ? SVt_PVCV
6642 : SVt_PVHV
6643 )
6644 )
6645 DIE(aTHX_
6646 "Type of arg %d to &CORE::%s must be %s",
6647 whicharg, PL_op_name[opnum],
6648 wantscalar
6649 ? "scalar reference"
6650 : opnum == OP_LOCK || opnum == OP_UNDEF
6651 ? "reference to one of [$@%&*]"
6652 : "reference to one of [$@%*]"
6653 );
6654 PUSHs(SvRV(*svp));
6655 if (opnum == OP_UNDEF && SvRV(*svp) == (SV *)PL_defgv) {
6656 /* Undo @_ localisation, so that sub exit does not undo
6657 part of our undeffing. */
6658 S_maybe_unwind_defav(aTHX);
6659 }
6660 }
6661 break;
6662 default:
6663 DIE(aTHX_ "panic: unknown OA_*: %x", (unsigned)(oa&7));
6664 }
6665 oa = oa >> 4;
6666 }
6667
6668 RETURN;
6669 }
6670
6671 /* Implement CORE::keys(),values(),each().
6672 *
6673 * We won't know until run-time whether the arg is an array or hash,
6674 * so this op calls
6675 *
6676 * pp_keys/pp_values/pp_each
6677 * or
6678 * pp_akeys/pp_avalues/pp_aeach
6679 *
6680 * as appropriate (or whatever pp function actually implements the OP_FOO
6681 * functionality for each FOO).
6682 */
6683
PP(pp_avhvswitch)6684 PP(pp_avhvswitch)
6685 {
6686 dSP;
6687 return PL_ppaddr[
6688 (SvTYPE(TOPs) == SVt_PVAV ? OP_AEACH : OP_EACH)
6689 + (PL_op->op_private & OPpAVHVSWITCH_MASK)
6690 ](aTHX);
6691 }
6692
PP(pp_runcv)6693 PP(pp_runcv)
6694 {
6695 dSP;
6696 CV *cv;
6697 if (PL_op->op_private & OPpOFFBYONE) {
6698 cv = find_runcv_where(FIND_RUNCV_level_eq, 1, NULL);
6699 }
6700 else cv = find_runcv(NULL);
6701 XPUSHs(CvEVAL(cv) ? &PL_sv_undef : sv_2mortal(newRV((SV *)cv)));
6702 RETURN;
6703 }
6704
6705 static void
S_localise_aelem_lval(pTHX_ AV * const av,SV * const keysv,const bool can_preserve)6706 S_localise_aelem_lval(pTHX_ AV * const av, SV * const keysv,
6707 const bool can_preserve)
6708 {
6709 const SSize_t ix = SvIV(keysv);
6710 if (can_preserve ? av_exists(av, ix) : TRUE) {
6711 SV ** const svp = av_fetch(av, ix, 1);
6712 if (!svp || !*svp)
6713 Perl_croak(aTHX_ PL_no_aelem, ix);
6714 save_aelem(av, ix, svp);
6715 }
6716 else
6717 SAVEADELETE(av, ix);
6718 }
6719
6720 static void
S_localise_helem_lval(pTHX_ HV * const hv,SV * const keysv,const bool can_preserve)6721 S_localise_helem_lval(pTHX_ HV * const hv, SV * const keysv,
6722 const bool can_preserve)
6723 {
6724 if (can_preserve ? hv_exists_ent(hv, keysv, 0) : TRUE) {
6725 HE * const he = hv_fetch_ent(hv, keysv, 1, 0);
6726 SV ** const svp = he ? &HeVAL(he) : NULL;
6727 if (!svp || !*svp)
6728 Perl_croak(aTHX_ PL_no_helem_sv, SVfARG(keysv));
6729 save_helem_flags(hv, keysv, svp, 0);
6730 }
6731 else
6732 SAVEHDELETE(hv, keysv);
6733 }
6734
6735 static void
S_localise_gv_slot(pTHX_ GV * gv,U8 type)6736 S_localise_gv_slot(pTHX_ GV *gv, U8 type)
6737 {
6738 if (type == OPpLVREF_SV) {
6739 save_pushptrptr(gv, SvREFCNT_inc_simple(GvSV(gv)), SAVEt_GVSV);
6740 GvSV(gv) = 0;
6741 }
6742 else if (type == OPpLVREF_AV)
6743 /* XXX Inefficient, as it creates a new AV, which we are
6744 about to clobber. */
6745 save_ary(gv);
6746 else {
6747 assert(type == OPpLVREF_HV);
6748 /* XXX Likewise inefficient. */
6749 save_hash(gv);
6750 }
6751 }
6752
6753
PP(pp_refassign)6754 PP(pp_refassign)
6755 {
6756 dSP;
6757 SV * const key = PL_op->op_private & OPpLVREF_ELEM ? POPs : NULL;
6758 SV * const left = PL_op->op_flags & OPf_STACKED ? POPs : NULL;
6759 dTOPss;
6760 const char *bad = NULL;
6761 const U8 type = PL_op->op_private & OPpLVREF_TYPE;
6762 if (!SvROK(sv)) DIE(aTHX_ "Assigned value is not a reference");
6763 switch (type) {
6764 case OPpLVREF_SV:
6765 if (SvTYPE(SvRV(sv)) > SVt_PVLV)
6766 bad = " SCALAR";
6767 break;
6768 case OPpLVREF_AV:
6769 if (SvTYPE(SvRV(sv)) != SVt_PVAV)
6770 bad = "n ARRAY";
6771 break;
6772 case OPpLVREF_HV:
6773 if (SvTYPE(SvRV(sv)) != SVt_PVHV)
6774 bad = " HASH";
6775 break;
6776 case OPpLVREF_CV:
6777 if (SvTYPE(SvRV(sv)) != SVt_PVCV)
6778 bad = " CODE";
6779 }
6780 if (bad)
6781 /* diag_listed_as: Assigned value is not %s reference */
6782 DIE(aTHX_ "Assigned value is not a%s reference", bad);
6783 {
6784 MAGIC *mg;
6785 HV *stash;
6786 switch (left ? SvTYPE(left) : 0) {
6787 case 0:
6788 {
6789 SV * const old = PAD_SV(ARGTARG);
6790 PAD_SETSV(ARGTARG, SvREFCNT_inc_NN(SvRV(sv)));
6791 SvREFCNT_dec(old);
6792 if ((PL_op->op_private & (OPpLVAL_INTRO|OPpPAD_STATE))
6793 == OPpLVAL_INTRO)
6794 SAVECLEARSV(PAD_SVl(ARGTARG));
6795 break;
6796 }
6797 case SVt_PVGV:
6798 if (PL_op->op_private & OPpLVAL_INTRO) {
6799 S_localise_gv_slot(aTHX_ (GV *)left, type);
6800 }
6801 gv_setref(left, sv);
6802 SvSETMAGIC(left);
6803 break;
6804 case SVt_PVAV:
6805 assert(key);
6806 if (UNLIKELY(PL_op->op_private & OPpLVAL_INTRO)) {
6807 S_localise_aelem_lval(aTHX_ (AV *)left, key,
6808 SvCANEXISTDELETE(left));
6809 }
6810 av_store((AV *)left, SvIV(key), SvREFCNT_inc_simple_NN(SvRV(sv)));
6811 break;
6812 case SVt_PVHV:
6813 if (UNLIKELY(PL_op->op_private & OPpLVAL_INTRO)) {
6814 assert(key);
6815 S_localise_helem_lval(aTHX_ (HV *)left, key,
6816 SvCANEXISTDELETE(left));
6817 }
6818 (void)hv_store_ent((HV *)left, key, SvREFCNT_inc_simple_NN(SvRV(sv)), 0);
6819 }
6820 if (PL_op->op_flags & OPf_MOD)
6821 SETs(sv_2mortal(newSVsv(sv)));
6822 /* XXX else can weak references go stale before they are read, e.g.,
6823 in leavesub? */
6824 RETURN;
6825 }
6826 }
6827
PP(pp_lvref)6828 PP(pp_lvref)
6829 {
6830 dSP;
6831 SV * const ret = sv_2mortal(newSV_type(SVt_PVMG));
6832 SV * const elem = PL_op->op_private & OPpLVREF_ELEM ? POPs : NULL;
6833 SV * const arg = PL_op->op_flags & OPf_STACKED ? POPs : NULL;
6834 MAGIC * const mg = sv_magicext(ret, arg, PERL_MAGIC_lvref,
6835 &PL_vtbl_lvref, (char *)elem,
6836 elem ? HEf_SVKEY : (I32)ARGTARG);
6837 mg->mg_private = PL_op->op_private;
6838 if (PL_op->op_private & OPpLVREF_ITER)
6839 mg->mg_flags |= MGf_PERSIST;
6840 if (UNLIKELY(PL_op->op_private & OPpLVAL_INTRO)) {
6841 if (elem) {
6842 MAGIC *mg;
6843 HV *stash;
6844 assert(arg);
6845 {
6846 const bool can_preserve = SvCANEXISTDELETE(arg);
6847 if (SvTYPE(arg) == SVt_PVAV)
6848 S_localise_aelem_lval(aTHX_ (AV *)arg, elem, can_preserve);
6849 else
6850 S_localise_helem_lval(aTHX_ (HV *)arg, elem, can_preserve);
6851 }
6852 }
6853 else if (arg) {
6854 S_localise_gv_slot(aTHX_ (GV *)arg,
6855 PL_op->op_private & OPpLVREF_TYPE);
6856 }
6857 else if (!(PL_op->op_private & OPpPAD_STATE))
6858 SAVECLEARSV(PAD_SVl(ARGTARG));
6859 }
6860 XPUSHs(ret);
6861 RETURN;
6862 }
6863
PP(pp_lvrefslice)6864 PP(pp_lvrefslice)
6865 {
6866 dSP; dMARK;
6867 AV * const av = (AV *)POPs;
6868 const bool localizing = PL_op->op_private & OPpLVAL_INTRO;
6869 bool can_preserve = FALSE;
6870
6871 if (UNLIKELY(localizing)) {
6872 MAGIC *mg;
6873 HV *stash;
6874 SV **svp;
6875
6876 can_preserve = SvCANEXISTDELETE(av);
6877
6878 if (SvTYPE(av) == SVt_PVAV) {
6879 SSize_t max = -1;
6880
6881 for (svp = MARK + 1; svp <= SP; svp++) {
6882 const SSize_t elem = SvIV(*svp);
6883 if (elem > max)
6884 max = elem;
6885 }
6886 if (max > AvMAX(av))
6887 av_extend(av, max);
6888 }
6889 }
6890
6891 while (++MARK <= SP) {
6892 SV * const elemsv = *MARK;
6893 if (UNLIKELY(localizing)) {
6894 if (SvTYPE(av) == SVt_PVAV)
6895 S_localise_aelem_lval(aTHX_ av, elemsv, can_preserve);
6896 else
6897 S_localise_helem_lval(aTHX_ (HV *)av, elemsv, can_preserve);
6898 }
6899 *MARK = sv_2mortal(newSV_type(SVt_PVMG));
6900 sv_magic(*MARK,(SV *)av,PERL_MAGIC_lvref,(char *)elemsv,HEf_SVKEY);
6901 }
6902 RETURN;
6903 }
6904
PP(pp_lvavref)6905 PP(pp_lvavref)
6906 {
6907 if (PL_op->op_flags & OPf_STACKED)
6908 Perl_pp_rv2av(aTHX);
6909 else
6910 Perl_pp_padav(aTHX);
6911 {
6912 dSP;
6913 dTOPss;
6914 SETs(0); /* special alias marker that aassign recognises */
6915 XPUSHs(sv);
6916 RETURN;
6917 }
6918 }
6919
PP(pp_anonconst)6920 PP(pp_anonconst)
6921 {
6922 dSP;
6923 dTOPss;
6924 SETs(sv_2mortal((SV *)newCONSTSUB(SvTYPE(CopSTASH(PL_curcop))==SVt_PVHV
6925 ? CopSTASH(PL_curcop)
6926 : NULL,
6927 NULL, SvREFCNT_inc_simple_NN(sv))));
6928 RETURN;
6929 }
6930
6931
6932 /* process one subroutine argument - typically when the sub has a signature:
6933 * introduce PL_curpad[op_targ] and assign to it the value
6934 * for $: (OPf_STACKED ? *sp : $_[N])
6935 * for @/%: @_[N..$#_]
6936 *
6937 * It's equivalent to
6938 * my $foo = $_[N];
6939 * or
6940 * my $foo = (value-on-stack)
6941 * or
6942 * my @foo = @_[N..$#_]
6943 * etc
6944 */
6945
PP(pp_argelem)6946 PP(pp_argelem)
6947 {
6948 dTARG;
6949 SV *val;
6950 SV ** padentry;
6951 OP *o = PL_op;
6952 AV *defav = GvAV(PL_defgv); /* @_ */
6953 IV ix = PTR2IV(cUNOP_AUXo->op_aux);
6954 IV argc;
6955
6956 /* do 'my $var, @var or %var' action */
6957 padentry = &(PAD_SVl(o->op_targ));
6958 save_clearsv(padentry);
6959 targ = *padentry;
6960
6961 if ((o->op_private & OPpARGELEM_MASK) == OPpARGELEM_SV) {
6962 if (o->op_flags & OPf_STACKED) {
6963 dSP;
6964 val = POPs;
6965 PUTBACK;
6966 }
6967 else {
6968 SV **svp;
6969 /* should already have been checked */
6970 assert(ix >= 0);
6971 #if IVSIZE > PTRSIZE
6972 assert(ix <= SSize_t_MAX);
6973 #endif
6974
6975 svp = av_fetch(defav, ix, FALSE);
6976 val = svp ? *svp : &PL_sv_undef;
6977 }
6978
6979 /* $var = $val */
6980
6981 /* cargo-culted from pp_sassign */
6982 assert(TAINTING_get || !TAINT_get);
6983 if (UNLIKELY(TAINT_get) && !SvTAINTED(val))
6984 TAINT_NOT;
6985
6986 SvSetMagicSV(targ, val);
6987 return o->op_next;
6988 }
6989
6990 /* must be AV or HV */
6991
6992 assert(!(o->op_flags & OPf_STACKED));
6993 argc = ((IV)AvFILL(defav) + 1) - ix;
6994
6995 /* This is a copy of the relevant parts of pp_aassign().
6996 */
6997 if ((o->op_private & OPpARGELEM_MASK) == OPpARGELEM_AV) {
6998 IV i;
6999
7000 if (AvFILL((AV*)targ) > -1) {
7001 /* target should usually be empty. If we get get
7002 * here, someone's been doing some weird closure tricks.
7003 * Make a copy of all args before clearing the array,
7004 * to avoid the equivalent of @a = ($a[0]) prematurely freeing
7005 * elements. See similar code in pp_aassign.
7006 */
7007 for (i = 0; i < argc; i++) {
7008 SV **svp = av_fetch(defav, ix + i, FALSE);
7009 SV *newsv = newSV(0);
7010 sv_setsv_flags(newsv,
7011 svp ? *svp : &PL_sv_undef,
7012 (SV_DO_COW_SVSETSV|SV_NOSTEAL));
7013 if (!av_store(defav, ix + i, newsv))
7014 SvREFCNT_dec_NN(newsv);
7015 }
7016 av_clear((AV*)targ);
7017 }
7018
7019 if (argc <= 0)
7020 return o->op_next;
7021
7022 av_extend((AV*)targ, argc);
7023
7024 i = 0;
7025 while (argc--) {
7026 SV *tmpsv;
7027 SV **svp = av_fetch(defav, ix + i, FALSE);
7028 SV *val = svp ? *svp : &PL_sv_undef;
7029 tmpsv = newSV(0);
7030 sv_setsv(tmpsv, val);
7031 av_store((AV*)targ, i++, tmpsv);
7032 TAINT_NOT;
7033 }
7034
7035 }
7036 else {
7037 IV i;
7038
7039 assert((o->op_private & OPpARGELEM_MASK) == OPpARGELEM_HV);
7040
7041 if (SvRMAGICAL(targ) || HvUSEDKEYS((HV*)targ)) {
7042 /* see "target should usually be empty" comment above */
7043 for (i = 0; i < argc; i++) {
7044 SV **svp = av_fetch(defav, ix + i, FALSE);
7045 SV *newsv = newSV(0);
7046 sv_setsv_flags(newsv,
7047 svp ? *svp : &PL_sv_undef,
7048 (SV_DO_COW_SVSETSV|SV_NOSTEAL));
7049 if (!av_store(defav, ix + i, newsv))
7050 SvREFCNT_dec_NN(newsv);
7051 }
7052 hv_clear((HV*)targ);
7053 }
7054
7055 if (argc <= 0)
7056 return o->op_next;
7057 assert(argc % 2 == 0);
7058
7059 i = 0;
7060 while (argc) {
7061 SV *tmpsv;
7062 SV **svp;
7063 SV *key;
7064 SV *val;
7065
7066 svp = av_fetch(defav, ix + i++, FALSE);
7067 key = svp ? *svp : &PL_sv_undef;
7068 svp = av_fetch(defav, ix + i++, FALSE);
7069 val = svp ? *svp : &PL_sv_undef;
7070
7071 argc -= 2;
7072 if (UNLIKELY(SvGMAGICAL(key)))
7073 key = sv_mortalcopy(key);
7074 tmpsv = newSV(0);
7075 sv_setsv(tmpsv, val);
7076 hv_store_ent((HV*)targ, key, tmpsv, 0);
7077 TAINT_NOT;
7078 }
7079 }
7080
7081 return o->op_next;
7082 }
7083
7084 /* Handle a default value for one subroutine argument (typically as part
7085 * of a subroutine signature).
7086 * It's equivalent to
7087 * @_ > op_targ ? $_[op_targ] : result_of(op_other)
7088 *
7089 * Intended to be used where op_next is an OP_ARGELEM
7090 *
7091 * We abuse the op_targ field slightly: it's an index into @_ rather than
7092 * into PL_curpad.
7093 */
7094
PP(pp_argdefelem)7095 PP(pp_argdefelem)
7096 {
7097 OP * const o = PL_op;
7098 AV *defav = GvAV(PL_defgv); /* @_ */
7099 IV ix = (IV)o->op_targ;
7100
7101 assert(ix >= 0);
7102 #if IVSIZE > PTRSIZE
7103 assert(ix <= SSize_t_MAX);
7104 #endif
7105
7106 if (AvFILL(defav) >= ix) {
7107 dSP;
7108 SV **svp = av_fetch(defav, ix, FALSE);
7109 SV *val = svp ? *svp : &PL_sv_undef;
7110 XPUSHs(val);
7111 RETURN;
7112 }
7113 return cLOGOPo->op_other;
7114 }
7115
7116
7117 static SV *
S_find_runcv_name(void)7118 S_find_runcv_name(void)
7119 {
7120 dTHX;
7121 CV *cv;
7122 GV *gv;
7123 SV *sv;
7124
7125 cv = find_runcv(0);
7126 if (!cv)
7127 return &PL_sv_no;
7128
7129 gv = CvGV(cv);
7130 if (!gv)
7131 return &PL_sv_no;
7132
7133 sv = sv_2mortal(newSV(0));
7134 gv_fullname4(sv, gv, NULL, TRUE);
7135 return sv;
7136 }
7137
7138 /* Check a sub's arguments - i.e. that it has the correct number of args
7139 * (and anything else we might think of in future). Typically used with
7140 * signatured subs.
7141 */
7142
PP(pp_argcheck)7143 PP(pp_argcheck)
7144 {
7145 OP * const o = PL_op;
7146 struct op_argcheck_aux *aux = (struct op_argcheck_aux *)cUNOP_AUXo->op_aux;
7147 UV params = aux->params;
7148 UV opt_params = aux->opt_params;
7149 char slurpy = aux->slurpy;
7150 AV *defav = GvAV(PL_defgv); /* @_ */
7151 UV argc;
7152 bool too_few;
7153
7154 assert(!SvMAGICAL(defav));
7155 argc = (UV)(AvFILLp(defav) + 1);
7156 too_few = (argc < (params - opt_params));
7157
7158 if (UNLIKELY(too_few || (!slurpy && argc > params)))
7159
7160 /* diag_listed_as: Too few arguments for subroutine '%s' (got %d; expected %d) */
7161 /* diag_listed_as: Too few arguments for subroutine '%s' (got %d; expected at least %d) */
7162 /* diag_listed_as: Too many arguments for subroutine '%s' (got %d; expected %d) */
7163 /* diag_listed_as: Too many arguments for subroutine '%s' (got %d; expected at most %d)*/
7164 Perl_croak_caller("Too %s arguments for subroutine '%" SVf "' (got %" UVuf "; expected %s%" UVuf ")",
7165 too_few ? "few" : "many",
7166 S_find_runcv_name(),
7167 argc,
7168 too_few ? (slurpy || opt_params ? "at least " : "") : (opt_params ? "at most " : ""),
7169 too_few ? (params - opt_params) : params);
7170
7171 if (UNLIKELY(slurpy == '%' && argc > params && (argc - params) % 2))
7172 /* diag_listed_as: Odd name/value argument for subroutine '%s' */
7173 Perl_croak_caller("Odd name/value argument for subroutine '%" SVf "'",
7174 S_find_runcv_name());
7175
7176 return NORMAL;
7177 }
7178
PP(pp_isa)7179 PP(pp_isa)
7180 {
7181 dSP;
7182 SV *left, *right;
7183
7184 right = POPs;
7185 left = TOPs;
7186
7187 SETs(boolSV(sv_isa_sv(left, right)));
7188 RETURN;
7189 }
7190
PP(pp_cmpchain_and)7191 PP(pp_cmpchain_and)
7192 {
7193 dSP;
7194 SV *result = POPs;
7195 PUTBACK;
7196 if (SvTRUE_NN(result)) {
7197 return cLOGOP->op_other;
7198 } else {
7199 TOPs = result;
7200 return NORMAL;
7201 }
7202 }
7203
PP(pp_cmpchain_dup)7204 PP(pp_cmpchain_dup)
7205 {
7206 dSP;
7207 SV *right = TOPs;
7208 SV *left = TOPm1s;
7209 TOPm1s = right;
7210 TOPs = left;
7211 XPUSHs(right);
7212 RETURN;
7213 }
7214
7215 /*
7216 * ex: set ts=8 sts=4 sw=4 et:
7217 */
7218