1 /* peep.c
2 *
3 * Copyright (C) 1991-2022 by Larry Wall and others
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Artistic License, as specified in the README file.
7 *
8 */
9
10 /*
11 * Aragorn sped on up the hill. Every now and again he bent to the ground.
12 * Hobbits go light, and their footprints are not easy even for a Ranger to
13 * read, but not far from the top a spring crossed the path, and in the wet
14 * earth he saw what he was seeking.
15 * 'I read the signs aright,' he said to himself. 'Frodo ran to the hill-top.
16 * I wonder what he saw there? But he returned by the same way, and went down
17 * the hill again.'
18 */
19
20 /* This file contains functions for optimizing and finalizing the OP
21 * structures that hold a compiled perl program
22 */
23
24 #include "EXTERN.h"
25 #define PERL_IN_PEEP_C
26 #include "perl.h"
27
28
29 #define CALL_RPEEP(o) PL_rpeepp(aTHX_ o)
30
31
32 static void
S_scalar_slice_warning(pTHX_ const OP * o)33 S_scalar_slice_warning(pTHX_ const OP *o)
34 {
35 OP *kid;
36 const bool is_hash = o->op_type == OP_HSLICE
37 || (o->op_type == OP_NULL && o->op_targ == OP_HSLICE);
38 SV *name;
39
40 if (!(o->op_private & OPpSLICEWARNING))
41 return;
42 if (PL_parser && PL_parser->error_count)
43 /* This warning can be nonsensical when there is a syntax error. */
44 return;
45
46 kid = cLISTOPo->op_first;
47 kid = OpSIBLING(kid); /* get past pushmark */
48 /* weed out false positives: any ops that can return lists */
49 switch (kid->op_type) {
50 case OP_BACKTICK:
51 case OP_GLOB:
52 case OP_READLINE:
53 case OP_MATCH:
54 case OP_RV2AV:
55 case OP_EACH:
56 case OP_VALUES:
57 case OP_KEYS:
58 case OP_SPLIT:
59 case OP_LIST:
60 case OP_SORT:
61 case OP_REVERSE:
62 case OP_ENTERSUB:
63 case OP_CALLER:
64 case OP_LSTAT:
65 case OP_STAT:
66 case OP_READDIR:
67 case OP_SYSTEM:
68 case OP_TMS:
69 case OP_LOCALTIME:
70 case OP_GMTIME:
71 case OP_ENTEREVAL:
72 return;
73 }
74
75 /* Don't warn if we have a nulled list either. */
76 if (kid->op_type == OP_NULL && kid->op_targ == OP_LIST)
77 return;
78
79 assert(OpSIBLING(kid));
80 name = op_varname(OpSIBLING(kid));
81 if (!name) /* XS module fiddling with the op tree */
82 return;
83 warn_elem_scalar_context(kid, name, is_hash, true);
84 }
85
86
87 /* info returned by S_sprintf_is_multiconcatable() */
88
89 struct sprintf_ismc_info {
90 SSize_t nargs; /* num of args to sprintf (not including the format) */
91 char *start; /* start of raw format string */
92 char *end; /* bytes after end of raw format string */
93 STRLEN total_len; /* total length (in bytes) of format string, not
94 including '%s' and half of '%%' */
95 STRLEN variant; /* number of bytes by which total_len_p would grow
96 if upgraded to utf8 */
97 bool utf8; /* whether the format is utf8 */
98 };
99
100 /* is the OP_SPRINTF o suitable for converting into a multiconcat op?
101 * i.e. its format argument is a const string with only '%s' and '%%'
102 * formats, and the number of args is known, e.g.
103 * sprintf "a=%s f=%s", $a[0], scalar(f());
104 * but not
105 * sprintf "i=%d a=%s f=%s", $i, @a, f();
106 *
107 * If successful, the sprintf_ismc_info struct pointed to by info will be
108 * populated.
109 */
110
111 STATIC bool
S_sprintf_is_multiconcatable(pTHX_ OP * o,struct sprintf_ismc_info * info)112 S_sprintf_is_multiconcatable(pTHX_ OP *o,struct sprintf_ismc_info *info)
113 {
114 OP *pm, *constop, *kid;
115 SV *sv;
116 char *s, *e, *p;
117 SSize_t nargs, nformats;
118 STRLEN cur, total_len, variant;
119 bool utf8;
120
121 /* if sprintf's behaviour changes, die here so that someone
122 * can decide whether to enhance this function or skip optimising
123 * under those new circumstances */
124 assert(!(o->op_flags & OPf_STACKED));
125 assert(!(PL_opargs[OP_SPRINTF] & OA_TARGLEX));
126 assert(!(o->op_private & ~OPpARG4_MASK));
127
128 pm = cUNOPo->op_first;
129 if (pm->op_type != OP_PUSHMARK) /* weird coreargs stuff */
130 return FALSE;
131 constop = OpSIBLING(pm);
132 if (!constop || constop->op_type != OP_CONST)
133 return FALSE;
134 sv = cSVOPx_sv(constop);
135 if (SvMAGICAL(sv) || !SvPOK(sv))
136 return FALSE;
137
138 s = SvPV(sv, cur);
139 e = s + cur;
140
141 /* Scan format for %% and %s and work out how many %s there are.
142 * Abandon if other format types are found.
143 */
144
145 nformats = 0;
146 total_len = 0;
147 variant = 0;
148
149 for (p = s; p < e; p++) {
150 if (*p != '%') {
151 total_len++;
152 if (!UTF8_IS_INVARIANT(*p))
153 variant++;
154 continue;
155 }
156 p++;
157 if (p >= e)
158 return FALSE; /* lone % at end gives "Invalid conversion" */
159 if (*p == '%')
160 total_len++;
161 else if (*p == 's')
162 nformats++;
163 else
164 return FALSE;
165 }
166
167 if (!nformats || nformats > PERL_MULTICONCAT_MAXARG)
168 return FALSE;
169
170 utf8 = cBOOL(SvUTF8(sv));
171 if (utf8)
172 variant = 0;
173
174 /* scan args; they must all be in scalar cxt */
175
176 nargs = 0;
177 kid = OpSIBLING(constop);
178
179 while (kid) {
180 if ((kid->op_flags & OPf_WANT) != OPf_WANT_SCALAR)
181 return FALSE;
182 nargs++;
183 kid = OpSIBLING(kid);
184 }
185
186 if (nargs != nformats)
187 return FALSE; /* e.g. sprintf("%s%s", $a); */
188
189
190 info->nargs = nargs;
191 info->start = s;
192 info->end = e;
193 info->total_len = total_len;
194 info->variant = variant;
195 info->utf8 = utf8;
196
197 return TRUE;
198 }
199
200 /* S_maybe_multiconcat():
201 *
202 * given an OP_STRINGIFY, OP_SASSIGN, OP_CONCAT or OP_SPRINTF op, possibly
203 * convert it (and its children) into an OP_MULTICONCAT. See the code
204 * comments just before pp_multiconcat() for the full details of what
205 * OP_MULTICONCAT supports.
206 *
207 * Basically we're looking for an optree with a chain of OP_CONCATS down
208 * the LHS (or an OP_SPRINTF), with possibly an OP_SASSIGN, and/or
209 * OP_STRINGIFY, and/or OP_CONCAT acting as '.=' at its head, e.g.
210 *
211 * $x = "$a$b-$c"
212 *
213 * looks like
214 *
215 * SASSIGN
216 * |
217 * STRINGIFY -- PADSV[$x]
218 * |
219 * |
220 * ex-PUSHMARK -- CONCAT/S
221 * |
222 * CONCAT/S -- PADSV[$d]
223 * |
224 * CONCAT -- CONST["-"]
225 * |
226 * PADSV[$a] -- PADSV[$b]
227 *
228 * Note that at this stage the OP_SASSIGN may have already been optimised
229 * away with OPpTARGET_MY set on the OP_STRINGIFY or OP_CONCAT.
230 */
231
232 STATIC void
S_maybe_multiconcat(pTHX_ OP * o)233 S_maybe_multiconcat(pTHX_ OP *o)
234 {
235 OP *lastkidop; /* the right-most of any kids unshifted onto o */
236 OP *topop; /* the top-most op in the concat tree (often equals o,
237 unless there are assign/stringify ops above it */
238 OP *parentop; /* the parent op of topop (or itself if no parent) */
239 OP *targmyop; /* the op (if any) with the OPpTARGET_MY flag */
240 OP *targetop; /* the op corresponding to target=... or target.=... */
241 OP *stringop; /* the OP_STRINGIFY op, if any */
242 OP *nextop; /* used for recreating the op_next chain without consts */
243 OP *kid; /* general-purpose op pointer */
244 UNOP_AUX_item *aux;
245 UNOP_AUX_item *lenp;
246 char *const_str, *p;
247 struct sprintf_ismc_info sprintf_info;
248
249 /* store info about each arg in args[];
250 * toparg is the highest used slot; argp is a general
251 * pointer to args[] slots */
252 struct {
253 void *p; /* initially points to const sv (or null for op);
254 later, set to SvPV(constsv), with ... */
255 STRLEN len; /* ... len set to SvPV(..., len) */
256 } *argp, *toparg, args[PERL_MULTICONCAT_MAXARG*2 + 1];
257
258 SSize_t nargs = 0;
259 SSize_t nconst = 0;
260 SSize_t nadjconst = 0; /* adjacent consts - may be demoted to args */
261 STRLEN variant;
262 bool utf8 = FALSE;
263 bool kid_is_last = FALSE; /* most args will be the RHS kid of a concat op;
264 the last-processed arg will the LHS of one,
265 as args are processed in reverse order */
266 U8 stacked_last = 0; /* whether the last seen concat op was STACKED */
267 STRLEN total_len = 0; /* sum of the lengths of the const segments */
268 U8 flags = 0; /* what will become the op_flags and ... */
269 U8 private_flags = 0; /* ... op_private of the multiconcat op */
270 bool is_sprintf = FALSE; /* we're optimising an sprintf */
271 bool is_targable = FALSE; /* targetop is an OPpTARGET_MY candidate */
272 bool prev_was_const = FALSE; /* previous arg was a const */
273
274 /* -----------------------------------------------------------------
275 * Phase 1:
276 *
277 * Examine the optree non-destructively to determine whether it's
278 * suitable to be converted into an OP_MULTICONCAT. Accumulate
279 * information about the optree in args[].
280 */
281
282 argp = args;
283 targmyop = NULL;
284 targetop = NULL;
285 stringop = NULL;
286 topop = o;
287 parentop = o;
288
289 assert( o->op_type == OP_SASSIGN
290 || o->op_type == OP_CONCAT
291 || o->op_type == OP_SPRINTF
292 || o->op_type == OP_STRINGIFY);
293
294 Zero(&sprintf_info, 1, struct sprintf_ismc_info);
295
296 /* first see if, at the top of the tree, there is an assign,
297 * append and/or stringify */
298
299 if (topop->op_type == OP_SASSIGN) {
300 /* expr = ..... */
301 if (o->op_ppaddr != PL_ppaddr[OP_SASSIGN])
302 return;
303 if (o->op_private & (OPpASSIGN_BACKWARDS|OPpASSIGN_CV_TO_GV))
304 return;
305 assert(!(o->op_private & ~OPpARG2_MASK)); /* barf on unknown flags */
306
307 parentop = topop;
308 topop = cBINOPo->op_first;
309 targetop = OpSIBLING(topop);
310 if (!targetop) /* probably some sort of syntax error */
311 return;
312
313 /* don't optimise away assign in 'local $foo = ....' */
314 if ( (targetop->op_private & OPpLVAL_INTRO)
315 /* these are the common ops which do 'local', but
316 * not all */
317 && ( targetop->op_type == OP_GVSV
318 || targetop->op_type == OP_RV2SV
319 || targetop->op_type == OP_AELEM
320 || targetop->op_type == OP_HELEM
321 )
322 )
323 return;
324 }
325 else if ( topop->op_type == OP_CONCAT
326 && (topop->op_flags & OPf_STACKED)
327 && (!(topop->op_private & OPpCONCAT_NESTED))
328 )
329 {
330 /* expr .= ..... */
331
332 /* OPpTARGET_MY shouldn't be able to be set here. If it is,
333 * decide what to do about it */
334 assert(!(o->op_private & OPpTARGET_MY));
335
336 /* barf on unknown flags */
337 assert(!(o->op_private & ~(OPpARG2_MASK|OPpTARGET_MY)));
338 private_flags |= OPpMULTICONCAT_APPEND;
339 targetop = cBINOPo->op_first;
340 parentop = topop;
341 topop = OpSIBLING(targetop);
342
343 /* $x .= <FOO> gets optimised to rcatline instead */
344 if (topop->op_type == OP_READLINE)
345 return;
346 }
347
348 if (targetop) {
349 /* Can targetop (the LHS) if it's a padsv, be optimised
350 * away and use OPpTARGET_MY instead?
351 */
352 if ( (targetop->op_type == OP_PADSV)
353 && !(targetop->op_private & OPpDEREF)
354 && !(targetop->op_private & OPpPAD_STATE)
355 /* we don't support 'my $x .= ...' */
356 && ( o->op_type == OP_SASSIGN
357 || !(targetop->op_private & OPpLVAL_INTRO))
358 )
359 is_targable = TRUE;
360 }
361
362 if (topop->op_type == OP_STRINGIFY) {
363 if (topop->op_ppaddr != PL_ppaddr[OP_STRINGIFY])
364 return;
365 stringop = topop;
366
367 /* barf on unknown flags */
368 assert(!(o->op_private & ~(OPpARG4_MASK|OPpTARGET_MY)));
369
370 if ((topop->op_private & OPpTARGET_MY)) {
371 if (o->op_type == OP_SASSIGN)
372 return; /* can't have two assigns */
373 targmyop = topop;
374 }
375
376 private_flags |= OPpMULTICONCAT_STRINGIFY;
377 parentop = topop;
378 topop = cBINOPx(topop)->op_first;
379 assert(OP_TYPE_IS_OR_WAS_NN(topop, OP_PUSHMARK));
380 topop = OpSIBLING(topop);
381 }
382
383 if (topop->op_type == OP_SPRINTF) {
384 if (topop->op_ppaddr != PL_ppaddr[OP_SPRINTF])
385 return;
386 if (S_sprintf_is_multiconcatable(aTHX_ topop, &sprintf_info)) {
387 nargs = sprintf_info.nargs;
388 total_len = sprintf_info.total_len;
389 variant = sprintf_info.variant;
390 utf8 = sprintf_info.utf8;
391 is_sprintf = TRUE;
392 private_flags |= OPpMULTICONCAT_FAKE;
393 toparg = argp;
394 /* we have an sprintf op rather than a concat optree.
395 * Skip most of the code below which is associated with
396 * processing that optree. We also skip phase 2, determining
397 * whether its cost effective to optimise, since for sprintf,
398 * multiconcat is *always* faster */
399 goto create_aux;
400 }
401 /* note that even if the sprintf itself isn't multiconcatable,
402 * the expression as a whole may be, e.g. in
403 * $x .= sprintf("%d",...)
404 * the sprintf op will be left as-is, but the concat/S op may
405 * be upgraded to multiconcat
406 */
407 }
408 else if (topop->op_type == OP_CONCAT) {
409 if (topop->op_ppaddr != PL_ppaddr[OP_CONCAT])
410 return;
411
412 if ((topop->op_private & OPpTARGET_MY)) {
413 if (o->op_type == OP_SASSIGN || targmyop)
414 return; /* can't have two assigns */
415 targmyop = topop;
416 }
417 }
418
419 /* Is it safe to convert a sassign/stringify/concat op into
420 * a multiconcat? */
421 assert((PL_opargs[OP_SASSIGN] & OA_CLASS_MASK) == OA_BINOP);
422 assert((PL_opargs[OP_CONCAT] & OA_CLASS_MASK) == OA_BINOP);
423 assert((PL_opargs[OP_STRINGIFY] & OA_CLASS_MASK) == OA_LISTOP);
424 assert((PL_opargs[OP_SPRINTF] & OA_CLASS_MASK) == OA_LISTOP);
425 STATIC_ASSERT_STMT( STRUCT_OFFSET(BINOP, op_last)
426 == STRUCT_OFFSET(UNOP_AUX, op_aux));
427 STATIC_ASSERT_STMT( STRUCT_OFFSET(LISTOP, op_last)
428 == STRUCT_OFFSET(UNOP_AUX, op_aux));
429
430 /* Now scan the down the tree looking for a series of
431 * CONCAT/OPf_STACKED ops on the LHS (with the last one not
432 * stacked). For example this tree:
433 *
434 * |
435 * CONCAT/STACKED
436 * |
437 * CONCAT/STACKED -- EXPR5
438 * |
439 * CONCAT/STACKED -- EXPR4
440 * |
441 * CONCAT -- EXPR3
442 * |
443 * EXPR1 -- EXPR2
444 *
445 * corresponds to an expression like
446 *
447 * (EXPR1 . EXPR2 . EXPR3 . EXPR4 . EXPR5)
448 *
449 * Record info about each EXPR in args[]: in particular, whether it is
450 * a stringifiable OP_CONST and if so what the const sv is.
451 *
452 * The reason why the last concat can't be STACKED is the difference
453 * between
454 *
455 * ((($a .= $a) .= $a) .= $a) .= $a
456 *
457 * and
458 * $a . $a . $a . $a . $a
459 *
460 * The main difference between the optrees for those two constructs
461 * is the presence of the last STACKED. As well as modifying $a,
462 * the former sees the changed $a between each concat, so if $s is
463 * initially 'a', the first returns 'a' x 16, while the latter returns
464 * 'a' x 5. And pp_multiconcat can't handle that kind of thing.
465 */
466
467 kid = topop;
468
469 for (;;) {
470 OP *argop;
471 SV *sv;
472 bool last = FALSE;
473
474 if ( kid->op_type == OP_CONCAT
475 && !kid_is_last
476 ) {
477 OP *k1, *k2;
478 k1 = cUNOPx(kid)->op_first;
479 k2 = OpSIBLING(k1);
480 /* shouldn't happen except maybe after compile err? */
481 if (!k2)
482 return;
483
484 /* avoid turning (A . B . ($lex = C) ...) into (A . B . C ...) */
485 if (kid->op_private & OPpTARGET_MY)
486 kid_is_last = TRUE;
487
488 stacked_last = (kid->op_flags & OPf_STACKED);
489 if (!stacked_last)
490 kid_is_last = TRUE;
491
492 kid = k1;
493 argop = k2;
494 }
495 else {
496 argop = kid;
497 last = TRUE;
498 }
499
500 if ( nargs + nadjconst > PERL_MULTICONCAT_MAXARG - 2
501 || (argp - args + 1) > (PERL_MULTICONCAT_MAXARG*2 + 1) - 2)
502 {
503 /* At least two spare slots are needed to decompose both
504 * concat args. If there are no slots left, continue to
505 * examine the rest of the optree, but don't push new values
506 * on args[]. If the optree as a whole is legal for conversion
507 * (in particular that the last concat isn't STACKED), then
508 * the first PERL_MULTICONCAT_MAXARG elements of the optree
509 * can be converted into an OP_MULTICONCAT now, with the first
510 * child of that op being the remainder of the optree -
511 * which may itself later be converted to a multiconcat op
512 * too.
513 */
514 if (last) {
515 /* the last arg is the rest of the optree */
516 argp++->p = NULL;
517 nargs++;
518 }
519 }
520 else if ( argop->op_type == OP_CONST
521 && ((sv = cSVOPx_sv(argop)))
522 /* defer stringification until runtime of 'constant'
523 * things that might stringify variantly, e.g. the radix
524 * point of NVs, or overloaded RVs */
525 && (SvPOK(sv) || SvIOK(sv))
526 && (!SvGMAGICAL(sv))
527 ) {
528 if (argop->op_private & OPpCONST_STRICT)
529 no_bareword_allowed(argop);
530 argp++->p = sv;
531 utf8 |= cBOOL(SvUTF8(sv));
532 nconst++;
533 if (prev_was_const)
534 /* this const may be demoted back to a plain arg later;
535 * make sure we have enough arg slots left */
536 nadjconst++;
537 prev_was_const = !prev_was_const;
538 }
539 else {
540 argp++->p = NULL;
541 nargs++;
542 prev_was_const = FALSE;
543 }
544
545 if (last)
546 break;
547 }
548
549 toparg = argp - 1;
550
551 if (stacked_last)
552 return; /* we don't support ((A.=B).=C)...) */
553
554 /* look for two adjacent consts and don't fold them together:
555 * $o . "a" . "b"
556 * should do
557 * $o->concat("a")->concat("b")
558 * rather than
559 * $o->concat("ab")
560 * (but $o .= "a" . "b" should still fold)
561 */
562 {
563 bool seen_nonconst = FALSE;
564 for (argp = toparg; argp >= args; argp--) {
565 if (argp->p == NULL) {
566 seen_nonconst = TRUE;
567 continue;
568 }
569 if (!seen_nonconst)
570 continue;
571 if (argp[1].p) {
572 /* both previous and current arg were constants;
573 * leave the current OP_CONST as-is */
574 argp->p = NULL;
575 nconst--;
576 nargs++;
577 }
578 }
579 }
580
581 /* -----------------------------------------------------------------
582 * Phase 2:
583 *
584 * At this point we have determined that the optree *can* be converted
585 * into a multiconcat. Having gathered all the evidence, we now decide
586 * whether it *should*.
587 */
588
589
590 /* we need at least one concat action, e.g.:
591 *
592 * Y . Z
593 * X = Y . Z
594 * X .= Y
595 *
596 * otherwise we could be doing something like $x = "foo", which
597 * if treated as a concat, would fail to COW.
598 */
599 if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 2)
600 return;
601
602 /* Benchmarking seems to indicate that we gain if:
603 * * we optimise at least two actions into a single multiconcat
604 * (e.g concat+concat, sassign+concat);
605 * * or if we can eliminate at least 1 OP_CONST;
606 * * or if we can eliminate a padsv via OPpTARGET_MY
607 */
608
609 if (
610 /* eliminated at least one OP_CONST */
611 nconst >= 1
612 /* eliminated an OP_SASSIGN */
613 || o->op_type == OP_SASSIGN
614 /* eliminated an OP_PADSV */
615 || (!targmyop && is_targable)
616 )
617 /* definitely a net gain to optimise */
618 goto optimise;
619
620 /* ... if not, what else? */
621
622 /* special-case '$lex1 = expr . $lex1' (where expr isn't lex1):
623 * multiconcat is faster (due to not creating a temporary copy of
624 * $lex1), whereas for a general $lex1 = $lex2 . $lex3, concat is
625 * faster.
626 */
627 if ( nconst == 0
628 && nargs == 2
629 && targmyop
630 && topop->op_type == OP_CONCAT
631 ) {
632 PADOFFSET t = targmyop->op_targ;
633 OP *k1 = cBINOPx(topop)->op_first;
634 OP *k2 = cBINOPx(topop)->op_last;
635 if ( k2->op_type == OP_PADSV
636 && k2->op_targ == t
637 && ( k1->op_type != OP_PADSV
638 || k1->op_targ != t)
639 )
640 goto optimise;
641 }
642
643 /* need at least two concats */
644 if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 3)
645 return;
646
647
648
649 /* -----------------------------------------------------------------
650 * Phase 3:
651 *
652 * At this point the optree has been verified as ok to be optimised
653 * into an OP_MULTICONCAT. Now start changing things.
654 */
655
656 optimise:
657
658 /* stringify all const args and determine utf8ness */
659
660 variant = 0;
661 for (argp = args; argp <= toparg; argp++) {
662 SV *sv = (SV*)argp->p;
663 if (!sv)
664 continue; /* not a const op */
665 if (utf8 && !SvUTF8(sv))
666 sv_utf8_upgrade_nomg(sv);
667 argp->p = SvPV_nomg(sv, argp->len);
668 total_len += argp->len;
669
670 /* see if any strings would grow if converted to utf8 */
671 if (!utf8) {
672 variant += variant_under_utf8_count((U8 *) argp->p,
673 (U8 *) argp->p + argp->len);
674 }
675 }
676
677 /* create and populate aux struct */
678
679 create_aux:
680
681 aux = (UNOP_AUX_item*)PerlMemShared_malloc(
682 sizeof(UNOP_AUX_item)
683 * (
684 PERL_MULTICONCAT_HEADER_SIZE
685 + ((nargs + 1) * (variant ? 2 : 1))
686 )
687 );
688 const_str = (char *)PerlMemShared_malloc(total_len ? total_len : 1);
689
690 /* Extract all the non-const expressions from the concat tree then
691 * dispose of the old tree, e.g. convert the tree from this:
692 *
693 * o => SASSIGN
694 * |
695 * STRINGIFY -- TARGET
696 * |
697 * ex-PUSHMARK -- CONCAT
698 * |
699 * CONCAT -- EXPR5
700 * |
701 * CONCAT -- EXPR4
702 * |
703 * CONCAT -- EXPR3
704 * |
705 * EXPR1 -- EXPR2
706 *
707 *
708 * to:
709 *
710 * o => MULTICONCAT
711 * |
712 * ex-PUSHMARK -- EXPR1 -- EXPR2 -- EXPR3 -- EXPR4 -- EXPR5 -- TARGET
713 *
714 * except that if EXPRi is an OP_CONST, it's discarded.
715 *
716 * During the conversion process, EXPR ops are stripped from the tree
717 * and unshifted onto o. Finally, any of o's remaining original
718 * children are discarded and o is converted into an OP_MULTICONCAT.
719 *
720 * In this middle of this, o may contain both: unshifted args on the
721 * left, and some remaining original args on the right. lastkidop
722 * is set to point to the right-most unshifted arg to delineate
723 * between the two sets.
724 */
725
726
727 if (is_sprintf) {
728 /* create a copy of the format with the %'s removed, and record
729 * the sizes of the const string segments in the aux struct */
730 char *q, *oldq;
731 lenp = aux + PERL_MULTICONCAT_IX_LENGTHS;
732
733 p = sprintf_info.start;
734 q = const_str;
735 oldq = q;
736 for (; p < sprintf_info.end; p++) {
737 if (*p == '%') {
738 p++;
739 if (*p != '%') {
740 (lenp++)->ssize = q - oldq;
741 oldq = q;
742 continue;
743 }
744 }
745 *q++ = *p;
746 }
747 lenp->ssize = q - oldq;
748 assert((STRLEN)(q - const_str) == total_len);
749
750 /* Attach all the args (i.e. the kids of the sprintf) to o (which
751 * may or may not be topop) The pushmark and const ops need to be
752 * kept in case they're an op_next entry point.
753 */
754 lastkidop = cLISTOPx(topop)->op_last;
755 kid = cUNOPx(topop)->op_first; /* pushmark */
756 op_null(kid);
757 op_null(OpSIBLING(kid)); /* const */
758 if (o != topop) {
759 kid = op_sibling_splice(topop, NULL, -1, NULL); /* cut all args */
760 op_sibling_splice(o, NULL, 0, kid); /* and attach to o */
761 lastkidop->op_next = o;
762 }
763 }
764 else {
765 p = const_str;
766 lenp = aux + PERL_MULTICONCAT_IX_LENGTHS;
767
768 lenp->ssize = -1;
769
770 /* Concatenate all const strings into const_str.
771 * Note that args[] contains the RHS args in reverse order, so
772 * we scan args[] from top to bottom to get constant strings
773 * in L-R order
774 */
775 for (argp = toparg; argp >= args; argp--) {
776 if (!argp->p)
777 /* not a const op */
778 (++lenp)->ssize = -1;
779 else {
780 STRLEN l = argp->len;
781 Copy(argp->p, p, l, char);
782 p += l;
783 if (lenp->ssize == -1)
784 lenp->ssize = l;
785 else
786 lenp->ssize += l;
787 }
788 }
789
790 kid = topop;
791 nextop = o;
792 lastkidop = NULL;
793
794 for (argp = args; argp <= toparg; argp++) {
795 /* only keep non-const args, except keep the first-in-next-chain
796 * arg no matter what it is (but nulled if OP_CONST), because it
797 * may be the entry point to this subtree from the previous
798 * op_next.
799 */
800 bool last = (argp == toparg);
801 OP *prev;
802
803 /* set prev to the sibling *before* the arg to be cut out,
804 * e.g. when cutting EXPR:
805 *
806 * |
807 * kid= CONCAT
808 * |
809 * prev= CONCAT -- EXPR
810 * |
811 */
812 if (argp == args && kid->op_type != OP_CONCAT) {
813 /* in e.g. '$x .= f(1)' there's no RHS concat tree
814 * so the expression to be cut isn't kid->op_last but
815 * kid itself */
816 OP *o1, *o2;
817 /* find the op before kid */
818 o1 = NULL;
819 o2 = cUNOPx(parentop)->op_first;
820 while (o2 && o2 != kid) {
821 o1 = o2;
822 o2 = OpSIBLING(o2);
823 }
824 assert(o2 == kid);
825 prev = o1;
826 kid = parentop;
827 }
828 else if (kid == o && lastkidop)
829 prev = last ? lastkidop : OpSIBLING(lastkidop);
830 else
831 prev = last ? NULL : cUNOPx(kid)->op_first;
832
833 if (!argp->p || last) {
834 /* cut RH op */
835 OP *aop = op_sibling_splice(kid, prev, 1, NULL);
836 /* and unshift to front of o */
837 op_sibling_splice(o, NULL, 0, aop);
838 /* record the right-most op added to o: later we will
839 * free anything to the right of it */
840 if (!lastkidop)
841 lastkidop = aop;
842 aop->op_next = nextop;
843 if (last) {
844 if (argp->p)
845 /* null the const at start of op_next chain */
846 op_null(aop);
847 }
848 else if (prev)
849 nextop = prev->op_next;
850 }
851
852 /* the last two arguments are both attached to the same concat op */
853 if (argp < toparg - 1)
854 kid = prev;
855 }
856 }
857
858 /* Populate the aux struct */
859
860 aux[PERL_MULTICONCAT_IX_NARGS].ssize = nargs;
861 aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv = utf8 ? NULL : const_str;
862 aux[PERL_MULTICONCAT_IX_PLAIN_LEN].ssize = utf8 ? 0 : total_len;
863 aux[PERL_MULTICONCAT_IX_UTF8_PV].pv = const_str;
864 aux[PERL_MULTICONCAT_IX_UTF8_LEN].ssize = total_len;
865
866 /* if variant > 0, calculate a variant const string and lengths where
867 * the utf8 version of the string will take 'variant' more bytes than
868 * the plain one. */
869
870 if (variant) {
871 char *p = const_str;
872 STRLEN ulen = total_len + variant;
873 UNOP_AUX_item *lens = aux + PERL_MULTICONCAT_IX_LENGTHS;
874 UNOP_AUX_item *ulens = lens + (nargs + 1);
875 char *up = (char*)PerlMemShared_malloc(ulen);
876 SSize_t n;
877
878 aux[PERL_MULTICONCAT_IX_UTF8_PV].pv = up;
879 aux[PERL_MULTICONCAT_IX_UTF8_LEN].ssize = ulen;
880
881 for (n = 0; n < (nargs + 1); n++) {
882 SSize_t i;
883 char * orig_up = up;
884 for (i = (lens++)->ssize; i > 0; i--) {
885 U8 c = *p++;
886 append_utf8_from_native_byte(c, (U8**)&up);
887 }
888 (ulens++)->ssize = (i < 0) ? i : up - orig_up;
889 }
890 }
891
892 if (stringop) {
893 /* if there was a top(ish)-level OP_STRINGIFY, we need to keep
894 * that op's first child - an ex-PUSHMARK - because the op_next of
895 * the previous op may point to it (i.e. it's the entry point for
896 * the o optree)
897 */
898 OP *pmop =
899 (stringop == o)
900 ? op_sibling_splice(o, lastkidop, 1, NULL)
901 : op_sibling_splice(stringop, NULL, 1, NULL);
902 assert(OP_TYPE_IS_OR_WAS_NN(pmop, OP_PUSHMARK));
903 op_sibling_splice(o, NULL, 0, pmop);
904 if (!lastkidop)
905 lastkidop = pmop;
906 }
907
908 /* Optimise
909 * target = A.B.C...
910 * target .= A.B.C...
911 */
912
913 if (targetop) {
914 assert(!targmyop);
915
916 if (o->op_type == OP_SASSIGN) {
917 /* Move the target subtree from being the last of o's children
918 * to being the last of o's preserved children.
919 * Note the difference between 'target = ...' and 'target .= ...':
920 * for the former, target is executed last; for the latter,
921 * first.
922 */
923 kid = OpSIBLING(lastkidop);
924 op_sibling_splice(o, kid, 1, NULL); /* cut target op */
925 op_sibling_splice(o, lastkidop, 0, targetop); /* and paste */
926 lastkidop->op_next = kid->op_next;
927 lastkidop = targetop;
928 }
929 else {
930 /* Move the target subtree from being the first of o's
931 * original children to being the first of *all* o's children.
932 */
933 if (lastkidop) {
934 op_sibling_splice(o, lastkidop, 1, NULL); /* cut target op */
935 op_sibling_splice(o, NULL, 0, targetop); /* and paste*/
936 }
937 else {
938 /* if the RHS of .= doesn't contain a concat (e.g.
939 * $x .= "foo"), it gets missed by the "strip ops from the
940 * tree and add to o" loop earlier */
941 assert(topop->op_type != OP_CONCAT);
942 if (stringop) {
943 /* in e.g. $x .= "$y", move the $y expression
944 * from being a child of OP_STRINGIFY to being the
945 * second child of the OP_CONCAT
946 */
947 assert(cUNOPx(stringop)->op_first == topop);
948 op_sibling_splice(stringop, NULL, 1, NULL);
949 op_sibling_splice(o, cUNOPo->op_first, 0, topop);
950 }
951 assert(topop == OpSIBLING(cBINOPo->op_first));
952 if (toparg->p)
953 op_null(topop);
954 lastkidop = topop;
955 }
956 }
957
958 if (is_targable) {
959 /* optimise
960 * my $lex = A.B.C...
961 * $lex = A.B.C...
962 * $lex .= A.B.C...
963 * The original padsv op is kept but nulled in case it's the
964 * entry point for the optree (which it will be for
965 * '$lex .= ... '
966 */
967 private_flags |= OPpTARGET_MY;
968 private_flags |= (targetop->op_private & OPpLVAL_INTRO);
969 o->op_targ = targetop->op_targ;
970 targetop->op_targ = 0;
971 op_null(targetop);
972 }
973 else
974 flags |= OPf_STACKED;
975 }
976 else if (targmyop) {
977 private_flags |= OPpTARGET_MY;
978 if (o != targmyop) {
979 o->op_targ = targmyop->op_targ;
980 targmyop->op_targ = 0;
981 }
982 }
983
984 /* detach the emaciated husk of the sprintf/concat optree and free it */
985 for (;;) {
986 kid = op_sibling_splice(o, lastkidop, 1, NULL);
987 if (!kid)
988 break;
989 op_free(kid);
990 }
991
992 /* and convert o into a multiconcat */
993
994 o->op_flags = (flags|OPf_KIDS|stacked_last
995 |(o->op_flags & (OPf_WANT|OPf_PARENS)));
996 o->op_private = private_flags;
997 o->op_type = OP_MULTICONCAT;
998 o->op_ppaddr = PL_ppaddr[OP_MULTICONCAT];
999 cUNOP_AUXo->op_aux = aux;
1000
1001
1002 /* add some PADTMPs, as needed, for the 'fallback to OP_CONCAT
1003 * behaviour if magic / overloaded etc present' code path */
1004
1005 /* general PADTMP for the target of each concat */
1006 aux[PERL_MULTICONCAT_IX_PADTMP0].pad_offset =
1007 pad_alloc(OP_MULTICONCAT, SVs_PADTMP);
1008
1009 /* PADTMP for recreating OP_CONST return values */
1010 aux[PERL_MULTICONCAT_IX_PADTMP1].pad_offset =
1011 (is_sprintf || nconst) ? pad_alloc(OP_MULTICONCAT, SVs_PADTMP) : 0;
1012
1013 /* PADTMP for stringifying the result */
1014 aux[PERL_MULTICONCAT_IX_PADTMP2].pad_offset =
1015 (o->op_private &OPpMULTICONCAT_STRINGIFY)
1016 ? pad_alloc(OP_MULTICONCAT, SVs_PADTMP) : 0;
1017 }
1018
1019
1020 /*
1021 =for apidoc_section $optree_manipulation
1022
1023 =for apidoc optimize_optree
1024
1025 This function applies some optimisations to the optree in top-down order.
1026 It is called before the peephole optimizer, which processes ops in
1027 execution order. Note that finalize_optree() also does a top-down scan,
1028 but is called *after* the peephole optimizer.
1029
1030 =cut
1031 */
1032
1033 void
Perl_optimize_optree(pTHX_ OP * o)1034 Perl_optimize_optree(pTHX_ OP* o)
1035 {
1036 PERL_ARGS_ASSERT_OPTIMIZE_OPTREE;
1037
1038 ENTER;
1039 SAVEVPTR(PL_curcop);
1040
1041 optimize_op(o);
1042
1043 LEAVE;
1044 }
1045
1046
1047 #define warn_implicit_snail_cvsig(o) S_warn_implicit_snail_cvsig(aTHX_ o)
1048 static void
S_warn_implicit_snail_cvsig(pTHX_ OP * o)1049 S_warn_implicit_snail_cvsig(pTHX_ OP *o)
1050 {
1051 CV *cv = PL_compcv;
1052 while(cv && CvEVAL(cv))
1053 cv = CvOUTSIDE(cv);
1054
1055 if(cv && CvSIGNATURE(cv))
1056 Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__ARGS_ARRAY_WITH_SIGNATURES),
1057 "Implicit use of @_ in %s with signatured subroutine is experimental", OP_DESC(o));
1058 }
1059
1060
1061 #define OP_ZOOM(o) (OP_TYPE_IS(o, OP_NULL) ? cUNOPx(o)->op_first : (o))
1062
1063 /* helper for optimize_optree() which optimises one op then recurses
1064 * to optimise any children.
1065 */
1066
1067 STATIC void
S_optimize_op(pTHX_ OP * o)1068 S_optimize_op(pTHX_ OP* o)
1069 {
1070 OP *top_op = o;
1071
1072 PERL_ARGS_ASSERT_OPTIMIZE_OP;
1073
1074 while (1) {
1075 OP * next_kid = NULL;
1076
1077 assert(o->op_type != OP_FREED);
1078
1079 switch (o->op_type) {
1080 case OP_NEXTSTATE:
1081 case OP_DBSTATE:
1082 PL_curcop = ((COP*)o); /* for warnings */
1083 break;
1084
1085
1086 case OP_CONCAT:
1087 case OP_SASSIGN:
1088 case OP_STRINGIFY:
1089 case OP_SPRINTF:
1090 S_maybe_multiconcat(aTHX_ o);
1091 break;
1092
1093 case OP_SUBST:
1094 if (cPMOPo->op_pmreplrootu.op_pmreplroot) {
1095 /* we can't assume that op_pmreplroot->op_sibparent == o
1096 * and that it is thus possible to walk back up the tree
1097 * past op_pmreplroot. So, although we try to avoid
1098 * recursing through op trees, do it here. After all,
1099 * there are unlikely to be many nested s///e's within
1100 * the replacement part of a s///e.
1101 */
1102 optimize_op(cPMOPo->op_pmreplrootu.op_pmreplroot);
1103 }
1104 break;
1105
1106 case OP_RV2AV:
1107 {
1108 OP *first = (o->op_flags & OPf_KIDS) ? cUNOPo->op_first : NULL;
1109 CV *cv = PL_compcv;
1110 while(cv && CvEVAL(cv))
1111 cv = CvOUTSIDE(cv);
1112
1113 if(cv && CvSIGNATURE(cv) &&
1114 OP_TYPE_IS(first, OP_GV) && cGVOPx_gv(first) == PL_defgv) {
1115 OP *parent = op_parent(o);
1116 while(OP_TYPE_IS(parent, OP_NULL))
1117 parent = op_parent(parent);
1118
1119 Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__ARGS_ARRAY_WITH_SIGNATURES),
1120 "Use of @_ in %s with signatured subroutine is experimental", OP_DESC(parent));
1121 }
1122 break;
1123 }
1124
1125 case OP_SHIFT:
1126 case OP_POP:
1127 if(!CvUNIQUE(PL_compcv) && !(o->op_flags & OPf_KIDS))
1128 warn_implicit_snail_cvsig(o);
1129 break;
1130
1131 case OP_ENTERSUB:
1132 if(!(o->op_flags & OPf_STACKED))
1133 warn_implicit_snail_cvsig(o);
1134 break;
1135
1136 case OP_GOTO:
1137 {
1138 OP *first = (o->op_flags & OPf_KIDS) ? cUNOPo->op_first : NULL;
1139 OP *ffirst;
1140 if(OP_TYPE_IS(first, OP_SREFGEN) &&
1141 (ffirst = OP_ZOOM(cUNOPx(first)->op_first)) &&
1142 OP_TYPE_IS(ffirst, OP_RV2CV))
1143 warn_implicit_snail_cvsig(o);
1144 break;
1145 }
1146
1147 default:
1148 break;
1149 }
1150
1151 if (o->op_flags & OPf_KIDS)
1152 next_kid = cUNOPo->op_first;
1153
1154 /* if a kid hasn't been nominated to process, continue with the
1155 * next sibling, or if no siblings left, go back to the parent's
1156 * siblings and so on
1157 */
1158 while (!next_kid) {
1159 if (o == top_op)
1160 return; /* at top; no parents/siblings to try */
1161 if (OpHAS_SIBLING(o))
1162 next_kid = o->op_sibparent;
1163 else
1164 o = o->op_sibparent; /*try parent's next sibling */
1165 }
1166
1167 /* this label not yet used. Goto here if any code above sets
1168 * next-kid
1169 get_next_op:
1170 */
1171 o = next_kid;
1172 }
1173 }
1174
1175 /*
1176 =for apidoc finalize_optree
1177
1178 This function finalizes the optree. Should be called directly after
1179 the complete optree is built. It does some additional
1180 checking which can't be done in the normal C<ck_>xxx functions and makes
1181 the tree thread-safe.
1182
1183 =cut
1184 */
1185
1186 void
Perl_finalize_optree(pTHX_ OP * o)1187 Perl_finalize_optree(pTHX_ OP* o)
1188 {
1189 PERL_ARGS_ASSERT_FINALIZE_OPTREE;
1190
1191 ENTER;
1192 SAVEVPTR(PL_curcop);
1193
1194 finalize_op(o);
1195
1196 LEAVE;
1197 }
1198
1199
1200 /*
1201 =for apidoc traverse_op_tree
1202
1203 Return the next op in a depth-first traversal of the op tree,
1204 returning NULL when the traversal is complete.
1205
1206 The initial call must supply the root of the tree as both top and o.
1207
1208 For now it's static, but it may be exposed to the API in the future.
1209
1210 =cut
1211 */
1212
1213 STATIC OP*
S_traverse_op_tree(pTHX_ OP * top,OP * o)1214 S_traverse_op_tree(pTHX_ OP *top, OP *o) {
1215 OP *sib;
1216
1217 PERL_ARGS_ASSERT_TRAVERSE_OP_TREE;
1218
1219 if ((o->op_flags & OPf_KIDS) && cUNOPo->op_first) {
1220 return cUNOPo->op_first;
1221 }
1222 else if ((sib = OpSIBLING(o))) {
1223 return sib;
1224 }
1225 else {
1226 OP *parent = o->op_sibparent;
1227 assert(!(o->op_moresib));
1228 while (parent && parent != top) {
1229 OP *sib = OpSIBLING(parent);
1230 if (sib)
1231 return sib;
1232 parent = parent->op_sibparent;
1233 }
1234
1235 return NULL;
1236 }
1237 }
1238
1239 STATIC void
S_finalize_op(pTHX_ OP * o)1240 S_finalize_op(pTHX_ OP* o)
1241 {
1242 OP * const top = o;
1243 PERL_ARGS_ASSERT_FINALIZE_OP;
1244
1245 do {
1246 assert(o->op_type != OP_FREED);
1247
1248 switch (o->op_type) {
1249 case OP_NEXTSTATE:
1250 case OP_DBSTATE:
1251 PL_curcop = ((COP*)o); /* for warnings */
1252 break;
1253 case OP_EXEC:
1254 if (OpHAS_SIBLING(o)) {
1255 OP *sib = OpSIBLING(o);
1256 if (( sib->op_type == OP_NEXTSTATE || sib->op_type == OP_DBSTATE)
1257 && ckWARN(WARN_EXEC)
1258 && OpHAS_SIBLING(sib))
1259 {
1260 const OPCODE type = OpSIBLING(sib)->op_type;
1261 if (type != OP_EXIT && type != OP_WARN && type != OP_DIE) {
1262 const line_t oldline = CopLINE(PL_curcop);
1263 CopLINE_set(PL_curcop, CopLINE((COP*)sib));
1264 Perl_warner(aTHX_ packWARN(WARN_EXEC),
1265 "Statement unlikely to be reached");
1266 Perl_warner(aTHX_ packWARN(WARN_EXEC),
1267 "\t(Maybe you meant system() when you said exec()?)\n");
1268 CopLINE_set(PL_curcop, oldline);
1269 }
1270 }
1271 }
1272 break;
1273
1274 case OP_GV:
1275 if ((o->op_private & OPpEARLY_CV) && ckWARN(WARN_PROTOTYPE)) {
1276 GV * const gv = cGVOPo_gv;
1277 if (SvTYPE(gv) == SVt_PVGV && GvCV(gv) && SvPVX_const(GvCV(gv))) {
1278 /* XXX could check prototype here instead of just carping */
1279 SV * const sv = sv_newmortal();
1280 gv_efullname3(sv, gv, NULL);
1281 Perl_warner(aTHX_ packWARN(WARN_PROTOTYPE),
1282 "%" SVf "() called too early to check prototype",
1283 SVfARG(sv));
1284 }
1285 }
1286 break;
1287
1288 case OP_CONST:
1289 if (cSVOPo->op_private & OPpCONST_STRICT)
1290 no_bareword_allowed(o);
1291 #ifdef USE_ITHREADS
1292 /* FALLTHROUGH */
1293 case OP_HINTSEVAL:
1294 op_relocate_sv(&cSVOPo->op_sv, &o->op_targ);
1295 #endif
1296 break;
1297
1298 #ifdef USE_ITHREADS
1299 /* Relocate all the METHOP's SVs to the pad for thread safety. */
1300 case OP_METHOD_NAMED:
1301 case OP_METHOD_SUPER:
1302 case OP_METHOD_REDIR:
1303 case OP_METHOD_REDIR_SUPER:
1304 op_relocate_sv(&cMETHOPo->op_u.op_meth_sv, &o->op_targ);
1305 break;
1306 #endif
1307
1308 case OP_HELEM: {
1309 UNOP *rop;
1310 SVOP *key_op;
1311 OP *kid;
1312
1313 if ((key_op = cSVOPx(cBINOPo->op_last))->op_type != OP_CONST)
1314 break;
1315
1316 rop = cUNOPx(cBINOPo->op_first);
1317
1318 goto check_keys;
1319
1320 case OP_HSLICE:
1321 S_scalar_slice_warning(aTHX_ o);
1322 /* FALLTHROUGH */
1323
1324 case OP_KVHSLICE:
1325 kid = OpSIBLING(cLISTOPo->op_first);
1326 if (/* I bet there's always a pushmark... */
1327 OP_TYPE_ISNT_AND_WASNT_NN(kid, OP_LIST)
1328 && OP_TYPE_ISNT_NN(kid, OP_CONST))
1329 {
1330 break;
1331 }
1332
1333 key_op = cSVOPx(kid->op_type == OP_CONST
1334 ? kid
1335 : OpSIBLING(kLISTOP->op_first));
1336
1337 rop = cUNOPx(cLISTOPo->op_last);
1338
1339 check_keys:
1340 if (o->op_private & OPpLVAL_INTRO || rop->op_type != OP_RV2HV)
1341 rop = NULL;
1342 check_hash_fields_and_hekify(rop, key_op, 1);
1343 break;
1344 }
1345 case OP_NULL:
1346 if (o->op_targ != OP_HSLICE && o->op_targ != OP_ASLICE)
1347 break;
1348 /* FALLTHROUGH */
1349 case OP_ASLICE:
1350 S_scalar_slice_warning(aTHX_ o);
1351 break;
1352
1353 case OP_SUBST: {
1354 if (cPMOPo->op_pmreplrootu.op_pmreplroot)
1355 finalize_op(cPMOPo->op_pmreplrootu.op_pmreplroot);
1356 break;
1357 }
1358 default:
1359 break;
1360 }
1361
1362 #ifdef DEBUGGING
1363 if (o->op_flags & OPf_KIDS) {
1364 OP *kid;
1365
1366 /* check that op_last points to the last sibling, and that
1367 * the last op_sibling/op_sibparent field points back to the
1368 * parent, and that the only ops with KIDS are those which are
1369 * entitled to them */
1370 U32 type = o->op_type;
1371 U32 family;
1372 bool has_last;
1373
1374 if (type == OP_NULL) {
1375 type = o->op_targ;
1376 /* ck_glob creates a null UNOP with ex-type GLOB
1377 * (which is a list op. So pretend it wasn't a listop */
1378 if (type == OP_GLOB)
1379 type = OP_NULL;
1380 }
1381 family = PL_opargs[type] & OA_CLASS_MASK;
1382
1383 has_last = ( family == OA_BINOP
1384 || family == OA_LISTOP
1385 || family == OA_PMOP
1386 || family == OA_LOOP
1387 );
1388 assert( has_last /* has op_first and op_last, or ...
1389 ... has (or may have) op_first: */
1390 || family == OA_UNOP
1391 || family == OA_UNOP_AUX
1392 || family == OA_LOGOP
1393 || family == OA_BASEOP_OR_UNOP
1394 || family == OA_FILESTATOP
1395 || family == OA_LOOPEXOP
1396 || family == OA_METHOP
1397 || type == OP_CUSTOM
1398 || type == OP_NULL /* new_logop does this */
1399 );
1400
1401 for (kid = cUNOPo->op_first; kid; kid = OpSIBLING(kid)) {
1402 if (!OpHAS_SIBLING(kid)) {
1403 if (has_last)
1404 assert(kid == cLISTOPo->op_last);
1405 assert(kid->op_sibparent == o);
1406 }
1407 }
1408 }
1409 #endif
1410 } while (( o = traverse_op_tree(top, o)) != NULL);
1411 }
1412
1413
1414 /*
1415 ---------------------------------------------------------
1416
1417 Common vars in list assignment
1418
1419 There now follows some enums and static functions for detecting
1420 common variables in list assignments. Here is a little essay I wrote
1421 for myself when trying to get my head around this. DAPM.
1422
1423 ----
1424
1425 First some random observations:
1426
1427 * If a lexical var is an alias of something else, e.g.
1428 for my $x ($lex, $pkg, $a[0]) {...}
1429 then the act of aliasing will increase the reference count of the SV
1430
1431 * If a package var is an alias of something else, it may still have a
1432 reference count of 1, depending on how the alias was created, e.g.
1433 in *a = *b, $a may have a refcount of 1 since the GP is shared
1434 with a single GvSV pointer to the SV. So If it's an alias of another
1435 package var, then RC may be 1; if it's an alias of another scalar, e.g.
1436 a lexical var or an array element, then it will have RC > 1.
1437
1438 * There are many ways to create a package alias; ultimately, XS code
1439 may quite legally do GvSV(gv) = SvREFCNT_inc(sv) for example, so
1440 run-time tracing mechanisms are unlikely to be able to catch all cases.
1441
1442 * When the LHS is all my declarations, the same vars can't appear directly
1443 on the RHS, but they can indirectly via closures, aliasing and lvalue
1444 subs. But those techniques all involve an increase in the lexical
1445 scalar's ref count.
1446
1447 * When the LHS is all lexical vars (but not necessarily my declarations),
1448 it is possible for the same lexicals to appear directly on the RHS, and
1449 without an increased ref count, since the stack isn't refcounted.
1450 This case can be detected at compile time by scanning for common lex
1451 vars with PL_generation.
1452
1453 * lvalue subs defeat common var detection, but they do at least
1454 return vars with a temporary ref count increment. Also, you can't
1455 tell at compile time whether a sub call is lvalue.
1456
1457
1458 So...
1459
1460 A: There are a few circumstances where there definitely can't be any
1461 commonality:
1462
1463 LHS empty: () = (...);
1464 RHS empty: (....) = ();
1465 RHS contains only constants or other 'can't possibly be shared'
1466 elements (e.g. ops that return PADTMPs): (...) = (1,2, length)
1467 i.e. they only contain ops not marked as dangerous, whose children
1468 are also not dangerous;
1469 LHS ditto;
1470 LHS contains a single scalar element: e.g. ($x) = (....); because
1471 after $x has been modified, it won't be used again on the RHS;
1472 RHS contains a single element with no aggregate on LHS: e.g.
1473 ($a,$b,$c) = ($x); again, once $a has been modified, its value
1474 won't be used again.
1475
1476 B: If LHS are all 'my' lexical var declarations (or safe ops, which
1477 we can ignore):
1478
1479 my ($a, $b, @c) = ...;
1480
1481 Due to closure and goto tricks, these vars may already have content.
1482 For the same reason, an element on the RHS may be a lexical or package
1483 alias of one of the vars on the left, or share common elements, for
1484 example:
1485
1486 my ($x,$y) = f(); # $x and $y on both sides
1487 sub f : lvalue { ($x,$y) = (1,2); $y, $x }
1488
1489 and
1490
1491 my $ra = f();
1492 my @a = @$ra; # elements of @a on both sides
1493 sub f { @a = 1..4; \@a }
1494
1495
1496 First, just consider scalar vars on LHS:
1497
1498 RHS is safe only if (A), or in addition,
1499 * contains only lexical *scalar* vars, where neither side's
1500 lexicals have been flagged as aliases
1501
1502 If RHS is not safe, then it's always legal to check LHS vars for
1503 RC==1, since the only RHS aliases will always be associated
1504 with an RC bump.
1505
1506 Note that in particular, RHS is not safe if:
1507
1508 * it contains package scalar vars; e.g.:
1509
1510 f();
1511 my ($x, $y) = (2, $x_alias);
1512 sub f { $x = 1; *x_alias = \$x; }
1513
1514 * It contains other general elements, such as flattened or
1515 * spliced or single array or hash elements, e.g.
1516
1517 f();
1518 my ($x,$y) = @a; # or $a[0] or @a{@b} etc
1519
1520 sub f {
1521 ($x, $y) = (1,2);
1522 use feature 'refaliasing';
1523 \($a[0], $a[1]) = \($y,$x);
1524 }
1525
1526 It doesn't matter if the array/hash is lexical or package.
1527
1528 * it contains a function call that happens to be an lvalue
1529 sub which returns one or more of the above, e.g.
1530
1531 f();
1532 my ($x,$y) = f();
1533
1534 sub f : lvalue {
1535 ($x, $y) = (1,2);
1536 *x1 = \$x;
1537 $y, $x1;
1538 }
1539
1540 (so a sub call on the RHS should be treated the same
1541 as having a package var on the RHS).
1542
1543 * any other "dangerous" thing, such an op or built-in that
1544 returns one of the above, e.g. pp_preinc
1545
1546
1547 If RHS is not safe, what we can do however is at compile time flag
1548 that the LHS are all my declarations, and at run time check whether
1549 all the LHS have RC == 1, and if so skip the full scan.
1550
1551 Now consider array and hash vars on LHS: e.g. my (...,@a) = ...;
1552
1553 Here the issue is whether there can be elements of @a on the RHS
1554 which will get prematurely freed when @a is cleared prior to
1555 assignment. This is only a problem if the aliasing mechanism
1556 is one which doesn't increase the refcount - only if RC == 1
1557 will the RHS element be prematurely freed.
1558
1559 Because the array/hash is being INTROed, it or its elements
1560 can't directly appear on the RHS:
1561
1562 my (@a) = ($a[0], @a, etc) # NOT POSSIBLE
1563
1564 but can indirectly, e.g.:
1565
1566 my $r = f();
1567 my (@a) = @$r;
1568 sub f { @a = 1..3; \@a }
1569
1570 So if the RHS isn't safe as defined by (A), we must always
1571 mortalise and bump the ref count of any remaining RHS elements
1572 when assigning to a non-empty LHS aggregate.
1573
1574 Lexical scalars on the RHS aren't safe if they've been involved in
1575 aliasing, e.g.
1576
1577 use feature 'refaliasing';
1578
1579 f();
1580 \(my $lex) = \$pkg;
1581 my @a = ($lex,3); # equivalent to ($a[0],3)
1582
1583 sub f {
1584 @a = (1,2);
1585 \$pkg = \$a[0];
1586 }
1587
1588 Similarly with lexical arrays and hashes on the RHS:
1589
1590 f();
1591 my @b;
1592 my @a = (@b);
1593
1594 sub f {
1595 @a = (1,2);
1596 \$b[0] = \$a[1];
1597 \$b[1] = \$a[0];
1598 }
1599
1600
1601
1602 C: As (B), but in addition the LHS may contain non-intro lexicals, e.g.
1603 my $a; ($a, my $b) = (....);
1604
1605 The difference between (B) and (C) is that it is now physically
1606 possible for the LHS vars to appear on the RHS too, where they
1607 are not reference counted; but in this case, the compile-time
1608 PL_generation sweep will detect such common vars.
1609
1610 So the rules for (C) differ from (B) in that if common vars are
1611 detected, the runtime "test RC==1" optimisation can no longer be used,
1612 and a full mark and sweep is required
1613
1614 D: As (C), but in addition the LHS may contain package vars.
1615
1616 Since package vars can be aliased without a corresponding refcount
1617 increase, all bets are off. It's only safe if (A). E.g.
1618
1619 my ($x, $y) = (1,2);
1620
1621 for $x_alias ($x) {
1622 ($x_alias, $y) = (3, $x); # whoops
1623 }
1624
1625 Ditto for LHS aggregate package vars.
1626
1627 E: Any other dangerous ops on LHS, e.g.
1628 (f(), $a[0], @$r) = (...);
1629
1630 this is similar to (E) in that all bets are off. In addition, it's
1631 impossible to determine at compile time whether the LHS
1632 contains a scalar or an aggregate, e.g.
1633
1634 sub f : lvalue { @a }
1635 (f()) = 1..3;
1636
1637 * ---------------------------------------------------------
1638 */
1639
1640 /* A set of bit flags returned by S_aassign_scan(). Each flag indicates
1641 * that at least one of the things flagged was seen.
1642 */
1643
1644 enum {
1645 AAS_MY_SCALAR = 0x001, /* my $scalar */
1646 AAS_MY_AGG = 0x002, /* aggregate: my @array or my %hash */
1647 AAS_LEX_SCALAR = 0x004, /* $lexical */
1648 AAS_LEX_AGG = 0x008, /* @lexical or %lexical aggregate */
1649 AAS_LEX_SCALAR_COMM = 0x010, /* $lexical seen on both sides */
1650 AAS_PKG_SCALAR = 0x020, /* $scalar (where $scalar is pkg var) */
1651 AAS_PKG_AGG = 0x040, /* package @array or %hash aggregate */
1652 AAS_DANGEROUS = 0x080, /* an op (other than the above)
1653 that's flagged OA_DANGEROUS */
1654 AAS_SAFE_SCALAR = 0x100, /* produces at least one scalar SV that's
1655 not in any of the categories above */
1656 AAS_DEFAV = 0x200 /* contains just a single '@_' on RHS */
1657 };
1658
1659 /* helper function for S_aassign_scan().
1660 * check a PAD-related op for commonality and/or set its generation number.
1661 * Returns a boolean indicating whether its shared */
1662
1663 static bool
S_aassign_padcheck(pTHX_ OP * o,bool rhs)1664 S_aassign_padcheck(pTHX_ OP* o, bool rhs)
1665 {
1666 if (PAD_COMPNAME_GEN(o->op_targ) == PERL_INT_MAX)
1667 /* lexical used in aliasing */
1668 return TRUE;
1669
1670 if (rhs)
1671 return cBOOL(PAD_COMPNAME_GEN(o->op_targ) == (STRLEN)PL_generation);
1672 else
1673 PAD_COMPNAME_GEN_set(o->op_targ, PL_generation);
1674
1675 return FALSE;
1676 }
1677
1678 /*
1679 Helper function for OPpASSIGN_COMMON* detection in rpeep().
1680 It scans the left or right hand subtree of the aassign op, and returns a
1681 set of flags indicating what sorts of things it found there.
1682 'rhs' indicates whether we're scanning the LHS or RHS. If the former, we
1683 set PL_generation on lexical vars; if the latter, we see if
1684 PL_generation matches.
1685 'scalars_p' is a pointer to a counter of the number of scalar SVs seen.
1686 This fn will increment it by the number seen. It's not intended to
1687 be an accurate count (especially as many ops can push a variable
1688 number of SVs onto the stack); rather it's used as to test whether there
1689 can be at most 1 SV pushed; so it's only meanings are "0, 1, many".
1690 */
1691
1692 static int
S_aassign_scan(pTHX_ OP * o,bool rhs,int * scalars_p)1693 S_aassign_scan(pTHX_ OP* o, bool rhs, int *scalars_p)
1694 {
1695 OP *top_op = o;
1696 OP *effective_top_op = o;
1697 int all_flags = 0;
1698
1699 while (1) {
1700 bool top = o == effective_top_op;
1701 int flags = 0;
1702 OP* next_kid = NULL;
1703
1704 /* first, look for a solitary @_ on the RHS */
1705 if ( rhs
1706 && top
1707 && (o->op_flags & OPf_KIDS)
1708 && OP_TYPE_IS_OR_WAS(o, OP_LIST)
1709 ) {
1710 OP *kid = cUNOPo->op_first;
1711 if ( ( kid->op_type == OP_PUSHMARK
1712 || kid->op_type == OP_PADRANGE) /* ex-pushmark */
1713 && ((kid = OpSIBLING(kid)))
1714 && !OpHAS_SIBLING(kid)
1715 && kid->op_type == OP_RV2AV
1716 && !(kid->op_flags & OPf_REF)
1717 && !(kid->op_private & (OPpLVAL_INTRO|OPpMAYBE_LVSUB))
1718 && ((kid->op_flags & OPf_WANT) == OPf_WANT_LIST)
1719 && ((kid = cUNOPx(kid)->op_first))
1720 && kid->op_type == OP_GV
1721 && cGVOPx_gv(kid) == PL_defgv
1722 )
1723 flags = AAS_DEFAV;
1724 }
1725
1726 switch (o->op_type) {
1727 case OP_GVSV:
1728 (*scalars_p)++;
1729 all_flags |= AAS_PKG_SCALAR;
1730 goto do_next;
1731
1732 case OP_PADAV:
1733 case OP_PADHV:
1734 (*scalars_p) += 2;
1735 /* if !top, could be e.g. @a[0,1] */
1736 all_flags |= (top && (o->op_flags & OPf_REF))
1737 ? ((o->op_private & OPpLVAL_INTRO)
1738 ? AAS_MY_AGG : AAS_LEX_AGG)
1739 : AAS_DANGEROUS;
1740 goto do_next;
1741
1742 case OP_PADSV:
1743 {
1744 int comm = S_aassign_padcheck(aTHX_ o, rhs)
1745 ? AAS_LEX_SCALAR_COMM : 0;
1746 (*scalars_p)++;
1747 all_flags |= (o->op_private & OPpLVAL_INTRO)
1748 ? (AAS_MY_SCALAR|comm) : (AAS_LEX_SCALAR|comm);
1749 goto do_next;
1750
1751 }
1752
1753 case OP_RV2AV:
1754 case OP_RV2HV:
1755 (*scalars_p) += 2;
1756 if (cUNOPx(o)->op_first->op_type != OP_GV)
1757 all_flags |= AAS_DANGEROUS; /* @{expr}, %{expr} */
1758 /* @pkg, %pkg */
1759 /* if !top, could be e.g. @a[0,1] */
1760 else if (top && (o->op_flags & OPf_REF))
1761 all_flags |= AAS_PKG_AGG;
1762 else
1763 all_flags |= AAS_DANGEROUS;
1764 goto do_next;
1765
1766 case OP_RV2SV:
1767 (*scalars_p)++;
1768 if (cUNOPx(o)->op_first->op_type != OP_GV) {
1769 (*scalars_p) += 2;
1770 all_flags |= AAS_DANGEROUS; /* ${expr} */
1771 }
1772 else
1773 all_flags |= AAS_PKG_SCALAR; /* $pkg */
1774 goto do_next;
1775
1776 case OP_SPLIT:
1777 if (o->op_private & OPpSPLIT_ASSIGN) {
1778 /* the assign in @a = split() has been optimised away
1779 * and the @a attached directly to the split op
1780 * Treat the array as appearing on the RHS, i.e.
1781 * ... = (@a = split)
1782 * is treated like
1783 * ... = @a;
1784 */
1785
1786 if (o->op_flags & OPf_STACKED) {
1787 /* @{expr} = split() - the array expression is tacked
1788 * on as an extra child to split - process kid */
1789 next_kid = cLISTOPo->op_last;
1790 goto do_next;
1791 }
1792
1793 /* ... else array is directly attached to split op */
1794 (*scalars_p) += 2;
1795 all_flags |= (PL_op->op_private & OPpSPLIT_LEX)
1796 ? ((o->op_private & OPpLVAL_INTRO)
1797 ? AAS_MY_AGG : AAS_LEX_AGG)
1798 : AAS_PKG_AGG;
1799 goto do_next;
1800 }
1801 (*scalars_p)++;
1802 /* other args of split can't be returned */
1803 all_flags |= AAS_SAFE_SCALAR;
1804 goto do_next;
1805
1806 case OP_UNDEF:
1807 /* undef on LHS following a var is significant, e.g.
1808 * my $x = 1;
1809 * @a = (($x, undef) = (2 => $x));
1810 * # @a shoul be (2,1) not (2,2)
1811 *
1812 * undef on RHS counts as a scalar:
1813 * ($x, $y) = (undef, $x); # 2 scalars on RHS: unsafe
1814 */
1815 if ((!rhs && *scalars_p) || rhs)
1816 (*scalars_p)++;
1817 flags = AAS_SAFE_SCALAR;
1818 break;
1819
1820 case OP_PUSHMARK:
1821 case OP_STUB:
1822 /* these are all no-ops; they don't push a potentially common SV
1823 * onto the stack, so they are neither AAS_DANGEROUS nor
1824 * AAS_SAFE_SCALAR */
1825 goto do_next;
1826
1827 case OP_PADRANGE: /* Ignore padrange; checking its siblings is enough */
1828 break;
1829
1830 case OP_NULL:
1831 case OP_LIST:
1832 /* these do nothing, but may have children */
1833 break;
1834
1835 default:
1836 if (PL_opargs[o->op_type] & OA_DANGEROUS) {
1837 (*scalars_p) += 2;
1838 flags = AAS_DANGEROUS;
1839 break;
1840 }
1841
1842 if ( (PL_opargs[o->op_type] & OA_TARGLEX)
1843 && (o->op_private & OPpTARGET_MY))
1844 {
1845 (*scalars_p)++;
1846 all_flags |= S_aassign_padcheck(aTHX_ o, rhs)
1847 ? AAS_LEX_SCALAR_COMM : AAS_LEX_SCALAR;
1848 goto do_next;
1849 }
1850
1851 /* if its an unrecognised, non-dangerous op, assume that it
1852 * is the cause of at least one safe scalar */
1853 (*scalars_p)++;
1854 flags = AAS_SAFE_SCALAR;
1855 break;
1856 }
1857
1858 all_flags |= flags;
1859
1860 /* by default, process all kids next
1861 * XXX this assumes that all other ops are "transparent" - i.e. that
1862 * they can return some of their children. While this true for e.g.
1863 * sort and grep, it's not true for e.g. map. We really need a
1864 * 'transparent' flag added to regen/opcodes
1865 */
1866 if (o->op_flags & OPf_KIDS) {
1867 next_kid = cUNOPo->op_first;
1868 /* these ops do nothing but may have children; but their
1869 * children should also be treated as top-level */
1870 if ( o == effective_top_op
1871 && (o->op_type == OP_NULL || o->op_type == OP_LIST)
1872 )
1873 effective_top_op = next_kid;
1874 }
1875
1876
1877 /* If next_kid is set, someone in the code above wanted us to process
1878 * that kid and all its remaining siblings. Otherwise, work our way
1879 * back up the tree */
1880 do_next:
1881 while (!next_kid) {
1882 if (o == top_op)
1883 return all_flags; /* at top; no parents/siblings to try */
1884 if (OpHAS_SIBLING(o)) {
1885 next_kid = o->op_sibparent;
1886 if (o == effective_top_op)
1887 effective_top_op = next_kid;
1888 }
1889 else if (o == effective_top_op)
1890 effective_top_op = o->op_sibparent;
1891 o = o->op_sibparent; /* try parent's next sibling */
1892 }
1893 o = next_kid;
1894 } /* while */
1895 }
1896
1897 /* S_maybe_multideref(): given an op_next chain of ops beginning at 'start'
1898 * that potentially represent a series of one or more aggregate derefs
1899 * (such as $a->[1]{$key}), examine the chain, and if appropriate, convert
1900 * the whole chain to a single OP_MULTIDEREF op (maybe with a few
1901 * additional ops left in too).
1902 *
1903 * The caller will have already verified that the first few ops in the
1904 * chain following 'start' indicate a multideref candidate, and will have
1905 * set 'orig_o' to the point further on in the chain where the first index
1906 * expression (if any) begins. 'orig_action' specifies what type of
1907 * beginning has already been determined by the ops between start..orig_o
1908 * (e.g. $lex_ary[], $pkg_ary->{}, expr->[], etc).
1909 *
1910 * 'hints' contains any hints flags that need adding (currently just
1911 * OPpHINT_STRICT_REFS) as found in any rv2av/hv skipped by the caller.
1912 */
1913
1914 STATIC void
S_maybe_multideref(pTHX_ OP * start,OP * orig_o,UV orig_action,U8 hints)1915 S_maybe_multideref(pTHX_ OP *start, OP *orig_o, UV orig_action, U8 hints)
1916 {
1917 int pass;
1918 UNOP_AUX_item *arg_buf = NULL;
1919 bool reset_start_targ = FALSE; /* start->op_targ needs zeroing */
1920 int index_skip = -1; /* don't output index arg on this action */
1921
1922 /* similar to regex compiling, do two passes; the first pass
1923 * determines whether the op chain is convertible and calculates the
1924 * buffer size; the second pass populates the buffer and makes any
1925 * changes necessary to ops (such as moving consts to the pad on
1926 * threaded builds).
1927 *
1928 * NB: for things like Coverity, note that both passes take the same
1929 * path through the logic tree (except for 'if (pass)' bits), since
1930 * both passes are following the same op_next chain; and in
1931 * particular, if it would return early on the second pass, it would
1932 * already have returned early on the first pass.
1933 */
1934 for (pass = 0; pass < 2; pass++) {
1935 OP *o = orig_o;
1936 UV action = orig_action;
1937 OP *first_elem_op = NULL; /* first seen aelem/helem */
1938 OP *top_op = NULL; /* highest [ah]elem/exists/del/rv2[ah]v */
1939 int action_count = 0; /* number of actions seen so far */
1940 int action_ix = 0; /* action_count % (actions per IV) */
1941 bool next_is_hash = FALSE; /* is the next lookup to be a hash? */
1942 bool is_last = FALSE; /* no more derefs to follow */
1943 bool maybe_aelemfast = FALSE; /* we can replace with aelemfast? */
1944 UV action_word = 0; /* all actions so far */
1945 size_t argi = 0;
1946 UNOP_AUX_item *action_ptr = arg_buf;
1947
1948 argi++; /* reserve slot for first action word */
1949
1950 switch (action) {
1951 case MDEREF_HV_gvsv_vivify_rv2hv_helem:
1952 case MDEREF_HV_gvhv_helem:
1953 next_is_hash = TRUE;
1954 /* FALLTHROUGH */
1955 case MDEREF_AV_gvsv_vivify_rv2av_aelem:
1956 case MDEREF_AV_gvav_aelem:
1957 if (pass) {
1958 #ifdef USE_ITHREADS
1959 arg_buf[argi].pad_offset = cPADOPx(start)->op_padix;
1960 /* stop it being swiped when nulled */
1961 cPADOPx(start)->op_padix = 0;
1962 #else
1963 arg_buf[argi].sv = cSVOPx(start)->op_sv;
1964 cSVOPx(start)->op_sv = NULL;
1965 #endif
1966 }
1967 argi++;
1968 break;
1969
1970 case MDEREF_HV_padhv_helem:
1971 case MDEREF_HV_padsv_vivify_rv2hv_helem:
1972 next_is_hash = TRUE;
1973 /* FALLTHROUGH */
1974 case MDEREF_AV_padav_aelem:
1975 case MDEREF_AV_padsv_vivify_rv2av_aelem:
1976 if (pass) {
1977 arg_buf[argi].pad_offset = start->op_targ;
1978 /* we skip setting op_targ = 0 for now, since the intact
1979 * OP_PADXV is needed by check_hash_fields_and_hekify */
1980 reset_start_targ = TRUE;
1981 }
1982 argi++;
1983 break;
1984
1985 case MDEREF_HV_pop_rv2hv_helem:
1986 next_is_hash = TRUE;
1987 /* FALLTHROUGH */
1988 case MDEREF_AV_pop_rv2av_aelem:
1989 break;
1990
1991 default:
1992 NOT_REACHED; /* NOTREACHED */
1993 return;
1994 }
1995
1996 while (!is_last) {
1997 /* look for another (rv2av/hv; get index;
1998 * aelem/helem/exists/delele) sequence */
1999
2000 OP *kid;
2001 bool is_deref;
2002 bool ok;
2003 UV index_type = MDEREF_INDEX_none;
2004
2005 if (action_count) {
2006 /* if this is not the first lookup, consume the rv2av/hv */
2007
2008 /* for N levels of aggregate lookup, we normally expect
2009 * that the first N-1 [ah]elem ops will be flagged as
2010 * /DEREF (so they autovivify if necessary), and the last
2011 * lookup op not to be.
2012 * For other things (like @{$h{k1}{k2}}) extra scope or
2013 * leave ops can appear, so abandon the effort in that
2014 * case */
2015 if (o->op_type != OP_RV2AV && o->op_type != OP_RV2HV)
2016 return;
2017
2018 /* rv2av or rv2hv sKR/1 */
2019
2020 ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_PARENS
2021 |OPf_REF|OPf_MOD|OPf_SPECIAL)));
2022 if (o->op_flags != (OPf_WANT_SCALAR|OPf_KIDS|OPf_REF))
2023 return;
2024
2025 /* at this point, we wouldn't expect any of these
2026 * possible private flags:
2027 * OPpMAYBE_LVSUB, OPpOUR_INTRO, OPpLVAL_INTRO
2028 * OPpTRUEBOOL, OPpMAYBE_TRUEBOOL (rv2hv only)
2029 */
2030 ASSUME(!(o->op_private &
2031 ~(OPpHINT_STRICT_REFS|OPpARG1_MASK|OPpSLICEWARNING)));
2032
2033 hints = (o->op_private & OPpHINT_STRICT_REFS);
2034
2035 /* make sure the type of the previous /DEREF matches the
2036 * type of the next lookup */
2037 ASSUME(o->op_type == (next_is_hash ? OP_RV2HV : OP_RV2AV));
2038 top_op = o;
2039
2040 action = next_is_hash
2041 ? MDEREF_HV_vivify_rv2hv_helem
2042 : MDEREF_AV_vivify_rv2av_aelem;
2043 o = o->op_next;
2044 }
2045
2046 /* if this is the second pass, and we're at the depth where
2047 * previously we encountered a non-simple index expression,
2048 * stop processing the index at this point */
2049 if (action_count != index_skip) {
2050
2051 /* look for one or more simple ops that return an array
2052 * index or hash key */
2053
2054 switch (o->op_type) {
2055 case OP_PADSV:
2056 /* it may be a lexical var index */
2057 ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_PARENS
2058 |OPf_REF|OPf_MOD|OPf_SPECIAL)));
2059 ASSUME(!(o->op_private &
2060 ~(OPpPAD_STATE|OPpDEREF|OPpLVAL_INTRO)));
2061
2062 if ( OP_GIMME(o,0) == G_SCALAR
2063 && !(o->op_flags & (OPf_REF|OPf_MOD))
2064 && o->op_private == 0)
2065 {
2066 if (pass)
2067 arg_buf[argi].pad_offset = o->op_targ;
2068 argi++;
2069 index_type = MDEREF_INDEX_padsv;
2070 o = o->op_next;
2071 }
2072 break;
2073
2074 case OP_CONST:
2075 if (next_is_hash) {
2076 /* it's a constant hash index */
2077 if (!(SvFLAGS(cSVOPo_sv) & (SVf_IOK|SVf_NOK|SVf_POK)))
2078 /* "use constant foo => FOO; $h{+foo}" for
2079 * some weird FOO, can leave you with constants
2080 * that aren't simple strings. It's not worth
2081 * the extra hassle for those edge cases */
2082 break;
2083
2084 {
2085 UNOP *rop = NULL;
2086 OP * helem_op = o->op_next;
2087
2088 ASSUME( helem_op->op_type == OP_HELEM
2089 || helem_op->op_type == OP_NULL
2090 || pass == 0);
2091 if (helem_op->op_type == OP_HELEM) {
2092 rop = cUNOPx(cBINOPx(helem_op)->op_first);
2093 if ( helem_op->op_private & OPpLVAL_INTRO
2094 || rop->op_type != OP_RV2HV
2095 )
2096 rop = NULL;
2097 }
2098 /* on first pass just check; on second pass
2099 * hekify */
2100 check_hash_fields_and_hekify(rop, cSVOPo, pass);
2101 }
2102
2103 if (pass) {
2104 #ifdef USE_ITHREADS
2105 /* Relocate sv to the pad for thread safety */
2106 op_relocate_sv(&cSVOPo->op_sv, &o->op_targ);
2107 arg_buf[argi].pad_offset = o->op_targ;
2108 o->op_targ = 0;
2109 #else
2110 arg_buf[argi].sv = cSVOPx_sv(o);
2111 #endif
2112 }
2113 }
2114 else {
2115 /* it's a constant array index */
2116 IV iv;
2117 SV *ix_sv = cSVOPo->op_sv;
2118 if (!SvIOK(ix_sv))
2119 break;
2120 iv = SvIV(ix_sv);
2121
2122 if ( action_count == 0
2123 && iv >= -128
2124 && iv <= 127
2125 && ( action == MDEREF_AV_padav_aelem
2126 || action == MDEREF_AV_gvav_aelem)
2127 )
2128 maybe_aelemfast = TRUE;
2129
2130 if (pass) {
2131 arg_buf[argi].iv = iv;
2132 SvREFCNT_dec_NN(cSVOPo->op_sv);
2133 }
2134 }
2135 if (pass)
2136 /* we've taken ownership of the SV */
2137 cSVOPo->op_sv = NULL;
2138 argi++;
2139 index_type = MDEREF_INDEX_const;
2140 o = o->op_next;
2141 break;
2142
2143 case OP_GV:
2144 /* it may be a package var index */
2145
2146 ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_PARENS|OPf_SPECIAL)));
2147 ASSUME(!(o->op_private & ~(OPpEARLY_CV)));
2148 if ( (o->op_flags & ~(OPf_PARENS|OPf_SPECIAL)) != OPf_WANT_SCALAR
2149 || o->op_private != 0
2150 )
2151 break;
2152
2153 kid = o->op_next;
2154 if (kid->op_type != OP_RV2SV)
2155 break;
2156
2157 ASSUME(!(kid->op_flags &
2158 ~(OPf_WANT|OPf_KIDS|OPf_MOD|OPf_REF
2159 |OPf_SPECIAL|OPf_PARENS)));
2160 ASSUME(!(kid->op_private &
2161 ~(OPpARG1_MASK
2162 |OPpHINT_STRICT_REFS|OPpOUR_INTRO
2163 |OPpDEREF|OPpLVAL_INTRO)));
2164 if( (kid->op_flags &~ OPf_PARENS)
2165 != (OPf_WANT_SCALAR|OPf_KIDS)
2166 || (kid->op_private & ~(OPpARG1_MASK|HINT_STRICT_REFS))
2167 )
2168 break;
2169
2170 if (pass) {
2171 #ifdef USE_ITHREADS
2172 arg_buf[argi].pad_offset = cPADOPx(o)->op_padix;
2173 /* stop it being swiped when nulled */
2174 cPADOPx(o)->op_padix = 0;
2175 #else
2176 arg_buf[argi].sv = cSVOPx(o)->op_sv;
2177 cSVOPo->op_sv = NULL;
2178 #endif
2179 }
2180 argi++;
2181 index_type = MDEREF_INDEX_gvsv;
2182 o = kid->op_next;
2183 break;
2184
2185 } /* switch */
2186 } /* action_count != index_skip */
2187
2188 action |= index_type;
2189
2190
2191 /* at this point we have either:
2192 * * detected what looks like a simple index expression,
2193 * and expect the next op to be an [ah]elem, or
2194 * an nulled [ah]elem followed by a delete or exists;
2195 * * found a more complex expression, so something other
2196 * than the above follows.
2197 */
2198
2199 /* possibly an optimised away [ah]elem (where op_next is
2200 * exists or delete) */
2201 if (o->op_type == OP_NULL)
2202 o = o->op_next;
2203
2204 /* at this point we're looking for an OP_AELEM, OP_HELEM,
2205 * OP_EXISTS or OP_DELETE */
2206
2207 /* if a custom array/hash access checker is in scope,
2208 * abandon optimisation attempt */
2209 if ( (o->op_type == OP_AELEM || o->op_type == OP_HELEM)
2210 && PL_check[o->op_type] != Perl_ck_null)
2211 return;
2212 /* similarly for customised exists and delete */
2213 if ( (o->op_type == OP_EXISTS)
2214 && PL_check[o->op_type] != Perl_ck_exists)
2215 return;
2216 if ( (o->op_type == OP_DELETE)
2217 && PL_check[o->op_type] != Perl_ck_delete)
2218 return;
2219
2220 if ( o->op_type != OP_AELEM
2221 || (o->op_private &
2222 (OPpLVAL_INTRO|OPpLVAL_DEFER|OPpDEREF|OPpMAYBE_LVSUB))
2223 )
2224 maybe_aelemfast = FALSE;
2225
2226 /* look for aelem/helem/exists/delete. If it's not the last elem
2227 * lookup, it *must* have OPpDEREF_AV/HV, but not many other
2228 * flags; if it's the last, then it mustn't have
2229 * OPpDEREF_AV/HV, but may have lots of other flags, like
2230 * OPpLVAL_INTRO etc
2231 */
2232
2233 if ( index_type == MDEREF_INDEX_none
2234 || ( o->op_type != OP_AELEM && o->op_type != OP_HELEM
2235 && o->op_type != OP_EXISTS && o->op_type != OP_DELETE)
2236 )
2237 ok = FALSE;
2238 else {
2239 /* we have aelem/helem/exists/delete with valid simple index */
2240
2241 is_deref = (o->op_type == OP_AELEM || o->op_type == OP_HELEM)
2242 && ( (o->op_private & OPpDEREF) == OPpDEREF_AV
2243 || (o->op_private & OPpDEREF) == OPpDEREF_HV);
2244
2245 /* This doesn't make much sense but is legal:
2246 * @{ local $x[0][0] } = 1
2247 * Since scope exit will undo the autovivification,
2248 * don't bother in the first place. The OP_LEAVE
2249 * assertion is in case there are other cases of both
2250 * OPpLVAL_INTRO and OPpDEREF which don't include a scope
2251 * exit that would undo the local - in which case this
2252 * block of code would need rethinking.
2253 */
2254 if (is_deref && (o->op_private & OPpLVAL_INTRO)) {
2255 #ifdef DEBUGGING
2256 OP *n = o->op_next;
2257 while (n && ( n->op_type == OP_NULL
2258 || n->op_type == OP_LIST
2259 || n->op_type == OP_SCALAR))
2260 n = n->op_next;
2261 assert(n && n->op_type == OP_LEAVE);
2262 #endif
2263 o->op_private &= ~OPpDEREF;
2264 is_deref = FALSE;
2265 }
2266
2267 if (is_deref) {
2268 ASSUME(!(o->op_flags &
2269 ~(OPf_WANT|OPf_KIDS|OPf_MOD|OPf_PARENS)));
2270 ASSUME(!(o->op_private & ~(OPpARG2_MASK|OPpDEREF)));
2271
2272 ok = (o->op_flags &~ OPf_PARENS)
2273 == (OPf_WANT_SCALAR|OPf_KIDS|OPf_MOD)
2274 && !(o->op_private & ~(OPpDEREF|OPpARG2_MASK));
2275 }
2276 else if (o->op_type == OP_EXISTS) {
2277 ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_PARENS
2278 |OPf_REF|OPf_MOD|OPf_SPECIAL)));
2279 ASSUME(!(o->op_private & ~(OPpARG1_MASK|OPpEXISTS_SUB)));
2280 ok = !(o->op_private & ~OPpARG1_MASK);
2281 }
2282 else if (o->op_type == OP_DELETE) {
2283 ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_PARENS
2284 |OPf_REF|OPf_MOD|OPf_SPECIAL)));
2285 ASSUME(!(o->op_private &
2286 ~(OPpARG1_MASK|OPpSLICE|OPpLVAL_INTRO)));
2287 /* don't handle slices or 'local delete'; the latter
2288 * is fairly rare, and has a complex runtime */
2289 ok = !(o->op_private & ~OPpARG1_MASK);
2290 if (OP_TYPE_IS_OR_WAS(cUNOPo->op_first, OP_AELEM))
2291 /* skip handling run-tome error */
2292 ok = (ok && cBOOL(o->op_flags & OPf_SPECIAL));
2293 }
2294 else {
2295 ASSUME(o->op_type == OP_AELEM || o->op_type == OP_HELEM);
2296 ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_MOD
2297 |OPf_PARENS|OPf_REF|OPf_SPECIAL)));
2298 ASSUME(!(o->op_private & ~(OPpARG2_MASK|OPpMAYBE_LVSUB
2299 |OPpLVAL_DEFER|OPpDEREF|OPpLVAL_INTRO)));
2300 ok = (o->op_private & OPpDEREF) != OPpDEREF_SV;
2301 }
2302 }
2303
2304 if (ok) {
2305 if (!first_elem_op)
2306 first_elem_op = o;
2307 top_op = o;
2308 if (is_deref) {
2309 next_is_hash = cBOOL((o->op_private & OPpDEREF) == OPpDEREF_HV);
2310 o = o->op_next;
2311 }
2312 else {
2313 is_last = TRUE;
2314 action |= MDEREF_FLAG_last;
2315 }
2316 }
2317 else {
2318 /* at this point we have something that started
2319 * promisingly enough (with rv2av or whatever), but failed
2320 * to find a simple index followed by an
2321 * aelem/helem/exists/delete. If this is the first action,
2322 * give up; but if we've already seen at least one
2323 * aelem/helem, then keep them and add a new action with
2324 * MDEREF_INDEX_none, which causes it to do the vivify
2325 * from the end of the previous lookup, and do the deref,
2326 * but stop at that point. So $a[0][expr] will do one
2327 * av_fetch, vivify and deref, then continue executing at
2328 * expr */
2329 if (!action_count)
2330 return;
2331 is_last = TRUE;
2332 index_skip = action_count;
2333 action |= MDEREF_FLAG_last;
2334 if (index_type != MDEREF_INDEX_none)
2335 argi--;
2336 }
2337
2338 action_word |= (action << (action_ix * MDEREF_SHIFT));
2339 action_ix++;
2340 action_count++;
2341 /* if there's no space for the next action, reserve a new slot
2342 * for it *before* we start adding args for that action */
2343 if ((action_ix + 1) * MDEREF_SHIFT > UVSIZE*8) {
2344 if (pass) {
2345 action_ptr->uv = action_word;
2346 action_ptr = arg_buf + argi;
2347 }
2348 action_word = 0;
2349 argi++;
2350 action_ix = 0;
2351 }
2352 } /* while !is_last */
2353
2354 /* success! */
2355
2356 if (!action_ix)
2357 /* slot reserved for next action word not now needed */
2358 argi--;
2359 else if (pass)
2360 action_ptr->uv = action_word;
2361
2362 if (pass) {
2363 OP *mderef;
2364 OP *p, *q;
2365
2366 mderef = newUNOP_AUX(OP_MULTIDEREF, 0, NULL, arg_buf);
2367 if (index_skip == -1) {
2368 mderef->op_flags = o->op_flags
2369 & (OPf_WANT|OPf_MOD|(next_is_hash ? OPf_SPECIAL : 0));
2370 if (o->op_type == OP_EXISTS)
2371 mderef->op_private = OPpMULTIDEREF_EXISTS;
2372 else if (o->op_type == OP_DELETE)
2373 mderef->op_private = OPpMULTIDEREF_DELETE;
2374 else
2375 mderef->op_private = o->op_private
2376 & (OPpMAYBE_LVSUB|OPpLVAL_DEFER|OPpLVAL_INTRO);
2377 }
2378 /* accumulate strictness from every level (although I don't think
2379 * they can actually vary) */
2380 mderef->op_private |= hints;
2381
2382 /* integrate the new multideref op into the optree and the
2383 * op_next chain.
2384 *
2385 * In general an op like aelem or helem has two child
2386 * sub-trees: the aggregate expression (a_expr) and the
2387 * index expression (i_expr):
2388 *
2389 * aelem
2390 * |
2391 * a_expr - i_expr
2392 *
2393 * The a_expr returns an AV or HV, while the i-expr returns an
2394 * index. In general a multideref replaces most or all of a
2395 * multi-level tree, e.g.
2396 *
2397 * exists
2398 * |
2399 * ex-aelem
2400 * |
2401 * rv2av - i_expr1
2402 * |
2403 * helem
2404 * |
2405 * rv2hv - i_expr2
2406 * |
2407 * aelem
2408 * |
2409 * a_expr - i_expr3
2410 *
2411 * With multideref, all the i_exprs will be simple vars or
2412 * constants, except that i_expr1 may be arbitrary in the case
2413 * of MDEREF_INDEX_none.
2414 *
2415 * The bottom-most a_expr will be either:
2416 * 1) a simple var (so padXv or gv+rv2Xv);
2417 * 2) a simple scalar var dereferenced (e.g. $r->[0]):
2418 * so a simple var with an extra rv2Xv;
2419 * 3) or an arbitrary expression.
2420 *
2421 * 'start', the first op in the execution chain, will point to
2422 * 1),2): the padXv or gv op;
2423 * 3): the rv2Xv which forms the last op in the a_expr
2424 * execution chain, and the top-most op in the a_expr
2425 * subtree.
2426 *
2427 * For all cases, the 'start' node is no longer required,
2428 * but we can't free it since one or more external nodes
2429 * may point to it. E.g. consider
2430 * $h{foo} = $a ? $b : $c
2431 * Here, both the op_next and op_other branches of the
2432 * cond_expr point to the gv[*h] of the hash expression, so
2433 * we can't free the 'start' op.
2434 *
2435 * For expr->[...], we need to save the subtree containing the
2436 * expression; for the other cases, we just need to save the
2437 * start node.
2438 * So in all cases, we null the start op and keep it around by
2439 * making it the child of the multideref op; for the expr->
2440 * case, the expr will be a subtree of the start node.
2441 *
2442 * So in the simple 1,2 case the optree above changes to
2443 *
2444 * ex-exists
2445 * |
2446 * multideref
2447 * |
2448 * ex-gv (or ex-padxv)
2449 *
2450 * with the op_next chain being
2451 *
2452 * -> ex-gv -> multideref -> op-following-ex-exists ->
2453 *
2454 * In the 3 case, we have
2455 *
2456 * ex-exists
2457 * |
2458 * multideref
2459 * |
2460 * ex-rv2xv
2461 * |
2462 * rest-of-a_expr
2463 * subtree
2464 *
2465 * and
2466 *
2467 * -> rest-of-a_expr subtree ->
2468 * ex-rv2xv -> multideref -> op-following-ex-exists ->
2469 *
2470 *
2471 * Where the last i_expr is non-simple (i.e. MDEREF_INDEX_none,
2472 * e.g. $a[0]{foo}[$x+1], the next rv2xv is nulled and the
2473 * multideref attached as the child, e.g.
2474 *
2475 * exists
2476 * |
2477 * ex-aelem
2478 * |
2479 * ex-rv2av - i_expr1
2480 * |
2481 * multideref
2482 * |
2483 * ex-whatever
2484 *
2485 */
2486
2487 /* if we free this op, don't free the pad entry */
2488 if (reset_start_targ)
2489 start->op_targ = 0;
2490
2491
2492 /* Cut the bit we need to save out of the tree and attach to
2493 * the multideref op, then free the rest of the tree */
2494
2495 /* find parent of node to be detached (for use by splice) */
2496 p = first_elem_op;
2497 if ( orig_action == MDEREF_AV_pop_rv2av_aelem
2498 || orig_action == MDEREF_HV_pop_rv2hv_helem)
2499 {
2500 /* there is an arbitrary expression preceding us, e.g.
2501 * expr->[..]? so we need to save the 'expr' subtree */
2502 if (p->op_type == OP_EXISTS || p->op_type == OP_DELETE)
2503 p = cUNOPx(p)->op_first;
2504 ASSUME( start->op_type == OP_RV2AV
2505 || start->op_type == OP_RV2HV);
2506 }
2507 else {
2508 /* either a padXv or rv2Xv+gv, maybe with an ex-Xelem
2509 * above for exists/delete. */
2510 while ( (p->op_flags & OPf_KIDS)
2511 && cUNOPx(p)->op_first != start
2512 )
2513 p = cUNOPx(p)->op_first;
2514 }
2515 ASSUME(cUNOPx(p)->op_first == start);
2516
2517 /* detach from main tree, and re-attach under the multideref */
2518 op_sibling_splice(mderef, NULL, 0,
2519 op_sibling_splice(p, NULL, 1, NULL));
2520 op_null(start);
2521
2522 start->op_next = mderef;
2523
2524 mderef->op_next = index_skip == -1 ? o->op_next : o;
2525
2526 /* excise and free the original tree, and replace with
2527 * the multideref op */
2528 p = op_sibling_splice(top_op, NULL, -1, mderef);
2529 while (p) {
2530 q = OpSIBLING(p);
2531 op_free(p);
2532 p = q;
2533 }
2534 op_null(top_op);
2535 }
2536 else {
2537 Size_t size = argi;
2538
2539 if (maybe_aelemfast && action_count == 1)
2540 return;
2541
2542 arg_buf = (UNOP_AUX_item*)PerlMemShared_malloc(
2543 sizeof(UNOP_AUX_item) * (size + 1));
2544 /* for dumping etc: store the length in a hidden first slot;
2545 * we set the op_aux pointer to the second slot */
2546 arg_buf->uv = size;
2547 arg_buf++;
2548 }
2549 } /* for (pass = ...) */
2550 }
2551
2552 /* See if the ops following o are such that o will always be executed in
2553 * boolean context: that is, the SV which o pushes onto the stack will
2554 * only ever be consumed by later ops via SvTRUE(sv) or similar.
2555 * If so, set a suitable private flag on o. Normally this will be
2556 * bool_flag; but see below why maybe_flag is needed too.
2557 *
2558 * Typically the two flags you pass will be the generic OPpTRUEBOOL and
2559 * OPpMAYBE_TRUEBOOL, buts it's possible that for some ops those bits may
2560 * already be taken, so you'll have to give that op two different flags.
2561 *
2562 * More explanation of 'maybe_flag' and 'safe_and' parameters.
2563 * The binary logical ops &&, ||, // (plus 'if' and 'unless' which use
2564 * those underlying ops) short-circuit, which means that rather than
2565 * necessarily returning a truth value, they may return the LH argument,
2566 * which may not be boolean. For example in $x = (keys %h || -1), keys
2567 * should return a key count rather than a boolean, even though its
2568 * sort-of being used in boolean context.
2569 *
2570 * So we only consider such logical ops to provide boolean context to
2571 * their LH argument if they themselves are in void or boolean context.
2572 * However, sometimes the context isn't known until run-time. In this
2573 * case the op is marked with the maybe_flag flag it.
2574 *
2575 * Consider the following.
2576 *
2577 * sub f { ....; if (%h) { .... } }
2578 *
2579 * This is actually compiled as
2580 *
2581 * sub f { ....; %h && do { .... } }
2582 *
2583 * Here we won't know until runtime whether the final statement (and hence
2584 * the &&) is in void context and so is safe to return a boolean value.
2585 * So mark o with maybe_flag rather than the bool_flag.
2586 * Note that there is cost associated with determining context at runtime
2587 * (e.g. a call to block_gimme()), so it may not be worth setting (at
2588 * compile time) and testing (at runtime) maybe_flag if the scalar verses
2589 * boolean costs savings are marginal.
2590 *
2591 * However, we can do slightly better with && (compared to || and //):
2592 * this op only returns its LH argument when that argument is false. In
2593 * this case, as long as the op promises to return a false value which is
2594 * valid in both boolean and scalar contexts, we can mark an op consumed
2595 * by && with bool_flag rather than maybe_flag.
2596 * For example as long as pp_padhv and pp_rv2hv return &PL_sv_zero rather
2597 * than &PL_sv_no for a false result in boolean context, then it's safe. An
2598 * op which promises to handle this case is indicated by setting safe_and
2599 * to true.
2600 */
2601
2602 static void
S_check_for_bool_cxt(OP * o,bool safe_and,U8 bool_flag,U8 maybe_flag)2603 S_check_for_bool_cxt(OP*o, bool safe_and, U8 bool_flag, U8 maybe_flag)
2604 {
2605 OP *lop;
2606 U8 flag = 0;
2607
2608 assert((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR);
2609
2610 /* OPpTARGET_MY and boolean context probably don't mix well.
2611 * If someone finds a valid use case, maybe add an extra flag to this
2612 * function which indicates its safe to do so for this op? */
2613 assert(!( (PL_opargs[o->op_type] & OA_TARGLEX)
2614 && (o->op_private & OPpTARGET_MY)));
2615
2616 lop = o->op_next;
2617
2618 while (lop) {
2619 switch (lop->op_type) {
2620 case OP_NULL:
2621 case OP_SCALAR:
2622 break;
2623
2624 /* these two consume the stack argument in the scalar case,
2625 * and treat it as a boolean in the non linenumber case */
2626 case OP_FLIP:
2627 case OP_FLOP:
2628 if ( ((lop->op_flags & OPf_WANT) == OPf_WANT_LIST)
2629 || (lop->op_private & OPpFLIP_LINENUM))
2630 {
2631 lop = NULL;
2632 break;
2633 }
2634 /* FALLTHROUGH */
2635 /* these never leave the original value on the stack */
2636 case OP_NOT:
2637 case OP_XOR:
2638 case OP_COND_EXPR:
2639 case OP_GREPWHILE:
2640 flag = bool_flag;
2641 lop = NULL;
2642 break;
2643
2644 /* OR DOR and AND evaluate their arg as a boolean, but then may
2645 * leave the original scalar value on the stack when following the
2646 * op_next route. If not in void context, we need to ensure
2647 * that whatever follows consumes the arg only in boolean context
2648 * too.
2649 */
2650 case OP_AND:
2651 if (safe_and) {
2652 flag = bool_flag;
2653 lop = NULL;
2654 break;
2655 }
2656 /* FALLTHROUGH */
2657 case OP_OR:
2658 case OP_DOR:
2659 if ((lop->op_flags & OPf_WANT) == OPf_WANT_VOID) {
2660 flag = bool_flag;
2661 lop = NULL;
2662 }
2663 else if (!(lop->op_flags & OPf_WANT)) {
2664 /* unknown context - decide at runtime */
2665 flag = maybe_flag;
2666 lop = NULL;
2667 }
2668 break;
2669
2670 default:
2671 lop = NULL;
2672 break;
2673 }
2674
2675 if (lop)
2676 lop = lop->op_next;
2677 }
2678
2679 o->op_private |= flag;
2680 }
2681
2682 /* mechanism for deferring recursion in rpeep() */
2683
2684 #define MAX_DEFERRED 4
2685
2686 #define DEFER(o) \
2687 STMT_START { \
2688 if (defer_ix == (MAX_DEFERRED-1)) { \
2689 OP **defer = defer_queue[defer_base]; \
2690 CALL_RPEEP(*defer); \
2691 op_prune_chain_head(defer); \
2692 defer_base = (defer_base + 1) % MAX_DEFERRED; \
2693 defer_ix--; \
2694 } \
2695 defer_queue[(defer_base + ++defer_ix) % MAX_DEFERRED] = &(o); \
2696 } STMT_END
2697
2698 #define IS_AND_OP(o) (o->op_type == OP_AND)
2699 #define IS_OR_OP(o) (o->op_type == OP_OR)
2700
2701 /* A peephole optimizer. We visit the ops in the order they're to execute.
2702 * See the comments at the top of this file for more details about when
2703 * peep() is called */
2704
2705 void
Perl_rpeep(pTHX_ OP * o)2706 Perl_rpeep(pTHX_ OP *o)
2707 {
2708 OP* oldop = NULL;
2709 OP* oldoldop = NULL;
2710 OP** defer_queue[MAX_DEFERRED] = { NULL }; /* small queue of deferred branches */
2711 int defer_base = 0;
2712 int defer_ix = -1;
2713
2714 if (!o || o->op_opt)
2715 return;
2716
2717 assert(o->op_type != OP_FREED);
2718
2719 ENTER;
2720 SAVEOP();
2721 SAVEVPTR(PL_curcop);
2722 for (;; o = o->op_next) {
2723 if (o && o->op_opt)
2724 o = NULL;
2725 if (!o) {
2726 while (defer_ix >= 0) {
2727 OP **defer =
2728 defer_queue[(defer_base + defer_ix--) % MAX_DEFERRED];
2729 CALL_RPEEP(*defer);
2730 op_prune_chain_head(defer);
2731 }
2732 break;
2733 }
2734
2735 redo:
2736
2737 /* oldoldop -> oldop -> o should be a chain of 3 adjacent ops */
2738 assert(!oldoldop || oldoldop->op_next == oldop);
2739 assert(!oldop || oldop->op_next == o);
2740
2741 /* By default, this op has now been optimised. A couple of cases below
2742 clear this again. */
2743 o->op_opt = 1;
2744 PL_op = o;
2745
2746 /* look for a series of 1 or more aggregate derefs, e.g.
2747 * $a[1]{foo}[$i]{$k}
2748 * and replace with a single OP_MULTIDEREF op.
2749 * Each index must be either a const, or a simple variable,
2750 *
2751 * First, look for likely combinations of starting ops,
2752 * corresponding to (global and lexical variants of)
2753 * $a[...] $h{...}
2754 * $r->[...] $r->{...}
2755 * (preceding expression)->[...]
2756 * (preceding expression)->{...}
2757 * and if so, call maybe_multideref() to do a full inspection
2758 * of the op chain and if appropriate, replace with an
2759 * OP_MULTIDEREF
2760 */
2761 {
2762 UV action;
2763 OP *o2 = o;
2764 U8 hints = 0;
2765
2766 switch (o2->op_type) {
2767 case OP_GV:
2768 /* $pkg[..] : gv[*pkg]
2769 * $pkg->[...]: gv[*pkg]; rv2sv sKM/DREFAV */
2770
2771 /* Fail if there are new op flag combinations that we're
2772 * not aware of, rather than:
2773 * * silently failing to optimise, or
2774 * * silently optimising the flag away.
2775 * If this ASSUME starts failing, examine what new flag
2776 * has been added to the op, and decide whether the
2777 * optimisation should still occur with that flag, then
2778 * update the code accordingly. This applies to all the
2779 * other ASSUMEs in the block of code too.
2780 */
2781 ASSUME(!(o2->op_flags &
2782 ~(OPf_WANT|OPf_MOD|OPf_PARENS|OPf_SPECIAL)));
2783 ASSUME(!(o2->op_private & ~OPpEARLY_CV));
2784
2785 o2 = o2->op_next;
2786
2787 if (o2->op_type == OP_RV2AV) {
2788 action = MDEREF_AV_gvav_aelem;
2789 goto do_deref;
2790 }
2791
2792 if (o2->op_type == OP_RV2HV) {
2793 action = MDEREF_HV_gvhv_helem;
2794 goto do_deref;
2795 }
2796
2797 if (o2->op_type != OP_RV2SV)
2798 break;
2799
2800 /* at this point we've seen gv,rv2sv, so the only valid
2801 * construct left is $pkg->[] or $pkg->{} */
2802
2803 ASSUME(!(o2->op_flags & OPf_STACKED));
2804 if ((o2->op_flags & (OPf_WANT|OPf_REF|OPf_MOD|OPf_SPECIAL))
2805 != (OPf_WANT_SCALAR|OPf_MOD))
2806 break;
2807
2808 ASSUME(!(o2->op_private & ~(OPpARG1_MASK|HINT_STRICT_REFS
2809 |OPpOUR_INTRO|OPpDEREF|OPpLVAL_INTRO)));
2810 if (o2->op_private & (OPpOUR_INTRO|OPpLVAL_INTRO))
2811 break;
2812 if ( (o2->op_private & OPpDEREF) != OPpDEREF_AV
2813 && (o2->op_private & OPpDEREF) != OPpDEREF_HV)
2814 break;
2815
2816 o2 = o2->op_next;
2817 if (o2->op_type == OP_RV2AV) {
2818 action = MDEREF_AV_gvsv_vivify_rv2av_aelem;
2819 goto do_deref;
2820 }
2821 if (o2->op_type == OP_RV2HV) {
2822 action = MDEREF_HV_gvsv_vivify_rv2hv_helem;
2823 goto do_deref;
2824 }
2825 break;
2826
2827 case OP_PADSV:
2828 /* $lex->[...]: padsv[$lex] sM/DREFAV */
2829
2830 ASSUME(!(o2->op_flags &
2831 ~(OPf_WANT|OPf_PARENS|OPf_REF|OPf_MOD|OPf_SPECIAL)));
2832 if ((o2->op_flags &
2833 (OPf_WANT|OPf_REF|OPf_MOD|OPf_SPECIAL))
2834 != (OPf_WANT_SCALAR|OPf_MOD))
2835 break;
2836
2837 ASSUME(!(o2->op_private &
2838 ~(OPpPAD_STATE|OPpDEREF|OPpLVAL_INTRO)));
2839 /* skip if state or intro, or not a deref */
2840 if ( o2->op_private != OPpDEREF_AV
2841 && o2->op_private != OPpDEREF_HV)
2842 break;
2843
2844 o2 = o2->op_next;
2845 if (o2->op_type == OP_RV2AV) {
2846 action = MDEREF_AV_padsv_vivify_rv2av_aelem;
2847 goto do_deref;
2848 }
2849 if (o2->op_type == OP_RV2HV) {
2850 action = MDEREF_HV_padsv_vivify_rv2hv_helem;
2851 goto do_deref;
2852 }
2853 break;
2854
2855 case OP_PADAV:
2856 case OP_PADHV:
2857 /* $lex[..]: padav[@lex:1,2] sR *
2858 * or $lex{..}: padhv[%lex:1,2] sR */
2859 ASSUME(!(o2->op_flags & ~(OPf_WANT|OPf_MOD|OPf_PARENS|
2860 OPf_REF|OPf_SPECIAL)));
2861 if ((o2->op_flags &
2862 (OPf_WANT|OPf_REF|OPf_MOD|OPf_SPECIAL))
2863 != (OPf_WANT_SCALAR|OPf_REF))
2864 break;
2865 if (o2->op_flags != (OPf_WANT_SCALAR|OPf_REF))
2866 break;
2867 /* OPf_PARENS isn't currently used in this case;
2868 * if that changes, let us know! */
2869 ASSUME(!(o2->op_flags & OPf_PARENS));
2870
2871 /* at this point, we wouldn't expect any of the remaining
2872 * possible private flags:
2873 * OPpPAD_STATE, OPpLVAL_INTRO, OPpTRUEBOOL,
2874 * OPpMAYBE_TRUEBOOL, OPpMAYBE_LVSUB
2875 *
2876 * OPpSLICEWARNING shouldn't affect runtime
2877 */
2878 ASSUME(!(o2->op_private & ~(OPpSLICEWARNING)));
2879
2880 action = o2->op_type == OP_PADAV
2881 ? MDEREF_AV_padav_aelem
2882 : MDEREF_HV_padhv_helem;
2883 o2 = o2->op_next;
2884 S_maybe_multideref(aTHX_ o, o2, action, 0);
2885 break;
2886
2887
2888 case OP_RV2AV:
2889 case OP_RV2HV:
2890 action = o2->op_type == OP_RV2AV
2891 ? MDEREF_AV_pop_rv2av_aelem
2892 : MDEREF_HV_pop_rv2hv_helem;
2893 /* FALLTHROUGH */
2894 do_deref:
2895 /* (expr)->[...]: rv2av sKR/1;
2896 * (expr)->{...}: rv2hv sKR/1; */
2897
2898 ASSUME(o2->op_type == OP_RV2AV || o2->op_type == OP_RV2HV);
2899
2900 ASSUME(!(o2->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_PARENS
2901 |OPf_REF|OPf_MOD|OPf_STACKED|OPf_SPECIAL)));
2902 if (o2->op_flags != (OPf_WANT_SCALAR|OPf_KIDS|OPf_REF))
2903 break;
2904
2905 /* at this point, we wouldn't expect any of these
2906 * possible private flags:
2907 * OPpMAYBE_LVSUB, OPpLVAL_INTRO
2908 * OPpTRUEBOOL, OPpMAYBE_TRUEBOOL, (rv2hv only)
2909 */
2910 ASSUME(!(o2->op_private &
2911 ~(OPpHINT_STRICT_REFS|OPpARG1_MASK|OPpSLICEWARNING
2912 |OPpOUR_INTRO)));
2913 hints |= (o2->op_private & OPpHINT_STRICT_REFS);
2914
2915 o2 = o2->op_next;
2916
2917 S_maybe_multideref(aTHX_ o, o2, action, hints);
2918 break;
2919
2920 default:
2921 break;
2922 }
2923 }
2924
2925
2926 switch (o->op_type) {
2927 case OP_DBSTATE:
2928 PL_curcop = ((COP*)o); /* for warnings */
2929 break;
2930 case OP_NEXTSTATE:
2931 PL_curcop = ((COP*)o); /* for warnings */
2932
2933 /* Optimise a "return ..." at the end of a sub to just be "...".
2934 * This saves 2 ops. Before:
2935 * 1 <;> nextstate(main 1 -e:1) v ->2
2936 * 4 <@> return K ->5
2937 * 2 <0> pushmark s ->3
2938 * - <1> ex-rv2sv sK/1 ->4
2939 * 3 <#> gvsv[*cat] s ->4
2940 *
2941 * After:
2942 * - <@> return K ->-
2943 * - <0> pushmark s ->2
2944 * - <1> ex-rv2sv sK/1 ->-
2945 * 2 <$> gvsv(*cat) s ->3
2946 */
2947 {
2948 OP *next = o->op_next;
2949 OP *sibling = OpSIBLING(o);
2950 if ( OP_TYPE_IS(next, OP_PUSHMARK)
2951 && OP_TYPE_IS(sibling, OP_RETURN)
2952 && OP_TYPE_IS(sibling->op_next, OP_LINESEQ)
2953 && ( OP_TYPE_IS(sibling->op_next->op_next, OP_LEAVESUB)
2954 ||OP_TYPE_IS(sibling->op_next->op_next,
2955 OP_LEAVESUBLV))
2956 && cUNOPx(sibling)->op_first == next
2957 && OpHAS_SIBLING(next) && OpSIBLING(next)->op_next
2958 && next->op_next
2959 ) {
2960 /* Look through the PUSHMARK's siblings for one that
2961 * points to the RETURN */
2962 OP *top = OpSIBLING(next);
2963 while (top && top->op_next) {
2964 if (top->op_next == sibling) {
2965 top->op_next = sibling->op_next;
2966 o->op_next = next->op_next;
2967 break;
2968 }
2969 top = OpSIBLING(top);
2970 }
2971 }
2972 }
2973
2974 /* Optimise 'my $x; my $y;' into 'my ($x, $y);'
2975 *
2976 * This latter form is then suitable for conversion into padrange
2977 * later on. Convert:
2978 *
2979 * nextstate1 -> padop1 -> nextstate2 -> padop2 -> nextstate3
2980 *
2981 * into:
2982 *
2983 * nextstate1 -> listop -> nextstate3
2984 * / \
2985 * pushmark -> padop1 -> padop2
2986 */
2987 if (o->op_next && (
2988 o->op_next->op_type == OP_PADSV
2989 || o->op_next->op_type == OP_PADAV
2990 || o->op_next->op_type == OP_PADHV
2991 )
2992 && !(o->op_next->op_private & ~OPpLVAL_INTRO)
2993 && o->op_next->op_next && o->op_next->op_next->op_type == OP_NEXTSTATE
2994 && o->op_next->op_next->op_next && (
2995 o->op_next->op_next->op_next->op_type == OP_PADSV
2996 || o->op_next->op_next->op_next->op_type == OP_PADAV
2997 || o->op_next->op_next->op_next->op_type == OP_PADHV
2998 )
2999 && !(o->op_next->op_next->op_next->op_private & ~OPpLVAL_INTRO)
3000 && o->op_next->op_next->op_next->op_next && o->op_next->op_next->op_next->op_next->op_type == OP_NEXTSTATE
3001 && (!CopLABEL((COP*)o)) /* Don't mess with labels */
3002 && (!CopLABEL((COP*)o->op_next->op_next)) /* ... */
3003 ) {
3004 OP *pad1, *ns2, *pad2, *ns3, *newop, *newpm;
3005
3006 pad1 = o->op_next;
3007 ns2 = pad1->op_next;
3008 pad2 = ns2->op_next;
3009 ns3 = pad2->op_next;
3010
3011 /* we assume here that the op_next chain is the same as
3012 * the op_sibling chain */
3013 assert(OpSIBLING(o) == pad1);
3014 assert(OpSIBLING(pad1) == ns2);
3015 assert(OpSIBLING(ns2) == pad2);
3016 assert(OpSIBLING(pad2) == ns3);
3017
3018 /* excise and delete ns2 */
3019 op_sibling_splice(NULL, pad1, 1, NULL);
3020 op_free(ns2);
3021
3022 /* excise pad1 and pad2 */
3023 op_sibling_splice(NULL, o, 2, NULL);
3024
3025 /* create new listop, with children consisting of:
3026 * a new pushmark, pad1, pad2. */
3027 newop = newLISTOP(OP_LIST, 0, pad1, pad2);
3028 newop->op_flags |= OPf_PARENS;
3029 newop->op_flags = (newop->op_flags & ~OPf_WANT) | OPf_WANT_VOID;
3030
3031 /* insert newop between o and ns3 */
3032 op_sibling_splice(NULL, o, 0, newop);
3033
3034 /*fixup op_next chain */
3035 newpm = cUNOPx(newop)->op_first; /* pushmark */
3036 o ->op_next = newpm;
3037 newpm->op_next = pad1;
3038 pad1 ->op_next = pad2;
3039 pad2 ->op_next = newop; /* listop */
3040 newop->op_next = ns3;
3041
3042 /* Ensure pushmark has this flag if padops do */
3043 if (pad1->op_flags & OPf_MOD && pad2->op_flags & OPf_MOD) {
3044 newpm->op_flags |= OPf_MOD;
3045 }
3046
3047 break;
3048 }
3049
3050 /* Two NEXTSTATEs in a row serve no purpose. Except if they happen
3051 to carry two labels. For now, take the easier option, and skip
3052 this optimisation if the first NEXTSTATE has a label.
3053 Yves asked what about if they have different hints or features?
3054 Tony thinks that as we remove the first of the pair it should
3055 be fine.
3056 */
3057 if (!CopLABEL((COP*)o) && !PERLDB_NOOPT) {
3058 OP *nextop = o->op_next;
3059 while (nextop) {
3060 switch (nextop->op_type) {
3061 case OP_NULL:
3062 case OP_SCALAR:
3063 case OP_LINESEQ:
3064 case OP_SCOPE:
3065 nextop = nextop->op_next;
3066 continue;
3067 }
3068 break;
3069 }
3070
3071 if (nextop && (nextop->op_type == OP_NEXTSTATE)) {
3072 op_null(o);
3073 if (oldop)
3074 oldop->op_next = nextop;
3075 o = nextop;
3076 /* Skip (old)oldop assignment since the current oldop's
3077 op_next already points to the next op. */
3078 goto redo;
3079 }
3080 }
3081 break;
3082
3083 case OP_CONCAT:
3084 if (o->op_next && o->op_next->op_type == OP_STRINGIFY) {
3085 if (o->op_next->op_private & OPpTARGET_MY) {
3086 if (o->op_flags & OPf_STACKED) /* chained concats */
3087 break; /* ignore_optimization */
3088 else {
3089 /* assert(PL_opargs[o->op_type] & OA_TARGLEX); */
3090 o->op_targ = o->op_next->op_targ;
3091 o->op_next->op_targ = 0;
3092 o->op_private |= OPpTARGET_MY;
3093 }
3094 }
3095 op_null(o->op_next);
3096 }
3097 break;
3098 case OP_STUB:
3099 if ((o->op_flags & OPf_WANT) != OPf_WANT_LIST) {
3100 break; /* Scalar stub must produce undef. List stub is noop */
3101 }
3102 goto nothin;
3103 case OP_NULL:
3104 if (o->op_targ == OP_NEXTSTATE
3105 || o->op_targ == OP_DBSTATE)
3106 {
3107 PL_curcop = ((COP*)o);
3108 }
3109 /* XXX: We avoid setting op_seq here to prevent later calls
3110 to rpeep() from mistakenly concluding that optimisation
3111 has already occurred. This doesn't fix the real problem,
3112 though (See 20010220.007 (#5874)). AMS 20010719 */
3113 /* op_seq functionality is now replaced by op_opt */
3114 o->op_opt = 0;
3115 /* FALLTHROUGH */
3116 case OP_SCALAR:
3117 case OP_LINESEQ:
3118 case OP_SCOPE:
3119 nothin:
3120 if (oldop) {
3121 oldop->op_next = o->op_next;
3122 o->op_opt = 0;
3123 continue;
3124 }
3125 break;
3126
3127 case OP_PUSHMARK:
3128
3129 /* Given
3130 5 repeat/DOLIST
3131 3 ex-list
3132 1 pushmark
3133 2 scalar or const
3134 4 const[0]
3135 convert repeat into a stub with no kids.
3136 */
3137 if (o->op_next->op_type == OP_CONST
3138 || ( o->op_next->op_type == OP_PADSV
3139 && !(o->op_next->op_private & OPpLVAL_INTRO))
3140 || ( o->op_next->op_type == OP_GV
3141 && o->op_next->op_next->op_type == OP_RV2SV
3142 && !(o->op_next->op_next->op_private
3143 & (OPpLVAL_INTRO|OPpOUR_INTRO))))
3144 {
3145 const OP *kid = o->op_next->op_next;
3146 if (o->op_next->op_type == OP_GV)
3147 kid = kid->op_next;
3148 /* kid is now the ex-list. */
3149 if (kid->op_type == OP_NULL
3150 && (kid = kid->op_next)->op_type == OP_CONST
3151 /* kid is now the repeat count. */
3152 && kid->op_next->op_type == OP_REPEAT
3153 && kid->op_next->op_private & OPpREPEAT_DOLIST
3154 && (kid->op_next->op_flags & OPf_WANT) == OPf_WANT_LIST
3155 && SvIOK(kSVOP_sv) && SvIVX(kSVOP_sv) == 0
3156 && oldop)
3157 {
3158 o = kid->op_next; /* repeat */
3159 oldop->op_next = o;
3160 op_free(cBINOPo->op_first);
3161 op_free(cBINOPo->op_last );
3162 o->op_flags &=~ OPf_KIDS;
3163 /* stub is a baseop; repeat is a binop */
3164 STATIC_ASSERT_STMT(sizeof(OP) <= sizeof(BINOP));
3165 OpTYPE_set(o, OP_STUB);
3166 o->op_private = 0;
3167 break;
3168 }
3169 }
3170
3171 /* Convert a series of PAD ops for my vars plus support into a
3172 * single padrange op. Basically
3173 *
3174 * pushmark -> pad[ahs]v -> pad[ahs]?v -> ... -> (list) -> rest
3175 *
3176 * becomes, depending on circumstances, one of
3177 *
3178 * padrange ----------------------------------> (list) -> rest
3179 * padrange --------------------------------------------> rest
3180 *
3181 * where all the pad indexes are sequential and of the same type
3182 * (INTRO or not).
3183 * We convert the pushmark into a padrange op, then skip
3184 * any other pad ops, and possibly some trailing ops.
3185 * Note that we don't null() the skipped ops, to make it
3186 * easier for Deparse to undo this optimisation (and none of
3187 * the skipped ops are holding any resources). It also makes
3188 * it easier for find_uninit_var(), as it can just ignore
3189 * padrange, and examine the original pad ops.
3190 */
3191 {
3192 OP *p;
3193 OP *followop = NULL; /* the op that will follow the padrange op */
3194 U8 count = 0;
3195 U8 intro = 0;
3196 PADOFFSET base = 0; /* init only to stop compiler whining */
3197 bool gvoid = 0; /* init only to stop compiler whining */
3198 bool defav = 0; /* seen (...) = @_ */
3199 bool reuse = 0; /* reuse an existing padrange op */
3200
3201 /* look for a pushmark -> gv[_] -> rv2av */
3202
3203 {
3204 OP *rv2av, *q;
3205 p = o->op_next;
3206 if ( p->op_type == OP_GV
3207 && cGVOPx_gv(p) == PL_defgv
3208 && (rv2av = p->op_next)
3209 && rv2av->op_type == OP_RV2AV
3210 && !(rv2av->op_flags & OPf_REF)
3211 && !(rv2av->op_private & (OPpLVAL_INTRO|OPpMAYBE_LVSUB))
3212 && ((rv2av->op_flags & OPf_WANT) == OPf_WANT_LIST)
3213 ) {
3214 q = rv2av->op_next;
3215 if (q->op_type == OP_NULL)
3216 q = q->op_next;
3217 if (q->op_type == OP_PUSHMARK) {
3218 defav = 1;
3219 p = q;
3220 }
3221 }
3222 }
3223 if (!defav) {
3224 p = o;
3225 }
3226
3227 /* scan for PAD ops */
3228
3229 for (p = p->op_next; p; p = p->op_next) {
3230 if (p->op_type == OP_NULL)
3231 continue;
3232
3233 if (( p->op_type != OP_PADSV
3234 && p->op_type != OP_PADAV
3235 && p->op_type != OP_PADHV
3236 )
3237 /* any private flag other than INTRO? e.g. STATE */
3238 || (p->op_private & ~OPpLVAL_INTRO)
3239 )
3240 break;
3241
3242 /* let $a[N] potentially be optimised into AELEMFAST_LEX
3243 * instead */
3244 if ( p->op_type == OP_PADAV
3245 && p->op_next
3246 && p->op_next->op_type == OP_CONST
3247 && p->op_next->op_next
3248 && p->op_next->op_next->op_type == OP_AELEM
3249 )
3250 break;
3251
3252 /* for 1st padop, note what type it is and the range
3253 * start; for the others, check that it's the same type
3254 * and that the targs are contiguous */
3255 if (count == 0) {
3256 intro = (p->op_private & OPpLVAL_INTRO);
3257 base = p->op_targ;
3258 gvoid = OP_GIMME(p,0) == G_VOID;
3259 }
3260 else {
3261 if ((p->op_private & OPpLVAL_INTRO) != intro)
3262 break;
3263 /* Note that you'd normally expect targs to be
3264 * contiguous in my($a,$b,$c), but that's not the case
3265 * when external modules start doing things, e.g.
3266 * Function::Parameters */
3267 if (p->op_targ != base + count)
3268 break;
3269 assert(p->op_targ == base + count);
3270 /* Either all the padops or none of the padops should
3271 be in void context. Since we only do the optimisa-
3272 tion for av/hv when the aggregate itself is pushed
3273 on to the stack (one item), there is no need to dis-
3274 tinguish list from scalar context. */
3275 if (gvoid != (OP_GIMME(p,0) == G_VOID))
3276 break;
3277 }
3278
3279 /* for AV, HV, only when we're not flattening */
3280 if ( p->op_type != OP_PADSV
3281 && !gvoid
3282 && !(p->op_flags & OPf_REF)
3283 )
3284 break;
3285
3286 if (count >= OPpPADRANGE_COUNTMASK)
3287 break;
3288
3289 /* there's a biggest base we can fit into a
3290 * SAVEt_CLEARPADRANGE in pp_padrange.
3291 * (The sizeof() stuff will be constant-folded, and is
3292 * intended to avoid getting "comparison is always false"
3293 * compiler warnings. See the comments above
3294 * MEM_WRAP_CHECK for more explanation on why we do this
3295 * in a weird way to avoid compiler warnings.)
3296 */
3297 if ( intro
3298 && (8*sizeof(base) >
3299 8*sizeof(UV)-OPpPADRANGE_COUNTSHIFT-SAVE_TIGHT_SHIFT
3300 ? (Size_t)base
3301 : (UV_MAX >> (OPpPADRANGE_COUNTSHIFT+SAVE_TIGHT_SHIFT))
3302 ) >
3303 (UV_MAX >> (OPpPADRANGE_COUNTSHIFT+SAVE_TIGHT_SHIFT))
3304 )
3305 break;
3306
3307 /* Success! We've got another valid pad op to optimise away */
3308 count++;
3309 followop = p->op_next;
3310 }
3311
3312 if (count < 1 || (count == 1 && !defav))
3313 break;
3314
3315 /* pp_padrange in specifically compile-time void context
3316 * skips pushing a mark and lexicals; in all other contexts
3317 * (including unknown till runtime) it pushes a mark and the
3318 * lexicals. We must be very careful then, that the ops we
3319 * optimise away would have exactly the same effect as the
3320 * padrange.
3321 * In particular in void context, we can only optimise to
3322 * a padrange if we see the complete sequence
3323 * pushmark, pad*v, ...., list
3324 * which has the net effect of leaving the markstack as it
3325 * was. Not pushing onto the stack (whereas padsv does touch
3326 * the stack) makes no difference in void context.
3327 */
3328 assert(followop);
3329 if (gvoid) {
3330 if (followop->op_type == OP_LIST
3331 && OP_GIMME(followop,0) == G_VOID
3332 )
3333 {
3334 followop = followop->op_next; /* skip OP_LIST */
3335
3336 /* consolidate two successive my(...);'s */
3337
3338 if ( oldoldop
3339 && oldoldop->op_type == OP_PADRANGE
3340 && (oldoldop->op_flags & OPf_WANT) == OPf_WANT_VOID
3341 && (oldoldop->op_private & OPpLVAL_INTRO) == intro
3342 && !(oldoldop->op_flags & OPf_SPECIAL)
3343 ) {
3344 U8 old_count;
3345 assert(oldoldop->op_next == oldop);
3346 assert( oldop->op_type == OP_NEXTSTATE
3347 || oldop->op_type == OP_DBSTATE);
3348 assert(oldop->op_next == o);
3349
3350 old_count
3351 = (oldoldop->op_private & OPpPADRANGE_COUNTMASK);
3352
3353 /* Do not assume pad offsets for $c and $d are con-
3354 tiguous in
3355 my ($a,$b,$c);
3356 my ($d,$e,$f);
3357 */
3358 if ( oldoldop->op_targ + old_count == base
3359 && old_count < OPpPADRANGE_COUNTMASK - count) {
3360 base = oldoldop->op_targ;
3361 count += old_count;
3362 reuse = 1;
3363 }
3364 }
3365
3366 /* if there's any immediately following singleton
3367 * my var's; then swallow them and the associated
3368 * nextstates; i.e.
3369 * my ($a,$b); my $c; my $d;
3370 * is treated as
3371 * my ($a,$b,$c,$d);
3372 */
3373
3374 while ( ((p = followop->op_next))
3375 && ( p->op_type == OP_PADSV
3376 || p->op_type == OP_PADAV
3377 || p->op_type == OP_PADHV)
3378 && (p->op_flags & OPf_WANT) == OPf_WANT_VOID
3379 && (p->op_private & OPpLVAL_INTRO) == intro
3380 && !(p->op_private & ~OPpLVAL_INTRO)
3381 && p->op_next
3382 && ( p->op_next->op_type == OP_NEXTSTATE
3383 || p->op_next->op_type == OP_DBSTATE)
3384 && count < OPpPADRANGE_COUNTMASK
3385 && base + count == p->op_targ
3386 ) {
3387 count++;
3388 followop = p->op_next;
3389 }
3390 }
3391 else
3392 break;
3393 }
3394
3395 if (reuse) {
3396 assert(oldoldop->op_type == OP_PADRANGE);
3397 oldoldop->op_next = followop;
3398 oldoldop->op_private = (intro | count);
3399 o = oldoldop;
3400 oldop = NULL;
3401 oldoldop = NULL;
3402 }
3403 else {
3404 /* Convert the pushmark into a padrange.
3405 * To make Deparse easier, we guarantee that a padrange was
3406 * *always* formerly a pushmark */
3407 assert(o->op_type == OP_PUSHMARK);
3408 o->op_next = followop;
3409 OpTYPE_set(o, OP_PADRANGE);
3410 o->op_targ = base;
3411 /* bit 7: INTRO; bit 6..0: count */
3412 o->op_private = (intro | count);
3413 o->op_flags = ((o->op_flags & ~(OPf_WANT|OPf_SPECIAL))
3414 | gvoid * OPf_WANT_VOID
3415 | (defav ? OPf_SPECIAL : 0));
3416 }
3417 break;
3418 }
3419
3420 case OP_RV2AV:
3421 if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
3422 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0);
3423 break;
3424
3425 case OP_RV2HV:
3426 case OP_PADHV:
3427 /*'keys %h' in void or scalar context: skip the OP_KEYS
3428 * and perform the functionality directly in the RV2HV/PADHV
3429 * op
3430 */
3431 if (o->op_flags & OPf_REF) {
3432 OP *k = o->op_next;
3433 U8 want = (k->op_flags & OPf_WANT);
3434 if ( k
3435 && k->op_type == OP_KEYS
3436 && ( want == OPf_WANT_VOID
3437 || want == OPf_WANT_SCALAR)
3438 && !(k->op_private & OPpMAYBE_LVSUB)
3439 && !(k->op_flags & OPf_MOD)
3440 ) {
3441 o->op_next = k->op_next;
3442 o->op_flags &= ~(OPf_REF|OPf_WANT);
3443 o->op_flags |= want;
3444 o->op_private |= (o->op_type == OP_PADHV ?
3445 OPpPADHV_ISKEYS : OPpRV2HV_ISKEYS);
3446 /* for keys(%lex), hold onto the OP_KEYS's targ
3447 * since padhv doesn't have its own targ to return
3448 * an int with */
3449 if (!(o->op_type ==OP_PADHV && want == OPf_WANT_SCALAR))
3450 op_null(k);
3451 }
3452 }
3453
3454 /* see if %h is used in boolean context */
3455 if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
3456 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, OPpMAYBE_TRUEBOOL);
3457
3458
3459 if (o->op_type != OP_PADHV)
3460 break;
3461 /* FALLTHROUGH */
3462 case OP_PADAV:
3463 if ( o->op_type == OP_PADAV
3464 && (o->op_flags & OPf_WANT) == OPf_WANT_SCALAR
3465 )
3466 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0);
3467 /* FALLTHROUGH */
3468 case OP_PADSV:
3469 /* Skip over state($x) in void context. */
3470 if (oldop && o->op_private == (OPpPAD_STATE|OPpLVAL_INTRO)
3471 && (o->op_flags & OPf_WANT) == OPf_WANT_VOID)
3472 {
3473 oldop->op_next = o->op_next;
3474 goto redo_nextstate;
3475 }
3476 if (o->op_type != OP_PADAV)
3477 break;
3478 /* FALLTHROUGH */
3479 case OP_GV:
3480 if (o->op_type == OP_PADAV || o->op_next->op_type == OP_RV2AV) {
3481 OP* const pop = (o->op_type == OP_PADAV) ?
3482 o->op_next : o->op_next->op_next;
3483 IV i;
3484 if (pop && pop->op_type == OP_CONST &&
3485 ((PL_op = pop->op_next)) &&
3486 pop->op_next->op_type == OP_AELEM &&
3487 !(pop->op_next->op_private &
3488 (OPpLVAL_INTRO|OPpLVAL_DEFER|OPpDEREF|OPpMAYBE_LVSUB)) &&
3489 (i = SvIV(cSVOPx(pop)->op_sv)) >= -128 && i <= 127)
3490 {
3491 GV *gv;
3492 if (cSVOPx(pop)->op_private & OPpCONST_STRICT)
3493 no_bareword_allowed(pop);
3494 if (o->op_type == OP_GV)
3495 op_null(o->op_next);
3496 op_null(pop->op_next);
3497 op_null(pop);
3498 o->op_flags |= pop->op_next->op_flags & OPf_MOD;
3499 o->op_next = pop->op_next->op_next;
3500 o->op_ppaddr = PL_ppaddr[OP_AELEMFAST];
3501 o->op_private = (U8)i;
3502 if (o->op_type == OP_GV) {
3503 gv = cGVOPo_gv;
3504 GvAVn(gv);
3505 o->op_type = OP_AELEMFAST;
3506 }
3507 else
3508 o->op_type = OP_AELEMFAST_LEX;
3509 }
3510 if (o->op_type != OP_GV)
3511 break;
3512 }
3513
3514 /* Remove $foo from the op_next chain in void context. */
3515 if (oldop
3516 && ( o->op_next->op_type == OP_RV2SV
3517 || o->op_next->op_type == OP_RV2AV
3518 || o->op_next->op_type == OP_RV2HV )
3519 && (o->op_next->op_flags & OPf_WANT) == OPf_WANT_VOID
3520 && !(o->op_next->op_private & OPpLVAL_INTRO))
3521 {
3522 oldop->op_next = o->op_next->op_next;
3523 /* Reprocess the previous op if it is a nextstate, to
3524 allow double-nextstate optimisation. */
3525 redo_nextstate:
3526 if (oldop->op_type == OP_NEXTSTATE) {
3527 oldop->op_opt = 0;
3528 o = oldop;
3529 oldop = oldoldop;
3530 oldoldop = NULL;
3531 goto redo;
3532 }
3533 o = oldop->op_next;
3534 goto redo;
3535 }
3536 else if (o->op_next->op_type == OP_RV2SV) {
3537 if (!(o->op_next->op_private & OPpDEREF)) {
3538 op_null(o->op_next);
3539 o->op_private |= o->op_next->op_private & (OPpLVAL_INTRO
3540 | OPpOUR_INTRO);
3541 o->op_next = o->op_next->op_next;
3542 OpTYPE_set(o, OP_GVSV);
3543 }
3544 }
3545 else if (o->op_next->op_type == OP_READLINE
3546 && o->op_next->op_next->op_type == OP_CONCAT
3547 && (o->op_next->op_next->op_flags & OPf_STACKED))
3548 {
3549 /* Turn "$a .= <FH>" into an OP_RCATLINE. AMS 20010917 */
3550 OpTYPE_set(o, OP_RCATLINE);
3551 o->op_flags |= OPf_STACKED;
3552 op_null(o->op_next->op_next);
3553 op_null(o->op_next);
3554 }
3555
3556 break;
3557
3558 case OP_NOT:
3559 break;
3560
3561 case OP_AND:
3562 case OP_OR:
3563 case OP_DOR:
3564 case OP_CMPCHAIN_AND:
3565 case OP_PUSHDEFER:
3566 while (cLOGOP->op_other->op_type == OP_NULL)
3567 cLOGOP->op_other = cLOGOP->op_other->op_next;
3568 while (o->op_next && ( o->op_type == o->op_next->op_type
3569 || o->op_next->op_type == OP_NULL))
3570 o->op_next = o->op_next->op_next;
3571
3572 /* If we're an OR and our next is an AND in void context, we'll
3573 follow its op_other on short circuit, same for reverse.
3574 We can't do this with OP_DOR since if it's true, its return
3575 value is the underlying value which must be evaluated
3576 by the next op. */
3577 if (o->op_next &&
3578 (
3579 (IS_AND_OP(o) && IS_OR_OP(o->op_next))
3580 || (IS_OR_OP(o) && IS_AND_OP(o->op_next))
3581 )
3582 && (o->op_next->op_flags & OPf_WANT) == OPf_WANT_VOID
3583 ) {
3584 o->op_next = cLOGOPx(o->op_next)->op_other;
3585 }
3586 DEFER(cLOGOP->op_other);
3587 o->op_opt = 1;
3588 break;
3589
3590 case OP_GREPWHILE:
3591 if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
3592 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0);
3593 /* FALLTHROUGH */
3594 case OP_COND_EXPR:
3595 case OP_MAPWHILE:
3596 case OP_ANDASSIGN:
3597 case OP_ORASSIGN:
3598 case OP_DORASSIGN:
3599 case OP_RANGE:
3600 case OP_ONCE:
3601 case OP_ARGDEFELEM:
3602 while (cLOGOP->op_other->op_type == OP_NULL)
3603 cLOGOP->op_other = cLOGOP->op_other->op_next;
3604 DEFER(cLOGOP->op_other);
3605 break;
3606
3607 case OP_ENTERLOOP:
3608 case OP_ENTERITER:
3609 while (cLOOP->op_redoop->op_type == OP_NULL)
3610 cLOOP->op_redoop = cLOOP->op_redoop->op_next;
3611 while (cLOOP->op_nextop->op_type == OP_NULL)
3612 cLOOP->op_nextop = cLOOP->op_nextop->op_next;
3613 while (cLOOP->op_lastop->op_type == OP_NULL)
3614 cLOOP->op_lastop = cLOOP->op_lastop->op_next;
3615 /* a while(1) loop doesn't have an op_next that escapes the
3616 * loop, so we have to explicitly follow the op_lastop to
3617 * process the rest of the code */
3618 DEFER(cLOOP->op_lastop);
3619 break;
3620
3621 case OP_ENTERTRY:
3622 assert(cLOGOPo->op_other->op_type == OP_LEAVETRY);
3623 DEFER(cLOGOPo->op_other);
3624 break;
3625
3626 case OP_ENTERTRYCATCH:
3627 assert(cLOGOPo->op_other->op_type == OP_CATCH);
3628 /* catch body is the ->op_other of the OP_CATCH */
3629 DEFER(cLOGOPx(cLOGOPo->op_other)->op_other);
3630 break;
3631
3632 case OP_SUBST:
3633 if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
3634 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0);
3635 assert(!(cPMOP->op_pmflags & PMf_ONCE));
3636 while (cPMOP->op_pmstashstartu.op_pmreplstart &&
3637 cPMOP->op_pmstashstartu.op_pmreplstart->op_type == OP_NULL)
3638 cPMOP->op_pmstashstartu.op_pmreplstart
3639 = cPMOP->op_pmstashstartu.op_pmreplstart->op_next;
3640 DEFER(cPMOP->op_pmstashstartu.op_pmreplstart);
3641 break;
3642
3643 case OP_SORT: {
3644 OP *oright;
3645
3646 if (o->op_flags & OPf_SPECIAL) {
3647 /* first arg is a code block */
3648 OP * const nullop = OpSIBLING(cLISTOP->op_first);
3649 OP * kid = cUNOPx(nullop)->op_first;
3650
3651 assert(nullop->op_type == OP_NULL);
3652 assert(kid->op_type == OP_SCOPE
3653 || (kid->op_type == OP_NULL && kid->op_targ == OP_LEAVE));
3654 /* since OP_SORT doesn't have a handy op_other-style
3655 * field that can point directly to the start of the code
3656 * block, store it in the otherwise-unused op_next field
3657 * of the top-level OP_NULL. This will be quicker at
3658 * run-time, and it will also allow us to remove leading
3659 * OP_NULLs by just messing with op_nexts without
3660 * altering the basic op_first/op_sibling layout. */
3661 kid = kLISTOP->op_first;
3662 assert(
3663 (kid->op_type == OP_NULL
3664 && ( kid->op_targ == OP_NEXTSTATE
3665 || kid->op_targ == OP_DBSTATE ))
3666 || kid->op_type == OP_STUB
3667 || kid->op_type == OP_ENTER
3668 || (PL_parser && PL_parser->error_count));
3669 nullop->op_next = kid->op_next;
3670 DEFER(nullop->op_next);
3671 }
3672
3673 /* check that RHS of sort is a single plain array */
3674 oright = cUNOPo->op_first;
3675 if (!oright || oright->op_type != OP_PUSHMARK)
3676 break;
3677
3678 if (o->op_private & OPpSORT_INPLACE)
3679 break;
3680
3681 /* reverse sort ... can be optimised. */
3682 if (!OpHAS_SIBLING(cUNOPo)) {
3683 /* Nothing follows us on the list. */
3684 OP * const reverse = o->op_next;
3685
3686 if (reverse->op_type == OP_REVERSE &&
3687 (reverse->op_flags & OPf_WANT) == OPf_WANT_LIST) {
3688 OP * const pushmark = cUNOPx(reverse)->op_first;
3689 if (pushmark && (pushmark->op_type == OP_PUSHMARK)
3690 && (OpSIBLING(cUNOPx(pushmark)) == o)) {
3691 /* reverse -> pushmark -> sort */
3692 o->op_private |= OPpSORT_REVERSE;
3693 op_null(reverse);
3694 pushmark->op_next = oright->op_next;
3695 op_null(oright);
3696 }
3697 }
3698 }
3699
3700 break;
3701 }
3702
3703 case OP_REVERSE: {
3704 OP *ourmark, *theirmark, *ourlast, *iter, *expushmark, *rv2av;
3705 OP *gvop = NULL;
3706 LISTOP *enter, *exlist;
3707
3708 if (o->op_private & OPpSORT_INPLACE)
3709 break;
3710
3711 enter = cLISTOPx(o->op_next);
3712 if (!enter)
3713 break;
3714 if (enter->op_type == OP_NULL) {
3715 enter = cLISTOPx(enter->op_next);
3716 if (!enter)
3717 break;
3718 }
3719 /* for $a (...) will have OP_GV then OP_RV2GV here.
3720 for (...) just has an OP_GV. */
3721 if (enter->op_type == OP_GV) {
3722 gvop = (OP *) enter;
3723 enter = cLISTOPx(enter->op_next);
3724 if (!enter)
3725 break;
3726 if (enter->op_type == OP_RV2GV) {
3727 enter = cLISTOPx(enter->op_next);
3728 if (!enter)
3729 break;
3730 }
3731 }
3732
3733 if (enter->op_type != OP_ENTERITER)
3734 break;
3735
3736 iter = enter->op_next;
3737 if (!iter || iter->op_type != OP_ITER)
3738 break;
3739
3740 expushmark = enter->op_first;
3741 if (!expushmark || expushmark->op_type != OP_NULL
3742 || expushmark->op_targ != OP_PUSHMARK)
3743 break;
3744
3745 exlist = cLISTOPx(OpSIBLING(expushmark));
3746 if (!exlist || exlist->op_type != OP_NULL
3747 || exlist->op_targ != OP_LIST)
3748 break;
3749
3750 if (exlist->op_last != o) {
3751 /* Mmm. Was expecting to point back to this op. */
3752 break;
3753 }
3754 theirmark = exlist->op_first;
3755 if (!theirmark || theirmark->op_type != OP_PUSHMARK)
3756 break;
3757
3758 if (OpSIBLING(theirmark) != o) {
3759 /* There's something between the mark and the reverse, eg
3760 for (1, reverse (...))
3761 so no go. */
3762 break;
3763 }
3764
3765 ourmark = cLISTOPo->op_first;
3766 if (!ourmark || ourmark->op_type != OP_PUSHMARK)
3767 break;
3768
3769 ourlast = cLISTOPo->op_last;
3770 if (!ourlast || ourlast->op_next != o)
3771 break;
3772
3773 rv2av = OpSIBLING(ourmark);
3774 if (rv2av && rv2av->op_type == OP_RV2AV && !OpHAS_SIBLING(rv2av)
3775 && rv2av->op_flags == (OPf_WANT_LIST | OPf_KIDS)) {
3776 /* We're just reversing a single array. */
3777 rv2av->op_flags = OPf_WANT_SCALAR | OPf_KIDS | OPf_REF;
3778 enter->op_flags |= OPf_STACKED;
3779 }
3780
3781 /* We don't have control over who points to theirmark, so sacrifice
3782 ours. */
3783 theirmark->op_next = ourmark->op_next;
3784 theirmark->op_flags = ourmark->op_flags;
3785 ourlast->op_next = gvop ? gvop : (OP *) enter;
3786 op_null(ourmark);
3787 op_null(o);
3788 enter->op_private |= OPpITER_REVERSED;
3789 iter->op_private |= OPpITER_REVERSED;
3790
3791 oldoldop = NULL;
3792 oldop = ourlast;
3793 o = oldop->op_next;
3794 goto redo;
3795 NOT_REACHED; /* NOTREACHED */
3796 break;
3797 }
3798
3799 case OP_UNDEF:
3800 if ((o->op_flags & OPf_KIDS) &&
3801 (cUNOPx(o)->op_first->op_type == OP_PADSV)) {
3802
3803 /* Convert:
3804 * undef
3805 * padsv[$x]
3806 * to:
3807 * undef[$x]
3808 */
3809
3810 OP * padsv = cUNOPx(o)->op_first;
3811 o->op_private = OPpTARGET_MY |
3812 (padsv->op_private & (OPpLVAL_INTRO|OPpPAD_STATE));
3813 o->op_targ = padsv->op_targ; padsv->op_targ = 0;
3814 op_null(padsv);
3815 /* Optimizer does NOT seem to fix up the padsv op_next ptr */
3816 if (oldoldop)
3817 oldoldop->op_next = o;
3818 oldop = oldoldop;
3819 oldoldop = NULL;
3820
3821 } else if (o->op_next->op_type == OP_PADSV) {
3822 OP * padsv = o->op_next;
3823 OP * sassign = (padsv->op_next &&
3824 padsv->op_next->op_type == OP_SASSIGN) ?
3825 padsv->op_next : NULL;
3826 if (sassign && cBINOPx(sassign)->op_first == o) {
3827 /* Convert:
3828 * sassign
3829 * undef
3830 * padsv[$x]
3831 * to:
3832 * undef[$x]
3833 * NOTE: undef does not have the "T" flag set in
3834 * regen/opcodes, as this would cause
3835 * S_maybe_targlex to do the optimization.
3836 * Seems easier to keep it all here, rather
3837 * than have an undef-specific branch in
3838 * S_maybe_targlex just to add the
3839 * OPpUNDEF_KEEP_PV flag.
3840 */
3841 o->op_private = OPpTARGET_MY | OPpUNDEF_KEEP_PV |
3842 (padsv->op_private & (OPpLVAL_INTRO|OPpPAD_STATE));
3843 o->op_targ = padsv->op_targ; padsv->op_targ = 0;
3844 op_null(padsv);
3845 op_null(sassign);
3846 /* Optimizer DOES seems to fix up the op_next ptrs */
3847 }
3848 }
3849 break;
3850
3851 case OP_QR:
3852 case OP_MATCH:
3853 if (!(cPMOP->op_pmflags & PMf_ONCE)) {
3854 assert (!cPMOP->op_pmstashstartu.op_pmreplstart);
3855 }
3856 break;
3857
3858 case OP_RUNCV:
3859 if (!(o->op_private & OPpOFFBYONE) && !CvCLONE(PL_compcv)
3860 && (!CvANON(PL_compcv) || (!PL_cv_has_eval && !PL_perldb)))
3861 {
3862 SV *sv;
3863 if (CvEVAL(PL_compcv)) sv = &PL_sv_undef;
3864 else {
3865 sv = newRV((SV *)PL_compcv);
3866 sv_rvweaken(sv);
3867 SvREADONLY_on(sv);
3868 }
3869 OpTYPE_set(o, OP_CONST);
3870 o->op_flags |= OPf_SPECIAL;
3871 cSVOPo->op_sv = sv;
3872 }
3873 break;
3874
3875 case OP_SASSIGN: {
3876 if (OP_GIMME(o,0) == G_VOID
3877 || ( o->op_next->op_type == OP_LINESEQ
3878 && ( o->op_next->op_next->op_type == OP_LEAVESUB
3879 || ( o->op_next->op_next->op_type == OP_RETURN
3880 && !CvLVALUE(PL_compcv)))))
3881 {
3882 OP *right = cBINOP->op_first;
3883 if (right) {
3884 /* sassign
3885 * RIGHT
3886 * substr
3887 * pushmark
3888 * arg1
3889 * arg2
3890 * ...
3891 * becomes
3892 *
3893 * ex-sassign
3894 * substr
3895 * pushmark
3896 * RIGHT
3897 * arg1
3898 * arg2
3899 * ...
3900 */
3901 OP *left = OpSIBLING(right);
3902 if (left->op_type == OP_SUBSTR
3903 && (left->op_private & 7) < 4) {
3904 op_null(o);
3905 /* cut out right */
3906 op_sibling_splice(o, NULL, 1, NULL);
3907 /* and insert it as second child of OP_SUBSTR */
3908 op_sibling_splice(left, cBINOPx(left)->op_first, 0,
3909 right);
3910 left->op_private |= OPpSUBSTR_REPL_FIRST;
3911 left->op_flags =
3912 (o->op_flags & ~OPf_WANT) | OPf_WANT_VOID;
3913 }
3914 }
3915 }
3916 OP* rhs = cBINOPx(o)->op_first;
3917 OP* lval = cBINOPx(o)->op_last;
3918
3919 /* Combine a simple SASSIGN OP with a PADSV lvalue child OP
3920 * into a single OP. */
3921
3922 /* This optimization covers arbitrarily complicated RHS OP
3923 * trees. Separate optimizations may exist for specific,
3924 * single RHS OPs, such as:
3925 * "my $foo = undef;" or "my $bar = $other_padsv;" */
3926
3927 if (!(o->op_private & (OPpASSIGN_BACKWARDS|OPpASSIGN_CV_TO_GV))
3928 && lval && (lval->op_type == OP_PADSV) &&
3929 !(lval->op_private & OPpDEREF)
3930 /* skip if padrange has already gazumped the padsv */
3931 && (lval == oldop)
3932 /* Memoize::Once produces a non-standard SASSIGN that
3933 * doesn't actually point to pp_sassign, has only one
3934 * child (PADSV), and gets to it via op_other rather
3935 * than op_next. Don't try to optimize this. */
3936 && (lval != rhs)
3937 ) {
3938 /* SASSIGN's bitfield flags, such as op_moresib and
3939 * op_slabbed, will be carried over unchanged. */
3940 OpTYPE_set(o, OP_PADSV_STORE);
3941
3942 /* Explicitly craft the new OP's op_flags, carrying
3943 * some bits over from the SASSIGN */
3944 o->op_flags = (
3945 OPf_KIDS | OPf_STACKED |
3946 (o->op_flags & (OPf_WANT|OPf_PARENS))
3947 );
3948
3949 /* Reset op_private flags, taking relevant private flags
3950 * from the PADSV */
3951 o->op_private = (lval->op_private &
3952 (OPpLVAL_INTRO|OPpPAD_STATE|OPpDEREF));
3953
3954 /* Steal the targ from the PADSV */
3955 o->op_targ = lval->op_targ; lval->op_targ = 0;
3956
3957 /* Fixup op_next ptrs */
3958 assert(oldop->op_type == OP_PADSV);
3959 /* oldoldop can be arbitrarily deep in the RHS OP tree */
3960 oldoldop->op_next = o;
3961
3962 /* Even when (rhs != oldoldop), rhs might still have a
3963 * relevant op_next ptr to lval. This is definitely true
3964 * when rhs is OP_NULL with a LOGOP kid (e.g. orassign).
3965 * There may be other cases. */
3966 if (rhs->op_next == lval)
3967 rhs->op_next = o;
3968
3969 /* Now null-out the PADSV */
3970 op_null(lval);
3971
3972 /* NULL the previous op ptrs, so rpeep can continue */
3973 oldoldop = NULL; oldop = NULL;
3974 }
3975
3976 /* Combine a simple SASSIGN OP with an AELEMFAST_LEX lvalue
3977 * into a single OP. This optimization covers arbitrarily
3978 * complicated RHS OP trees. */
3979
3980 if (!(o->op_private & (OPpASSIGN_BACKWARDS|OPpASSIGN_CV_TO_GV))
3981 && (lval->op_type == OP_NULL) && (lval->op_private == 2) &&
3982 (cBINOPx(lval)->op_first->op_type == OP_AELEMFAST_LEX)
3983 /* For efficiency, pp_aelemfastlex_store() doesn't push its
3984 * result onto the stack. For the relatively rare case of
3985 * the array assignment not in void context, we just do it
3986 * the old slow way. */
3987 && OP_GIMME(o,0) == G_VOID
3988 ) {
3989 OP * lex = cBINOPx(lval)->op_first;
3990 /* SASSIGN's bitfield flags, such as op_moresib and
3991 * op_slabbed, will be carried over unchanged. */
3992 OpTYPE_set(o, OP_AELEMFASTLEX_STORE);
3993
3994 /* Explicitly craft the new OP's op_flags, carrying
3995 * some bits over from the SASSIGN */
3996 o->op_flags = (
3997 OPf_KIDS | OPf_STACKED |
3998 (o->op_flags & (OPf_WANT|OPf_PARENS))
3999 );
4000
4001 /* Copy the AELEMFAST_LEX op->private, which contains
4002 * the key index. */
4003 o->op_private = lex->op_private;
4004
4005 /* Take the targ from the AELEMFAST_LEX */
4006 o->op_targ = lex->op_targ; lex->op_targ = 0;
4007
4008 assert(oldop->op_type == OP_AELEMFAST_LEX);
4009 /* oldoldop can be arbitrarily deep in the RHS OP tree */
4010 oldoldop->op_next = o;
4011
4012 /* Even when (rhs != oldoldop), rhs might still have a
4013 * relevant op_next ptr to lex. (Updating it here can
4014 * also cause other ops in the RHS to get the desired
4015 * op_next pointer, presumably thanks to the finalizer.)
4016 * This is definitely truewhen rhs is OP_NULL with a
4017 * LOGOP kid (e.g. orassign). There may be other cases. */
4018 if (rhs->op_next == lex)
4019 rhs->op_next = o;
4020
4021 /* Now null-out the AELEMFAST_LEX */
4022 op_null(lex);
4023
4024 /* NULL the previous op ptrs, so rpeep can continue */
4025 oldop = oldoldop; oldoldop = NULL;
4026 }
4027
4028 break;
4029 }
4030
4031 case OP_AASSIGN: {
4032 int l, r, lr, lscalars, rscalars;
4033
4034 /* handle common vars detection, e.g. ($a,$b) = ($b,$a).
4035 Note that we do this now rather than in newASSIGNOP(),
4036 since only by now are aliased lexicals flagged as such
4037
4038 See the essay "Common vars in list assignment" above for
4039 the full details of the rationale behind all the conditions
4040 below.
4041
4042 PL_generation sorcery:
4043 To detect whether there are common vars, the global var
4044 PL_generation is incremented for each assign op we scan.
4045 Then we run through all the lexical variables on the LHS,
4046 of the assignment, setting a spare slot in each of them to
4047 PL_generation. Then we scan the RHS, and if any lexicals
4048 already have that value, we know we've got commonality.
4049 Also, if the generation number is already set to
4050 PERL_INT_MAX, then the variable is involved in aliasing, so
4051 we also have potential commonality in that case.
4052 */
4053
4054 PL_generation++;
4055 /* scan LHS */
4056 lscalars = 0;
4057 l = S_aassign_scan(aTHX_ cLISTOPo->op_last, FALSE, &lscalars);
4058 /* scan RHS */
4059 rscalars = 0;
4060 r = S_aassign_scan(aTHX_ cLISTOPo->op_first, TRUE, &rscalars);
4061 lr = (l|r);
4062
4063
4064 /* After looking for things which are *always* safe, this main
4065 * if/else chain selects primarily based on the type of the
4066 * LHS, gradually working its way down from the more dangerous
4067 * to the more restrictive and thus safer cases */
4068
4069 if ( !l /* () = ....; */
4070 || !r /* .... = (); */
4071 || !(l & ~AAS_SAFE_SCALAR) /* (undef, pos()) = ...; */
4072 || !(r & ~AAS_SAFE_SCALAR) /* ... = (1,2,length,undef); */
4073 || (lscalars < 2) /* (undef, $x) = ... */
4074 ) {
4075 NOOP; /* always safe */
4076 }
4077 else if (l & AAS_DANGEROUS) {
4078 /* always dangerous */
4079 o->op_private |= OPpASSIGN_COMMON_SCALAR;
4080 o->op_private |= OPpASSIGN_COMMON_AGG;
4081 }
4082 else if (l & (AAS_PKG_SCALAR|AAS_PKG_AGG)) {
4083 /* package vars are always dangerous - too many
4084 * aliasing possibilities */
4085 if (l & AAS_PKG_SCALAR)
4086 o->op_private |= OPpASSIGN_COMMON_SCALAR;
4087 if (l & AAS_PKG_AGG)
4088 o->op_private |= OPpASSIGN_COMMON_AGG;
4089 }
4090 else if (l & ( AAS_MY_SCALAR|AAS_MY_AGG
4091 |AAS_LEX_SCALAR|AAS_LEX_AGG))
4092 {
4093 /* LHS contains only lexicals and safe ops */
4094
4095 if (l & (AAS_MY_AGG|AAS_LEX_AGG))
4096 o->op_private |= OPpASSIGN_COMMON_AGG;
4097
4098 if (l & (AAS_MY_SCALAR|AAS_LEX_SCALAR)) {
4099 if (lr & AAS_LEX_SCALAR_COMM)
4100 o->op_private |= OPpASSIGN_COMMON_SCALAR;
4101 else if ( !(l & AAS_LEX_SCALAR)
4102 && (r & AAS_DEFAV))
4103 {
4104 /* falsely mark
4105 * my (...) = @_
4106 * as scalar-safe for performance reasons.
4107 * (it will still have been marked _AGG if necessary */
4108 NOOP;
4109 }
4110 else if (r & (AAS_PKG_SCALAR|AAS_PKG_AGG|AAS_DANGEROUS))
4111 /* if there are only lexicals on the LHS and no
4112 * common ones on the RHS, then we assume that the
4113 * only way those lexicals could also get
4114 * on the RHS is via some sort of dereffing or
4115 * closure, e.g.
4116 * $r = \$lex;
4117 * ($lex, $x) = (1, $$r)
4118 * and in this case we assume the var must have
4119 * a bumped ref count. So if its ref count is 1,
4120 * it must only be on the LHS.
4121 */
4122 o->op_private |= OPpASSIGN_COMMON_RC1;
4123 }
4124 }
4125
4126 /* ... = ($x)
4127 * may have to handle aggregate on LHS, but we can't
4128 * have common scalars. */
4129 if (rscalars < 2)
4130 o->op_private &=
4131 ~(OPpASSIGN_COMMON_SCALAR|OPpASSIGN_COMMON_RC1);
4132
4133 if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
4134 S_check_for_bool_cxt(o, 1, OPpASSIGN_TRUEBOOL, 0);
4135 break;
4136 }
4137
4138 case OP_REF:
4139 case OP_BLESSED:
4140 /* if the op is used in boolean context, set the TRUEBOOL flag
4141 * which enables an optimisation at runtime which avoids creating
4142 * a stack temporary for known-true package names */
4143 if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
4144 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, OPpMAYBE_TRUEBOOL);
4145 break;
4146
4147 case OP_LENGTH:
4148 /* see if the op is used in known boolean context,
4149 * but not if OA_TARGLEX optimisation is enabled */
4150 if ( (o->op_flags & OPf_WANT) == OPf_WANT_SCALAR
4151 && !(o->op_private & OPpTARGET_MY)
4152 )
4153 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0);
4154 break;
4155
4156 case OP_POS:
4157 /* see if the op is used in known boolean context */
4158 if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
4159 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0);
4160 break;
4161
4162 case OP_CUSTOM: {
4163 Perl_cpeep_t cpeep =
4164 XopENTRYCUSTOM(o, xop_peep);
4165 if (cpeep)
4166 cpeep(aTHX_ o, oldop);
4167 break;
4168 }
4169
4170 }
4171 /* did we just null the current op? If so, re-process it to handle
4172 * eliding "empty" ops from the chain */
4173 if (o->op_type == OP_NULL && oldop && oldop->op_next == o) {
4174 o->op_opt = 0;
4175 o = oldop;
4176 }
4177 else {
4178 oldoldop = oldop;
4179 oldop = o;
4180 }
4181 }
4182 LEAVE;
4183 }
4184
4185 void
Perl_peep(pTHX_ OP * o)4186 Perl_peep(pTHX_ OP *o)
4187 {
4188 CALL_RPEEP(o);
4189 }
4190
4191 /*
4192 * ex: set ts=8 sts=4 sw=4 et:
4193 */
4194