xref: /openbsd/gnu/usr.bin/perl/peep.c (revision f2a19305)
1 /*    peep.c
2  *
3  *    Copyright (C) 1991-2022 by Larry Wall and others
4  *
5  *    You may distribute under the terms of either the GNU General Public
6  *    License or the Artistic License, as specified in the README file.
7  *
8  */
9 
10 /*
11  * Aragorn sped on up the hill. Every now and again he bent to the ground.
12  * Hobbits go light, and their footprints are not easy even for a Ranger to
13  * read, but not far from the top a spring crossed the path, and in the wet
14  * earth he saw what he was seeking.
15  * 'I read the signs aright,' he said to himself. 'Frodo ran to the hill-top.
16  * I wonder what he saw there? But he returned by the same way, and went down
17  * the hill again.'
18  */
19 
20 /* This file contains functions for optimizing and finalizing the OP
21  * structures that hold a compiled perl program
22  */
23 
24 #include "EXTERN.h"
25 #define PERL_IN_PEEP_C
26 #include "perl.h"
27 
28 
29 #define CALL_RPEEP(o) PL_rpeepp(aTHX_ o)
30 
31 
32 static void
S_scalar_slice_warning(pTHX_ const OP * o)33 S_scalar_slice_warning(pTHX_ const OP *o)
34 {
35     OP *kid;
36     const bool is_hash = o->op_type == OP_HSLICE
37                 || (o->op_type == OP_NULL && o->op_targ == OP_HSLICE);
38     SV *name;
39 
40     if (!(o->op_private & OPpSLICEWARNING))
41         return;
42     if (PL_parser && PL_parser->error_count)
43         /* This warning can be nonsensical when there is a syntax error. */
44         return;
45 
46     kid = cLISTOPo->op_first;
47     kid = OpSIBLING(kid); /* get past pushmark */
48     /* weed out false positives: any ops that can return lists */
49     switch (kid->op_type) {
50     case OP_BACKTICK:
51     case OP_GLOB:
52     case OP_READLINE:
53     case OP_MATCH:
54     case OP_RV2AV:
55     case OP_EACH:
56     case OP_VALUES:
57     case OP_KEYS:
58     case OP_SPLIT:
59     case OP_LIST:
60     case OP_SORT:
61     case OP_REVERSE:
62     case OP_ENTERSUB:
63     case OP_CALLER:
64     case OP_LSTAT:
65     case OP_STAT:
66     case OP_READDIR:
67     case OP_SYSTEM:
68     case OP_TMS:
69     case OP_LOCALTIME:
70     case OP_GMTIME:
71     case OP_ENTEREVAL:
72         return;
73     }
74 
75     /* Don't warn if we have a nulled list either. */
76     if (kid->op_type == OP_NULL && kid->op_targ == OP_LIST)
77         return;
78 
79     assert(OpSIBLING(kid));
80     name = op_varname(OpSIBLING(kid));
81     if (!name) /* XS module fiddling with the op tree */
82         return;
83     warn_elem_scalar_context(kid, name, is_hash, true);
84 }
85 
86 
87 /* info returned by S_sprintf_is_multiconcatable() */
88 
89 struct sprintf_ismc_info {
90     SSize_t nargs;    /* num of args to sprintf (not including the format) */
91     char  *start;     /* start of raw format string */
92     char  *end;       /* bytes after end of raw format string */
93     STRLEN total_len; /* total length (in bytes) of format string, not
94                          including '%s' and  half of '%%' */
95     STRLEN variant;   /* number of bytes by which total_len_p would grow
96                          if upgraded to utf8 */
97     bool   utf8;      /* whether the format is utf8 */
98 };
99 
100 /* is the OP_SPRINTF o suitable for converting into a multiconcat op?
101  * i.e. its format argument is a const string with only '%s' and '%%'
102  * formats, and the number of args is known, e.g.
103  *    sprintf "a=%s f=%s", $a[0], scalar(f());
104  * but not
105  *    sprintf "i=%d a=%s f=%s", $i, @a, f();
106  *
107  * If successful, the sprintf_ismc_info struct pointed to by info will be
108  * populated.
109  */
110 
111 STATIC bool
S_sprintf_is_multiconcatable(pTHX_ OP * o,struct sprintf_ismc_info * info)112 S_sprintf_is_multiconcatable(pTHX_ OP *o,struct sprintf_ismc_info *info)
113 {
114     OP    *pm, *constop, *kid;
115     SV    *sv;
116     char  *s, *e, *p;
117     SSize_t nargs, nformats;
118     STRLEN cur, total_len, variant;
119     bool   utf8;
120 
121     /* if sprintf's behaviour changes, die here so that someone
122      * can decide whether to enhance this function or skip optimising
123      * under those new circumstances */
124     assert(!(o->op_flags & OPf_STACKED));
125     assert(!(PL_opargs[OP_SPRINTF] & OA_TARGLEX));
126     assert(!(o->op_private & ~OPpARG4_MASK));
127 
128     pm = cUNOPo->op_first;
129     if (pm->op_type != OP_PUSHMARK) /* weird coreargs stuff */
130         return FALSE;
131     constop = OpSIBLING(pm);
132     if (!constop || constop->op_type != OP_CONST)
133         return FALSE;
134     sv = cSVOPx_sv(constop);
135     if (SvMAGICAL(sv) || !SvPOK(sv))
136         return FALSE;
137 
138     s = SvPV(sv, cur);
139     e = s + cur;
140 
141     /* Scan format for %% and %s and work out how many %s there are.
142      * Abandon if other format types are found.
143      */
144 
145     nformats  = 0;
146     total_len = 0;
147     variant   = 0;
148 
149     for (p = s; p < e; p++) {
150         if (*p != '%') {
151             total_len++;
152             if (!UTF8_IS_INVARIANT(*p))
153                 variant++;
154             continue;
155         }
156         p++;
157         if (p >= e)
158             return FALSE; /* lone % at end gives "Invalid conversion" */
159         if (*p == '%')
160             total_len++;
161         else if (*p == 's')
162             nformats++;
163         else
164             return FALSE;
165     }
166 
167     if (!nformats || nformats > PERL_MULTICONCAT_MAXARG)
168         return FALSE;
169 
170     utf8 = cBOOL(SvUTF8(sv));
171     if (utf8)
172         variant = 0;
173 
174     /* scan args; they must all be in scalar cxt */
175 
176     nargs = 0;
177     kid = OpSIBLING(constop);
178 
179     while (kid) {
180         if ((kid->op_flags & OPf_WANT) != OPf_WANT_SCALAR)
181             return FALSE;
182         nargs++;
183         kid = OpSIBLING(kid);
184     }
185 
186     if (nargs != nformats)
187         return FALSE; /* e.g. sprintf("%s%s", $a); */
188 
189 
190     info->nargs      = nargs;
191     info->start      = s;
192     info->end        = e;
193     info->total_len  = total_len;
194     info->variant    = variant;
195     info->utf8       = utf8;
196 
197     return TRUE;
198 }
199 
200 /* S_maybe_multiconcat():
201  *
202  * given an OP_STRINGIFY, OP_SASSIGN, OP_CONCAT or OP_SPRINTF op, possibly
203  * convert it (and its children) into an OP_MULTICONCAT. See the code
204  * comments just before pp_multiconcat() for the full details of what
205  * OP_MULTICONCAT supports.
206  *
207  * Basically we're looking for an optree with a chain of OP_CONCATS down
208  * the LHS (or an OP_SPRINTF), with possibly an OP_SASSIGN, and/or
209  * OP_STRINGIFY, and/or OP_CONCAT acting as '.=' at its head, e.g.
210  *
211  *      $x = "$a$b-$c"
212  *
213  *  looks like
214  *
215  *      SASSIGN
216  *         |
217  *      STRINGIFY   -- PADSV[$x]
218  *         |
219  *         |
220  *      ex-PUSHMARK -- CONCAT/S
221  *                        |
222  *                     CONCAT/S  -- PADSV[$d]
223  *                        |
224  *                     CONCAT    -- CONST["-"]
225  *                        |
226  *                     PADSV[$a] -- PADSV[$b]
227  *
228  * Note that at this stage the OP_SASSIGN may have already been optimised
229  * away with OPpTARGET_MY set on the OP_STRINGIFY or OP_CONCAT.
230  */
231 
232 STATIC void
S_maybe_multiconcat(pTHX_ OP * o)233 S_maybe_multiconcat(pTHX_ OP *o)
234 {
235     OP *lastkidop;   /* the right-most of any kids unshifted onto o */
236     OP *topop;       /* the top-most op in the concat tree (often equals o,
237                         unless there are assign/stringify ops above it */
238     OP *parentop;    /* the parent op of topop (or itself if no parent) */
239     OP *targmyop;    /* the op (if any) with the OPpTARGET_MY flag */
240     OP *targetop;    /* the op corresponding to target=... or target.=... */
241     OP *stringop;    /* the OP_STRINGIFY op, if any */
242     OP *nextop;      /* used for recreating the op_next chain without consts */
243     OP *kid;         /* general-purpose op pointer */
244     UNOP_AUX_item *aux;
245     UNOP_AUX_item *lenp;
246     char *const_str, *p;
247     struct sprintf_ismc_info sprintf_info;
248 
249                      /* store info about each arg in args[];
250                       * toparg is the highest used slot; argp is a general
251                       * pointer to args[] slots */
252     struct {
253         void *p;      /* initially points to const sv (or null for op);
254                          later, set to SvPV(constsv), with ... */
255         STRLEN len;   /* ... len set to SvPV(..., len) */
256     } *argp, *toparg, args[PERL_MULTICONCAT_MAXARG*2 + 1];
257 
258     SSize_t nargs  = 0;
259     SSize_t nconst = 0;
260     SSize_t nadjconst  = 0; /* adjacent consts - may be demoted to args */
261     STRLEN variant;
262     bool utf8 = FALSE;
263     bool kid_is_last = FALSE; /* most args will be the RHS kid of a concat op;
264                                  the last-processed arg will the LHS of one,
265                                  as args are processed in reverse order */
266     U8   stacked_last = 0;   /* whether the last seen concat op was STACKED */
267     STRLEN total_len  = 0;   /* sum of the lengths of the const segments */
268     U8 flags          = 0;   /* what will become the op_flags and ... */
269     U8 private_flags  = 0;   /* ... op_private of the multiconcat op */
270     bool is_sprintf = FALSE; /* we're optimising an sprintf */
271     bool is_targable  = FALSE; /* targetop is an OPpTARGET_MY candidate */
272     bool prev_was_const = FALSE; /* previous arg was a const */
273 
274     /* -----------------------------------------------------------------
275      * Phase 1:
276      *
277      * Examine the optree non-destructively to determine whether it's
278      * suitable to be converted into an OP_MULTICONCAT. Accumulate
279      * information about the optree in args[].
280      */
281 
282     argp     = args;
283     targmyop = NULL;
284     targetop = NULL;
285     stringop = NULL;
286     topop    = o;
287     parentop = o;
288 
289     assert(   o->op_type == OP_SASSIGN
290            || o->op_type == OP_CONCAT
291            || o->op_type == OP_SPRINTF
292            || o->op_type == OP_STRINGIFY);
293 
294     Zero(&sprintf_info, 1, struct sprintf_ismc_info);
295 
296     /* first see if, at the top of the tree, there is an assign,
297      * append and/or stringify */
298 
299     if (topop->op_type == OP_SASSIGN) {
300         /* expr = ..... */
301         if (o->op_ppaddr != PL_ppaddr[OP_SASSIGN])
302             return;
303         if (o->op_private & (OPpASSIGN_BACKWARDS|OPpASSIGN_CV_TO_GV))
304             return;
305         assert(!(o->op_private & ~OPpARG2_MASK)); /* barf on unknown flags */
306 
307         parentop = topop;
308         topop = cBINOPo->op_first;
309         targetop = OpSIBLING(topop);
310         if (!targetop) /* probably some sort of syntax error */
311             return;
312 
313         /* don't optimise away assign in 'local $foo = ....' */
314         if (   (targetop->op_private & OPpLVAL_INTRO)
315             /* these are the common ops which do 'local', but
316              * not all */
317             && (   targetop->op_type == OP_GVSV
318                 || targetop->op_type == OP_RV2SV
319                 || targetop->op_type == OP_AELEM
320                 || targetop->op_type == OP_HELEM
321                 )
322         )
323             return;
324     }
325     else if (   topop->op_type == OP_CONCAT
326              && (topop->op_flags & OPf_STACKED)
327              && (!(topop->op_private & OPpCONCAT_NESTED))
328             )
329     {
330         /* expr .= ..... */
331 
332         /* OPpTARGET_MY shouldn't be able to be set here. If it is,
333          * decide what to do about it */
334         assert(!(o->op_private & OPpTARGET_MY));
335 
336         /* barf on unknown flags */
337         assert(!(o->op_private & ~(OPpARG2_MASK|OPpTARGET_MY)));
338         private_flags |= OPpMULTICONCAT_APPEND;
339         targetop = cBINOPo->op_first;
340         parentop = topop;
341         topop    = OpSIBLING(targetop);
342 
343         /* $x .= <FOO> gets optimised to rcatline instead */
344         if (topop->op_type == OP_READLINE)
345             return;
346     }
347 
348     if (targetop) {
349         /* Can targetop (the LHS) if it's a padsv, be optimised
350          * away and use OPpTARGET_MY instead?
351          */
352         if (    (targetop->op_type == OP_PADSV)
353             && !(targetop->op_private & OPpDEREF)
354             && !(targetop->op_private & OPpPAD_STATE)
355                /* we don't support 'my $x .= ...' */
356             && (   o->op_type == OP_SASSIGN
357                 || !(targetop->op_private & OPpLVAL_INTRO))
358         )
359             is_targable = TRUE;
360     }
361 
362     if (topop->op_type == OP_STRINGIFY) {
363         if (topop->op_ppaddr != PL_ppaddr[OP_STRINGIFY])
364             return;
365         stringop = topop;
366 
367         /* barf on unknown flags */
368         assert(!(o->op_private & ~(OPpARG4_MASK|OPpTARGET_MY)));
369 
370         if ((topop->op_private & OPpTARGET_MY)) {
371             if (o->op_type == OP_SASSIGN)
372                 return; /* can't have two assigns */
373             targmyop = topop;
374         }
375 
376         private_flags |= OPpMULTICONCAT_STRINGIFY;
377         parentop = topop;
378         topop = cBINOPx(topop)->op_first;
379         assert(OP_TYPE_IS_OR_WAS_NN(topop, OP_PUSHMARK));
380         topop = OpSIBLING(topop);
381     }
382 
383     if (topop->op_type == OP_SPRINTF) {
384         if (topop->op_ppaddr != PL_ppaddr[OP_SPRINTF])
385             return;
386         if (S_sprintf_is_multiconcatable(aTHX_ topop, &sprintf_info)) {
387             nargs     = sprintf_info.nargs;
388             total_len = sprintf_info.total_len;
389             variant   = sprintf_info.variant;
390             utf8      = sprintf_info.utf8;
391             is_sprintf = TRUE;
392             private_flags |= OPpMULTICONCAT_FAKE;
393             toparg = argp;
394             /* we have an sprintf op rather than a concat optree.
395              * Skip most of the code below which is associated with
396              * processing that optree. We also skip phase 2, determining
397              * whether its cost effective to optimise, since for sprintf,
398              * multiconcat is *always* faster */
399             goto create_aux;
400         }
401         /* note that even if the sprintf itself isn't multiconcatable,
402          * the expression as a whole may be, e.g. in
403          *    $x .= sprintf("%d",...)
404          * the sprintf op will be left as-is, but the concat/S op may
405          * be upgraded to multiconcat
406          */
407     }
408     else if (topop->op_type == OP_CONCAT) {
409         if (topop->op_ppaddr != PL_ppaddr[OP_CONCAT])
410             return;
411 
412         if ((topop->op_private & OPpTARGET_MY)) {
413             if (o->op_type == OP_SASSIGN || targmyop)
414                 return; /* can't have two assigns */
415             targmyop = topop;
416         }
417     }
418 
419     /* Is it safe to convert a sassign/stringify/concat op into
420      * a multiconcat? */
421     assert((PL_opargs[OP_SASSIGN]   & OA_CLASS_MASK) == OA_BINOP);
422     assert((PL_opargs[OP_CONCAT]    & OA_CLASS_MASK) == OA_BINOP);
423     assert((PL_opargs[OP_STRINGIFY] & OA_CLASS_MASK) == OA_LISTOP);
424     assert((PL_opargs[OP_SPRINTF]   & OA_CLASS_MASK) == OA_LISTOP);
425     STATIC_ASSERT_STMT(   STRUCT_OFFSET(BINOP,    op_last)
426                        == STRUCT_OFFSET(UNOP_AUX, op_aux));
427     STATIC_ASSERT_STMT(   STRUCT_OFFSET(LISTOP,   op_last)
428                        == STRUCT_OFFSET(UNOP_AUX, op_aux));
429 
430     /* Now scan the down the tree looking for a series of
431      * CONCAT/OPf_STACKED ops on the LHS (with the last one not
432      * stacked). For example this tree:
433      *
434      *     |
435      *   CONCAT/STACKED
436      *     |
437      *   CONCAT/STACKED -- EXPR5
438      *     |
439      *   CONCAT/STACKED -- EXPR4
440      *     |
441      *   CONCAT -- EXPR3
442      *     |
443      *   EXPR1  -- EXPR2
444      *
445      * corresponds to an expression like
446      *
447      *   (EXPR1 . EXPR2 . EXPR3 . EXPR4 . EXPR5)
448      *
449      * Record info about each EXPR in args[]: in particular, whether it is
450      * a stringifiable OP_CONST and if so what the const sv is.
451      *
452      * The reason why the last concat can't be STACKED is the difference
453      * between
454      *
455      *    ((($a .= $a) .= $a) .= $a) .= $a
456      *
457      * and
458      *    $a . $a . $a . $a . $a
459      *
460      * The main difference between the optrees for those two constructs
461      * is the presence of the last STACKED. As well as modifying $a,
462      * the former sees the changed $a between each concat, so if $s is
463      * initially 'a', the first returns 'a' x 16, while the latter returns
464      * 'a' x 5. And pp_multiconcat can't handle that kind of thing.
465      */
466 
467     kid = topop;
468 
469     for (;;) {
470         OP *argop;
471         SV *sv;
472         bool last = FALSE;
473 
474         if (    kid->op_type == OP_CONCAT
475             && !kid_is_last
476         ) {
477             OP *k1, *k2;
478             k1 = cUNOPx(kid)->op_first;
479             k2 = OpSIBLING(k1);
480             /* shouldn't happen except maybe after compile err? */
481             if (!k2)
482                 return;
483 
484             /* avoid turning (A . B . ($lex = C) ...)  into  (A . B . C ...) */
485             if (kid->op_private & OPpTARGET_MY)
486                 kid_is_last = TRUE;
487 
488             stacked_last = (kid->op_flags & OPf_STACKED);
489             if (!stacked_last)
490                 kid_is_last = TRUE;
491 
492             kid   = k1;
493             argop = k2;
494         }
495         else {
496             argop = kid;
497             last = TRUE;
498         }
499 
500         if (   nargs + nadjconst  >  PERL_MULTICONCAT_MAXARG        - 2
501             || (argp - args + 1)  > (PERL_MULTICONCAT_MAXARG*2 + 1) - 2)
502         {
503             /* At least two spare slots are needed to decompose both
504              * concat args. If there are no slots left, continue to
505              * examine the rest of the optree, but don't push new values
506              * on args[]. If the optree as a whole is legal for conversion
507              * (in particular that the last concat isn't STACKED), then
508              * the first PERL_MULTICONCAT_MAXARG elements of the optree
509              * can be converted into an OP_MULTICONCAT now, with the first
510              * child of that op being the remainder of the optree -
511              * which may itself later be converted to a multiconcat op
512              * too.
513              */
514             if (last) {
515                 /* the last arg is the rest of the optree */
516                 argp++->p = NULL;
517                 nargs++;
518             }
519         }
520         else if (   argop->op_type == OP_CONST
521             && ((sv = cSVOPx_sv(argop)))
522             /* defer stringification until runtime of 'constant'
523              * things that might stringify variantly, e.g. the radix
524              * point of NVs, or overloaded RVs */
525             && (SvPOK(sv) || SvIOK(sv))
526             && (!SvGMAGICAL(sv))
527         ) {
528             if (argop->op_private & OPpCONST_STRICT)
529                 no_bareword_allowed(argop);
530             argp++->p = sv;
531             utf8   |= cBOOL(SvUTF8(sv));
532             nconst++;
533             if (prev_was_const)
534                 /* this const may be demoted back to a plain arg later;
535                  * make sure we have enough arg slots left */
536                 nadjconst++;
537             prev_was_const = !prev_was_const;
538         }
539         else {
540             argp++->p = NULL;
541             nargs++;
542             prev_was_const = FALSE;
543         }
544 
545         if (last)
546             break;
547     }
548 
549     toparg = argp - 1;
550 
551     if (stacked_last)
552         return; /* we don't support ((A.=B).=C)...) */
553 
554     /* look for two adjacent consts and don't fold them together:
555      *     $o . "a" . "b"
556      * should do
557      *     $o->concat("a")->concat("b")
558      * rather than
559      *     $o->concat("ab")
560      * (but $o .=  "a" . "b" should still fold)
561      */
562     {
563         bool seen_nonconst = FALSE;
564         for (argp = toparg; argp >= args; argp--) {
565             if (argp->p == NULL) {
566                 seen_nonconst = TRUE;
567                 continue;
568             }
569             if (!seen_nonconst)
570                 continue;
571             if (argp[1].p) {
572                 /* both previous and current arg were constants;
573                  * leave the current OP_CONST as-is */
574                 argp->p = NULL;
575                 nconst--;
576                 nargs++;
577             }
578         }
579     }
580 
581     /* -----------------------------------------------------------------
582      * Phase 2:
583      *
584      * At this point we have determined that the optree *can* be converted
585      * into a multiconcat. Having gathered all the evidence, we now decide
586      * whether it *should*.
587      */
588 
589 
590     /* we need at least one concat action, e.g.:
591      *
592      *  Y . Z
593      *  X = Y . Z
594      *  X .= Y
595      *
596      * otherwise we could be doing something like $x = "foo", which
597      * if treated as a concat, would fail to COW.
598      */
599     if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 2)
600         return;
601 
602     /* Benchmarking seems to indicate that we gain if:
603      * * we optimise at least two actions into a single multiconcat
604      *    (e.g concat+concat, sassign+concat);
605      * * or if we can eliminate at least 1 OP_CONST;
606      * * or if we can eliminate a padsv via OPpTARGET_MY
607      */
608 
609     if (
610            /* eliminated at least one OP_CONST */
611            nconst >= 1
612            /* eliminated an OP_SASSIGN */
613         || o->op_type == OP_SASSIGN
614            /* eliminated an OP_PADSV */
615         || (!targmyop && is_targable)
616     )
617         /* definitely a net gain to optimise */
618         goto optimise;
619 
620     /* ... if not, what else? */
621 
622     /* special-case '$lex1 = expr . $lex1' (where expr isn't lex1):
623      * multiconcat is faster (due to not creating a temporary copy of
624      * $lex1), whereas for a general $lex1 = $lex2 . $lex3, concat is
625      * faster.
626      */
627     if (   nconst == 0
628          && nargs == 2
629          && targmyop
630          && topop->op_type == OP_CONCAT
631     ) {
632         PADOFFSET t = targmyop->op_targ;
633         OP *k1 = cBINOPx(topop)->op_first;
634         OP *k2 = cBINOPx(topop)->op_last;
635         if (   k2->op_type == OP_PADSV
636             && k2->op_targ == t
637             && (   k1->op_type != OP_PADSV
638                 || k1->op_targ != t)
639         )
640             goto optimise;
641     }
642 
643     /* need at least two concats */
644     if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 3)
645         return;
646 
647 
648 
649     /* -----------------------------------------------------------------
650      * Phase 3:
651      *
652      * At this point the optree has been verified as ok to be optimised
653      * into an OP_MULTICONCAT. Now start changing things.
654      */
655 
656    optimise:
657 
658     /* stringify all const args and determine utf8ness */
659 
660     variant = 0;
661     for (argp = args; argp <= toparg; argp++) {
662         SV *sv = (SV*)argp->p;
663         if (!sv)
664             continue; /* not a const op */
665         if (utf8 && !SvUTF8(sv))
666             sv_utf8_upgrade_nomg(sv);
667         argp->p = SvPV_nomg(sv, argp->len);
668         total_len += argp->len;
669 
670         /* see if any strings would grow if converted to utf8 */
671         if (!utf8) {
672             variant += variant_under_utf8_count((U8 *) argp->p,
673                                                 (U8 *) argp->p + argp->len);
674         }
675     }
676 
677     /* create and populate aux struct */
678 
679   create_aux:
680 
681     aux = (UNOP_AUX_item*)PerlMemShared_malloc(
682                     sizeof(UNOP_AUX_item)
683                     *  (
684                            PERL_MULTICONCAT_HEADER_SIZE
685                          + ((nargs + 1) * (variant ? 2 : 1))
686                         )
687                     );
688     const_str = (char *)PerlMemShared_malloc(total_len ? total_len : 1);
689 
690     /* Extract all the non-const expressions from the concat tree then
691      * dispose of the old tree, e.g. convert the tree from this:
692      *
693      *  o => SASSIGN
694      *         |
695      *       STRINGIFY   -- TARGET
696      *         |
697      *       ex-PUSHMARK -- CONCAT
698      *                        |
699      *                      CONCAT -- EXPR5
700      *                        |
701      *                      CONCAT -- EXPR4
702      *                        |
703      *                      CONCAT -- EXPR3
704      *                        |
705      *                      EXPR1  -- EXPR2
706      *
707      *
708      * to:
709      *
710      *  o => MULTICONCAT
711      *         |
712      *       ex-PUSHMARK -- EXPR1 -- EXPR2 -- EXPR3 -- EXPR4 -- EXPR5 -- TARGET
713      *
714      * except that if EXPRi is an OP_CONST, it's discarded.
715      *
716      * During the conversion process, EXPR ops are stripped from the tree
717      * and unshifted onto o. Finally, any of o's remaining original
718      * children are discarded and o is converted into an OP_MULTICONCAT.
719      *
720      * In this middle of this, o may contain both: unshifted args on the
721      * left, and some remaining original args on the right. lastkidop
722      * is set to point to the right-most unshifted arg to delineate
723      * between the two sets.
724      */
725 
726 
727     if (is_sprintf) {
728         /* create a copy of the format with the %'s removed, and record
729          * the sizes of the const string segments in the aux struct */
730         char *q, *oldq;
731         lenp = aux + PERL_MULTICONCAT_IX_LENGTHS;
732 
733         p    = sprintf_info.start;
734         q    = const_str;
735         oldq = q;
736         for (; p < sprintf_info.end; p++) {
737             if (*p == '%') {
738                 p++;
739                 if (*p != '%') {
740                     (lenp++)->ssize = q - oldq;
741                     oldq = q;
742                     continue;
743                 }
744             }
745             *q++ = *p;
746         }
747         lenp->ssize = q - oldq;
748         assert((STRLEN)(q - const_str) == total_len);
749 
750         /* Attach all the args (i.e. the kids of the sprintf) to o (which
751          * may or may not be topop) The pushmark and const ops need to be
752          * kept in case they're an op_next entry point.
753          */
754         lastkidop = cLISTOPx(topop)->op_last;
755         kid = cUNOPx(topop)->op_first; /* pushmark */
756         op_null(kid);
757         op_null(OpSIBLING(kid));       /* const */
758         if (o != topop) {
759             kid = op_sibling_splice(topop, NULL, -1, NULL); /* cut all args */
760             op_sibling_splice(o, NULL, 0, kid); /* and attach to o */
761             lastkidop->op_next = o;
762         }
763     }
764     else {
765         p = const_str;
766         lenp = aux + PERL_MULTICONCAT_IX_LENGTHS;
767 
768         lenp->ssize = -1;
769 
770         /* Concatenate all const strings into const_str.
771          * Note that args[] contains the RHS args in reverse order, so
772          * we scan args[] from top to bottom to get constant strings
773          * in L-R order
774          */
775         for (argp = toparg; argp >= args; argp--) {
776             if (!argp->p)
777                 /* not a const op */
778                 (++lenp)->ssize = -1;
779             else {
780                 STRLEN l = argp->len;
781                 Copy(argp->p, p, l, char);
782                 p += l;
783                 if (lenp->ssize == -1)
784                     lenp->ssize = l;
785                 else
786                     lenp->ssize += l;
787             }
788         }
789 
790         kid = topop;
791         nextop = o;
792         lastkidop = NULL;
793 
794         for (argp = args; argp <= toparg; argp++) {
795             /* only keep non-const args, except keep the first-in-next-chain
796              * arg no matter what it is (but nulled if OP_CONST), because it
797              * may be the entry point to this subtree from the previous
798              * op_next.
799              */
800             bool last = (argp == toparg);
801             OP *prev;
802 
803             /* set prev to the sibling *before* the arg to be cut out,
804              * e.g. when cutting EXPR:
805              *
806              *         |
807              * kid=  CONCAT
808              *         |
809              * prev= CONCAT -- EXPR
810              *         |
811              */
812             if (argp == args && kid->op_type != OP_CONCAT) {
813                 /* in e.g. '$x .= f(1)' there's no RHS concat tree
814                  * so the expression to be cut isn't kid->op_last but
815                  * kid itself */
816                 OP *o1, *o2;
817                 /* find the op before kid */
818                 o1 = NULL;
819                 o2 = cUNOPx(parentop)->op_first;
820                 while (o2 && o2 != kid) {
821                     o1 = o2;
822                     o2 = OpSIBLING(o2);
823                 }
824                 assert(o2 == kid);
825                 prev = o1;
826                 kid  = parentop;
827             }
828             else if (kid == o && lastkidop)
829                 prev = last ? lastkidop : OpSIBLING(lastkidop);
830             else
831                 prev = last ? NULL : cUNOPx(kid)->op_first;
832 
833             if (!argp->p || last) {
834                 /* cut RH op */
835                 OP *aop = op_sibling_splice(kid, prev, 1, NULL);
836                 /* and unshift to front of o */
837                 op_sibling_splice(o, NULL, 0, aop);
838                 /* record the right-most op added to o: later we will
839                  * free anything to the right of it */
840                 if (!lastkidop)
841                     lastkidop = aop;
842                 aop->op_next = nextop;
843                 if (last) {
844                     if (argp->p)
845                         /* null the const at start of op_next chain */
846                         op_null(aop);
847                 }
848                 else if (prev)
849                     nextop = prev->op_next;
850             }
851 
852             /* the last two arguments are both attached to the same concat op */
853             if (argp < toparg - 1)
854                 kid = prev;
855         }
856     }
857 
858     /* Populate the aux struct */
859 
860     aux[PERL_MULTICONCAT_IX_NARGS].ssize     = nargs;
861     aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv    = utf8 ? NULL : const_str;
862     aux[PERL_MULTICONCAT_IX_PLAIN_LEN].ssize = utf8 ?    0 : total_len;
863     aux[PERL_MULTICONCAT_IX_UTF8_PV].pv     = const_str;
864     aux[PERL_MULTICONCAT_IX_UTF8_LEN].ssize  = total_len;
865 
866     /* if variant > 0, calculate a variant const string and lengths where
867      * the utf8 version of the string will take 'variant' more bytes than
868      * the plain one. */
869 
870     if (variant) {
871         char              *p = const_str;
872         STRLEN          ulen = total_len + variant;
873         UNOP_AUX_item  *lens = aux + PERL_MULTICONCAT_IX_LENGTHS;
874         UNOP_AUX_item *ulens = lens + (nargs + 1);
875         char             *up = (char*)PerlMemShared_malloc(ulen);
876         SSize_t            n;
877 
878         aux[PERL_MULTICONCAT_IX_UTF8_PV].pv    = up;
879         aux[PERL_MULTICONCAT_IX_UTF8_LEN].ssize = ulen;
880 
881         for (n = 0; n < (nargs + 1); n++) {
882             SSize_t i;
883             char * orig_up = up;
884             for (i = (lens++)->ssize; i > 0; i--) {
885                 U8 c = *p++;
886                 append_utf8_from_native_byte(c, (U8**)&up);
887             }
888             (ulens++)->ssize = (i < 0) ? i : up - orig_up;
889         }
890     }
891 
892     if (stringop) {
893         /* if there was a top(ish)-level OP_STRINGIFY, we need to keep
894          * that op's first child - an ex-PUSHMARK - because the op_next of
895          * the previous op may point to it (i.e. it's the entry point for
896          * the o optree)
897          */
898         OP *pmop =
899             (stringop == o)
900                 ? op_sibling_splice(o, lastkidop, 1, NULL)
901                 : op_sibling_splice(stringop, NULL, 1, NULL);
902         assert(OP_TYPE_IS_OR_WAS_NN(pmop, OP_PUSHMARK));
903         op_sibling_splice(o, NULL, 0, pmop);
904         if (!lastkidop)
905             lastkidop = pmop;
906     }
907 
908     /* Optimise
909      *    target  = A.B.C...
910      *    target .= A.B.C...
911      */
912 
913     if (targetop) {
914         assert(!targmyop);
915 
916         if (o->op_type == OP_SASSIGN) {
917             /* Move the target subtree from being the last of o's children
918              * to being the last of o's preserved children.
919              * Note the difference between 'target = ...' and 'target .= ...':
920              * for the former, target is executed last; for the latter,
921              * first.
922              */
923             kid = OpSIBLING(lastkidop);
924             op_sibling_splice(o, kid, 1, NULL); /* cut target op */
925             op_sibling_splice(o, lastkidop, 0, targetop); /* and paste */
926             lastkidop->op_next = kid->op_next;
927             lastkidop = targetop;
928         }
929         else {
930             /* Move the target subtree from being the first of o's
931              * original children to being the first of *all* o's children.
932              */
933             if (lastkidop) {
934                 op_sibling_splice(o, lastkidop, 1, NULL); /* cut target op */
935                 op_sibling_splice(o, NULL, 0, targetop);  /* and paste*/
936             }
937             else {
938                 /* if the RHS of .= doesn't contain a concat (e.g.
939                  * $x .= "foo"), it gets missed by the "strip ops from the
940                  * tree and add to o" loop earlier */
941                 assert(topop->op_type != OP_CONCAT);
942                 if (stringop) {
943                     /* in e.g. $x .= "$y", move the $y expression
944                      * from being a child of OP_STRINGIFY to being the
945                      * second child of the OP_CONCAT
946                      */
947                     assert(cUNOPx(stringop)->op_first == topop);
948                     op_sibling_splice(stringop, NULL, 1, NULL);
949                     op_sibling_splice(o, cUNOPo->op_first, 0, topop);
950                 }
951                 assert(topop == OpSIBLING(cBINOPo->op_first));
952                 if (toparg->p)
953                     op_null(topop);
954                 lastkidop = topop;
955             }
956         }
957 
958         if (is_targable) {
959             /* optimise
960              *  my $lex  = A.B.C...
961              *     $lex  = A.B.C...
962              *     $lex .= A.B.C...
963              * The original padsv op is kept but nulled in case it's the
964              * entry point for the optree (which it will be for
965              * '$lex .=  ... '
966              */
967             private_flags |= OPpTARGET_MY;
968             private_flags |= (targetop->op_private & OPpLVAL_INTRO);
969             o->op_targ = targetop->op_targ;
970             targetop->op_targ = 0;
971             op_null(targetop);
972         }
973         else
974             flags |= OPf_STACKED;
975     }
976     else if (targmyop) {
977         private_flags |= OPpTARGET_MY;
978         if (o != targmyop) {
979             o->op_targ = targmyop->op_targ;
980             targmyop->op_targ = 0;
981         }
982     }
983 
984     /* detach the emaciated husk of the sprintf/concat optree and free it */
985     for (;;) {
986         kid = op_sibling_splice(o, lastkidop, 1, NULL);
987         if (!kid)
988             break;
989         op_free(kid);
990     }
991 
992     /* and convert o into a multiconcat */
993 
994     o->op_flags        = (flags|OPf_KIDS|stacked_last
995                          |(o->op_flags & (OPf_WANT|OPf_PARENS)));
996     o->op_private      = private_flags;
997     o->op_type         = OP_MULTICONCAT;
998     o->op_ppaddr       = PL_ppaddr[OP_MULTICONCAT];
999     cUNOP_AUXo->op_aux = aux;
1000 }
1001 
1002 
1003 /*
1004 =for apidoc_section $optree_manipulation
1005 
1006 =for apidoc optimize_optree
1007 
1008 This function applies some optimisations to the optree in top-down order.
1009 It is called before the peephole optimizer, which processes ops in
1010 execution order. Note that finalize_optree() also does a top-down scan,
1011 but is called *after* the peephole optimizer.
1012 
1013 =cut
1014 */
1015 
1016 void
Perl_optimize_optree(pTHX_ OP * o)1017 Perl_optimize_optree(pTHX_ OP* o)
1018 {
1019     PERL_ARGS_ASSERT_OPTIMIZE_OPTREE;
1020 
1021     ENTER;
1022     SAVEVPTR(PL_curcop);
1023 
1024     optimize_op(o);
1025 
1026     LEAVE;
1027 }
1028 
1029 
1030 #define warn_implicit_snail_cvsig(o)  S_warn_implicit_snail_cvsig(aTHX_ o)
1031 static void
S_warn_implicit_snail_cvsig(pTHX_ OP * o)1032 S_warn_implicit_snail_cvsig(pTHX_ OP *o)
1033 {
1034     CV *cv = PL_compcv;
1035     while(cv && CvEVAL(cv))
1036         cv = CvOUTSIDE(cv);
1037 
1038     if(cv && CvSIGNATURE(cv))
1039         Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__ARGS_ARRAY_WITH_SIGNATURES),
1040             "Implicit use of @_ in %s with signatured subroutine is experimental", OP_DESC(o));
1041 }
1042 
1043 
1044 #define OP_ZOOM(o)  (OP_TYPE_IS(o, OP_NULL) ? cUNOPx(o)->op_first : (o))
1045 
1046 /* helper for optimize_optree() which optimises one op then recurses
1047  * to optimise any children.
1048  */
1049 
1050 STATIC void
S_optimize_op(pTHX_ OP * o)1051 S_optimize_op(pTHX_ OP* o)
1052 {
1053     OP *top_op = o;
1054 
1055     PERL_ARGS_ASSERT_OPTIMIZE_OP;
1056 
1057     while (1) {
1058         OP * next_kid = NULL;
1059 
1060         assert(o->op_type != OP_FREED);
1061 
1062         switch (o->op_type) {
1063         case OP_NEXTSTATE:
1064         case OP_DBSTATE:
1065             PL_curcop = ((COP*)o);		/* for warnings */
1066             break;
1067 
1068 
1069         case OP_CONCAT:
1070         case OP_SASSIGN:
1071         case OP_STRINGIFY:
1072         case OP_SPRINTF:
1073             S_maybe_multiconcat(aTHX_ o);
1074             break;
1075 
1076         case OP_SUBST:
1077             if (cPMOPo->op_pmreplrootu.op_pmreplroot) {
1078                 /* we can't assume that op_pmreplroot->op_sibparent == o
1079                  * and that it is thus possible to walk back up the tree
1080                  * past op_pmreplroot. So, although we try to avoid
1081                  * recursing through op trees, do it here. After all,
1082                  * there are unlikely to be many nested s///e's within
1083                  * the replacement part of a s///e.
1084                  */
1085                 optimize_op(cPMOPo->op_pmreplrootu.op_pmreplroot);
1086             }
1087             break;
1088 
1089         case OP_RV2AV:
1090         {
1091             OP *first = (o->op_flags & OPf_KIDS) ? cUNOPo->op_first : NULL;
1092             CV *cv = PL_compcv;
1093             while(cv && CvEVAL(cv))
1094                 cv = CvOUTSIDE(cv);
1095 
1096             if(cv && CvSIGNATURE(cv) &&
1097                     OP_TYPE_IS(first, OP_GV) && cGVOPx_gv(first) == PL_defgv) {
1098                 OP *parent = op_parent(o);
1099                 while(OP_TYPE_IS(parent, OP_NULL))
1100                     parent = op_parent(parent);
1101 
1102                 Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__ARGS_ARRAY_WITH_SIGNATURES),
1103                     "Use of @_ in %s with signatured subroutine is experimental", OP_DESC(parent));
1104             }
1105             break;
1106         }
1107 
1108         case OP_SHIFT:
1109         case OP_POP:
1110             if(!CvUNIQUE(PL_compcv) && !(o->op_flags & OPf_KIDS))
1111                 warn_implicit_snail_cvsig(o);
1112             break;
1113 
1114         case OP_ENTERSUB:
1115             if(!(o->op_flags & OPf_STACKED))
1116                 warn_implicit_snail_cvsig(o);
1117             break;
1118 
1119         case OP_GOTO:
1120         {
1121             OP *first = (o->op_flags & OPf_KIDS) ? cUNOPo->op_first : NULL;
1122             OP *ffirst;
1123             if(OP_TYPE_IS(first, OP_SREFGEN) &&
1124                     (ffirst = OP_ZOOM(cUNOPx(first)->op_first)) &&
1125                     OP_TYPE_IS(ffirst, OP_RV2CV))
1126                 warn_implicit_snail_cvsig(o);
1127             break;
1128         }
1129 
1130         default:
1131             break;
1132         }
1133 
1134         if (o->op_flags & OPf_KIDS)
1135             next_kid = cUNOPo->op_first;
1136 
1137         /* if a kid hasn't been nominated to process, continue with the
1138          * next sibling, or if no siblings left, go back to the parent's
1139          * siblings and so on
1140          */
1141         while (!next_kid) {
1142             if (o == top_op)
1143                 return; /* at top; no parents/siblings to try */
1144             if (OpHAS_SIBLING(o))
1145                 next_kid = o->op_sibparent;
1146             else
1147                 o = o->op_sibparent; /*try parent's next sibling */
1148         }
1149 
1150       /* this label not yet used. Goto here if any code above sets
1151        * next-kid
1152        get_next_op:
1153        */
1154         o = next_kid;
1155     }
1156 }
1157 
1158 /*
1159 =for apidoc finalize_optree
1160 
1161 This function finalizes the optree.  Should be called directly after
1162 the complete optree is built.  It does some additional
1163 checking which can't be done in the normal C<ck_>xxx functions and makes
1164 the tree thread-safe.
1165 
1166 =cut
1167 */
1168 
1169 void
Perl_finalize_optree(pTHX_ OP * o)1170 Perl_finalize_optree(pTHX_ OP* o)
1171 {
1172     PERL_ARGS_ASSERT_FINALIZE_OPTREE;
1173 
1174     ENTER;
1175     SAVEVPTR(PL_curcop);
1176 
1177     finalize_op(o);
1178 
1179     LEAVE;
1180 }
1181 
1182 
1183 /*
1184 =for apidoc traverse_op_tree
1185 
1186 Return the next op in a depth-first traversal of the op tree,
1187 returning NULL when the traversal is complete.
1188 
1189 The initial call must supply the root of the tree as both top and o.
1190 
1191 For now it's static, but it may be exposed to the API in the future.
1192 
1193 =cut
1194 */
1195 
1196 STATIC OP*
S_traverse_op_tree(pTHX_ OP * top,OP * o)1197 S_traverse_op_tree(pTHX_ OP *top, OP *o) {
1198     OP *sib;
1199 
1200     PERL_ARGS_ASSERT_TRAVERSE_OP_TREE;
1201 
1202     if ((o->op_flags & OPf_KIDS) && cUNOPo->op_first) {
1203         return cUNOPo->op_first;
1204     }
1205     else if ((sib = OpSIBLING(o))) {
1206         return sib;
1207     }
1208     else {
1209         OP *parent = o->op_sibparent;
1210         assert(!(o->op_moresib));
1211         while (parent && parent != top) {
1212             OP *sib = OpSIBLING(parent);
1213             if (sib)
1214                 return sib;
1215             parent = parent->op_sibparent;
1216         }
1217 
1218         return NULL;
1219     }
1220 }
1221 
1222 STATIC void
S_finalize_op(pTHX_ OP * o)1223 S_finalize_op(pTHX_ OP* o)
1224 {
1225     OP * const top = o;
1226     PERL_ARGS_ASSERT_FINALIZE_OP;
1227 
1228     do {
1229         assert(o->op_type != OP_FREED);
1230 
1231         switch (o->op_type) {
1232         case OP_NEXTSTATE:
1233         case OP_DBSTATE:
1234             PL_curcop = ((COP*)o);		/* for warnings */
1235             break;
1236         case OP_EXEC:
1237             if (OpHAS_SIBLING(o)) {
1238                 OP *sib = OpSIBLING(o);
1239                 if ((  sib->op_type == OP_NEXTSTATE || sib->op_type == OP_DBSTATE)
1240                     && ckWARN(WARN_EXEC)
1241                     && OpHAS_SIBLING(sib))
1242                 {
1243                     const OPCODE type = OpSIBLING(sib)->op_type;
1244                     if (type != OP_EXIT && type != OP_WARN && type != OP_DIE) {
1245                         const line_t oldline = CopLINE(PL_curcop);
1246                         CopLINE_set(PL_curcop, CopLINE((COP*)sib));
1247                         Perl_warner(aTHX_ packWARN(WARN_EXEC),
1248                             "Statement unlikely to be reached");
1249                         Perl_warner(aTHX_ packWARN(WARN_EXEC),
1250                             "\t(Maybe you meant system() when you said exec()?)\n");
1251                         CopLINE_set(PL_curcop, oldline);
1252                     }
1253                 }
1254             }
1255             break;
1256 
1257         case OP_GV:
1258             if ((o->op_private & OPpEARLY_CV) && ckWARN(WARN_PROTOTYPE)) {
1259                 GV * const gv = cGVOPo_gv;
1260                 if (SvTYPE(gv) == SVt_PVGV && GvCV(gv) && SvPVX_const(GvCV(gv))) {
1261                     /* XXX could check prototype here instead of just carping */
1262                     SV * const sv = sv_newmortal();
1263                     gv_efullname3(sv, gv, NULL);
1264                     Perl_warner(aTHX_ packWARN(WARN_PROTOTYPE),
1265                                 "%" SVf "() called too early to check prototype",
1266                                 SVfARG(sv));
1267                 }
1268             }
1269             break;
1270 
1271         case OP_CONST:
1272             if (cSVOPo->op_private & OPpCONST_STRICT)
1273                 no_bareword_allowed(o);
1274 #ifdef USE_ITHREADS
1275             /* FALLTHROUGH */
1276         case OP_HINTSEVAL:
1277             op_relocate_sv(&cSVOPo->op_sv, &o->op_targ);
1278 #endif
1279             break;
1280 
1281 #ifdef USE_ITHREADS
1282             /* Relocate all the METHOP's SVs to the pad for thread safety. */
1283         case OP_METHOD_NAMED:
1284         case OP_METHOD_SUPER:
1285         case OP_METHOD_REDIR:
1286         case OP_METHOD_REDIR_SUPER:
1287             op_relocate_sv(&cMETHOPo->op_u.op_meth_sv, &o->op_targ);
1288             break;
1289 #endif
1290 
1291         case OP_HELEM: {
1292             UNOP *rop;
1293             SVOP *key_op;
1294             OP *kid;
1295 
1296             if ((key_op = cSVOPx(cBINOPo->op_last))->op_type != OP_CONST)
1297                 break;
1298 
1299             rop = cUNOPx(cBINOPo->op_first);
1300 
1301             goto check_keys;
1302 
1303             case OP_HSLICE:
1304                 S_scalar_slice_warning(aTHX_ o);
1305                 /* FALLTHROUGH */
1306 
1307             case OP_KVHSLICE:
1308                 kid = OpSIBLING(cLISTOPo->op_first);
1309             if (/* I bet there's always a pushmark... */
1310                 OP_TYPE_ISNT_AND_WASNT_NN(kid, OP_LIST)
1311                 && OP_TYPE_ISNT_NN(kid, OP_CONST))
1312             {
1313                 break;
1314             }
1315 
1316             key_op = cSVOPx(kid->op_type == OP_CONST
1317                              ? kid
1318                              : OpSIBLING(kLISTOP->op_first));
1319 
1320             rop = cUNOPx(cLISTOPo->op_last);
1321 
1322         check_keys:
1323             if (o->op_private & OPpLVAL_INTRO || rop->op_type != OP_RV2HV)
1324                 rop = NULL;
1325             check_hash_fields_and_hekify(rop, key_op, 1);
1326             break;
1327         }
1328         case OP_NULL:
1329             if (o->op_targ != OP_HSLICE && o->op_targ != OP_ASLICE)
1330                 break;
1331             /* FALLTHROUGH */
1332         case OP_ASLICE:
1333             S_scalar_slice_warning(aTHX_ o);
1334             break;
1335 
1336         case OP_SUBST: {
1337             if (cPMOPo->op_pmreplrootu.op_pmreplroot)
1338                 finalize_op(cPMOPo->op_pmreplrootu.op_pmreplroot);
1339             break;
1340         }
1341         default:
1342             break;
1343         }
1344 
1345 #ifdef DEBUGGING
1346         if (o->op_flags & OPf_KIDS) {
1347             OP *kid;
1348 
1349             /* check that op_last points to the last sibling, and that
1350              * the last op_sibling/op_sibparent field points back to the
1351              * parent, and that the only ops with KIDS are those which are
1352              * entitled to them */
1353             U32 type = o->op_type;
1354             U32 family;
1355             bool has_last;
1356 
1357             if (type == OP_NULL) {
1358                 type = o->op_targ;
1359                 /* ck_glob creates a null UNOP with ex-type GLOB
1360                  * (which is a list op. So pretend it wasn't a listop */
1361                 if (type == OP_GLOB)
1362                     type = OP_NULL;
1363             }
1364             family = PL_opargs[type] & OA_CLASS_MASK;
1365 
1366             has_last = (   family == OA_BINOP
1367                         || family == OA_LISTOP
1368                         || family == OA_PMOP
1369                         || family == OA_LOOP
1370                        );
1371             assert(  has_last /* has op_first and op_last, or ...
1372                   ... has (or may have) op_first: */
1373                   || family == OA_UNOP
1374                   || family == OA_UNOP_AUX
1375                   || family == OA_LOGOP
1376                   || family == OA_BASEOP_OR_UNOP
1377                   || family == OA_FILESTATOP
1378                   || family == OA_LOOPEXOP
1379                   || family == OA_METHOP
1380                   || type == OP_CUSTOM
1381                   || type == OP_NULL /* new_logop does this */
1382                   );
1383 
1384             for (kid = cUNOPo->op_first; kid; kid = OpSIBLING(kid)) {
1385                 if (!OpHAS_SIBLING(kid)) {
1386                     if (has_last)
1387                         assert(kid == cLISTOPo->op_last);
1388                     assert(kid->op_sibparent == o);
1389                 }
1390             }
1391         }
1392 #endif
1393     } while (( o = traverse_op_tree(top, o)) != NULL);
1394 }
1395 
1396 
1397 /*
1398    ---------------------------------------------------------
1399 
1400    Common vars in list assignment
1401 
1402    There now follows some enums and static functions for detecting
1403    common variables in list assignments. Here is a little essay I wrote
1404    for myself when trying to get my head around this. DAPM.
1405 
1406    ----
1407 
1408    First some random observations:
1409 
1410    * If a lexical var is an alias of something else, e.g.
1411        for my $x ($lex, $pkg, $a[0]) {...}
1412      then the act of aliasing will increase the reference count of the SV
1413 
1414    * If a package var is an alias of something else, it may still have a
1415      reference count of 1, depending on how the alias was created, e.g.
1416      in *a = *b, $a may have a refcount of 1 since the GP is shared
1417      with a single GvSV pointer to the SV. So If it's an alias of another
1418      package var, then RC may be 1; if it's an alias of another scalar, e.g.
1419      a lexical var or an array element, then it will have RC > 1.
1420 
1421    * There are many ways to create a package alias; ultimately, XS code
1422      may quite legally do GvSV(gv) = SvREFCNT_inc(sv) for example, so
1423      run-time tracing mechanisms are unlikely to be able to catch all cases.
1424 
1425    * When the LHS is all my declarations, the same vars can't appear directly
1426      on the RHS, but they can indirectly via closures, aliasing and lvalue
1427      subs. But those techniques all involve an increase in the lexical
1428      scalar's ref count.
1429 
1430    * When the LHS is all lexical vars (but not necessarily my declarations),
1431      it is possible for the same lexicals to appear directly on the RHS, and
1432      without an increased ref count, since the stack isn't refcounted.
1433      This case can be detected at compile time by scanning for common lex
1434      vars with PL_generation.
1435 
1436    * lvalue subs defeat common var detection, but they do at least
1437      return vars with a temporary ref count increment. Also, you can't
1438      tell at compile time whether a sub call is lvalue.
1439 
1440 
1441    So...
1442 
1443    A: There are a few circumstances where there definitely can't be any
1444      commonality:
1445 
1446        LHS empty:  () = (...);
1447        RHS empty:  (....) = ();
1448        RHS contains only constants or other 'can't possibly be shared'
1449            elements (e.g. ops that return PADTMPs):  (...) = (1,2, length)
1450            i.e. they only contain ops not marked as dangerous, whose children
1451            are also not dangerous;
1452        LHS ditto;
1453        LHS contains a single scalar element: e.g. ($x) = (....); because
1454            after $x has been modified, it won't be used again on the RHS;
1455        RHS contains a single element with no aggregate on LHS: e.g.
1456            ($a,$b,$c)  = ($x); again, once $a has been modified, its value
1457            won't be used again.
1458 
1459    B: If LHS are all 'my' lexical var declarations (or safe ops, which
1460      we can ignore):
1461 
1462        my ($a, $b, @c) = ...;
1463 
1464        Due to closure and goto tricks, these vars may already have content.
1465        For the same reason, an element on the RHS may be a lexical or package
1466        alias of one of the vars on the left, or share common elements, for
1467        example:
1468 
1469            my ($x,$y) = f(); # $x and $y on both sides
1470            sub f : lvalue { ($x,$y) = (1,2); $y, $x }
1471 
1472        and
1473 
1474            my $ra = f();
1475            my @a = @$ra;  # elements of @a on both sides
1476            sub f { @a = 1..4; \@a }
1477 
1478 
1479        First, just consider scalar vars on LHS:
1480 
1481            RHS is safe only if (A), or in addition,
1482                * contains only lexical *scalar* vars, where neither side's
1483                  lexicals have been flagged as aliases
1484 
1485            If RHS is not safe, then it's always legal to check LHS vars for
1486            RC==1, since the only RHS aliases will always be associated
1487            with an RC bump.
1488 
1489            Note that in particular, RHS is not safe if:
1490 
1491                * it contains package scalar vars; e.g.:
1492 
1493                    f();
1494                    my ($x, $y) = (2, $x_alias);
1495                    sub f { $x = 1; *x_alias = \$x; }
1496 
1497                * It contains other general elements, such as flattened or
1498                * spliced or single array or hash elements, e.g.
1499 
1500                    f();
1501                    my ($x,$y) = @a; # or $a[0] or @a{@b} etc
1502 
1503                    sub f {
1504                        ($x, $y) = (1,2);
1505                        use feature 'refaliasing';
1506                        \($a[0], $a[1]) = \($y,$x);
1507                    }
1508 
1509                  It doesn't matter if the array/hash is lexical or package.
1510 
1511                * it contains a function call that happens to be an lvalue
1512                  sub which returns one or more of the above, e.g.
1513 
1514                    f();
1515                    my ($x,$y) = f();
1516 
1517                    sub f : lvalue {
1518                        ($x, $y) = (1,2);
1519                        *x1 = \$x;
1520                        $y, $x1;
1521                    }
1522 
1523                    (so a sub call on the RHS should be treated the same
1524                    as having a package var on the RHS).
1525 
1526                * any other "dangerous" thing, such an op or built-in that
1527                  returns one of the above, e.g. pp_preinc
1528 
1529 
1530            If RHS is not safe, what we can do however is at compile time flag
1531            that the LHS are all my declarations, and at run time check whether
1532            all the LHS have RC == 1, and if so skip the full scan.
1533 
1534        Now consider array and hash vars on LHS: e.g. my (...,@a) = ...;
1535 
1536            Here the issue is whether there can be elements of @a on the RHS
1537            which will get prematurely freed when @a is cleared prior to
1538            assignment. This is only a problem if the aliasing mechanism
1539            is one which doesn't increase the refcount - only if RC == 1
1540            will the RHS element be prematurely freed.
1541 
1542            Because the array/hash is being INTROed, it or its elements
1543            can't directly appear on the RHS:
1544 
1545                my (@a) = ($a[0], @a, etc) # NOT POSSIBLE
1546 
1547            but can indirectly, e.g.:
1548 
1549                my $r = f();
1550                my (@a) = @$r;
1551                sub f { @a = 1..3; \@a }
1552 
1553            So if the RHS isn't safe as defined by (A), we must always
1554            mortalise and bump the ref count of any remaining RHS elements
1555            when assigning to a non-empty LHS aggregate.
1556 
1557            Lexical scalars on the RHS aren't safe if they've been involved in
1558            aliasing, e.g.
1559 
1560                use feature 'refaliasing';
1561 
1562                f();
1563                \(my $lex) = \$pkg;
1564                my @a = ($lex,3); # equivalent to ($a[0],3)
1565 
1566                sub f {
1567                    @a = (1,2);
1568                    \$pkg = \$a[0];
1569                }
1570 
1571            Similarly with lexical arrays and hashes on the RHS:
1572 
1573                f();
1574                my @b;
1575                my @a = (@b);
1576 
1577                sub f {
1578                    @a = (1,2);
1579                    \$b[0] = \$a[1];
1580                    \$b[1] = \$a[0];
1581                }
1582 
1583 
1584 
1585    C: As (B), but in addition the LHS may contain non-intro lexicals, e.g.
1586        my $a; ($a, my $b) = (....);
1587 
1588        The difference between (B) and (C) is that it is now physically
1589        possible for the LHS vars to appear on the RHS too, where they
1590        are not reference counted; but in this case, the compile-time
1591        PL_generation sweep will detect such common vars.
1592 
1593        So the rules for (C) differ from (B) in that if common vars are
1594        detected, the runtime "test RC==1" optimisation can no longer be used,
1595        and a full mark and sweep is required
1596 
1597    D: As (C), but in addition the LHS may contain package vars.
1598 
1599        Since package vars can be aliased without a corresponding refcount
1600        increase, all bets are off. It's only safe if (A). E.g.
1601 
1602            my ($x, $y) = (1,2);
1603 
1604            for $x_alias ($x) {
1605                ($x_alias, $y) = (3, $x); # whoops
1606            }
1607 
1608        Ditto for LHS aggregate package vars.
1609 
1610    E: Any other dangerous ops on LHS, e.g.
1611            (f(), $a[0], @$r) = (...);
1612 
1613        this is similar to (E) in that all bets are off. In addition, it's
1614        impossible to determine at compile time whether the LHS
1615        contains a scalar or an aggregate, e.g.
1616 
1617            sub f : lvalue { @a }
1618            (f()) = 1..3;
1619 
1620 * ---------------------------------------------------------
1621 */
1622 
1623 /* A set of bit flags returned by S_aassign_scan(). Each flag indicates
1624  * that at least one of the things flagged was seen.
1625  */
1626 
1627 enum {
1628     AAS_MY_SCALAR       = 0x001, /* my $scalar */
1629     AAS_MY_AGG          = 0x002, /* aggregate: my @array or my %hash */
1630     AAS_LEX_SCALAR      = 0x004, /* $lexical */
1631     AAS_LEX_AGG         = 0x008, /* @lexical or %lexical aggregate */
1632     AAS_LEX_SCALAR_COMM = 0x010, /* $lexical seen on both sides */
1633     AAS_PKG_SCALAR      = 0x020, /* $scalar (where $scalar is pkg var) */
1634     AAS_PKG_AGG         = 0x040, /* package @array or %hash aggregate */
1635     AAS_DANGEROUS       = 0x080, /* an op (other than the above)
1636                                          that's flagged OA_DANGEROUS */
1637     AAS_SAFE_SCALAR     = 0x100, /* produces at least one scalar SV that's
1638                                         not in any of the categories above */
1639     AAS_DEFAV           = 0x200  /* contains just a single '@_' on RHS */
1640 };
1641 
1642 /* helper function for S_aassign_scan().
1643  * check a PAD-related op for commonality and/or set its generation number.
1644  * Returns a boolean indicating whether its shared */
1645 
1646 static bool
S_aassign_padcheck(pTHX_ OP * o,bool rhs)1647 S_aassign_padcheck(pTHX_ OP* o, bool rhs)
1648 {
1649     if (PAD_COMPNAME_GEN(o->op_targ) == PERL_INT_MAX)
1650         /* lexical used in aliasing */
1651         return TRUE;
1652 
1653     if (rhs)
1654         return cBOOL(PAD_COMPNAME_GEN(o->op_targ) == (STRLEN)PL_generation);
1655     else
1656         PAD_COMPNAME_GEN_set(o->op_targ, PL_generation);
1657 
1658     return FALSE;
1659 }
1660 
1661 /*
1662   Helper function for OPpASSIGN_COMMON* detection in rpeep().
1663   It scans the left or right hand subtree of the aassign op, and returns a
1664   set of flags indicating what sorts of things it found there.
1665   'rhs' indicates whether we're scanning the LHS or RHS. If the former, we
1666   set PL_generation on lexical vars; if the latter, we see if
1667   PL_generation matches.
1668   'scalars_p' is a pointer to a counter of the number of scalar SVs seen.
1669   This fn will increment it by the number seen. It's not intended to
1670   be an accurate count (especially as many ops can push a variable
1671   number of SVs onto the stack); rather it's used as to test whether there
1672   can be at most 1 SV pushed; so it's only meanings are "0, 1, many".
1673 */
1674 
1675 static int
S_aassign_scan(pTHX_ OP * o,bool rhs,int * scalars_p)1676 S_aassign_scan(pTHX_ OP* o, bool rhs, int *scalars_p)
1677 {
1678     OP *top_op           = o;
1679     OP *effective_top_op = o;
1680     int all_flags = 0;
1681 
1682     while (1) {
1683         bool top = o == effective_top_op;
1684         int flags = 0;
1685         OP* next_kid = NULL;
1686 
1687         /* first, look for a solitary @_ on the RHS */
1688         if (   rhs
1689             && top
1690             && (o->op_flags & OPf_KIDS)
1691             && OP_TYPE_IS_OR_WAS(o, OP_LIST)
1692         ) {
1693             OP *kid = cUNOPo->op_first;
1694             if (   (   kid->op_type == OP_PUSHMARK
1695                     || kid->op_type == OP_PADRANGE) /* ex-pushmark */
1696                 && ((kid = OpSIBLING(kid)))
1697                 && !OpHAS_SIBLING(kid)
1698                 && kid->op_type == OP_RV2AV
1699                 && !(kid->op_flags & OPf_REF)
1700                 && !(kid->op_private & (OPpLVAL_INTRO|OPpMAYBE_LVSUB))
1701                 && ((kid->op_flags & OPf_WANT) == OPf_WANT_LIST)
1702                 && ((kid = cUNOPx(kid)->op_first))
1703                 && kid->op_type == OP_GV
1704                 && cGVOPx_gv(kid) == PL_defgv
1705             )
1706                 flags = AAS_DEFAV;
1707         }
1708 
1709         switch (o->op_type) {
1710         case OP_GVSV:
1711             (*scalars_p)++;
1712             all_flags |= AAS_PKG_SCALAR;
1713             goto do_next;
1714 
1715         case OP_PADAV:
1716         case OP_PADHV:
1717             (*scalars_p) += 2;
1718             /* if !top, could be e.g. @a[0,1] */
1719             all_flags |=  (top && (o->op_flags & OPf_REF))
1720                             ? ((o->op_private & OPpLVAL_INTRO)
1721                                 ? AAS_MY_AGG : AAS_LEX_AGG)
1722                             : AAS_DANGEROUS;
1723             goto do_next;
1724 
1725         case OP_PADSV:
1726             {
1727                 int comm = S_aassign_padcheck(aTHX_ o, rhs)
1728                             ?  AAS_LEX_SCALAR_COMM : 0;
1729                 (*scalars_p)++;
1730                 all_flags |= (o->op_private & OPpLVAL_INTRO)
1731                     ? (AAS_MY_SCALAR|comm) : (AAS_LEX_SCALAR|comm);
1732                 goto do_next;
1733 
1734             }
1735 
1736         case OP_RV2AV:
1737         case OP_RV2HV:
1738             (*scalars_p) += 2;
1739             if (cUNOPx(o)->op_first->op_type != OP_GV)
1740                 all_flags |= AAS_DANGEROUS; /* @{expr}, %{expr} */
1741             /* @pkg, %pkg */
1742             /* if !top, could be e.g. @a[0,1] */
1743             else if (top && (o->op_flags & OPf_REF))
1744                 all_flags |= AAS_PKG_AGG;
1745             else
1746                 all_flags |= AAS_DANGEROUS;
1747             goto do_next;
1748 
1749         case OP_RV2SV:
1750             (*scalars_p)++;
1751             if (cUNOPx(o)->op_first->op_type != OP_GV) {
1752                 (*scalars_p) += 2;
1753                 all_flags |= AAS_DANGEROUS; /* ${expr} */
1754             }
1755             else
1756                 all_flags |= AAS_PKG_SCALAR; /* $pkg */
1757             goto do_next;
1758 
1759         case OP_SPLIT:
1760             if (o->op_private & OPpSPLIT_ASSIGN) {
1761                 /* the assign in @a = split() has been optimised away
1762                  * and the @a attached directly to the split op
1763                  * Treat the array as appearing on the RHS, i.e.
1764                  *    ... = (@a = split)
1765                  * is treated like
1766                  *    ... = @a;
1767                  */
1768 
1769                 if (o->op_flags & OPf_STACKED) {
1770                     /* @{expr} = split() - the array expression is tacked
1771                      * on as an extra child to split - process kid */
1772                     next_kid = cLISTOPo->op_last;
1773                     goto do_next;
1774                 }
1775 
1776                 /* ... else array is directly attached to split op */
1777                 (*scalars_p) += 2;
1778                 all_flags |= (PL_op->op_private & OPpSPLIT_LEX)
1779                                 ? ((o->op_private & OPpLVAL_INTRO)
1780                                     ? AAS_MY_AGG : AAS_LEX_AGG)
1781                                 : AAS_PKG_AGG;
1782                 goto do_next;
1783             }
1784             (*scalars_p)++;
1785             /* other args of split can't be returned */
1786             all_flags |= AAS_SAFE_SCALAR;
1787             goto do_next;
1788 
1789         case OP_UNDEF:
1790             /* undef on LHS following a var is significant, e.g.
1791              *    my $x = 1;
1792              *    @a = (($x, undef) = (2 => $x));
1793              *    # @a shoul be (2,1) not (2,2)
1794              *
1795              * undef on RHS counts as a scalar:
1796              *   ($x, $y)    = (undef, $x); # 2 scalars on RHS: unsafe
1797              */
1798             if ((!rhs && *scalars_p) || rhs)
1799                 (*scalars_p)++;
1800             flags = AAS_SAFE_SCALAR;
1801             break;
1802 
1803         case OP_PUSHMARK:
1804         case OP_STUB:
1805             /* these are all no-ops; they don't push a potentially common SV
1806              * onto the stack, so they are neither AAS_DANGEROUS nor
1807              * AAS_SAFE_SCALAR */
1808             goto do_next;
1809 
1810         case OP_PADRANGE: /* Ignore padrange; checking its siblings is enough */
1811             break;
1812 
1813         case OP_NULL:
1814         case OP_LIST:
1815             /* these do nothing, but may have children */
1816             break;
1817 
1818         default:
1819             if (PL_opargs[o->op_type] & OA_DANGEROUS) {
1820                 (*scalars_p) += 2;
1821                 flags = AAS_DANGEROUS;
1822                 break;
1823             }
1824 
1825             if (   (PL_opargs[o->op_type] & OA_TARGLEX)
1826                 && (o->op_private & OPpTARGET_MY))
1827             {
1828                 (*scalars_p)++;
1829                 all_flags |= S_aassign_padcheck(aTHX_ o, rhs)
1830                                 ? AAS_LEX_SCALAR_COMM : AAS_LEX_SCALAR;
1831                 goto do_next;
1832             }
1833 
1834             /* if its an unrecognised, non-dangerous op, assume that it
1835              * is the cause of at least one safe scalar */
1836             (*scalars_p)++;
1837             flags = AAS_SAFE_SCALAR;
1838             break;
1839         }
1840 
1841         all_flags |= flags;
1842 
1843         /* by default, process all kids next
1844          * XXX this assumes that all other ops are "transparent" - i.e. that
1845          * they can return some of their children. While this true for e.g.
1846          * sort and grep, it's not true for e.g. map. We really need a
1847          * 'transparent' flag added to regen/opcodes
1848          */
1849         if (o->op_flags & OPf_KIDS) {
1850             next_kid = cUNOPo->op_first;
1851             /* these ops do nothing but may have children; but their
1852              * children should also be treated as top-level */
1853             if (   o == effective_top_op
1854                 && (o->op_type == OP_NULL || o->op_type == OP_LIST)
1855             )
1856                 effective_top_op = next_kid;
1857         }
1858 
1859 
1860         /* If next_kid is set, someone in the code above wanted us to process
1861          * that kid and all its remaining siblings.  Otherwise, work our way
1862          * back up the tree */
1863       do_next:
1864         while (!next_kid) {
1865             if (o == top_op)
1866                 return all_flags; /* at top; no parents/siblings to try */
1867             if (OpHAS_SIBLING(o)) {
1868                 next_kid = o->op_sibparent;
1869                 if (o == effective_top_op)
1870                     effective_top_op = next_kid;
1871             }
1872             else if (o == effective_top_op)
1873               effective_top_op = o->op_sibparent;
1874             o = o->op_sibparent; /* try parent's next sibling */
1875         }
1876         o = next_kid;
1877     } /* while */
1878 }
1879 
1880 /* S_maybe_multideref(): given an op_next chain of ops beginning at 'start'
1881  * that potentially represent a series of one or more aggregate derefs
1882  * (such as $a->[1]{$key}), examine the chain, and if appropriate, convert
1883  * the whole chain to a single OP_MULTIDEREF op (maybe with a few
1884  * additional ops left in too).
1885  *
1886  * The caller will have already verified that the first few ops in the
1887  * chain following 'start' indicate a multideref candidate, and will have
1888  * set 'orig_o' to the point further on in the chain where the first index
1889  * expression (if any) begins.  'orig_action' specifies what type of
1890  * beginning has already been determined by the ops between start..orig_o
1891  * (e.g.  $lex_ary[], $pkg_ary->{}, expr->[], etc).
1892  *
1893  * 'hints' contains any hints flags that need adding (currently just
1894  * OPpHINT_STRICT_REFS) as found in any rv2av/hv skipped by the caller.
1895  */
1896 
1897 STATIC void
S_maybe_multideref(pTHX_ OP * start,OP * orig_o,UV orig_action,U8 hints)1898 S_maybe_multideref(pTHX_ OP *start, OP *orig_o, UV orig_action, U8 hints)
1899 {
1900     int pass;
1901     UNOP_AUX_item *arg_buf = NULL;
1902     bool reset_start_targ  = FALSE; /* start->op_targ needs zeroing */
1903     int index_skip         = -1;    /* don't output index arg on this action */
1904 
1905     /* similar to regex compiling, do two passes; the first pass
1906      * determines whether the op chain is convertible and calculates the
1907      * buffer size; the second pass populates the buffer and makes any
1908      * changes necessary to ops (such as moving consts to the pad on
1909      * threaded builds).
1910      *
1911      * NB: for things like Coverity, note that both passes take the same
1912      * path through the logic tree (except for 'if (pass)' bits), since
1913      * both passes are following the same op_next chain; and in
1914      * particular, if it would return early on the second pass, it would
1915      * already have returned early on the first pass.
1916      */
1917     for (pass = 0; pass < 2; pass++) {
1918         OP *o                = orig_o;
1919         UV action            = orig_action;
1920         OP *first_elem_op    = NULL;  /* first seen aelem/helem */
1921         OP *top_op           = NULL;  /* highest [ah]elem/exists/del/rv2[ah]v */
1922         int action_count     = 0;     /* number of actions seen so far */
1923         int action_ix        = 0;     /* action_count % (actions per IV) */
1924         bool next_is_hash    = FALSE; /* is the next lookup to be a hash? */
1925         bool is_last         = FALSE; /* no more derefs to follow */
1926         bool maybe_aelemfast = FALSE; /* we can replace with aelemfast? */
1927         UV action_word       = 0;     /* all actions so far */
1928         size_t argi          = 0;
1929         UNOP_AUX_item *action_ptr = arg_buf;
1930 
1931         argi++; /* reserve slot for first action word */
1932 
1933         switch (action) {
1934         case MDEREF_HV_gvsv_vivify_rv2hv_helem:
1935         case MDEREF_HV_gvhv_helem:
1936             next_is_hash = TRUE;
1937             /* FALLTHROUGH */
1938         case MDEREF_AV_gvsv_vivify_rv2av_aelem:
1939         case MDEREF_AV_gvav_aelem:
1940             if (pass) {
1941 #ifdef USE_ITHREADS
1942                 arg_buf[argi].pad_offset = cPADOPx(start)->op_padix;
1943                 /* stop it being swiped when nulled */
1944                 cPADOPx(start)->op_padix = 0;
1945 #else
1946                 arg_buf[argi].sv = cSVOPx(start)->op_sv;
1947                 cSVOPx(start)->op_sv = NULL;
1948 #endif
1949             }
1950             argi++;
1951             break;
1952 
1953         case MDEREF_HV_padhv_helem:
1954         case MDEREF_HV_padsv_vivify_rv2hv_helem:
1955             next_is_hash = TRUE;
1956             /* FALLTHROUGH */
1957         case MDEREF_AV_padav_aelem:
1958         case MDEREF_AV_padsv_vivify_rv2av_aelem:
1959             if (pass) {
1960                 arg_buf[argi].pad_offset = start->op_targ;
1961                 /* we skip setting op_targ = 0 for now, since the intact
1962                  * OP_PADXV is needed by check_hash_fields_and_hekify */
1963                 reset_start_targ = TRUE;
1964             }
1965             argi++;
1966             break;
1967 
1968         case MDEREF_HV_pop_rv2hv_helem:
1969             next_is_hash = TRUE;
1970             /* FALLTHROUGH */
1971         case MDEREF_AV_pop_rv2av_aelem:
1972             break;
1973 
1974         default:
1975             NOT_REACHED; /* NOTREACHED */
1976             return;
1977         }
1978 
1979         while (!is_last) {
1980             /* look for another (rv2av/hv; get index;
1981              * aelem/helem/exists/delele) sequence */
1982 
1983             OP *kid;
1984             bool is_deref;
1985             bool ok;
1986             UV index_type = MDEREF_INDEX_none;
1987 
1988             if (action_count) {
1989                 /* if this is not the first lookup, consume the rv2av/hv  */
1990 
1991                 /* for N levels of aggregate lookup, we normally expect
1992                  * that the first N-1 [ah]elem ops will be flagged as
1993                  * /DEREF (so they autovivify if necessary), and the last
1994                  * lookup op not to be.
1995                  * For other things (like @{$h{k1}{k2}}) extra scope or
1996                  * leave ops can appear, so abandon the effort in that
1997                  * case */
1998                 if (o->op_type != OP_RV2AV && o->op_type != OP_RV2HV)
1999                     return;
2000 
2001                 /* rv2av or rv2hv sKR/1 */
2002 
2003                 ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_PARENS
2004                                             |OPf_REF|OPf_MOD|OPf_SPECIAL)));
2005                 if (o->op_flags != (OPf_WANT_SCALAR|OPf_KIDS|OPf_REF))
2006                     return;
2007 
2008                 /* at this point, we wouldn't expect any of these
2009                  * possible private flags:
2010                  * OPpMAYBE_LVSUB, OPpOUR_INTRO, OPpLVAL_INTRO
2011                  * OPpTRUEBOOL, OPpMAYBE_TRUEBOOL (rv2hv only)
2012                  */
2013                 ASSUME(!(o->op_private &
2014                     ~(OPpHINT_STRICT_REFS|OPpARG1_MASK|OPpSLICEWARNING)));
2015 
2016                 hints = (o->op_private & OPpHINT_STRICT_REFS);
2017 
2018                 /* make sure the type of the previous /DEREF matches the
2019                  * type of the next lookup */
2020                 ASSUME(o->op_type == (next_is_hash ? OP_RV2HV : OP_RV2AV));
2021                 top_op = o;
2022 
2023                 action = next_is_hash
2024                             ? MDEREF_HV_vivify_rv2hv_helem
2025                             : MDEREF_AV_vivify_rv2av_aelem;
2026                 o = o->op_next;
2027             }
2028 
2029             /* if this is the second pass, and we're at the depth where
2030              * previously we encountered a non-simple index expression,
2031              * stop processing the index at this point */
2032             if (action_count != index_skip) {
2033 
2034                 /* look for one or more simple ops that return an array
2035                  * index or hash key */
2036 
2037                 switch (o->op_type) {
2038                 case OP_PADSV:
2039                     /* it may be a lexical var index */
2040                     ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_PARENS
2041                                             |OPf_REF|OPf_MOD|OPf_SPECIAL)));
2042                     ASSUME(!(o->op_private &
2043                             ~(OPpPAD_STATE|OPpDEREF|OPpLVAL_INTRO)));
2044 
2045                     if (   OP_GIMME(o,0) == G_SCALAR
2046                         && !(o->op_flags & (OPf_REF|OPf_MOD))
2047                         && o->op_private == 0)
2048                     {
2049                         if (pass)
2050                             arg_buf[argi].pad_offset = o->op_targ;
2051                         argi++;
2052                         index_type = MDEREF_INDEX_padsv;
2053                         o = o->op_next;
2054                     }
2055                     break;
2056 
2057                 case OP_CONST:
2058                     if (next_is_hash) {
2059                         /* it's a constant hash index */
2060                         if (!(SvFLAGS(cSVOPo_sv) & (SVf_IOK|SVf_NOK|SVf_POK)))
2061                             /* "use constant foo => FOO; $h{+foo}" for
2062                              * some weird FOO, can leave you with constants
2063                              * that aren't simple strings. It's not worth
2064                              * the extra hassle for those edge cases */
2065                             break;
2066 
2067                         {
2068                             UNOP *rop = NULL;
2069                             OP * helem_op = o->op_next;
2070 
2071                             ASSUME(   helem_op->op_type == OP_HELEM
2072                                    || helem_op->op_type == OP_NULL
2073                                    || pass == 0);
2074                             if (helem_op->op_type == OP_HELEM) {
2075                                 rop = cUNOPx(cBINOPx(helem_op)->op_first);
2076                                 if (   helem_op->op_private & OPpLVAL_INTRO
2077                                     || rop->op_type != OP_RV2HV
2078                                 )
2079                                     rop = NULL;
2080                             }
2081                             /* on first pass just check; on second pass
2082                              * hekify */
2083                             check_hash_fields_and_hekify(rop, cSVOPo, pass);
2084                         }
2085 
2086                         if (pass) {
2087 #ifdef USE_ITHREADS
2088                             /* Relocate sv to the pad for thread safety */
2089                             op_relocate_sv(&cSVOPo->op_sv, &o->op_targ);
2090                             arg_buf[argi].pad_offset = o->op_targ;
2091                             o->op_targ = 0;
2092 #else
2093                             arg_buf[argi].sv = cSVOPx_sv(o);
2094 #endif
2095                         }
2096                     }
2097                     else {
2098                         /* it's a constant array index */
2099                         IV iv;
2100                         SV *ix_sv = cSVOPo->op_sv;
2101                         if (!SvIOK(ix_sv))
2102                             break;
2103                         iv = SvIV(ix_sv);
2104 
2105                         if (   action_count == 0
2106                             && iv >= -128
2107                             && iv <= 127
2108                             && (   action == MDEREF_AV_padav_aelem
2109                                 || action == MDEREF_AV_gvav_aelem)
2110                         )
2111                             maybe_aelemfast = TRUE;
2112 
2113                         if (pass) {
2114                             arg_buf[argi].iv = iv;
2115                             SvREFCNT_dec_NN(cSVOPo->op_sv);
2116                         }
2117                     }
2118                     if (pass)
2119                         /* we've taken ownership of the SV */
2120                         cSVOPo->op_sv = NULL;
2121                     argi++;
2122                     index_type = MDEREF_INDEX_const;
2123                     o = o->op_next;
2124                     break;
2125 
2126                 case OP_GV:
2127                     /* it may be a package var index */
2128 
2129                     ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_PARENS|OPf_SPECIAL)));
2130                     ASSUME(!(o->op_private & ~(OPpEARLY_CV)));
2131                     if (  (o->op_flags & ~(OPf_PARENS|OPf_SPECIAL)) != OPf_WANT_SCALAR
2132                         || o->op_private != 0
2133                     )
2134                         break;
2135 
2136                     kid = o->op_next;
2137                     if (kid->op_type != OP_RV2SV)
2138                         break;
2139 
2140                     ASSUME(!(kid->op_flags &
2141                             ~(OPf_WANT|OPf_KIDS|OPf_MOD|OPf_REF
2142                              |OPf_SPECIAL|OPf_PARENS)));
2143                     ASSUME(!(kid->op_private &
2144                                     ~(OPpARG1_MASK
2145                                      |OPpHINT_STRICT_REFS|OPpOUR_INTRO
2146                                      |OPpDEREF|OPpLVAL_INTRO)));
2147                     if(   (kid->op_flags &~ OPf_PARENS)
2148                             != (OPf_WANT_SCALAR|OPf_KIDS)
2149                        || (kid->op_private & ~(OPpARG1_MASK|HINT_STRICT_REFS))
2150                     )
2151                         break;
2152 
2153                     if (pass) {
2154 #ifdef USE_ITHREADS
2155                         arg_buf[argi].pad_offset = cPADOPx(o)->op_padix;
2156                         /* stop it being swiped when nulled */
2157                         cPADOPx(o)->op_padix = 0;
2158 #else
2159                         arg_buf[argi].sv = cSVOPx(o)->op_sv;
2160                         cSVOPo->op_sv = NULL;
2161 #endif
2162                     }
2163                     argi++;
2164                     index_type = MDEREF_INDEX_gvsv;
2165                     o = kid->op_next;
2166                     break;
2167 
2168                 } /* switch */
2169             } /* action_count != index_skip */
2170 
2171             action |= index_type;
2172 
2173 
2174             /* at this point we have either:
2175              *   * detected what looks like a simple index expression,
2176              *     and expect the next op to be an [ah]elem, or
2177              *     an nulled  [ah]elem followed by a delete or exists;
2178              *  * found a more complex expression, so something other
2179              *    than the above follows.
2180              */
2181 
2182             /* possibly an optimised away [ah]elem (where op_next is
2183              * exists or delete) */
2184             if (o->op_type == OP_NULL)
2185                 o = o->op_next;
2186 
2187             /* at this point we're looking for an OP_AELEM, OP_HELEM,
2188              * OP_EXISTS or OP_DELETE */
2189 
2190             /* if a custom array/hash access checker is in scope,
2191              * abandon optimisation attempt */
2192             if (  (o->op_type == OP_AELEM || o->op_type == OP_HELEM)
2193                && PL_check[o->op_type] != Perl_ck_null)
2194                 return;
2195             /* similarly for customised exists and delete */
2196             if (  (o->op_type == OP_EXISTS)
2197                && PL_check[o->op_type] != Perl_ck_exists)
2198                 return;
2199             if (  (o->op_type == OP_DELETE)
2200                && PL_check[o->op_type] != Perl_ck_delete)
2201                 return;
2202 
2203             if (   o->op_type != OP_AELEM
2204                 || (o->op_private &
2205                       (OPpLVAL_INTRO|OPpLVAL_DEFER|OPpDEREF|OPpMAYBE_LVSUB))
2206                 )
2207                 maybe_aelemfast = FALSE;
2208 
2209             /* look for aelem/helem/exists/delete. If it's not the last elem
2210              * lookup, it *must* have OPpDEREF_AV/HV, but not many other
2211              * flags; if it's the last, then it mustn't have
2212              * OPpDEREF_AV/HV, but may have lots of other flags, like
2213              * OPpLVAL_INTRO etc
2214              */
2215 
2216             if (   index_type == MDEREF_INDEX_none
2217                 || (   o->op_type != OP_AELEM  && o->op_type != OP_HELEM
2218                     && o->op_type != OP_EXISTS && o->op_type != OP_DELETE)
2219             )
2220                 ok = FALSE;
2221             else {
2222                 /* we have aelem/helem/exists/delete with valid simple index */
2223 
2224                 is_deref =    (o->op_type == OP_AELEM || o->op_type == OP_HELEM)
2225                            && (   (o->op_private & OPpDEREF) == OPpDEREF_AV
2226                                || (o->op_private & OPpDEREF) == OPpDEREF_HV);
2227 
2228                 /* This doesn't make much sense but is legal:
2229                  *    @{ local $x[0][0] } = 1
2230                  * Since scope exit will undo the autovivification,
2231                  * don't bother in the first place. The OP_LEAVE
2232                  * assertion is in case there are other cases of both
2233                  * OPpLVAL_INTRO and OPpDEREF which don't include a scope
2234                  * exit that would undo the local - in which case this
2235                  * block of code would need rethinking.
2236                  */
2237                 if (is_deref && (o->op_private & OPpLVAL_INTRO)) {
2238 #ifdef DEBUGGING
2239                     OP *n = o->op_next;
2240                     while (n && (  n->op_type == OP_NULL
2241                                 || n->op_type == OP_LIST
2242                                 || n->op_type == OP_SCALAR))
2243                         n = n->op_next;
2244                     assert(n && n->op_type == OP_LEAVE);
2245 #endif
2246                     o->op_private &= ~OPpDEREF;
2247                     is_deref = FALSE;
2248                 }
2249 
2250                 if (is_deref) {
2251                     ASSUME(!(o->op_flags &
2252                                  ~(OPf_WANT|OPf_KIDS|OPf_MOD|OPf_PARENS)));
2253                     ASSUME(!(o->op_private & ~(OPpARG2_MASK|OPpDEREF)));
2254 
2255                     ok =    (o->op_flags &~ OPf_PARENS)
2256                                == (OPf_WANT_SCALAR|OPf_KIDS|OPf_MOD)
2257                          && !(o->op_private & ~(OPpDEREF|OPpARG2_MASK));
2258                 }
2259                 else if (o->op_type == OP_EXISTS) {
2260                     ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_PARENS
2261                                 |OPf_REF|OPf_MOD|OPf_SPECIAL)));
2262                     ASSUME(!(o->op_private & ~(OPpARG1_MASK|OPpEXISTS_SUB)));
2263                     ok =  !(o->op_private & ~OPpARG1_MASK);
2264                 }
2265                 else if (o->op_type == OP_DELETE) {
2266                     ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_PARENS
2267                                 |OPf_REF|OPf_MOD|OPf_SPECIAL)));
2268                     ASSUME(!(o->op_private &
2269                                     ~(OPpARG1_MASK|OPpSLICE|OPpLVAL_INTRO)));
2270                     /* don't handle slices or 'local delete'; the latter
2271                      * is fairly rare, and has a complex runtime */
2272                     ok =  !(o->op_private & ~OPpARG1_MASK);
2273                     if (OP_TYPE_IS_OR_WAS(cUNOPo->op_first, OP_AELEM))
2274                         /* skip handling run-tome error */
2275                         ok = (ok && cBOOL(o->op_flags & OPf_SPECIAL));
2276                 }
2277                 else {
2278                     ASSUME(o->op_type == OP_AELEM || o->op_type == OP_HELEM);
2279                     ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_MOD
2280                                             |OPf_PARENS|OPf_REF|OPf_SPECIAL)));
2281                     ASSUME(!(o->op_private & ~(OPpARG2_MASK|OPpMAYBE_LVSUB
2282                                     |OPpLVAL_DEFER|OPpDEREF|OPpLVAL_INTRO)));
2283                     ok = (o->op_private & OPpDEREF) != OPpDEREF_SV;
2284                 }
2285             }
2286 
2287             if (ok) {
2288                 if (!first_elem_op)
2289                     first_elem_op = o;
2290                 top_op = o;
2291                 if (is_deref) {
2292                     next_is_hash = cBOOL((o->op_private & OPpDEREF) == OPpDEREF_HV);
2293                     o = o->op_next;
2294                 }
2295                 else {
2296                     is_last = TRUE;
2297                     action |= MDEREF_FLAG_last;
2298                 }
2299             }
2300             else {
2301                 /* at this point we have something that started
2302                  * promisingly enough (with rv2av or whatever), but failed
2303                  * to find a simple index followed by an
2304                  * aelem/helem/exists/delete. If this is the first action,
2305                  * give up; but if we've already seen at least one
2306                  * aelem/helem, then keep them and add a new action with
2307                  * MDEREF_INDEX_none, which causes it to do the vivify
2308                  * from the end of the previous lookup, and do the deref,
2309                  * but stop at that point. So $a[0][expr] will do one
2310                  * av_fetch, vivify and deref, then continue executing at
2311                  * expr */
2312                 if (!action_count)
2313                     return;
2314                 is_last = TRUE;
2315                 index_skip = action_count;
2316                 action |= MDEREF_FLAG_last;
2317                 if (index_type != MDEREF_INDEX_none)
2318                     argi--;
2319             }
2320 
2321             action_word |= (action << (action_ix * MDEREF_SHIFT));
2322             action_ix++;
2323             action_count++;
2324             /* if there's no space for the next action, reserve a new slot
2325              * for it *before* we start adding args for that action */
2326             if ((action_ix + 1) * MDEREF_SHIFT > UVSIZE*8) {
2327                 if (pass) {
2328                     action_ptr->uv = action_word;
2329                     action_ptr = arg_buf + argi;
2330                 }
2331                 action_word = 0;
2332                 argi++;
2333                 action_ix = 0;
2334             }
2335         } /* while !is_last */
2336 
2337         /* success! */
2338 
2339         if (!action_ix)
2340             /* slot reserved for next action word not now needed */
2341             argi--;
2342         else if (pass)
2343             action_ptr->uv = action_word;
2344 
2345         if (pass) {
2346             OP *mderef;
2347             OP *p, *q;
2348 
2349             mderef = newUNOP_AUX(OP_MULTIDEREF, 0, NULL, arg_buf);
2350             if (index_skip == -1) {
2351                 mderef->op_flags = o->op_flags
2352                         & (OPf_WANT|OPf_MOD|(next_is_hash ? OPf_SPECIAL : 0));
2353                 if (o->op_type == OP_EXISTS)
2354                     mderef->op_private = OPpMULTIDEREF_EXISTS;
2355                 else if (o->op_type == OP_DELETE)
2356                     mderef->op_private = OPpMULTIDEREF_DELETE;
2357                 else
2358                     mderef->op_private = o->op_private
2359                         & (OPpMAYBE_LVSUB|OPpLVAL_DEFER|OPpLVAL_INTRO);
2360             }
2361             /* accumulate strictness from every level (although I don't think
2362              * they can actually vary) */
2363             mderef->op_private |= hints;
2364 
2365             /* integrate the new multideref op into the optree and the
2366              * op_next chain.
2367              *
2368              * In general an op like aelem or helem has two child
2369              * sub-trees: the aggregate expression (a_expr) and the
2370              * index expression (i_expr):
2371              *
2372              *     aelem
2373              *       |
2374              *     a_expr - i_expr
2375              *
2376              * The a_expr returns an AV or HV, while the i-expr returns an
2377              * index. In general a multideref replaces most or all of a
2378              * multi-level tree, e.g.
2379              *
2380              *     exists
2381              *       |
2382              *     ex-aelem
2383              *       |
2384              *     rv2av  - i_expr1
2385              *       |
2386              *     helem
2387              *       |
2388              *     rv2hv  - i_expr2
2389              *       |
2390              *     aelem
2391              *       |
2392              *     a_expr - i_expr3
2393              *
2394              * With multideref, all the i_exprs will be simple vars or
2395              * constants, except that i_expr1 may be arbitrary in the case
2396              * of MDEREF_INDEX_none.
2397              *
2398              * The bottom-most a_expr will be either:
2399              *   1) a simple var (so padXv or gv+rv2Xv);
2400              *   2) a simple scalar var dereferenced (e.g. $r->[0]):
2401              *      so a simple var with an extra rv2Xv;
2402              *   3) or an arbitrary expression.
2403              *
2404              * 'start', the first op in the execution chain, will point to
2405              *   1),2): the padXv or gv op;
2406              *   3):    the rv2Xv which forms the last op in the a_expr
2407              *          execution chain, and the top-most op in the a_expr
2408              *          subtree.
2409              *
2410              * For all cases, the 'start' node is no longer required,
2411              * but we can't free it since one or more external nodes
2412              * may point to it. E.g. consider
2413              *     $h{foo} = $a ? $b : $c
2414              * Here, both the op_next and op_other branches of the
2415              * cond_expr point to the gv[*h] of the hash expression, so
2416              * we can't free the 'start' op.
2417              *
2418              * For expr->[...], we need to save the subtree containing the
2419              * expression; for the other cases, we just need to save the
2420              * start node.
2421              * So in all cases, we null the start op and keep it around by
2422              * making it the child of the multideref op; for the expr->
2423              * case, the expr will be a subtree of the start node.
2424              *
2425              * So in the simple 1,2 case the  optree above changes to
2426              *
2427              *     ex-exists
2428              *       |
2429              *     multideref
2430              *       |
2431              *     ex-gv (or ex-padxv)
2432              *
2433              *  with the op_next chain being
2434              *
2435              *  -> ex-gv -> multideref -> op-following-ex-exists ->
2436              *
2437              *  In the 3 case, we have
2438              *
2439              *     ex-exists
2440              *       |
2441              *     multideref
2442              *       |
2443              *     ex-rv2xv
2444              *       |
2445              *    rest-of-a_expr
2446              *      subtree
2447              *
2448              *  and
2449              *
2450              *  -> rest-of-a_expr subtree ->
2451              *    ex-rv2xv -> multideref -> op-following-ex-exists ->
2452              *
2453              *
2454              * Where the last i_expr is non-simple (i.e. MDEREF_INDEX_none,
2455              * e.g. $a[0]{foo}[$x+1], the next rv2xv is nulled and the
2456              * multideref attached as the child, e.g.
2457              *
2458              *     exists
2459              *       |
2460              *     ex-aelem
2461              *       |
2462              *     ex-rv2av  - i_expr1
2463              *       |
2464              *     multideref
2465              *       |
2466              *     ex-whatever
2467              *
2468              */
2469 
2470             /* if we free this op, don't free the pad entry */
2471             if (reset_start_targ)
2472                 start->op_targ = 0;
2473 
2474 
2475             /* Cut the bit we need to save out of the tree and attach to
2476              * the multideref op, then free the rest of the tree */
2477 
2478             /* find parent of node to be detached (for use by splice) */
2479             p = first_elem_op;
2480             if (   orig_action == MDEREF_AV_pop_rv2av_aelem
2481                 || orig_action == MDEREF_HV_pop_rv2hv_helem)
2482             {
2483                 /* there is an arbitrary expression preceding us, e.g.
2484                  * expr->[..]? so we need to save the 'expr' subtree */
2485                 if (p->op_type == OP_EXISTS || p->op_type == OP_DELETE)
2486                     p = cUNOPx(p)->op_first;
2487                 ASSUME(   start->op_type == OP_RV2AV
2488                        || start->op_type == OP_RV2HV);
2489             }
2490             else {
2491                 /* either a padXv or rv2Xv+gv, maybe with an ex-Xelem
2492                  * above for exists/delete. */
2493                 while (   (p->op_flags & OPf_KIDS)
2494                        && cUNOPx(p)->op_first != start
2495                 )
2496                     p = cUNOPx(p)->op_first;
2497             }
2498             ASSUME(cUNOPx(p)->op_first == start);
2499 
2500             /* detach from main tree, and re-attach under the multideref */
2501             op_sibling_splice(mderef, NULL, 0,
2502                     op_sibling_splice(p, NULL, 1, NULL));
2503             op_null(start);
2504 
2505             start->op_next = mderef;
2506 
2507             mderef->op_next = index_skip == -1 ? o->op_next : o;
2508 
2509             /* excise and free the original tree, and replace with
2510              * the multideref op */
2511             p = op_sibling_splice(top_op, NULL, -1, mderef);
2512             while (p) {
2513                 q = OpSIBLING(p);
2514                 op_free(p);
2515                 p = q;
2516             }
2517             op_null(top_op);
2518         }
2519         else {
2520             Size_t size = argi;
2521 
2522             if (maybe_aelemfast && action_count == 1)
2523                 return;
2524 
2525             arg_buf = (UNOP_AUX_item*)PerlMemShared_malloc(
2526                                 sizeof(UNOP_AUX_item) * (size + 1));
2527             /* for dumping etc: store the length in a hidden first slot;
2528              * we set the op_aux pointer to the second slot */
2529             arg_buf->uv = size;
2530             arg_buf++;
2531         }
2532     } /* for (pass = ...) */
2533 }
2534 
2535 /* See if the ops following o are such that o will always be executed in
2536  * boolean context: that is, the SV which o pushes onto the stack will
2537  * only ever be consumed by later ops via SvTRUE(sv) or similar.
2538  * If so, set a suitable private flag on o. Normally this will be
2539  * bool_flag; but see below why maybe_flag is needed too.
2540  *
2541  * Typically the two flags you pass will be the generic OPpTRUEBOOL and
2542  * OPpMAYBE_TRUEBOOL, buts it's possible that for some ops those bits may
2543  * already be taken, so you'll have to give that op two different flags.
2544  *
2545  * More explanation of 'maybe_flag' and 'safe_and' parameters.
2546  * The binary logical ops &&, ||, // (plus 'if' and 'unless' which use
2547  * those underlying ops) short-circuit, which means that rather than
2548  * necessarily returning a truth value, they may return the LH argument,
2549  * which may not be boolean. For example in $x = (keys %h || -1), keys
2550  * should return a key count rather than a boolean, even though its
2551  * sort-of being used in boolean context.
2552  *
2553  * So we only consider such logical ops to provide boolean context to
2554  * their LH argument if they themselves are in void or boolean context.
2555  * However, sometimes the context isn't known until run-time. In this
2556  * case the op is marked with the maybe_flag flag it.
2557  *
2558  * Consider the following.
2559  *
2560  *     sub f { ....;  if (%h) { .... } }
2561  *
2562  * This is actually compiled as
2563  *
2564  *     sub f { ....;  %h && do { .... } }
2565  *
2566  * Here we won't know until runtime whether the final statement (and hence
2567  * the &&) is in void context and so is safe to return a boolean value.
2568  * So mark o with maybe_flag rather than the bool_flag.
2569  * Note that there is cost associated with determining context at runtime
2570  * (e.g. a call to block_gimme()), so it may not be worth setting (at
2571  * compile time) and testing (at runtime) maybe_flag if the scalar verses
2572  * boolean costs savings are marginal.
2573  *
2574  * However, we can do slightly better with && (compared to || and //):
2575  * this op only returns its LH argument when that argument is false. In
2576  * this case, as long as the op promises to return a false value which is
2577  * valid in both boolean and scalar contexts, we can mark an op consumed
2578  * by && with bool_flag rather than maybe_flag.
2579  * For example as long as pp_padhv and pp_rv2hv return &PL_sv_zero rather
2580  * than &PL_sv_no for a false result in boolean context, then it's safe. An
2581  * op which promises to handle this case is indicated by setting safe_and
2582  * to true.
2583  */
2584 
2585 static void
S_check_for_bool_cxt(OP * o,bool safe_and,U8 bool_flag,U8 maybe_flag)2586 S_check_for_bool_cxt(OP*o, bool safe_and, U8 bool_flag, U8 maybe_flag)
2587 {
2588     OP *lop;
2589     U8 flag = 0;
2590 
2591     assert((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR);
2592 
2593     /* OPpTARGET_MY and boolean context probably don't mix well.
2594      * If someone finds a valid use case, maybe add an extra flag to this
2595      * function which indicates its safe to do so for this op? */
2596     assert(!(   (PL_opargs[o->op_type] & OA_TARGLEX)
2597              && (o->op_private & OPpTARGET_MY)));
2598 
2599     lop = o->op_next;
2600 
2601     while (lop) {
2602         switch (lop->op_type) {
2603         case OP_NULL:
2604         case OP_SCALAR:
2605             break;
2606 
2607         /* these two consume the stack argument in the scalar case,
2608          * and treat it as a boolean in the non linenumber case */
2609         case OP_FLIP:
2610         case OP_FLOP:
2611             if (   ((lop->op_flags & OPf_WANT) == OPf_WANT_LIST)
2612                 || (lop->op_private & OPpFLIP_LINENUM))
2613             {
2614                 lop = NULL;
2615                 break;
2616             }
2617             /* FALLTHROUGH */
2618         /* these never leave the original value on the stack */
2619         case OP_NOT:
2620         case OP_XOR:
2621         case OP_COND_EXPR:
2622         case OP_GREPWHILE:
2623             flag = bool_flag;
2624             lop = NULL;
2625             break;
2626 
2627         /* OR DOR and AND evaluate their arg as a boolean, but then may
2628          * leave the original scalar value on the stack when following the
2629          * op_next route. If not in void context, we need to ensure
2630          * that whatever follows consumes the arg only in boolean context
2631          * too.
2632          */
2633         case OP_AND:
2634             if (safe_and) {
2635                 flag = bool_flag;
2636                 lop = NULL;
2637                 break;
2638             }
2639             /* FALLTHROUGH */
2640         case OP_OR:
2641         case OP_DOR:
2642             if ((lop->op_flags & OPf_WANT) == OPf_WANT_VOID) {
2643                 flag = bool_flag;
2644                 lop = NULL;
2645             }
2646             else if (!(lop->op_flags & OPf_WANT)) {
2647                 /* unknown context - decide at runtime */
2648                 flag = maybe_flag;
2649                 lop = NULL;
2650             }
2651             break;
2652 
2653         default:
2654             lop = NULL;
2655             break;
2656         }
2657 
2658         if (lop)
2659             lop = lop->op_next;
2660     }
2661 
2662     o->op_private |= flag;
2663 }
2664 
2665 /* mechanism for deferring recursion in rpeep() */
2666 
2667 #define MAX_DEFERRED 4
2668 
2669 #define DEFER(o) \
2670   STMT_START { \
2671     if (defer_ix == (MAX_DEFERRED-1)) { \
2672         OP **defer = defer_queue[defer_base]; \
2673         CALL_RPEEP(*defer); \
2674         op_prune_chain_head(defer); \
2675         defer_base = (defer_base + 1) % MAX_DEFERRED; \
2676         defer_ix--; \
2677     } \
2678     defer_queue[(defer_base + ++defer_ix) % MAX_DEFERRED] = &(o); \
2679   } STMT_END
2680 
2681 #define IS_AND_OP(o)   (o->op_type == OP_AND)
2682 #define IS_OR_OP(o)    (o->op_type == OP_OR)
2683 
2684 /* A peephole optimizer.  We visit the ops in the order they're to execute.
2685  * See the comments at the top of this file for more details about when
2686  * peep() is called */
2687 
2688 void
Perl_rpeep(pTHX_ OP * o)2689 Perl_rpeep(pTHX_ OP *o)
2690 {
2691     OP* oldop = NULL;
2692     OP* oldoldop = NULL;
2693     OP** defer_queue[MAX_DEFERRED] = { NULL }; /* small queue of deferred branches */
2694     int defer_base = 0;
2695     int defer_ix = -1;
2696 
2697     if (!o || o->op_opt)
2698         return;
2699 
2700     assert(o->op_type != OP_FREED);
2701 
2702     ENTER;
2703     SAVEOP();
2704     SAVEVPTR(PL_curcop);
2705     for (;; o = o->op_next) {
2706         if (o && o->op_opt)
2707             o = NULL;
2708         if (!o) {
2709             while (defer_ix >= 0) {
2710                 OP **defer =
2711                         defer_queue[(defer_base + defer_ix--) % MAX_DEFERRED];
2712                 CALL_RPEEP(*defer);
2713                 op_prune_chain_head(defer);
2714             }
2715             break;
2716         }
2717 
2718       redo:
2719 
2720         /* oldoldop -> oldop -> o should be a chain of 3 adjacent ops */
2721         assert(!oldoldop || oldoldop->op_next == oldop);
2722         assert(!oldop    || oldop->op_next    == o);
2723 
2724         /* By default, this op has now been optimised. A couple of cases below
2725            clear this again.  */
2726         o->op_opt = 1;
2727         PL_op = o;
2728 
2729         /* look for a series of 1 or more aggregate derefs, e.g.
2730          *   $a[1]{foo}[$i]{$k}
2731          * and replace with a single OP_MULTIDEREF op.
2732          * Each index must be either a const, or a simple variable,
2733          *
2734          * First, look for likely combinations of starting ops,
2735          * corresponding to (global and lexical variants of)
2736          *     $a[...]   $h{...}
2737          *     $r->[...] $r->{...}
2738          *     (preceding expression)->[...]
2739          *     (preceding expression)->{...}
2740          * and if so, call maybe_multideref() to do a full inspection
2741          * of the op chain and if appropriate, replace with an
2742          * OP_MULTIDEREF
2743          */
2744         {
2745             UV action;
2746             OP *o2 = o;
2747             U8 hints = 0;
2748 
2749             switch (o2->op_type) {
2750             case OP_GV:
2751                 /* $pkg[..]   :   gv[*pkg]
2752                  * $pkg->[...]:   gv[*pkg]; rv2sv sKM/DREFAV */
2753 
2754                 /* Fail if there are new op flag combinations that we're
2755                  * not aware of, rather than:
2756                  *  * silently failing to optimise, or
2757                  *  * silently optimising the flag away.
2758                  * If this ASSUME starts failing, examine what new flag
2759                  * has been added to the op, and decide whether the
2760                  * optimisation should still occur with that flag, then
2761                  * update the code accordingly. This applies to all the
2762                  * other ASSUMEs in the block of code too.
2763                  */
2764                 ASSUME(!(o2->op_flags &
2765                             ~(OPf_WANT|OPf_MOD|OPf_PARENS|OPf_SPECIAL)));
2766                 ASSUME(!(o2->op_private & ~OPpEARLY_CV));
2767 
2768                 o2 = o2->op_next;
2769 
2770                 if (o2->op_type == OP_RV2AV) {
2771                     action = MDEREF_AV_gvav_aelem;
2772                     goto do_deref;
2773                 }
2774 
2775                 if (o2->op_type == OP_RV2HV) {
2776                     action = MDEREF_HV_gvhv_helem;
2777                     goto do_deref;
2778                 }
2779 
2780                 if (o2->op_type != OP_RV2SV)
2781                     break;
2782 
2783                 /* at this point we've seen gv,rv2sv, so the only valid
2784                  * construct left is $pkg->[] or $pkg->{} */
2785 
2786                 ASSUME(!(o2->op_flags & OPf_STACKED));
2787                 if ((o2->op_flags & (OPf_WANT|OPf_REF|OPf_MOD|OPf_SPECIAL))
2788                             != (OPf_WANT_SCALAR|OPf_MOD))
2789                     break;
2790 
2791                 ASSUME(!(o2->op_private & ~(OPpARG1_MASK|HINT_STRICT_REFS
2792                                     |OPpOUR_INTRO|OPpDEREF|OPpLVAL_INTRO)));
2793                 if (o2->op_private & (OPpOUR_INTRO|OPpLVAL_INTRO))
2794                     break;
2795                 if (   (o2->op_private & OPpDEREF) != OPpDEREF_AV
2796                     && (o2->op_private & OPpDEREF) != OPpDEREF_HV)
2797                     break;
2798 
2799                 o2 = o2->op_next;
2800                 if (o2->op_type == OP_RV2AV) {
2801                     action = MDEREF_AV_gvsv_vivify_rv2av_aelem;
2802                     goto do_deref;
2803                 }
2804                 if (o2->op_type == OP_RV2HV) {
2805                     action = MDEREF_HV_gvsv_vivify_rv2hv_helem;
2806                     goto do_deref;
2807                 }
2808                 break;
2809 
2810             case OP_PADSV:
2811                 /* $lex->[...]: padsv[$lex] sM/DREFAV */
2812 
2813                 ASSUME(!(o2->op_flags &
2814                     ~(OPf_WANT|OPf_PARENS|OPf_REF|OPf_MOD|OPf_SPECIAL)));
2815                 if ((o2->op_flags &
2816                         (OPf_WANT|OPf_REF|OPf_MOD|OPf_SPECIAL))
2817                      != (OPf_WANT_SCALAR|OPf_MOD))
2818                     break;
2819 
2820                 ASSUME(!(o2->op_private &
2821                                 ~(OPpPAD_STATE|OPpDEREF|OPpLVAL_INTRO)));
2822                 /* skip if state or intro, or not a deref */
2823                 if (      o2->op_private != OPpDEREF_AV
2824                        && o2->op_private != OPpDEREF_HV)
2825                     break;
2826 
2827                 o2 = o2->op_next;
2828                 if (o2->op_type == OP_RV2AV) {
2829                     action = MDEREF_AV_padsv_vivify_rv2av_aelem;
2830                     goto do_deref;
2831                 }
2832                 if (o2->op_type == OP_RV2HV) {
2833                     action = MDEREF_HV_padsv_vivify_rv2hv_helem;
2834                     goto do_deref;
2835                 }
2836                 break;
2837 
2838             case OP_PADAV:
2839             case OP_PADHV:
2840                 /*    $lex[..]:  padav[@lex:1,2] sR *
2841                  * or $lex{..}:  padhv[%lex:1,2] sR */
2842                 ASSUME(!(o2->op_flags & ~(OPf_WANT|OPf_MOD|OPf_PARENS|
2843                                             OPf_REF|OPf_SPECIAL)));
2844                 if ((o2->op_flags &
2845                         (OPf_WANT|OPf_REF|OPf_MOD|OPf_SPECIAL))
2846                      != (OPf_WANT_SCALAR|OPf_REF))
2847                     break;
2848                 if (o2->op_flags != (OPf_WANT_SCALAR|OPf_REF))
2849                     break;
2850                 /* OPf_PARENS isn't currently used in this case;
2851                  * if that changes, let us know! */
2852                 ASSUME(!(o2->op_flags & OPf_PARENS));
2853 
2854                 /* at this point, we wouldn't expect any of the remaining
2855                  * possible private flags:
2856                  * OPpPAD_STATE, OPpLVAL_INTRO, OPpTRUEBOOL,
2857                  * OPpMAYBE_TRUEBOOL, OPpMAYBE_LVSUB
2858                  *
2859                  * OPpSLICEWARNING shouldn't affect runtime
2860                  */
2861                 ASSUME(!(o2->op_private & ~(OPpSLICEWARNING)));
2862 
2863                 action = o2->op_type == OP_PADAV
2864                             ? MDEREF_AV_padav_aelem
2865                             : MDEREF_HV_padhv_helem;
2866                 o2 = o2->op_next;
2867                 S_maybe_multideref(aTHX_ o, o2, action, 0);
2868                 break;
2869 
2870 
2871             case OP_RV2AV:
2872             case OP_RV2HV:
2873                 action = o2->op_type == OP_RV2AV
2874                             ? MDEREF_AV_pop_rv2av_aelem
2875                             : MDEREF_HV_pop_rv2hv_helem;
2876                 /* FALLTHROUGH */
2877             do_deref:
2878                 /* (expr)->[...]:  rv2av sKR/1;
2879                  * (expr)->{...}:  rv2hv sKR/1; */
2880 
2881                 ASSUME(o2->op_type == OP_RV2AV || o2->op_type == OP_RV2HV);
2882 
2883                 ASSUME(!(o2->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_PARENS
2884                                 |OPf_REF|OPf_MOD|OPf_STACKED|OPf_SPECIAL)));
2885                 if (o2->op_flags != (OPf_WANT_SCALAR|OPf_KIDS|OPf_REF))
2886                     break;
2887 
2888                 /* at this point, we wouldn't expect any of these
2889                  * possible private flags:
2890                  * OPpMAYBE_LVSUB, OPpLVAL_INTRO
2891                  * OPpTRUEBOOL, OPpMAYBE_TRUEBOOL, (rv2hv only)
2892                  */
2893                 ASSUME(!(o2->op_private &
2894                     ~(OPpHINT_STRICT_REFS|OPpARG1_MASK|OPpSLICEWARNING
2895                      |OPpOUR_INTRO)));
2896                 hints |= (o2->op_private & OPpHINT_STRICT_REFS);
2897 
2898                 o2 = o2->op_next;
2899 
2900                 S_maybe_multideref(aTHX_ o, o2, action, hints);
2901                 break;
2902 
2903             default:
2904                 break;
2905             }
2906         }
2907 
2908 
2909         switch (o->op_type) {
2910         case OP_DBSTATE:
2911             PL_curcop = ((COP*)o);		/* for warnings */
2912             break;
2913         case OP_NEXTSTATE:
2914             PL_curcop = ((COP*)o);		/* for warnings */
2915 
2916             /* Optimise a "return ..." at the end of a sub to just be "...".
2917              * This saves 2 ops. Before:
2918              * 1  <;> nextstate(main 1 -e:1) v ->2
2919              * 4  <@> return K ->5
2920              * 2    <0> pushmark s ->3
2921              * -    <1> ex-rv2sv sK/1 ->4
2922              * 3      <#> gvsv[*cat] s ->4
2923              *
2924              * After:
2925              * -  <@> return K ->-
2926              * -    <0> pushmark s ->2
2927              * -    <1> ex-rv2sv sK/1 ->-
2928              * 2      <$> gvsv(*cat) s ->3
2929              */
2930             {
2931                 OP *next = o->op_next;
2932                 OP *sibling = OpSIBLING(o);
2933                 if (   OP_TYPE_IS(next, OP_PUSHMARK)
2934                     && OP_TYPE_IS(sibling, OP_RETURN)
2935                     && OP_TYPE_IS(sibling->op_next, OP_LINESEQ)
2936                     && ( OP_TYPE_IS(sibling->op_next->op_next, OP_LEAVESUB)
2937                        ||OP_TYPE_IS(sibling->op_next->op_next,
2938                                     OP_LEAVESUBLV))
2939                     && cUNOPx(sibling)->op_first == next
2940                     && OpHAS_SIBLING(next) && OpSIBLING(next)->op_next
2941                     && next->op_next
2942                 ) {
2943                     /* Look through the PUSHMARK's siblings for one that
2944                      * points to the RETURN */
2945                     OP *top = OpSIBLING(next);
2946                     while (top && top->op_next) {
2947                         if (top->op_next == sibling) {
2948                             top->op_next = sibling->op_next;
2949                             o->op_next = next->op_next;
2950                             break;
2951                         }
2952                         top = OpSIBLING(top);
2953                     }
2954                 }
2955             }
2956 
2957             /* Optimise 'my $x; my $y;' into 'my ($x, $y);'
2958              *
2959              * This latter form is then suitable for conversion into padrange
2960              * later on. Convert:
2961              *
2962              *   nextstate1 -> padop1 -> nextstate2 -> padop2 -> nextstate3
2963              *
2964              * into:
2965              *
2966              *   nextstate1 ->     listop     -> nextstate3
2967              *                 /            \
2968              *         pushmark -> padop1 -> padop2
2969              */
2970             if (o->op_next && (
2971                     o->op_next->op_type == OP_PADSV
2972                  || o->op_next->op_type == OP_PADAV
2973                  || o->op_next->op_type == OP_PADHV
2974                 )
2975                 && !(o->op_next->op_private & ~OPpLVAL_INTRO)
2976                 && o->op_next->op_next && o->op_next->op_next->op_type == OP_NEXTSTATE
2977                 && o->op_next->op_next->op_next && (
2978                     o->op_next->op_next->op_next->op_type == OP_PADSV
2979                  || o->op_next->op_next->op_next->op_type == OP_PADAV
2980                  || o->op_next->op_next->op_next->op_type == OP_PADHV
2981                 )
2982                 && !(o->op_next->op_next->op_next->op_private & ~OPpLVAL_INTRO)
2983                 && o->op_next->op_next->op_next->op_next && o->op_next->op_next->op_next->op_next->op_type == OP_NEXTSTATE
2984                 && (!CopLABEL((COP*)o)) /* Don't mess with labels */
2985                 && (!CopLABEL((COP*)o->op_next->op_next)) /* ... */
2986             ) {
2987                 OP *pad1, *ns2, *pad2, *ns3, *newop, *newpm;
2988 
2989                 pad1 =    o->op_next;
2990                 ns2  = pad1->op_next;
2991                 pad2 =  ns2->op_next;
2992                 ns3  = pad2->op_next;
2993 
2994                 /* we assume here that the op_next chain is the same as
2995                  * the op_sibling chain */
2996                 assert(OpSIBLING(o)    == pad1);
2997                 assert(OpSIBLING(pad1) == ns2);
2998                 assert(OpSIBLING(ns2)  == pad2);
2999                 assert(OpSIBLING(pad2) == ns3);
3000 
3001                 /* excise and delete ns2 */
3002                 op_sibling_splice(NULL, pad1, 1, NULL);
3003                 op_free(ns2);
3004 
3005                 /* excise pad1 and pad2 */
3006                 op_sibling_splice(NULL, o, 2, NULL);
3007 
3008                 /* create new listop, with children consisting of:
3009                  * a new pushmark, pad1, pad2. */
3010                 newop = newLISTOP(OP_LIST, 0, pad1, pad2);
3011                 newop->op_flags |= OPf_PARENS;
3012                 newop->op_flags = (newop->op_flags & ~OPf_WANT) | OPf_WANT_VOID;
3013 
3014                 /* insert newop between o and ns3 */
3015                 op_sibling_splice(NULL, o, 0, newop);
3016 
3017                 /*fixup op_next chain */
3018                 newpm = cUNOPx(newop)->op_first; /* pushmark */
3019                 o    ->op_next = newpm;
3020                 newpm->op_next = pad1;
3021                 pad1 ->op_next = pad2;
3022                 pad2 ->op_next = newop; /* listop */
3023                 newop->op_next = ns3;
3024 
3025                 /* Ensure pushmark has this flag if padops do */
3026                 if (pad1->op_flags & OPf_MOD && pad2->op_flags & OPf_MOD) {
3027                     newpm->op_flags |= OPf_MOD;
3028                 }
3029 
3030                 break;
3031             }
3032 
3033             /* Two NEXTSTATEs in a row serve no purpose. Except if they happen
3034                to carry two labels. For now, take the easier option, and skip
3035                this optimisation if the first NEXTSTATE has a label.
3036                Yves asked what about if they have different hints or features?
3037                Tony thinks that as we remove the first of the pair it should
3038                be fine.
3039             */
3040             if (!CopLABEL((COP*)o) && !PERLDB_NOOPT) {
3041                 OP *nextop = o->op_next;
3042                 while (nextop) {
3043                     switch (nextop->op_type) {
3044                         case OP_NULL:
3045                         case OP_SCALAR:
3046                         case OP_LINESEQ:
3047                         case OP_SCOPE:
3048                             nextop = nextop->op_next;
3049                             continue;
3050                     }
3051                     break;
3052                 }
3053 
3054                 if (nextop && (nextop->op_type == OP_NEXTSTATE)) {
3055                     op_null(o);
3056                     if (oldop)
3057                         oldop->op_next = nextop;
3058                     o = nextop;
3059                     /* Skip (old)oldop assignment since the current oldop's
3060                        op_next already points to the next op.  */
3061                     goto redo;
3062                 }
3063             }
3064             break;
3065 
3066         case OP_CONCAT:
3067             if (o->op_next && o->op_next->op_type == OP_STRINGIFY) {
3068                 if (o->op_next->op_private & OPpTARGET_MY) {
3069                     if (o->op_flags & OPf_STACKED) /* chained concats */
3070                         break; /* ignore_optimization */
3071                     else {
3072                         /* assert(PL_opargs[o->op_type] & OA_TARGLEX); */
3073                         o->op_targ = o->op_next->op_targ;
3074                         o->op_next->op_targ = 0;
3075                         o->op_private |= OPpTARGET_MY;
3076                     }
3077                 }
3078                 op_null(o->op_next);
3079             }
3080             break;
3081         case OP_STUB:
3082             if ((o->op_flags & OPf_WANT) != OPf_WANT_LIST) {
3083                 break; /* Scalar stub must produce undef.  List stub is noop */
3084             }
3085             goto nothin;
3086         case OP_NULL:
3087             if (o->op_targ == OP_NEXTSTATE
3088                 || o->op_targ == OP_DBSTATE)
3089             {
3090                 PL_curcop = ((COP*)o);
3091             }
3092             /* XXX: We avoid setting op_seq here to prevent later calls
3093                to rpeep() from mistakenly concluding that optimisation
3094                has already occurred. This doesn't fix the real problem,
3095                though (See 20010220.007 (#5874)). AMS 20010719 */
3096             /* op_seq functionality is now replaced by op_opt */
3097             o->op_opt = 0;
3098             /* FALLTHROUGH */
3099         case OP_SCALAR:
3100         case OP_LINESEQ:
3101         case OP_SCOPE:
3102         nothin:
3103             if (oldop) {
3104                 oldop->op_next = o->op_next;
3105                 o->op_opt = 0;
3106                 continue;
3107             }
3108             break;
3109 
3110         case OP_PUSHMARK:
3111 
3112             /* Given
3113                  5 repeat/DOLIST
3114                  3   ex-list
3115                  1     pushmark
3116                  2     scalar or const
3117                  4   const[0]
3118                convert repeat into a stub with no kids.
3119              */
3120             if (o->op_next->op_type == OP_CONST
3121              || (  o->op_next->op_type == OP_PADSV
3122                 && !(o->op_next->op_private & OPpLVAL_INTRO))
3123              || (  o->op_next->op_type == OP_GV
3124                 && o->op_next->op_next->op_type == OP_RV2SV
3125                 && !(o->op_next->op_next->op_private
3126                         & (OPpLVAL_INTRO|OPpOUR_INTRO))))
3127             {
3128                 const OP *kid = o->op_next->op_next;
3129                 if (o->op_next->op_type == OP_GV)
3130                    kid = kid->op_next;
3131                 /* kid is now the ex-list.  */
3132                 if (kid->op_type == OP_NULL
3133                  && (kid = kid->op_next)->op_type == OP_CONST
3134                     /* kid is now the repeat count.  */
3135                  && kid->op_next->op_type == OP_REPEAT
3136                  && kid->op_next->op_private & OPpREPEAT_DOLIST
3137                  && (kid->op_next->op_flags & OPf_WANT) == OPf_WANT_LIST
3138                  && SvIOK(kSVOP_sv) && SvIVX(kSVOP_sv) == 0
3139                  && oldop)
3140                 {
3141                     o = kid->op_next; /* repeat */
3142                     oldop->op_next = o;
3143                     op_free(cBINOPo->op_first);
3144                     op_free(cBINOPo->op_last );
3145                     o->op_flags &=~ OPf_KIDS;
3146                     /* stub is a baseop; repeat is a binop */
3147                     STATIC_ASSERT_STMT(sizeof(OP) <= sizeof(BINOP));
3148                     OpTYPE_set(o, OP_STUB);
3149                     o->op_private = 0;
3150                     break;
3151                 }
3152             }
3153 
3154             /* If the pushmark is associated with an empty anonhash
3155              * or anonlist, null out the pushmark and swap in a
3156              * specialised op for the parent.
3157              *     4        <@> anonhash sK* ->5
3158              *     3           <0> pushmark s ->4
3159              * becomes:
3160              *     3        <@> emptyavhv sK* ->4
3161              *     -           <0> pushmark s ->3
3162              */
3163             if (!OpHAS_SIBLING(o) && (o->op_next == o->op_sibparent) && (
3164                 (o->op_next->op_type == OP_ANONHASH) ||
3165                 (o->op_next->op_type == OP_ANONLIST) ) &&
3166                 (o->op_next->op_flags & OPf_SPECIAL) ) {
3167 
3168                 OP* anon = o->op_next;
3169                 /* These next two are _potentially_ a padsv and an sassign */
3170                 OP* padsv = anon->op_next;
3171                 OP* sassign = (padsv) ? padsv->op_next: NULL;
3172 
3173                 anon->op_private = (anon->op_type == OP_ANONLIST) ?
3174                                                 0 : OPpEMPTYAVHV_IS_HV;
3175                 OpTYPE_set(anon, OP_EMPTYAVHV);
3176                 op_null(o);
3177                 o = anon;
3178                 if (oldop) /* A previous optimization may have NULLED it */
3179                     oldop->op_next = anon;
3180 
3181                 /* Further optimise scalar assignment of an empty anonhash
3182                  * or anonlist by subsuming the padsv & sassign OPs. */
3183                 if ((padsv->op_type == OP_PADSV) &&
3184                     !(padsv->op_private & OPpDEREF) &&
3185                     sassign && (sassign->op_type == OP_SASSIGN) ){
3186 
3187                     /* Take some public flags from the sassign */
3188                     anon->op_flags = OPf_KIDS | OPf_SPECIAL |
3189                         (anon->op_flags & OPf_PARENS) |
3190                         (sassign->op_flags & (OPf_WANT|OPf_PARENS));
3191 
3192                     /* Take some private flags from the padsv */
3193                     anon->op_private |= OPpTARGET_MY |
3194                         (padsv->op_private & (OPpLVAL_INTRO|OPpPAD_STATE));
3195 
3196                     /* Take the targ slot from the padsv*/
3197                     anon->op_targ = padsv->op_targ;
3198                     padsv->op_targ = 0;
3199 
3200                     /* Clean up */
3201                     anon->op_next = sassign->op_next;
3202                     op_null(padsv);
3203                     op_null(sassign);
3204                 }
3205                 break;
3206 
3207             }
3208 
3209 
3210             /* Convert a series of PAD ops for my vars plus support into a
3211              * single padrange op. Basically
3212              *
3213              *    pushmark -> pad[ahs]v -> pad[ahs]?v -> ... -> (list) -> rest
3214              *
3215              * becomes, depending on circumstances, one of
3216              *
3217              *    padrange  ----------------------------------> (list) -> rest
3218              *    padrange  --------------------------------------------> rest
3219              *
3220              * where all the pad indexes are sequential and of the same type
3221              * (INTRO or not).
3222              * We convert the pushmark into a padrange op, then skip
3223              * any other pad ops, and possibly some trailing ops.
3224              * Note that we don't null() the skipped ops, to make it
3225              * easier for Deparse to undo this optimisation (and none of
3226              * the skipped ops are holding any resources). It also makes
3227              * it easier for find_uninit_var(), as it can just ignore
3228              * padrange, and examine the original pad ops.
3229              */
3230         {
3231             OP *p;
3232             OP *followop = NULL; /* the op that will follow the padrange op */
3233             U8 count = 0;
3234             U8 intro = 0;
3235             PADOFFSET base = 0; /* init only to stop compiler whining */
3236             bool gvoid = 0;     /* init only to stop compiler whining */
3237             bool defav = 0;  /* seen (...) = @_ */
3238             bool reuse = 0;  /* reuse an existing padrange op */
3239 
3240             /* look for a pushmark -> gv[_] -> rv2av */
3241 
3242             {
3243                 OP *rv2av, *q;
3244                 p = o->op_next;
3245                 if (   p->op_type == OP_GV
3246                     && cGVOPx_gv(p) == PL_defgv
3247                     && (rv2av = p->op_next)
3248                     && rv2av->op_type == OP_RV2AV
3249                     && !(rv2av->op_flags & OPf_REF)
3250                     && !(rv2av->op_private & (OPpLVAL_INTRO|OPpMAYBE_LVSUB))
3251                     && ((rv2av->op_flags & OPf_WANT) == OPf_WANT_LIST)
3252                 ) {
3253                     q = rv2av->op_next;
3254                     if (q->op_type == OP_NULL)
3255                         q = q->op_next;
3256                     if (q->op_type == OP_PUSHMARK) {
3257                         defav = 1;
3258                         p = q;
3259                     }
3260                 }
3261             }
3262             if (!defav) {
3263                 p = o;
3264             }
3265 
3266             /* scan for PAD ops */
3267 
3268             for (p = p->op_next; p; p = p->op_next) {
3269                 if (p->op_type == OP_NULL)
3270                     continue;
3271 
3272                 if ((     p->op_type != OP_PADSV
3273                        && p->op_type != OP_PADAV
3274                        && p->op_type != OP_PADHV
3275                     )
3276                       /* any private flag other than INTRO? e.g. STATE */
3277                    || (p->op_private & ~OPpLVAL_INTRO)
3278                 )
3279                     break;
3280 
3281                 /* let $a[N] potentially be optimised into AELEMFAST_LEX
3282                  * instead */
3283                 if (   p->op_type == OP_PADAV
3284                     && p->op_next
3285                     && p->op_next->op_type == OP_CONST
3286                     && p->op_next->op_next
3287                     && p->op_next->op_next->op_type == OP_AELEM
3288                 )
3289                     break;
3290 
3291                 /* for 1st padop, note what type it is and the range
3292                  * start; for the others, check that it's the same type
3293                  * and that the targs are contiguous */
3294                 if (count == 0) {
3295                     intro = (p->op_private & OPpLVAL_INTRO);
3296                     base = p->op_targ;
3297                     gvoid = OP_GIMME(p,0) == G_VOID;
3298                 }
3299                 else {
3300                     if ((p->op_private & OPpLVAL_INTRO) != intro)
3301                         break;
3302                     /* Note that you'd normally  expect targs to be
3303                      * contiguous in my($a,$b,$c), but that's not the case
3304                      * when external modules start doing things, e.g.
3305                      * Function::Parameters */
3306                     if (p->op_targ != base + count)
3307                         break;
3308                     assert(p->op_targ == base + count);
3309                     /* Either all the padops or none of the padops should
3310                        be in void context.  Since we only do the optimisa-
3311                        tion for av/hv when the aggregate itself is pushed
3312                        on to the stack (one item), there is no need to dis-
3313                        tinguish list from scalar context.  */
3314                     if (gvoid != (OP_GIMME(p,0) == G_VOID))
3315                         break;
3316                 }
3317 
3318                 /* for AV, HV, only when we're not flattening */
3319                 if (   p->op_type != OP_PADSV
3320                     && !gvoid
3321                     && !(p->op_flags & OPf_REF)
3322                 )
3323                     break;
3324 
3325                 if (count >= OPpPADRANGE_COUNTMASK)
3326                     break;
3327 
3328                 /* there's a biggest base we can fit into a
3329                  * SAVEt_CLEARPADRANGE in pp_padrange.
3330                  * (The sizeof() stuff will be constant-folded, and is
3331                  * intended to avoid getting "comparison is always false"
3332                  * compiler warnings. See the comments above
3333                  * MEM_WRAP_CHECK for more explanation on why we do this
3334                  * in a weird way to avoid compiler warnings.)
3335                  */
3336                 if (   intro
3337                     && (8*sizeof(base) >
3338                         8*sizeof(UV)-OPpPADRANGE_COUNTSHIFT-SAVE_TIGHT_SHIFT
3339                         ? (Size_t)base
3340                         : (UV_MAX >> (OPpPADRANGE_COUNTSHIFT+SAVE_TIGHT_SHIFT))
3341                         ) >
3342                         (UV_MAX >> (OPpPADRANGE_COUNTSHIFT+SAVE_TIGHT_SHIFT))
3343                 )
3344                     break;
3345 
3346                 /* Success! We've got another valid pad op to optimise away */
3347                 count++;
3348                 followop = p->op_next;
3349             }
3350 
3351             if (count < 1 || (count == 1 && !defav))
3352                 break;
3353 
3354             /* pp_padrange in specifically compile-time void context
3355              * skips pushing a mark and lexicals; in all other contexts
3356              * (including unknown till runtime) it pushes a mark and the
3357              * lexicals. We must be very careful then, that the ops we
3358              * optimise away would have exactly the same effect as the
3359              * padrange.
3360              * In particular in void context, we can only optimise to
3361              * a padrange if we see the complete sequence
3362              *     pushmark, pad*v, ...., list
3363              * which has the net effect of leaving the markstack as it
3364              * was.  Not pushing onto the stack (whereas padsv does touch
3365              * the stack) makes no difference in void context.
3366              */
3367             assert(followop);
3368             if (gvoid) {
3369                 if (followop->op_type == OP_LIST
3370                         && OP_GIMME(followop,0) == G_VOID
3371                    )
3372                 {
3373                     followop = followop->op_next; /* skip OP_LIST */
3374 
3375                     /* consolidate two successive my(...);'s */
3376 
3377                     if (   oldoldop
3378                         && oldoldop->op_type == OP_PADRANGE
3379                         && (oldoldop->op_flags & OPf_WANT) == OPf_WANT_VOID
3380                         && (oldoldop->op_private & OPpLVAL_INTRO) == intro
3381                         && !(oldoldop->op_flags & OPf_SPECIAL)
3382                     ) {
3383                         U8 old_count;
3384                         assert(oldoldop->op_next == oldop);
3385                         assert(   oldop->op_type == OP_NEXTSTATE
3386                                || oldop->op_type == OP_DBSTATE);
3387                         assert(oldop->op_next == o);
3388 
3389                         old_count
3390                             = (oldoldop->op_private & OPpPADRANGE_COUNTMASK);
3391 
3392                        /* Do not assume pad offsets for $c and $d are con-
3393                           tiguous in
3394                             my ($a,$b,$c);
3395                             my ($d,$e,$f);
3396                         */
3397                         if (  oldoldop->op_targ + old_count == base
3398                            && old_count < OPpPADRANGE_COUNTMASK - count) {
3399                             base = oldoldop->op_targ;
3400                             count += old_count;
3401                             reuse = 1;
3402                         }
3403                     }
3404 
3405                     /* if there's any immediately following singleton
3406                      * my var's; then swallow them and the associated
3407                      * nextstates; i.e.
3408                      *    my ($a,$b); my $c; my $d;
3409                      * is treated as
3410                      *    my ($a,$b,$c,$d);
3411                      */
3412 
3413                     while (    ((p = followop->op_next))
3414                             && (  p->op_type == OP_PADSV
3415                                || p->op_type == OP_PADAV
3416                                || p->op_type == OP_PADHV)
3417                             && (p->op_flags & OPf_WANT) == OPf_WANT_VOID
3418                             && (p->op_private & OPpLVAL_INTRO) == intro
3419                             && !(p->op_private & ~OPpLVAL_INTRO)
3420                             && p->op_next
3421                             && (   p->op_next->op_type == OP_NEXTSTATE
3422                                 || p->op_next->op_type == OP_DBSTATE)
3423                             && count < OPpPADRANGE_COUNTMASK
3424                             && base + count == p->op_targ
3425                     ) {
3426                         count++;
3427                         followop = p->op_next;
3428                     }
3429                 }
3430                 else
3431                     break;
3432             }
3433 
3434             if (reuse) {
3435                 assert(oldoldop->op_type == OP_PADRANGE);
3436                 oldoldop->op_next = followop;
3437                 oldoldop->op_private = (intro | count);
3438                 o = oldoldop;
3439                 oldop = NULL;
3440                 oldoldop = NULL;
3441             }
3442             else {
3443                 /* Convert the pushmark into a padrange.
3444                  * To make Deparse easier, we guarantee that a padrange was
3445                  * *always* formerly a pushmark */
3446                 assert(o->op_type == OP_PUSHMARK);
3447                 o->op_next = followop;
3448                 OpTYPE_set(o, OP_PADRANGE);
3449                 o->op_targ = base;
3450                 /* bit 7: INTRO; bit 6..0: count */
3451                 o->op_private = (intro | count);
3452                 o->op_flags = ((o->op_flags & ~(OPf_WANT|OPf_SPECIAL))
3453                               | gvoid * OPf_WANT_VOID
3454                               | (defav ? OPf_SPECIAL : 0));
3455             }
3456             break;
3457         }
3458 
3459         case OP_RV2AV:
3460             if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
3461                 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0);
3462             break;
3463 
3464         case OP_RV2HV:
3465         case OP_PADHV:
3466             /*'keys %h' in void or scalar context: skip the OP_KEYS
3467              * and perform the functionality directly in the RV2HV/PADHV
3468              * op
3469              */
3470             if (o->op_flags & OPf_REF) {
3471                 OP *k = o->op_next;
3472                 U8 want = (k->op_flags & OPf_WANT);
3473                 if (   k
3474                     && k->op_type == OP_KEYS
3475                     && (   want == OPf_WANT_VOID
3476                         || want == OPf_WANT_SCALAR)
3477                     && !(k->op_private & OPpMAYBE_LVSUB)
3478                     && !(k->op_flags & OPf_MOD)
3479                 ) {
3480                     o->op_next     = k->op_next;
3481                     o->op_flags   &= ~(OPf_REF|OPf_WANT);
3482                     o->op_flags   |= want;
3483                     o->op_private |= (o->op_type == OP_PADHV ?
3484                                       OPpPADHV_ISKEYS : OPpRV2HV_ISKEYS);
3485                     /* for keys(%lex), hold onto the OP_KEYS's targ
3486                      * since padhv doesn't have its own targ to return
3487                      * an int with */
3488                     if (!(o->op_type ==OP_PADHV && want == OPf_WANT_SCALAR))
3489                         op_null(k);
3490                 }
3491             }
3492 
3493             /* see if %h is used in boolean context */
3494             if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
3495                 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, OPpMAYBE_TRUEBOOL);
3496 
3497 
3498             if (o->op_type != OP_PADHV)
3499                 break;
3500             /* FALLTHROUGH */
3501         case OP_PADAV:
3502             if (   o->op_type == OP_PADAV
3503                 && (o->op_flags & OPf_WANT) == OPf_WANT_SCALAR
3504             )
3505                 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0);
3506             /* FALLTHROUGH */
3507         case OP_PADSV:
3508             /* Skip over state($x) in void context.  */
3509             if (oldop && o->op_private == (OPpPAD_STATE|OPpLVAL_INTRO)
3510              && (o->op_flags & OPf_WANT) == OPf_WANT_VOID)
3511             {
3512                 oldop->op_next = o->op_next;
3513                 goto redo_nextstate;
3514             }
3515             if (o->op_type != OP_PADAV)
3516                 break;
3517             /* FALLTHROUGH */
3518         case OP_GV:
3519             if (o->op_type == OP_PADAV || o->op_next->op_type == OP_RV2AV) {
3520                 OP* const pop = (o->op_type == OP_PADAV) ?
3521                             o->op_next : o->op_next->op_next;
3522                 IV i;
3523                 if (pop && pop->op_type == OP_CONST &&
3524                     ((PL_op = pop->op_next)) &&
3525                     pop->op_next->op_type == OP_AELEM &&
3526                     !(pop->op_next->op_private &
3527                       (OPpLVAL_INTRO|OPpLVAL_DEFER|OPpDEREF|OPpMAYBE_LVSUB)) &&
3528                     (i = SvIV(cSVOPx(pop)->op_sv)) >= -128 && i <= 127)
3529                 {
3530                     GV *gv;
3531                     if (cSVOPx(pop)->op_private & OPpCONST_STRICT)
3532                         no_bareword_allowed(pop);
3533                     if (o->op_type == OP_GV)
3534                         op_null(o->op_next);
3535                     op_null(pop->op_next);
3536                     op_null(pop);
3537                     o->op_flags |= pop->op_next->op_flags & OPf_MOD;
3538                     o->op_next = pop->op_next->op_next;
3539                     o->op_ppaddr = PL_ppaddr[OP_AELEMFAST];
3540                     o->op_private = (U8)i;
3541                     if (o->op_type == OP_GV) {
3542                         gv = cGVOPo_gv;
3543                         GvAVn(gv);
3544                         o->op_type = OP_AELEMFAST;
3545                     }
3546                     else
3547                         o->op_type = OP_AELEMFAST_LEX;
3548                 }
3549                 if (o->op_type != OP_GV)
3550                     break;
3551             }
3552 
3553             /* Remove $foo from the op_next chain in void context.  */
3554             if (oldop
3555              && (  o->op_next->op_type == OP_RV2SV
3556                 || o->op_next->op_type == OP_RV2AV
3557                 || o->op_next->op_type == OP_RV2HV  )
3558              && (o->op_next->op_flags & OPf_WANT) == OPf_WANT_VOID
3559              && !(o->op_next->op_private & OPpLVAL_INTRO))
3560             {
3561                 oldop->op_next = o->op_next->op_next;
3562                 /* Reprocess the previous op if it is a nextstate, to
3563                    allow double-nextstate optimisation.  */
3564               redo_nextstate:
3565                 if (oldop->op_type == OP_NEXTSTATE) {
3566                     oldop->op_opt = 0;
3567                     o = oldop;
3568                     oldop = oldoldop;
3569                     oldoldop = NULL;
3570                     goto redo;
3571                 }
3572                 o = oldop->op_next;
3573                 goto redo;
3574             }
3575             else if (o->op_next->op_type == OP_RV2SV) {
3576                 if (!(o->op_next->op_private & OPpDEREF)) {
3577                     op_null(o->op_next);
3578                     o->op_private |= o->op_next->op_private & (OPpLVAL_INTRO
3579                                                                | OPpOUR_INTRO);
3580                     o->op_next = o->op_next->op_next;
3581                     OpTYPE_set(o, OP_GVSV);
3582                 }
3583             }
3584             else if (o->op_next->op_type == OP_READLINE
3585                     && o->op_next->op_next->op_type == OP_CONCAT
3586                     && (o->op_next->op_next->op_flags & OPf_STACKED))
3587             {
3588                 /* Turn "$a .= <FH>" into an OP_RCATLINE. AMS 20010917 */
3589                 OpTYPE_set(o, OP_RCATLINE);
3590                 o->op_flags |= OPf_STACKED;
3591                 op_null(o->op_next->op_next);
3592                 op_null(o->op_next);
3593             }
3594 
3595             break;
3596 
3597         case OP_NOT:
3598             break;
3599 
3600         case OP_AND:
3601         case OP_OR:
3602         case OP_DOR:
3603         case OP_CMPCHAIN_AND:
3604         case OP_PUSHDEFER:
3605             while (cLOGOP->op_other->op_type == OP_NULL)
3606                 cLOGOP->op_other = cLOGOP->op_other->op_next;
3607             while (o->op_next && (   o->op_type == o->op_next->op_type
3608                                   || o->op_next->op_type == OP_NULL))
3609                 o->op_next = o->op_next->op_next;
3610 
3611             /* If we're an OR and our next is an AND in void context, we'll
3612                follow its op_other on short circuit, same for reverse.
3613                We can't do this with OP_DOR since if it's true, its return
3614                value is the underlying value which must be evaluated
3615                by the next op. */
3616             if (o->op_next &&
3617                 (
3618                     (IS_AND_OP(o) && IS_OR_OP(o->op_next))
3619                  || (IS_OR_OP(o) && IS_AND_OP(o->op_next))
3620                 )
3621                 && (o->op_next->op_flags & OPf_WANT) == OPf_WANT_VOID
3622             ) {
3623                 o->op_next = cLOGOPx(o->op_next)->op_other;
3624             }
3625             DEFER(cLOGOP->op_other);
3626             o->op_opt = 1;
3627             break;
3628 
3629         case OP_GREPWHILE:
3630             if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
3631                 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0);
3632             /* FALLTHROUGH */
3633         case OP_COND_EXPR:
3634         case OP_MAPWHILE:
3635         case OP_ANDASSIGN:
3636         case OP_ORASSIGN:
3637         case OP_DORASSIGN:
3638         case OP_RANGE:
3639         case OP_ONCE:
3640         case OP_ARGDEFELEM:
3641             while (cLOGOP->op_other->op_type == OP_NULL)
3642                 cLOGOP->op_other = cLOGOP->op_other->op_next;
3643             DEFER(cLOGOP->op_other);
3644             break;
3645 
3646         case OP_ENTERLOOP:
3647         case OP_ENTERITER:
3648             while (cLOOP->op_redoop->op_type == OP_NULL)
3649                 cLOOP->op_redoop = cLOOP->op_redoop->op_next;
3650             while (cLOOP->op_nextop->op_type == OP_NULL)
3651                 cLOOP->op_nextop = cLOOP->op_nextop->op_next;
3652             while (cLOOP->op_lastop->op_type == OP_NULL)
3653                 cLOOP->op_lastop = cLOOP->op_lastop->op_next;
3654             /* a while(1) loop doesn't have an op_next that escapes the
3655              * loop, so we have to explicitly follow the op_lastop to
3656              * process the rest of the code */
3657             DEFER(cLOOP->op_lastop);
3658             break;
3659 
3660         case OP_ENTERTRY:
3661             assert(cLOGOPo->op_other->op_type == OP_LEAVETRY);
3662             DEFER(cLOGOPo->op_other);
3663             break;
3664 
3665         case OP_ENTERTRYCATCH:
3666             assert(cLOGOPo->op_other->op_type == OP_CATCH);
3667             /* catch body is the ->op_other of the OP_CATCH */
3668             DEFER(cLOGOPx(cLOGOPo->op_other)->op_other);
3669             break;
3670 
3671         case OP_SUBST:
3672             if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
3673                 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0);
3674             assert(!(cPMOP->op_pmflags & PMf_ONCE));
3675             while (cPMOP->op_pmstashstartu.op_pmreplstart &&
3676                    cPMOP->op_pmstashstartu.op_pmreplstart->op_type == OP_NULL)
3677                 cPMOP->op_pmstashstartu.op_pmreplstart
3678                     = cPMOP->op_pmstashstartu.op_pmreplstart->op_next;
3679             DEFER(cPMOP->op_pmstashstartu.op_pmreplstart);
3680             break;
3681 
3682         case OP_SORT: {
3683             OP *oright;
3684 
3685             if (o->op_flags & OPf_SPECIAL) {
3686                 /* first arg is a code block */
3687                 OP * const nullop = OpSIBLING(cLISTOP->op_first);
3688                 OP * kid          = cUNOPx(nullop)->op_first;
3689 
3690                 assert(nullop->op_type == OP_NULL);
3691                 assert(kid->op_type == OP_SCOPE
3692                  || (kid->op_type == OP_NULL && kid->op_targ == OP_LEAVE));
3693                 /* since OP_SORT doesn't have a handy op_other-style
3694                  * field that can point directly to the start of the code
3695                  * block, store it in the otherwise-unused op_next field
3696                  * of the top-level OP_NULL. This will be quicker at
3697                  * run-time, and it will also allow us to remove leading
3698                  * OP_NULLs by just messing with op_nexts without
3699                  * altering the basic op_first/op_sibling layout. */
3700                 kid = kLISTOP->op_first;
3701                 assert(
3702                       (kid->op_type == OP_NULL
3703                       && (  kid->op_targ == OP_NEXTSTATE
3704                          || kid->op_targ == OP_DBSTATE  ))
3705                     || kid->op_type == OP_STUB
3706                     || kid->op_type == OP_ENTER
3707                     || (PL_parser && PL_parser->error_count));
3708                 nullop->op_next = kid->op_next;
3709                 DEFER(nullop->op_next);
3710             }
3711 
3712             /* check that RHS of sort is a single plain array */
3713             oright = cUNOPo->op_first;
3714             if (!oright || oright->op_type != OP_PUSHMARK)
3715                 break;
3716 
3717             if (o->op_private & OPpSORT_INPLACE)
3718                 break;
3719 
3720             /* reverse sort ... can be optimised.  */
3721             if (!OpHAS_SIBLING(cUNOPo)) {
3722                 /* Nothing follows us on the list. */
3723                 OP * const reverse = o->op_next;
3724 
3725                 if (reverse->op_type == OP_REVERSE &&
3726                     (reverse->op_flags & OPf_WANT) == OPf_WANT_LIST) {
3727                     OP * const pushmark = cUNOPx(reverse)->op_first;
3728                     if (pushmark && (pushmark->op_type == OP_PUSHMARK)
3729                         && (OpSIBLING(cUNOPx(pushmark)) == o)) {
3730                         /* reverse -> pushmark -> sort */
3731                         o->op_private |= OPpSORT_REVERSE;
3732                         op_null(reverse);
3733                         pushmark->op_next = oright->op_next;
3734                         op_null(oright);
3735                     }
3736                 }
3737             }
3738 
3739             break;
3740         }
3741 
3742         case OP_REVERSE: {
3743             OP *ourmark, *theirmark, *ourlast, *iter, *expushmark, *rv2av;
3744             OP *gvop = NULL;
3745             LISTOP *enter, *exlist;
3746 
3747             if (o->op_private & OPpSORT_INPLACE)
3748                 break;
3749 
3750             enter = cLISTOPx(o->op_next);
3751             if (!enter)
3752                 break;
3753             if (enter->op_type == OP_NULL) {
3754                 enter = cLISTOPx(enter->op_next);
3755                 if (!enter)
3756                     break;
3757             }
3758             /* for $a (...) will have OP_GV then OP_RV2GV here.
3759                for (...) just has an OP_GV.  */
3760             if (enter->op_type == OP_GV) {
3761                 gvop = (OP *) enter;
3762                 enter = cLISTOPx(enter->op_next);
3763                 if (!enter)
3764                     break;
3765                 if (enter->op_type == OP_RV2GV) {
3766                   enter = cLISTOPx(enter->op_next);
3767                   if (!enter)
3768                     break;
3769                 }
3770             }
3771 
3772             if (enter->op_type != OP_ENTERITER)
3773                 break;
3774 
3775             iter = enter->op_next;
3776             if (!iter || iter->op_type != OP_ITER)
3777                 break;
3778 
3779             expushmark = enter->op_first;
3780             if (!expushmark || expushmark->op_type != OP_NULL
3781                 || expushmark->op_targ != OP_PUSHMARK)
3782                 break;
3783 
3784             exlist = cLISTOPx(OpSIBLING(expushmark));
3785             if (!exlist || exlist->op_type != OP_NULL
3786                 || exlist->op_targ != OP_LIST)
3787                 break;
3788 
3789             if (exlist->op_last != o) {
3790                 /* Mmm. Was expecting to point back to this op.  */
3791                 break;
3792             }
3793             theirmark = exlist->op_first;
3794             if (!theirmark || theirmark->op_type != OP_PUSHMARK)
3795                 break;
3796 
3797             if (OpSIBLING(theirmark) != o) {
3798                 /* There's something between the mark and the reverse, eg
3799                    for (1, reverse (...))
3800                    so no go.  */
3801                 break;
3802             }
3803 
3804             ourmark = cLISTOPo->op_first;
3805             if (!ourmark || ourmark->op_type != OP_PUSHMARK)
3806                 break;
3807 
3808             ourlast = cLISTOPo->op_last;
3809             if (!ourlast || ourlast->op_next != o)
3810                 break;
3811 
3812             rv2av = OpSIBLING(ourmark);
3813             if (rv2av && rv2av->op_type == OP_RV2AV && !OpHAS_SIBLING(rv2av)
3814                 && rv2av->op_flags == (OPf_WANT_LIST | OPf_KIDS)) {
3815                 /* We're just reversing a single array.  */
3816                 rv2av->op_flags = OPf_WANT_SCALAR | OPf_KIDS | OPf_REF;
3817                 enter->op_flags |= OPf_STACKED;
3818             }
3819 
3820             /* We don't have control over who points to theirmark, so sacrifice
3821                ours.  */
3822             theirmark->op_next = ourmark->op_next;
3823             theirmark->op_flags = ourmark->op_flags;
3824             ourlast->op_next = gvop ? gvop : (OP *) enter;
3825             op_null(ourmark);
3826             op_null(o);
3827             enter->op_private |= OPpITER_REVERSED;
3828             iter->op_private |= OPpITER_REVERSED;
3829 
3830             oldoldop = NULL;
3831             oldop    = ourlast;
3832             o        = oldop->op_next;
3833             goto redo;
3834             NOT_REACHED; /* NOTREACHED */
3835             break;
3836         }
3837 
3838         case OP_UNDEF:
3839             if ((o->op_flags & OPf_KIDS) &&
3840                 (cUNOPx(o)->op_first->op_type == OP_PADSV)) {
3841 
3842                 /* Convert:
3843                  *     undef
3844                  *       padsv[$x]
3845                  * to:
3846                  *     undef[$x]
3847                  */
3848 
3849                 OP * padsv = cUNOPx(o)->op_first;
3850                 o->op_private = OPpTARGET_MY |
3851                         (padsv->op_private & (OPpLVAL_INTRO|OPpPAD_STATE));
3852                 o->op_targ = padsv->op_targ; padsv->op_targ = 0;
3853                 op_null(padsv);
3854                 /* Optimizer does NOT seem to fix up the padsv op_next ptr */
3855                 if (oldoldop)
3856                     oldoldop->op_next = o;
3857                 oldop = oldoldop;
3858                 oldoldop = NULL;
3859 
3860             } else if (o->op_next->op_type == OP_PADSV) {
3861                 OP * padsv = o->op_next;
3862                 OP * sassign = (padsv->op_next &&
3863                         padsv->op_next->op_type == OP_SASSIGN) ?
3864                         padsv->op_next : NULL;
3865                 if (sassign && cBINOPx(sassign)->op_first == o) {
3866                     /* Convert:
3867                      *     sassign
3868                      *       undef
3869                      *       padsv[$x]
3870                      * to:
3871                      *     undef[$x]
3872                      * NOTE: undef does not have the "T" flag set in
3873                      *       regen/opcodes, as this would cause
3874                      *       S_maybe_targlex to do the optimization.
3875                      *       Seems easier to keep it all here, rather
3876                      *       than have an undef-specific branch in
3877                      *       S_maybe_targlex just to add the
3878                      *       OPpUNDEF_KEEP_PV flag.
3879                      */
3880                      o->op_private = OPpTARGET_MY | OPpUNDEF_KEEP_PV |
3881                          (padsv->op_private & (OPpLVAL_INTRO|OPpPAD_STATE));
3882                      o->op_targ = padsv->op_targ; padsv->op_targ = 0;
3883                      op_null(padsv);
3884                      op_null(sassign);
3885                      /* Optimizer DOES seems to fix up the op_next ptrs */
3886                 }
3887             }
3888             break;
3889 
3890         case OP_QR:
3891         case OP_MATCH:
3892             if (!(cPMOP->op_pmflags & PMf_ONCE)) {
3893                 assert (!cPMOP->op_pmstashstartu.op_pmreplstart);
3894             }
3895             break;
3896 
3897         case OP_RUNCV:
3898             if (!(o->op_private & OPpOFFBYONE) && !CvCLONE(PL_compcv)
3899              && (!CvANON(PL_compcv) || (!PL_cv_has_eval && !PL_perldb)))
3900             {
3901                 SV *sv;
3902                 if (CvEVAL(PL_compcv)) sv = &PL_sv_undef;
3903                 else {
3904                     sv = newRV((SV *)PL_compcv);
3905                     sv_rvweaken(sv);
3906                     SvREADONLY_on(sv);
3907                 }
3908                 OpTYPE_set(o, OP_CONST);
3909                 o->op_flags |= OPf_SPECIAL;
3910                 cSVOPo->op_sv = sv;
3911             }
3912             break;
3913 
3914         case OP_SASSIGN: {
3915             if (OP_GIMME(o,0) == G_VOID
3916              || (  o->op_next->op_type == OP_LINESEQ
3917                 && (  o->op_next->op_next->op_type == OP_LEAVESUB
3918                    || (  o->op_next->op_next->op_type == OP_RETURN
3919                       && !CvLVALUE(PL_compcv)))))
3920             {
3921                 OP *right = cBINOP->op_first;
3922                 if (right) {
3923                     /*   sassign
3924                     *      RIGHT
3925                     *      substr
3926                     *         pushmark
3927                     *         arg1
3928                     *         arg2
3929                     *         ...
3930                     * becomes
3931                     *
3932                     *  ex-sassign
3933                     *     substr
3934                     *        pushmark
3935                     *        RIGHT
3936                     *        arg1
3937                     *        arg2
3938                     *        ...
3939                     */
3940                     OP *left = OpSIBLING(right);
3941                     if (left->op_type == OP_SUBSTR
3942                          && (left->op_private & 7) < 4) {
3943                         op_null(o);
3944                         /* cut out right */
3945                         op_sibling_splice(o, NULL, 1, NULL);
3946                         /* and insert it as second child of OP_SUBSTR */
3947                         op_sibling_splice(left, cBINOPx(left)->op_first, 0,
3948                                     right);
3949                         left->op_private |= OPpSUBSTR_REPL_FIRST;
3950                         left->op_flags =
3951                             (o->op_flags & ~OPf_WANT) | OPf_WANT_VOID;
3952                     }
3953                 }
3954             }
3955             OP* rhs = cBINOPx(o)->op_first;
3956             OP* lval = cBINOPx(o)->op_last;
3957 
3958             /* Combine a simple SASSIGN OP with a PADSV lvalue child OP
3959              * into a single OP. */
3960 
3961             /* This optimization covers arbitrarily complicated RHS OP
3962              * trees. Separate optimizations may exist for specific,
3963              * single RHS OPs, such as:
3964              * "my $foo = undef;" or "my $bar = $other_padsv;" */
3965 
3966             if (!(o->op_private & (OPpASSIGN_BACKWARDS|OPpASSIGN_CV_TO_GV))
3967                  && lval && (lval->op_type == OP_PADSV) &&
3968                 !(lval->op_private & OPpDEREF)
3969                  /* skip if padrange has already gazumped the padsv */
3970                  && (lval == oldop)
3971                  /* Memoize::Once produces a non-standard SASSIGN that
3972                   * doesn't actually point to pp_sassign, has only one
3973                   * child (PADSV), and gets to it via op_other rather
3974                   * than op_next. Don't try to optimize this. */
3975                  && (lval != rhs)
3976                ) {
3977                 /* SASSIGN's bitfield flags, such as op_moresib and
3978                  * op_slabbed, will be carried over unchanged. */
3979                 OpTYPE_set(o, OP_PADSV_STORE);
3980 
3981                 /* Explicitly craft the new OP's op_flags, carrying
3982                  * some bits over from the SASSIGN */
3983                 o->op_flags = (
3984                     OPf_KIDS | OPf_STACKED |
3985                     (o->op_flags & (OPf_WANT|OPf_PARENS))
3986                 );
3987 
3988                 /* Reset op_private flags, taking relevant private flags
3989                  * from the PADSV */
3990                 o->op_private = (lval->op_private &
3991                                 (OPpLVAL_INTRO|OPpPAD_STATE|OPpDEREF));
3992 
3993                 /* Steal the targ from the PADSV */
3994                 o->op_targ = lval->op_targ; lval->op_targ = 0;
3995 
3996                 /* Fixup op_next ptrs */
3997                 assert(oldop->op_type == OP_PADSV);
3998                 /* oldoldop can be arbitrarily deep in the RHS OP tree */
3999                 oldoldop->op_next = o;
4000 
4001                 /* Even when (rhs != oldoldop), rhs might still have a
4002                  * relevant op_next ptr to lval. This is definitely true
4003                  * when rhs is OP_NULL with a LOGOP kid (e.g. orassign).
4004                  * There may be other cases. */
4005                 if (rhs->op_next == lval)
4006                     rhs->op_next = o;
4007 
4008                 /* Now null-out the PADSV */
4009                 op_null(lval);
4010 
4011                 /* NULL the previous op ptrs, so rpeep can continue */
4012                 oldoldop = NULL; oldop = NULL;
4013             }
4014 
4015             /* Combine a simple SASSIGN OP with an AELEMFAST_LEX lvalue
4016              * into a single OP. This optimization covers arbitrarily
4017              * complicated RHS OP trees. */
4018 
4019             if (!(o->op_private & (OPpASSIGN_BACKWARDS|OPpASSIGN_CV_TO_GV))
4020                 && (lval->op_type == OP_NULL) && (lval->op_private == 2) &&
4021                 (cBINOPx(lval)->op_first->op_type == OP_AELEMFAST_LEX)
4022             ) {
4023                 OP * lex = cBINOPx(lval)->op_first;
4024                 /* SASSIGN's bitfield flags, such as op_moresib and
4025                  * op_slabbed, will be carried over unchanged. */
4026                 OpTYPE_set(o, OP_AELEMFASTLEX_STORE);
4027 
4028                 /* Explicitly craft the new OP's op_flags, carrying
4029                  * some bits over from the SASSIGN */
4030                 o->op_flags = (
4031                     OPf_KIDS | OPf_STACKED |
4032                     (o->op_flags & (OPf_WANT|OPf_PARENS))
4033                 );
4034 
4035                 /* Copy the AELEMFAST_LEX op->private, which contains
4036                  * the key index. */
4037                 o->op_private = lex->op_private;
4038 
4039                 /* Take the targ from the AELEMFAST_LEX */
4040                 o->op_targ = lex->op_targ; lex->op_targ = 0;
4041 
4042                 assert(oldop->op_type == OP_AELEMFAST_LEX);
4043                 /* oldoldop can be arbitrarily deep in the RHS OP tree */
4044                 oldoldop->op_next = o;
4045 
4046                 /* Even when (rhs != oldoldop), rhs might still have a
4047                  * relevant op_next ptr to lex. (Updating it here can
4048                  * also cause other ops in the RHS to get the desired
4049                  * op_next pointer, presumably thanks to the finalizer.)
4050                  * This is definitely truewhen rhs is OP_NULL with a
4051                  * LOGOP kid (e.g. orassign). There may be other cases. */
4052                 if (rhs->op_next == lex)
4053                     rhs->op_next = o;
4054 
4055                 /* Now null-out the AELEMFAST_LEX */
4056                 op_null(lex);
4057 
4058                 /* NULL the previous op ptrs, so rpeep can continue */
4059                 oldop = oldoldop; oldoldop = NULL;
4060             }
4061 
4062             break;
4063         }
4064 
4065         case OP_AASSIGN: {
4066             int l, r, lr, lscalars, rscalars;
4067 
4068             /* handle common vars detection, e.g. ($a,$b) = ($b,$a).
4069                Note that we do this now rather than in newASSIGNOP(),
4070                since only by now are aliased lexicals flagged as such
4071 
4072                See the essay "Common vars in list assignment" above for
4073                the full details of the rationale behind all the conditions
4074                below.
4075 
4076                PL_generation sorcery:
4077                To detect whether there are common vars, the global var
4078                PL_generation is incremented for each assign op we scan.
4079                Then we run through all the lexical variables on the LHS,
4080                of the assignment, setting a spare slot in each of them to
4081                PL_generation.  Then we scan the RHS, and if any lexicals
4082                already have that value, we know we've got commonality.
4083                Also, if the generation number is already set to
4084                PERL_INT_MAX, then the variable is involved in aliasing, so
4085                we also have potential commonality in that case.
4086              */
4087 
4088             PL_generation++;
4089             /* scan LHS */
4090             lscalars = 0;
4091             l = S_aassign_scan(aTHX_ cLISTOPo->op_last,  FALSE, &lscalars);
4092             /* scan RHS */
4093             rscalars = 0;
4094             r = S_aassign_scan(aTHX_ cLISTOPo->op_first, TRUE, &rscalars);
4095             lr = (l|r);
4096 
4097 
4098             /* After looking for things which are *always* safe, this main
4099              * if/else chain selects primarily based on the type of the
4100              * LHS, gradually working its way down from the more dangerous
4101              * to the more restrictive and thus safer cases */
4102 
4103             if (   !l                      /* () = ....; */
4104                 || !r                      /* .... = (); */
4105                 || !(l & ~AAS_SAFE_SCALAR) /* (undef, pos()) = ...; */
4106                 || !(r & ~AAS_SAFE_SCALAR) /* ... = (1,2,length,undef); */
4107                 || (lscalars < 2)          /* (undef, $x) = ... */
4108             ) {
4109                 NOOP; /* always safe */
4110             }
4111             else if (l & AAS_DANGEROUS) {
4112                 /* always dangerous */
4113                 o->op_private |= OPpASSIGN_COMMON_SCALAR;
4114                 o->op_private |= OPpASSIGN_COMMON_AGG;
4115             }
4116             else if (l & (AAS_PKG_SCALAR|AAS_PKG_AGG)) {
4117                 /* package vars are always dangerous - too many
4118                  * aliasing possibilities */
4119                 if (l & AAS_PKG_SCALAR)
4120                     o->op_private |= OPpASSIGN_COMMON_SCALAR;
4121                 if (l & AAS_PKG_AGG)
4122                     o->op_private |= OPpASSIGN_COMMON_AGG;
4123             }
4124             else if (l & ( AAS_MY_SCALAR|AAS_MY_AGG
4125                           |AAS_LEX_SCALAR|AAS_LEX_AGG))
4126             {
4127                 /* LHS contains only lexicals and safe ops */
4128 
4129                 if (l & (AAS_MY_AGG|AAS_LEX_AGG))
4130                     o->op_private |= OPpASSIGN_COMMON_AGG;
4131 
4132                 if (l & (AAS_MY_SCALAR|AAS_LEX_SCALAR)) {
4133                     if (lr & AAS_LEX_SCALAR_COMM)
4134                         o->op_private |= OPpASSIGN_COMMON_SCALAR;
4135                     else if (   !(l & AAS_LEX_SCALAR)
4136                              && (r & AAS_DEFAV))
4137                     {
4138                         /* falsely mark
4139                          *    my (...) = @_
4140                          * as scalar-safe for performance reasons.
4141                          * (it will still have been marked _AGG if necessary */
4142                         NOOP;
4143                     }
4144                     else if (r  & (AAS_PKG_SCALAR|AAS_PKG_AGG|AAS_DANGEROUS))
4145                         /* if there are only lexicals on the LHS and no
4146                          * common ones on the RHS, then we assume that the
4147                          * only way those lexicals could also get
4148                          * on the RHS is via some sort of dereffing or
4149                          * closure, e.g.
4150                          *    $r = \$lex;
4151                          *    ($lex, $x) = (1, $$r)
4152                          * and in this case we assume the var must have
4153                          *  a bumped ref count. So if its ref count is 1,
4154                          *  it must only be on the LHS.
4155                          */
4156                         o->op_private |= OPpASSIGN_COMMON_RC1;
4157                 }
4158             }
4159 
4160             /* ... = ($x)
4161              * may have to handle aggregate on LHS, but we can't
4162              * have common scalars. */
4163             if (rscalars < 2)
4164                 o->op_private &=
4165                         ~(OPpASSIGN_COMMON_SCALAR|OPpASSIGN_COMMON_RC1);
4166 
4167             if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
4168                 S_check_for_bool_cxt(o, 1, OPpASSIGN_TRUEBOOL, 0);
4169             break;
4170         }
4171 
4172         case OP_REF:
4173         case OP_BLESSED:
4174             /* if the op is used in boolean context, set the TRUEBOOL flag
4175              * which enables an optimisation at runtime which avoids creating
4176              * a stack temporary for known-true package names */
4177             if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
4178                 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, OPpMAYBE_TRUEBOOL);
4179             break;
4180 
4181         case OP_LENGTH:
4182             /* see if the op is used in known boolean context,
4183              * but not if OA_TARGLEX optimisation is enabled */
4184             if (   (o->op_flags & OPf_WANT) == OPf_WANT_SCALAR
4185                 && !(o->op_private & OPpTARGET_MY)
4186             )
4187                 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0);
4188             break;
4189 
4190         case OP_POS:
4191             /* see if the op is used in known boolean context */
4192             if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
4193                 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0);
4194             break;
4195 
4196         case OP_CUSTOM: {
4197             Perl_cpeep_t cpeep =
4198                 XopENTRYCUSTOM(o, xop_peep);
4199             if (cpeep)
4200                 cpeep(aTHX_ o, oldop);
4201             break;
4202         }
4203 
4204         }
4205         /* did we just null the current op? If so, re-process it to handle
4206          * eliding "empty" ops from the chain */
4207         if (o->op_type == OP_NULL && oldop && oldop->op_next == o) {
4208             o->op_opt = 0;
4209             o = oldop;
4210         }
4211         else {
4212             oldoldop = oldop;
4213             oldop = o;
4214         }
4215     }
4216     LEAVE;
4217 }
4218 
4219 void
Perl_peep(pTHX_ OP * o)4220 Perl_peep(pTHX_ OP *o)
4221 {
4222     CALL_RPEEP(o);
4223 }
4224 
4225 /*
4226  * ex: set ts=8 sts=4 sw=4 et:
4227  */
4228