xref: /openbsd/gnu/usr.bin/perl/peep.c (revision 5486feef)
1 /*    peep.c
2  *
3  *    Copyright (C) 1991-2022 by Larry Wall and others
4  *
5  *    You may distribute under the terms of either the GNU General Public
6  *    License or the Artistic License, as specified in the README file.
7  *
8  */
9 
10 /*
11  * Aragorn sped on up the hill. Every now and again he bent to the ground.
12  * Hobbits go light, and their footprints are not easy even for a Ranger to
13  * read, but not far from the top a spring crossed the path, and in the wet
14  * earth he saw what he was seeking.
15  * 'I read the signs aright,' he said to himself. 'Frodo ran to the hill-top.
16  * I wonder what he saw there? But he returned by the same way, and went down
17  * the hill again.'
18  */
19 
20 /* This file contains functions for optimizing and finalizing the OP
21  * structures that hold a compiled perl program
22  */
23 
24 #include "EXTERN.h"
25 #define PERL_IN_PEEP_C
26 #include "perl.h"
27 
28 
29 #define CALL_RPEEP(o) PL_rpeepp(aTHX_ o)
30 
31 
32 static void
S_scalar_slice_warning(pTHX_ const OP * o)33 S_scalar_slice_warning(pTHX_ const OP *o)
34 {
35     OP *kid;
36     const bool is_hash = o->op_type == OP_HSLICE
37                 || (o->op_type == OP_NULL && o->op_targ == OP_HSLICE);
38     SV *name;
39 
40     if (!(o->op_private & OPpSLICEWARNING))
41         return;
42     if (PL_parser && PL_parser->error_count)
43         /* This warning can be nonsensical when there is a syntax error. */
44         return;
45 
46     kid = cLISTOPo->op_first;
47     kid = OpSIBLING(kid); /* get past pushmark */
48     /* weed out false positives: any ops that can return lists */
49     switch (kid->op_type) {
50     case OP_BACKTICK:
51     case OP_GLOB:
52     case OP_READLINE:
53     case OP_MATCH:
54     case OP_RV2AV:
55     case OP_EACH:
56     case OP_VALUES:
57     case OP_KEYS:
58     case OP_SPLIT:
59     case OP_LIST:
60     case OP_SORT:
61     case OP_REVERSE:
62     case OP_ENTERSUB:
63     case OP_CALLER:
64     case OP_LSTAT:
65     case OP_STAT:
66     case OP_READDIR:
67     case OP_SYSTEM:
68     case OP_TMS:
69     case OP_LOCALTIME:
70     case OP_GMTIME:
71     case OP_ENTEREVAL:
72         return;
73     }
74 
75     /* Don't warn if we have a nulled list either. */
76     if (kid->op_type == OP_NULL && kid->op_targ == OP_LIST)
77         return;
78 
79     assert(OpSIBLING(kid));
80     name = op_varname(OpSIBLING(kid));
81     if (!name) /* XS module fiddling with the op tree */
82         return;
83     warn_elem_scalar_context(kid, name, is_hash, true);
84 }
85 
86 
87 /* info returned by S_sprintf_is_multiconcatable() */
88 
89 struct sprintf_ismc_info {
90     SSize_t nargs;    /* num of args to sprintf (not including the format) */
91     char  *start;     /* start of raw format string */
92     char  *end;       /* bytes after end of raw format string */
93     STRLEN total_len; /* total length (in bytes) of format string, not
94                          including '%s' and  half of '%%' */
95     STRLEN variant;   /* number of bytes by which total_len_p would grow
96                          if upgraded to utf8 */
97     bool   utf8;      /* whether the format is utf8 */
98 };
99 
100 /* is the OP_SPRINTF o suitable for converting into a multiconcat op?
101  * i.e. its format argument is a const string with only '%s' and '%%'
102  * formats, and the number of args is known, e.g.
103  *    sprintf "a=%s f=%s", $a[0], scalar(f());
104  * but not
105  *    sprintf "i=%d a=%s f=%s", $i, @a, f();
106  *
107  * If successful, the sprintf_ismc_info struct pointed to by info will be
108  * populated.
109  */
110 
111 STATIC bool
S_sprintf_is_multiconcatable(pTHX_ OP * o,struct sprintf_ismc_info * info)112 S_sprintf_is_multiconcatable(pTHX_ OP *o,struct sprintf_ismc_info *info)
113 {
114     OP    *pm, *constop, *kid;
115     SV    *sv;
116     char  *s, *e, *p;
117     SSize_t nargs, nformats;
118     STRLEN cur, total_len, variant;
119     bool   utf8;
120 
121     /* if sprintf's behaviour changes, die here so that someone
122      * can decide whether to enhance this function or skip optimising
123      * under those new circumstances */
124     assert(!(o->op_flags & OPf_STACKED));
125     assert(!(PL_opargs[OP_SPRINTF] & OA_TARGLEX));
126     assert(!(o->op_private & ~OPpARG4_MASK));
127 
128     pm = cUNOPo->op_first;
129     if (pm->op_type != OP_PUSHMARK) /* weird coreargs stuff */
130         return FALSE;
131     constop = OpSIBLING(pm);
132     if (!constop || constop->op_type != OP_CONST)
133         return FALSE;
134     sv = cSVOPx_sv(constop);
135     if (SvMAGICAL(sv) || !SvPOK(sv))
136         return FALSE;
137 
138     s = SvPV(sv, cur);
139     e = s + cur;
140 
141     /* Scan format for %% and %s and work out how many %s there are.
142      * Abandon if other format types are found.
143      */
144 
145     nformats  = 0;
146     total_len = 0;
147     variant   = 0;
148 
149     for (p = s; p < e; p++) {
150         if (*p != '%') {
151             total_len++;
152             if (!UTF8_IS_INVARIANT(*p))
153                 variant++;
154             continue;
155         }
156         p++;
157         if (p >= e)
158             return FALSE; /* lone % at end gives "Invalid conversion" */
159         if (*p == '%')
160             total_len++;
161         else if (*p == 's')
162             nformats++;
163         else
164             return FALSE;
165     }
166 
167     if (!nformats || nformats > PERL_MULTICONCAT_MAXARG)
168         return FALSE;
169 
170     utf8 = cBOOL(SvUTF8(sv));
171     if (utf8)
172         variant = 0;
173 
174     /* scan args; they must all be in scalar cxt */
175 
176     nargs = 0;
177     kid = OpSIBLING(constop);
178 
179     while (kid) {
180         if ((kid->op_flags & OPf_WANT) != OPf_WANT_SCALAR)
181             return FALSE;
182         nargs++;
183         kid = OpSIBLING(kid);
184     }
185 
186     if (nargs != nformats)
187         return FALSE; /* e.g. sprintf("%s%s", $a); */
188 
189 
190     info->nargs      = nargs;
191     info->start      = s;
192     info->end        = e;
193     info->total_len  = total_len;
194     info->variant    = variant;
195     info->utf8       = utf8;
196 
197     return TRUE;
198 }
199 
200 /* S_maybe_multiconcat():
201  *
202  * given an OP_STRINGIFY, OP_SASSIGN, OP_CONCAT or OP_SPRINTF op, possibly
203  * convert it (and its children) into an OP_MULTICONCAT. See the code
204  * comments just before pp_multiconcat() for the full details of what
205  * OP_MULTICONCAT supports.
206  *
207  * Basically we're looking for an optree with a chain of OP_CONCATS down
208  * the LHS (or an OP_SPRINTF), with possibly an OP_SASSIGN, and/or
209  * OP_STRINGIFY, and/or OP_CONCAT acting as '.=' at its head, e.g.
210  *
211  *      $x = "$a$b-$c"
212  *
213  *  looks like
214  *
215  *      SASSIGN
216  *         |
217  *      STRINGIFY   -- PADSV[$x]
218  *         |
219  *         |
220  *      ex-PUSHMARK -- CONCAT/S
221  *                        |
222  *                     CONCAT/S  -- PADSV[$d]
223  *                        |
224  *                     CONCAT    -- CONST["-"]
225  *                        |
226  *                     PADSV[$a] -- PADSV[$b]
227  *
228  * Note that at this stage the OP_SASSIGN may have already been optimised
229  * away with OPpTARGET_MY set on the OP_STRINGIFY or OP_CONCAT.
230  */
231 
232 STATIC void
S_maybe_multiconcat(pTHX_ OP * o)233 S_maybe_multiconcat(pTHX_ OP *o)
234 {
235     OP *lastkidop;   /* the right-most of any kids unshifted onto o */
236     OP *topop;       /* the top-most op in the concat tree (often equals o,
237                         unless there are assign/stringify ops above it */
238     OP *parentop;    /* the parent op of topop (or itself if no parent) */
239     OP *targmyop;    /* the op (if any) with the OPpTARGET_MY flag */
240     OP *targetop;    /* the op corresponding to target=... or target.=... */
241     OP *stringop;    /* the OP_STRINGIFY op, if any */
242     OP *nextop;      /* used for recreating the op_next chain without consts */
243     OP *kid;         /* general-purpose op pointer */
244     UNOP_AUX_item *aux;
245     UNOP_AUX_item *lenp;
246     char *const_str, *p;
247     struct sprintf_ismc_info sprintf_info;
248 
249                      /* store info about each arg in args[];
250                       * toparg is the highest used slot; argp is a general
251                       * pointer to args[] slots */
252     struct {
253         void *p;      /* initially points to const sv (or null for op);
254                          later, set to SvPV(constsv), with ... */
255         STRLEN len;   /* ... len set to SvPV(..., len) */
256     } *argp, *toparg, args[PERL_MULTICONCAT_MAXARG*2 + 1];
257 
258     SSize_t nargs  = 0;
259     SSize_t nconst = 0;
260     SSize_t nadjconst  = 0; /* adjacent consts - may be demoted to args */
261     STRLEN variant;
262     bool utf8 = FALSE;
263     bool kid_is_last = FALSE; /* most args will be the RHS kid of a concat op;
264                                  the last-processed arg will the LHS of one,
265                                  as args are processed in reverse order */
266     U8   stacked_last = 0;   /* whether the last seen concat op was STACKED */
267     STRLEN total_len  = 0;   /* sum of the lengths of the const segments */
268     U8 flags          = 0;   /* what will become the op_flags and ... */
269     U8 private_flags  = 0;   /* ... op_private of the multiconcat op */
270     bool is_sprintf = FALSE; /* we're optimising an sprintf */
271     bool is_targable  = FALSE; /* targetop is an OPpTARGET_MY candidate */
272     bool prev_was_const = FALSE; /* previous arg was a const */
273 
274     /* -----------------------------------------------------------------
275      * Phase 1:
276      *
277      * Examine the optree non-destructively to determine whether it's
278      * suitable to be converted into an OP_MULTICONCAT. Accumulate
279      * information about the optree in args[].
280      */
281 
282     argp     = args;
283     targmyop = NULL;
284     targetop = NULL;
285     stringop = NULL;
286     topop    = o;
287     parentop = o;
288 
289     assert(   o->op_type == OP_SASSIGN
290            || o->op_type == OP_CONCAT
291            || o->op_type == OP_SPRINTF
292            || o->op_type == OP_STRINGIFY);
293 
294     Zero(&sprintf_info, 1, struct sprintf_ismc_info);
295 
296     /* first see if, at the top of the tree, there is an assign,
297      * append and/or stringify */
298 
299     if (topop->op_type == OP_SASSIGN) {
300         /* expr = ..... */
301         if (o->op_ppaddr != PL_ppaddr[OP_SASSIGN])
302             return;
303         if (o->op_private & (OPpASSIGN_BACKWARDS|OPpASSIGN_CV_TO_GV))
304             return;
305         assert(!(o->op_private & ~OPpARG2_MASK)); /* barf on unknown flags */
306 
307         parentop = topop;
308         topop = cBINOPo->op_first;
309         targetop = OpSIBLING(topop);
310         if (!targetop) /* probably some sort of syntax error */
311             return;
312 
313         /* don't optimise away assign in 'local $foo = ....' */
314         if (   (targetop->op_private & OPpLVAL_INTRO)
315             /* these are the common ops which do 'local', but
316              * not all */
317             && (   targetop->op_type == OP_GVSV
318                 || targetop->op_type == OP_RV2SV
319                 || targetop->op_type == OP_AELEM
320                 || targetop->op_type == OP_HELEM
321                 )
322         )
323             return;
324     }
325     else if (   topop->op_type == OP_CONCAT
326              && (topop->op_flags & OPf_STACKED)
327              && (!(topop->op_private & OPpCONCAT_NESTED))
328             )
329     {
330         /* expr .= ..... */
331 
332         /* OPpTARGET_MY shouldn't be able to be set here. If it is,
333          * decide what to do about it */
334         assert(!(o->op_private & OPpTARGET_MY));
335 
336         /* barf on unknown flags */
337         assert(!(o->op_private & ~(OPpARG2_MASK|OPpTARGET_MY)));
338         private_flags |= OPpMULTICONCAT_APPEND;
339         targetop = cBINOPo->op_first;
340         parentop = topop;
341         topop    = OpSIBLING(targetop);
342 
343         /* $x .= <FOO> gets optimised to rcatline instead */
344         if (topop->op_type == OP_READLINE)
345             return;
346     }
347 
348     if (targetop) {
349         /* Can targetop (the LHS) if it's a padsv, be optimised
350          * away and use OPpTARGET_MY instead?
351          */
352         if (    (targetop->op_type == OP_PADSV)
353             && !(targetop->op_private & OPpDEREF)
354             && !(targetop->op_private & OPpPAD_STATE)
355                /* we don't support 'my $x .= ...' */
356             && (   o->op_type == OP_SASSIGN
357                 || !(targetop->op_private & OPpLVAL_INTRO))
358         )
359             is_targable = TRUE;
360     }
361 
362     if (topop->op_type == OP_STRINGIFY) {
363         if (topop->op_ppaddr != PL_ppaddr[OP_STRINGIFY])
364             return;
365         stringop = topop;
366 
367         /* barf on unknown flags */
368         assert(!(o->op_private & ~(OPpARG4_MASK|OPpTARGET_MY)));
369 
370         if ((topop->op_private & OPpTARGET_MY)) {
371             if (o->op_type == OP_SASSIGN)
372                 return; /* can't have two assigns */
373             targmyop = topop;
374         }
375 
376         private_flags |= OPpMULTICONCAT_STRINGIFY;
377         parentop = topop;
378         topop = cBINOPx(topop)->op_first;
379         assert(OP_TYPE_IS_OR_WAS_NN(topop, OP_PUSHMARK));
380         topop = OpSIBLING(topop);
381     }
382 
383     if (topop->op_type == OP_SPRINTF) {
384         if (topop->op_ppaddr != PL_ppaddr[OP_SPRINTF])
385             return;
386         if (S_sprintf_is_multiconcatable(aTHX_ topop, &sprintf_info)) {
387             nargs     = sprintf_info.nargs;
388             total_len = sprintf_info.total_len;
389             variant   = sprintf_info.variant;
390             utf8      = sprintf_info.utf8;
391             is_sprintf = TRUE;
392             private_flags |= OPpMULTICONCAT_FAKE;
393             toparg = argp;
394             /* we have an sprintf op rather than a concat optree.
395              * Skip most of the code below which is associated with
396              * processing that optree. We also skip phase 2, determining
397              * whether its cost effective to optimise, since for sprintf,
398              * multiconcat is *always* faster */
399             goto create_aux;
400         }
401         /* note that even if the sprintf itself isn't multiconcatable,
402          * the expression as a whole may be, e.g. in
403          *    $x .= sprintf("%d",...)
404          * the sprintf op will be left as-is, but the concat/S op may
405          * be upgraded to multiconcat
406          */
407     }
408     else if (topop->op_type == OP_CONCAT) {
409         if (topop->op_ppaddr != PL_ppaddr[OP_CONCAT])
410             return;
411 
412         if ((topop->op_private & OPpTARGET_MY)) {
413             if (o->op_type == OP_SASSIGN || targmyop)
414                 return; /* can't have two assigns */
415             targmyop = topop;
416         }
417     }
418 
419     /* Is it safe to convert a sassign/stringify/concat op into
420      * a multiconcat? */
421     assert((PL_opargs[OP_SASSIGN]   & OA_CLASS_MASK) == OA_BINOP);
422     assert((PL_opargs[OP_CONCAT]    & OA_CLASS_MASK) == OA_BINOP);
423     assert((PL_opargs[OP_STRINGIFY] & OA_CLASS_MASK) == OA_LISTOP);
424     assert((PL_opargs[OP_SPRINTF]   & OA_CLASS_MASK) == OA_LISTOP);
425     STATIC_ASSERT_STMT(   STRUCT_OFFSET(BINOP,    op_last)
426                        == STRUCT_OFFSET(UNOP_AUX, op_aux));
427     STATIC_ASSERT_STMT(   STRUCT_OFFSET(LISTOP,   op_last)
428                        == STRUCT_OFFSET(UNOP_AUX, op_aux));
429 
430     /* Now scan the down the tree looking for a series of
431      * CONCAT/OPf_STACKED ops on the LHS (with the last one not
432      * stacked). For example this tree:
433      *
434      *     |
435      *   CONCAT/STACKED
436      *     |
437      *   CONCAT/STACKED -- EXPR5
438      *     |
439      *   CONCAT/STACKED -- EXPR4
440      *     |
441      *   CONCAT -- EXPR3
442      *     |
443      *   EXPR1  -- EXPR2
444      *
445      * corresponds to an expression like
446      *
447      *   (EXPR1 . EXPR2 . EXPR3 . EXPR4 . EXPR5)
448      *
449      * Record info about each EXPR in args[]: in particular, whether it is
450      * a stringifiable OP_CONST and if so what the const sv is.
451      *
452      * The reason why the last concat can't be STACKED is the difference
453      * between
454      *
455      *    ((($a .= $a) .= $a) .= $a) .= $a
456      *
457      * and
458      *    $a . $a . $a . $a . $a
459      *
460      * The main difference between the optrees for those two constructs
461      * is the presence of the last STACKED. As well as modifying $a,
462      * the former sees the changed $a between each concat, so if $s is
463      * initially 'a', the first returns 'a' x 16, while the latter returns
464      * 'a' x 5. And pp_multiconcat can't handle that kind of thing.
465      */
466 
467     kid = topop;
468 
469     for (;;) {
470         OP *argop;
471         SV *sv;
472         bool last = FALSE;
473 
474         if (    kid->op_type == OP_CONCAT
475             && !kid_is_last
476         ) {
477             OP *k1, *k2;
478             k1 = cUNOPx(kid)->op_first;
479             k2 = OpSIBLING(k1);
480             /* shouldn't happen except maybe after compile err? */
481             if (!k2)
482                 return;
483 
484             /* avoid turning (A . B . ($lex = C) ...)  into  (A . B . C ...) */
485             if (kid->op_private & OPpTARGET_MY)
486                 kid_is_last = TRUE;
487 
488             stacked_last = (kid->op_flags & OPf_STACKED);
489             if (!stacked_last)
490                 kid_is_last = TRUE;
491 
492             kid   = k1;
493             argop = k2;
494         }
495         else {
496             argop = kid;
497             last = TRUE;
498         }
499 
500         if (   nargs + nadjconst  >  PERL_MULTICONCAT_MAXARG        - 2
501             || (argp - args + 1)  > (PERL_MULTICONCAT_MAXARG*2 + 1) - 2)
502         {
503             /* At least two spare slots are needed to decompose both
504              * concat args. If there are no slots left, continue to
505              * examine the rest of the optree, but don't push new values
506              * on args[]. If the optree as a whole is legal for conversion
507              * (in particular that the last concat isn't STACKED), then
508              * the first PERL_MULTICONCAT_MAXARG elements of the optree
509              * can be converted into an OP_MULTICONCAT now, with the first
510              * child of that op being the remainder of the optree -
511              * which may itself later be converted to a multiconcat op
512              * too.
513              */
514             if (last) {
515                 /* the last arg is the rest of the optree */
516                 argp++->p = NULL;
517                 nargs++;
518             }
519         }
520         else if (   argop->op_type == OP_CONST
521             && ((sv = cSVOPx_sv(argop)))
522             /* defer stringification until runtime of 'constant'
523              * things that might stringify variantly, e.g. the radix
524              * point of NVs, or overloaded RVs */
525             && (SvPOK(sv) || SvIOK(sv))
526             && (!SvGMAGICAL(sv))
527         ) {
528             if (argop->op_private & OPpCONST_STRICT)
529                 no_bareword_allowed(argop);
530             argp++->p = sv;
531             utf8   |= cBOOL(SvUTF8(sv));
532             nconst++;
533             if (prev_was_const)
534                 /* this const may be demoted back to a plain arg later;
535                  * make sure we have enough arg slots left */
536                 nadjconst++;
537             prev_was_const = !prev_was_const;
538         }
539         else {
540             argp++->p = NULL;
541             nargs++;
542             prev_was_const = FALSE;
543         }
544 
545         if (last)
546             break;
547     }
548 
549     toparg = argp - 1;
550 
551     if (stacked_last)
552         return; /* we don't support ((A.=B).=C)...) */
553 
554     /* look for two adjacent consts and don't fold them together:
555      *     $o . "a" . "b"
556      * should do
557      *     $o->concat("a")->concat("b")
558      * rather than
559      *     $o->concat("ab")
560      * (but $o .=  "a" . "b" should still fold)
561      */
562     {
563         bool seen_nonconst = FALSE;
564         for (argp = toparg; argp >= args; argp--) {
565             if (argp->p == NULL) {
566                 seen_nonconst = TRUE;
567                 continue;
568             }
569             if (!seen_nonconst)
570                 continue;
571             if (argp[1].p) {
572                 /* both previous and current arg were constants;
573                  * leave the current OP_CONST as-is */
574                 argp->p = NULL;
575                 nconst--;
576                 nargs++;
577             }
578         }
579     }
580 
581     /* -----------------------------------------------------------------
582      * Phase 2:
583      *
584      * At this point we have determined that the optree *can* be converted
585      * into a multiconcat. Having gathered all the evidence, we now decide
586      * whether it *should*.
587      */
588 
589 
590     /* we need at least one concat action, e.g.:
591      *
592      *  Y . Z
593      *  X = Y . Z
594      *  X .= Y
595      *
596      * otherwise we could be doing something like $x = "foo", which
597      * if treated as a concat, would fail to COW.
598      */
599     if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 2)
600         return;
601 
602     /* Benchmarking seems to indicate that we gain if:
603      * * we optimise at least two actions into a single multiconcat
604      *    (e.g concat+concat, sassign+concat);
605      * * or if we can eliminate at least 1 OP_CONST;
606      * * or if we can eliminate a padsv via OPpTARGET_MY
607      */
608 
609     if (
610            /* eliminated at least one OP_CONST */
611            nconst >= 1
612            /* eliminated an OP_SASSIGN */
613         || o->op_type == OP_SASSIGN
614            /* eliminated an OP_PADSV */
615         || (!targmyop && is_targable)
616     )
617         /* definitely a net gain to optimise */
618         goto optimise;
619 
620     /* ... if not, what else? */
621 
622     /* special-case '$lex1 = expr . $lex1' (where expr isn't lex1):
623      * multiconcat is faster (due to not creating a temporary copy of
624      * $lex1), whereas for a general $lex1 = $lex2 . $lex3, concat is
625      * faster.
626      */
627     if (   nconst == 0
628          && nargs == 2
629          && targmyop
630          && topop->op_type == OP_CONCAT
631     ) {
632         PADOFFSET t = targmyop->op_targ;
633         OP *k1 = cBINOPx(topop)->op_first;
634         OP *k2 = cBINOPx(topop)->op_last;
635         if (   k2->op_type == OP_PADSV
636             && k2->op_targ == t
637             && (   k1->op_type != OP_PADSV
638                 || k1->op_targ != t)
639         )
640             goto optimise;
641     }
642 
643     /* need at least two concats */
644     if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 3)
645         return;
646 
647 
648 
649     /* -----------------------------------------------------------------
650      * Phase 3:
651      *
652      * At this point the optree has been verified as ok to be optimised
653      * into an OP_MULTICONCAT. Now start changing things.
654      */
655 
656    optimise:
657 
658     /* stringify all const args and determine utf8ness */
659 
660     variant = 0;
661     for (argp = args; argp <= toparg; argp++) {
662         SV *sv = (SV*)argp->p;
663         if (!sv)
664             continue; /* not a const op */
665         if (utf8 && !SvUTF8(sv))
666             sv_utf8_upgrade_nomg(sv);
667         argp->p = SvPV_nomg(sv, argp->len);
668         total_len += argp->len;
669 
670         /* see if any strings would grow if converted to utf8 */
671         if (!utf8) {
672             variant += variant_under_utf8_count((U8 *) argp->p,
673                                                 (U8 *) argp->p + argp->len);
674         }
675     }
676 
677     /* create and populate aux struct */
678 
679   create_aux:
680 
681     aux = (UNOP_AUX_item*)PerlMemShared_malloc(
682                     sizeof(UNOP_AUX_item)
683                     *  (
684                            PERL_MULTICONCAT_HEADER_SIZE
685                          + ((nargs + 1) * (variant ? 2 : 1))
686                         )
687                     );
688     const_str = (char *)PerlMemShared_malloc(total_len ? total_len : 1);
689 
690     /* Extract all the non-const expressions from the concat tree then
691      * dispose of the old tree, e.g. convert the tree from this:
692      *
693      *  o => SASSIGN
694      *         |
695      *       STRINGIFY   -- TARGET
696      *         |
697      *       ex-PUSHMARK -- CONCAT
698      *                        |
699      *                      CONCAT -- EXPR5
700      *                        |
701      *                      CONCAT -- EXPR4
702      *                        |
703      *                      CONCAT -- EXPR3
704      *                        |
705      *                      EXPR1  -- EXPR2
706      *
707      *
708      * to:
709      *
710      *  o => MULTICONCAT
711      *         |
712      *       ex-PUSHMARK -- EXPR1 -- EXPR2 -- EXPR3 -- EXPR4 -- EXPR5 -- TARGET
713      *
714      * except that if EXPRi is an OP_CONST, it's discarded.
715      *
716      * During the conversion process, EXPR ops are stripped from the tree
717      * and unshifted onto o. Finally, any of o's remaining original
718      * children are discarded and o is converted into an OP_MULTICONCAT.
719      *
720      * In this middle of this, o may contain both: unshifted args on the
721      * left, and some remaining original args on the right. lastkidop
722      * is set to point to the right-most unshifted arg to delineate
723      * between the two sets.
724      */
725 
726 
727     if (is_sprintf) {
728         /* create a copy of the format with the %'s removed, and record
729          * the sizes of the const string segments in the aux struct */
730         char *q, *oldq;
731         lenp = aux + PERL_MULTICONCAT_IX_LENGTHS;
732 
733         p    = sprintf_info.start;
734         q    = const_str;
735         oldq = q;
736         for (; p < sprintf_info.end; p++) {
737             if (*p == '%') {
738                 p++;
739                 if (*p != '%') {
740                     (lenp++)->ssize = q - oldq;
741                     oldq = q;
742                     continue;
743                 }
744             }
745             *q++ = *p;
746         }
747         lenp->ssize = q - oldq;
748         assert((STRLEN)(q - const_str) == total_len);
749 
750         /* Attach all the args (i.e. the kids of the sprintf) to o (which
751          * may or may not be topop) The pushmark and const ops need to be
752          * kept in case they're an op_next entry point.
753          */
754         lastkidop = cLISTOPx(topop)->op_last;
755         kid = cUNOPx(topop)->op_first; /* pushmark */
756         op_null(kid);
757         op_null(OpSIBLING(kid));       /* const */
758         if (o != topop) {
759             kid = op_sibling_splice(topop, NULL, -1, NULL); /* cut all args */
760             op_sibling_splice(o, NULL, 0, kid); /* and attach to o */
761             lastkidop->op_next = o;
762         }
763     }
764     else {
765         p = const_str;
766         lenp = aux + PERL_MULTICONCAT_IX_LENGTHS;
767 
768         lenp->ssize = -1;
769 
770         /* Concatenate all const strings into const_str.
771          * Note that args[] contains the RHS args in reverse order, so
772          * we scan args[] from top to bottom to get constant strings
773          * in L-R order
774          */
775         for (argp = toparg; argp >= args; argp--) {
776             if (!argp->p)
777                 /* not a const op */
778                 (++lenp)->ssize = -1;
779             else {
780                 STRLEN l = argp->len;
781                 Copy(argp->p, p, l, char);
782                 p += l;
783                 if (lenp->ssize == -1)
784                     lenp->ssize = l;
785                 else
786                     lenp->ssize += l;
787             }
788         }
789 
790         kid = topop;
791         nextop = o;
792         lastkidop = NULL;
793 
794         for (argp = args; argp <= toparg; argp++) {
795             /* only keep non-const args, except keep the first-in-next-chain
796              * arg no matter what it is (but nulled if OP_CONST), because it
797              * may be the entry point to this subtree from the previous
798              * op_next.
799              */
800             bool last = (argp == toparg);
801             OP *prev;
802 
803             /* set prev to the sibling *before* the arg to be cut out,
804              * e.g. when cutting EXPR:
805              *
806              *         |
807              * kid=  CONCAT
808              *         |
809              * prev= CONCAT -- EXPR
810              *         |
811              */
812             if (argp == args && kid->op_type != OP_CONCAT) {
813                 /* in e.g. '$x .= f(1)' there's no RHS concat tree
814                  * so the expression to be cut isn't kid->op_last but
815                  * kid itself */
816                 OP *o1, *o2;
817                 /* find the op before kid */
818                 o1 = NULL;
819                 o2 = cUNOPx(parentop)->op_first;
820                 while (o2 && o2 != kid) {
821                     o1 = o2;
822                     o2 = OpSIBLING(o2);
823                 }
824                 assert(o2 == kid);
825                 prev = o1;
826                 kid  = parentop;
827             }
828             else if (kid == o && lastkidop)
829                 prev = last ? lastkidop : OpSIBLING(lastkidop);
830             else
831                 prev = last ? NULL : cUNOPx(kid)->op_first;
832 
833             if (!argp->p || last) {
834                 /* cut RH op */
835                 OP *aop = op_sibling_splice(kid, prev, 1, NULL);
836                 /* and unshift to front of o */
837                 op_sibling_splice(o, NULL, 0, aop);
838                 /* record the right-most op added to o: later we will
839                  * free anything to the right of it */
840                 if (!lastkidop)
841                     lastkidop = aop;
842                 aop->op_next = nextop;
843                 if (last) {
844                     if (argp->p)
845                         /* null the const at start of op_next chain */
846                         op_null(aop);
847                 }
848                 else if (prev)
849                     nextop = prev->op_next;
850             }
851 
852             /* the last two arguments are both attached to the same concat op */
853             if (argp < toparg - 1)
854                 kid = prev;
855         }
856     }
857 
858     /* Populate the aux struct */
859 
860     aux[PERL_MULTICONCAT_IX_NARGS].ssize     = nargs;
861     aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv    = utf8 ? NULL : const_str;
862     aux[PERL_MULTICONCAT_IX_PLAIN_LEN].ssize = utf8 ?    0 : total_len;
863     aux[PERL_MULTICONCAT_IX_UTF8_PV].pv     = const_str;
864     aux[PERL_MULTICONCAT_IX_UTF8_LEN].ssize  = total_len;
865 
866     /* if variant > 0, calculate a variant const string and lengths where
867      * the utf8 version of the string will take 'variant' more bytes than
868      * the plain one. */
869 
870     if (variant) {
871         char              *p = const_str;
872         STRLEN          ulen = total_len + variant;
873         UNOP_AUX_item  *lens = aux + PERL_MULTICONCAT_IX_LENGTHS;
874         UNOP_AUX_item *ulens = lens + (nargs + 1);
875         char             *up = (char*)PerlMemShared_malloc(ulen);
876         SSize_t            n;
877 
878         aux[PERL_MULTICONCAT_IX_UTF8_PV].pv    = up;
879         aux[PERL_MULTICONCAT_IX_UTF8_LEN].ssize = ulen;
880 
881         for (n = 0; n < (nargs + 1); n++) {
882             SSize_t i;
883             char * orig_up = up;
884             for (i = (lens++)->ssize; i > 0; i--) {
885                 U8 c = *p++;
886                 append_utf8_from_native_byte(c, (U8**)&up);
887             }
888             (ulens++)->ssize = (i < 0) ? i : up - orig_up;
889         }
890     }
891 
892     if (stringop) {
893         /* if there was a top(ish)-level OP_STRINGIFY, we need to keep
894          * that op's first child - an ex-PUSHMARK - because the op_next of
895          * the previous op may point to it (i.e. it's the entry point for
896          * the o optree)
897          */
898         OP *pmop =
899             (stringop == o)
900                 ? op_sibling_splice(o, lastkidop, 1, NULL)
901                 : op_sibling_splice(stringop, NULL, 1, NULL);
902         assert(OP_TYPE_IS_OR_WAS_NN(pmop, OP_PUSHMARK));
903         op_sibling_splice(o, NULL, 0, pmop);
904         if (!lastkidop)
905             lastkidop = pmop;
906     }
907 
908     /* Optimise
909      *    target  = A.B.C...
910      *    target .= A.B.C...
911      */
912 
913     if (targetop) {
914         assert(!targmyop);
915 
916         if (o->op_type == OP_SASSIGN) {
917             /* Move the target subtree from being the last of o's children
918              * to being the last of o's preserved children.
919              * Note the difference between 'target = ...' and 'target .= ...':
920              * for the former, target is executed last; for the latter,
921              * first.
922              */
923             kid = OpSIBLING(lastkidop);
924             op_sibling_splice(o, kid, 1, NULL); /* cut target op */
925             op_sibling_splice(o, lastkidop, 0, targetop); /* and paste */
926             lastkidop->op_next = kid->op_next;
927             lastkidop = targetop;
928         }
929         else {
930             /* Move the target subtree from being the first of o's
931              * original children to being the first of *all* o's children.
932              */
933             if (lastkidop) {
934                 op_sibling_splice(o, lastkidop, 1, NULL); /* cut target op */
935                 op_sibling_splice(o, NULL, 0, targetop);  /* and paste*/
936             }
937             else {
938                 /* if the RHS of .= doesn't contain a concat (e.g.
939                  * $x .= "foo"), it gets missed by the "strip ops from the
940                  * tree and add to o" loop earlier */
941                 assert(topop->op_type != OP_CONCAT);
942                 if (stringop) {
943                     /* in e.g. $x .= "$y", move the $y expression
944                      * from being a child of OP_STRINGIFY to being the
945                      * second child of the OP_CONCAT
946                      */
947                     assert(cUNOPx(stringop)->op_first == topop);
948                     op_sibling_splice(stringop, NULL, 1, NULL);
949                     op_sibling_splice(o, cUNOPo->op_first, 0, topop);
950                 }
951                 assert(topop == OpSIBLING(cBINOPo->op_first));
952                 if (toparg->p)
953                     op_null(topop);
954                 lastkidop = topop;
955             }
956         }
957 
958         if (is_targable) {
959             /* optimise
960              *  my $lex  = A.B.C...
961              *     $lex  = A.B.C...
962              *     $lex .= A.B.C...
963              * The original padsv op is kept but nulled in case it's the
964              * entry point for the optree (which it will be for
965              * '$lex .=  ... '
966              */
967             private_flags |= OPpTARGET_MY;
968             private_flags |= (targetop->op_private & OPpLVAL_INTRO);
969             o->op_targ = targetop->op_targ;
970             targetop->op_targ = 0;
971             op_null(targetop);
972         }
973         else
974             flags |= OPf_STACKED;
975     }
976     else if (targmyop) {
977         private_flags |= OPpTARGET_MY;
978         if (o != targmyop) {
979             o->op_targ = targmyop->op_targ;
980             targmyop->op_targ = 0;
981         }
982     }
983 
984     /* detach the emaciated husk of the sprintf/concat optree and free it */
985     for (;;) {
986         kid = op_sibling_splice(o, lastkidop, 1, NULL);
987         if (!kid)
988             break;
989         op_free(kid);
990     }
991 
992     /* and convert o into a multiconcat */
993 
994     o->op_flags        = (flags|OPf_KIDS|stacked_last
995                          |(o->op_flags & (OPf_WANT|OPf_PARENS)));
996     o->op_private      = private_flags;
997     o->op_type         = OP_MULTICONCAT;
998     o->op_ppaddr       = PL_ppaddr[OP_MULTICONCAT];
999     cUNOP_AUXo->op_aux = aux;
1000 
1001 
1002     /* add some PADTMPs, as needed, for the 'fallback to OP_CONCAT
1003      * behaviour if magic / overloaded etc present' code path */
1004 
1005     /* general PADTMP for the target of each concat */
1006     aux[PERL_MULTICONCAT_IX_PADTMP0].pad_offset =
1007                             pad_alloc(OP_MULTICONCAT, SVs_PADTMP);
1008 
1009     /* PADTMP for recreating OP_CONST return values */
1010     aux[PERL_MULTICONCAT_IX_PADTMP1].pad_offset =
1011         (is_sprintf || nconst) ? pad_alloc(OP_MULTICONCAT, SVs_PADTMP) : 0;
1012 
1013     /* PADTMP for stringifying the result */
1014     aux[PERL_MULTICONCAT_IX_PADTMP2].pad_offset =
1015     (o->op_private &OPpMULTICONCAT_STRINGIFY)
1016             ? pad_alloc(OP_MULTICONCAT, SVs_PADTMP) : 0;
1017 }
1018 
1019 
1020 /*
1021 =for apidoc_section $optree_manipulation
1022 
1023 =for apidoc optimize_optree
1024 
1025 This function applies some optimisations to the optree in top-down order.
1026 It is called before the peephole optimizer, which processes ops in
1027 execution order. Note that finalize_optree() also does a top-down scan,
1028 but is called *after* the peephole optimizer.
1029 
1030 =cut
1031 */
1032 
1033 void
Perl_optimize_optree(pTHX_ OP * o)1034 Perl_optimize_optree(pTHX_ OP* o)
1035 {
1036     PERL_ARGS_ASSERT_OPTIMIZE_OPTREE;
1037 
1038     ENTER;
1039     SAVEVPTR(PL_curcop);
1040 
1041     optimize_op(o);
1042 
1043     LEAVE;
1044 }
1045 
1046 
1047 #define warn_implicit_snail_cvsig(o)  S_warn_implicit_snail_cvsig(aTHX_ o)
1048 static void
S_warn_implicit_snail_cvsig(pTHX_ OP * o)1049 S_warn_implicit_snail_cvsig(pTHX_ OP *o)
1050 {
1051     CV *cv = PL_compcv;
1052     while(cv && CvEVAL(cv))
1053         cv = CvOUTSIDE(cv);
1054 
1055     if(cv && CvSIGNATURE(cv))
1056         Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__ARGS_ARRAY_WITH_SIGNATURES),
1057             "Implicit use of @_ in %s with signatured subroutine is experimental", OP_DESC(o));
1058 }
1059 
1060 
1061 #define OP_ZOOM(o)  (OP_TYPE_IS(o, OP_NULL) ? cUNOPx(o)->op_first : (o))
1062 
1063 /* helper for optimize_optree() which optimises one op then recurses
1064  * to optimise any children.
1065  */
1066 
1067 STATIC void
S_optimize_op(pTHX_ OP * o)1068 S_optimize_op(pTHX_ OP* o)
1069 {
1070     OP *top_op = o;
1071 
1072     PERL_ARGS_ASSERT_OPTIMIZE_OP;
1073 
1074     while (1) {
1075         OP * next_kid = NULL;
1076 
1077         assert(o->op_type != OP_FREED);
1078 
1079         switch (o->op_type) {
1080         case OP_NEXTSTATE:
1081         case OP_DBSTATE:
1082             PL_curcop = ((COP*)o);		/* for warnings */
1083             break;
1084 
1085 
1086         case OP_CONCAT:
1087         case OP_SASSIGN:
1088         case OP_STRINGIFY:
1089         case OP_SPRINTF:
1090             S_maybe_multiconcat(aTHX_ o);
1091             break;
1092 
1093         case OP_SUBST:
1094             if (cPMOPo->op_pmreplrootu.op_pmreplroot) {
1095                 /* we can't assume that op_pmreplroot->op_sibparent == o
1096                  * and that it is thus possible to walk back up the tree
1097                  * past op_pmreplroot. So, although we try to avoid
1098                  * recursing through op trees, do it here. After all,
1099                  * there are unlikely to be many nested s///e's within
1100                  * the replacement part of a s///e.
1101                  */
1102                 optimize_op(cPMOPo->op_pmreplrootu.op_pmreplroot);
1103             }
1104             break;
1105 
1106         case OP_RV2AV:
1107         {
1108             OP *first = (o->op_flags & OPf_KIDS) ? cUNOPo->op_first : NULL;
1109             CV *cv = PL_compcv;
1110             while(cv && CvEVAL(cv))
1111                 cv = CvOUTSIDE(cv);
1112 
1113             if(cv && CvSIGNATURE(cv) &&
1114                     OP_TYPE_IS(first, OP_GV) && cGVOPx_gv(first) == PL_defgv) {
1115                 OP *parent = op_parent(o);
1116                 while(OP_TYPE_IS(parent, OP_NULL))
1117                     parent = op_parent(parent);
1118 
1119                 Perl_ck_warner_d(aTHX_ packWARN(WARN_EXPERIMENTAL__ARGS_ARRAY_WITH_SIGNATURES),
1120                     "Use of @_ in %s with signatured subroutine is experimental", OP_DESC(parent));
1121             }
1122             break;
1123         }
1124 
1125         case OP_SHIFT:
1126         case OP_POP:
1127             if(!CvUNIQUE(PL_compcv) && !(o->op_flags & OPf_KIDS))
1128                 warn_implicit_snail_cvsig(o);
1129             break;
1130 
1131         case OP_ENTERSUB:
1132             if(!(o->op_flags & OPf_STACKED))
1133                 warn_implicit_snail_cvsig(o);
1134             break;
1135 
1136         case OP_GOTO:
1137         {
1138             OP *first = (o->op_flags & OPf_KIDS) ? cUNOPo->op_first : NULL;
1139             OP *ffirst;
1140             if(OP_TYPE_IS(first, OP_SREFGEN) &&
1141                     (ffirst = OP_ZOOM(cUNOPx(first)->op_first)) &&
1142                     OP_TYPE_IS(ffirst, OP_RV2CV))
1143                 warn_implicit_snail_cvsig(o);
1144             break;
1145         }
1146 
1147         default:
1148             break;
1149         }
1150 
1151         if (o->op_flags & OPf_KIDS)
1152             next_kid = cUNOPo->op_first;
1153 
1154         /* if a kid hasn't been nominated to process, continue with the
1155          * next sibling, or if no siblings left, go back to the parent's
1156          * siblings and so on
1157          */
1158         while (!next_kid) {
1159             if (o == top_op)
1160                 return; /* at top; no parents/siblings to try */
1161             if (OpHAS_SIBLING(o))
1162                 next_kid = o->op_sibparent;
1163             else
1164                 o = o->op_sibparent; /*try parent's next sibling */
1165         }
1166 
1167       /* this label not yet used. Goto here if any code above sets
1168        * next-kid
1169        get_next_op:
1170        */
1171         o = next_kid;
1172     }
1173 }
1174 
1175 /*
1176 =for apidoc finalize_optree
1177 
1178 This function finalizes the optree.  Should be called directly after
1179 the complete optree is built.  It does some additional
1180 checking which can't be done in the normal C<ck_>xxx functions and makes
1181 the tree thread-safe.
1182 
1183 =cut
1184 */
1185 
1186 void
Perl_finalize_optree(pTHX_ OP * o)1187 Perl_finalize_optree(pTHX_ OP* o)
1188 {
1189     PERL_ARGS_ASSERT_FINALIZE_OPTREE;
1190 
1191     ENTER;
1192     SAVEVPTR(PL_curcop);
1193 
1194     finalize_op(o);
1195 
1196     LEAVE;
1197 }
1198 
1199 
1200 /*
1201 =for apidoc traverse_op_tree
1202 
1203 Return the next op in a depth-first traversal of the op tree,
1204 returning NULL when the traversal is complete.
1205 
1206 The initial call must supply the root of the tree as both top and o.
1207 
1208 For now it's static, but it may be exposed to the API in the future.
1209 
1210 =cut
1211 */
1212 
1213 STATIC OP*
S_traverse_op_tree(pTHX_ OP * top,OP * o)1214 S_traverse_op_tree(pTHX_ OP *top, OP *o) {
1215     OP *sib;
1216 
1217     PERL_ARGS_ASSERT_TRAVERSE_OP_TREE;
1218 
1219     if ((o->op_flags & OPf_KIDS) && cUNOPo->op_first) {
1220         return cUNOPo->op_first;
1221     }
1222     else if ((sib = OpSIBLING(o))) {
1223         return sib;
1224     }
1225     else {
1226         OP *parent = o->op_sibparent;
1227         assert(!(o->op_moresib));
1228         while (parent && parent != top) {
1229             OP *sib = OpSIBLING(parent);
1230             if (sib)
1231                 return sib;
1232             parent = parent->op_sibparent;
1233         }
1234 
1235         return NULL;
1236     }
1237 }
1238 
1239 STATIC void
S_finalize_op(pTHX_ OP * o)1240 S_finalize_op(pTHX_ OP* o)
1241 {
1242     OP * const top = o;
1243     PERL_ARGS_ASSERT_FINALIZE_OP;
1244 
1245     do {
1246         assert(o->op_type != OP_FREED);
1247 
1248         switch (o->op_type) {
1249         case OP_NEXTSTATE:
1250         case OP_DBSTATE:
1251             PL_curcop = ((COP*)o);		/* for warnings */
1252             break;
1253         case OP_EXEC:
1254             if (OpHAS_SIBLING(o)) {
1255                 OP *sib = OpSIBLING(o);
1256                 if ((  sib->op_type == OP_NEXTSTATE || sib->op_type == OP_DBSTATE)
1257                     && ckWARN(WARN_EXEC)
1258                     && OpHAS_SIBLING(sib))
1259                 {
1260                     const OPCODE type = OpSIBLING(sib)->op_type;
1261                     if (type != OP_EXIT && type != OP_WARN && type != OP_DIE) {
1262                         const line_t oldline = CopLINE(PL_curcop);
1263                         CopLINE_set(PL_curcop, CopLINE((COP*)sib));
1264                         Perl_warner(aTHX_ packWARN(WARN_EXEC),
1265                             "Statement unlikely to be reached");
1266                         Perl_warner(aTHX_ packWARN(WARN_EXEC),
1267                             "\t(Maybe you meant system() when you said exec()?)\n");
1268                         CopLINE_set(PL_curcop, oldline);
1269                     }
1270                 }
1271             }
1272             break;
1273 
1274         case OP_GV:
1275             if ((o->op_private & OPpEARLY_CV) && ckWARN(WARN_PROTOTYPE)) {
1276                 GV * const gv = cGVOPo_gv;
1277                 if (SvTYPE(gv) == SVt_PVGV && GvCV(gv) && SvPVX_const(GvCV(gv))) {
1278                     /* XXX could check prototype here instead of just carping */
1279                     SV * const sv = sv_newmortal();
1280                     gv_efullname3(sv, gv, NULL);
1281                     Perl_warner(aTHX_ packWARN(WARN_PROTOTYPE),
1282                                 "%" SVf "() called too early to check prototype",
1283                                 SVfARG(sv));
1284                 }
1285             }
1286             break;
1287 
1288         case OP_CONST:
1289             if (cSVOPo->op_private & OPpCONST_STRICT)
1290                 no_bareword_allowed(o);
1291 #ifdef USE_ITHREADS
1292             /* FALLTHROUGH */
1293         case OP_HINTSEVAL:
1294             op_relocate_sv(&cSVOPo->op_sv, &o->op_targ);
1295 #endif
1296             break;
1297 
1298 #ifdef USE_ITHREADS
1299             /* Relocate all the METHOP's SVs to the pad for thread safety. */
1300         case OP_METHOD_NAMED:
1301         case OP_METHOD_SUPER:
1302         case OP_METHOD_REDIR:
1303         case OP_METHOD_REDIR_SUPER:
1304             op_relocate_sv(&cMETHOPo->op_u.op_meth_sv, &o->op_targ);
1305             break;
1306 #endif
1307 
1308         case OP_HELEM: {
1309             UNOP *rop;
1310             SVOP *key_op;
1311             OP *kid;
1312 
1313             if ((key_op = cSVOPx(cBINOPo->op_last))->op_type != OP_CONST)
1314                 break;
1315 
1316             rop = cUNOPx(cBINOPo->op_first);
1317 
1318             goto check_keys;
1319 
1320             case OP_HSLICE:
1321                 S_scalar_slice_warning(aTHX_ o);
1322                 /* FALLTHROUGH */
1323 
1324             case OP_KVHSLICE:
1325                 kid = OpSIBLING(cLISTOPo->op_first);
1326             if (/* I bet there's always a pushmark... */
1327                 OP_TYPE_ISNT_AND_WASNT_NN(kid, OP_LIST)
1328                 && OP_TYPE_ISNT_NN(kid, OP_CONST))
1329             {
1330                 break;
1331             }
1332 
1333             key_op = cSVOPx(kid->op_type == OP_CONST
1334                              ? kid
1335                              : OpSIBLING(kLISTOP->op_first));
1336 
1337             rop = cUNOPx(cLISTOPo->op_last);
1338 
1339         check_keys:
1340             if (o->op_private & OPpLVAL_INTRO || rop->op_type != OP_RV2HV)
1341                 rop = NULL;
1342             check_hash_fields_and_hekify(rop, key_op, 1);
1343             break;
1344         }
1345         case OP_NULL:
1346             if (o->op_targ != OP_HSLICE && o->op_targ != OP_ASLICE)
1347                 break;
1348             /* FALLTHROUGH */
1349         case OP_ASLICE:
1350             S_scalar_slice_warning(aTHX_ o);
1351             break;
1352 
1353         case OP_SUBST: {
1354             if (cPMOPo->op_pmreplrootu.op_pmreplroot)
1355                 finalize_op(cPMOPo->op_pmreplrootu.op_pmreplroot);
1356             break;
1357         }
1358         default:
1359             break;
1360         }
1361 
1362 #ifdef DEBUGGING
1363         if (o->op_flags & OPf_KIDS) {
1364             OP *kid;
1365 
1366             /* check that op_last points to the last sibling, and that
1367              * the last op_sibling/op_sibparent field points back to the
1368              * parent, and that the only ops with KIDS are those which are
1369              * entitled to them */
1370             U32 type = o->op_type;
1371             U32 family;
1372             bool has_last;
1373 
1374             if (type == OP_NULL) {
1375                 type = o->op_targ;
1376                 /* ck_glob creates a null UNOP with ex-type GLOB
1377                  * (which is a list op. So pretend it wasn't a listop */
1378                 if (type == OP_GLOB)
1379                     type = OP_NULL;
1380             }
1381             family = PL_opargs[type] & OA_CLASS_MASK;
1382 
1383             has_last = (   family == OA_BINOP
1384                         || family == OA_LISTOP
1385                         || family == OA_PMOP
1386                         || family == OA_LOOP
1387                        );
1388             assert(  has_last /* has op_first and op_last, or ...
1389                   ... has (or may have) op_first: */
1390                   || family == OA_UNOP
1391                   || family == OA_UNOP_AUX
1392                   || family == OA_LOGOP
1393                   || family == OA_BASEOP_OR_UNOP
1394                   || family == OA_FILESTATOP
1395                   || family == OA_LOOPEXOP
1396                   || family == OA_METHOP
1397                   || type == OP_CUSTOM
1398                   || type == OP_NULL /* new_logop does this */
1399                   );
1400 
1401             for (kid = cUNOPo->op_first; kid; kid = OpSIBLING(kid)) {
1402                 if (!OpHAS_SIBLING(kid)) {
1403                     if (has_last)
1404                         assert(kid == cLISTOPo->op_last);
1405                     assert(kid->op_sibparent == o);
1406                 }
1407             }
1408         }
1409 #endif
1410     } while (( o = traverse_op_tree(top, o)) != NULL);
1411 }
1412 
1413 
1414 /*
1415    ---------------------------------------------------------
1416 
1417    Common vars in list assignment
1418 
1419    There now follows some enums and static functions for detecting
1420    common variables in list assignments. Here is a little essay I wrote
1421    for myself when trying to get my head around this. DAPM.
1422 
1423    ----
1424 
1425    First some random observations:
1426 
1427    * If a lexical var is an alias of something else, e.g.
1428        for my $x ($lex, $pkg, $a[0]) {...}
1429      then the act of aliasing will increase the reference count of the SV
1430 
1431    * If a package var is an alias of something else, it may still have a
1432      reference count of 1, depending on how the alias was created, e.g.
1433      in *a = *b, $a may have a refcount of 1 since the GP is shared
1434      with a single GvSV pointer to the SV. So If it's an alias of another
1435      package var, then RC may be 1; if it's an alias of another scalar, e.g.
1436      a lexical var or an array element, then it will have RC > 1.
1437 
1438    * There are many ways to create a package alias; ultimately, XS code
1439      may quite legally do GvSV(gv) = SvREFCNT_inc(sv) for example, so
1440      run-time tracing mechanisms are unlikely to be able to catch all cases.
1441 
1442    * When the LHS is all my declarations, the same vars can't appear directly
1443      on the RHS, but they can indirectly via closures, aliasing and lvalue
1444      subs. But those techniques all involve an increase in the lexical
1445      scalar's ref count.
1446 
1447    * When the LHS is all lexical vars (but not necessarily my declarations),
1448      it is possible for the same lexicals to appear directly on the RHS, and
1449      without an increased ref count, since the stack isn't refcounted.
1450      This case can be detected at compile time by scanning for common lex
1451      vars with PL_generation.
1452 
1453    * lvalue subs defeat common var detection, but they do at least
1454      return vars with a temporary ref count increment. Also, you can't
1455      tell at compile time whether a sub call is lvalue.
1456 
1457 
1458    So...
1459 
1460    A: There are a few circumstances where there definitely can't be any
1461      commonality:
1462 
1463        LHS empty:  () = (...);
1464        RHS empty:  (....) = ();
1465        RHS contains only constants or other 'can't possibly be shared'
1466            elements (e.g. ops that return PADTMPs):  (...) = (1,2, length)
1467            i.e. they only contain ops not marked as dangerous, whose children
1468            are also not dangerous;
1469        LHS ditto;
1470        LHS contains a single scalar element: e.g. ($x) = (....); because
1471            after $x has been modified, it won't be used again on the RHS;
1472        RHS contains a single element with no aggregate on LHS: e.g.
1473            ($a,$b,$c)  = ($x); again, once $a has been modified, its value
1474            won't be used again.
1475 
1476    B: If LHS are all 'my' lexical var declarations (or safe ops, which
1477      we can ignore):
1478 
1479        my ($a, $b, @c) = ...;
1480 
1481        Due to closure and goto tricks, these vars may already have content.
1482        For the same reason, an element on the RHS may be a lexical or package
1483        alias of one of the vars on the left, or share common elements, for
1484        example:
1485 
1486            my ($x,$y) = f(); # $x and $y on both sides
1487            sub f : lvalue { ($x,$y) = (1,2); $y, $x }
1488 
1489        and
1490 
1491            my $ra = f();
1492            my @a = @$ra;  # elements of @a on both sides
1493            sub f { @a = 1..4; \@a }
1494 
1495 
1496        First, just consider scalar vars on LHS:
1497 
1498            RHS is safe only if (A), or in addition,
1499                * contains only lexical *scalar* vars, where neither side's
1500                  lexicals have been flagged as aliases
1501 
1502            If RHS is not safe, then it's always legal to check LHS vars for
1503            RC==1, since the only RHS aliases will always be associated
1504            with an RC bump.
1505 
1506            Note that in particular, RHS is not safe if:
1507 
1508                * it contains package scalar vars; e.g.:
1509 
1510                    f();
1511                    my ($x, $y) = (2, $x_alias);
1512                    sub f { $x = 1; *x_alias = \$x; }
1513 
1514                * It contains other general elements, such as flattened or
1515                * spliced or single array or hash elements, e.g.
1516 
1517                    f();
1518                    my ($x,$y) = @a; # or $a[0] or @a{@b} etc
1519 
1520                    sub f {
1521                        ($x, $y) = (1,2);
1522                        use feature 'refaliasing';
1523                        \($a[0], $a[1]) = \($y,$x);
1524                    }
1525 
1526                  It doesn't matter if the array/hash is lexical or package.
1527 
1528                * it contains a function call that happens to be an lvalue
1529                  sub which returns one or more of the above, e.g.
1530 
1531                    f();
1532                    my ($x,$y) = f();
1533 
1534                    sub f : lvalue {
1535                        ($x, $y) = (1,2);
1536                        *x1 = \$x;
1537                        $y, $x1;
1538                    }
1539 
1540                    (so a sub call on the RHS should be treated the same
1541                    as having a package var on the RHS).
1542 
1543                * any other "dangerous" thing, such an op or built-in that
1544                  returns one of the above, e.g. pp_preinc
1545 
1546 
1547            If RHS is not safe, what we can do however is at compile time flag
1548            that the LHS are all my declarations, and at run time check whether
1549            all the LHS have RC == 1, and if so skip the full scan.
1550 
1551        Now consider array and hash vars on LHS: e.g. my (...,@a) = ...;
1552 
1553            Here the issue is whether there can be elements of @a on the RHS
1554            which will get prematurely freed when @a is cleared prior to
1555            assignment. This is only a problem if the aliasing mechanism
1556            is one which doesn't increase the refcount - only if RC == 1
1557            will the RHS element be prematurely freed.
1558 
1559            Because the array/hash is being INTROed, it or its elements
1560            can't directly appear on the RHS:
1561 
1562                my (@a) = ($a[0], @a, etc) # NOT POSSIBLE
1563 
1564            but can indirectly, e.g.:
1565 
1566                my $r = f();
1567                my (@a) = @$r;
1568                sub f { @a = 1..3; \@a }
1569 
1570            So if the RHS isn't safe as defined by (A), we must always
1571            mortalise and bump the ref count of any remaining RHS elements
1572            when assigning to a non-empty LHS aggregate.
1573 
1574            Lexical scalars on the RHS aren't safe if they've been involved in
1575            aliasing, e.g.
1576 
1577                use feature 'refaliasing';
1578 
1579                f();
1580                \(my $lex) = \$pkg;
1581                my @a = ($lex,3); # equivalent to ($a[0],3)
1582 
1583                sub f {
1584                    @a = (1,2);
1585                    \$pkg = \$a[0];
1586                }
1587 
1588            Similarly with lexical arrays and hashes on the RHS:
1589 
1590                f();
1591                my @b;
1592                my @a = (@b);
1593 
1594                sub f {
1595                    @a = (1,2);
1596                    \$b[0] = \$a[1];
1597                    \$b[1] = \$a[0];
1598                }
1599 
1600 
1601 
1602    C: As (B), but in addition the LHS may contain non-intro lexicals, e.g.
1603        my $a; ($a, my $b) = (....);
1604 
1605        The difference between (B) and (C) is that it is now physically
1606        possible for the LHS vars to appear on the RHS too, where they
1607        are not reference counted; but in this case, the compile-time
1608        PL_generation sweep will detect such common vars.
1609 
1610        So the rules for (C) differ from (B) in that if common vars are
1611        detected, the runtime "test RC==1" optimisation can no longer be used,
1612        and a full mark and sweep is required
1613 
1614    D: As (C), but in addition the LHS may contain package vars.
1615 
1616        Since package vars can be aliased without a corresponding refcount
1617        increase, all bets are off. It's only safe if (A). E.g.
1618 
1619            my ($x, $y) = (1,2);
1620 
1621            for $x_alias ($x) {
1622                ($x_alias, $y) = (3, $x); # whoops
1623            }
1624 
1625        Ditto for LHS aggregate package vars.
1626 
1627    E: Any other dangerous ops on LHS, e.g.
1628            (f(), $a[0], @$r) = (...);
1629 
1630        this is similar to (E) in that all bets are off. In addition, it's
1631        impossible to determine at compile time whether the LHS
1632        contains a scalar or an aggregate, e.g.
1633 
1634            sub f : lvalue { @a }
1635            (f()) = 1..3;
1636 
1637 * ---------------------------------------------------------
1638 */
1639 
1640 /* A set of bit flags returned by S_aassign_scan(). Each flag indicates
1641  * that at least one of the things flagged was seen.
1642  */
1643 
1644 enum {
1645     AAS_MY_SCALAR       = 0x001, /* my $scalar */
1646     AAS_MY_AGG          = 0x002, /* aggregate: my @array or my %hash */
1647     AAS_LEX_SCALAR      = 0x004, /* $lexical */
1648     AAS_LEX_AGG         = 0x008, /* @lexical or %lexical aggregate */
1649     AAS_LEX_SCALAR_COMM = 0x010, /* $lexical seen on both sides */
1650     AAS_PKG_SCALAR      = 0x020, /* $scalar (where $scalar is pkg var) */
1651     AAS_PKG_AGG         = 0x040, /* package @array or %hash aggregate */
1652     AAS_DANGEROUS       = 0x080, /* an op (other than the above)
1653                                          that's flagged OA_DANGEROUS */
1654     AAS_SAFE_SCALAR     = 0x100, /* produces at least one scalar SV that's
1655                                         not in any of the categories above */
1656     AAS_DEFAV           = 0x200  /* contains just a single '@_' on RHS */
1657 };
1658 
1659 /* helper function for S_aassign_scan().
1660  * check a PAD-related op for commonality and/or set its generation number.
1661  * Returns a boolean indicating whether its shared */
1662 
1663 static bool
S_aassign_padcheck(pTHX_ OP * o,bool rhs)1664 S_aassign_padcheck(pTHX_ OP* o, bool rhs)
1665 {
1666     if (PAD_COMPNAME_GEN(o->op_targ) == PERL_INT_MAX)
1667         /* lexical used in aliasing */
1668         return TRUE;
1669 
1670     if (rhs)
1671         return cBOOL(PAD_COMPNAME_GEN(o->op_targ) == (STRLEN)PL_generation);
1672     else
1673         PAD_COMPNAME_GEN_set(o->op_targ, PL_generation);
1674 
1675     return FALSE;
1676 }
1677 
1678 /*
1679   Helper function for OPpASSIGN_COMMON* detection in rpeep().
1680   It scans the left or right hand subtree of the aassign op, and returns a
1681   set of flags indicating what sorts of things it found there.
1682   'rhs' indicates whether we're scanning the LHS or RHS. If the former, we
1683   set PL_generation on lexical vars; if the latter, we see if
1684   PL_generation matches.
1685   'scalars_p' is a pointer to a counter of the number of scalar SVs seen.
1686   This fn will increment it by the number seen. It's not intended to
1687   be an accurate count (especially as many ops can push a variable
1688   number of SVs onto the stack); rather it's used as to test whether there
1689   can be at most 1 SV pushed; so it's only meanings are "0, 1, many".
1690 */
1691 
1692 static int
S_aassign_scan(pTHX_ OP * o,bool rhs,int * scalars_p)1693 S_aassign_scan(pTHX_ OP* o, bool rhs, int *scalars_p)
1694 {
1695     OP *top_op           = o;
1696     OP *effective_top_op = o;
1697     int all_flags = 0;
1698 
1699     while (1) {
1700         bool top = o == effective_top_op;
1701         int flags = 0;
1702         OP* next_kid = NULL;
1703 
1704         /* first, look for a solitary @_ on the RHS */
1705         if (   rhs
1706             && top
1707             && (o->op_flags & OPf_KIDS)
1708             && OP_TYPE_IS_OR_WAS(o, OP_LIST)
1709         ) {
1710             OP *kid = cUNOPo->op_first;
1711             if (   (   kid->op_type == OP_PUSHMARK
1712                     || kid->op_type == OP_PADRANGE) /* ex-pushmark */
1713                 && ((kid = OpSIBLING(kid)))
1714                 && !OpHAS_SIBLING(kid)
1715                 && kid->op_type == OP_RV2AV
1716                 && !(kid->op_flags & OPf_REF)
1717                 && !(kid->op_private & (OPpLVAL_INTRO|OPpMAYBE_LVSUB))
1718                 && ((kid->op_flags & OPf_WANT) == OPf_WANT_LIST)
1719                 && ((kid = cUNOPx(kid)->op_first))
1720                 && kid->op_type == OP_GV
1721                 && cGVOPx_gv(kid) == PL_defgv
1722             )
1723                 flags = AAS_DEFAV;
1724         }
1725 
1726         switch (o->op_type) {
1727         case OP_GVSV:
1728             (*scalars_p)++;
1729             all_flags |= AAS_PKG_SCALAR;
1730             goto do_next;
1731 
1732         case OP_PADAV:
1733         case OP_PADHV:
1734             (*scalars_p) += 2;
1735             /* if !top, could be e.g. @a[0,1] */
1736             all_flags |=  (top && (o->op_flags & OPf_REF))
1737                             ? ((o->op_private & OPpLVAL_INTRO)
1738                                 ? AAS_MY_AGG : AAS_LEX_AGG)
1739                             : AAS_DANGEROUS;
1740             goto do_next;
1741 
1742         case OP_PADSV:
1743             {
1744                 int comm = S_aassign_padcheck(aTHX_ o, rhs)
1745                             ?  AAS_LEX_SCALAR_COMM : 0;
1746                 (*scalars_p)++;
1747                 all_flags |= (o->op_private & OPpLVAL_INTRO)
1748                     ? (AAS_MY_SCALAR|comm) : (AAS_LEX_SCALAR|comm);
1749                 goto do_next;
1750 
1751             }
1752 
1753         case OP_RV2AV:
1754         case OP_RV2HV:
1755             (*scalars_p) += 2;
1756             if (cUNOPx(o)->op_first->op_type != OP_GV)
1757                 all_flags |= AAS_DANGEROUS; /* @{expr}, %{expr} */
1758             /* @pkg, %pkg */
1759             /* if !top, could be e.g. @a[0,1] */
1760             else if (top && (o->op_flags & OPf_REF))
1761                 all_flags |= AAS_PKG_AGG;
1762             else
1763                 all_flags |= AAS_DANGEROUS;
1764             goto do_next;
1765 
1766         case OP_RV2SV:
1767             (*scalars_p)++;
1768             if (cUNOPx(o)->op_first->op_type != OP_GV) {
1769                 (*scalars_p) += 2;
1770                 all_flags |= AAS_DANGEROUS; /* ${expr} */
1771             }
1772             else
1773                 all_flags |= AAS_PKG_SCALAR; /* $pkg */
1774             goto do_next;
1775 
1776         case OP_SPLIT:
1777             if (o->op_private & OPpSPLIT_ASSIGN) {
1778                 /* the assign in @a = split() has been optimised away
1779                  * and the @a attached directly to the split op
1780                  * Treat the array as appearing on the RHS, i.e.
1781                  *    ... = (@a = split)
1782                  * is treated like
1783                  *    ... = @a;
1784                  */
1785 
1786                 if (o->op_flags & OPf_STACKED) {
1787                     /* @{expr} = split() - the array expression is tacked
1788                      * on as an extra child to split - process kid */
1789                     next_kid = cLISTOPo->op_last;
1790                     goto do_next;
1791                 }
1792 
1793                 /* ... else array is directly attached to split op */
1794                 (*scalars_p) += 2;
1795                 all_flags |= (PL_op->op_private & OPpSPLIT_LEX)
1796                                 ? ((o->op_private & OPpLVAL_INTRO)
1797                                     ? AAS_MY_AGG : AAS_LEX_AGG)
1798                                 : AAS_PKG_AGG;
1799                 goto do_next;
1800             }
1801             (*scalars_p)++;
1802             /* other args of split can't be returned */
1803             all_flags |= AAS_SAFE_SCALAR;
1804             goto do_next;
1805 
1806         case OP_UNDEF:
1807             /* undef on LHS following a var is significant, e.g.
1808              *    my $x = 1;
1809              *    @a = (($x, undef) = (2 => $x));
1810              *    # @a shoul be (2,1) not (2,2)
1811              *
1812              * undef on RHS counts as a scalar:
1813              *   ($x, $y)    = (undef, $x); # 2 scalars on RHS: unsafe
1814              */
1815             if ((!rhs && *scalars_p) || rhs)
1816                 (*scalars_p)++;
1817             flags = AAS_SAFE_SCALAR;
1818             break;
1819 
1820         case OP_PUSHMARK:
1821         case OP_STUB:
1822             /* these are all no-ops; they don't push a potentially common SV
1823              * onto the stack, so they are neither AAS_DANGEROUS nor
1824              * AAS_SAFE_SCALAR */
1825             goto do_next;
1826 
1827         case OP_PADRANGE: /* Ignore padrange; checking its siblings is enough */
1828             break;
1829 
1830         case OP_NULL:
1831         case OP_LIST:
1832             /* these do nothing, but may have children */
1833             break;
1834 
1835         default:
1836             if (PL_opargs[o->op_type] & OA_DANGEROUS) {
1837                 (*scalars_p) += 2;
1838                 flags = AAS_DANGEROUS;
1839                 break;
1840             }
1841 
1842             if (   (PL_opargs[o->op_type] & OA_TARGLEX)
1843                 && (o->op_private & OPpTARGET_MY))
1844             {
1845                 (*scalars_p)++;
1846                 all_flags |= S_aassign_padcheck(aTHX_ o, rhs)
1847                                 ? AAS_LEX_SCALAR_COMM : AAS_LEX_SCALAR;
1848                 goto do_next;
1849             }
1850 
1851             /* if its an unrecognised, non-dangerous op, assume that it
1852              * is the cause of at least one safe scalar */
1853             (*scalars_p)++;
1854             flags = AAS_SAFE_SCALAR;
1855             break;
1856         }
1857 
1858         all_flags |= flags;
1859 
1860         /* by default, process all kids next
1861          * XXX this assumes that all other ops are "transparent" - i.e. that
1862          * they can return some of their children. While this true for e.g.
1863          * sort and grep, it's not true for e.g. map. We really need a
1864          * 'transparent' flag added to regen/opcodes
1865          */
1866         if (o->op_flags & OPf_KIDS) {
1867             next_kid = cUNOPo->op_first;
1868             /* these ops do nothing but may have children; but their
1869              * children should also be treated as top-level */
1870             if (   o == effective_top_op
1871                 && (o->op_type == OP_NULL || o->op_type == OP_LIST)
1872             )
1873                 effective_top_op = next_kid;
1874         }
1875 
1876 
1877         /* If next_kid is set, someone in the code above wanted us to process
1878          * that kid and all its remaining siblings.  Otherwise, work our way
1879          * back up the tree */
1880       do_next:
1881         while (!next_kid) {
1882             if (o == top_op)
1883                 return all_flags; /* at top; no parents/siblings to try */
1884             if (OpHAS_SIBLING(o)) {
1885                 next_kid = o->op_sibparent;
1886                 if (o == effective_top_op)
1887                     effective_top_op = next_kid;
1888             }
1889             else if (o == effective_top_op)
1890               effective_top_op = o->op_sibparent;
1891             o = o->op_sibparent; /* try parent's next sibling */
1892         }
1893         o = next_kid;
1894     } /* while */
1895 }
1896 
1897 /* S_maybe_multideref(): given an op_next chain of ops beginning at 'start'
1898  * that potentially represent a series of one or more aggregate derefs
1899  * (such as $a->[1]{$key}), examine the chain, and if appropriate, convert
1900  * the whole chain to a single OP_MULTIDEREF op (maybe with a few
1901  * additional ops left in too).
1902  *
1903  * The caller will have already verified that the first few ops in the
1904  * chain following 'start' indicate a multideref candidate, and will have
1905  * set 'orig_o' to the point further on in the chain where the first index
1906  * expression (if any) begins.  'orig_action' specifies what type of
1907  * beginning has already been determined by the ops between start..orig_o
1908  * (e.g.  $lex_ary[], $pkg_ary->{}, expr->[], etc).
1909  *
1910  * 'hints' contains any hints flags that need adding (currently just
1911  * OPpHINT_STRICT_REFS) as found in any rv2av/hv skipped by the caller.
1912  */
1913 
1914 STATIC void
S_maybe_multideref(pTHX_ OP * start,OP * orig_o,UV orig_action,U8 hints)1915 S_maybe_multideref(pTHX_ OP *start, OP *orig_o, UV orig_action, U8 hints)
1916 {
1917     int pass;
1918     UNOP_AUX_item *arg_buf = NULL;
1919     bool reset_start_targ  = FALSE; /* start->op_targ needs zeroing */
1920     int index_skip         = -1;    /* don't output index arg on this action */
1921 
1922     /* similar to regex compiling, do two passes; the first pass
1923      * determines whether the op chain is convertible and calculates the
1924      * buffer size; the second pass populates the buffer and makes any
1925      * changes necessary to ops (such as moving consts to the pad on
1926      * threaded builds).
1927      *
1928      * NB: for things like Coverity, note that both passes take the same
1929      * path through the logic tree (except for 'if (pass)' bits), since
1930      * both passes are following the same op_next chain; and in
1931      * particular, if it would return early on the second pass, it would
1932      * already have returned early on the first pass.
1933      */
1934     for (pass = 0; pass < 2; pass++) {
1935         OP *o                = orig_o;
1936         UV action            = orig_action;
1937         OP *first_elem_op    = NULL;  /* first seen aelem/helem */
1938         OP *top_op           = NULL;  /* highest [ah]elem/exists/del/rv2[ah]v */
1939         int action_count     = 0;     /* number of actions seen so far */
1940         int action_ix        = 0;     /* action_count % (actions per IV) */
1941         bool next_is_hash    = FALSE; /* is the next lookup to be a hash? */
1942         bool is_last         = FALSE; /* no more derefs to follow */
1943         bool maybe_aelemfast = FALSE; /* we can replace with aelemfast? */
1944         UV action_word       = 0;     /* all actions so far */
1945         size_t argi          = 0;
1946         UNOP_AUX_item *action_ptr = arg_buf;
1947 
1948         argi++; /* reserve slot for first action word */
1949 
1950         switch (action) {
1951         case MDEREF_HV_gvsv_vivify_rv2hv_helem:
1952         case MDEREF_HV_gvhv_helem:
1953             next_is_hash = TRUE;
1954             /* FALLTHROUGH */
1955         case MDEREF_AV_gvsv_vivify_rv2av_aelem:
1956         case MDEREF_AV_gvav_aelem:
1957             if (pass) {
1958 #ifdef USE_ITHREADS
1959                 arg_buf[argi].pad_offset = cPADOPx(start)->op_padix;
1960                 /* stop it being swiped when nulled */
1961                 cPADOPx(start)->op_padix = 0;
1962 #else
1963                 arg_buf[argi].sv = cSVOPx(start)->op_sv;
1964                 cSVOPx(start)->op_sv = NULL;
1965 #endif
1966             }
1967             argi++;
1968             break;
1969 
1970         case MDEREF_HV_padhv_helem:
1971         case MDEREF_HV_padsv_vivify_rv2hv_helem:
1972             next_is_hash = TRUE;
1973             /* FALLTHROUGH */
1974         case MDEREF_AV_padav_aelem:
1975         case MDEREF_AV_padsv_vivify_rv2av_aelem:
1976             if (pass) {
1977                 arg_buf[argi].pad_offset = start->op_targ;
1978                 /* we skip setting op_targ = 0 for now, since the intact
1979                  * OP_PADXV is needed by check_hash_fields_and_hekify */
1980                 reset_start_targ = TRUE;
1981             }
1982             argi++;
1983             break;
1984 
1985         case MDEREF_HV_pop_rv2hv_helem:
1986             next_is_hash = TRUE;
1987             /* FALLTHROUGH */
1988         case MDEREF_AV_pop_rv2av_aelem:
1989             break;
1990 
1991         default:
1992             NOT_REACHED; /* NOTREACHED */
1993             return;
1994         }
1995 
1996         while (!is_last) {
1997             /* look for another (rv2av/hv; get index;
1998              * aelem/helem/exists/delele) sequence */
1999 
2000             OP *kid;
2001             bool is_deref;
2002             bool ok;
2003             UV index_type = MDEREF_INDEX_none;
2004 
2005             if (action_count) {
2006                 /* if this is not the first lookup, consume the rv2av/hv  */
2007 
2008                 /* for N levels of aggregate lookup, we normally expect
2009                  * that the first N-1 [ah]elem ops will be flagged as
2010                  * /DEREF (so they autovivify if necessary), and the last
2011                  * lookup op not to be.
2012                  * For other things (like @{$h{k1}{k2}}) extra scope or
2013                  * leave ops can appear, so abandon the effort in that
2014                  * case */
2015                 if (o->op_type != OP_RV2AV && o->op_type != OP_RV2HV)
2016                     return;
2017 
2018                 /* rv2av or rv2hv sKR/1 */
2019 
2020                 ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_PARENS
2021                                             |OPf_REF|OPf_MOD|OPf_SPECIAL)));
2022                 if (o->op_flags != (OPf_WANT_SCALAR|OPf_KIDS|OPf_REF))
2023                     return;
2024 
2025                 /* at this point, we wouldn't expect any of these
2026                  * possible private flags:
2027                  * OPpMAYBE_LVSUB, OPpOUR_INTRO, OPpLVAL_INTRO
2028                  * OPpTRUEBOOL, OPpMAYBE_TRUEBOOL (rv2hv only)
2029                  */
2030                 ASSUME(!(o->op_private &
2031                     ~(OPpHINT_STRICT_REFS|OPpARG1_MASK|OPpSLICEWARNING)));
2032 
2033                 hints = (o->op_private & OPpHINT_STRICT_REFS);
2034 
2035                 /* make sure the type of the previous /DEREF matches the
2036                  * type of the next lookup */
2037                 ASSUME(o->op_type == (next_is_hash ? OP_RV2HV : OP_RV2AV));
2038                 top_op = o;
2039 
2040                 action = next_is_hash
2041                             ? MDEREF_HV_vivify_rv2hv_helem
2042                             : MDEREF_AV_vivify_rv2av_aelem;
2043                 o = o->op_next;
2044             }
2045 
2046             /* if this is the second pass, and we're at the depth where
2047              * previously we encountered a non-simple index expression,
2048              * stop processing the index at this point */
2049             if (action_count != index_skip) {
2050 
2051                 /* look for one or more simple ops that return an array
2052                  * index or hash key */
2053 
2054                 switch (o->op_type) {
2055                 case OP_PADSV:
2056                     /* it may be a lexical var index */
2057                     ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_PARENS
2058                                             |OPf_REF|OPf_MOD|OPf_SPECIAL)));
2059                     ASSUME(!(o->op_private &
2060                             ~(OPpPAD_STATE|OPpDEREF|OPpLVAL_INTRO)));
2061 
2062                     if (   OP_GIMME(o,0) == G_SCALAR
2063                         && !(o->op_flags & (OPf_REF|OPf_MOD))
2064                         && o->op_private == 0)
2065                     {
2066                         if (pass)
2067                             arg_buf[argi].pad_offset = o->op_targ;
2068                         argi++;
2069                         index_type = MDEREF_INDEX_padsv;
2070                         o = o->op_next;
2071                     }
2072                     break;
2073 
2074                 case OP_CONST:
2075                     if (next_is_hash) {
2076                         /* it's a constant hash index */
2077                         if (!(SvFLAGS(cSVOPo_sv) & (SVf_IOK|SVf_NOK|SVf_POK)))
2078                             /* "use constant foo => FOO; $h{+foo}" for
2079                              * some weird FOO, can leave you with constants
2080                              * that aren't simple strings. It's not worth
2081                              * the extra hassle for those edge cases */
2082                             break;
2083 
2084                         {
2085                             UNOP *rop = NULL;
2086                             OP * helem_op = o->op_next;
2087 
2088                             ASSUME(   helem_op->op_type == OP_HELEM
2089                                    || helem_op->op_type == OP_NULL
2090                                    || pass == 0);
2091                             if (helem_op->op_type == OP_HELEM) {
2092                                 rop = cUNOPx(cBINOPx(helem_op)->op_first);
2093                                 if (   helem_op->op_private & OPpLVAL_INTRO
2094                                     || rop->op_type != OP_RV2HV
2095                                 )
2096                                     rop = NULL;
2097                             }
2098                             /* on first pass just check; on second pass
2099                              * hekify */
2100                             check_hash_fields_and_hekify(rop, cSVOPo, pass);
2101                         }
2102 
2103                         if (pass) {
2104 #ifdef USE_ITHREADS
2105                             /* Relocate sv to the pad for thread safety */
2106                             op_relocate_sv(&cSVOPo->op_sv, &o->op_targ);
2107                             arg_buf[argi].pad_offset = o->op_targ;
2108                             o->op_targ = 0;
2109 #else
2110                             arg_buf[argi].sv = cSVOPx_sv(o);
2111 #endif
2112                         }
2113                     }
2114                     else {
2115                         /* it's a constant array index */
2116                         IV iv;
2117                         SV *ix_sv = cSVOPo->op_sv;
2118                         if (!SvIOK(ix_sv))
2119                             break;
2120                         iv = SvIV(ix_sv);
2121 
2122                         if (   action_count == 0
2123                             && iv >= -128
2124                             && iv <= 127
2125                             && (   action == MDEREF_AV_padav_aelem
2126                                 || action == MDEREF_AV_gvav_aelem)
2127                         )
2128                             maybe_aelemfast = TRUE;
2129 
2130                         if (pass) {
2131                             arg_buf[argi].iv = iv;
2132                             SvREFCNT_dec_NN(cSVOPo->op_sv);
2133                         }
2134                     }
2135                     if (pass)
2136                         /* we've taken ownership of the SV */
2137                         cSVOPo->op_sv = NULL;
2138                     argi++;
2139                     index_type = MDEREF_INDEX_const;
2140                     o = o->op_next;
2141                     break;
2142 
2143                 case OP_GV:
2144                     /* it may be a package var index */
2145 
2146                     ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_PARENS|OPf_SPECIAL)));
2147                     ASSUME(!(o->op_private & ~(OPpEARLY_CV)));
2148                     if (  (o->op_flags & ~(OPf_PARENS|OPf_SPECIAL)) != OPf_WANT_SCALAR
2149                         || o->op_private != 0
2150                     )
2151                         break;
2152 
2153                     kid = o->op_next;
2154                     if (kid->op_type != OP_RV2SV)
2155                         break;
2156 
2157                     ASSUME(!(kid->op_flags &
2158                             ~(OPf_WANT|OPf_KIDS|OPf_MOD|OPf_REF
2159                              |OPf_SPECIAL|OPf_PARENS)));
2160                     ASSUME(!(kid->op_private &
2161                                     ~(OPpARG1_MASK
2162                                      |OPpHINT_STRICT_REFS|OPpOUR_INTRO
2163                                      |OPpDEREF|OPpLVAL_INTRO)));
2164                     if(   (kid->op_flags &~ OPf_PARENS)
2165                             != (OPf_WANT_SCALAR|OPf_KIDS)
2166                        || (kid->op_private & ~(OPpARG1_MASK|HINT_STRICT_REFS))
2167                     )
2168                         break;
2169 
2170                     if (pass) {
2171 #ifdef USE_ITHREADS
2172                         arg_buf[argi].pad_offset = cPADOPx(o)->op_padix;
2173                         /* stop it being swiped when nulled */
2174                         cPADOPx(o)->op_padix = 0;
2175 #else
2176                         arg_buf[argi].sv = cSVOPx(o)->op_sv;
2177                         cSVOPo->op_sv = NULL;
2178 #endif
2179                     }
2180                     argi++;
2181                     index_type = MDEREF_INDEX_gvsv;
2182                     o = kid->op_next;
2183                     break;
2184 
2185                 } /* switch */
2186             } /* action_count != index_skip */
2187 
2188             action |= index_type;
2189 
2190 
2191             /* at this point we have either:
2192              *   * detected what looks like a simple index expression,
2193              *     and expect the next op to be an [ah]elem, or
2194              *     an nulled  [ah]elem followed by a delete or exists;
2195              *  * found a more complex expression, so something other
2196              *    than the above follows.
2197              */
2198 
2199             /* possibly an optimised away [ah]elem (where op_next is
2200              * exists or delete) */
2201             if (o->op_type == OP_NULL)
2202                 o = o->op_next;
2203 
2204             /* at this point we're looking for an OP_AELEM, OP_HELEM,
2205              * OP_EXISTS or OP_DELETE */
2206 
2207             /* if a custom array/hash access checker is in scope,
2208              * abandon optimisation attempt */
2209             if (  (o->op_type == OP_AELEM || o->op_type == OP_HELEM)
2210                && PL_check[o->op_type] != Perl_ck_null)
2211                 return;
2212             /* similarly for customised exists and delete */
2213             if (  (o->op_type == OP_EXISTS)
2214                && PL_check[o->op_type] != Perl_ck_exists)
2215                 return;
2216             if (  (o->op_type == OP_DELETE)
2217                && PL_check[o->op_type] != Perl_ck_delete)
2218                 return;
2219 
2220             if (   o->op_type != OP_AELEM
2221                 || (o->op_private &
2222                       (OPpLVAL_INTRO|OPpLVAL_DEFER|OPpDEREF|OPpMAYBE_LVSUB))
2223                 )
2224                 maybe_aelemfast = FALSE;
2225 
2226             /* look for aelem/helem/exists/delete. If it's not the last elem
2227              * lookup, it *must* have OPpDEREF_AV/HV, but not many other
2228              * flags; if it's the last, then it mustn't have
2229              * OPpDEREF_AV/HV, but may have lots of other flags, like
2230              * OPpLVAL_INTRO etc
2231              */
2232 
2233             if (   index_type == MDEREF_INDEX_none
2234                 || (   o->op_type != OP_AELEM  && o->op_type != OP_HELEM
2235                     && o->op_type != OP_EXISTS && o->op_type != OP_DELETE)
2236             )
2237                 ok = FALSE;
2238             else {
2239                 /* we have aelem/helem/exists/delete with valid simple index */
2240 
2241                 is_deref =    (o->op_type == OP_AELEM || o->op_type == OP_HELEM)
2242                            && (   (o->op_private & OPpDEREF) == OPpDEREF_AV
2243                                || (o->op_private & OPpDEREF) == OPpDEREF_HV);
2244 
2245                 /* This doesn't make much sense but is legal:
2246                  *    @{ local $x[0][0] } = 1
2247                  * Since scope exit will undo the autovivification,
2248                  * don't bother in the first place. The OP_LEAVE
2249                  * assertion is in case there are other cases of both
2250                  * OPpLVAL_INTRO and OPpDEREF which don't include a scope
2251                  * exit that would undo the local - in which case this
2252                  * block of code would need rethinking.
2253                  */
2254                 if (is_deref && (o->op_private & OPpLVAL_INTRO)) {
2255 #ifdef DEBUGGING
2256                     OP *n = o->op_next;
2257                     while (n && (  n->op_type == OP_NULL
2258                                 || n->op_type == OP_LIST
2259                                 || n->op_type == OP_SCALAR))
2260                         n = n->op_next;
2261                     assert(n && n->op_type == OP_LEAVE);
2262 #endif
2263                     o->op_private &= ~OPpDEREF;
2264                     is_deref = FALSE;
2265                 }
2266 
2267                 if (is_deref) {
2268                     ASSUME(!(o->op_flags &
2269                                  ~(OPf_WANT|OPf_KIDS|OPf_MOD|OPf_PARENS)));
2270                     ASSUME(!(o->op_private & ~(OPpARG2_MASK|OPpDEREF)));
2271 
2272                     ok =    (o->op_flags &~ OPf_PARENS)
2273                                == (OPf_WANT_SCALAR|OPf_KIDS|OPf_MOD)
2274                          && !(o->op_private & ~(OPpDEREF|OPpARG2_MASK));
2275                 }
2276                 else if (o->op_type == OP_EXISTS) {
2277                     ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_PARENS
2278                                 |OPf_REF|OPf_MOD|OPf_SPECIAL)));
2279                     ASSUME(!(o->op_private & ~(OPpARG1_MASK|OPpEXISTS_SUB)));
2280                     ok =  !(o->op_private & ~OPpARG1_MASK);
2281                 }
2282                 else if (o->op_type == OP_DELETE) {
2283                     ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_PARENS
2284                                 |OPf_REF|OPf_MOD|OPf_SPECIAL)));
2285                     ASSUME(!(o->op_private &
2286                                     ~(OPpARG1_MASK|OPpSLICE|OPpLVAL_INTRO)));
2287                     /* don't handle slices or 'local delete'; the latter
2288                      * is fairly rare, and has a complex runtime */
2289                     ok =  !(o->op_private & ~OPpARG1_MASK);
2290                     if (OP_TYPE_IS_OR_WAS(cUNOPo->op_first, OP_AELEM))
2291                         /* skip handling run-tome error */
2292                         ok = (ok && cBOOL(o->op_flags & OPf_SPECIAL));
2293                 }
2294                 else {
2295                     ASSUME(o->op_type == OP_AELEM || o->op_type == OP_HELEM);
2296                     ASSUME(!(o->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_MOD
2297                                             |OPf_PARENS|OPf_REF|OPf_SPECIAL)));
2298                     ASSUME(!(o->op_private & ~(OPpARG2_MASK|OPpMAYBE_LVSUB
2299                                     |OPpLVAL_DEFER|OPpDEREF|OPpLVAL_INTRO)));
2300                     ok = (o->op_private & OPpDEREF) != OPpDEREF_SV;
2301                 }
2302             }
2303 
2304             if (ok) {
2305                 if (!first_elem_op)
2306                     first_elem_op = o;
2307                 top_op = o;
2308                 if (is_deref) {
2309                     next_is_hash = cBOOL((o->op_private & OPpDEREF) == OPpDEREF_HV);
2310                     o = o->op_next;
2311                 }
2312                 else {
2313                     is_last = TRUE;
2314                     action |= MDEREF_FLAG_last;
2315                 }
2316             }
2317             else {
2318                 /* at this point we have something that started
2319                  * promisingly enough (with rv2av or whatever), but failed
2320                  * to find a simple index followed by an
2321                  * aelem/helem/exists/delete. If this is the first action,
2322                  * give up; but if we've already seen at least one
2323                  * aelem/helem, then keep them and add a new action with
2324                  * MDEREF_INDEX_none, which causes it to do the vivify
2325                  * from the end of the previous lookup, and do the deref,
2326                  * but stop at that point. So $a[0][expr] will do one
2327                  * av_fetch, vivify and deref, then continue executing at
2328                  * expr */
2329                 if (!action_count)
2330                     return;
2331                 is_last = TRUE;
2332                 index_skip = action_count;
2333                 action |= MDEREF_FLAG_last;
2334                 if (index_type != MDEREF_INDEX_none)
2335                     argi--;
2336             }
2337 
2338             action_word |= (action << (action_ix * MDEREF_SHIFT));
2339             action_ix++;
2340             action_count++;
2341             /* if there's no space for the next action, reserve a new slot
2342              * for it *before* we start adding args for that action */
2343             if ((action_ix + 1) * MDEREF_SHIFT > UVSIZE*8) {
2344                 if (pass) {
2345                     action_ptr->uv = action_word;
2346                     action_ptr = arg_buf + argi;
2347                 }
2348                 action_word = 0;
2349                 argi++;
2350                 action_ix = 0;
2351             }
2352         } /* while !is_last */
2353 
2354         /* success! */
2355 
2356         if (!action_ix)
2357             /* slot reserved for next action word not now needed */
2358             argi--;
2359         else if (pass)
2360             action_ptr->uv = action_word;
2361 
2362         if (pass) {
2363             OP *mderef;
2364             OP *p, *q;
2365 
2366             mderef = newUNOP_AUX(OP_MULTIDEREF, 0, NULL, arg_buf);
2367             if (index_skip == -1) {
2368                 mderef->op_flags = o->op_flags
2369                         & (OPf_WANT|OPf_MOD|(next_is_hash ? OPf_SPECIAL : 0));
2370                 if (o->op_type == OP_EXISTS)
2371                     mderef->op_private = OPpMULTIDEREF_EXISTS;
2372                 else if (o->op_type == OP_DELETE)
2373                     mderef->op_private = OPpMULTIDEREF_DELETE;
2374                 else
2375                     mderef->op_private = o->op_private
2376                         & (OPpMAYBE_LVSUB|OPpLVAL_DEFER|OPpLVAL_INTRO);
2377             }
2378             /* accumulate strictness from every level (although I don't think
2379              * they can actually vary) */
2380             mderef->op_private |= hints;
2381 
2382             /* integrate the new multideref op into the optree and the
2383              * op_next chain.
2384              *
2385              * In general an op like aelem or helem has two child
2386              * sub-trees: the aggregate expression (a_expr) and the
2387              * index expression (i_expr):
2388              *
2389              *     aelem
2390              *       |
2391              *     a_expr - i_expr
2392              *
2393              * The a_expr returns an AV or HV, while the i-expr returns an
2394              * index. In general a multideref replaces most or all of a
2395              * multi-level tree, e.g.
2396              *
2397              *     exists
2398              *       |
2399              *     ex-aelem
2400              *       |
2401              *     rv2av  - i_expr1
2402              *       |
2403              *     helem
2404              *       |
2405              *     rv2hv  - i_expr2
2406              *       |
2407              *     aelem
2408              *       |
2409              *     a_expr - i_expr3
2410              *
2411              * With multideref, all the i_exprs will be simple vars or
2412              * constants, except that i_expr1 may be arbitrary in the case
2413              * of MDEREF_INDEX_none.
2414              *
2415              * The bottom-most a_expr will be either:
2416              *   1) a simple var (so padXv or gv+rv2Xv);
2417              *   2) a simple scalar var dereferenced (e.g. $r->[0]):
2418              *      so a simple var with an extra rv2Xv;
2419              *   3) or an arbitrary expression.
2420              *
2421              * 'start', the first op in the execution chain, will point to
2422              *   1),2): the padXv or gv op;
2423              *   3):    the rv2Xv which forms the last op in the a_expr
2424              *          execution chain, and the top-most op in the a_expr
2425              *          subtree.
2426              *
2427              * For all cases, the 'start' node is no longer required,
2428              * but we can't free it since one or more external nodes
2429              * may point to it. E.g. consider
2430              *     $h{foo} = $a ? $b : $c
2431              * Here, both the op_next and op_other branches of the
2432              * cond_expr point to the gv[*h] of the hash expression, so
2433              * we can't free the 'start' op.
2434              *
2435              * For expr->[...], we need to save the subtree containing the
2436              * expression; for the other cases, we just need to save the
2437              * start node.
2438              * So in all cases, we null the start op and keep it around by
2439              * making it the child of the multideref op; for the expr->
2440              * case, the expr will be a subtree of the start node.
2441              *
2442              * So in the simple 1,2 case the  optree above changes to
2443              *
2444              *     ex-exists
2445              *       |
2446              *     multideref
2447              *       |
2448              *     ex-gv (or ex-padxv)
2449              *
2450              *  with the op_next chain being
2451              *
2452              *  -> ex-gv -> multideref -> op-following-ex-exists ->
2453              *
2454              *  In the 3 case, we have
2455              *
2456              *     ex-exists
2457              *       |
2458              *     multideref
2459              *       |
2460              *     ex-rv2xv
2461              *       |
2462              *    rest-of-a_expr
2463              *      subtree
2464              *
2465              *  and
2466              *
2467              *  -> rest-of-a_expr subtree ->
2468              *    ex-rv2xv -> multideref -> op-following-ex-exists ->
2469              *
2470              *
2471              * Where the last i_expr is non-simple (i.e. MDEREF_INDEX_none,
2472              * e.g. $a[0]{foo}[$x+1], the next rv2xv is nulled and the
2473              * multideref attached as the child, e.g.
2474              *
2475              *     exists
2476              *       |
2477              *     ex-aelem
2478              *       |
2479              *     ex-rv2av  - i_expr1
2480              *       |
2481              *     multideref
2482              *       |
2483              *     ex-whatever
2484              *
2485              */
2486 
2487             /* if we free this op, don't free the pad entry */
2488             if (reset_start_targ)
2489                 start->op_targ = 0;
2490 
2491 
2492             /* Cut the bit we need to save out of the tree and attach to
2493              * the multideref op, then free the rest of the tree */
2494 
2495             /* find parent of node to be detached (for use by splice) */
2496             p = first_elem_op;
2497             if (   orig_action == MDEREF_AV_pop_rv2av_aelem
2498                 || orig_action == MDEREF_HV_pop_rv2hv_helem)
2499             {
2500                 /* there is an arbitrary expression preceding us, e.g.
2501                  * expr->[..]? so we need to save the 'expr' subtree */
2502                 if (p->op_type == OP_EXISTS || p->op_type == OP_DELETE)
2503                     p = cUNOPx(p)->op_first;
2504                 ASSUME(   start->op_type == OP_RV2AV
2505                        || start->op_type == OP_RV2HV);
2506             }
2507             else {
2508                 /* either a padXv or rv2Xv+gv, maybe with an ex-Xelem
2509                  * above for exists/delete. */
2510                 while (   (p->op_flags & OPf_KIDS)
2511                        && cUNOPx(p)->op_first != start
2512                 )
2513                     p = cUNOPx(p)->op_first;
2514             }
2515             ASSUME(cUNOPx(p)->op_first == start);
2516 
2517             /* detach from main tree, and re-attach under the multideref */
2518             op_sibling_splice(mderef, NULL, 0,
2519                     op_sibling_splice(p, NULL, 1, NULL));
2520             op_null(start);
2521 
2522             start->op_next = mderef;
2523 
2524             mderef->op_next = index_skip == -1 ? o->op_next : o;
2525 
2526             /* excise and free the original tree, and replace with
2527              * the multideref op */
2528             p = op_sibling_splice(top_op, NULL, -1, mderef);
2529             while (p) {
2530                 q = OpSIBLING(p);
2531                 op_free(p);
2532                 p = q;
2533             }
2534             op_null(top_op);
2535         }
2536         else {
2537             Size_t size = argi;
2538 
2539             if (maybe_aelemfast && action_count == 1)
2540                 return;
2541 
2542             arg_buf = (UNOP_AUX_item*)PerlMemShared_malloc(
2543                                 sizeof(UNOP_AUX_item) * (size + 1));
2544             /* for dumping etc: store the length in a hidden first slot;
2545              * we set the op_aux pointer to the second slot */
2546             arg_buf->uv = size;
2547             arg_buf++;
2548         }
2549     } /* for (pass = ...) */
2550 }
2551 
2552 /* See if the ops following o are such that o will always be executed in
2553  * boolean context: that is, the SV which o pushes onto the stack will
2554  * only ever be consumed by later ops via SvTRUE(sv) or similar.
2555  * If so, set a suitable private flag on o. Normally this will be
2556  * bool_flag; but see below why maybe_flag is needed too.
2557  *
2558  * Typically the two flags you pass will be the generic OPpTRUEBOOL and
2559  * OPpMAYBE_TRUEBOOL, buts it's possible that for some ops those bits may
2560  * already be taken, so you'll have to give that op two different flags.
2561  *
2562  * More explanation of 'maybe_flag' and 'safe_and' parameters.
2563  * The binary logical ops &&, ||, // (plus 'if' and 'unless' which use
2564  * those underlying ops) short-circuit, which means that rather than
2565  * necessarily returning a truth value, they may return the LH argument,
2566  * which may not be boolean. For example in $x = (keys %h || -1), keys
2567  * should return a key count rather than a boolean, even though its
2568  * sort-of being used in boolean context.
2569  *
2570  * So we only consider such logical ops to provide boolean context to
2571  * their LH argument if they themselves are in void or boolean context.
2572  * However, sometimes the context isn't known until run-time. In this
2573  * case the op is marked with the maybe_flag flag it.
2574  *
2575  * Consider the following.
2576  *
2577  *     sub f { ....;  if (%h) { .... } }
2578  *
2579  * This is actually compiled as
2580  *
2581  *     sub f { ....;  %h && do { .... } }
2582  *
2583  * Here we won't know until runtime whether the final statement (and hence
2584  * the &&) is in void context and so is safe to return a boolean value.
2585  * So mark o with maybe_flag rather than the bool_flag.
2586  * Note that there is cost associated with determining context at runtime
2587  * (e.g. a call to block_gimme()), so it may not be worth setting (at
2588  * compile time) and testing (at runtime) maybe_flag if the scalar verses
2589  * boolean costs savings are marginal.
2590  *
2591  * However, we can do slightly better with && (compared to || and //):
2592  * this op only returns its LH argument when that argument is false. In
2593  * this case, as long as the op promises to return a false value which is
2594  * valid in both boolean and scalar contexts, we can mark an op consumed
2595  * by && with bool_flag rather than maybe_flag.
2596  * For example as long as pp_padhv and pp_rv2hv return &PL_sv_zero rather
2597  * than &PL_sv_no for a false result in boolean context, then it's safe. An
2598  * op which promises to handle this case is indicated by setting safe_and
2599  * to true.
2600  */
2601 
2602 static void
S_check_for_bool_cxt(OP * o,bool safe_and,U8 bool_flag,U8 maybe_flag)2603 S_check_for_bool_cxt(OP*o, bool safe_and, U8 bool_flag, U8 maybe_flag)
2604 {
2605     OP *lop;
2606     U8 flag = 0;
2607 
2608     assert((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR);
2609 
2610     /* OPpTARGET_MY and boolean context probably don't mix well.
2611      * If someone finds a valid use case, maybe add an extra flag to this
2612      * function which indicates its safe to do so for this op? */
2613     assert(!(   (PL_opargs[o->op_type] & OA_TARGLEX)
2614              && (o->op_private & OPpTARGET_MY)));
2615 
2616     lop = o->op_next;
2617 
2618     while (lop) {
2619         switch (lop->op_type) {
2620         case OP_NULL:
2621         case OP_SCALAR:
2622             break;
2623 
2624         /* these two consume the stack argument in the scalar case,
2625          * and treat it as a boolean in the non linenumber case */
2626         case OP_FLIP:
2627         case OP_FLOP:
2628             if (   ((lop->op_flags & OPf_WANT) == OPf_WANT_LIST)
2629                 || (lop->op_private & OPpFLIP_LINENUM))
2630             {
2631                 lop = NULL;
2632                 break;
2633             }
2634             /* FALLTHROUGH */
2635         /* these never leave the original value on the stack */
2636         case OP_NOT:
2637         case OP_XOR:
2638         case OP_COND_EXPR:
2639         case OP_GREPWHILE:
2640             flag = bool_flag;
2641             lop = NULL;
2642             break;
2643 
2644         /* OR DOR and AND evaluate their arg as a boolean, but then may
2645          * leave the original scalar value on the stack when following the
2646          * op_next route. If not in void context, we need to ensure
2647          * that whatever follows consumes the arg only in boolean context
2648          * too.
2649          */
2650         case OP_AND:
2651             if (safe_and) {
2652                 flag = bool_flag;
2653                 lop = NULL;
2654                 break;
2655             }
2656             /* FALLTHROUGH */
2657         case OP_OR:
2658         case OP_DOR:
2659             if ((lop->op_flags & OPf_WANT) == OPf_WANT_VOID) {
2660                 flag = bool_flag;
2661                 lop = NULL;
2662             }
2663             else if (!(lop->op_flags & OPf_WANT)) {
2664                 /* unknown context - decide at runtime */
2665                 flag = maybe_flag;
2666                 lop = NULL;
2667             }
2668             break;
2669 
2670         default:
2671             lop = NULL;
2672             break;
2673         }
2674 
2675         if (lop)
2676             lop = lop->op_next;
2677     }
2678 
2679     o->op_private |= flag;
2680 }
2681 
2682 /* mechanism for deferring recursion in rpeep() */
2683 
2684 #define MAX_DEFERRED 4
2685 
2686 #define DEFER(o) \
2687   STMT_START { \
2688     if (defer_ix == (MAX_DEFERRED-1)) { \
2689         OP **defer = defer_queue[defer_base]; \
2690         CALL_RPEEP(*defer); \
2691         op_prune_chain_head(defer); \
2692         defer_base = (defer_base + 1) % MAX_DEFERRED; \
2693         defer_ix--; \
2694     } \
2695     defer_queue[(defer_base + ++defer_ix) % MAX_DEFERRED] = &(o); \
2696   } STMT_END
2697 
2698 #define IS_AND_OP(o)   (o->op_type == OP_AND)
2699 #define IS_OR_OP(o)    (o->op_type == OP_OR)
2700 
2701 /* A peephole optimizer.  We visit the ops in the order they're to execute.
2702  * See the comments at the top of this file for more details about when
2703  * peep() is called */
2704 
2705 void
Perl_rpeep(pTHX_ OP * o)2706 Perl_rpeep(pTHX_ OP *o)
2707 {
2708     OP* oldop = NULL;
2709     OP* oldoldop = NULL;
2710     OP** defer_queue[MAX_DEFERRED] = { NULL }; /* small queue of deferred branches */
2711     int defer_base = 0;
2712     int defer_ix = -1;
2713 
2714     if (!o || o->op_opt)
2715         return;
2716 
2717     assert(o->op_type != OP_FREED);
2718 
2719     ENTER;
2720     SAVEOP();
2721     SAVEVPTR(PL_curcop);
2722     for (;; o = o->op_next) {
2723         if (o && o->op_opt)
2724             o = NULL;
2725         if (!o) {
2726             while (defer_ix >= 0) {
2727                 OP **defer =
2728                         defer_queue[(defer_base + defer_ix--) % MAX_DEFERRED];
2729                 CALL_RPEEP(*defer);
2730                 op_prune_chain_head(defer);
2731             }
2732             break;
2733         }
2734 
2735       redo:
2736 
2737         /* oldoldop -> oldop -> o should be a chain of 3 adjacent ops */
2738         assert(!oldoldop || oldoldop->op_next == oldop);
2739         assert(!oldop    || oldop->op_next    == o);
2740 
2741         /* By default, this op has now been optimised. A couple of cases below
2742            clear this again.  */
2743         o->op_opt = 1;
2744         PL_op = o;
2745 
2746         /* look for a series of 1 or more aggregate derefs, e.g.
2747          *   $a[1]{foo}[$i]{$k}
2748          * and replace with a single OP_MULTIDEREF op.
2749          * Each index must be either a const, or a simple variable,
2750          *
2751          * First, look for likely combinations of starting ops,
2752          * corresponding to (global and lexical variants of)
2753          *     $a[...]   $h{...}
2754          *     $r->[...] $r->{...}
2755          *     (preceding expression)->[...]
2756          *     (preceding expression)->{...}
2757          * and if so, call maybe_multideref() to do a full inspection
2758          * of the op chain and if appropriate, replace with an
2759          * OP_MULTIDEREF
2760          */
2761         {
2762             UV action;
2763             OP *o2 = o;
2764             U8 hints = 0;
2765 
2766             switch (o2->op_type) {
2767             case OP_GV:
2768                 /* $pkg[..]   :   gv[*pkg]
2769                  * $pkg->[...]:   gv[*pkg]; rv2sv sKM/DREFAV */
2770 
2771                 /* Fail if there are new op flag combinations that we're
2772                  * not aware of, rather than:
2773                  *  * silently failing to optimise, or
2774                  *  * silently optimising the flag away.
2775                  * If this ASSUME starts failing, examine what new flag
2776                  * has been added to the op, and decide whether the
2777                  * optimisation should still occur with that flag, then
2778                  * update the code accordingly. This applies to all the
2779                  * other ASSUMEs in the block of code too.
2780                  */
2781                 ASSUME(!(o2->op_flags &
2782                             ~(OPf_WANT|OPf_MOD|OPf_PARENS|OPf_SPECIAL)));
2783                 ASSUME(!(o2->op_private & ~OPpEARLY_CV));
2784 
2785                 o2 = o2->op_next;
2786 
2787                 if (o2->op_type == OP_RV2AV) {
2788                     action = MDEREF_AV_gvav_aelem;
2789                     goto do_deref;
2790                 }
2791 
2792                 if (o2->op_type == OP_RV2HV) {
2793                     action = MDEREF_HV_gvhv_helem;
2794                     goto do_deref;
2795                 }
2796 
2797                 if (o2->op_type != OP_RV2SV)
2798                     break;
2799 
2800                 /* at this point we've seen gv,rv2sv, so the only valid
2801                  * construct left is $pkg->[] or $pkg->{} */
2802 
2803                 ASSUME(!(o2->op_flags & OPf_STACKED));
2804                 if ((o2->op_flags & (OPf_WANT|OPf_REF|OPf_MOD|OPf_SPECIAL))
2805                             != (OPf_WANT_SCALAR|OPf_MOD))
2806                     break;
2807 
2808                 ASSUME(!(o2->op_private & ~(OPpARG1_MASK|HINT_STRICT_REFS
2809                                     |OPpOUR_INTRO|OPpDEREF|OPpLVAL_INTRO)));
2810                 if (o2->op_private & (OPpOUR_INTRO|OPpLVAL_INTRO))
2811                     break;
2812                 if (   (o2->op_private & OPpDEREF) != OPpDEREF_AV
2813                     && (o2->op_private & OPpDEREF) != OPpDEREF_HV)
2814                     break;
2815 
2816                 o2 = o2->op_next;
2817                 if (o2->op_type == OP_RV2AV) {
2818                     action = MDEREF_AV_gvsv_vivify_rv2av_aelem;
2819                     goto do_deref;
2820                 }
2821                 if (o2->op_type == OP_RV2HV) {
2822                     action = MDEREF_HV_gvsv_vivify_rv2hv_helem;
2823                     goto do_deref;
2824                 }
2825                 break;
2826 
2827             case OP_PADSV:
2828                 /* $lex->[...]: padsv[$lex] sM/DREFAV */
2829 
2830                 ASSUME(!(o2->op_flags &
2831                     ~(OPf_WANT|OPf_PARENS|OPf_REF|OPf_MOD|OPf_SPECIAL)));
2832                 if ((o2->op_flags &
2833                         (OPf_WANT|OPf_REF|OPf_MOD|OPf_SPECIAL))
2834                      != (OPf_WANT_SCALAR|OPf_MOD))
2835                     break;
2836 
2837                 ASSUME(!(o2->op_private &
2838                                 ~(OPpPAD_STATE|OPpDEREF|OPpLVAL_INTRO)));
2839                 /* skip if state or intro, or not a deref */
2840                 if (      o2->op_private != OPpDEREF_AV
2841                        && o2->op_private != OPpDEREF_HV)
2842                     break;
2843 
2844                 o2 = o2->op_next;
2845                 if (o2->op_type == OP_RV2AV) {
2846                     action = MDEREF_AV_padsv_vivify_rv2av_aelem;
2847                     goto do_deref;
2848                 }
2849                 if (o2->op_type == OP_RV2HV) {
2850                     action = MDEREF_HV_padsv_vivify_rv2hv_helem;
2851                     goto do_deref;
2852                 }
2853                 break;
2854 
2855             case OP_PADAV:
2856             case OP_PADHV:
2857                 /*    $lex[..]:  padav[@lex:1,2] sR *
2858                  * or $lex{..}:  padhv[%lex:1,2] sR */
2859                 ASSUME(!(o2->op_flags & ~(OPf_WANT|OPf_MOD|OPf_PARENS|
2860                                             OPf_REF|OPf_SPECIAL)));
2861                 if ((o2->op_flags &
2862                         (OPf_WANT|OPf_REF|OPf_MOD|OPf_SPECIAL))
2863                      != (OPf_WANT_SCALAR|OPf_REF))
2864                     break;
2865                 if (o2->op_flags != (OPf_WANT_SCALAR|OPf_REF))
2866                     break;
2867                 /* OPf_PARENS isn't currently used in this case;
2868                  * if that changes, let us know! */
2869                 ASSUME(!(o2->op_flags & OPf_PARENS));
2870 
2871                 /* at this point, we wouldn't expect any of the remaining
2872                  * possible private flags:
2873                  * OPpPAD_STATE, OPpLVAL_INTRO, OPpTRUEBOOL,
2874                  * OPpMAYBE_TRUEBOOL, OPpMAYBE_LVSUB
2875                  *
2876                  * OPpSLICEWARNING shouldn't affect runtime
2877                  */
2878                 ASSUME(!(o2->op_private & ~(OPpSLICEWARNING)));
2879 
2880                 action = o2->op_type == OP_PADAV
2881                             ? MDEREF_AV_padav_aelem
2882                             : MDEREF_HV_padhv_helem;
2883                 o2 = o2->op_next;
2884                 S_maybe_multideref(aTHX_ o, o2, action, 0);
2885                 break;
2886 
2887 
2888             case OP_RV2AV:
2889             case OP_RV2HV:
2890                 action = o2->op_type == OP_RV2AV
2891                             ? MDEREF_AV_pop_rv2av_aelem
2892                             : MDEREF_HV_pop_rv2hv_helem;
2893                 /* FALLTHROUGH */
2894             do_deref:
2895                 /* (expr)->[...]:  rv2av sKR/1;
2896                  * (expr)->{...}:  rv2hv sKR/1; */
2897 
2898                 ASSUME(o2->op_type == OP_RV2AV || o2->op_type == OP_RV2HV);
2899 
2900                 ASSUME(!(o2->op_flags & ~(OPf_WANT|OPf_KIDS|OPf_PARENS
2901                                 |OPf_REF|OPf_MOD|OPf_STACKED|OPf_SPECIAL)));
2902                 if (o2->op_flags != (OPf_WANT_SCALAR|OPf_KIDS|OPf_REF))
2903                     break;
2904 
2905                 /* at this point, we wouldn't expect any of these
2906                  * possible private flags:
2907                  * OPpMAYBE_LVSUB, OPpLVAL_INTRO
2908                  * OPpTRUEBOOL, OPpMAYBE_TRUEBOOL, (rv2hv only)
2909                  */
2910                 ASSUME(!(o2->op_private &
2911                     ~(OPpHINT_STRICT_REFS|OPpARG1_MASK|OPpSLICEWARNING
2912                      |OPpOUR_INTRO)));
2913                 hints |= (o2->op_private & OPpHINT_STRICT_REFS);
2914 
2915                 o2 = o2->op_next;
2916 
2917                 S_maybe_multideref(aTHX_ o, o2, action, hints);
2918                 break;
2919 
2920             default:
2921                 break;
2922             }
2923         }
2924 
2925 
2926         switch (o->op_type) {
2927         case OP_DBSTATE:
2928             PL_curcop = ((COP*)o);		/* for warnings */
2929             break;
2930         case OP_NEXTSTATE:
2931             PL_curcop = ((COP*)o);		/* for warnings */
2932 
2933             /* Optimise a "return ..." at the end of a sub to just be "...".
2934              * This saves 2 ops. Before:
2935              * 1  <;> nextstate(main 1 -e:1) v ->2
2936              * 4  <@> return K ->5
2937              * 2    <0> pushmark s ->3
2938              * -    <1> ex-rv2sv sK/1 ->4
2939              * 3      <#> gvsv[*cat] s ->4
2940              *
2941              * After:
2942              * -  <@> return K ->-
2943              * -    <0> pushmark s ->2
2944              * -    <1> ex-rv2sv sK/1 ->-
2945              * 2      <$> gvsv(*cat) s ->3
2946              */
2947             {
2948                 OP *next = o->op_next;
2949                 OP *sibling = OpSIBLING(o);
2950                 if (   OP_TYPE_IS(next, OP_PUSHMARK)
2951                     && OP_TYPE_IS(sibling, OP_RETURN)
2952                     && OP_TYPE_IS(sibling->op_next, OP_LINESEQ)
2953                     && ( OP_TYPE_IS(sibling->op_next->op_next, OP_LEAVESUB)
2954                        ||OP_TYPE_IS(sibling->op_next->op_next,
2955                                     OP_LEAVESUBLV))
2956                     && cUNOPx(sibling)->op_first == next
2957                     && OpHAS_SIBLING(next) && OpSIBLING(next)->op_next
2958                     && next->op_next
2959                 ) {
2960                     /* Look through the PUSHMARK's siblings for one that
2961                      * points to the RETURN */
2962                     OP *top = OpSIBLING(next);
2963                     while (top && top->op_next) {
2964                         if (top->op_next == sibling) {
2965                             top->op_next = sibling->op_next;
2966                             o->op_next = next->op_next;
2967                             break;
2968                         }
2969                         top = OpSIBLING(top);
2970                     }
2971                 }
2972             }
2973 
2974             /* Optimise 'my $x; my $y;' into 'my ($x, $y);'
2975              *
2976              * This latter form is then suitable for conversion into padrange
2977              * later on. Convert:
2978              *
2979              *   nextstate1 -> padop1 -> nextstate2 -> padop2 -> nextstate3
2980              *
2981              * into:
2982              *
2983              *   nextstate1 ->     listop     -> nextstate3
2984              *                 /            \
2985              *         pushmark -> padop1 -> padop2
2986              */
2987             if (o->op_next && (
2988                     o->op_next->op_type == OP_PADSV
2989                  || o->op_next->op_type == OP_PADAV
2990                  || o->op_next->op_type == OP_PADHV
2991                 )
2992                 && !(o->op_next->op_private & ~OPpLVAL_INTRO)
2993                 && o->op_next->op_next && o->op_next->op_next->op_type == OP_NEXTSTATE
2994                 && o->op_next->op_next->op_next && (
2995                     o->op_next->op_next->op_next->op_type == OP_PADSV
2996                  || o->op_next->op_next->op_next->op_type == OP_PADAV
2997                  || o->op_next->op_next->op_next->op_type == OP_PADHV
2998                 )
2999                 && !(o->op_next->op_next->op_next->op_private & ~OPpLVAL_INTRO)
3000                 && o->op_next->op_next->op_next->op_next && o->op_next->op_next->op_next->op_next->op_type == OP_NEXTSTATE
3001                 && (!CopLABEL((COP*)o)) /* Don't mess with labels */
3002                 && (!CopLABEL((COP*)o->op_next->op_next)) /* ... */
3003             ) {
3004                 OP *pad1, *ns2, *pad2, *ns3, *newop, *newpm;
3005 
3006                 pad1 =    o->op_next;
3007                 ns2  = pad1->op_next;
3008                 pad2 =  ns2->op_next;
3009                 ns3  = pad2->op_next;
3010 
3011                 /* we assume here that the op_next chain is the same as
3012                  * the op_sibling chain */
3013                 assert(OpSIBLING(o)    == pad1);
3014                 assert(OpSIBLING(pad1) == ns2);
3015                 assert(OpSIBLING(ns2)  == pad2);
3016                 assert(OpSIBLING(pad2) == ns3);
3017 
3018                 /* excise and delete ns2 */
3019                 op_sibling_splice(NULL, pad1, 1, NULL);
3020                 op_free(ns2);
3021 
3022                 /* excise pad1 and pad2 */
3023                 op_sibling_splice(NULL, o, 2, NULL);
3024 
3025                 /* create new listop, with children consisting of:
3026                  * a new pushmark, pad1, pad2. */
3027                 newop = newLISTOP(OP_LIST, 0, pad1, pad2);
3028                 newop->op_flags |= OPf_PARENS;
3029                 newop->op_flags = (newop->op_flags & ~OPf_WANT) | OPf_WANT_VOID;
3030 
3031                 /* insert newop between o and ns3 */
3032                 op_sibling_splice(NULL, o, 0, newop);
3033 
3034                 /*fixup op_next chain */
3035                 newpm = cUNOPx(newop)->op_first; /* pushmark */
3036                 o    ->op_next = newpm;
3037                 newpm->op_next = pad1;
3038                 pad1 ->op_next = pad2;
3039                 pad2 ->op_next = newop; /* listop */
3040                 newop->op_next = ns3;
3041 
3042                 /* Ensure pushmark has this flag if padops do */
3043                 if (pad1->op_flags & OPf_MOD && pad2->op_flags & OPf_MOD) {
3044                     newpm->op_flags |= OPf_MOD;
3045                 }
3046 
3047                 break;
3048             }
3049 
3050             /* Two NEXTSTATEs in a row serve no purpose. Except if they happen
3051                to carry two labels. For now, take the easier option, and skip
3052                this optimisation if the first NEXTSTATE has a label.
3053                Yves asked what about if they have different hints or features?
3054                Tony thinks that as we remove the first of the pair it should
3055                be fine.
3056             */
3057             if (!CopLABEL((COP*)o) && !PERLDB_NOOPT) {
3058                 OP *nextop = o->op_next;
3059                 while (nextop) {
3060                     switch (nextop->op_type) {
3061                         case OP_NULL:
3062                         case OP_SCALAR:
3063                         case OP_LINESEQ:
3064                         case OP_SCOPE:
3065                             nextop = nextop->op_next;
3066                             continue;
3067                     }
3068                     break;
3069                 }
3070 
3071                 if (nextop && (nextop->op_type == OP_NEXTSTATE)) {
3072                     op_null(o);
3073                     if (oldop)
3074                         oldop->op_next = nextop;
3075                     o = nextop;
3076                     /* Skip (old)oldop assignment since the current oldop's
3077                        op_next already points to the next op.  */
3078                     goto redo;
3079                 }
3080             }
3081             break;
3082 
3083         case OP_CONCAT:
3084             if (o->op_next && o->op_next->op_type == OP_STRINGIFY) {
3085                 if (o->op_next->op_private & OPpTARGET_MY) {
3086                     if (o->op_flags & OPf_STACKED) /* chained concats */
3087                         break; /* ignore_optimization */
3088                     else {
3089                         /* assert(PL_opargs[o->op_type] & OA_TARGLEX); */
3090                         o->op_targ = o->op_next->op_targ;
3091                         o->op_next->op_targ = 0;
3092                         o->op_private |= OPpTARGET_MY;
3093                     }
3094                 }
3095                 op_null(o->op_next);
3096             }
3097             break;
3098         case OP_STUB:
3099             if ((o->op_flags & OPf_WANT) != OPf_WANT_LIST) {
3100                 break; /* Scalar stub must produce undef.  List stub is noop */
3101             }
3102             goto nothin;
3103         case OP_NULL:
3104             if (o->op_targ == OP_NEXTSTATE
3105                 || o->op_targ == OP_DBSTATE)
3106             {
3107                 PL_curcop = ((COP*)o);
3108             }
3109             /* XXX: We avoid setting op_seq here to prevent later calls
3110                to rpeep() from mistakenly concluding that optimisation
3111                has already occurred. This doesn't fix the real problem,
3112                though (See 20010220.007 (#5874)). AMS 20010719 */
3113             /* op_seq functionality is now replaced by op_opt */
3114             o->op_opt = 0;
3115             /* FALLTHROUGH */
3116         case OP_SCALAR:
3117         case OP_LINESEQ:
3118         case OP_SCOPE:
3119         nothin:
3120             if (oldop) {
3121                 oldop->op_next = o->op_next;
3122                 o->op_opt = 0;
3123                 continue;
3124             }
3125             break;
3126 
3127         case OP_PUSHMARK:
3128 
3129             /* Given
3130                  5 repeat/DOLIST
3131                  3   ex-list
3132                  1     pushmark
3133                  2     scalar or const
3134                  4   const[0]
3135                convert repeat into a stub with no kids.
3136              */
3137             if (o->op_next->op_type == OP_CONST
3138              || (  o->op_next->op_type == OP_PADSV
3139                 && !(o->op_next->op_private & OPpLVAL_INTRO))
3140              || (  o->op_next->op_type == OP_GV
3141                 && o->op_next->op_next->op_type == OP_RV2SV
3142                 && !(o->op_next->op_next->op_private
3143                         & (OPpLVAL_INTRO|OPpOUR_INTRO))))
3144             {
3145                 const OP *kid = o->op_next->op_next;
3146                 if (o->op_next->op_type == OP_GV)
3147                    kid = kid->op_next;
3148                 /* kid is now the ex-list.  */
3149                 if (kid->op_type == OP_NULL
3150                  && (kid = kid->op_next)->op_type == OP_CONST
3151                     /* kid is now the repeat count.  */
3152                  && kid->op_next->op_type == OP_REPEAT
3153                  && kid->op_next->op_private & OPpREPEAT_DOLIST
3154                  && (kid->op_next->op_flags & OPf_WANT) == OPf_WANT_LIST
3155                  && SvIOK(kSVOP_sv) && SvIVX(kSVOP_sv) == 0
3156                  && oldop)
3157                 {
3158                     o = kid->op_next; /* repeat */
3159                     oldop->op_next = o;
3160                     op_free(cBINOPo->op_first);
3161                     op_free(cBINOPo->op_last );
3162                     o->op_flags &=~ OPf_KIDS;
3163                     /* stub is a baseop; repeat is a binop */
3164                     STATIC_ASSERT_STMT(sizeof(OP) <= sizeof(BINOP));
3165                     OpTYPE_set(o, OP_STUB);
3166                     o->op_private = 0;
3167                     break;
3168                 }
3169             }
3170 
3171             /* Convert a series of PAD ops for my vars plus support into a
3172              * single padrange op. Basically
3173              *
3174              *    pushmark -> pad[ahs]v -> pad[ahs]?v -> ... -> (list) -> rest
3175              *
3176              * becomes, depending on circumstances, one of
3177              *
3178              *    padrange  ----------------------------------> (list) -> rest
3179              *    padrange  --------------------------------------------> rest
3180              *
3181              * where all the pad indexes are sequential and of the same type
3182              * (INTRO or not).
3183              * We convert the pushmark into a padrange op, then skip
3184              * any other pad ops, and possibly some trailing ops.
3185              * Note that we don't null() the skipped ops, to make it
3186              * easier for Deparse to undo this optimisation (and none of
3187              * the skipped ops are holding any resources). It also makes
3188              * it easier for find_uninit_var(), as it can just ignore
3189              * padrange, and examine the original pad ops.
3190              */
3191         {
3192             OP *p;
3193             OP *followop = NULL; /* the op that will follow the padrange op */
3194             U8 count = 0;
3195             U8 intro = 0;
3196             PADOFFSET base = 0; /* init only to stop compiler whining */
3197             bool gvoid = 0;     /* init only to stop compiler whining */
3198             bool defav = 0;  /* seen (...) = @_ */
3199             bool reuse = 0;  /* reuse an existing padrange op */
3200 
3201             /* look for a pushmark -> gv[_] -> rv2av */
3202 
3203             {
3204                 OP *rv2av, *q;
3205                 p = o->op_next;
3206                 if (   p->op_type == OP_GV
3207                     && cGVOPx_gv(p) == PL_defgv
3208                     && (rv2av = p->op_next)
3209                     && rv2av->op_type == OP_RV2AV
3210                     && !(rv2av->op_flags & OPf_REF)
3211                     && !(rv2av->op_private & (OPpLVAL_INTRO|OPpMAYBE_LVSUB))
3212                     && ((rv2av->op_flags & OPf_WANT) == OPf_WANT_LIST)
3213                 ) {
3214                     q = rv2av->op_next;
3215                     if (q->op_type == OP_NULL)
3216                         q = q->op_next;
3217                     if (q->op_type == OP_PUSHMARK) {
3218                         defav = 1;
3219                         p = q;
3220                     }
3221                 }
3222             }
3223             if (!defav) {
3224                 p = o;
3225             }
3226 
3227             /* scan for PAD ops */
3228 
3229             for (p = p->op_next; p; p = p->op_next) {
3230                 if (p->op_type == OP_NULL)
3231                     continue;
3232 
3233                 if ((     p->op_type != OP_PADSV
3234                        && p->op_type != OP_PADAV
3235                        && p->op_type != OP_PADHV
3236                     )
3237                       /* any private flag other than INTRO? e.g. STATE */
3238                    || (p->op_private & ~OPpLVAL_INTRO)
3239                 )
3240                     break;
3241 
3242                 /* let $a[N] potentially be optimised into AELEMFAST_LEX
3243                  * instead */
3244                 if (   p->op_type == OP_PADAV
3245                     && p->op_next
3246                     && p->op_next->op_type == OP_CONST
3247                     && p->op_next->op_next
3248                     && p->op_next->op_next->op_type == OP_AELEM
3249                 )
3250                     break;
3251 
3252                 /* for 1st padop, note what type it is and the range
3253                  * start; for the others, check that it's the same type
3254                  * and that the targs are contiguous */
3255                 if (count == 0) {
3256                     intro = (p->op_private & OPpLVAL_INTRO);
3257                     base = p->op_targ;
3258                     gvoid = OP_GIMME(p,0) == G_VOID;
3259                 }
3260                 else {
3261                     if ((p->op_private & OPpLVAL_INTRO) != intro)
3262                         break;
3263                     /* Note that you'd normally  expect targs to be
3264                      * contiguous in my($a,$b,$c), but that's not the case
3265                      * when external modules start doing things, e.g.
3266                      * Function::Parameters */
3267                     if (p->op_targ != base + count)
3268                         break;
3269                     assert(p->op_targ == base + count);
3270                     /* Either all the padops or none of the padops should
3271                        be in void context.  Since we only do the optimisa-
3272                        tion for av/hv when the aggregate itself is pushed
3273                        on to the stack (one item), there is no need to dis-
3274                        tinguish list from scalar context.  */
3275                     if (gvoid != (OP_GIMME(p,0) == G_VOID))
3276                         break;
3277                 }
3278 
3279                 /* for AV, HV, only when we're not flattening */
3280                 if (   p->op_type != OP_PADSV
3281                     && !gvoid
3282                     && !(p->op_flags & OPf_REF)
3283                 )
3284                     break;
3285 
3286                 if (count >= OPpPADRANGE_COUNTMASK)
3287                     break;
3288 
3289                 /* there's a biggest base we can fit into a
3290                  * SAVEt_CLEARPADRANGE in pp_padrange.
3291                  * (The sizeof() stuff will be constant-folded, and is
3292                  * intended to avoid getting "comparison is always false"
3293                  * compiler warnings. See the comments above
3294                  * MEM_WRAP_CHECK for more explanation on why we do this
3295                  * in a weird way to avoid compiler warnings.)
3296                  */
3297                 if (   intro
3298                     && (8*sizeof(base) >
3299                         8*sizeof(UV)-OPpPADRANGE_COUNTSHIFT-SAVE_TIGHT_SHIFT
3300                         ? (Size_t)base
3301                         : (UV_MAX >> (OPpPADRANGE_COUNTSHIFT+SAVE_TIGHT_SHIFT))
3302                         ) >
3303                         (UV_MAX >> (OPpPADRANGE_COUNTSHIFT+SAVE_TIGHT_SHIFT))
3304                 )
3305                     break;
3306 
3307                 /* Success! We've got another valid pad op to optimise away */
3308                 count++;
3309                 followop = p->op_next;
3310             }
3311 
3312             if (count < 1 || (count == 1 && !defav))
3313                 break;
3314 
3315             /* pp_padrange in specifically compile-time void context
3316              * skips pushing a mark and lexicals; in all other contexts
3317              * (including unknown till runtime) it pushes a mark and the
3318              * lexicals. We must be very careful then, that the ops we
3319              * optimise away would have exactly the same effect as the
3320              * padrange.
3321              * In particular in void context, we can only optimise to
3322              * a padrange if we see the complete sequence
3323              *     pushmark, pad*v, ...., list
3324              * which has the net effect of leaving the markstack as it
3325              * was.  Not pushing onto the stack (whereas padsv does touch
3326              * the stack) makes no difference in void context.
3327              */
3328             assert(followop);
3329             if (gvoid) {
3330                 if (followop->op_type == OP_LIST
3331                         && OP_GIMME(followop,0) == G_VOID
3332                    )
3333                 {
3334                     followop = followop->op_next; /* skip OP_LIST */
3335 
3336                     /* consolidate two successive my(...);'s */
3337 
3338                     if (   oldoldop
3339                         && oldoldop->op_type == OP_PADRANGE
3340                         && (oldoldop->op_flags & OPf_WANT) == OPf_WANT_VOID
3341                         && (oldoldop->op_private & OPpLVAL_INTRO) == intro
3342                         && !(oldoldop->op_flags & OPf_SPECIAL)
3343                     ) {
3344                         U8 old_count;
3345                         assert(oldoldop->op_next == oldop);
3346                         assert(   oldop->op_type == OP_NEXTSTATE
3347                                || oldop->op_type == OP_DBSTATE);
3348                         assert(oldop->op_next == o);
3349 
3350                         old_count
3351                             = (oldoldop->op_private & OPpPADRANGE_COUNTMASK);
3352 
3353                        /* Do not assume pad offsets for $c and $d are con-
3354                           tiguous in
3355                             my ($a,$b,$c);
3356                             my ($d,$e,$f);
3357                         */
3358                         if (  oldoldop->op_targ + old_count == base
3359                            && old_count < OPpPADRANGE_COUNTMASK - count) {
3360                             base = oldoldop->op_targ;
3361                             count += old_count;
3362                             reuse = 1;
3363                         }
3364                     }
3365 
3366                     /* if there's any immediately following singleton
3367                      * my var's; then swallow them and the associated
3368                      * nextstates; i.e.
3369                      *    my ($a,$b); my $c; my $d;
3370                      * is treated as
3371                      *    my ($a,$b,$c,$d);
3372                      */
3373 
3374                     while (    ((p = followop->op_next))
3375                             && (  p->op_type == OP_PADSV
3376                                || p->op_type == OP_PADAV
3377                                || p->op_type == OP_PADHV)
3378                             && (p->op_flags & OPf_WANT) == OPf_WANT_VOID
3379                             && (p->op_private & OPpLVAL_INTRO) == intro
3380                             && !(p->op_private & ~OPpLVAL_INTRO)
3381                             && p->op_next
3382                             && (   p->op_next->op_type == OP_NEXTSTATE
3383                                 || p->op_next->op_type == OP_DBSTATE)
3384                             && count < OPpPADRANGE_COUNTMASK
3385                             && base + count == p->op_targ
3386                     ) {
3387                         count++;
3388                         followop = p->op_next;
3389                     }
3390                 }
3391                 else
3392                     break;
3393             }
3394 
3395             if (reuse) {
3396                 assert(oldoldop->op_type == OP_PADRANGE);
3397                 oldoldop->op_next = followop;
3398                 oldoldop->op_private = (intro | count);
3399                 o = oldoldop;
3400                 oldop = NULL;
3401                 oldoldop = NULL;
3402             }
3403             else {
3404                 /* Convert the pushmark into a padrange.
3405                  * To make Deparse easier, we guarantee that a padrange was
3406                  * *always* formerly a pushmark */
3407                 assert(o->op_type == OP_PUSHMARK);
3408                 o->op_next = followop;
3409                 OpTYPE_set(o, OP_PADRANGE);
3410                 o->op_targ = base;
3411                 /* bit 7: INTRO; bit 6..0: count */
3412                 o->op_private = (intro | count);
3413                 o->op_flags = ((o->op_flags & ~(OPf_WANT|OPf_SPECIAL))
3414                               | gvoid * OPf_WANT_VOID
3415                               | (defav ? OPf_SPECIAL : 0));
3416             }
3417             break;
3418         }
3419 
3420         case OP_RV2AV:
3421             if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
3422                 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0);
3423             break;
3424 
3425         case OP_RV2HV:
3426         case OP_PADHV:
3427             /*'keys %h' in void or scalar context: skip the OP_KEYS
3428              * and perform the functionality directly in the RV2HV/PADHV
3429              * op
3430              */
3431             if (o->op_flags & OPf_REF) {
3432                 OP *k = o->op_next;
3433                 U8 want = (k->op_flags & OPf_WANT);
3434                 if (   k
3435                     && k->op_type == OP_KEYS
3436                     && (   want == OPf_WANT_VOID
3437                         || want == OPf_WANT_SCALAR)
3438                     && !(k->op_private & OPpMAYBE_LVSUB)
3439                     && !(k->op_flags & OPf_MOD)
3440                 ) {
3441                     o->op_next     = k->op_next;
3442                     o->op_flags   &= ~(OPf_REF|OPf_WANT);
3443                     o->op_flags   |= want;
3444                     o->op_private |= (o->op_type == OP_PADHV ?
3445                                       OPpPADHV_ISKEYS : OPpRV2HV_ISKEYS);
3446                     /* for keys(%lex), hold onto the OP_KEYS's targ
3447                      * since padhv doesn't have its own targ to return
3448                      * an int with */
3449                     if (!(o->op_type ==OP_PADHV && want == OPf_WANT_SCALAR))
3450                         op_null(k);
3451                 }
3452             }
3453 
3454             /* see if %h is used in boolean context */
3455             if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
3456                 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, OPpMAYBE_TRUEBOOL);
3457 
3458 
3459             if (o->op_type != OP_PADHV)
3460                 break;
3461             /* FALLTHROUGH */
3462         case OP_PADAV:
3463             if (   o->op_type == OP_PADAV
3464                 && (o->op_flags & OPf_WANT) == OPf_WANT_SCALAR
3465             )
3466                 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0);
3467             /* FALLTHROUGH */
3468         case OP_PADSV:
3469             /* Skip over state($x) in void context.  */
3470             if (oldop && o->op_private == (OPpPAD_STATE|OPpLVAL_INTRO)
3471              && (o->op_flags & OPf_WANT) == OPf_WANT_VOID)
3472             {
3473                 oldop->op_next = o->op_next;
3474                 goto redo_nextstate;
3475             }
3476             if (o->op_type != OP_PADAV)
3477                 break;
3478             /* FALLTHROUGH */
3479         case OP_GV:
3480             if (o->op_type == OP_PADAV || o->op_next->op_type == OP_RV2AV) {
3481                 OP* const pop = (o->op_type == OP_PADAV) ?
3482                             o->op_next : o->op_next->op_next;
3483                 IV i;
3484                 if (pop && pop->op_type == OP_CONST &&
3485                     ((PL_op = pop->op_next)) &&
3486                     pop->op_next->op_type == OP_AELEM &&
3487                     !(pop->op_next->op_private &
3488                       (OPpLVAL_INTRO|OPpLVAL_DEFER|OPpDEREF|OPpMAYBE_LVSUB)) &&
3489                     (i = SvIV(cSVOPx(pop)->op_sv)) >= -128 && i <= 127)
3490                 {
3491                     GV *gv;
3492                     if (cSVOPx(pop)->op_private & OPpCONST_STRICT)
3493                         no_bareword_allowed(pop);
3494                     if (o->op_type == OP_GV)
3495                         op_null(o->op_next);
3496                     op_null(pop->op_next);
3497                     op_null(pop);
3498                     o->op_flags |= pop->op_next->op_flags & OPf_MOD;
3499                     o->op_next = pop->op_next->op_next;
3500                     o->op_ppaddr = PL_ppaddr[OP_AELEMFAST];
3501                     o->op_private = (U8)i;
3502                     if (o->op_type == OP_GV) {
3503                         gv = cGVOPo_gv;
3504                         GvAVn(gv);
3505                         o->op_type = OP_AELEMFAST;
3506                     }
3507                     else
3508                         o->op_type = OP_AELEMFAST_LEX;
3509                 }
3510                 if (o->op_type != OP_GV)
3511                     break;
3512             }
3513 
3514             /* Remove $foo from the op_next chain in void context.  */
3515             if (oldop
3516              && (  o->op_next->op_type == OP_RV2SV
3517                 || o->op_next->op_type == OP_RV2AV
3518                 || o->op_next->op_type == OP_RV2HV  )
3519              && (o->op_next->op_flags & OPf_WANT) == OPf_WANT_VOID
3520              && !(o->op_next->op_private & OPpLVAL_INTRO))
3521             {
3522                 oldop->op_next = o->op_next->op_next;
3523                 /* Reprocess the previous op if it is a nextstate, to
3524                    allow double-nextstate optimisation.  */
3525               redo_nextstate:
3526                 if (oldop->op_type == OP_NEXTSTATE) {
3527                     oldop->op_opt = 0;
3528                     o = oldop;
3529                     oldop = oldoldop;
3530                     oldoldop = NULL;
3531                     goto redo;
3532                 }
3533                 o = oldop->op_next;
3534                 goto redo;
3535             }
3536             else if (o->op_next->op_type == OP_RV2SV) {
3537                 if (!(o->op_next->op_private & OPpDEREF)) {
3538                     op_null(o->op_next);
3539                     o->op_private |= o->op_next->op_private & (OPpLVAL_INTRO
3540                                                                | OPpOUR_INTRO);
3541                     o->op_next = o->op_next->op_next;
3542                     OpTYPE_set(o, OP_GVSV);
3543                 }
3544             }
3545             else if (o->op_next->op_type == OP_READLINE
3546                     && o->op_next->op_next->op_type == OP_CONCAT
3547                     && (o->op_next->op_next->op_flags & OPf_STACKED))
3548             {
3549                 /* Turn "$a .= <FH>" into an OP_RCATLINE. AMS 20010917 */
3550                 OpTYPE_set(o, OP_RCATLINE);
3551                 o->op_flags |= OPf_STACKED;
3552                 op_null(o->op_next->op_next);
3553                 op_null(o->op_next);
3554             }
3555 
3556             break;
3557 
3558         case OP_NOT:
3559             break;
3560 
3561         case OP_AND:
3562         case OP_OR:
3563         case OP_DOR:
3564         case OP_CMPCHAIN_AND:
3565         case OP_PUSHDEFER:
3566             while (cLOGOP->op_other->op_type == OP_NULL)
3567                 cLOGOP->op_other = cLOGOP->op_other->op_next;
3568             while (o->op_next && (   o->op_type == o->op_next->op_type
3569                                   || o->op_next->op_type == OP_NULL))
3570                 o->op_next = o->op_next->op_next;
3571 
3572             /* If we're an OR and our next is an AND in void context, we'll
3573                follow its op_other on short circuit, same for reverse.
3574                We can't do this with OP_DOR since if it's true, its return
3575                value is the underlying value which must be evaluated
3576                by the next op. */
3577             if (o->op_next &&
3578                 (
3579                     (IS_AND_OP(o) && IS_OR_OP(o->op_next))
3580                  || (IS_OR_OP(o) && IS_AND_OP(o->op_next))
3581                 )
3582                 && (o->op_next->op_flags & OPf_WANT) == OPf_WANT_VOID
3583             ) {
3584                 o->op_next = cLOGOPx(o->op_next)->op_other;
3585             }
3586             DEFER(cLOGOP->op_other);
3587             o->op_opt = 1;
3588             break;
3589 
3590         case OP_GREPWHILE:
3591             if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
3592                 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0);
3593             /* FALLTHROUGH */
3594         case OP_COND_EXPR:
3595         case OP_MAPWHILE:
3596         case OP_ANDASSIGN:
3597         case OP_ORASSIGN:
3598         case OP_DORASSIGN:
3599         case OP_RANGE:
3600         case OP_ONCE:
3601         case OP_ARGDEFELEM:
3602             while (cLOGOP->op_other->op_type == OP_NULL)
3603                 cLOGOP->op_other = cLOGOP->op_other->op_next;
3604             DEFER(cLOGOP->op_other);
3605             break;
3606 
3607         case OP_ENTERLOOP:
3608         case OP_ENTERITER:
3609             while (cLOOP->op_redoop->op_type == OP_NULL)
3610                 cLOOP->op_redoop = cLOOP->op_redoop->op_next;
3611             while (cLOOP->op_nextop->op_type == OP_NULL)
3612                 cLOOP->op_nextop = cLOOP->op_nextop->op_next;
3613             while (cLOOP->op_lastop->op_type == OP_NULL)
3614                 cLOOP->op_lastop = cLOOP->op_lastop->op_next;
3615             /* a while(1) loop doesn't have an op_next that escapes the
3616              * loop, so we have to explicitly follow the op_lastop to
3617              * process the rest of the code */
3618             DEFER(cLOOP->op_lastop);
3619             break;
3620 
3621         case OP_ENTERTRY:
3622             assert(cLOGOPo->op_other->op_type == OP_LEAVETRY);
3623             DEFER(cLOGOPo->op_other);
3624             break;
3625 
3626         case OP_ENTERTRYCATCH:
3627             assert(cLOGOPo->op_other->op_type == OP_CATCH);
3628             /* catch body is the ->op_other of the OP_CATCH */
3629             DEFER(cLOGOPx(cLOGOPo->op_other)->op_other);
3630             break;
3631 
3632         case OP_SUBST:
3633             if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
3634                 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0);
3635             assert(!(cPMOP->op_pmflags & PMf_ONCE));
3636             while (cPMOP->op_pmstashstartu.op_pmreplstart &&
3637                    cPMOP->op_pmstashstartu.op_pmreplstart->op_type == OP_NULL)
3638                 cPMOP->op_pmstashstartu.op_pmreplstart
3639                     = cPMOP->op_pmstashstartu.op_pmreplstart->op_next;
3640             DEFER(cPMOP->op_pmstashstartu.op_pmreplstart);
3641             break;
3642 
3643         case OP_SORT: {
3644             OP *oright;
3645 
3646             if (o->op_flags & OPf_SPECIAL) {
3647                 /* first arg is a code block */
3648                 OP * const nullop = OpSIBLING(cLISTOP->op_first);
3649                 OP * kid          = cUNOPx(nullop)->op_first;
3650 
3651                 assert(nullop->op_type == OP_NULL);
3652                 assert(kid->op_type == OP_SCOPE
3653                  || (kid->op_type == OP_NULL && kid->op_targ == OP_LEAVE));
3654                 /* since OP_SORT doesn't have a handy op_other-style
3655                  * field that can point directly to the start of the code
3656                  * block, store it in the otherwise-unused op_next field
3657                  * of the top-level OP_NULL. This will be quicker at
3658                  * run-time, and it will also allow us to remove leading
3659                  * OP_NULLs by just messing with op_nexts without
3660                  * altering the basic op_first/op_sibling layout. */
3661                 kid = kLISTOP->op_first;
3662                 assert(
3663                       (kid->op_type == OP_NULL
3664                       && (  kid->op_targ == OP_NEXTSTATE
3665                          || kid->op_targ == OP_DBSTATE  ))
3666                     || kid->op_type == OP_STUB
3667                     || kid->op_type == OP_ENTER
3668                     || (PL_parser && PL_parser->error_count));
3669                 nullop->op_next = kid->op_next;
3670                 DEFER(nullop->op_next);
3671             }
3672 
3673             /* check that RHS of sort is a single plain array */
3674             oright = cUNOPo->op_first;
3675             if (!oright || oright->op_type != OP_PUSHMARK)
3676                 break;
3677 
3678             if (o->op_private & OPpSORT_INPLACE)
3679                 break;
3680 
3681             /* reverse sort ... can be optimised.  */
3682             if (!OpHAS_SIBLING(cUNOPo)) {
3683                 /* Nothing follows us on the list. */
3684                 OP * const reverse = o->op_next;
3685 
3686                 if (reverse->op_type == OP_REVERSE &&
3687                     (reverse->op_flags & OPf_WANT) == OPf_WANT_LIST) {
3688                     OP * const pushmark = cUNOPx(reverse)->op_first;
3689                     if (pushmark && (pushmark->op_type == OP_PUSHMARK)
3690                         && (OpSIBLING(cUNOPx(pushmark)) == o)) {
3691                         /* reverse -> pushmark -> sort */
3692                         o->op_private |= OPpSORT_REVERSE;
3693                         op_null(reverse);
3694                         pushmark->op_next = oright->op_next;
3695                         op_null(oright);
3696                     }
3697                 }
3698             }
3699 
3700             break;
3701         }
3702 
3703         case OP_REVERSE: {
3704             OP *ourmark, *theirmark, *ourlast, *iter, *expushmark, *rv2av;
3705             OP *gvop = NULL;
3706             LISTOP *enter, *exlist;
3707 
3708             if (o->op_private & OPpSORT_INPLACE)
3709                 break;
3710 
3711             enter = cLISTOPx(o->op_next);
3712             if (!enter)
3713                 break;
3714             if (enter->op_type == OP_NULL) {
3715                 enter = cLISTOPx(enter->op_next);
3716                 if (!enter)
3717                     break;
3718             }
3719             /* for $a (...) will have OP_GV then OP_RV2GV here.
3720                for (...) just has an OP_GV.  */
3721             if (enter->op_type == OP_GV) {
3722                 gvop = (OP *) enter;
3723                 enter = cLISTOPx(enter->op_next);
3724                 if (!enter)
3725                     break;
3726                 if (enter->op_type == OP_RV2GV) {
3727                   enter = cLISTOPx(enter->op_next);
3728                   if (!enter)
3729                     break;
3730                 }
3731             }
3732 
3733             if (enter->op_type != OP_ENTERITER)
3734                 break;
3735 
3736             iter = enter->op_next;
3737             if (!iter || iter->op_type != OP_ITER)
3738                 break;
3739 
3740             expushmark = enter->op_first;
3741             if (!expushmark || expushmark->op_type != OP_NULL
3742                 || expushmark->op_targ != OP_PUSHMARK)
3743                 break;
3744 
3745             exlist = cLISTOPx(OpSIBLING(expushmark));
3746             if (!exlist || exlist->op_type != OP_NULL
3747                 || exlist->op_targ != OP_LIST)
3748                 break;
3749 
3750             if (exlist->op_last != o) {
3751                 /* Mmm. Was expecting to point back to this op.  */
3752                 break;
3753             }
3754             theirmark = exlist->op_first;
3755             if (!theirmark || theirmark->op_type != OP_PUSHMARK)
3756                 break;
3757 
3758             if (OpSIBLING(theirmark) != o) {
3759                 /* There's something between the mark and the reverse, eg
3760                    for (1, reverse (...))
3761                    so no go.  */
3762                 break;
3763             }
3764 
3765             ourmark = cLISTOPo->op_first;
3766             if (!ourmark || ourmark->op_type != OP_PUSHMARK)
3767                 break;
3768 
3769             ourlast = cLISTOPo->op_last;
3770             if (!ourlast || ourlast->op_next != o)
3771                 break;
3772 
3773             rv2av = OpSIBLING(ourmark);
3774             if (rv2av && rv2av->op_type == OP_RV2AV && !OpHAS_SIBLING(rv2av)
3775                 && rv2av->op_flags == (OPf_WANT_LIST | OPf_KIDS)) {
3776                 /* We're just reversing a single array.  */
3777                 rv2av->op_flags = OPf_WANT_SCALAR | OPf_KIDS | OPf_REF;
3778                 enter->op_flags |= OPf_STACKED;
3779             }
3780 
3781             /* We don't have control over who points to theirmark, so sacrifice
3782                ours.  */
3783             theirmark->op_next = ourmark->op_next;
3784             theirmark->op_flags = ourmark->op_flags;
3785             ourlast->op_next = gvop ? gvop : (OP *) enter;
3786             op_null(ourmark);
3787             op_null(o);
3788             enter->op_private |= OPpITER_REVERSED;
3789             iter->op_private |= OPpITER_REVERSED;
3790 
3791             oldoldop = NULL;
3792             oldop    = ourlast;
3793             o        = oldop->op_next;
3794             goto redo;
3795             NOT_REACHED; /* NOTREACHED */
3796             break;
3797         }
3798 
3799         case OP_UNDEF:
3800             if ((o->op_flags & OPf_KIDS) &&
3801                 (cUNOPx(o)->op_first->op_type == OP_PADSV)) {
3802 
3803                 /* Convert:
3804                  *     undef
3805                  *       padsv[$x]
3806                  * to:
3807                  *     undef[$x]
3808                  */
3809 
3810                 OP * padsv = cUNOPx(o)->op_first;
3811                 o->op_private = OPpTARGET_MY |
3812                         (padsv->op_private & (OPpLVAL_INTRO|OPpPAD_STATE));
3813                 o->op_targ = padsv->op_targ; padsv->op_targ = 0;
3814                 op_null(padsv);
3815                 /* Optimizer does NOT seem to fix up the padsv op_next ptr */
3816                 if (oldoldop)
3817                     oldoldop->op_next = o;
3818                 oldop = oldoldop;
3819                 oldoldop = NULL;
3820 
3821             } else if (o->op_next->op_type == OP_PADSV) {
3822                 OP * padsv = o->op_next;
3823                 OP * sassign = (padsv->op_next &&
3824                         padsv->op_next->op_type == OP_SASSIGN) ?
3825                         padsv->op_next : NULL;
3826                 if (sassign && cBINOPx(sassign)->op_first == o) {
3827                     /* Convert:
3828                      *     sassign
3829                      *       undef
3830                      *       padsv[$x]
3831                      * to:
3832                      *     undef[$x]
3833                      * NOTE: undef does not have the "T" flag set in
3834                      *       regen/opcodes, as this would cause
3835                      *       S_maybe_targlex to do the optimization.
3836                      *       Seems easier to keep it all here, rather
3837                      *       than have an undef-specific branch in
3838                      *       S_maybe_targlex just to add the
3839                      *       OPpUNDEF_KEEP_PV flag.
3840                      */
3841                      o->op_private = OPpTARGET_MY | OPpUNDEF_KEEP_PV |
3842                          (padsv->op_private & (OPpLVAL_INTRO|OPpPAD_STATE));
3843                      o->op_targ = padsv->op_targ; padsv->op_targ = 0;
3844                      op_null(padsv);
3845                      op_null(sassign);
3846                      /* Optimizer DOES seems to fix up the op_next ptrs */
3847                 }
3848             }
3849             break;
3850 
3851         case OP_QR:
3852         case OP_MATCH:
3853             if (!(cPMOP->op_pmflags & PMf_ONCE)) {
3854                 assert (!cPMOP->op_pmstashstartu.op_pmreplstart);
3855             }
3856             break;
3857 
3858         case OP_RUNCV:
3859             if (!(o->op_private & OPpOFFBYONE) && !CvCLONE(PL_compcv)
3860              && (!CvANON(PL_compcv) || (!PL_cv_has_eval && !PL_perldb)))
3861             {
3862                 SV *sv;
3863                 if (CvEVAL(PL_compcv)) sv = &PL_sv_undef;
3864                 else {
3865                     sv = newRV((SV *)PL_compcv);
3866                     sv_rvweaken(sv);
3867                     SvREADONLY_on(sv);
3868                 }
3869                 OpTYPE_set(o, OP_CONST);
3870                 o->op_flags |= OPf_SPECIAL;
3871                 cSVOPo->op_sv = sv;
3872             }
3873             break;
3874 
3875         case OP_SASSIGN: {
3876             if (OP_GIMME(o,0) == G_VOID
3877              || (  o->op_next->op_type == OP_LINESEQ
3878                 && (  o->op_next->op_next->op_type == OP_LEAVESUB
3879                    || (  o->op_next->op_next->op_type == OP_RETURN
3880                       && !CvLVALUE(PL_compcv)))))
3881             {
3882                 OP *right = cBINOP->op_first;
3883                 if (right) {
3884                     /*   sassign
3885                     *      RIGHT
3886                     *      substr
3887                     *         pushmark
3888                     *         arg1
3889                     *         arg2
3890                     *         ...
3891                     * becomes
3892                     *
3893                     *  ex-sassign
3894                     *     substr
3895                     *        pushmark
3896                     *        RIGHT
3897                     *        arg1
3898                     *        arg2
3899                     *        ...
3900                     */
3901                     OP *left = OpSIBLING(right);
3902                     if (left->op_type == OP_SUBSTR
3903                          && (left->op_private & 7) < 4) {
3904                         op_null(o);
3905                         /* cut out right */
3906                         op_sibling_splice(o, NULL, 1, NULL);
3907                         /* and insert it as second child of OP_SUBSTR */
3908                         op_sibling_splice(left, cBINOPx(left)->op_first, 0,
3909                                     right);
3910                         left->op_private |= OPpSUBSTR_REPL_FIRST;
3911                         left->op_flags =
3912                             (o->op_flags & ~OPf_WANT) | OPf_WANT_VOID;
3913                     }
3914                 }
3915             }
3916             OP* rhs = cBINOPx(o)->op_first;
3917             OP* lval = cBINOPx(o)->op_last;
3918 
3919             /* Combine a simple SASSIGN OP with a PADSV lvalue child OP
3920              * into a single OP. */
3921 
3922             /* This optimization covers arbitrarily complicated RHS OP
3923              * trees. Separate optimizations may exist for specific,
3924              * single RHS OPs, such as:
3925              * "my $foo = undef;" or "my $bar = $other_padsv;" */
3926 
3927             if (!(o->op_private & (OPpASSIGN_BACKWARDS|OPpASSIGN_CV_TO_GV))
3928                  && lval && (lval->op_type == OP_PADSV) &&
3929                 !(lval->op_private & OPpDEREF)
3930                  /* skip if padrange has already gazumped the padsv */
3931                  && (lval == oldop)
3932                  /* Memoize::Once produces a non-standard SASSIGN that
3933                   * doesn't actually point to pp_sassign, has only one
3934                   * child (PADSV), and gets to it via op_other rather
3935                   * than op_next. Don't try to optimize this. */
3936                  && (lval != rhs)
3937                ) {
3938                 /* SASSIGN's bitfield flags, such as op_moresib and
3939                  * op_slabbed, will be carried over unchanged. */
3940                 OpTYPE_set(o, OP_PADSV_STORE);
3941 
3942                 /* Explicitly craft the new OP's op_flags, carrying
3943                  * some bits over from the SASSIGN */
3944                 o->op_flags = (
3945                     OPf_KIDS | OPf_STACKED |
3946                     (o->op_flags & (OPf_WANT|OPf_PARENS))
3947                 );
3948 
3949                 /* Reset op_private flags, taking relevant private flags
3950                  * from the PADSV */
3951                 o->op_private = (lval->op_private &
3952                                 (OPpLVAL_INTRO|OPpPAD_STATE|OPpDEREF));
3953 
3954                 /* Steal the targ from the PADSV */
3955                 o->op_targ = lval->op_targ; lval->op_targ = 0;
3956 
3957                 /* Fixup op_next ptrs */
3958                 assert(oldop->op_type == OP_PADSV);
3959                 /* oldoldop can be arbitrarily deep in the RHS OP tree */
3960                 oldoldop->op_next = o;
3961 
3962                 /* Even when (rhs != oldoldop), rhs might still have a
3963                  * relevant op_next ptr to lval. This is definitely true
3964                  * when rhs is OP_NULL with a LOGOP kid (e.g. orassign).
3965                  * There may be other cases. */
3966                 if (rhs->op_next == lval)
3967                     rhs->op_next = o;
3968 
3969                 /* Now null-out the PADSV */
3970                 op_null(lval);
3971 
3972                 /* NULL the previous op ptrs, so rpeep can continue */
3973                 oldoldop = NULL; oldop = NULL;
3974             }
3975 
3976             /* Combine a simple SASSIGN OP with an AELEMFAST_LEX lvalue
3977              * into a single OP. This optimization covers arbitrarily
3978              * complicated RHS OP trees. */
3979 
3980             if (!(o->op_private & (OPpASSIGN_BACKWARDS|OPpASSIGN_CV_TO_GV))
3981                 && (lval->op_type == OP_NULL) && (lval->op_private == 2) &&
3982                 (cBINOPx(lval)->op_first->op_type == OP_AELEMFAST_LEX)
3983                  /* For efficiency, pp_aelemfastlex_store() doesn't push its
3984                   * result onto the stack. For the relatively rare case of
3985                   * the array assignment not in void context, we just do it
3986                   * the old slow way. */
3987                  && OP_GIMME(o,0) == G_VOID
3988             ) {
3989                 OP * lex = cBINOPx(lval)->op_first;
3990                 /* SASSIGN's bitfield flags, such as op_moresib and
3991                  * op_slabbed, will be carried over unchanged. */
3992                 OpTYPE_set(o, OP_AELEMFASTLEX_STORE);
3993 
3994                 /* Explicitly craft the new OP's op_flags, carrying
3995                  * some bits over from the SASSIGN */
3996                 o->op_flags = (
3997                     OPf_KIDS | OPf_STACKED |
3998                     (o->op_flags & (OPf_WANT|OPf_PARENS))
3999                 );
4000 
4001                 /* Copy the AELEMFAST_LEX op->private, which contains
4002                  * the key index. */
4003                 o->op_private = lex->op_private;
4004 
4005                 /* Take the targ from the AELEMFAST_LEX */
4006                 o->op_targ = lex->op_targ; lex->op_targ = 0;
4007 
4008                 assert(oldop->op_type == OP_AELEMFAST_LEX);
4009                 /* oldoldop can be arbitrarily deep in the RHS OP tree */
4010                 oldoldop->op_next = o;
4011 
4012                 /* Even when (rhs != oldoldop), rhs might still have a
4013                  * relevant op_next ptr to lex. (Updating it here can
4014                  * also cause other ops in the RHS to get the desired
4015                  * op_next pointer, presumably thanks to the finalizer.)
4016                  * This is definitely truewhen rhs is OP_NULL with a
4017                  * LOGOP kid (e.g. orassign). There may be other cases. */
4018                 if (rhs->op_next == lex)
4019                     rhs->op_next = o;
4020 
4021                 /* Now null-out the AELEMFAST_LEX */
4022                 op_null(lex);
4023 
4024                 /* NULL the previous op ptrs, so rpeep can continue */
4025                 oldop = oldoldop; oldoldop = NULL;
4026             }
4027 
4028             break;
4029         }
4030 
4031         case OP_AASSIGN: {
4032             int l, r, lr, lscalars, rscalars;
4033 
4034             /* handle common vars detection, e.g. ($a,$b) = ($b,$a).
4035                Note that we do this now rather than in newASSIGNOP(),
4036                since only by now are aliased lexicals flagged as such
4037 
4038                See the essay "Common vars in list assignment" above for
4039                the full details of the rationale behind all the conditions
4040                below.
4041 
4042                PL_generation sorcery:
4043                To detect whether there are common vars, the global var
4044                PL_generation is incremented for each assign op we scan.
4045                Then we run through all the lexical variables on the LHS,
4046                of the assignment, setting a spare slot in each of them to
4047                PL_generation.  Then we scan the RHS, and if any lexicals
4048                already have that value, we know we've got commonality.
4049                Also, if the generation number is already set to
4050                PERL_INT_MAX, then the variable is involved in aliasing, so
4051                we also have potential commonality in that case.
4052              */
4053 
4054             PL_generation++;
4055             /* scan LHS */
4056             lscalars = 0;
4057             l = S_aassign_scan(aTHX_ cLISTOPo->op_last,  FALSE, &lscalars);
4058             /* scan RHS */
4059             rscalars = 0;
4060             r = S_aassign_scan(aTHX_ cLISTOPo->op_first, TRUE, &rscalars);
4061             lr = (l|r);
4062 
4063 
4064             /* After looking for things which are *always* safe, this main
4065              * if/else chain selects primarily based on the type of the
4066              * LHS, gradually working its way down from the more dangerous
4067              * to the more restrictive and thus safer cases */
4068 
4069             if (   !l                      /* () = ....; */
4070                 || !r                      /* .... = (); */
4071                 || !(l & ~AAS_SAFE_SCALAR) /* (undef, pos()) = ...; */
4072                 || !(r & ~AAS_SAFE_SCALAR) /* ... = (1,2,length,undef); */
4073                 || (lscalars < 2)          /* (undef, $x) = ... */
4074             ) {
4075                 NOOP; /* always safe */
4076             }
4077             else if (l & AAS_DANGEROUS) {
4078                 /* always dangerous */
4079                 o->op_private |= OPpASSIGN_COMMON_SCALAR;
4080                 o->op_private |= OPpASSIGN_COMMON_AGG;
4081             }
4082             else if (l & (AAS_PKG_SCALAR|AAS_PKG_AGG)) {
4083                 /* package vars are always dangerous - too many
4084                  * aliasing possibilities */
4085                 if (l & AAS_PKG_SCALAR)
4086                     o->op_private |= OPpASSIGN_COMMON_SCALAR;
4087                 if (l & AAS_PKG_AGG)
4088                     o->op_private |= OPpASSIGN_COMMON_AGG;
4089             }
4090             else if (l & ( AAS_MY_SCALAR|AAS_MY_AGG
4091                           |AAS_LEX_SCALAR|AAS_LEX_AGG))
4092             {
4093                 /* LHS contains only lexicals and safe ops */
4094 
4095                 if (l & (AAS_MY_AGG|AAS_LEX_AGG))
4096                     o->op_private |= OPpASSIGN_COMMON_AGG;
4097 
4098                 if (l & (AAS_MY_SCALAR|AAS_LEX_SCALAR)) {
4099                     if (lr & AAS_LEX_SCALAR_COMM)
4100                         o->op_private |= OPpASSIGN_COMMON_SCALAR;
4101                     else if (   !(l & AAS_LEX_SCALAR)
4102                              && (r & AAS_DEFAV))
4103                     {
4104                         /* falsely mark
4105                          *    my (...) = @_
4106                          * as scalar-safe for performance reasons.
4107                          * (it will still have been marked _AGG if necessary */
4108                         NOOP;
4109                     }
4110                     else if (r  & (AAS_PKG_SCALAR|AAS_PKG_AGG|AAS_DANGEROUS))
4111                         /* if there are only lexicals on the LHS and no
4112                          * common ones on the RHS, then we assume that the
4113                          * only way those lexicals could also get
4114                          * on the RHS is via some sort of dereffing or
4115                          * closure, e.g.
4116                          *    $r = \$lex;
4117                          *    ($lex, $x) = (1, $$r)
4118                          * and in this case we assume the var must have
4119                          *  a bumped ref count. So if its ref count is 1,
4120                          *  it must only be on the LHS.
4121                          */
4122                         o->op_private |= OPpASSIGN_COMMON_RC1;
4123                 }
4124             }
4125 
4126             /* ... = ($x)
4127              * may have to handle aggregate on LHS, but we can't
4128              * have common scalars. */
4129             if (rscalars < 2)
4130                 o->op_private &=
4131                         ~(OPpASSIGN_COMMON_SCALAR|OPpASSIGN_COMMON_RC1);
4132 
4133             if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
4134                 S_check_for_bool_cxt(o, 1, OPpASSIGN_TRUEBOOL, 0);
4135             break;
4136         }
4137 
4138         case OP_REF:
4139         case OP_BLESSED:
4140             /* if the op is used in boolean context, set the TRUEBOOL flag
4141              * which enables an optimisation at runtime which avoids creating
4142              * a stack temporary for known-true package names */
4143             if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
4144                 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, OPpMAYBE_TRUEBOOL);
4145             break;
4146 
4147         case OP_LENGTH:
4148             /* see if the op is used in known boolean context,
4149              * but not if OA_TARGLEX optimisation is enabled */
4150             if (   (o->op_flags & OPf_WANT) == OPf_WANT_SCALAR
4151                 && !(o->op_private & OPpTARGET_MY)
4152             )
4153                 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0);
4154             break;
4155 
4156         case OP_POS:
4157             /* see if the op is used in known boolean context */
4158             if ((o->op_flags & OPf_WANT) == OPf_WANT_SCALAR)
4159                 S_check_for_bool_cxt(o, 1, OPpTRUEBOOL, 0);
4160             break;
4161 
4162         case OP_CUSTOM: {
4163             Perl_cpeep_t cpeep =
4164                 XopENTRYCUSTOM(o, xop_peep);
4165             if (cpeep)
4166                 cpeep(aTHX_ o, oldop);
4167             break;
4168         }
4169 
4170         }
4171         /* did we just null the current op? If so, re-process it to handle
4172          * eliding "empty" ops from the chain */
4173         if (o->op_type == OP_NULL && oldop && oldop->op_next == o) {
4174             o->op_opt = 0;
4175             o = oldop;
4176         }
4177         else {
4178             oldoldop = oldop;
4179             oldop = o;
4180         }
4181     }
4182     LEAVE;
4183 }
4184 
4185 void
Perl_peep(pTHX_ OP * o)4186 Perl_peep(pTHX_ OP *o)
4187 {
4188     CALL_RPEEP(o);
4189 }
4190 
4191 /*
4192  * ex: set ts=8 sts=4 sw=4 et:
4193  */
4194