1{-
2(c) The GRASP/AQUA Project, Glasgow University, 1992-1998
3
4\section[SimplCore]{Driver for simplifying @Core@ programs}
5-}
6
7{-# LANGUAGE CPP #-}
8
9module SimplCore ( core2core, simplifyExpr ) where
10
11#include "HsVersions.h"
12
13import GhcPrelude
14
15import DynFlags
16import CoreSyn
17import HscTypes
18import CSE              ( cseProgram )
19import Rules            ( mkRuleBase, unionRuleBase,
20                          extendRuleBaseList, ruleCheckProgram, addRuleInfo,
21                          getRules )
22import PprCore          ( pprCoreBindings, pprCoreExpr )
23import OccurAnal        ( occurAnalysePgm, occurAnalyseExpr )
24import IdInfo
25import CoreStats        ( coreBindsSize, coreBindsStats, exprSize )
26import CoreUtils        ( mkTicks, stripTicksTop )
27import CoreLint         ( endPass, lintPassResult, dumpPassResult,
28                          lintAnnots )
29import Simplify         ( simplTopBinds, simplExpr, simplRules )
30import SimplUtils       ( simplEnvForGHCi, activeRule, activeUnfolding )
31import SimplEnv
32import SimplMonad
33import CoreMonad
34import qualified ErrUtils as Err
35import FloatIn          ( floatInwards )
36import FloatOut         ( floatOutwards )
37import FamInstEnv
38import Id
39import ErrUtils         ( withTiming, withTimingD )
40import BasicTypes       ( CompilerPhase(..), isDefaultInlinePragma, defaultInlinePragma )
41import VarSet
42import VarEnv
43import LiberateCase     ( liberateCase )
44import SAT              ( doStaticArgs )
45import Specialise       ( specProgram)
46import SpecConstr       ( specConstrProgram)
47import DmdAnal          ( dmdAnalProgram )
48import CallArity        ( callArityAnalProgram )
49import Exitify          ( exitifyProgram )
50import WorkWrap         ( wwTopBinds )
51import SrcLoc
52import Util
53import Module
54import Plugins          ( withPlugins, installCoreToDos )
55import DynamicLoading  -- ( initializePlugins )
56
57import UniqSupply       ( UniqSupply, mkSplitUniqSupply, splitUniqSupply )
58import UniqFM
59import Outputable
60import Control.Monad
61import qualified GHC.LanguageExtensions as LangExt
62{-
63************************************************************************
64*                                                                      *
65\subsection{The driver for the simplifier}
66*                                                                      *
67************************************************************************
68-}
69
70core2core :: HscEnv -> ModGuts -> IO ModGuts
71core2core hsc_env guts@(ModGuts { mg_module  = mod
72                                , mg_loc     = loc
73                                , mg_deps    = deps
74                                , mg_rdr_env = rdr_env })
75  = do { -- make sure all plugins are loaded
76
77       ; let builtin_passes = getCoreToDo dflags
78             orph_mods = mkModuleSet (mod : dep_orphs deps)
79             uniq_mask = 's'
80       ;
81       ; (guts2, stats) <- runCoreM hsc_env hpt_rule_base uniq_mask mod
82                                    orph_mods print_unqual loc $
83                           do { hsc_env' <- getHscEnv
84                              ; dflags' <- liftIO $ initializePlugins hsc_env'
85                                                      (hsc_dflags hsc_env')
86                              ; all_passes <- withPlugins dflags'
87                                                installCoreToDos
88                                                builtin_passes
89                              ; runCorePasses all_passes guts }
90
91       ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl_stats
92             "Grand total simplifier statistics"
93             (pprSimplCount stats)
94
95       ; return guts2 }
96  where
97    dflags         = hsc_dflags hsc_env
98    home_pkg_rules = hptRules hsc_env (dep_mods deps)
99    hpt_rule_base  = mkRuleBase home_pkg_rules
100    print_unqual   = mkPrintUnqualified dflags rdr_env
101    -- mod: get the module out of the current HscEnv so we can retrieve it from the monad.
102    -- This is very convienent for the users of the monad (e.g. plugins do not have to
103    -- consume the ModGuts to find the module) but somewhat ugly because mg_module may
104    -- _theoretically_ be changed during the Core pipeline (it's part of ModGuts), which
105    -- would mean our cached value would go out of date.
106
107{-
108************************************************************************
109*                                                                      *
110           Generating the main optimisation pipeline
111*                                                                      *
112************************************************************************
113-}
114
115getCoreToDo :: DynFlags -> [CoreToDo]
116getCoreToDo dflags
117  = flatten_todos core_todo
118  where
119    opt_level     = optLevel           dflags
120    phases        = simplPhases        dflags
121    max_iter      = maxSimplIterations dflags
122    rule_check    = ruleCheck          dflags
123    call_arity    = gopt Opt_CallArity                    dflags
124    exitification = gopt Opt_Exitification                dflags
125    strictness    = gopt Opt_Strictness                   dflags
126    full_laziness = gopt Opt_FullLaziness                 dflags
127    do_specialise = gopt Opt_Specialise                   dflags
128    do_float_in   = gopt Opt_FloatIn                      dflags
129    cse           = gopt Opt_CSE                          dflags
130    spec_constr   = gopt Opt_SpecConstr                   dflags
131    liberate_case = gopt Opt_LiberateCase                 dflags
132    late_dmd_anal = gopt Opt_LateDmdAnal                  dflags
133    late_specialise = gopt Opt_LateSpecialise             dflags
134    static_args   = gopt Opt_StaticArgumentTransformation dflags
135    rules_on      = gopt Opt_EnableRewriteRules           dflags
136    eta_expand_on = gopt Opt_DoLambdaEtaExpansion         dflags
137    ww_on         = gopt Opt_WorkerWrapper                dflags
138    static_ptrs   = xopt LangExt.StaticPointers           dflags
139
140    maybe_rule_check phase = runMaybe rule_check (CoreDoRuleCheck phase)
141
142    maybe_strictness_before phase
143      = runWhen (phase `elem` strictnessBefore dflags) CoreDoStrictness
144
145    base_mode = SimplMode { sm_phase      = panic "base_mode"
146                          , sm_names      = []
147                          , sm_dflags     = dflags
148                          , sm_rules      = rules_on
149                          , sm_eta_expand = eta_expand_on
150                          , sm_inline     = True
151                          , sm_case_case  = True }
152
153    simpl_phase phase names iter
154      = CoreDoPasses
155      $   [ maybe_strictness_before phase
156          , CoreDoSimplify iter
157                (base_mode { sm_phase = Phase phase
158                           , sm_names = names })
159
160          , maybe_rule_check (Phase phase) ]
161
162    simpl_phases = CoreDoPasses [ simpl_phase phase ["main"] max_iter
163                                | phase <- [phases, phases-1 .. 1] ]
164
165
166        -- initial simplify: mk specialiser happy: minimum effort please
167    simpl_gently = CoreDoSimplify max_iter
168                       (base_mode { sm_phase = InitialPhase
169                                  , sm_names = ["Gentle"]
170                                  , sm_rules = rules_on   -- Note [RULEs enabled in SimplGently]
171                                  , sm_inline = True
172                                              -- See Note [Inline in InitialPhase]
173                                  , sm_case_case = False })
174                          -- Don't do case-of-case transformations.
175                          -- This makes full laziness work better
176
177    strictness_pass = if ww_on
178                       then [CoreDoStrictness,CoreDoWorkerWrapper]
179                       else [CoreDoStrictness]
180
181
182    -- New demand analyser
183    demand_analyser = (CoreDoPasses (
184                           strictness_pass ++
185                           [simpl_phase 0 ["post-worker-wrapper"] max_iter]
186                           ))
187
188    -- Static forms are moved to the top level with the FloatOut pass.
189    -- See Note [Grand plan for static forms] in StaticPtrTable.
190    static_ptrs_float_outwards =
191      runWhen static_ptrs $ CoreDoPasses
192        [ simpl_gently -- Float Out can't handle type lets (sometimes created
193                       -- by simpleOptPgm via mkParallelBindings)
194        , CoreDoFloatOutwards FloatOutSwitches
195          { floatOutLambdas   = Just 0
196          , floatOutConstants = True
197          , floatOutOverSatApps = False
198          , floatToTopLevelOnly = True
199          }
200        ]
201
202    core_todo =
203     if opt_level == 0 then
204       [ static_ptrs_float_outwards,
205         CoreDoSimplify max_iter
206             (base_mode { sm_phase = Phase 0
207                        , sm_names = ["Non-opt simplification"] })
208       ]
209
210     else {- opt_level >= 1 -} [
211
212    -- We want to do the static argument transform before full laziness as it
213    -- may expose extra opportunities to float things outwards. However, to fix
214    -- up the output of the transformation we need at do at least one simplify
215    -- after this before anything else
216        runWhen static_args (CoreDoPasses [ simpl_gently, CoreDoStaticArgs ]),
217
218        -- initial simplify: mk specialiser happy: minimum effort please
219        simpl_gently,
220
221        -- Specialisation is best done before full laziness
222        -- so that overloaded functions have all their dictionary lambdas manifest
223        runWhen do_specialise CoreDoSpecialising,
224
225        if full_laziness then
226           CoreDoFloatOutwards FloatOutSwitches {
227                                 floatOutLambdas   = Just 0,
228                                 floatOutConstants = True,
229                                 floatOutOverSatApps = False,
230                                 floatToTopLevelOnly = False }
231                -- Was: gentleFloatOutSwitches
232                --
233                -- I have no idea why, but not floating constants to
234                -- top level is very bad in some cases.
235                --
236                -- Notably: p_ident in spectral/rewrite
237                --          Changing from "gentle" to "constantsOnly"
238                --          improved rewrite's allocation by 19%, and
239                --          made 0.0% difference to any other nofib
240                --          benchmark
241                --
242                -- Not doing floatOutOverSatApps yet, we'll do
243                -- that later on when we've had a chance to get more
244                -- accurate arity information.  In fact it makes no
245                -- difference at all to performance if we do it here,
246                -- but maybe we save some unnecessary to-and-fro in
247                -- the simplifier.
248        else
249           -- Even with full laziness turned off, we still need to float static
250           -- forms to the top level. See Note [Grand plan for static forms] in
251           -- StaticPtrTable.
252           static_ptrs_float_outwards,
253
254        simpl_phases,
255
256                -- Phase 0: allow all Ids to be inlined now
257                -- This gets foldr inlined before strictness analysis
258
259                -- At least 3 iterations because otherwise we land up with
260                -- huge dead expressions because of an infelicity in the
261                -- simplifier.
262                --      let k = BIG in foldr k z xs
263                -- ==>  let k = BIG in letrec go = \xs -> ...(k x).... in go xs
264                -- ==>  let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs
265                -- Don't stop now!
266        simpl_phase 0 ["main"] (max max_iter 3),
267
268        runWhen do_float_in CoreDoFloatInwards,
269            -- Run float-inwards immediately before the strictness analyser
270            -- Doing so pushes bindings nearer their use site and hence makes
271            -- them more likely to be strict. These bindings might only show
272            -- up after the inlining from simplification.  Example in fulsom,
273            -- Csg.calc, where an arg of timesDouble thereby becomes strict.
274
275        runWhen call_arity $ CoreDoPasses
276            [ CoreDoCallArity
277            , simpl_phase 0 ["post-call-arity"] max_iter
278            ],
279
280        runWhen strictness demand_analyser,
281
282        runWhen exitification CoreDoExitify,
283            -- See note [Placement of the exitification pass]
284
285        runWhen full_laziness $
286           CoreDoFloatOutwards FloatOutSwitches {
287                                 floatOutLambdas     = floatLamArgs dflags,
288                                 floatOutConstants   = True,
289                                 floatOutOverSatApps = True,
290                                 floatToTopLevelOnly = False },
291                -- nofib/spectral/hartel/wang doubles in speed if you
292                -- do full laziness late in the day.  It only happens
293                -- after fusion and other stuff, so the early pass doesn't
294                -- catch it.  For the record, the redex is
295                --        f_el22 (f_el21 r_midblock)
296
297
298        runWhen cse CoreCSE,
299                -- We want CSE to follow the final full-laziness pass, because it may
300                -- succeed in commoning up things floated out by full laziness.
301                -- CSE used to rely on the no-shadowing invariant, but it doesn't any more
302
303        runWhen do_float_in CoreDoFloatInwards,
304
305        maybe_rule_check (Phase 0),
306
307                -- Case-liberation for -O2.  This should be after
308                -- strictness analysis and the simplification which follows it.
309        runWhen liberate_case (CoreDoPasses [
310            CoreLiberateCase,
311            simpl_phase 0 ["post-liberate-case"] max_iter
312            ]),         -- Run the simplifier after LiberateCase to vastly
313                        -- reduce the possibility of shadowing
314                        -- Reason: see Note [Shadowing] in SpecConstr.hs
315
316        runWhen spec_constr CoreDoSpecConstr,
317
318        maybe_rule_check (Phase 0),
319
320        runWhen late_specialise
321          (CoreDoPasses [ CoreDoSpecialising
322                        , simpl_phase 0 ["post-late-spec"] max_iter]),
323
324        -- LiberateCase can yield new CSE opportunities because it peels
325        -- off one layer of a recursive function (concretely, I saw this
326        -- in wheel-sieve1), and I'm guessing that SpecConstr can too
327        -- And CSE is a very cheap pass. So it seems worth doing here.
328        runWhen ((liberate_case || spec_constr) && cse) CoreCSE,
329
330        -- Final clean-up simplification:
331        simpl_phase 0 ["final"] max_iter,
332
333        runWhen late_dmd_anal $ CoreDoPasses (
334            strictness_pass ++
335            [simpl_phase 0 ["post-late-ww"] max_iter]
336          ),
337
338        -- Final run of the demand_analyser, ensures that one-shot thunks are
339        -- really really one-shot thunks. Only needed if the demand analyser
340        -- has run at all. See Note [Final Demand Analyser run] in DmdAnal
341        -- It is EXTREMELY IMPORTANT to run this pass, otherwise execution
342        -- can become /exponentially/ more expensive. See #11731, #12996.
343        runWhen (strictness || late_dmd_anal) CoreDoStrictness,
344
345        maybe_rule_check (Phase 0)
346     ]
347
348    -- Remove 'CoreDoNothing' and flatten 'CoreDoPasses' for clarity.
349    flatten_todos [] = []
350    flatten_todos (CoreDoNothing : rest) = flatten_todos rest
351    flatten_todos (CoreDoPasses passes : rest) =
352      flatten_todos passes ++ flatten_todos rest
353    flatten_todos (todo : rest) = todo : flatten_todos rest
354
355{- Note [Inline in InitialPhase]
356~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
357In GHC 8 and earlier we did not inline anything in the InitialPhase. But that is
358confusing for users because when they say INLINE they expect the function to inline
359right away.
360
361So now we do inlining immediately, even in the InitialPhase, assuming that the
362Id's Activation allows it.
363
364This is a surprisingly big deal. Compiler performance improved a lot
365when I made this change:
366
367   perf/compiler/T5837.run            T5837 [stat too good] (normal)
368   perf/compiler/parsing001.run       parsing001 [stat too good] (normal)
369   perf/compiler/T12234.run           T12234 [stat too good] (optasm)
370   perf/compiler/T9020.run            T9020 [stat too good] (optasm)
371   perf/compiler/T3064.run            T3064 [stat too good] (normal)
372   perf/compiler/T9961.run            T9961 [stat too good] (normal)
373   perf/compiler/T13056.run           T13056 [stat too good] (optasm)
374   perf/compiler/T9872d.run           T9872d [stat too good] (normal)
375   perf/compiler/T783.run             T783 [stat too good] (normal)
376   perf/compiler/T12227.run           T12227 [stat too good] (normal)
377   perf/should_run/lazy-bs-alloc.run  lazy-bs-alloc [stat too good] (normal)
378   perf/compiler/T1969.run            T1969 [stat too good] (normal)
379   perf/compiler/T9872a.run           T9872a [stat too good] (normal)
380   perf/compiler/T9872c.run           T9872c [stat too good] (normal)
381   perf/compiler/T9872b.run           T9872b [stat too good] (normal)
382   perf/compiler/T9872d.run           T9872d [stat too good] (normal)
383
384Note [RULEs enabled in SimplGently]
385~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
386RULES are enabled when doing "gentle" simplification.  Two reasons:
387
388  * We really want the class-op cancellation to happen:
389        op (df d1 d2) --> $cop3 d1 d2
390    because this breaks the mutual recursion between 'op' and 'df'
391
392  * I wanted the RULE
393        lift String ===> ...
394    to work in Template Haskell when simplifying
395    splices, so we get simpler code for literal strings
396
397But watch out: list fusion can prevent floating.  So use phase control
398to switch off those rules until after floating.
399
400************************************************************************
401*                                                                      *
402                  The CoreToDo interpreter
403*                                                                      *
404************************************************************************
405-}
406
407runCorePasses :: [CoreToDo] -> ModGuts -> CoreM ModGuts
408runCorePasses passes guts
409  = foldM do_pass guts passes
410  where
411    do_pass guts CoreDoNothing = return guts
412    do_pass guts (CoreDoPasses ps) = runCorePasses ps guts
413    do_pass guts pass = do
414       withTimingD (ppr pass <+> brackets (ppr mod))
415                   (const ()) $ do
416            { guts' <- lintAnnots (ppr pass) (doCorePass pass) guts
417            ; endPass pass (mg_binds guts') (mg_rules guts')
418            ; return guts' }
419
420    mod = mg_module guts
421
422doCorePass :: CoreToDo -> ModGuts -> CoreM ModGuts
423doCorePass pass@(CoreDoSimplify {})  = {-# SCC "Simplify" #-}
424                                       simplifyPgm pass
425
426doCorePass CoreCSE                   = {-# SCC "CommonSubExpr" #-}
427                                       doPass cseProgram
428
429doCorePass CoreLiberateCase          = {-# SCC "LiberateCase" #-}
430                                       doPassD liberateCase
431
432doCorePass CoreDoFloatInwards        = {-# SCC "FloatInwards" #-}
433                                       floatInwards
434
435doCorePass (CoreDoFloatOutwards f)   = {-# SCC "FloatOutwards" #-}
436                                       doPassDUM (floatOutwards f)
437
438doCorePass CoreDoStaticArgs          = {-# SCC "StaticArgs" #-}
439                                       doPassU doStaticArgs
440
441doCorePass CoreDoCallArity           = {-# SCC "CallArity" #-}
442                                       doPassD callArityAnalProgram
443
444doCorePass CoreDoExitify             = {-# SCC "Exitify" #-}
445                                       doPass exitifyProgram
446
447doCorePass CoreDoStrictness          = {-# SCC "NewStranal" #-}
448                                       doPassDFM dmdAnalProgram
449
450doCorePass CoreDoWorkerWrapper       = {-# SCC "WorkWrap" #-}
451                                       doPassDFU wwTopBinds
452
453doCorePass CoreDoSpecialising        = {-# SCC "Specialise" #-}
454                                       specProgram
455
456doCorePass CoreDoSpecConstr          = {-# SCC "SpecConstr" #-}
457                                       specConstrProgram
458
459doCorePass CoreDoPrintCore              = observe   printCore
460doCorePass (CoreDoRuleCheck phase pat)  = ruleCheckPass phase pat
461doCorePass CoreDoNothing                = return
462doCorePass (CoreDoPasses passes)        = runCorePasses passes
463
464doCorePass (CoreDoPluginPass _ pass) = {-# SCC "Plugin" #-} pass
465
466doCorePass pass@CoreDesugar          = pprPanic "doCorePass" (ppr pass)
467doCorePass pass@CoreDesugarOpt       = pprPanic "doCorePass" (ppr pass)
468doCorePass pass@CoreTidy             = pprPanic "doCorePass" (ppr pass)
469doCorePass pass@CorePrep             = pprPanic "doCorePass" (ppr pass)
470doCorePass pass@CoreOccurAnal        = pprPanic "doCorePass" (ppr pass)
471
472{-
473************************************************************************
474*                                                                      *
475\subsection{Core pass combinators}
476*                                                                      *
477************************************************************************
478-}
479
480printCore :: DynFlags -> CoreProgram -> IO ()
481printCore dflags binds
482    = Err.dumpIfSet dflags True "Print Core" (pprCoreBindings binds)
483
484ruleCheckPass :: CompilerPhase -> String -> ModGuts -> CoreM ModGuts
485ruleCheckPass current_phase pat guts =
486    withTimingD (text "RuleCheck"<+>brackets (ppr $ mg_module guts))
487                (const ()) $ do
488    { rb <- getRuleBase
489    ; dflags <- getDynFlags
490    ; vis_orphs <- getVisibleOrphanMods
491    ; let rule_fn fn = getRules (RuleEnv rb vis_orphs) fn
492                        ++ (mg_rules guts)
493    ; liftIO $ putLogMsg dflags NoReason Err.SevDump noSrcSpan
494                   (defaultDumpStyle dflags)
495                   (ruleCheckProgram current_phase pat
496                      rule_fn (mg_binds guts))
497    ; return guts }
498
499doPassDUM :: (DynFlags -> UniqSupply -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
500doPassDUM do_pass = doPassM $ \binds -> do
501    dflags <- getDynFlags
502    us     <- getUniqueSupplyM
503    liftIO $ do_pass dflags us binds
504
505doPassDM :: (DynFlags -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
506doPassDM do_pass = doPassDUM (\dflags -> const (do_pass dflags))
507
508doPassD :: (DynFlags -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
509doPassD do_pass = doPassDM (\dflags -> return . do_pass dflags)
510
511doPassDU :: (DynFlags -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
512doPassDU do_pass = doPassDUM (\dflags us -> return . do_pass dflags us)
513
514doPassU :: (UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
515doPassU do_pass = doPassDU (const do_pass)
516
517doPassDFM :: (DynFlags -> FamInstEnvs -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts
518doPassDFM do_pass guts = do
519    dflags <- getDynFlags
520    p_fam_env <- getPackageFamInstEnv
521    let fam_envs = (p_fam_env, mg_fam_inst_env guts)
522    doPassM (liftIO . do_pass dflags fam_envs) guts
523
524doPassDFU :: (DynFlags -> FamInstEnvs -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
525doPassDFU do_pass guts = do
526    dflags <- getDynFlags
527    us     <- getUniqueSupplyM
528    p_fam_env <- getPackageFamInstEnv
529    let fam_envs = (p_fam_env, mg_fam_inst_env guts)
530    doPass (do_pass dflags fam_envs us) guts
531
532-- Most passes return no stats and don't change rules: these combinators
533-- let us lift them to the full blown ModGuts+CoreM world
534doPassM :: Monad m => (CoreProgram -> m CoreProgram) -> ModGuts -> m ModGuts
535doPassM bind_f guts = do
536    binds' <- bind_f (mg_binds guts)
537    return (guts { mg_binds = binds' })
538
539doPass :: (CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts
540doPass bind_f guts = return $ guts { mg_binds = bind_f (mg_binds guts) }
541
542-- Observer passes just peek; don't modify the bindings at all
543observe :: (DynFlags -> CoreProgram -> IO a) -> ModGuts -> CoreM ModGuts
544observe do_pass = doPassM $ \binds -> do
545    dflags <- getDynFlags
546    _ <- liftIO $ do_pass dflags binds
547    return binds
548
549{-
550************************************************************************
551*                                                                      *
552        Gentle simplification
553*                                                                      *
554************************************************************************
555-}
556
557simplifyExpr :: DynFlags -- includes spec of what core-to-core passes to do
558             -> CoreExpr
559             -> IO CoreExpr
560-- simplifyExpr is called by the driver to simplify an
561-- expression typed in at the interactive prompt
562--
563-- Also used by Template Haskell
564simplifyExpr dflags expr
565  = withTiming dflags (text "Simplify [expr]") (const ()) $
566    do  {
567        ; us <-  mkSplitUniqSupply 's'
568
569        ; let sz = exprSize expr
570
571        ; (expr', counts) <- initSmpl dflags emptyRuleEnv
572                               emptyFamInstEnvs us sz
573                               (simplExprGently (simplEnvForGHCi dflags) expr)
574
575        ; Err.dumpIfSet dflags (dopt Opt_D_dump_simpl_stats dflags)
576                  "Simplifier statistics" (pprSimplCount counts)
577
578        ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl "Simplified expression"
579                        (pprCoreExpr expr')
580
581        ; return expr'
582        }
583
584simplExprGently :: SimplEnv -> CoreExpr -> SimplM CoreExpr
585-- Simplifies an expression
586--      does occurrence analysis, then simplification
587--      and repeats (twice currently) because one pass
588--      alone leaves tons of crud.
589-- Used (a) for user expressions typed in at the interactive prompt
590--      (b) the LHS and RHS of a RULE
591--      (c) Template Haskell splices
592--
593-- The name 'Gently' suggests that the SimplMode is SimplGently,
594-- and in fact that is so.... but the 'Gently' in simplExprGently doesn't
595-- enforce that; it just simplifies the expression twice
596
597-- It's important that simplExprGently does eta reduction; see
598-- Note [Simplifying the left-hand side of a RULE] above.  The
599-- simplifier does indeed do eta reduction (it's in Simplify.completeLam)
600-- but only if -O is on.
601
602simplExprGently env expr = do
603    expr1 <- simplExpr env (occurAnalyseExpr expr)
604    simplExpr env (occurAnalyseExpr expr1)
605
606{-
607************************************************************************
608*                                                                      *
609\subsection{The driver for the simplifier}
610*                                                                      *
611************************************************************************
612-}
613
614simplifyPgm :: CoreToDo -> ModGuts -> CoreM ModGuts
615simplifyPgm pass guts
616  = do { hsc_env <- getHscEnv
617       ; us <- getUniqueSupplyM
618       ; rb <- getRuleBase
619       ; liftIOWithCount $
620         simplifyPgmIO pass hsc_env us rb guts }
621
622simplifyPgmIO :: CoreToDo
623              -> HscEnv
624              -> UniqSupply
625              -> RuleBase
626              -> ModGuts
627              -> IO (SimplCount, ModGuts)  -- New bindings
628
629simplifyPgmIO pass@(CoreDoSimplify max_iterations mode)
630              hsc_env us hpt_rule_base
631              guts@(ModGuts { mg_module = this_mod
632                            , mg_rdr_env = rdr_env
633                            , mg_deps = deps
634                            , mg_binds = binds, mg_rules = rules
635                            , mg_fam_inst_env = fam_inst_env })
636  = do { (termination_msg, it_count, counts_out, guts')
637           <- do_iteration us 1 [] binds rules
638
639        ; Err.dumpIfSet dflags (dopt Opt_D_verbose_core2core dflags &&
640                                dopt Opt_D_dump_simpl_stats  dflags)
641                  "Simplifier statistics for following pass"
642                  (vcat [text termination_msg <+> text "after" <+> ppr it_count
643                                              <+> text "iterations",
644                         blankLine,
645                         pprSimplCount counts_out])
646
647        ; return (counts_out, guts')
648    }
649  where
650    dflags       = hsc_dflags hsc_env
651    print_unqual = mkPrintUnqualified dflags rdr_env
652    simpl_env    = mkSimplEnv mode
653    active_rule  = activeRule mode
654    active_unf   = activeUnfolding mode
655
656    do_iteration :: UniqSupply
657                 -> Int          -- Counts iterations
658                 -> [SimplCount] -- Counts from earlier iterations, reversed
659                 -> CoreProgram  -- Bindings in
660                 -> [CoreRule]   -- and orphan rules
661                 -> IO (String, Int, SimplCount, ModGuts)
662
663    do_iteration us iteration_no counts_so_far binds rules
664        -- iteration_no is the number of the iteration we are
665        -- about to begin, with '1' for the first
666      | iteration_no > max_iterations   -- Stop if we've run out of iterations
667      = WARN( debugIsOn && (max_iterations > 2)
668            , hang (text "Simplifier bailing out after" <+> int max_iterations
669                    <+> text "iterations"
670                    <+> (brackets $ hsep $ punctuate comma $
671                         map (int . simplCountN) (reverse counts_so_far)))
672                 2 (text "Size =" <+> ppr (coreBindsStats binds)))
673
674                -- Subtract 1 from iteration_no to get the
675                -- number of iterations we actually completed
676        return ( "Simplifier baled out", iteration_no - 1
677               , totalise counts_so_far
678               , guts { mg_binds = binds, mg_rules = rules } )
679
680      -- Try and force thunks off the binds; significantly reduces
681      -- space usage, especially with -O.  JRS, 000620.
682      | let sz = coreBindsSize binds
683      , () <- sz `seq` ()     -- Force it
684      = do {
685                -- Occurrence analysis
686           let { tagged_binds = {-# SCC "OccAnal" #-}
687                     occurAnalysePgm this_mod active_unf active_rule rules
688                                     binds
689               } ;
690           Err.dumpIfSet_dyn dflags Opt_D_dump_occur_anal "Occurrence analysis"
691                     (pprCoreBindings tagged_binds);
692
693                -- Get any new rules, and extend the rule base
694                -- See Note [Overall plumbing for rules] in Rules.hs
695                -- We need to do this regularly, because simplification can
696                -- poke on IdInfo thunks, which in turn brings in new rules
697                -- behind the scenes.  Otherwise there's a danger we'll simply
698                -- miss the rules for Ids hidden inside imported inlinings
699           eps <- hscEPS hsc_env ;
700           let  { rule_base1 = unionRuleBase hpt_rule_base (eps_rule_base eps)
701                ; rule_base2 = extendRuleBaseList rule_base1 rules
702                ; fam_envs = (eps_fam_inst_env eps, fam_inst_env)
703                ; vis_orphs = this_mod : dep_orphs deps } ;
704
705                -- Simplify the program
706           ((binds1, rules1), counts1) <-
707             initSmpl dflags (mkRuleEnv rule_base2 vis_orphs) fam_envs us1 sz $
708               do { (floats, env1) <- {-# SCC "SimplTopBinds" #-}
709                                      simplTopBinds simpl_env tagged_binds
710
711                      -- Apply the substitution to rules defined in this module
712                      -- for imported Ids.  Eg  RULE map my_f = blah
713                      -- If we have a substitution my_f :-> other_f, we'd better
714                      -- apply it to the rule to, or it'll never match
715                  ; rules1 <- simplRules env1 Nothing rules Nothing
716
717                  ; return (getTopFloatBinds floats, rules1) } ;
718
719                -- Stop if nothing happened; don't dump output
720                -- See Note [Which transformations are innocuous] in CoreMonad
721           if isZeroSimplCount counts1 then
722                return ( "Simplifier reached fixed point", iteration_no
723                       , totalise (counts1 : counts_so_far)  -- Include "free" ticks
724                       , guts { mg_binds = binds1, mg_rules = rules1 } )
725           else do {
726                -- Short out indirections
727                -- We do this *after* at least one run of the simplifier
728                -- because indirection-shorting uses the export flag on *occurrences*
729                -- and that isn't guaranteed to be ok until after the first run propagates
730                -- stuff from the binding site to its occurrences
731                --
732                -- ToDo: alas, this means that indirection-shorting does not happen at all
733                --       if the simplifier does nothing (not common, I know, but unsavoury)
734           let { binds2 = {-# SCC "ZapInd" #-} shortOutIndirections binds1 } ;
735
736                -- Dump the result of this iteration
737           dump_end_iteration dflags print_unqual iteration_no counts1 binds2 rules1 ;
738           lintPassResult hsc_env pass binds2 ;
739
740                -- Loop
741           do_iteration us2 (iteration_no + 1) (counts1:counts_so_far) binds2 rules1
742           } }
743      | otherwise = panic "do_iteration"
744      where
745        (us1, us2) = splitUniqSupply us
746
747        -- Remember the counts_so_far are reversed
748        totalise :: [SimplCount] -> SimplCount
749        totalise = foldr (\c acc -> acc `plusSimplCount` c)
750                         (zeroSimplCount dflags)
751
752simplifyPgmIO _ _ _ _ _ = panic "simplifyPgmIO"
753
754-------------------
755dump_end_iteration :: DynFlags -> PrintUnqualified -> Int
756                   -> SimplCount -> CoreProgram -> [CoreRule] -> IO ()
757dump_end_iteration dflags print_unqual iteration_no counts binds rules
758  = dumpPassResult dflags print_unqual mb_flag hdr pp_counts binds rules
759  where
760    mb_flag | dopt Opt_D_dump_simpl_iterations dflags = Just Opt_D_dump_simpl_iterations
761            | otherwise                               = Nothing
762            -- Show details if Opt_D_dump_simpl_iterations is on
763
764    hdr = text "Simplifier iteration=" <> int iteration_no
765    pp_counts = vcat [ text "---- Simplifier counts for" <+> hdr
766                     , pprSimplCount counts
767                     , text "---- End of simplifier counts for" <+> hdr ]
768
769{-
770************************************************************************
771*                                                                      *
772                Shorting out indirections
773*                                                                      *
774************************************************************************
775
776If we have this:
777
778        x_local = <expression>
779        ...bindings...
780        x_exported = x_local
781
782where x_exported is exported, and x_local is not, then we replace it with this:
783
784        x_exported = <expression>
785        x_local = x_exported
786        ...bindings...
787
788Without this we never get rid of the x_exported = x_local thing.  This
789save a gratuitous jump (from \tr{x_exported} to \tr{x_local}), and
790makes strictness information propagate better.  This used to happen in
791the final phase, but it's tidier to do it here.
792
793Note [Messing up the exported Id's RULES]
794~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
795We must be careful about discarding (obviously) or even merging the
796RULES on the exported Id. The example that went bad on me at one stage
797was this one:
798
799    iterate :: (a -> a) -> a -> [a]
800        [Exported]
801    iterate = iterateList
802
803    iterateFB c f x = x `c` iterateFB c f (f x)
804    iterateList f x =  x : iterateList f (f x)
805        [Not exported]
806
807    {-# RULES
808    "iterate"   forall f x.     iterate f x = build (\c _n -> iterateFB c f x)
809    "iterateFB"                 iterateFB (:) = iterateList
810     #-}
811
812This got shorted out to:
813
814    iterateList :: (a -> a) -> a -> [a]
815    iterateList = iterate
816
817    iterateFB c f x = x `c` iterateFB c f (f x)
818    iterate f x =  x : iterate f (f x)
819
820    {-# RULES
821    "iterate"   forall f x.     iterate f x = build (\c _n -> iterateFB c f x)
822    "iterateFB"                 iterateFB (:) = iterate
823     #-}
824
825And now we get an infinite loop in the rule system
826        iterate f x -> build (\cn -> iterateFB c f x)
827                    -> iterateFB (:) f x
828                    -> iterate f x
829
830Old "solution":
831        use rule switching-off pragmas to get rid
832        of iterateList in the first place
833
834But in principle the user *might* want rules that only apply to the Id
835he says.  And inline pragmas are similar
836   {-# NOINLINE f #-}
837   f = local
838   local = <stuff>
839Then we do not want to get rid of the NOINLINE.
840
841Hence hasShortableIdinfo.
842
843
844Note [Rules and indirection-zapping]
845~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
846Problem: what if x_exported has a RULE that mentions something in ...bindings...?
847Then the things mentioned can be out of scope!  Solution
848 a) Make sure that in this pass the usage-info from x_exported is
849        available for ...bindings...
850 b) If there are any such RULES, rec-ify the entire top-level.
851    It'll get sorted out next time round
852
853Other remarks
854~~~~~~~~~~~~~
855If more than one exported thing is equal to a local thing (i.e., the
856local thing really is shared), then we do one only:
857\begin{verbatim}
858        x_local = ....
859        x_exported1 = x_local
860        x_exported2 = x_local
861==>
862        x_exported1 = ....
863
864        x_exported2 = x_exported1
865\end{verbatim}
866
867We rely on prior eta reduction to simplify things like
868\begin{verbatim}
869        x_exported = /\ tyvars -> x_local tyvars
870==>
871        x_exported = x_local
872\end{verbatim}
873Hence,there's a possibility of leaving unchanged something like this:
874\begin{verbatim}
875        x_local = ....
876        x_exported1 = x_local Int
877\end{verbatim}
878By the time we've thrown away the types in STG land this
879could be eliminated.  But I don't think it's very common
880and it's dangerous to do this fiddling in STG land
881because we might elminate a binding that's mentioned in the
882unfolding for something.
883
884Note [Indirection zapping and ticks]
885~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
886Unfortunately this is another place where we need a special case for
887ticks. The following happens quite regularly:
888
889        x_local = <expression>
890        x_exported = tick<x> x_local
891
892Which we want to become:
893
894        x_exported =  tick<x> <expression>
895
896As it makes no sense to keep the tick and the expression on separate
897bindings. Note however that that this might increase the ticks scoping
898over the execution of x_local, so we can only do this for floatable
899ticks. More often than not, other references will be unfoldings of
900x_exported, and therefore carry the tick anyway.
901-}
902
903type IndEnv = IdEnv (Id, [Tickish Var]) -- Maps local_id -> exported_id, ticks
904
905shortOutIndirections :: CoreProgram -> CoreProgram
906shortOutIndirections binds
907  | isEmptyVarEnv ind_env = binds
908  | no_need_to_flatten    = binds'                      -- See Note [Rules and indirect-zapping]
909  | otherwise             = [Rec (flattenBinds binds')] -- for this no_need_to_flatten stuff
910  where
911    ind_env            = makeIndEnv binds
912    -- These exported Ids are the subjects  of the indirection-elimination
913    exp_ids            = map fst $ nonDetEltsUFM ind_env
914      -- It's OK to use nonDetEltsUFM here because we forget the ordering
915      -- by immediately converting to a set or check if all the elements
916      -- satisfy a predicate.
917    exp_id_set         = mkVarSet exp_ids
918    no_need_to_flatten = all (null . ruleInfoRules . idSpecialisation) exp_ids
919    binds'             = concatMap zap binds
920
921    zap (NonRec bndr rhs) = [NonRec b r | (b,r) <- zapPair (bndr,rhs)]
922    zap (Rec pairs)       = [Rec (concatMap zapPair pairs)]
923
924    zapPair (bndr, rhs)
925        | bndr `elemVarSet` exp_id_set
926        = []   -- Kill the exported-id binding
927
928        | Just (exp_id, ticks) <- lookupVarEnv ind_env bndr
929        , (exp_id', lcl_id') <- transferIdInfo exp_id bndr
930        =      -- Turn a local-id binding into two bindings
931               --    exp_id = rhs; lcl_id = exp_id
932          [ (exp_id', mkTicks ticks rhs),
933            (lcl_id', Var exp_id') ]
934
935        | otherwise
936        = [(bndr,rhs)]
937
938makeIndEnv :: [CoreBind] -> IndEnv
939makeIndEnv binds
940  = foldl' add_bind emptyVarEnv binds
941  where
942    add_bind :: IndEnv -> CoreBind -> IndEnv
943    add_bind env (NonRec exported_id rhs) = add_pair env (exported_id, rhs)
944    add_bind env (Rec pairs)              = foldl' add_pair env pairs
945
946    add_pair :: IndEnv -> (Id,CoreExpr) -> IndEnv
947    add_pair env (exported_id, exported)
948        | (ticks, Var local_id) <- stripTicksTop tickishFloatable exported
949        , shortMeOut env exported_id local_id
950        = extendVarEnv env local_id (exported_id, ticks)
951    add_pair env _ = env
952
953-----------------
954shortMeOut :: IndEnv -> Id -> Id -> Bool
955shortMeOut ind_env exported_id local_id
956-- The if-then-else stuff is just so I can get a pprTrace to see
957-- how often I don't get shorting out because of IdInfo stuff
958  = if isExportedId exported_id &&              -- Only if this is exported
959
960       isLocalId local_id &&                    -- Only if this one is defined in this
961                                                --      module, so that we *can* change its
962                                                --      binding to be the exported thing!
963
964       not (isExportedId local_id) &&           -- Only if this one is not itself exported,
965                                                --      since the transformation will nuke it
966
967       not (local_id `elemVarEnv` ind_env)      -- Only if not already substituted for
968    then
969        if hasShortableIdInfo exported_id
970        then True       -- See Note [Messing up the exported Id's IdInfo]
971        else WARN( True, text "Not shorting out:" <+> ppr exported_id )
972             False
973    else
974        False
975
976-----------------
977hasShortableIdInfo :: Id -> Bool
978-- True if there is no user-attached IdInfo on exported_id,
979-- so we can safely discard it
980-- See Note [Messing up the exported Id's IdInfo]
981hasShortableIdInfo id
982  =  isEmptyRuleInfo (ruleInfo info)
983  && isDefaultInlinePragma (inlinePragInfo info)
984  && not (isStableUnfolding (unfoldingInfo info))
985  where
986     info = idInfo id
987
988-----------------
989{- Note [Transferring IdInfo]
990~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
991If we have
992     lcl_id = e; exp_id = lcl_id
993
994and lcl_id has useful IdInfo, we don't want to discard it by going
995     gbl_id = e; lcl_id = gbl_id
996
997Instead, transfer IdInfo from lcl_id to exp_id, specifically
998* (Stable) unfolding
999* Strictness
1000* Rules
1001* Inline pragma
1002
1003Overwriting, rather than merging, seems to work ok.
1004
1005We also zap the InlinePragma on the lcl_id. It might originally
1006have had a NOINLINE, which we have now transferred; and we really
1007want the lcl_id to inline now that its RHS is trivial!
1008-}
1009
1010transferIdInfo :: Id -> Id -> (Id, Id)
1011-- See Note [Transferring IdInfo]
1012transferIdInfo exported_id local_id
1013  = ( modifyIdInfo transfer exported_id
1014    , local_id `setInlinePragma` defaultInlinePragma )
1015  where
1016    local_info = idInfo local_id
1017    transfer exp_info = exp_info `setStrictnessInfo`    strictnessInfo local_info
1018                                 `setUnfoldingInfo`     unfoldingInfo local_info
1019                                 `setInlinePragInfo`    inlinePragInfo local_info
1020                                 `setRuleInfo`          addRuleInfo (ruleInfo exp_info) new_info
1021    new_info = setRuleInfoHead (idName exported_id)
1022                               (ruleInfo local_info)
1023        -- Remember to set the function-name field of the
1024        -- rules as we transfer them from one function to another
1025