1{- 2(c) The GRASP/AQUA Project, Glasgow University, 1992-1998 3 4\section[SimplCore]{Driver for simplifying @Core@ programs} 5-} 6 7{-# LANGUAGE CPP #-} 8 9module SimplCore ( core2core, simplifyExpr ) where 10 11#include "HsVersions.h" 12 13import GhcPrelude 14 15import DynFlags 16import CoreSyn 17import HscTypes 18import CSE ( cseProgram ) 19import Rules ( mkRuleBase, unionRuleBase, 20 extendRuleBaseList, ruleCheckProgram, addRuleInfo, 21 getRules ) 22import PprCore ( pprCoreBindings, pprCoreExpr ) 23import OccurAnal ( occurAnalysePgm, occurAnalyseExpr ) 24import IdInfo 25import CoreStats ( coreBindsSize, coreBindsStats, exprSize ) 26import CoreUtils ( mkTicks, stripTicksTop ) 27import CoreLint ( endPass, lintPassResult, dumpPassResult, 28 lintAnnots ) 29import Simplify ( simplTopBinds, simplExpr, simplRules ) 30import SimplUtils ( simplEnvForGHCi, activeRule, activeUnfolding ) 31import SimplEnv 32import SimplMonad 33import CoreMonad 34import qualified ErrUtils as Err 35import FloatIn ( floatInwards ) 36import FloatOut ( floatOutwards ) 37import FamInstEnv 38import Id 39import ErrUtils ( withTiming, withTimingD ) 40import BasicTypes ( CompilerPhase(..), isDefaultInlinePragma, defaultInlinePragma ) 41import VarSet 42import VarEnv 43import LiberateCase ( liberateCase ) 44import SAT ( doStaticArgs ) 45import Specialise ( specProgram) 46import SpecConstr ( specConstrProgram) 47import DmdAnal ( dmdAnalProgram ) 48import CallArity ( callArityAnalProgram ) 49import Exitify ( exitifyProgram ) 50import WorkWrap ( wwTopBinds ) 51import SrcLoc 52import Util 53import Module 54import Plugins ( withPlugins, installCoreToDos ) 55import DynamicLoading -- ( initializePlugins ) 56 57import UniqSupply ( UniqSupply, mkSplitUniqSupply, splitUniqSupply ) 58import UniqFM 59import Outputable 60import Control.Monad 61import qualified GHC.LanguageExtensions as LangExt 62{- 63************************************************************************ 64* * 65\subsection{The driver for the simplifier} 66* * 67************************************************************************ 68-} 69 70core2core :: HscEnv -> ModGuts -> IO ModGuts 71core2core hsc_env guts@(ModGuts { mg_module = mod 72 , mg_loc = loc 73 , mg_deps = deps 74 , mg_rdr_env = rdr_env }) 75 = do { -- make sure all plugins are loaded 76 77 ; let builtin_passes = getCoreToDo dflags 78 orph_mods = mkModuleSet (mod : dep_orphs deps) 79 uniq_mask = 's' 80 ; 81 ; (guts2, stats) <- runCoreM hsc_env hpt_rule_base uniq_mask mod 82 orph_mods print_unqual loc $ 83 do { hsc_env' <- getHscEnv 84 ; dflags' <- liftIO $ initializePlugins hsc_env' 85 (hsc_dflags hsc_env') 86 ; all_passes <- withPlugins dflags' 87 installCoreToDos 88 builtin_passes 89 ; runCorePasses all_passes guts } 90 91 ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl_stats 92 "Grand total simplifier statistics" 93 (pprSimplCount stats) 94 95 ; return guts2 } 96 where 97 dflags = hsc_dflags hsc_env 98 home_pkg_rules = hptRules hsc_env (dep_mods deps) 99 hpt_rule_base = mkRuleBase home_pkg_rules 100 print_unqual = mkPrintUnqualified dflags rdr_env 101 -- mod: get the module out of the current HscEnv so we can retrieve it from the monad. 102 -- This is very convienent for the users of the monad (e.g. plugins do not have to 103 -- consume the ModGuts to find the module) but somewhat ugly because mg_module may 104 -- _theoretically_ be changed during the Core pipeline (it's part of ModGuts), which 105 -- would mean our cached value would go out of date. 106 107{- 108************************************************************************ 109* * 110 Generating the main optimisation pipeline 111* * 112************************************************************************ 113-} 114 115getCoreToDo :: DynFlags -> [CoreToDo] 116getCoreToDo dflags 117 = flatten_todos core_todo 118 where 119 opt_level = optLevel dflags 120 phases = simplPhases dflags 121 max_iter = maxSimplIterations dflags 122 rule_check = ruleCheck dflags 123 call_arity = gopt Opt_CallArity dflags 124 exitification = gopt Opt_Exitification dflags 125 strictness = gopt Opt_Strictness dflags 126 full_laziness = gopt Opt_FullLaziness dflags 127 do_specialise = gopt Opt_Specialise dflags 128 do_float_in = gopt Opt_FloatIn dflags 129 cse = gopt Opt_CSE dflags 130 spec_constr = gopt Opt_SpecConstr dflags 131 liberate_case = gopt Opt_LiberateCase dflags 132 late_dmd_anal = gopt Opt_LateDmdAnal dflags 133 late_specialise = gopt Opt_LateSpecialise dflags 134 static_args = gopt Opt_StaticArgumentTransformation dflags 135 rules_on = gopt Opt_EnableRewriteRules dflags 136 eta_expand_on = gopt Opt_DoLambdaEtaExpansion dflags 137 ww_on = gopt Opt_WorkerWrapper dflags 138 static_ptrs = xopt LangExt.StaticPointers dflags 139 140 maybe_rule_check phase = runMaybe rule_check (CoreDoRuleCheck phase) 141 142 maybe_strictness_before phase 143 = runWhen (phase `elem` strictnessBefore dflags) CoreDoStrictness 144 145 base_mode = SimplMode { sm_phase = panic "base_mode" 146 , sm_names = [] 147 , sm_dflags = dflags 148 , sm_rules = rules_on 149 , sm_eta_expand = eta_expand_on 150 , sm_inline = True 151 , sm_case_case = True } 152 153 simpl_phase phase names iter 154 = CoreDoPasses 155 $ [ maybe_strictness_before phase 156 , CoreDoSimplify iter 157 (base_mode { sm_phase = Phase phase 158 , sm_names = names }) 159 160 , maybe_rule_check (Phase phase) ] 161 162 simpl_phases = CoreDoPasses [ simpl_phase phase ["main"] max_iter 163 | phase <- [phases, phases-1 .. 1] ] 164 165 166 -- initial simplify: mk specialiser happy: minimum effort please 167 simpl_gently = CoreDoSimplify max_iter 168 (base_mode { sm_phase = InitialPhase 169 , sm_names = ["Gentle"] 170 , sm_rules = rules_on -- Note [RULEs enabled in SimplGently] 171 , sm_inline = True 172 -- See Note [Inline in InitialPhase] 173 , sm_case_case = False }) 174 -- Don't do case-of-case transformations. 175 -- This makes full laziness work better 176 177 strictness_pass = if ww_on 178 then [CoreDoStrictness,CoreDoWorkerWrapper] 179 else [CoreDoStrictness] 180 181 182 -- New demand analyser 183 demand_analyser = (CoreDoPasses ( 184 strictness_pass ++ 185 [simpl_phase 0 ["post-worker-wrapper"] max_iter] 186 )) 187 188 -- Static forms are moved to the top level with the FloatOut pass. 189 -- See Note [Grand plan for static forms] in StaticPtrTable. 190 static_ptrs_float_outwards = 191 runWhen static_ptrs $ CoreDoPasses 192 [ simpl_gently -- Float Out can't handle type lets (sometimes created 193 -- by simpleOptPgm via mkParallelBindings) 194 , CoreDoFloatOutwards FloatOutSwitches 195 { floatOutLambdas = Just 0 196 , floatOutConstants = True 197 , floatOutOverSatApps = False 198 , floatToTopLevelOnly = True 199 } 200 ] 201 202 core_todo = 203 if opt_level == 0 then 204 [ static_ptrs_float_outwards, 205 CoreDoSimplify max_iter 206 (base_mode { sm_phase = Phase 0 207 , sm_names = ["Non-opt simplification"] }) 208 ] 209 210 else {- opt_level >= 1 -} [ 211 212 -- We want to do the static argument transform before full laziness as it 213 -- may expose extra opportunities to float things outwards. However, to fix 214 -- up the output of the transformation we need at do at least one simplify 215 -- after this before anything else 216 runWhen static_args (CoreDoPasses [ simpl_gently, CoreDoStaticArgs ]), 217 218 -- initial simplify: mk specialiser happy: minimum effort please 219 simpl_gently, 220 221 -- Specialisation is best done before full laziness 222 -- so that overloaded functions have all their dictionary lambdas manifest 223 runWhen do_specialise CoreDoSpecialising, 224 225 if full_laziness then 226 CoreDoFloatOutwards FloatOutSwitches { 227 floatOutLambdas = Just 0, 228 floatOutConstants = True, 229 floatOutOverSatApps = False, 230 floatToTopLevelOnly = False } 231 -- Was: gentleFloatOutSwitches 232 -- 233 -- I have no idea why, but not floating constants to 234 -- top level is very bad in some cases. 235 -- 236 -- Notably: p_ident in spectral/rewrite 237 -- Changing from "gentle" to "constantsOnly" 238 -- improved rewrite's allocation by 19%, and 239 -- made 0.0% difference to any other nofib 240 -- benchmark 241 -- 242 -- Not doing floatOutOverSatApps yet, we'll do 243 -- that later on when we've had a chance to get more 244 -- accurate arity information. In fact it makes no 245 -- difference at all to performance if we do it here, 246 -- but maybe we save some unnecessary to-and-fro in 247 -- the simplifier. 248 else 249 -- Even with full laziness turned off, we still need to float static 250 -- forms to the top level. See Note [Grand plan for static forms] in 251 -- StaticPtrTable. 252 static_ptrs_float_outwards, 253 254 simpl_phases, 255 256 -- Phase 0: allow all Ids to be inlined now 257 -- This gets foldr inlined before strictness analysis 258 259 -- At least 3 iterations because otherwise we land up with 260 -- huge dead expressions because of an infelicity in the 261 -- simplifier. 262 -- let k = BIG in foldr k z xs 263 -- ==> let k = BIG in letrec go = \xs -> ...(k x).... in go xs 264 -- ==> let k = BIG in letrec go = \xs -> ...(BIG x).... in go xs 265 -- Don't stop now! 266 simpl_phase 0 ["main"] (max max_iter 3), 267 268 runWhen do_float_in CoreDoFloatInwards, 269 -- Run float-inwards immediately before the strictness analyser 270 -- Doing so pushes bindings nearer their use site and hence makes 271 -- them more likely to be strict. These bindings might only show 272 -- up after the inlining from simplification. Example in fulsom, 273 -- Csg.calc, where an arg of timesDouble thereby becomes strict. 274 275 runWhen call_arity $ CoreDoPasses 276 [ CoreDoCallArity 277 , simpl_phase 0 ["post-call-arity"] max_iter 278 ], 279 280 runWhen strictness demand_analyser, 281 282 runWhen exitification CoreDoExitify, 283 -- See note [Placement of the exitification pass] 284 285 runWhen full_laziness $ 286 CoreDoFloatOutwards FloatOutSwitches { 287 floatOutLambdas = floatLamArgs dflags, 288 floatOutConstants = True, 289 floatOutOverSatApps = True, 290 floatToTopLevelOnly = False }, 291 -- nofib/spectral/hartel/wang doubles in speed if you 292 -- do full laziness late in the day. It only happens 293 -- after fusion and other stuff, so the early pass doesn't 294 -- catch it. For the record, the redex is 295 -- f_el22 (f_el21 r_midblock) 296 297 298 runWhen cse CoreCSE, 299 -- We want CSE to follow the final full-laziness pass, because it may 300 -- succeed in commoning up things floated out by full laziness. 301 -- CSE used to rely on the no-shadowing invariant, but it doesn't any more 302 303 runWhen do_float_in CoreDoFloatInwards, 304 305 maybe_rule_check (Phase 0), 306 307 -- Case-liberation for -O2. This should be after 308 -- strictness analysis and the simplification which follows it. 309 runWhen liberate_case (CoreDoPasses [ 310 CoreLiberateCase, 311 simpl_phase 0 ["post-liberate-case"] max_iter 312 ]), -- Run the simplifier after LiberateCase to vastly 313 -- reduce the possibility of shadowing 314 -- Reason: see Note [Shadowing] in SpecConstr.hs 315 316 runWhen spec_constr CoreDoSpecConstr, 317 318 maybe_rule_check (Phase 0), 319 320 runWhen late_specialise 321 (CoreDoPasses [ CoreDoSpecialising 322 , simpl_phase 0 ["post-late-spec"] max_iter]), 323 324 -- LiberateCase can yield new CSE opportunities because it peels 325 -- off one layer of a recursive function (concretely, I saw this 326 -- in wheel-sieve1), and I'm guessing that SpecConstr can too 327 -- And CSE is a very cheap pass. So it seems worth doing here. 328 runWhen ((liberate_case || spec_constr) && cse) CoreCSE, 329 330 -- Final clean-up simplification: 331 simpl_phase 0 ["final"] max_iter, 332 333 runWhen late_dmd_anal $ CoreDoPasses ( 334 strictness_pass ++ 335 [simpl_phase 0 ["post-late-ww"] max_iter] 336 ), 337 338 -- Final run of the demand_analyser, ensures that one-shot thunks are 339 -- really really one-shot thunks. Only needed if the demand analyser 340 -- has run at all. See Note [Final Demand Analyser run] in DmdAnal 341 -- It is EXTREMELY IMPORTANT to run this pass, otherwise execution 342 -- can become /exponentially/ more expensive. See #11731, #12996. 343 runWhen (strictness || late_dmd_anal) CoreDoStrictness, 344 345 maybe_rule_check (Phase 0) 346 ] 347 348 -- Remove 'CoreDoNothing' and flatten 'CoreDoPasses' for clarity. 349 flatten_todos [] = [] 350 flatten_todos (CoreDoNothing : rest) = flatten_todos rest 351 flatten_todos (CoreDoPasses passes : rest) = 352 flatten_todos passes ++ flatten_todos rest 353 flatten_todos (todo : rest) = todo : flatten_todos rest 354 355{- Note [Inline in InitialPhase] 356~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 357In GHC 8 and earlier we did not inline anything in the InitialPhase. But that is 358confusing for users because when they say INLINE they expect the function to inline 359right away. 360 361So now we do inlining immediately, even in the InitialPhase, assuming that the 362Id's Activation allows it. 363 364This is a surprisingly big deal. Compiler performance improved a lot 365when I made this change: 366 367 perf/compiler/T5837.run T5837 [stat too good] (normal) 368 perf/compiler/parsing001.run parsing001 [stat too good] (normal) 369 perf/compiler/T12234.run T12234 [stat too good] (optasm) 370 perf/compiler/T9020.run T9020 [stat too good] (optasm) 371 perf/compiler/T3064.run T3064 [stat too good] (normal) 372 perf/compiler/T9961.run T9961 [stat too good] (normal) 373 perf/compiler/T13056.run T13056 [stat too good] (optasm) 374 perf/compiler/T9872d.run T9872d [stat too good] (normal) 375 perf/compiler/T783.run T783 [stat too good] (normal) 376 perf/compiler/T12227.run T12227 [stat too good] (normal) 377 perf/should_run/lazy-bs-alloc.run lazy-bs-alloc [stat too good] (normal) 378 perf/compiler/T1969.run T1969 [stat too good] (normal) 379 perf/compiler/T9872a.run T9872a [stat too good] (normal) 380 perf/compiler/T9872c.run T9872c [stat too good] (normal) 381 perf/compiler/T9872b.run T9872b [stat too good] (normal) 382 perf/compiler/T9872d.run T9872d [stat too good] (normal) 383 384Note [RULEs enabled in SimplGently] 385~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 386RULES are enabled when doing "gentle" simplification. Two reasons: 387 388 * We really want the class-op cancellation to happen: 389 op (df d1 d2) --> $cop3 d1 d2 390 because this breaks the mutual recursion between 'op' and 'df' 391 392 * I wanted the RULE 393 lift String ===> ... 394 to work in Template Haskell when simplifying 395 splices, so we get simpler code for literal strings 396 397But watch out: list fusion can prevent floating. So use phase control 398to switch off those rules until after floating. 399 400************************************************************************ 401* * 402 The CoreToDo interpreter 403* * 404************************************************************************ 405-} 406 407runCorePasses :: [CoreToDo] -> ModGuts -> CoreM ModGuts 408runCorePasses passes guts 409 = foldM do_pass guts passes 410 where 411 do_pass guts CoreDoNothing = return guts 412 do_pass guts (CoreDoPasses ps) = runCorePasses ps guts 413 do_pass guts pass = do 414 withTimingD (ppr pass <+> brackets (ppr mod)) 415 (const ()) $ do 416 { guts' <- lintAnnots (ppr pass) (doCorePass pass) guts 417 ; endPass pass (mg_binds guts') (mg_rules guts') 418 ; return guts' } 419 420 mod = mg_module guts 421 422doCorePass :: CoreToDo -> ModGuts -> CoreM ModGuts 423doCorePass pass@(CoreDoSimplify {}) = {-# SCC "Simplify" #-} 424 simplifyPgm pass 425 426doCorePass CoreCSE = {-# SCC "CommonSubExpr" #-} 427 doPass cseProgram 428 429doCorePass CoreLiberateCase = {-# SCC "LiberateCase" #-} 430 doPassD liberateCase 431 432doCorePass CoreDoFloatInwards = {-# SCC "FloatInwards" #-} 433 floatInwards 434 435doCorePass (CoreDoFloatOutwards f) = {-# SCC "FloatOutwards" #-} 436 doPassDUM (floatOutwards f) 437 438doCorePass CoreDoStaticArgs = {-# SCC "StaticArgs" #-} 439 doPassU doStaticArgs 440 441doCorePass CoreDoCallArity = {-# SCC "CallArity" #-} 442 doPassD callArityAnalProgram 443 444doCorePass CoreDoExitify = {-# SCC "Exitify" #-} 445 doPass exitifyProgram 446 447doCorePass CoreDoStrictness = {-# SCC "NewStranal" #-} 448 doPassDFM dmdAnalProgram 449 450doCorePass CoreDoWorkerWrapper = {-# SCC "WorkWrap" #-} 451 doPassDFU wwTopBinds 452 453doCorePass CoreDoSpecialising = {-# SCC "Specialise" #-} 454 specProgram 455 456doCorePass CoreDoSpecConstr = {-# SCC "SpecConstr" #-} 457 specConstrProgram 458 459doCorePass CoreDoPrintCore = observe printCore 460doCorePass (CoreDoRuleCheck phase pat) = ruleCheckPass phase pat 461doCorePass CoreDoNothing = return 462doCorePass (CoreDoPasses passes) = runCorePasses passes 463 464doCorePass (CoreDoPluginPass _ pass) = {-# SCC "Plugin" #-} pass 465 466doCorePass pass@CoreDesugar = pprPanic "doCorePass" (ppr pass) 467doCorePass pass@CoreDesugarOpt = pprPanic "doCorePass" (ppr pass) 468doCorePass pass@CoreTidy = pprPanic "doCorePass" (ppr pass) 469doCorePass pass@CorePrep = pprPanic "doCorePass" (ppr pass) 470doCorePass pass@CoreOccurAnal = pprPanic "doCorePass" (ppr pass) 471 472{- 473************************************************************************ 474* * 475\subsection{Core pass combinators} 476* * 477************************************************************************ 478-} 479 480printCore :: DynFlags -> CoreProgram -> IO () 481printCore dflags binds 482 = Err.dumpIfSet dflags True "Print Core" (pprCoreBindings binds) 483 484ruleCheckPass :: CompilerPhase -> String -> ModGuts -> CoreM ModGuts 485ruleCheckPass current_phase pat guts = 486 withTimingD (text "RuleCheck"<+>brackets (ppr $ mg_module guts)) 487 (const ()) $ do 488 { rb <- getRuleBase 489 ; dflags <- getDynFlags 490 ; vis_orphs <- getVisibleOrphanMods 491 ; let rule_fn fn = getRules (RuleEnv rb vis_orphs) fn 492 ++ (mg_rules guts) 493 ; liftIO $ putLogMsg dflags NoReason Err.SevDump noSrcSpan 494 (defaultDumpStyle dflags) 495 (ruleCheckProgram current_phase pat 496 rule_fn (mg_binds guts)) 497 ; return guts } 498 499doPassDUM :: (DynFlags -> UniqSupply -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts 500doPassDUM do_pass = doPassM $ \binds -> do 501 dflags <- getDynFlags 502 us <- getUniqueSupplyM 503 liftIO $ do_pass dflags us binds 504 505doPassDM :: (DynFlags -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts 506doPassDM do_pass = doPassDUM (\dflags -> const (do_pass dflags)) 507 508doPassD :: (DynFlags -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts 509doPassD do_pass = doPassDM (\dflags -> return . do_pass dflags) 510 511doPassDU :: (DynFlags -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts 512doPassDU do_pass = doPassDUM (\dflags us -> return . do_pass dflags us) 513 514doPassU :: (UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts 515doPassU do_pass = doPassDU (const do_pass) 516 517doPassDFM :: (DynFlags -> FamInstEnvs -> CoreProgram -> IO CoreProgram) -> ModGuts -> CoreM ModGuts 518doPassDFM do_pass guts = do 519 dflags <- getDynFlags 520 p_fam_env <- getPackageFamInstEnv 521 let fam_envs = (p_fam_env, mg_fam_inst_env guts) 522 doPassM (liftIO . do_pass dflags fam_envs) guts 523 524doPassDFU :: (DynFlags -> FamInstEnvs -> UniqSupply -> CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts 525doPassDFU do_pass guts = do 526 dflags <- getDynFlags 527 us <- getUniqueSupplyM 528 p_fam_env <- getPackageFamInstEnv 529 let fam_envs = (p_fam_env, mg_fam_inst_env guts) 530 doPass (do_pass dflags fam_envs us) guts 531 532-- Most passes return no stats and don't change rules: these combinators 533-- let us lift them to the full blown ModGuts+CoreM world 534doPassM :: Monad m => (CoreProgram -> m CoreProgram) -> ModGuts -> m ModGuts 535doPassM bind_f guts = do 536 binds' <- bind_f (mg_binds guts) 537 return (guts { mg_binds = binds' }) 538 539doPass :: (CoreProgram -> CoreProgram) -> ModGuts -> CoreM ModGuts 540doPass bind_f guts = return $ guts { mg_binds = bind_f (mg_binds guts) } 541 542-- Observer passes just peek; don't modify the bindings at all 543observe :: (DynFlags -> CoreProgram -> IO a) -> ModGuts -> CoreM ModGuts 544observe do_pass = doPassM $ \binds -> do 545 dflags <- getDynFlags 546 _ <- liftIO $ do_pass dflags binds 547 return binds 548 549{- 550************************************************************************ 551* * 552 Gentle simplification 553* * 554************************************************************************ 555-} 556 557simplifyExpr :: DynFlags -- includes spec of what core-to-core passes to do 558 -> CoreExpr 559 -> IO CoreExpr 560-- simplifyExpr is called by the driver to simplify an 561-- expression typed in at the interactive prompt 562-- 563-- Also used by Template Haskell 564simplifyExpr dflags expr 565 = withTiming dflags (text "Simplify [expr]") (const ()) $ 566 do { 567 ; us <- mkSplitUniqSupply 's' 568 569 ; let sz = exprSize expr 570 571 ; (expr', counts) <- initSmpl dflags emptyRuleEnv 572 emptyFamInstEnvs us sz 573 (simplExprGently (simplEnvForGHCi dflags) expr) 574 575 ; Err.dumpIfSet dflags (dopt Opt_D_dump_simpl_stats dflags) 576 "Simplifier statistics" (pprSimplCount counts) 577 578 ; Err.dumpIfSet_dyn dflags Opt_D_dump_simpl "Simplified expression" 579 (pprCoreExpr expr') 580 581 ; return expr' 582 } 583 584simplExprGently :: SimplEnv -> CoreExpr -> SimplM CoreExpr 585-- Simplifies an expression 586-- does occurrence analysis, then simplification 587-- and repeats (twice currently) because one pass 588-- alone leaves tons of crud. 589-- Used (a) for user expressions typed in at the interactive prompt 590-- (b) the LHS and RHS of a RULE 591-- (c) Template Haskell splices 592-- 593-- The name 'Gently' suggests that the SimplMode is SimplGently, 594-- and in fact that is so.... but the 'Gently' in simplExprGently doesn't 595-- enforce that; it just simplifies the expression twice 596 597-- It's important that simplExprGently does eta reduction; see 598-- Note [Simplifying the left-hand side of a RULE] above. The 599-- simplifier does indeed do eta reduction (it's in Simplify.completeLam) 600-- but only if -O is on. 601 602simplExprGently env expr = do 603 expr1 <- simplExpr env (occurAnalyseExpr expr) 604 simplExpr env (occurAnalyseExpr expr1) 605 606{- 607************************************************************************ 608* * 609\subsection{The driver for the simplifier} 610* * 611************************************************************************ 612-} 613 614simplifyPgm :: CoreToDo -> ModGuts -> CoreM ModGuts 615simplifyPgm pass guts 616 = do { hsc_env <- getHscEnv 617 ; us <- getUniqueSupplyM 618 ; rb <- getRuleBase 619 ; liftIOWithCount $ 620 simplifyPgmIO pass hsc_env us rb guts } 621 622simplifyPgmIO :: CoreToDo 623 -> HscEnv 624 -> UniqSupply 625 -> RuleBase 626 -> ModGuts 627 -> IO (SimplCount, ModGuts) -- New bindings 628 629simplifyPgmIO pass@(CoreDoSimplify max_iterations mode) 630 hsc_env us hpt_rule_base 631 guts@(ModGuts { mg_module = this_mod 632 , mg_rdr_env = rdr_env 633 , mg_deps = deps 634 , mg_binds = binds, mg_rules = rules 635 , mg_fam_inst_env = fam_inst_env }) 636 = do { (termination_msg, it_count, counts_out, guts') 637 <- do_iteration us 1 [] binds rules 638 639 ; Err.dumpIfSet dflags (dopt Opt_D_verbose_core2core dflags && 640 dopt Opt_D_dump_simpl_stats dflags) 641 "Simplifier statistics for following pass" 642 (vcat [text termination_msg <+> text "after" <+> ppr it_count 643 <+> text "iterations", 644 blankLine, 645 pprSimplCount counts_out]) 646 647 ; return (counts_out, guts') 648 } 649 where 650 dflags = hsc_dflags hsc_env 651 print_unqual = mkPrintUnqualified dflags rdr_env 652 simpl_env = mkSimplEnv mode 653 active_rule = activeRule mode 654 active_unf = activeUnfolding mode 655 656 do_iteration :: UniqSupply 657 -> Int -- Counts iterations 658 -> [SimplCount] -- Counts from earlier iterations, reversed 659 -> CoreProgram -- Bindings in 660 -> [CoreRule] -- and orphan rules 661 -> IO (String, Int, SimplCount, ModGuts) 662 663 do_iteration us iteration_no counts_so_far binds rules 664 -- iteration_no is the number of the iteration we are 665 -- about to begin, with '1' for the first 666 | iteration_no > max_iterations -- Stop if we've run out of iterations 667 = WARN( debugIsOn && (max_iterations > 2) 668 , hang (text "Simplifier bailing out after" <+> int max_iterations 669 <+> text "iterations" 670 <+> (brackets $ hsep $ punctuate comma $ 671 map (int . simplCountN) (reverse counts_so_far))) 672 2 (text "Size =" <+> ppr (coreBindsStats binds))) 673 674 -- Subtract 1 from iteration_no to get the 675 -- number of iterations we actually completed 676 return ( "Simplifier baled out", iteration_no - 1 677 , totalise counts_so_far 678 , guts { mg_binds = binds, mg_rules = rules } ) 679 680 -- Try and force thunks off the binds; significantly reduces 681 -- space usage, especially with -O. JRS, 000620. 682 | let sz = coreBindsSize binds 683 , () <- sz `seq` () -- Force it 684 = do { 685 -- Occurrence analysis 686 let { tagged_binds = {-# SCC "OccAnal" #-} 687 occurAnalysePgm this_mod active_unf active_rule rules 688 binds 689 } ; 690 Err.dumpIfSet_dyn dflags Opt_D_dump_occur_anal "Occurrence analysis" 691 (pprCoreBindings tagged_binds); 692 693 -- Get any new rules, and extend the rule base 694 -- See Note [Overall plumbing for rules] in Rules.hs 695 -- We need to do this regularly, because simplification can 696 -- poke on IdInfo thunks, which in turn brings in new rules 697 -- behind the scenes. Otherwise there's a danger we'll simply 698 -- miss the rules for Ids hidden inside imported inlinings 699 eps <- hscEPS hsc_env ; 700 let { rule_base1 = unionRuleBase hpt_rule_base (eps_rule_base eps) 701 ; rule_base2 = extendRuleBaseList rule_base1 rules 702 ; fam_envs = (eps_fam_inst_env eps, fam_inst_env) 703 ; vis_orphs = this_mod : dep_orphs deps } ; 704 705 -- Simplify the program 706 ((binds1, rules1), counts1) <- 707 initSmpl dflags (mkRuleEnv rule_base2 vis_orphs) fam_envs us1 sz $ 708 do { (floats, env1) <- {-# SCC "SimplTopBinds" #-} 709 simplTopBinds simpl_env tagged_binds 710 711 -- Apply the substitution to rules defined in this module 712 -- for imported Ids. Eg RULE map my_f = blah 713 -- If we have a substitution my_f :-> other_f, we'd better 714 -- apply it to the rule to, or it'll never match 715 ; rules1 <- simplRules env1 Nothing rules Nothing 716 717 ; return (getTopFloatBinds floats, rules1) } ; 718 719 -- Stop if nothing happened; don't dump output 720 -- See Note [Which transformations are innocuous] in CoreMonad 721 if isZeroSimplCount counts1 then 722 return ( "Simplifier reached fixed point", iteration_no 723 , totalise (counts1 : counts_so_far) -- Include "free" ticks 724 , guts { mg_binds = binds1, mg_rules = rules1 } ) 725 else do { 726 -- Short out indirections 727 -- We do this *after* at least one run of the simplifier 728 -- because indirection-shorting uses the export flag on *occurrences* 729 -- and that isn't guaranteed to be ok until after the first run propagates 730 -- stuff from the binding site to its occurrences 731 -- 732 -- ToDo: alas, this means that indirection-shorting does not happen at all 733 -- if the simplifier does nothing (not common, I know, but unsavoury) 734 let { binds2 = {-# SCC "ZapInd" #-} shortOutIndirections binds1 } ; 735 736 -- Dump the result of this iteration 737 dump_end_iteration dflags print_unqual iteration_no counts1 binds2 rules1 ; 738 lintPassResult hsc_env pass binds2 ; 739 740 -- Loop 741 do_iteration us2 (iteration_no + 1) (counts1:counts_so_far) binds2 rules1 742 } } 743 | otherwise = panic "do_iteration" 744 where 745 (us1, us2) = splitUniqSupply us 746 747 -- Remember the counts_so_far are reversed 748 totalise :: [SimplCount] -> SimplCount 749 totalise = foldr (\c acc -> acc `plusSimplCount` c) 750 (zeroSimplCount dflags) 751 752simplifyPgmIO _ _ _ _ _ = panic "simplifyPgmIO" 753 754------------------- 755dump_end_iteration :: DynFlags -> PrintUnqualified -> Int 756 -> SimplCount -> CoreProgram -> [CoreRule] -> IO () 757dump_end_iteration dflags print_unqual iteration_no counts binds rules 758 = dumpPassResult dflags print_unqual mb_flag hdr pp_counts binds rules 759 where 760 mb_flag | dopt Opt_D_dump_simpl_iterations dflags = Just Opt_D_dump_simpl_iterations 761 | otherwise = Nothing 762 -- Show details if Opt_D_dump_simpl_iterations is on 763 764 hdr = text "Simplifier iteration=" <> int iteration_no 765 pp_counts = vcat [ text "---- Simplifier counts for" <+> hdr 766 , pprSimplCount counts 767 , text "---- End of simplifier counts for" <+> hdr ] 768 769{- 770************************************************************************ 771* * 772 Shorting out indirections 773* * 774************************************************************************ 775 776If we have this: 777 778 x_local = <expression> 779 ...bindings... 780 x_exported = x_local 781 782where x_exported is exported, and x_local is not, then we replace it with this: 783 784 x_exported = <expression> 785 x_local = x_exported 786 ...bindings... 787 788Without this we never get rid of the x_exported = x_local thing. This 789save a gratuitous jump (from \tr{x_exported} to \tr{x_local}), and 790makes strictness information propagate better. This used to happen in 791the final phase, but it's tidier to do it here. 792 793Note [Messing up the exported Id's RULES] 794~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 795We must be careful about discarding (obviously) or even merging the 796RULES on the exported Id. The example that went bad on me at one stage 797was this one: 798 799 iterate :: (a -> a) -> a -> [a] 800 [Exported] 801 iterate = iterateList 802 803 iterateFB c f x = x `c` iterateFB c f (f x) 804 iterateList f x = x : iterateList f (f x) 805 [Not exported] 806 807 {-# RULES 808 "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x) 809 "iterateFB" iterateFB (:) = iterateList 810 #-} 811 812This got shorted out to: 813 814 iterateList :: (a -> a) -> a -> [a] 815 iterateList = iterate 816 817 iterateFB c f x = x `c` iterateFB c f (f x) 818 iterate f x = x : iterate f (f x) 819 820 {-# RULES 821 "iterate" forall f x. iterate f x = build (\c _n -> iterateFB c f x) 822 "iterateFB" iterateFB (:) = iterate 823 #-} 824 825And now we get an infinite loop in the rule system 826 iterate f x -> build (\cn -> iterateFB c f x) 827 -> iterateFB (:) f x 828 -> iterate f x 829 830Old "solution": 831 use rule switching-off pragmas to get rid 832 of iterateList in the first place 833 834But in principle the user *might* want rules that only apply to the Id 835he says. And inline pragmas are similar 836 {-# NOINLINE f #-} 837 f = local 838 local = <stuff> 839Then we do not want to get rid of the NOINLINE. 840 841Hence hasShortableIdinfo. 842 843 844Note [Rules and indirection-zapping] 845~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 846Problem: what if x_exported has a RULE that mentions something in ...bindings...? 847Then the things mentioned can be out of scope! Solution 848 a) Make sure that in this pass the usage-info from x_exported is 849 available for ...bindings... 850 b) If there are any such RULES, rec-ify the entire top-level. 851 It'll get sorted out next time round 852 853Other remarks 854~~~~~~~~~~~~~ 855If more than one exported thing is equal to a local thing (i.e., the 856local thing really is shared), then we do one only: 857\begin{verbatim} 858 x_local = .... 859 x_exported1 = x_local 860 x_exported2 = x_local 861==> 862 x_exported1 = .... 863 864 x_exported2 = x_exported1 865\end{verbatim} 866 867We rely on prior eta reduction to simplify things like 868\begin{verbatim} 869 x_exported = /\ tyvars -> x_local tyvars 870==> 871 x_exported = x_local 872\end{verbatim} 873Hence,there's a possibility of leaving unchanged something like this: 874\begin{verbatim} 875 x_local = .... 876 x_exported1 = x_local Int 877\end{verbatim} 878By the time we've thrown away the types in STG land this 879could be eliminated. But I don't think it's very common 880and it's dangerous to do this fiddling in STG land 881because we might elminate a binding that's mentioned in the 882unfolding for something. 883 884Note [Indirection zapping and ticks] 885~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 886Unfortunately this is another place where we need a special case for 887ticks. The following happens quite regularly: 888 889 x_local = <expression> 890 x_exported = tick<x> x_local 891 892Which we want to become: 893 894 x_exported = tick<x> <expression> 895 896As it makes no sense to keep the tick and the expression on separate 897bindings. Note however that that this might increase the ticks scoping 898over the execution of x_local, so we can only do this for floatable 899ticks. More often than not, other references will be unfoldings of 900x_exported, and therefore carry the tick anyway. 901-} 902 903type IndEnv = IdEnv (Id, [Tickish Var]) -- Maps local_id -> exported_id, ticks 904 905shortOutIndirections :: CoreProgram -> CoreProgram 906shortOutIndirections binds 907 | isEmptyVarEnv ind_env = binds 908 | no_need_to_flatten = binds' -- See Note [Rules and indirect-zapping] 909 | otherwise = [Rec (flattenBinds binds')] -- for this no_need_to_flatten stuff 910 where 911 ind_env = makeIndEnv binds 912 -- These exported Ids are the subjects of the indirection-elimination 913 exp_ids = map fst $ nonDetEltsUFM ind_env 914 -- It's OK to use nonDetEltsUFM here because we forget the ordering 915 -- by immediately converting to a set or check if all the elements 916 -- satisfy a predicate. 917 exp_id_set = mkVarSet exp_ids 918 no_need_to_flatten = all (null . ruleInfoRules . idSpecialisation) exp_ids 919 binds' = concatMap zap binds 920 921 zap (NonRec bndr rhs) = [NonRec b r | (b,r) <- zapPair (bndr,rhs)] 922 zap (Rec pairs) = [Rec (concatMap zapPair pairs)] 923 924 zapPair (bndr, rhs) 925 | bndr `elemVarSet` exp_id_set 926 = [] -- Kill the exported-id binding 927 928 | Just (exp_id, ticks) <- lookupVarEnv ind_env bndr 929 , (exp_id', lcl_id') <- transferIdInfo exp_id bndr 930 = -- Turn a local-id binding into two bindings 931 -- exp_id = rhs; lcl_id = exp_id 932 [ (exp_id', mkTicks ticks rhs), 933 (lcl_id', Var exp_id') ] 934 935 | otherwise 936 = [(bndr,rhs)] 937 938makeIndEnv :: [CoreBind] -> IndEnv 939makeIndEnv binds 940 = foldl' add_bind emptyVarEnv binds 941 where 942 add_bind :: IndEnv -> CoreBind -> IndEnv 943 add_bind env (NonRec exported_id rhs) = add_pair env (exported_id, rhs) 944 add_bind env (Rec pairs) = foldl' add_pair env pairs 945 946 add_pair :: IndEnv -> (Id,CoreExpr) -> IndEnv 947 add_pair env (exported_id, exported) 948 | (ticks, Var local_id) <- stripTicksTop tickishFloatable exported 949 , shortMeOut env exported_id local_id 950 = extendVarEnv env local_id (exported_id, ticks) 951 add_pair env _ = env 952 953----------------- 954shortMeOut :: IndEnv -> Id -> Id -> Bool 955shortMeOut ind_env exported_id local_id 956-- The if-then-else stuff is just so I can get a pprTrace to see 957-- how often I don't get shorting out because of IdInfo stuff 958 = if isExportedId exported_id && -- Only if this is exported 959 960 isLocalId local_id && -- Only if this one is defined in this 961 -- module, so that we *can* change its 962 -- binding to be the exported thing! 963 964 not (isExportedId local_id) && -- Only if this one is not itself exported, 965 -- since the transformation will nuke it 966 967 not (local_id `elemVarEnv` ind_env) -- Only if not already substituted for 968 then 969 if hasShortableIdInfo exported_id 970 then True -- See Note [Messing up the exported Id's IdInfo] 971 else WARN( True, text "Not shorting out:" <+> ppr exported_id ) 972 False 973 else 974 False 975 976----------------- 977hasShortableIdInfo :: Id -> Bool 978-- True if there is no user-attached IdInfo on exported_id, 979-- so we can safely discard it 980-- See Note [Messing up the exported Id's IdInfo] 981hasShortableIdInfo id 982 = isEmptyRuleInfo (ruleInfo info) 983 && isDefaultInlinePragma (inlinePragInfo info) 984 && not (isStableUnfolding (unfoldingInfo info)) 985 where 986 info = idInfo id 987 988----------------- 989{- Note [Transferring IdInfo] 990~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 991If we have 992 lcl_id = e; exp_id = lcl_id 993 994and lcl_id has useful IdInfo, we don't want to discard it by going 995 gbl_id = e; lcl_id = gbl_id 996 997Instead, transfer IdInfo from lcl_id to exp_id, specifically 998* (Stable) unfolding 999* Strictness 1000* Rules 1001* Inline pragma 1002 1003Overwriting, rather than merging, seems to work ok. 1004 1005We also zap the InlinePragma on the lcl_id. It might originally 1006have had a NOINLINE, which we have now transferred; and we really 1007want the lcl_id to inline now that its RHS is trivial! 1008-} 1009 1010transferIdInfo :: Id -> Id -> (Id, Id) 1011-- See Note [Transferring IdInfo] 1012transferIdInfo exported_id local_id 1013 = ( modifyIdInfo transfer exported_id 1014 , local_id `setInlinePragma` defaultInlinePragma ) 1015 where 1016 local_info = idInfo local_id 1017 transfer exp_info = exp_info `setStrictnessInfo` strictnessInfo local_info 1018 `setUnfoldingInfo` unfoldingInfo local_info 1019 `setInlinePragInfo` inlinePragInfo local_info 1020 `setRuleInfo` addRuleInfo (ruleInfo exp_info) new_info 1021 new_info = setRuleInfoHead (idName exported_id) 1022 (ruleInfo local_info) 1023 -- Remember to set the function-name field of the 1024 -- rules as we transfer them from one function to another 1025