1Module for producing GLR (Tomita) parsing code. 2This module is designed as an extension to the Haskell parser generator Happy. 3 4(c) University of Durham, Ben Medlock 2001 5 -- initial code, for structure parsing 6(c) University of Durham, Paul Callaghan 2004 7 -- extension to semantic rules, and various optimisations 8%----------------------------------------------------------------------------- 9 10> module ProduceGLRCode ( produceGLRParser 11> , DecodeOption(..) 12> , FilterOption(..) 13> , GhcExts(..) 14> , Options 15> ) where 16 17> import Paths_happy ( version ) 18> import GenUtils ( mapDollarDollar ) 19> import GenUtils ( str, char, nl, brack, brack', interleave, maybestr ) 20> import Grammar 21> import Data.Array 22> import Data.Char ( isSpace, isAlphaNum ) 23> import Data.List ( nub, (\\), sort, find, tails ) 24> import Data.Version ( showVersion ) 25 26%----------------------------------------------------------------------------- 27File and Function Names 28 29> base_template, lib_template :: String -> String 30> base_template td = td ++ "/GLR_Base" -- NB Happy uses / too 31> lib_template td = td ++ "/GLR_Lib" -- Windows accepts this? 32 33--- 34prefix for production names, to avoid name clashes 35 36> prefix :: String 37> prefix = "G_" 38 39%----------------------------------------------------------------------------- 40This type represents choice of decoding style for the result 41 42> data DecodeOption 43> = TreeDecode 44> | LabelDecode 45 46--- 47This type represents whether filtering done or not 48 49> data FilterOption 50> = NoFiltering 51> | UseFiltering 52 53--- 54This type represents whether GHC extensions are used or not 55 - extra values are imports and ghc options reqd 56 57> data GhcExts 58> = NoGhcExts 59> | UseGhcExts String String -- imports and options 60 61--- 62this is where the exts matter 63 64> show_st :: GhcExts -> {-State-}Int -> String 65> show_st UseGhcExts{} = (++"#") . show 66> show_st NoGhcExts = show 67 68--- 69 70> type DebugMode = Bool 71> type Options = (DecodeOption, FilterOption, GhcExts) 72 73 74%----------------------------------------------------------------------------- 75Main exported function 76 77> produceGLRParser 78> :: FilePath -- Output file name 79> -> String -- Templates directory 80> -> ActionTable -- LR tables 81> -> GotoTable -- LR tables 82> -> Maybe String -- Module header 83> -> Maybe String -- User-defined stuff (token DT, lexer etc.) 84> -> (DebugMode,Options) -- selecting code-gen style 85> -> Grammar -- Happy Grammar 86> -> IO () 87 88> produceGLRParser outfilename template_dir action goto header trailer options g 89> = do 90> let basename = takeWhile (/='.') outfilename 91> let tbls = (action,goto) 92> (parseName,_,_,_) <- case starts g of 93> [s] -> return s 94> s:_ -> do 95> putStrLn "GLR-Happy doesn't support multiple start points (yet)" 96> putStrLn "Defaulting to first start point." 97> return s 98> [] -> error "produceGLRParser: []" 99> mkFiles basename tbls parseName template_dir header trailer options g 100 101 102%----------------------------------------------------------------------------- 103"mkFiles" generates the files containing the Tomita parsing code. 104It produces two files - one for the data (small template), and one for 105the driver and data strs (large template). 106 107> mkFiles :: FilePath -- Root of Output file name 108> -> (ActionTable 109> ,GotoTable) -- LR tables 110> -> String -- Start parse function name 111> -> String -- Templates directory 112> -> Maybe String -- Module header 113> -> Maybe String -- User-defined stuff (token DT, lexer etc.) 114> -> (DebugMode,Options) -- selecting code-gen style 115> -> Grammar -- Happy Grammar 116> -> IO () 117> 118> mkFiles basename tables start templdir header trailer (debug,options) g 119> = do 120> let debug_ext = if debug then "-debug" else "" 121> let (ext,imps,opts) = case ghcExts_opt of 122> UseGhcExts is os -> ("-ghc", is, os) 123> _ -> ("", "", "") 124> base <- readFile (base_template templdir) 125> --writeFile (basename ++ ".si") (unlines $ map show sem_info) 126> writeFile (basename ++ "Data.hs") (content base opts $ "") 127 128> lib <- readFile (lib_template templdir ++ ext ++ debug_ext) 129> writeFile (basename ++ ".hs") (lib_content imps opts lib) 130> where 131> (_,_,ghcExts_opt) = options 132 133Extract the module name from the given module declaration, if it exists. 134 135> m_mod_decl = find isModKW . zip [0..] . tails . (' ':) =<< header 136> isModKW (_, c0:'m':'o':'d':'u':'l':'e':c1:_) = not (validIDChar c0 || validIDChar c1) 137> isModKW _ = False 138> validIDChar c = isAlphaNum c || c `elem` "_'" 139> validModNameChar c = validIDChar c || c == '.' 140> data_mod = mod_name ++ "Data" 141> mod_name = case m_mod_decl of 142> Just (_, md) -> takeWhile validModNameChar (dropWhile (not . validModNameChar) (drop 8 md)) 143 144Or use a default based upon the filename (original behaviour). 145 146> Nothing -> reverse . takeWhile (`notElem` "\\/") $ reverse basename 147 148Remove the module declaration from the header so that the remainder of 149the header can be used in the generated code. 150 151> header_sans_mod = flip (maybe header) m_mod_decl $ \ (mi, _) -> do 152> hdr <- header 153 154Extract the string that comes before the module declaration... 155 156> let (before, mod_decl) = splitAt mi hdr 157 158> let isWhereKW (c0:'w':'h':'e':'r':'e':c1:_) = not (validIDChar c0 || validIDChar c1) 159> isWhereKW _ = False 160> let where_after = dropWhile (not . isWhereKW) . tails . (++ "\n") $ mod_decl 161> let after = drop 6 . concat . take 1 $ where_after 162 163...and combine it with the string that comes after the 'where' keyword. 164 165> return $ before ++ "\n" ++ after 166 167> (sem_def, sem_info) = mkGSemType options g 168> table_text = mkTbls tables sem_info (ghcExts_opt) g 169 170> header_parts = fmap (span (\x -> take 3 (dropWhile isSpace x) == "{-#") 171> . lines) 172> header_sans_mod 173> -- Split off initial options, if they are present 174> -- Assume these options ONLY related to code which is in 175> -- parser tail or in sem. rules 176 177> content base_defs opts 178> = str ("{-# OPTIONS " ++ opts ++ " #-}") .nl 179> . str (unlines $ maybe [] fst header_parts) .nl 180> . nl 181> . str (comment "data") .nl .nl 182> . str ("module " ++ data_mod ++ " where") .nl 183 184> . nl 185> . maybestr (fmap (unlines.snd) header_parts) .nl 186> . nl 187> . str base_defs .nl 188> . nl 189 190> . let count_nls = length . filter (=='\n') 191> pre_trailer = maybe 0 count_nls header_sans_mod -- check fmt below 192> + count_nls base_defs 193> + 10 -- for the other stuff 194> post_trailer = pre_trailer + maybe 0 count_nls trailer + 4 195> in 196> str ("{-# LINE " ++ show pre_trailer ++ " " 197> ++ show (basename ++ "Data.hs") ++ "#-}") 198> -- This should show a location in basename.y -- but Happy 199> -- doesn't pass this info through. But we still avoid being 200> -- told a location in GLR_Base! 201> . nl 202> . nl 203> . maybestr trailer 204> .nl 205> .nl 206> . str ("{-# LINE " ++ show post_trailer ++ " " 207> ++ show (basename ++ "Data.hs") ++ "#-}") 208> . nl 209> . nl 210 211> . mkGSymbols g .nl 212> . nl 213> . sem_def .nl 214> . nl 215> . mkSemObjects options (monad_sub g) sem_info .nl 216> . nl 217> . mkDecodeUtils options (monad_sub g) sem_info .nl 218> . nl 219> . user_def_token_code (token_type g) .nl 220> . nl 221> . table_text 222 223> lib_content imps opts lib_text 224> = let (pre,_drop_me : post) = break (== "fakeimport DATA") $ lines lib_text 225> in 226> unlines [ "{-# OPTIONS " ++ opts ++ " #-}\n" 227> , comment "driver" ++ "\n" 228> , "module " ++ mod_name ++ "(" 229> , case lexer g of 230> Nothing -> "" 231> Just (lf,_) -> " " ++ lf ++ "," 232> , " " ++ start 233> , "" 234> , unlines pre 235> , imps 236> , "import " ++ data_mod 237> , start ++ " = glr_parse " 238> , "use_filtering = " ++ show use_filtering 239> , "top_symbol = " ++ prefix ++ start_prod 240> , unlines post 241> ] 242> start_prod = token_names g ! (let (_,_,i,_) = head $ starts g in i) 243> use_filtering = case options of (_, UseFiltering,_) -> True 244> _ -> False 245 246> comment :: String -> String 247> comment which 248> = "-- parser (" ++ which ++ ") produced by Happy (GLR) Version " ++ 249> showVersion version 250 251> user_def_token_code :: String -> String -> String 252> user_def_token_code tokenType 253> = str "type UserDefTok = " . str tokenType . nl 254> . str "instance TreeDecode " . brack tokenType . str " where" . nl 255> . str " decode_b f (Branch (SemTok t) []) = [happy_return t]" . nl 256> . str "instance LabelDecode " . brack tokenType . str " where" . nl 257> . str " unpack (SemTok t) = t" . nl 258 259 260%----------------------------------------------------------------------------- 261Formats the tables as code. 262 263> mkTbls :: (ActionTable -- Action table from Happy 264> ,GotoTable) -- Goto table from Happy 265> -> SemInfo -- info about production mapping 266> -> GhcExts -- Use unboxed values? 267> -> Grammar -- Happy Grammar 268> -> ShowS 269> 270> mkTbls (action,goto) sem_info exts g 271> = let gsMap = mkGSymMap g 272> semfn_map = mk_semfn_map sem_info 273> in 274> writeActionTbl action gsMap (semfn_map !) exts g 275> . writeGotoTbl goto gsMap exts 276 277 278%----------------------------------------------------------------------------- 279Create a mapping of Happy grammar symbol integers to the data representation 280that will be used for them in the GLR parser. 281 282> mkGSymMap :: Grammar -> [(Name,String)] 283> mkGSymMap g 284> = [ -- (errorTok, prefix ++ "Error") 285> ] 286> ++ [ (i, prefix ++ (token_names g) ! i) 287> | i <- user_non_terminals g ] -- Non-terminals 288> ++ [ (i, "HappyTok (" ++ mkMatch tok ++ ")") 289> | (i,tok) <- token_specs g ] -- Tokens (terminals) 290> ++ [(eof_term g,"HappyEOF")] -- EOF symbol (internal terminal) 291> where 292> mkMatch tok = case mapDollarDollar tok of 293> Nothing -> tok 294> Just fn -> fn "_" 295 296> toGSym :: [(Int, String)] -> Int -> String 297> toGSym gsMap i 298> = case lookup i gsMap of 299> Nothing -> error $ "No representation for symbol " ++ show i 300> Just g -> g 301 302 303%----------------------------------------------------------------------------- 304Take the ActionTable from Happy and turn it into a String representing a 305function that can be included as the action table in the GLR parser. 306It also shares identical reduction values as CAFs 307 308> writeActionTbl 309> :: ActionTable -> [(Int,String)] -> (Name->String) 310> -> GhcExts -> Grammar -> ShowS 311> writeActionTbl acTbl gsMap semfn_map exts g 312> = interleave "\n" 313> $ map str 314> $ mkLines ++ [errorLine] ++ mkReductions 315> where 316> name = "action" 317> mkLines = concatMap (mkState) (assocs acTbl) 318> errorLine = name ++ " _ _ = Error" 319> mkState (i,arr) 320> = filter (/="") $ map (mkLine i) (assocs arr) 321> 322> mkLine state (symInt,action) 323> | symInt == errorTok -- skip error productions 324> = "" -- NB see ProduceCode's handling of these 325> | otherwise 326> = case action of 327> LR'Fail -> "" 328> LR'MustFail -> "" 329> _ -> unwords [ startLine , mkAct action ] 330> where 331> startLine 332> = unwords [ name , show_st exts state, "(" , getTok , ") =" ] 333> getTok = let tok = toGSym gsMap symInt 334> in case mapDollarDollar tok of 335> Nothing -> tok 336> Just f -> f "_" 337> mkAct act 338> = case act of 339> LR'Shift newSt _ -> "Shift " ++ show newSt ++ " []" 340> LR'Reduce r _ -> "Reduce " ++ "[" ++ mkRed r ++ "]" 341> LR'Accept -> "Accept" 342> LR'Multiple rs (LR'Shift st _) 343> -> "Shift " ++ show st ++ " " ++ mkReds rs 344> LR'Multiple rs r@(LR'Reduce{}) 345> -> "Reduce " ++ mkReds (r:rs) 346> _ -> error "writeActionTbl/mkAct: Unhandled case" 347> where 348> mkReds rs = "[" ++ tail (concat [ "," ++ mkRed r | LR'Reduce r _ <- rs ]) ++ "]" 349 350> mkRed r = "red_" ++ show r 351> mkReductions = [ mkRedDefn p | p@(_,(n,_,_,_)) <- zip [0..] $ productions g 352> , n `notElem` start_productions g ] 353 354> mkRedDefn (r, (lhs_id, rhs_ids, (_code,_dollar_vars), _)) 355> = mkRed r ++ " = ("++ lhs ++ "," ++ show arity ++ " :: Int," ++ sem ++")" 356> where 357> lhs = toGSym gsMap $ lhs_id 358> arity = length rhs_ids 359> sem = semfn_map r 360 361 362%----------------------------------------------------------------------------- 363Do the same with the Happy goto table. 364 365> writeGotoTbl :: GotoTable -> [(Int,String)] -> GhcExts -> ShowS 366> writeGotoTbl goTbl gsMap exts 367> = interleave "\n" (map str $ filter (not.null) mkLines) 368> . str errorLine . nl 369> where 370> name = "goto" 371> errorLine = "goto _ _ = " ++ show_st exts (negate 1) 372> mkLines = map mkState (assocs goTbl) 373> 374> mkState (i,arr) 375> = unlines $ filter (/="") $ map (mkLine i) (assocs arr) 376> 377> mkLine state (ntInt,goto) 378> = case goto of 379> NoGoto -> "" 380> Goto st -> unwords [ startLine , show_st exts st ] 381> where 382> startLine 383> = unwords [ name , show_st exts state, getGSym , "=" ] 384> getGSym = toGSym gsMap ntInt 385 386 387%----------------------------------------------------------------------------- 388Create the 'GSymbol' ADT for the symbols in the grammar 389 390> mkGSymbols :: Grammar -> ShowS 391> mkGSymbols g 392> = str dec 393> . str eof 394> . str tok 395> . interleave "\n" [ str " | " . str prefix . str sym . str " " 396> | sym <- syms ] 397> . str der 398> -- ++ eq_inst 399> -- ++ ord_inst 400> where 401> dec = "data GSymbol" 402> eof = " = HappyEOF" 403> tok = " | HappyTok {-!Int-} (" ++ token_type g ++ ")" 404> der = " deriving (Show,Eq,Ord)" 405> syms = [ token_names g ! i | i <- user_non_terminals g ] 406 407NOTES: 408Was considering avoiding use of Eq/Ord over tokens, but this then means 409hand-coding the Eq/Ord classes since we're over-riding the usual order 410except in one case. 411 412maybe possible to form a union and do some juggling, but this isn't that 413easy, eg input type of "action". 414 415plus, issues about how token info gets into TreeDecode sem values - which 416might be tricky to arrange. 417<> eq_inst = "instance Eq GSymbol where" 418<> : " HappyTok i _ == HappyTok j _ = i == j" 419<> : [ " i == j = fromEnum i == fromEnum j" 420 421 422 423%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 424Semantic actions on rules. 425 426These are stored in a union type "GSem", and the semantic values are held 427on the branches created at the appropriate reduction. 428 429"GSem" type has one constructor per distinct type of semantic action and 430pattern of child usage. 431 432 433%----------------------------------------------------------------------------- 434Creating a type for storing semantic rules 435 - also collects information on code structure and constructor names, for 436 use in later stages. 437 438> type SemInfo 439> = [(String, String, [Int], [((Int,Int), ([(Int,String)],String), [Int])])] 440 441> mkGSemType :: Options -> Grammar -> (ShowS, SemInfo) 442> mkGSemType (TreeDecode,_,_) g 443> = (def, map snd syms) 444> where 445> mtype s = case monad_sub g of 446> Nothing -> s 447> Just (ty,_,_) -> ty ++ ' ' : brack s "" 448 449> def = str "data GSem" . nl 450> . str " = NoSem" . nl 451> . str (" | SemTok (" ++ token_type g ++ ")") . nl 452> . interleave "\n" [ str " | " . str sym . str " " 453> | sym <- map fst syms ] 454> . str "instance Show GSem where" . nl 455> . interleave "\n" [ str " show " . str c . str "{} = " . str (show c) 456> | (_,c,_,_) <- map snd syms ] 457 458> syms = [ (c_name ++ " (" ++ ty ++ ")", (rty, c_name, mask, prod_info)) 459> | (i,this@(mask,args,rty)) <- zip [0..] (nub $ map fst info) 460> -- find unique types (plus mask) 461> , let c_name = "Sem_" ++ show i 462> , let mrty = mtype rty 463> , let ty = foldr (\l r -> l ++ " -> " ++ r) mrty args 464 465> , let code_info = [ j_code | (that, j_code) <- info, this == that ] 466> , let prod_info = [ ((i,k), code, js) 467> | (k,code) <- zip [0..] (nub $ map snd code_info) 468> , let js = [ j | (j,code2) <- code_info 469> , code == code2 ] 470> ] 471> -- collect specific info about productions with this type 472> ] 473 474> info = [ ((var_mask, args, i_ty), (j,(ts_pats,code))) 475> | i <- user_non_terminals g 476> , let i_ty = typeOf i 477> , j <- lookupProdsOfName g i -- all prod numbers 478> , let (_,ts,(raw_code,dollar_vars),_) = lookupProdNo g j 479> , let var_mask = map (\x -> x - 1) vars_used 480> where vars_used = sort $ nub dollar_vars 481> , let args = [ typeOf $ ts !! v | v <- var_mask ] 482> , let code | all isSpace raw_code = "()" 483> | otherwise = raw_code 484> , let ts_pats = [ (k+1,c) | k <- var_mask 485> , (t,c) <- token_specs g 486> , ts !! k == t ] 487> ] 488 489> typeOf n | n `elem` terminals g = token_type g 490> | otherwise = case types g ! n of 491> Nothing -> "()" -- default 492> Just t -> t 493 494> -- NB expects that such labels are Showable 495> mkGSemType (LabelDecode,_,_) g 496> = (def, map snd syms) 497> where 498> def = str "data GSem" . nl 499> . str " = NoSem" . nl 500> . str (" | SemTok (" ++ token_type g ++ ")") 501> . interleave "\n" [ str " | " . str sym . str " " 502> | sym <- map fst syms ] 503> . str " deriving (Show)" . nl 504 505> syms = [ (c_name ++ " (" ++ ty ++ ")", (ty, c_name, mask, prod_info)) 506> | (i,this@(mask,ty)) <- zip [0..] (nub $ map fst info) 507> -- find unique types 508> , let c_name = "Sem_" ++ show i 509> , let code_info = [ j_code | (that, j_code) <- info, this == that ] 510> , let prod_info = [ ((i,k), code, js) 511> | (k,code) <- zip [0..] (nub $ map snd code_info) 512> , let js = [ j | (j,code2) <- code_info 513> , code == code2 ] 514 515> ] 516> -- collect specific info about productions with this type 517> ] 518 519> info = [ ((var_mask,i_ty), (j,(ts_pats,code))) 520> | i <- user_non_terminals g 521> , let i_ty = typeOf i 522> , j <- lookupProdsOfName g i -- all prod numbers 523> , let (_,ts,(code,dollar_vars),_) = lookupProdNo g j 524> , let var_mask = map (\x -> x - 1) vars_used 525> where vars_used = sort $ nub dollar_vars 526> , let ts_pats = [ (k+1,c) | k <- var_mask 527> , (t,c) <- token_specs g 528> , ts !! k == t ] 529> ] 530 531> typeOf n = case types g ! n of 532> Nothing -> "()" -- default 533> Just t -> t 534 535 536%--------------------------------------- 537Creates the appropriate semantic values. 538 - for label-decode, these are the code, but abstracted over the child indices 539 - for tree-decode, these are the code abstracted over the children's values 540 541> mkSemObjects :: Options -> MonadInfo -> SemInfo -> ShowS 542> mkSemObjects (LabelDecode,filter_opt,_) _ sem_info 543> = interleave "\n" 544> $ [ str (mkSemFn_Name ij) 545> . str (" ns@(" ++ pat ++ "happy_rest) = ") 546> . str (" Branch (" ++ c_name ++ " (" ++ code ++ ")) ") 547> . str (nodes filter_opt) 548> | (_ty, c_name, mask, prod_info) <- sem_info 549> , (ij, (pats,code), _ps) <- prod_info 550> , let pat | null mask = "" 551> | otherwise = concatMap (\v -> mk_tok_binder pats (v+1) ++ ":") 552> [0..maximum mask] 553 554> , let nodes NoFiltering = "ns" 555> nodes UseFiltering = "(" ++ foldr (\l -> mkHappyVar (l+1) . showChar ':') "[])" mask 556> ] 557> where 558> mk_tok_binder pats v 559> = mk_binder (\s -> "(_,_,HappyTok (" ++ s ++ "))") pats v "" 560 561 562> mkSemObjects (TreeDecode,filter_opt,_) monad_info sem_info 563> = interleave "\n" 564> $ [ str (mkSemFn_Name ij) 565> . str (" ns@(" ++ pat ++ "happy_rest) = ") 566> . str (" Branch (" ++ c_name ++ " (" ++ sem ++ ")) ") 567> . str (nodes filter_opt) 568> | (_ty, c_name, mask, prod_info) <- sem_info 569> , (ij, (pats,code), _) <- prod_info 570> , let indent c = init $ unlines $ map (replicate 4 ' '++) $ lines c 571> , let mcode = case monad_info of 572> Nothing -> code 573> Just (_,_,rtn) -> case code of 574> '%':code' -> "\n" ++ indent code' 575> _ -> rtn ++ " (" ++ code ++ ")" 576> , let sem = foldr (\v t -> mk_lambda pats (v + 1) "" ++ t) mcode mask 577> , let pat | null mask = "" 578> | otherwise = concatMap (\v -> mkHappyVar (v+1) ":") 579> [0..maximum mask] 580> , let nodes NoFiltering = "ns" 581> nodes UseFiltering = "(" ++ foldr (\l -> mkHappyVar (l+1) . showChar ':') "[])" mask 582> ] 583 584> mk_lambda :: [(Int, String)] -> Int -> String -> String 585> mk_lambda pats v 586> = (\s -> "\\" ++ s ++ " -> ") . mk_binder id pats v 587 588> mk_binder :: (String -> String) -> [(Int, String)] -> Int -> String -> String 589> mk_binder wrap pats v 590> = case lookup v pats of 591> Nothing -> mkHappyVar v 592> Just p -> case mapDollarDollar p of 593> Nothing -> wrap . mkHappyVar v . showChar '@' . brack p 594> Just fn -> wrap . brack' (fn . mkHappyVar v) 595 596 597--- 598standardise the naming scheme 599 600> mkSemFn_Name :: (Int, Int) -> String 601> mkSemFn_Name (i,j) = "semfn_" ++ show i ++ "_" ++ show j 602 603--- 604maps production name to the underlying (possibly shared) semantic function 605 606> mk_semfn_map :: SemInfo -> Array Name String 607> mk_semfn_map sem_info 608> = array (0,maximum $ map fst prod_map) prod_map 609> where 610> prod_map = [ (p, mkSemFn_Name ij) 611> | (_,_,_,pi') <- sem_info, (ij,_,ps) <- pi', p <- ps ] 612 613 614%----------------------------------------------------------------------------- 615Create default decoding functions 616 617Idea is that sem rules are stored as functions in the AbsSyn names, and 618only unpacked when needed. Using classes here to manage the unpacking. 619 620> mkDecodeUtils :: Options -> MonadInfo -> SemInfo -> ShowS 621> mkDecodeUtils (TreeDecode,filter_opt,_) monad_info seminfo 622> = interleave "\n" 623> $ map str (monad_defs monad_info) 624> ++ map mk_inst ty_cs 625> where 626> ty_cs = [ (ty, [ (c_name, mask) 627> | (ty2, c_name, mask, _j_vs) <- seminfo 628> , ty2 == ty 629> ]) 630> | ty <- nub [ ty | (ty,_,_,_) <- seminfo ] 631> ] -- group by same type 632 633> mk_inst (ty, cs_vs) 634> = str ("instance TreeDecode (" ++ ty ++ ") where ") . nl 635> . interleave "\n" 636> [ str " " 637> . str ("decode_b f (Branch (" ++ c_name ++ " s)") 638> . str (" (" ++ var_pat ++ ")) = ") 639> . cross_prod monad_info "s" (nodes filter_opt) 640> | (c_name, vs) <- cs_vs 641> , let vars = [ "b_" ++ show n | n <- var_range filter_opt vs ] 642> , let var_pat = foldr (\l r -> l ++ ":" ++ r) "_" vars 643> , let nodes NoFiltering = [ vars !! n | n <- vs ] 644> nodes UseFiltering = vars 645> ] 646 647> var_range _ [] = [] 648> var_range NoFiltering vs = [0 .. maximum vs ] 649> var_range UseFiltering vs = [0 .. length vs - 1] 650 651> cross_prod Nothing s_var nodes 652> = cross_prod_ (char '[' . str s_var . char ']') 653> (map str nodes) 654> cross_prod (Just (_,_,rtn)) s_var nodes 655> = str "map happy_join $ " 656> . cross_prod_ (char '[' . str rtn . char ' ' . str s_var . char ']') 657> (map str nodes) 658 659> cross_prod_ = foldl (\s a -> brack' 660> $ str "cross_fn" 661> . char ' ' . s 662> . str " $ decode f " 663> . a) 664 665 666 667> mkDecodeUtils (LabelDecode,_,_) monad_info seminfo 668> = interleave "\n" 669> $ map str 670> $ monad_defs monad_info ++ concatMap (mk_inst) ty_cs 671> where 672> ty_cs = [ (ty, [ (c_name, mask) 673> | (ty2, c_name, mask, _) <- seminfo 674> , ty2 == ty 675> ]) 676> | ty <- nub [ ty | (ty,_,_,_) <- seminfo ] 677> ] -- group by same type 678 679> mk_inst (ty, cns) 680> = ("instance LabelDecode (" ++ ty ++ ") where ") 681> : [ " unpack (" ++ c_name ++ " s) = s" 682> | (c_name, _mask) <- cns ] 683 684 685--- 686This selects the info used for monadic parser generation 687 688> type MonadInfo = Maybe (String,String,String) 689> monad_sub :: Grammar -> MonadInfo 690> monad_sub g 691> = case monad g of 692> (True, _, ty,bd,ret) -> Just (ty,bd,ret) 693> _ -> Nothing 694> -- TMP: only use monad info if it was user-declared, and ignore ctxt 695> -- TMP: otherwise default to non-monadic code 696> -- TMP: (NB not sure of consequences of monads-everywhere yet) 697 698 699--- 700form the various monad-related defs. 701 702> monad_defs :: MonadInfo -> [String] 703> monad_defs Nothing 704> = [ "type Decode_Result a = a" 705> , "happy_ap = ($)" 706> , "happy_return = id"] 707> monad_defs (Just (ty,tn,rtn)) 708> = [ "happy_join x = (" ++ tn ++ ") x id" 709> , "happy_ap f a = (" ++ tn ++ ") f (\\f -> (" ++ tn ++ ") a (\\a -> " ++ rtn ++ "(f a)))" 710> , "type Decode_Result a = " ++ brack ty " a" 711> , "happy_return = " ++ rtn ++ " :: a -> Decode_Result a" 712> ] 713 714%----------------------------------------------------------------------------- 715Util Functions 716 717--- 718remove Happy-generated start symbols. 719 720> user_non_terminals :: Grammar -> [Name] 721> user_non_terminals g 722> = non_terminals g \\ start_productions g 723 724> start_productions :: Grammar -> [Name] 725> start_productions g = [ s | (_,s,_,_) <- starts g ] 726 727 728--- 729 730> mkHappyVar :: Int -> String -> String 731> mkHappyVar n = str "happy_var_" . shows n 732