1Module for producing GLR (Tomita) parsing code.
2This module is designed as an extension to the Haskell parser generator Happy.
3
4(c) University of Durham, Ben Medlock 2001
5        -- initial code, for structure parsing
6(c) University of Durham, Paul Callaghan 2004
7        -- extension to semantic rules, and various optimisations
8%-----------------------------------------------------------------------------
9
10> module ProduceGLRCode ( produceGLRParser
11>                       , DecodeOption(..)
12>                       , FilterOption(..)
13>                       , GhcExts(..)
14>                       , Options
15>                       ) where
16
17> import Paths_happy ( version )
18> import GenUtils ( mapDollarDollar )
19> import GenUtils ( str, char, nl, brack, brack', interleave, maybestr )
20> import Grammar
21> import Data.Array
22> import Data.Char ( isSpace, isAlphaNum )
23> import Data.List ( nub, (\\), sort, find, tails )
24> import Data.Version ( showVersion )
25
26%-----------------------------------------------------------------------------
27File and Function Names
28
29> base_template, lib_template :: String -> String
30> base_template td = td ++ "/GLR_Base"          -- NB Happy uses / too
31> lib_template  td = td ++ "/GLR_Lib"           -- Windows accepts this?
32
33---
34prefix for production names, to avoid name clashes
35
36> prefix :: String
37> prefix = "G_"
38
39%-----------------------------------------------------------------------------
40This type represents choice of decoding style for the result
41
42> data DecodeOption
43>  = TreeDecode
44>  | LabelDecode
45
46---
47This type represents whether filtering done or not
48
49> data FilterOption
50>  = NoFiltering
51>  | UseFiltering
52
53---
54This type represents whether GHC extensions are used or not
55 - extra values are imports and ghc options reqd
56
57> data GhcExts
58>  = NoGhcExts
59>  | UseGhcExts String String           -- imports and options
60
61---
62this is where the exts matter
63
64> show_st :: GhcExts -> {-State-}Int -> String
65> show_st UseGhcExts{} = (++"#") . show
66> show_st NoGhcExts    = show
67
68---
69
70> type DebugMode = Bool
71> type Options = (DecodeOption, FilterOption, GhcExts)
72
73
74%-----------------------------------------------------------------------------
75Main exported function
76
77> produceGLRParser
78>        :: FilePath      -- Output file name
79>        -> String        -- Templates directory
80>        -> ActionTable   -- LR tables
81>        -> GotoTable     -- LR tables
82>        -> Maybe String  -- Module header
83>        -> Maybe String  -- User-defined stuff (token DT, lexer etc.)
84>        -> (DebugMode,Options)       -- selecting code-gen style
85>        -> Grammar       -- Happy Grammar
86>        -> IO ()
87
88> produceGLRParser outfilename template_dir action goto header trailer options g
89>  = do
90>     let basename  = takeWhile (/='.') outfilename
91>     let tbls  = (action,goto)
92>     (parseName,_,_,_) <- case starts g of
93>                          [s] -> return s
94>                          s:_ -> do
95>                                    putStrLn "GLR-Happy doesn't support multiple start points (yet)"
96>                                    putStrLn "Defaulting to first start point."
97>                                    return s
98>                          [] -> error "produceGLRParser: []"
99>     mkFiles basename tbls parseName template_dir header trailer options g
100
101
102%-----------------------------------------------------------------------------
103"mkFiles" generates the files containing the Tomita parsing code.
104It produces two files - one for the data (small template), and one for
105the driver and data strs (large template).
106
107> mkFiles :: FilePath     -- Root of Output file name
108>        -> (ActionTable
109>           ,GotoTable)   -- LR tables
110>        -> String        -- Start parse function name
111>        -> String        -- Templates directory
112>        -> Maybe String  -- Module header
113>        -> Maybe String  -- User-defined stuff (token DT, lexer etc.)
114>        -> (DebugMode,Options)       -- selecting code-gen style
115>        -> Grammar       -- Happy Grammar
116>        -> IO ()
117>
118> mkFiles basename tables start templdir header trailer (debug,options) g
119>  = do
120>       let debug_ext = if debug then "-debug" else ""
121>       let (ext,imps,opts) = case ghcExts_opt of
122>                               UseGhcExts is os -> ("-ghc", is, os)
123>                               _                -> ("", "", "")
124>       base <- readFile (base_template templdir)
125>       --writeFile (basename ++ ".si") (unlines $ map show sem_info)
126>       writeFile (basename ++ "Data.hs") (content base opts $ "")
127
128>       lib <- readFile (lib_template templdir ++ ext ++ debug_ext)
129>       writeFile (basename ++ ".hs") (lib_content imps opts lib)
130>  where
131>   (_,_,ghcExts_opt) = options
132
133Extract the module name from the given module declaration, if it exists.
134
135>   m_mod_decl = find isModKW . zip [0..] . tails . (' ':) =<< header
136>   isModKW (_, c0:'m':'o':'d':'u':'l':'e':c1:_) = not (validIDChar c0 || validIDChar c1)
137>   isModKW _                                    = False
138>   validIDChar c      = isAlphaNum c || c `elem` "_'"
139>   validModNameChar c = validIDChar c || c == '.'
140>   data_mod = mod_name ++ "Data"
141>   mod_name = case m_mod_decl of
142>     Just (_, md) -> takeWhile validModNameChar (dropWhile (not . validModNameChar) (drop 8 md))
143
144Or use a default based upon the filename (original behaviour).
145
146>     Nothing      -> reverse . takeWhile (`notElem` "\\/") $ reverse basename
147
148Remove the module declaration from the header so that the remainder of
149the header can be used in the generated code.
150
151>   header_sans_mod = flip (maybe header) m_mod_decl $ \ (mi, _) -> do
152>       hdr <- header
153
154Extract the string that comes before the module declaration...
155
156>       let (before, mod_decl) = splitAt mi hdr
157
158>       let isWhereKW (c0:'w':'h':'e':'r':'e':c1:_) = not (validIDChar c0 || validIDChar c1)
159>           isWhereKW _ = False
160>       let where_after = dropWhile (not . isWhereKW) . tails . (++ "\n") $ mod_decl
161>       let after = drop 6 . concat . take 1 $ where_after
162
163...and combine it with the string that comes after the 'where' keyword.
164
165>       return $ before ++ "\n" ++ after
166
167>   (sem_def, sem_info) = mkGSemType options g
168>   table_text = mkTbls tables sem_info (ghcExts_opt) g
169
170>   header_parts = fmap (span (\x -> take 3 (dropWhile isSpace x) == "{-#")
171>                                  . lines)
172>                       header_sans_mod
173>       -- Split off initial options, if they are present
174>       -- Assume these options ONLY related to code which is in
175>       --   parser tail or in sem. rules
176
177>   content base_defs opts
178>    = str ("{-# OPTIONS " ++ opts ++ " #-}")    .nl
179>    . str (unlines $ maybe [] fst header_parts) .nl
180>    . nl
181>    . str (comment "data")                      .nl .nl
182>    . str ("module " ++ data_mod ++ " where")   .nl
183
184>     . nl
185>     . maybestr (fmap (unlines.snd) header_parts) .nl
186>     . nl
187>     . str base_defs .nl
188>     . nl
189
190>    . let count_nls     = length . filter (=='\n')
191>          pre_trailer   = maybe 0 count_nls header_sans_mod -- check fmt below
192>                        + count_nls base_defs
193>                        + 10                           -- for the other stuff
194>          post_trailer  = pre_trailer + maybe 0 count_nls trailer + 4
195>      in
196>         str ("{-# LINE " ++ show pre_trailer ++ " "
197>                          ++ show (basename ++ "Data.hs") ++ "#-}")
198>               -- This should show a location in basename.y -- but Happy
199>               -- doesn't pass this info through. But we still avoid being
200>               -- told a location in GLR_Base!
201>       . nl
202>       . nl
203>       . maybestr trailer
204>       .nl
205>       .nl
206>       . str ("{-# LINE " ++ show post_trailer ++ " "
207>                          ++ show (basename ++ "Data.hs") ++ "#-}")
208>       . nl
209>       . nl
210
211>     . mkGSymbols g     .nl
212>     . nl
213>     . sem_def          .nl
214>     . nl
215>     . mkSemObjects  options (monad_sub g) sem_info      .nl
216>     . nl
217>     . mkDecodeUtils options (monad_sub g) sem_info      .nl
218>     . nl
219>     . user_def_token_code (token_type g)                .nl
220>     . nl
221>     . table_text
222
223>   lib_content imps opts lib_text
224>    = let (pre,_drop_me : post) = break (== "fakeimport DATA") $ lines lib_text
225>      in
226>      unlines [ "{-# OPTIONS " ++ opts ++ " #-}\n"
227>              , comment "driver" ++ "\n"
228>              , "module " ++ mod_name ++ "("
229>              , case lexer g of
230>                  Nothing     -> ""
231>                  Just (lf,_) -> "  " ++ lf ++ ","
232>              , "  " ++ start
233>              , ""
234>              , unlines pre
235>              , imps
236>              , "import " ++ data_mod
237>              , start ++ " = glr_parse "
238>              , "use_filtering = " ++ show use_filtering
239>              , "top_symbol = " ++ prefix ++ start_prod
240>              , unlines post
241>              ]
242>   start_prod = token_names g ! (let (_,_,i,_) = head $ starts g in i)
243>   use_filtering = case options of (_, UseFiltering,_) -> True
244>                                   _                   -> False
245
246> comment :: String -> String
247> comment which
248>  = "-- parser (" ++ which ++ ") produced by Happy (GLR) Version " ++
249>       showVersion version
250
251> user_def_token_code :: String -> String -> String
252> user_def_token_code tokenType
253>  = str "type UserDefTok = " . str tokenType                     . nl
254>  . str "instance TreeDecode " . brack tokenType . str " where"  . nl
255>  . str "  decode_b f (Branch (SemTok t) []) = [happy_return t]" . nl
256>  . str "instance LabelDecode " . brack tokenType . str " where" . nl
257>  . str "  unpack (SemTok t) = t"                                . nl
258
259
260%-----------------------------------------------------------------------------
261Formats the tables as code.
262
263> mkTbls :: (ActionTable        -- Action table from Happy
264>           ,GotoTable)         -- Goto table from Happy
265>        -> SemInfo             -- info about production mapping
266>        -> GhcExts             -- Use unboxed values?
267>        -> Grammar             -- Happy Grammar
268>        -> ShowS
269>
270> mkTbls (action,goto) sem_info exts g
271>  = let gsMap = mkGSymMap g
272>        semfn_map = mk_semfn_map sem_info
273>    in
274>      writeActionTbl action gsMap (semfn_map !) exts g
275>    . writeGotoTbl   goto   gsMap exts
276
277
278%-----------------------------------------------------------------------------
279Create a mapping of Happy grammar symbol integers to the data representation
280that will be used for them in the GLR parser.
281
282> mkGSymMap :: Grammar -> [(Name,String)]
283> mkGSymMap g
284>  =    [ -- (errorTok, prefix ++ "Error")
285>       ]
286>    ++ [ (i, prefix ++ (token_names g) ! i)
287>       | i <- user_non_terminals g ]   -- Non-terminals
288>    ++ [ (i, "HappyTok (" ++ mkMatch tok ++ ")")
289>       | (i,tok) <- token_specs g ]    -- Tokens (terminals)
290>    ++ [(eof_term g,"HappyEOF")]       -- EOF symbol (internal terminal)
291>  where
292>   mkMatch tok = case mapDollarDollar tok of
293>                   Nothing -> tok
294>                   Just fn -> fn "_"
295
296> toGSym :: [(Int, String)] -> Int -> String
297> toGSym gsMap i
298>  = case lookup i gsMap of
299>     Nothing -> error $ "No representation for symbol " ++ show i
300>     Just g  -> g
301
302
303%-----------------------------------------------------------------------------
304Take the ActionTable from Happy and turn it into a String representing a
305function that can be included as the action table in the GLR parser.
306It also shares identical reduction values as CAFs
307
308> writeActionTbl
309>  :: ActionTable -> [(Int,String)] -> (Name->String)
310>                                       -> GhcExts -> Grammar -> ShowS
311> writeActionTbl acTbl gsMap semfn_map exts g
312>  = interleave "\n"
313>  $ map str
314>  $ mkLines ++ [errorLine] ++ mkReductions
315>  where
316>   name      = "action"
317>   mkLines   = concatMap (mkState) (assocs acTbl)
318>   errorLine = name ++ " _ _ = Error"
319>   mkState (i,arr)
320>    = filter (/="") $ map (mkLine i) (assocs arr)
321>
322>   mkLine state (symInt,action)
323>    | symInt == errorTok       -- skip error productions
324>    = ""                       -- NB see ProduceCode's handling of these
325>    | otherwise
326>    = case action of
327>       LR'Fail     -> ""
328>       LR'MustFail -> ""
329>       _           -> unwords [ startLine , mkAct action ]
330>    where
331>     startLine
332>      = unwords [ name , show_st exts state, "(" , getTok , ") =" ]
333>     getTok = let tok = toGSym gsMap symInt
334>              in case mapDollarDollar tok of
335>                   Nothing -> tok
336>                   Just f  -> f "_"
337>   mkAct act
338>    = case act of
339>       LR'Shift newSt _ -> "Shift " ++ show newSt ++ " []"
340>       LR'Reduce r    _ -> "Reduce " ++ "[" ++ mkRed r ++ "]"
341>       LR'Accept        -> "Accept"
342>       LR'Multiple rs (LR'Shift st _)
343>                        -> "Shift " ++ show st ++ " " ++ mkReds rs
344>       LR'Multiple rs r@(LR'Reduce{})
345>                        -> "Reduce " ++ mkReds (r:rs)
346>       _ -> error "writeActionTbl/mkAct: Unhandled case"
347>    where
348>     mkReds rs = "[" ++ tail (concat [ "," ++ mkRed r | LR'Reduce r _ <- rs ]) ++ "]"
349
350>   mkRed r = "red_" ++ show r
351>   mkReductions = [ mkRedDefn p | p@(_,(n,_,_,_)) <- zip [0..] $ productions g
352>                                , n `notElem` start_productions g ]
353
354>   mkRedDefn (r, (lhs_id, rhs_ids, (_code,_dollar_vars), _))
355>    = mkRed r ++ " = ("++ lhs ++ "," ++ show arity ++ " :: Int," ++ sem ++")"
356>      where
357>         lhs = toGSym gsMap $ lhs_id
358>         arity = length rhs_ids
359>         sem = semfn_map r
360
361
362%-----------------------------------------------------------------------------
363Do the same with the Happy goto table.
364
365> writeGotoTbl :: GotoTable -> [(Int,String)] -> GhcExts -> ShowS
366> writeGotoTbl goTbl gsMap exts
367>  = interleave "\n" (map str $ filter (not.null) mkLines)
368>  . str errorLine . nl
369>  where
370>   name    = "goto"
371>   errorLine = "goto _ _ = " ++ show_st exts (negate 1)
372>   mkLines = map mkState (assocs goTbl)
373>
374>   mkState (i,arr)
375>    = unlines $ filter (/="") $ map (mkLine i) (assocs arr)
376>
377>   mkLine state (ntInt,goto)
378>    = case goto of
379>       NoGoto  -> ""
380>       Goto st -> unwords [ startLine , show_st exts st ]
381>    where
382>     startLine
383>      = unwords [ name , show_st exts state, getGSym , "=" ]
384>     getGSym = toGSym gsMap ntInt
385
386
387%-----------------------------------------------------------------------------
388Create the 'GSymbol' ADT for the symbols in the grammar
389
390> mkGSymbols :: Grammar -> ShowS
391> mkGSymbols g
392>  = str dec
393>  . str eof
394>  . str tok
395>  . interleave "\n" [ str " | " . str prefix . str sym . str " "
396>                    | sym <- syms ]
397>  . str der
398>    -- ++ eq_inst
399>    -- ++ ord_inst
400>  where
401>   dec  = "data GSymbol"
402>   eof  = " = HappyEOF"
403>   tok  = " | HappyTok {-!Int-} (" ++ token_type g ++ ")"
404>   der  = "   deriving (Show,Eq,Ord)"
405>   syms = [ token_names g ! i | i <- user_non_terminals g ]
406
407NOTES:
408Was considering avoiding use of Eq/Ord over tokens, but this then means
409hand-coding the Eq/Ord classes since we're over-riding the usual order
410except in one case.
411
412maybe possible to form a union and do some juggling, but this isn't that
413easy, eg input type of "action".
414
415plus, issues about how token info gets into TreeDecode sem values - which
416might be tricky to arrange.
417<>   eq_inst = "instance Eq GSymbol where"
418<>           : "  HappyTok i _ == HappyTok j _ = i == j"
419<>           : [ "  i == j = fromEnum i == fromEnum j"
420
421
422
423%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
424Semantic actions on rules.
425
426These are stored in a union type "GSem", and the semantic values are held
427on the branches created at the appropriate reduction.
428
429"GSem" type has one constructor per distinct type of semantic action and
430pattern of child usage.
431
432
433%-----------------------------------------------------------------------------
434Creating a type for storing semantic rules
435 - also collects information on code structure and constructor names, for
436   use in later stages.
437
438> type SemInfo
439>  = [(String, String, [Int], [((Int,Int), ([(Int,String)],String), [Int])])]
440
441> mkGSemType :: Options -> Grammar -> (ShowS, SemInfo)
442> mkGSemType (TreeDecode,_,_) g
443>  = (def, map snd syms)
444>  where
445>   mtype s = case monad_sub g of
446>               Nothing       -> s
447>               Just (ty,_,_) -> ty ++ ' ' : brack s ""
448
449>   def  = str "data GSem" . nl
450>        . str " = NoSem"  . nl
451>        . str (" | SemTok (" ++  token_type g ++ ")") . nl
452>        . interleave "\n" [ str " | " . str sym . str " "
453>                          | sym <- map fst syms ]
454>        . str "instance Show GSem where" . nl
455>        . interleave "\n" [ str "  show " . str c . str "{} = " . str (show c)
456>                          | (_,c,_,_) <- map snd syms ]
457
458>   syms = [ (c_name ++ " (" ++ ty ++ ")", (rty, c_name, mask, prod_info))
459>          | (i,this@(mask,args,rty)) <- zip [0..] (nub $ map fst info)
460>                                               -- find unique types (plus mask)
461>          , let c_name = "Sem_" ++ show i
462>          , let mrty = mtype rty
463>          , let ty = foldr (\l r -> l ++ " -> " ++ r) mrty args
464
465>          , let code_info = [ j_code | (that, j_code) <- info, this == that ]
466>          , let prod_info = [ ((i,k), code, js)
467>                            | (k,code) <- zip [0..] (nub $ map snd code_info)
468>                            , let js = [ j | (j,code2) <- code_info
469>                                           , code == code2 ]
470>                            ]
471>            -- collect specific info about productions with this type
472>          ]
473
474>   info = [ ((var_mask, args, i_ty), (j,(ts_pats,code)))
475>          | i <- user_non_terminals g
476>          , let i_ty = typeOf i
477>          , j <- lookupProdsOfName g i  -- all prod numbers
478>          , let (_,ts,(raw_code,dollar_vars),_) = lookupProdNo g j
479>          , let var_mask = map (\x -> x - 1) vars_used
480>                           where vars_used = sort $ nub dollar_vars
481>          , let args = [ typeOf $ ts !! v | v <- var_mask ]
482>          , let code | all isSpace raw_code = "()"
483>                     | otherwise            = raw_code
484>          , let ts_pats = [ (k+1,c) | k <- var_mask
485>                                    , (t,c) <- token_specs g
486>                                    , ts !! k == t ]
487>          ]
488
489>   typeOf n | n `elem` terminals g = token_type g
490>            | otherwise            = case types g ! n of
491>                                       Nothing -> "()"         -- default
492>                                       Just t  -> t
493
494> -- NB expects that such labels are Showable
495> mkGSemType (LabelDecode,_,_) g
496>  = (def, map snd syms)
497>  where
498>   def = str "data GSem" . nl
499>       . str " = NoSem"  . nl
500>       . str (" | SemTok (" ++  token_type g ++ ")")
501>       . interleave "\n" [ str " | "  . str sym . str " "
502>                         | sym <- map fst syms ]
503>       . str "   deriving (Show)" . nl
504
505>   syms = [ (c_name ++ " (" ++ ty ++ ")", (ty, c_name, mask, prod_info))
506>          | (i,this@(mask,ty)) <- zip [0..] (nub $ map fst info)
507>                                               -- find unique types
508>          , let c_name = "Sem_" ++ show i
509>          , let code_info = [ j_code | (that, j_code) <- info, this == that ]
510>          , let prod_info = [ ((i,k), code, js)
511>                            | (k,code) <- zip [0..] (nub $ map snd code_info)
512>                            , let js = [ j | (j,code2) <- code_info
513>                                           , code == code2 ]
514
515>                            ]
516>            -- collect specific info about productions with this type
517>          ]
518
519>   info = [ ((var_mask,i_ty), (j,(ts_pats,code)))
520>          | i <- user_non_terminals g
521>          , let i_ty = typeOf i
522>          , j <- lookupProdsOfName g i  -- all prod numbers
523>          , let (_,ts,(code,dollar_vars),_) = lookupProdNo g j
524>          , let var_mask = map (\x -> x - 1) vars_used
525>                           where vars_used = sort $ nub dollar_vars
526>          , let ts_pats = [ (k+1,c) | k <- var_mask
527>                                    , (t,c) <- token_specs g
528>                                    , ts !! k == t ]
529>          ]
530
531>   typeOf n = case types g ! n of
532>                Nothing -> "()"                -- default
533>                Just t  -> t
534
535
536%---------------------------------------
537Creates the appropriate semantic values.
538 - for label-decode, these are the code, but abstracted over the child indices
539 - for tree-decode, these are the code abstracted over the children's values
540
541> mkSemObjects :: Options -> MonadInfo -> SemInfo -> ShowS
542> mkSemObjects (LabelDecode,filter_opt,_) _ sem_info
543>  = interleave "\n"
544>  $ [   str (mkSemFn_Name ij)
545>      . str (" ns@(" ++ pat ++ "happy_rest) = ")
546>      . str (" Branch (" ++ c_name ++ " (" ++ code ++ ")) ")
547>      . str (nodes filter_opt)
548>    | (_ty, c_name, mask, prod_info) <- sem_info
549>    , (ij, (pats,code), _ps) <- prod_info
550>    , let pat | null mask = ""
551>              | otherwise = concatMap (\v -> mk_tok_binder pats (v+1) ++ ":")
552>                                      [0..maximum mask]
553
554>    , let nodes NoFiltering  = "ns"
555>          nodes UseFiltering = "(" ++ foldr (\l -> mkHappyVar (l+1) . showChar ':') "[])" mask
556>    ]
557>    where
558>       mk_tok_binder pats v
559>        = mk_binder (\s -> "(_,_,HappyTok (" ++ s ++ "))") pats v ""
560
561
562> mkSemObjects (TreeDecode,filter_opt,_) monad_info sem_info
563>  = interleave "\n"
564>  $ [   str (mkSemFn_Name ij)
565>      . str (" ns@(" ++ pat ++ "happy_rest) = ")
566>      . str (" Branch (" ++ c_name ++ " (" ++ sem ++ ")) ")
567>      . str (nodes filter_opt)
568>    | (_ty, c_name, mask, prod_info) <- sem_info
569>    , (ij, (pats,code), _) <- prod_info
570>    , let indent c = init $ unlines $ map (replicate 4 ' '++) $ lines c
571>    , let mcode = case monad_info of
572>                    Nothing -> code
573>                    Just (_,_,rtn) -> case code of
574>                                        '%':code' -> "\n" ++ indent code'
575>                                        _         -> rtn ++ " (" ++ code ++ ")"
576>    , let sem = foldr (\v t -> mk_lambda pats (v + 1) "" ++ t) mcode mask
577>    , let pat | null mask = ""
578>              | otherwise = concatMap (\v -> mkHappyVar (v+1) ":")
579>                                      [0..maximum mask]
580>    , let nodes NoFiltering  = "ns"
581>          nodes UseFiltering = "(" ++ foldr (\l -> mkHappyVar (l+1) . showChar ':') "[])" mask
582>    ]
583
584> mk_lambda :: [(Int, String)] -> Int -> String -> String
585> mk_lambda pats v
586>  = (\s -> "\\" ++ s ++ " -> ") . mk_binder id pats v
587
588> mk_binder :: (String -> String) -> [(Int, String)] -> Int -> String -> String
589> mk_binder wrap pats v
590>  = case lookup v pats of
591>       Nothing -> mkHappyVar v
592>       Just p  -> case mapDollarDollar p of
593>                     Nothing -> wrap . mkHappyVar v . showChar '@' . brack p
594>                     Just fn -> wrap . brack' (fn . mkHappyVar v)
595
596
597---
598standardise the naming scheme
599
600> mkSemFn_Name :: (Int, Int) -> String
601> mkSemFn_Name (i,j) = "semfn_" ++ show i ++ "_" ++ show j
602
603---
604maps production name to the underlying (possibly shared) semantic function
605
606> mk_semfn_map :: SemInfo -> Array Name String
607> mk_semfn_map sem_info
608>  = array (0,maximum $ map fst prod_map) prod_map
609>    where
610>        prod_map = [ (p, mkSemFn_Name ij)
611>                   | (_,_,_,pi') <- sem_info, (ij,_,ps) <- pi', p <- ps ]
612
613
614%-----------------------------------------------------------------------------
615Create default decoding functions
616
617Idea is that sem rules are stored as functions in the AbsSyn names, and
618only unpacked when needed. Using classes here to manage the unpacking.
619
620> mkDecodeUtils :: Options -> MonadInfo -> SemInfo -> ShowS
621> mkDecodeUtils (TreeDecode,filter_opt,_) monad_info seminfo
622>  = interleave "\n"
623>  $ map str (monad_defs monad_info)
624>    ++ map mk_inst ty_cs
625>    where
626>       ty_cs = [ (ty, [ (c_name, mask)
627>                      | (ty2, c_name, mask, _j_vs) <- seminfo
628>                      , ty2 == ty
629>                      ])
630>               | ty <- nub [ ty | (ty,_,_,_) <- seminfo ]
631>               ]               -- group by same type
632
633>       mk_inst (ty, cs_vs)
634>        = str ("instance TreeDecode (" ++ ty ++ ") where ") . nl
635>        . interleave "\n"
636>          [   str "  "
637>            . str ("decode_b f (Branch (" ++ c_name ++ " s)")
638>            . str (" (" ++ var_pat ++ ")) = ")
639>            . cross_prod monad_info "s" (nodes filter_opt)
640>          | (c_name, vs) <- cs_vs
641>          , let vars = [ "b_" ++ show n | n <- var_range filter_opt vs ]
642>          , let var_pat = foldr (\l r -> l ++ ":" ++ r) "_" vars
643>          , let nodes NoFiltering  = [ vars !! n | n <- vs ]
644>                nodes UseFiltering = vars
645>          ]
646
647>       var_range _            [] = []
648>       var_range NoFiltering  vs = [0 .. maximum vs ]
649>       var_range UseFiltering vs = [0 .. length vs - 1]
650
651>       cross_prod Nothing s_var nodes
652>        = cross_prod_ (char '[' . str s_var . char ']')
653>                      (map str nodes)
654>       cross_prod (Just (_,_,rtn)) s_var nodes
655>        = str "map happy_join $ "
656>        . cross_prod_ (char '[' . str rtn . char ' ' . str s_var . char ']')
657>                      (map str nodes)
658
659>       cross_prod_ = foldl (\s a -> brack'
660>                                  $ str "cross_fn"
661>                                  . char ' ' . s
662>                                  . str " $ decode f "
663>                                  . a)
664
665
666
667> mkDecodeUtils (LabelDecode,_,_) monad_info seminfo
668>  = interleave "\n"
669>  $ map str
670>  $ monad_defs monad_info ++ concatMap (mk_inst) ty_cs
671>    where
672>       ty_cs = [ (ty, [ (c_name, mask)
673>                      | (ty2, c_name, mask, _) <- seminfo
674>                      , ty2 == ty
675>                      ])
676>               | ty <- nub [ ty | (ty,_,_,_) <- seminfo ]
677>               ]               -- group by same type
678
679>       mk_inst (ty, cns)
680>        = ("instance LabelDecode (" ++ ty ++ ") where ")
681>        : [ "  unpack (" ++ c_name ++ " s) = s"
682>          | (c_name, _mask) <- cns ]
683
684
685---
686This selects the info used for monadic parser generation
687
688> type MonadInfo = Maybe (String,String,String)
689> monad_sub :: Grammar -> MonadInfo
690> monad_sub g
691>  = case monad g of
692>      (True, _, ty,bd,ret) -> Just (ty,bd,ret)
693>      _                    -> Nothing
694>    -- TMP: only use monad info if it was user-declared, and ignore ctxt
695>    -- TMP: otherwise default to non-monadic code
696>    -- TMP: (NB not sure of consequences of monads-everywhere yet)
697
698
699---
700form the various monad-related defs.
701
702> monad_defs :: MonadInfo -> [String]
703> monad_defs Nothing
704>  = [ "type Decode_Result a = a"
705>    , "happy_ap = ($)"
706>    , "happy_return = id"]
707> monad_defs (Just (ty,tn,rtn))
708>  = [ "happy_join x = (" ++ tn ++ ") x id"
709>    , "happy_ap f a = (" ++ tn ++ ") f (\\f -> (" ++ tn ++ ") a (\\a -> " ++ rtn ++ "(f a)))"
710>    , "type Decode_Result a = " ++ brack ty " a"
711>    , "happy_return = " ++ rtn ++ " :: a -> Decode_Result a"
712>    ]
713
714%-----------------------------------------------------------------------------
715Util Functions
716
717---
718remove Happy-generated start symbols.
719
720> user_non_terminals :: Grammar -> [Name]
721> user_non_terminals g
722>  = non_terminals g \\ start_productions g
723
724> start_productions :: Grammar -> [Name]
725> start_productions g = [ s | (_,s,_,_) <- starts g ]
726
727
728---
729
730> mkHappyVar :: Int -> String -> String
731> mkHappyVar n = str "happy_var_" . shows n
732