1Module for producing GLR (Tomita) parsing code.
2This module is designed as an extension to the Haskell parser generator Happy.
3
4(c) University of Durham, Ben Medlock 2001
5        -- initial code, for structure parsing
6(c) University of Durham, Paul Callaghan 2004
7        -- extension to semantic rules, and various optimisations
8%-----------------------------------------------------------------------------
9
10> module ProduceGLRCode ( produceGLRParser
11>                       , DecodeOption(..)
12>                       , FilterOption(..)
13>                       , GhcExts(..)
14>                       , Options
15>                       ) where
16
17> import Paths_happy ( version )
18> import GenUtils ( mapDollarDollar )
19> import GenUtils ( str, char, nl, brack, brack', interleave, maybestr )
20> import Grammar
21> import Data.Array
22> import Data.Char ( isSpace, isAlphaNum )
23> import Data.List ( nub, (\\), sort, find, tails )
24> import Data.Version ( showVersion )
25
26%-----------------------------------------------------------------------------
27File and Function Names
28
29> base_template, lib_template :: String -> String
30> base_template td = td ++ "/GLR_Base"          -- NB Happy uses / too
31> lib_template  td = td ++ "/GLR_Lib"           -- Windows accepts this?
32
33---
34prefix for production names, to avoid name clashes
35
36> prefix :: String
37> prefix = "G_"
38
39%-----------------------------------------------------------------------------
40This type represents choice of decoding style for the result
41
42> data DecodeOption
43>  = TreeDecode
44>  | LabelDecode
45
46---
47This type represents whether filtering done or not
48
49> data FilterOption
50>  = NoFiltering
51>  | UseFiltering
52
53---
54This type represents whether GHC extensions are used or not
55 - extra values are imports and ghc options reqd
56
57> data GhcExts
58>  = NoGhcExts
59>  | UseGhcExts String String           -- imports and options
60
61---
62this is where the exts matter
63
64> show_st :: GhcExts -> {-State-}Int -> String
65> show_st UseGhcExts{} = (++"#") . show
66> show_st NoGhcExts    = show
67
68---
69
70> type DebugMode = Bool
71> type Options = (DecodeOption, FilterOption, GhcExts)
72
73
74%-----------------------------------------------------------------------------
75Main exported function
76
77> produceGLRParser
78>        :: FilePath      -- Output file name
79>        -> String        -- Templates directory
80>        -> ActionTable   -- LR tables
81>        -> GotoTable     -- LR tables
82>        -> Maybe String  -- Module header
83>        -> Maybe String  -- User-defined stuff (token DT, lexer etc.)
84>        -> (DebugMode,Options)       -- selecting code-gen style
85>        -> Grammar       -- Happy Grammar
86>        -> IO ()
87
88> produceGLRParser outfilename template_dir action goto header trailer options g
89>  = do
90>     let basename  = takeWhile (/='.') outfilename
91>     let tbls  = (action,goto)
92>     (parseName,_,_,_) <- case starts g of
93>                          [s] -> return s
94>                          s:_ -> do
95>                                    putStrLn "GLR-Happy doesn't support multiple start points (yet)"
96>                                    putStrLn "Defaulting to first start point."
97>                                    return s
98>                          [] -> error "produceGLRParser: []"
99>     mkFiles basename tbls parseName template_dir header trailer options g
100
101
102%-----------------------------------------------------------------------------
103"mkFiles" generates the files containing the Tomita parsing code.
104It produces two files - one for the data (small template), and one for
105the driver and data strs (large template).
106
107> mkFiles :: FilePath     -- Root of Output file name
108>        -> (ActionTable
109>           ,GotoTable)   -- LR tables
110>        -> String        -- Start parse function name
111>        -> String        -- Templates directory
112>        -> Maybe String  -- Module header
113>        -> Maybe String  -- User-defined stuff (token DT, lexer etc.)
114>        -> (DebugMode,Options)       -- selecting code-gen style
115>        -> Grammar       -- Happy Grammar
116>        -> IO ()
117>
118> mkFiles basename tables start templdir header trailer (debug,options) g
119>  = do
120>       let debug_ext = if debug then "-debug" else ""
121>       let (ext,imps,opts) = case ghcExts_opt of
122>                               UseGhcExts is os -> ("-ghc", is, os)
123>                               _                -> ("", "", "")
124>       base <- readFile (base_template templdir)
125>       --writeFile (basename ++ ".si") (unlines $ map show sem_info)
126>       writeFile (basename ++ "Data.hs") (content base opts $ "")
127
128>       lib <- readFile (lib_template templdir ++ ext ++ debug_ext)
129>       writeFile (basename ++ ".hs") (lib_content imps opts lib)
130>  where
131>   (_,_,ghcExts_opt) = options
132
133Extract the module name from the given module declaration, if it exists.
134
135>   m_mod_decl = find isModKW . zip [0..] . tails . (' ':) =<< header
136>   isModKW (_, c0:'m':'o':'d':'u':'l':'e':c1:_) = not (validIDChar c0 || validIDChar c1)
137>   isModKW _                                    = False
138>   validIDChar c      = isAlphaNum c || c `elem` "_'"
139>   validModNameChar c = validIDChar c || c == '.'
140>   data_mod = mod_name ++ "Data"
141>   mod_name = case m_mod_decl of
142>     Just (_, md) -> takeWhile validModNameChar (dropWhile (not . validModNameChar) (drop 8 md))
143
144Or use a default based upon the filename (original behaviour).
145
146>     Nothing      -> reverse . takeWhile (`notElem` "\\/") $ reverse basename
147
148Remove the module declaration from the header so that the remainder of
149the header can be used in the generated code.
150
151>   header_sans_mod = flip (maybe header) m_mod_decl $ \ (mi, _) -> do
152>       hdr <- header
153
154Extract the string that comes before the module declaration...
155
156>       let (before, mod_decl) = splitAt mi hdr
157
158>       let isWhereKW (c0:'w':'h':'e':'r':'e':c1:_) = not (validIDChar c0 || validIDChar c1)
159>           isWhereKW _ = False
160>       let where_after = dropWhile (not . isWhereKW) . tails . (++ "\n") $ mod_decl
161>       let after = drop 6 . concat . take 1 $ where_after
162
163...and combine it with the string that comes after the 'where' keyword.
164
165>       return $ before ++ "\n" ++ after
166
167>   (sem_def, sem_info) = mkGSemType options g
168>   table_text = mkTbls tables sem_info (ghcExts_opt) g
169
170>   header_parts = fmap (span (\x -> take 3 (dropWhile isSpace x) == "{-#")
171>                                  . lines)
172>                       header_sans_mod
173>       -- Split off initial options, if they are present
174>       -- Assume these options ONLY related to code which is in
175>       --   parser tail or in sem. rules
176
177>   content base_defs opts
178>    = str ("{-# OPTIONS " ++ opts ++ " #-}")    .nl
179>    . str (unlines $ maybe [] fst header_parts) .nl
180>    . nl
181>    . str (comment "data")                      .nl .nl
182>    . str ("module " ++ data_mod ++ " where")   .nl
183
184>     . nl
185>     . maybestr (fmap (unlines.snd) header_parts) .nl
186>     . nl
187>     . str base_defs .nl
188>     . nl
189
190>    . let count_nls     = length . filter (=='\n')
191>          pre_trailer   = maybe 0 count_nls header_sans_mod -- check fmt below
192>                        + count_nls base_defs
193>                        + 10                           -- for the other stuff
194>          post_trailer  = pre_trailer + maybe 0 count_nls trailer + 4
195>      in
196>         str ("{-# LINE " ++ show pre_trailer ++ " "
197>                          ++ show (basename ++ "Data.hs") ++ "#-}")
198>               -- This should show a location in basename.y -- but Happy
199>               -- doesn't pass this info through. But we still avoid being
200>               -- told a location in GLR_Base!
201>       . nl
202>       . nl
203>       . maybestr trailer
204>       .nl
205>       .nl
206>       . str ("{-# LINE " ++ show post_trailer ++ " "
207>                          ++ show (basename ++ "Data.hs") ++ "#-}")
208>       . nl
209>       . nl
210
211>     . mkGSymbols g     .nl
212>     . nl
213>     . sem_def          .nl
214>     . nl
215>     . mkSemObjects  options (monad_sub g) sem_info      .nl
216>     . nl
217>     . mkDecodeUtils options (monad_sub g) sem_info      .nl
218>     . nl
219>     . user_def_token_code (token_type g)                .nl
220>     . nl
221>     . table_text
222
223>   lib_content imps opts lib_text
224>    = let (pre,_drop_me : post) = break (== "fakeimport DATA") $ lines lib_text
225>      in
226>      unlines [ "{-# OPTIONS " ++ opts ++ " #-}\n"
227>              , comment "driver" ++ "\n"
228>              , "module " ++ mod_name ++ "("
229>              , case lexer g of
230>                  Nothing     -> ""
231>                  Just (lf,_) -> "  " ++ lf ++ ","
232>              , "  " ++ start
233>              , ""
234>              , unlines pre
235>              , imps
236>              , "import " ++ data_mod
237>              , start ++ " = glr_parse "
238>              , "use_filtering = " ++ show use_filtering
239>              , "top_symbol = " ++ prefix ++ start_prod
240>              , unlines post
241>              ]
242>   start_prod = token_names g ! (let (_,_,i,_) = head $ starts g in i)
243>   use_filtering = case options of (_, UseFiltering,_) -> True
244>                                   _                   -> False
245
246> comment :: String -> String
247> comment which
248>  = "-- parser (" ++ which ++ ") produced by Happy (GLR) Version " ++
249>       showVersion version
250
251> user_def_token_code :: String -> String -> String
252> user_def_token_code tokenType
253>  = str "type UserDefTok = " . str tokenType                     . nl
254>  . str "instance TreeDecode " . brack tokenType . str " where"  . nl
255>  . str "  decode_b f (Branch (SemTok t) []) = [happy_return t]" . nl
256>  . str "instance LabelDecode " . brack tokenType . str " where" . nl
257>  . str "  unpack (SemTok t) = t"                                . nl
258
259
260%-----------------------------------------------------------------------------
261Formats the tables as code.
262
263> mkTbls :: (ActionTable        -- Action table from Happy
264>           ,GotoTable)         -- Goto table from Happy
265>        -> SemInfo             -- info about production mapping
266>        -> GhcExts             -- Use unboxed values?
267>        -> Grammar             -- Happy Grammar
268>        -> ShowS
269>
270> mkTbls (action,goto) sem_info exts g
271>  = let gsMap = mkGSymMap g
272>        semfn_map = mk_semfn_map sem_info
273>    in
274>      writeActionTbl action gsMap (semfn_map !) exts g
275>    . writeGotoTbl   goto   gsMap exts
276
277
278%-----------------------------------------------------------------------------
279Create a mapping of Happy grammar symbol integers to the data representation
280that will be used for them in the GLR parser.
281
282> mkGSymMap :: Grammar -> [(Name,String)]
283> mkGSymMap g
284>  =    [ -- (errorTok, prefix ++ "Error")
285>       ]
286>    ++ [ (i, prefix ++ (token_names g) ! i)
287>       | i <- user_non_terminals g ]   -- Non-terminals
288>    ++ [ (i, "HappyTok (" ++ mkMatch tok ++ ")")
289>       | (i,tok) <- token_specs g ]    -- Tokens (terminals)
290>    ++ [(eof_term g,"HappyEOF")]       -- EOF symbol (internal terminal)
291>  where
292>   mkMatch tok = case mapDollarDollar tok of
293>                   Nothing -> tok
294>                   Just fn -> fn "_"
295
296> toGSym :: [(Int, String)] -> Int -> String
297> toGSym gsMap i
298>  = case lookup i gsMap of
299>     Nothing -> error $ "No representation for symbol " ++ show i
300>     Just g  -> g
301
302
303%-----------------------------------------------------------------------------
304Take the ActionTable from Happy and turn it into a String representing a
305function that can be included as the action table in the GLR parser.
306It also shares identical reduction values as CAFs
307
308> writeActionTbl
309>  :: ActionTable -> [(Int,String)] -> (Name->String)
310>                                       -> GhcExts -> Grammar -> ShowS
311> writeActionTbl acTbl gsMap semfn_map exts g
312>  = interleave "\n"
313>  $ map str
314>  $ mkLines ++ [errorLine] ++ mkReductions
315>  where
316>   name      = "action"
317>   mkLines   = concatMap (mkState) (assocs acTbl)
318>   errorLine = name ++ " _ _ = Error"
319>   mkState (i,arr)
320>    = filter (/="") $ map (mkLine i) (assocs arr)
321>
322>   mkLine state (symInt,action)
323>    | symInt == errorTok       -- skip error productions
324>    = ""                       -- NB see ProduceCode's handling of these
325>    | otherwise
326>    = case action of
327>       LR'Fail     -> ""
328>       LR'MustFail -> ""
329>       _           -> unwords [ startLine , mkAct action ]
330>    where
331>     startLine
332>      = unwords [ name , show_st exts state, "(" , getTok , ") =" ]
333>     getTok = let tok = toGSym gsMap symInt
334>              in case mapDollarDollar tok of
335>                   Nothing -> tok
336>                   Just f  -> f "_"
337>   mkAct act
338>    = case act of
339>       LR'Shift newSt _ -> "Shift " ++ show newSt ++ " []"
340>       LR'Reduce r    _ -> "Reduce " ++ "[" ++ mkRed r ++ "]"
341>       LR'Accept        -> "Accept"
342>       LR'Multiple rs (LR'Shift st _)
343>                        -> "Shift " ++ show st ++ " " ++ mkReds rs
344>       LR'Multiple rs r@(LR'Reduce{})
345>                        -> "Reduce " ++ mkReds (r:rs)
346>       _ -> error "writeActionTbl/mkAct: Unhandled case"
347>    where
348>     mkReds rs = "[" ++ tail (concat [ "," ++ mkRed r | LR'Reduce r _ <- rs ]) ++ "]"
349
350>   mkRed r = "red_" ++ show r
351>   mkReductions = [ mkRedDefn p
352>                  | p@(_, Production n _ _ _) <- zip [0..] $ productions g
353>                  , n `notElem` start_productions g ]
354
355>   mkRedDefn (r, Production lhs_id rhs_ids (_code,_dollar_vars) _)
356>    = mkRed r ++ " = ("++ lhs ++ "," ++ show arity ++ " :: Int," ++ sem ++")"
357>      where
358>         lhs = toGSym gsMap $ lhs_id
359>         arity = length rhs_ids
360>         sem = semfn_map r
361
362
363%-----------------------------------------------------------------------------
364Do the same with the Happy goto table.
365
366> writeGotoTbl :: GotoTable -> [(Int,String)] -> GhcExts -> ShowS
367> writeGotoTbl goTbl gsMap exts
368>  = interleave "\n" (map str $ filter (not.null) mkLines)
369>  . str errorLine . nl
370>  where
371>   name    = "goto"
372>   errorLine = "goto _ _ = " ++ show_st exts (negate 1)
373>   mkLines = map mkState (assocs goTbl)
374>
375>   mkState (i,arr)
376>    = unlines $ filter (/="") $ map (mkLine i) (assocs arr)
377>
378>   mkLine state (ntInt,goto)
379>    = case goto of
380>       NoGoto  -> ""
381>       Goto st -> unwords [ startLine , show_st exts st ]
382>    where
383>     startLine
384>      = unwords [ name , show_st exts state, getGSym , "=" ]
385>     getGSym = toGSym gsMap ntInt
386
387
388%-----------------------------------------------------------------------------
389Create the 'GSymbol' ADT for the symbols in the grammar
390
391> mkGSymbols :: Grammar -> ShowS
392> mkGSymbols g
393>  = str dec
394>  . str eof
395>  . str tok
396>  . interleave "\n" [ str " | " . str prefix . str sym . str " "
397>                    | sym <- syms ]
398>  . str der
399>    -- ++ eq_inst
400>    -- ++ ord_inst
401>  where
402>   dec  = "data GSymbol"
403>   eof  = " = HappyEOF"
404>   tok  = " | HappyTok {-!Int-} (" ++ token_type g ++ ")"
405>   der  = "   deriving (Show,Eq,Ord)"
406>   syms = [ token_names g ! i | i <- user_non_terminals g ]
407
408NOTES:
409Was considering avoiding use of Eq/Ord over tokens, but this then means
410hand-coding the Eq/Ord classes since we're over-riding the usual order
411except in one case.
412
413maybe possible to form a union and do some juggling, but this isn't that
414easy, eg input type of "action".
415
416plus, issues about how token info gets into TreeDecode sem values - which
417might be tricky to arrange.
418<>   eq_inst = "instance Eq GSymbol where"
419<>           : "  HappyTok i _ == HappyTok j _ = i == j"
420<>           : [ "  i == j = fromEnum i == fromEnum j"
421
422
423
424%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
425Semantic actions on rules.
426
427These are stored in a union type "GSem", and the semantic values are held
428on the branches created at the appropriate reduction.
429
430"GSem" type has one constructor per distinct type of semantic action and
431pattern of child usage.
432
433
434%-----------------------------------------------------------------------------
435Creating a type for storing semantic rules
436 - also collects information on code structure and constructor names, for
437   use in later stages.
438
439> type SemInfo
440>  = [(String, String, [Int], [((Int,Int), ([(Int,String)],String), [Int])])]
441
442> mkGSemType :: Options -> Grammar -> (ShowS, SemInfo)
443> mkGSemType (TreeDecode,_,_) g
444>  = (def, map snd syms)
445>  where
446>   mtype s = case monad_sub g of
447>               Nothing       -> s
448>               Just (ty,_,_) -> ty ++ ' ' : brack s ""
449
450>   def  = str "data GSem" . nl
451>        . str " = NoSem"  . nl
452>        . str (" | SemTok (" ++  token_type g ++ ")") . nl
453>        . interleave "\n" [ str " | " . str sym . str " "
454>                          | sym <- map fst syms ]
455>        . str "instance Show GSem where" . nl
456>        . interleave "\n" [ str "  show " . str c . str "{} = " . str (show c)
457>                          | (_,c,_,_) <- map snd syms ]
458
459>   syms = [ (c_name ++ " (" ++ ty ++ ")", (rty, c_name, mask, prod_info))
460>          | (i,this@(mask,args,rty)) <- zip [0..] (nub $ map fst info)
461>                                               -- find unique types (plus mask)
462>          , let c_name = "Sem_" ++ show i
463>          , let mrty = mtype rty
464>          , let ty = foldr (\l r -> l ++ " -> " ++ r) mrty args
465
466>          , let code_info = [ j_code | (that, j_code) <- info, this == that ]
467>          , let prod_info = [ ((i,k), code, js)
468>                            | (k,code) <- zip [0..] (nub $ map snd code_info)
469>                            , let js = [ j | (j,code2) <- code_info
470>                                           , code == code2 ]
471>                            ]
472>            -- collect specific info about productions with this type
473>          ]
474
475>   info = [ ((var_mask, args, i_ty), (j,(ts_pats,code)))
476>          | i <- user_non_terminals g
477>          , let i_ty = typeOf i
478>          , j <- lookupProdsOfName g i  -- all prod numbers
479>          , let Production _ ts (raw_code,dollar_vars) _ = lookupProdNo g j
480>          , let var_mask = map (\x -> x - 1) vars_used
481>                           where vars_used = sort $ nub dollar_vars
482>          , let args = [ typeOf $ ts !! v | v <- var_mask ]
483>          , let code | all isSpace raw_code = "()"
484>                     | otherwise            = raw_code
485>          , let ts_pats = [ (k+1,c) | k <- var_mask
486>                                    , (t,c) <- token_specs g
487>                                    , ts !! k == t ]
488>          ]
489
490>   typeOf n | n `elem` terminals g = token_type g
491>            | otherwise            = case types g ! n of
492>                                       Nothing -> "()"         -- default
493>                                       Just t  -> t
494
495> -- NB expects that such labels are Showable
496> mkGSemType (LabelDecode,_,_) g
497>  = (def, map snd syms)
498>  where
499>   def = str "data GSem" . nl
500>       . str " = NoSem"  . nl
501>       . str (" | SemTok (" ++  token_type g ++ ")")
502>       . interleave "\n" [ str " | "  . str sym . str " "
503>                         | sym <- map fst syms ]
504>       . str "   deriving (Show)" . nl
505
506>   syms = [ (c_name ++ " (" ++ ty ++ ")", (ty, c_name, mask, prod_info))
507>          | (i,this@(mask,ty)) <- zip [0..] (nub $ map fst info)
508>                                               -- find unique types
509>          , let c_name = "Sem_" ++ show i
510>          , let code_info = [ j_code | (that, j_code) <- info, this == that ]
511>          , let prod_info = [ ((i,k), code, js)
512>                            | (k,code) <- zip [0..] (nub $ map snd code_info)
513>                            , let js = [ j | (j,code2) <- code_info
514>                                           , code == code2 ]
515
516>                            ]
517>            -- collect specific info about productions with this type
518>          ]
519
520>   info = [ ((var_mask,i_ty), (j,(ts_pats,code)))
521>          | i <- user_non_terminals g
522>          , let i_ty = typeOf i
523>          , j <- lookupProdsOfName g i  -- all prod numbers
524>          , let Production _ ts (code,dollar_vars) _ = lookupProdNo g j
525>          , let var_mask = map (\x -> x - 1) vars_used
526>                           where vars_used = sort $ nub dollar_vars
527>          , let ts_pats = [ (k+1,c) | k <- var_mask
528>                                    , (t,c) <- token_specs g
529>                                    , ts !! k == t ]
530>          ]
531
532>   typeOf n = case types g ! n of
533>                Nothing -> "()"                -- default
534>                Just t  -> t
535
536
537%---------------------------------------
538Creates the appropriate semantic values.
539 - for label-decode, these are the code, but abstracted over the child indices
540 - for tree-decode, these are the code abstracted over the children's values
541
542> mkSemObjects :: Options -> MonadInfo -> SemInfo -> ShowS
543> mkSemObjects (LabelDecode,filter_opt,_) _ sem_info
544>  = interleave "\n"
545>  $ [   str (mkSemFn_Name ij)
546>      . str (" ns@(" ++ pat ++ "happy_rest) = ")
547>      . str (" Branch (" ++ c_name ++ " (" ++ code ++ ")) ")
548>      . str (nodes filter_opt)
549>    | (_ty, c_name, mask, prod_info) <- sem_info
550>    , (ij, (pats,code), _ps) <- prod_info
551>    , let pat | null mask = ""
552>              | otherwise = concatMap (\v -> mk_tok_binder pats (v+1) ++ ":")
553>                                      [0..maximum mask]
554
555>    , let nodes NoFiltering  = "ns"
556>          nodes UseFiltering = "(" ++ foldr (\l -> mkHappyVar (l+1) . showChar ':') "[])" mask
557>    ]
558>    where
559>       mk_tok_binder pats v
560>        = mk_binder (\s -> "(_,_,HappyTok (" ++ s ++ "))") pats v ""
561
562
563> mkSemObjects (TreeDecode,filter_opt,_) monad_info sem_info
564>  = interleave "\n"
565>  $ [   str (mkSemFn_Name ij)
566>      . str (" ns@(" ++ pat ++ "happy_rest) = ")
567>      . str (" Branch (" ++ c_name ++ " (" ++ sem ++ ")) ")
568>      . str (nodes filter_opt)
569>    | (_ty, c_name, mask, prod_info) <- sem_info
570>    , (ij, (pats,code), _) <- prod_info
571>    , let indent c = init $ unlines $ map (replicate 4 ' '++) $ lines c
572>    , let mcode = case monad_info of
573>                    Nothing -> code
574>                    Just (_,_,rtn) -> case code of
575>                                        '%':code' -> "\n" ++ indent code'
576>                                        _         -> rtn ++ " (" ++ code ++ ")"
577>    , let sem = foldr (\v t -> mk_lambda pats (v + 1) "" ++ t) mcode mask
578>    , let pat | null mask = ""
579>              | otherwise = concatMap (\v -> mkHappyVar (v+1) ":")
580>                                      [0..maximum mask]
581>    , let nodes NoFiltering  = "ns"
582>          nodes UseFiltering = "(" ++ foldr (\l -> mkHappyVar (l+1) . showChar ':') "[])" mask
583>    ]
584
585> mk_lambda :: [(Int, String)] -> Int -> String -> String
586> mk_lambda pats v
587>  = (\s -> "\\" ++ s ++ " -> ") . mk_binder id pats v
588
589> mk_binder :: (String -> String) -> [(Int, String)] -> Int -> String -> String
590> mk_binder wrap pats v
591>  = case lookup v pats of
592>       Nothing -> mkHappyVar v
593>       Just p  -> case mapDollarDollar p of
594>                     Nothing -> wrap . mkHappyVar v . showChar '@' . brack p
595>                     Just fn -> wrap . brack' (fn . mkHappyVar v)
596
597
598---
599standardise the naming scheme
600
601> mkSemFn_Name :: (Int, Int) -> String
602> mkSemFn_Name (i,j) = "semfn_" ++ show i ++ "_" ++ show j
603
604---
605maps production name to the underlying (possibly shared) semantic function
606
607> mk_semfn_map :: SemInfo -> Array Name String
608> mk_semfn_map sem_info
609>  = array (0,maximum $ map fst prod_map) prod_map
610>    where
611>        prod_map = [ (p, mkSemFn_Name ij)
612>                   | (_,_,_,pi') <- sem_info, (ij,_,ps) <- pi', p <- ps ]
613
614
615%-----------------------------------------------------------------------------
616Create default decoding functions
617
618Idea is that sem rules are stored as functions in the AbsSyn names, and
619only unpacked when needed. Using classes here to manage the unpacking.
620
621> mkDecodeUtils :: Options -> MonadInfo -> SemInfo -> ShowS
622> mkDecodeUtils (TreeDecode,filter_opt,_) monad_info seminfo
623>  = interleave "\n"
624>  $ map str (monad_defs monad_info)
625>    ++ map mk_inst ty_cs
626>    where
627>       ty_cs = [ (ty, [ (c_name, mask)
628>                      | (ty2, c_name, mask, _j_vs) <- seminfo
629>                      , ty2 == ty
630>                      ])
631>               | ty <- nub [ ty | (ty,_,_,_) <- seminfo ]
632>               ]               -- group by same type
633
634>       mk_inst (ty, cs_vs)
635>        = str ("instance TreeDecode (" ++ ty ++ ") where ") . nl
636>        . interleave "\n"
637>          [   str "  "
638>            . str ("decode_b f (Branch (" ++ c_name ++ " s)")
639>            . str (" (" ++ var_pat ++ ")) = ")
640>            . cross_prod monad_info "s" (nodes filter_opt)
641>          | (c_name, vs) <- cs_vs
642>          , let vars = [ "b_" ++ show n | n <- var_range filter_opt vs ]
643>          , let var_pat = foldr (\l r -> l ++ ":" ++ r) "_" vars
644>          , let nodes NoFiltering  = [ vars !! n | n <- vs ]
645>                nodes UseFiltering = vars
646>          ]
647
648>       var_range _            [] = []
649>       var_range NoFiltering  vs = [0 .. maximum vs ]
650>       var_range UseFiltering vs = [0 .. length vs - 1]
651
652>       cross_prod Nothing s_var nodes
653>        = cross_prod_ (char '[' . str s_var . char ']')
654>                      (map str nodes)
655>       cross_prod (Just (_,_,rtn)) s_var nodes
656>        = str "map happy_join $ "
657>        . cross_prod_ (char '[' . str rtn . char ' ' . str s_var . char ']')
658>                      (map str nodes)
659
660>       cross_prod_ = foldl (\s a -> brack'
661>                                  $ str "cross_fn"
662>                                  . char ' ' . s
663>                                  . str " $ decode f "
664>                                  . a)
665
666
667
668> mkDecodeUtils (LabelDecode,_,_) monad_info seminfo
669>  = interleave "\n"
670>  $ map str
671>  $ monad_defs monad_info ++ concatMap (mk_inst) ty_cs
672>    where
673>       ty_cs = [ (ty, [ (c_name, mask)
674>                      | (ty2, c_name, mask, _) <- seminfo
675>                      , ty2 == ty
676>                      ])
677>               | ty <- nub [ ty | (ty,_,_,_) <- seminfo ]
678>               ]               -- group by same type
679
680>       mk_inst (ty, cns)
681>        = ("instance LabelDecode (" ++ ty ++ ") where ")
682>        : [ "  unpack (" ++ c_name ++ " s) = s"
683>          | (c_name, _mask) <- cns ]
684
685
686---
687This selects the info used for monadic parser generation
688
689> type MonadInfo = Maybe (String,String,String)
690> monad_sub :: Grammar -> MonadInfo
691> monad_sub g
692>  = case monad g of
693>      (True, _, ty,bd,ret) -> Just (ty,bd,ret)
694>      _                    -> Nothing
695>    -- TMP: only use monad info if it was user-declared, and ignore ctxt
696>    -- TMP: otherwise default to non-monadic code
697>    -- TMP: (NB not sure of consequences of monads-everywhere yet)
698
699
700---
701form the various monad-related defs.
702
703> monad_defs :: MonadInfo -> [String]
704> monad_defs Nothing
705>  = [ "type Decode_Result a = a"
706>    , "happy_ap = ($)"
707>    , "happy_return = id"]
708> monad_defs (Just (ty,tn,rtn))
709>  = [ "happy_join x = (" ++ tn ++ ") x id"
710>    , "happy_ap f a = (" ++ tn ++ ") f (\\f -> (" ++ tn ++ ") a (\\a -> " ++ rtn ++ "(f a)))"
711>    , "type Decode_Result a = " ++ brack ty " a"
712>    , "happy_return = " ++ rtn ++ " :: a -> Decode_Result a"
713>    ]
714
715%-----------------------------------------------------------------------------
716Util Functions
717
718---
719remove Happy-generated start symbols.
720
721> user_non_terminals :: Grammar -> [Name]
722> user_non_terminals g
723>  = non_terminals g \\ start_productions g
724
725> start_productions :: Grammar -> [Name]
726> start_productions g = [ s | (_,s,_,_) <- starts g ]
727
728
729---
730
731> mkHappyVar :: Int -> String -> String
732> mkHappyVar n = str "happy_var_" . shows n
733