1-- -----------------------------------------------------------------------------
2--
3-- Output.hs, part of Alex
4--
5-- (c) Simon Marlow 2003
6--
7-- Code-outputing and table-generation routines
8--
9-- ----------------------------------------------------------------------------}
10
11module Output (outputDFA) where
12
13import AbsSyn
14import CharSet
15import Util
16import qualified Map
17import qualified Data.IntMap as IntMap
18
19import Control.Monad.ST ( ST, runST )
20import Data.Array ( Array )
21import Data.Array.Base ( unsafeRead )
22import Data.Array.ST ( STUArray, newArray, readArray, writeArray, freeze )
23import Data.Array.Unboxed ( UArray, elems, (!), array, listArray )
24import Data.Maybe (isJust)
25import Data.Bits
26import Data.Char ( ord, chr )
27import Data.List ( maximumBy, sortBy, groupBy, mapAccumR )
28
29-- -----------------------------------------------------------------------------
30-- Printing the output
31
32outputDFA :: Target -> Int -> String -> Scheme -> DFA SNum Code -> ShowS
33outputDFA target _ _ scheme dfa
34  = interleave_shows nl
35        [outputBase, outputTable, outputCheck, outputDefault,
36         outputAccept, outputActions, outputSigs]
37  where
38    (base, table, check, deflt, accept) = mkTables dfa
39
40    intty = case target of
41      GhcTarget -> "Int#"
42      HaskellTarget -> "Int"
43
44    table_size = length table - 1
45    n_states   = length base - 1
46
47    base_nm   = "alex_base"
48    table_nm  = "alex_table"
49    check_nm  = "alex_check"
50    deflt_nm  = "alex_deflt"
51    accept_nm = "alex_accept"
52    actions_nm = "alex_actions"
53
54    outputBase    = do_array hexChars32 base_nm  n_states   base
55    outputTable   = do_array hexChars16 table_nm table_size table
56    outputCheck   = do_array hexChars16 check_nm table_size check
57    outputDefault = do_array hexChars16 deflt_nm n_states   deflt
58
59    formatArray :: String -> Int -> [ShowS] -> ShowS
60    formatArray constructFunction size contents =
61        str constructFunction
62      . str " (0 :: Int, " . shows size . str ")\n"
63      . str "  [ "
64      . interleave_shows (str "\n  , ") contents
65      . str "\n  ]"
66
67    do_array hex_chars nm upper_bound ints = -- trace ("do_array: " ++ nm) $
68     case target of
69      GhcTarget ->
70          str nm . str " :: AlexAddr\n"
71        . str nm . str " = AlexA#\n"
72        . str "  \"" . str (hex_chars ints) . str "\"#\n"
73
74      _ ->
75          str nm . str " :: Array Int Int\n"
76        . str nm . str " = "
77        . formatArray "listArray" upper_bound (map shows ints)
78        . nl
79
80    outputAccept :: ShowS
81    outputAccept =
82      -- Don't emit explicit type signature as it contains unknown user type,
83      -- see: https://github.com/simonmar/alex/issues/98
84      -- str accept_nm . str " :: Array Int (AlexAcc " . str userStateTy . str ")\n"
85        str accept_nm . str " = "
86      . formatArray "listArray" n_states (snd (mapAccumR outputAccs 0 accept))
87      . nl
88
89    gscanActionType res =
90        str "AlexPosn -> Char -> String -> Int -> ((Int, state) -> "
91      . str res . str ") -> (Int, state) -> " . str res
92
93    outputActions = signature . body
94      where
95        (nacts, acts) = mapAccumR outputActs 0 accept
96        actionsArray :: ShowS
97        actionsArray = formatArray "array" nacts (concat acts)
98        body :: ShowS
99        body = str actions_nm . str " = " . actionsArray . nl
100        signature :: ShowS
101        signature = case scheme of
102          Default { defaultTypeInfo = Just (Nothing, actionty) } ->
103              str actions_nm . str " :: Array Int (" . str actionty . str ")\n"
104          Default { defaultTypeInfo = Just (Just tyclasses, actionty) } ->
105              str actions_nm . str " :: (" . str tyclasses
106            . str ") => Array Int (" . str actionty . str ")\n"
107          GScan { gscanTypeInfo = Just (Nothing, toktype) } ->
108              str actions_nm . str " :: Array Int ("
109            . gscanActionType toktype . str ")\n"
110          GScan { gscanTypeInfo = Just (Just tyclasses, toktype) } ->
111              str actions_nm . str " :: (" . str tyclasses
112            . str ") => Array Int ("
113            . gscanActionType toktype . str ")\n"
114          Basic { basicStrType = strty,
115                  basicTypeInfo = Just (Nothing, toktype) } ->
116              str actions_nm . str " :: Array Int ("
117            . str (show strty) . str " -> " . str toktype
118            . str ")\n"
119          Basic { basicStrType = strty,
120                  basicTypeInfo = Just (Just tyclasses, toktype) } ->
121              str actions_nm . str " :: (" . str tyclasses
122            . str ") => Array Int ("
123            . str (show strty) . str " -> " . str toktype
124            . str ")\n"
125          Posn { posnByteString = isByteString,
126                 posnTypeInfo = Just (Nothing, toktype) } ->
127              str actions_nm . str " :: Array Int (AlexPosn -> "
128            . str (strtype isByteString) . str " -> " . str toktype
129            . str ")\n"
130          Posn { posnByteString = isByteString,
131                 posnTypeInfo = Just (Just tyclasses, toktype) } ->
132              str actions_nm . str " :: (" . str tyclasses
133            . str ") => Array Int (AlexPosn -> "
134            . str (strtype isByteString) . str " -> " . str toktype
135            . str ")\n"
136          Monad { monadByteString = isByteString,
137                  monadTypeInfo = Just (Nothing, toktype) } ->
138            let
139              actintty = if isByteString then "Int64" else "Int"
140            in
141              str actions_nm . str " :: Array Int (AlexInput -> "
142            . str actintty . str " -> Alex(" . str toktype . str "))\n"
143          Monad { monadByteString = isByteString,
144                  monadTypeInfo = Just (Just tyclasses, toktype) } ->
145            let
146              actintty = if isByteString then "Int64" else "Int"
147            in
148              str actions_nm . str " :: (" . str tyclasses
149            . str ") => Array Int (AlexInput -> "
150            . str actintty . str " -> Alex(" . str toktype . str "))\n"
151          _ ->
152              -- No type signature: we don't know what the type of the actions is.
153              -- str accept_nm . str " :: Array Int (Accept Code)\n"
154              id
155
156
157    outputSigs
158        = case scheme of
159          Default { defaultTypeInfo = Just (Nothing, toktype) } ->
160              str "alex_scan_tkn :: () -> AlexInput -> " . str intty
161            . str " -> " . str "AlexInput -> " . str intty
162            . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n"
163            . str "alexScanUser :: () -> AlexInput -> Int -> AlexReturn ("
164            . str toktype . str ")\n"
165            . str "alexScan :: AlexInput -> Int -> AlexReturn ("
166            . str toktype . str ")\n"
167          Default { defaultTypeInfo = Just (Just tyclasses, toktype) } ->
168              str "alex_scan_tkn :: () -> AlexInput -> " . str intty
169            . str " -> " . str "AlexInput -> " . str intty
170            . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n"
171            . str "alexScanUser :: (" . str tyclasses
172            . str ") => () -> AlexInput -> Int -> AlexReturn ("
173            . str toktype . str ")\n"
174            . str "alexScan :: (" . str tyclasses
175            . str ") => AlexInput -> Int -> AlexReturn ("
176            . str toktype . str ")\n"
177          GScan { gscanTypeInfo = Just (Nothing, toktype) } ->
178              str "alex_scan_tkn :: () -> AlexInput -> " . str intty
179            . str " -> " . str "AlexInput -> " . str intty
180            . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n"
181            . str "alexScanUser :: () -> AlexInput -> Int -> "
182            . str "AlexReturn (" . gscanActionType toktype . str ")\n"
183            . str "alexScan :: AlexInput -> Int -> AlexReturn ("
184            . gscanActionType toktype . str ")\n"
185          GScan { gscanTypeInfo = Just (Just tyclasses, toktype) } ->
186              str "alex_scan_tkn :: () -> AlexInput -> " . str intty
187            . str " -> " . str "AlexInput -> " . str intty
188            . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n"
189            . str "alexScanUser :: (" . str tyclasses
190            . str ") => () -> AlexInput -> Int -> AlexReturn ("
191            . gscanActionType toktype . str ")\n"
192            . str "alexScan :: (" . str tyclasses
193            . str ") => AlexInput -> Int -> AlexReturn ("
194            . gscanActionType toktype . str ")\n"
195          Basic { basicStrType = strty,
196                  basicTypeInfo = Just (Nothing, toktype) } ->
197              str "alex_scan_tkn :: () -> AlexInput -> " . str intty
198            . str " -> " . str "AlexInput -> " . str intty
199            . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n"
200            . str "alexScanUser :: () -> AlexInput -> Int -> AlexReturn ("
201            . str (show strty) . str " -> " . str toktype . str ")\n"
202            . str "alexScan :: AlexInput -> Int -> AlexReturn ("
203            . str (show strty) . str " -> " . str toktype . str ")\n"
204          Basic { basicStrType = strty,
205                  basicTypeInfo = Just (Just tyclasses, toktype) } ->
206              str "alex_scan_tkn :: () -> AlexInput -> " . str intty
207            . str " -> " . str "AlexInput -> " . str intty
208            . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n"
209            . str "alexScanUser :: (" . str tyclasses
210            . str ") => () -> AlexInput -> Int -> AlexReturn ("
211            . str (show strty) . str " -> " . str toktype . str ")\n"
212            . str "alexScan :: (" . str tyclasses
213            . str ") => AlexInput -> Int -> AlexReturn ("
214            . str (show strty) . str " -> " . str toktype . str ")\n"
215          Posn { posnByteString = isByteString,
216                 posnTypeInfo = Just (Nothing, toktype) } ->
217              str "alex_scan_tkn :: () -> AlexInput -> " . str intty
218            . str " -> " . str "AlexInput -> " . str intty
219            . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n"
220            . str "alexScanUser :: () -> AlexInput -> Int -> AlexReturn (AlexPosn -> "
221            . str (strtype isByteString) . str " -> " . str toktype . str ")\n"
222            . str "alexScan :: AlexInput -> Int -> AlexReturn (AlexPosn -> "
223            . str (strtype isByteString) . str " -> " . str toktype . str ")\n"
224          Posn { posnByteString = isByteString,
225                 posnTypeInfo = Just (Just tyclasses, toktype) } ->
226              str "alex_scan_tkn :: () -> AlexInput -> " . str intty
227            . str " -> " . str "AlexInput -> " . str intty
228            . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n"
229            . str "alexScanUser :: (" . str tyclasses
230            . str ") => () -> AlexInput -> Int -> AlexReturn (AlexPosn -> "
231            . str (strtype isByteString) . str " -> " . str toktype . str ")\n"
232            . str "alexScan :: (" . str tyclasses
233            . str ") => AlexInput -> Int -> AlexReturn (AlexPosn -> "
234            . str (strtype isByteString) . str " -> " . str toktype . str ")\n"
235          Monad { monadTypeInfo = Just (Nothing, toktype),
236                  monadByteString = isByteString,
237                  monadUserState = userState } ->
238            let
239              actintty = if isByteString then "Int64" else "Int"
240              userStateTy | userState = "AlexUserState"
241                          | otherwise = "()"
242            in
243              str "alex_scan_tkn :: " . str userStateTy
244            . str " -> AlexInput -> " . str intty
245            . str " -> " . str "AlexInput -> " . str intty
246            . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n"
247            . str "alexScanUser :: " . str userStateTy
248            . str " -> AlexInput -> Int -> AlexReturn ("
249            . str "AlexInput -> " . str actintty . str " -> Alex ("
250            . str toktype . str "))\n"
251            . str "alexScan :: AlexInput -> Int -> AlexReturn ("
252            . str "AlexInput -> " . str actintty
253            . str " -> Alex (" . str toktype . str "))\n"
254            . str "alexMonadScan :: Alex (" . str toktype . str ")\n"
255          Monad { monadTypeInfo = Just (Just tyclasses, toktype),
256                  monadByteString = isByteString,
257                  monadUserState = userState } ->
258            let
259              actintty = if isByteString then "Int64" else "Int"
260              userStateTy | userState = "AlexUserState"
261                          | otherwise = "()"
262            in
263              str "alex_scan_tkn :: " . str userStateTy
264            . str " -> AlexInput -> " . str intty
265            . str " -> " . str "AlexInput -> " . str intty
266            . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n"
267            . str "alexScanUser :: (" . str tyclasses . str ") => "
268            . str userStateTy . str " -> AlexInput -> Int -> AlexReturn ("
269            . str "AlexInput -> " . str actintty
270            . str " -> Alex (" . str toktype . str "))\n"
271            . str "alexScan :: (" . str tyclasses
272            . str ") => AlexInput -> Int -> AlexReturn ("
273            . str "AlexInput -> " . str actintty
274            . str " -> Alex (" . str toktype . str "))\n"
275            . str "alexMonadScan :: (" . str tyclasses
276            . str ") => Alex (" . str toktype . str ")\n"
277          _ ->
278              str ""
279
280    outputAccs :: Int -> [Accept Code] -> (Int, ShowS)
281    outputAccs idx [] = (idx, str "AlexAccNone")
282    outputAccs idx (Acc _ Nothing Nothing NoRightContext : [])
283      = (idx, str "AlexAccSkip")
284    outputAccs idx (Acc _ (Just _) Nothing NoRightContext : [])
285      = (idx + 1, str "AlexAcc " . str (show idx))
286    outputAccs idx (Acc _ Nothing lctx rctx : rest)
287      = let (idx', rest') = outputAccs idx rest
288        in (idx', str "AlexAccSkipPred" . space
289                 . paren (outputPred lctx rctx)
290                 . paren rest')
291    outputAccs idx (Acc _ (Just _) lctx rctx : rest)
292      = let (idx', rest') = outputAccs idx rest
293        in (idx' + 1, str "AlexAccPred" . space
294                      . str (show idx') . space
295                      . paren (outputPred lctx rctx)
296                      . paren rest')
297
298    outputActs :: Int -> [Accept Code] -> (Int, [ShowS])
299    outputActs idx =
300      let
301        outputAct _ (Acc _ Nothing _ _) = error "Shouldn't see this"
302        outputAct inneridx (Acc _ (Just act) _ _) =
303          (inneridx + 1, paren (shows inneridx . str "," . str act))
304      in
305        mapAccumR outputAct idx . filter (\(Acc _ act _ _) -> isJust act)
306
307    outputPred (Just set) NoRightContext
308        = outputLCtx set
309    outputPred Nothing rctx
310        = outputRCtx rctx
311    outputPred (Just set) rctx
312        = outputLCtx set
313        . str " `alexAndPred` "
314        . outputRCtx rctx
315
316    outputLCtx set = str "alexPrevCharMatches" . str (charSetQuote set)
317
318    outputRCtx NoRightContext = id
319    outputRCtx (RightContextRExp sn)
320        = str "alexRightContext " . shows sn
321    outputRCtx (RightContextCode code)
322        = str code
323
324    -- outputArr arr
325        -- = str "array " . shows (bounds arr) . space
326        -- . shows (assocs arr)
327
328-- -----------------------------------------------------------------------------
329-- Generating arrays.
330
331-- Here we use the table-compression algorithm described in section
332-- 3.9 of the dragon book, which is a common technique used by lexical
333-- analyser generators.
334
335-- We want to generate:
336--
337--    base :: Array SNum Int
338--              maps the current state to an offset in the main table
339--
340--    table :: Array Int SNum
341--              maps (base!state + char) to the next state
342--
343--    check :: Array Int SNum
344--              maps (base!state + char) to state if table entry is valid,
345--              otherwise we use the default for this state
346--
347--    default :: Array SNum SNum
348--              default production for this state
349--
350--    accept :: Array SNum [Accept Code]
351--              maps state to list of accept codes for this state
352--
353-- For each state, we decide what will be the default symbol (pick the
354-- most common).  We now have a mapping Char -> SNum, with one special
355-- state reserved as the default.
356
357
358mkTables :: DFA SNum Code
359         -> (
360              [Int],            -- base
361              [Int],            -- table
362              [Int],            -- check
363              [Int],            -- default
364              [[Accept Code]]   -- accept
365            )
366mkTables dfa = -- trace (show (defaults)) $
367               -- trace (show (fmap (length . snd)  dfa_no_defaults)) $
368  ( elems base_offs,
369     take max_off (elems table),
370     take max_off (elems check),
371     elems defaults,
372     accept
373  )
374 where
375        accept   = [ as | State as _ <- elems dfa_arr ]
376
377        state_assocs = Map.toAscList (dfa_states dfa)
378        n_states = length state_assocs
379        top_state = n_states - 1
380
381        dfa_arr :: Array SNum (State SNum Code)
382        dfa_arr = array (0,top_state) state_assocs
383
384        -- fill in all the error productions
385        expand_states =
386           [ expand (dfa_arr!state) | state <- [0..top_state] ]
387
388        expand (State _ out) =
389           [(i, lookup' out i) | i <- [0..0xff]]
390           where lookup' out' i = case IntMap.lookup i out' of
391                                        Nothing -> -1
392                                        Just s  -> s
393
394        defaults :: UArray SNum SNum
395        defaults = listArray (0,top_state) (map best_default expand_states)
396
397        -- find the most common destination state in a given state, and
398        -- make it the default.
399        best_default :: [(Int,SNum)] -> SNum
400        best_default prod_list
401           | null sorted = -1
402           | otherwise   = snd (head (maximumBy lengths eq))
403           where sorted  = sortBy compareSnds prod_list
404                 compareSnds (_,a) (_,b) = compare a b
405                 eq = groupBy (\(_,a) (_,b) -> a == b) sorted
406                 lengths  a b = length a `compare` length b
407
408        -- remove all the default productions from the DFA
409        dfa_no_defaults =
410          [ (s, prods_without_defaults s out)
411          | (s, out) <- zip [0..] expand_states
412          ]
413
414        prods_without_defaults s out
415          = [ (fromIntegral c, dest) | (c,dest) <- out, dest /= defaults!s ]
416
417        (base_offs, table, check, max_off)
418           = runST (genTables n_states 255 dfa_no_defaults)
419
420
421genTables
422         :: Int                         -- number of states
423         -> Int                         -- maximum token no.
424         -> [(SNum,[(Int,SNum)])]       -- entries for the table
425         -> ST s (UArray Int Int,       -- base
426                  UArray Int Int,       -- table
427                  UArray Int Int,       -- check
428                  Int                   -- highest offset in table
429            )
430
431genTables n_states max_token entries = do
432
433  base       <- newArray (0, n_states-1) 0
434  table      <- newArray (0, mAX_TABLE_SIZE) 0
435  check      <- newArray (0, mAX_TABLE_SIZE) (-1)
436  off_arr    <- newArray (-max_token, mAX_TABLE_SIZE) 0
437
438  max_off    <- genTables' base table check off_arr entries max_token
439
440  base'      <- freeze base
441  table'     <- freeze table
442  check'     <- freeze check
443  return (base', table',check',max_off+1)
444
445  where mAX_TABLE_SIZE = n_states * (max_token + 1)
446
447
448genTables'
449         :: STUArray s Int Int          -- base
450         -> STUArray s Int Int          -- table
451         -> STUArray s Int Int          -- check
452         -> STUArray s Int Int          -- offset array
453         -> [(SNum,[(Int,SNum)])]       -- entries for the table
454         -> Int                         -- maximum token no.
455         -> ST s Int                    -- highest offset in table
456
457genTables' base table check off_arr entries max_token
458        = fit_all entries 0 1
459  where
460
461         fit_all [] max_off _ = return max_off
462         fit_all (s:ss) max_off fst_zero = do
463           (off, new_max_off, new_fst_zero) <- fit s max_off fst_zero
464           writeArray off_arr off 1
465           fit_all ss new_max_off new_fst_zero
466
467         -- fit a vector into the table.  Return the offset of the vector,
468         -- the maximum offset used in the table, and the offset of the first
469         -- entry in the table (used to speed up the lookups a bit).
470         fit (_,[]) max_off fst_zero = return (0,max_off,fst_zero)
471
472         fit (state_no, state@((t,_):_)) max_off fst_zero = do
473                 -- start at offset 1 in the table: all the empty states
474                 -- (states with just a default reduction) are mapped to
475                 -- offset zero.
476           off <- findFreeOffset (-t + fst_zero) check off_arr state
477           let new_max_off | furthest_right > max_off = furthest_right
478                           | otherwise                = max_off
479               furthest_right = off + max_token
480
481           --trace ("fit: state " ++ show state_no ++ ", off " ++ show off ++ ", elems " ++ show state) $ do
482
483           writeArray base state_no off
484           addState off table check state
485           new_fst_zero <- findFstFreeSlot check fst_zero
486           return (off, new_max_off, new_fst_zero)
487
488
489-- Find a valid offset in the table for this state.
490findFreeOffset :: Int
491               -> STUArray s Int Int
492               -> STUArray s Int Int
493               -> [(Int, Int)]
494               -> ST s Int
495findFreeOffset off check off_arr state = do
496    -- offset 0 isn't allowed
497  if off == 0 then try_next else do
498
499    -- don't use an offset we've used before
500  b <- readArray off_arr off
501  if b /= 0 then try_next else do
502
503    -- check whether the actions for this state fit in the table
504  ok <- fits off state check
505  if ok then return off else try_next
506 where
507        try_next = findFreeOffset (off+1) check off_arr state
508
509-- This is an inner loop, so we use some strictness hacks, and avoid
510-- array bounds checks (unsafeRead instead of readArray) to speed
511-- things up a bit.
512fits :: Int -> [(Int,Int)] -> STUArray s Int Int -> ST s Bool
513fits off [] check = off `seq` check `seq` return True -- strictness hacks
514fits off ((t,_):rest) check = do
515  i <- unsafeRead check (off+t)
516  if i /= -1 then return False
517             else fits off rest check
518
519addState :: Int -> STUArray s Int Int -> STUArray s Int Int -> [(Int, Int)]
520         -> ST s ()
521addState _   _     _     [] = return ()
522addState off table check ((t,val):state) = do
523   writeArray table (off+t) val
524   writeArray check (off+t) t
525   addState off table check state
526
527findFstFreeSlot :: STUArray s Int Int -> Int -> ST s Int
528findFstFreeSlot table n = do
529         i <- readArray table n
530         if i == -1 then return n
531                    else findFstFreeSlot table (n+1)
532
533-----------------------------------------------------------------------------
534-- Convert an integer to a 16-bit number encoded in \xNN\xNN format suitable
535-- for placing in a string (copied from Happy's ProduceCode.lhs)
536
537hexChars16 :: [Int] -> String
538hexChars16 acts = concat (map conv16 acts)
539  where
540    conv16 i | i > 0x7fff || i < -0x8000
541                = error ("Internal error: hexChars16: out of range: " ++ show i)
542             | otherwise
543                = hexChar16 i
544
545hexChars32 :: [Int] -> String
546hexChars32 acts = concat (map conv32 acts)
547  where
548    conv32 i = hexChar16 (i .&. 0xffff) ++
549                hexChar16 ((i `shiftR` 16) .&. 0xffff)
550
551hexChar16 :: Int -> String
552hexChar16 i = toHex (i .&. 0xff)
553                 ++ toHex ((i `shiftR` 8) .&. 0xff)  -- force little-endian
554
555toHex :: Int -> String
556toHex i = ['\\','x', hexDig (i `div` 16), hexDig (i `mod` 16)]
557
558hexDig :: Int -> Char
559hexDig i | i <= 9    = chr (i + ord '0')
560         | otherwise = chr (i - 10 + ord 'a')
561