1-- ----------------------------------------------------------------------------- 2-- 3-- Output.hs, part of Alex 4-- 5-- (c) Simon Marlow 2003 6-- 7-- Code-outputing and table-generation routines 8-- 9-- ----------------------------------------------------------------------------} 10 11module Output (outputDFA) where 12 13import AbsSyn 14import CharSet 15import Util 16import qualified Map 17import qualified Data.IntMap as IntMap 18 19import Control.Monad.ST ( ST, runST ) 20import Data.Array ( Array ) 21import Data.Array.Base ( unsafeRead ) 22import Data.Array.ST ( STUArray, newArray, readArray, writeArray, freeze ) 23import Data.Array.Unboxed ( UArray, elems, (!), array, listArray ) 24import Data.Maybe (isJust) 25import Data.Bits 26import Data.Char ( ord, chr ) 27import Data.List ( maximumBy, sortBy, groupBy, mapAccumR ) 28 29-- ----------------------------------------------------------------------------- 30-- Printing the output 31 32outputDFA :: Target -> Int -> String -> Scheme -> DFA SNum Code -> ShowS 33outputDFA target _ _ scheme dfa 34 = interleave_shows nl 35 [outputBase, outputTable, outputCheck, outputDefault, 36 outputAccept, outputActions, outputSigs] 37 where 38 (base, table, check, deflt, accept) = mkTables dfa 39 40 intty = case target of 41 GhcTarget -> "Int#" 42 HaskellTarget -> "Int" 43 44 table_size = length table - 1 45 n_states = length base - 1 46 47 base_nm = "alex_base" 48 table_nm = "alex_table" 49 check_nm = "alex_check" 50 deflt_nm = "alex_deflt" 51 accept_nm = "alex_accept" 52 actions_nm = "alex_actions" 53 54 outputBase = do_array hexChars32 base_nm n_states base 55 outputTable = do_array hexChars16 table_nm table_size table 56 outputCheck = do_array hexChars16 check_nm table_size check 57 outputDefault = do_array hexChars16 deflt_nm n_states deflt 58 59 formatArray :: String -> Int -> [ShowS] -> ShowS 60 formatArray constructFunction size contents = 61 str constructFunction 62 . str " (0 :: Int, " . shows size . str ")\n" 63 . str " [ " 64 . interleave_shows (str "\n , ") contents 65 . str "\n ]" 66 67 do_array hex_chars nm upper_bound ints = -- trace ("do_array: " ++ nm) $ 68 case target of 69 GhcTarget -> 70 str nm . str " :: AlexAddr\n" 71 . str nm . str " = AlexA#\n" 72 . str " \"" . str (hex_chars ints) . str "\"#\n" 73 74 _ -> 75 str nm . str " :: Array Int Int\n" 76 . str nm . str " = " 77 . formatArray "listArray" upper_bound (map shows ints) 78 . nl 79 80 outputAccept :: ShowS 81 outputAccept = 82 -- Don't emit explicit type signature as it contains unknown user type, 83 -- see: https://github.com/simonmar/alex/issues/98 84 -- str accept_nm . str " :: Array Int (AlexAcc " . str userStateTy . str ")\n" 85 str accept_nm . str " = " 86 . formatArray "listArray" n_states (snd (mapAccumR outputAccs 0 accept)) 87 . nl 88 89 gscanActionType res = 90 str "AlexPosn -> Char -> String -> Int -> ((Int, state) -> " 91 . str res . str ") -> (Int, state) -> " . str res 92 93 outputActions = signature . body 94 where 95 (nacts, acts) = mapAccumR outputActs 0 accept 96 actionsArray :: ShowS 97 actionsArray = formatArray "array" nacts (concat acts) 98 body :: ShowS 99 body = str actions_nm . str " = " . actionsArray . nl 100 signature :: ShowS 101 signature = case scheme of 102 Default { defaultTypeInfo = Just (Nothing, actionty) } -> 103 str actions_nm . str " :: Array Int (" . str actionty . str ")\n" 104 Default { defaultTypeInfo = Just (Just tyclasses, actionty) } -> 105 str actions_nm . str " :: (" . str tyclasses 106 . str ") => Array Int (" . str actionty . str ")\n" 107 GScan { gscanTypeInfo = Just (Nothing, toktype) } -> 108 str actions_nm . str " :: Array Int (" 109 . gscanActionType toktype . str ")\n" 110 GScan { gscanTypeInfo = Just (Just tyclasses, toktype) } -> 111 str actions_nm . str " :: (" . str tyclasses 112 . str ") => Array Int (" 113 . gscanActionType toktype . str ")\n" 114 Basic { basicStrType = strty, 115 basicTypeInfo = Just (Nothing, toktype) } -> 116 str actions_nm . str " :: Array Int (" 117 . str (show strty) . str " -> " . str toktype 118 . str ")\n" 119 Basic { basicStrType = strty, 120 basicTypeInfo = Just (Just tyclasses, toktype) } -> 121 str actions_nm . str " :: (" . str tyclasses 122 . str ") => Array Int (" 123 . str (show strty) . str " -> " . str toktype 124 . str ")\n" 125 Posn { posnByteString = isByteString, 126 posnTypeInfo = Just (Nothing, toktype) } -> 127 str actions_nm . str " :: Array Int (AlexPosn -> " 128 . str (strtype isByteString) . str " -> " . str toktype 129 . str ")\n" 130 Posn { posnByteString = isByteString, 131 posnTypeInfo = Just (Just tyclasses, toktype) } -> 132 str actions_nm . str " :: (" . str tyclasses 133 . str ") => Array Int (AlexPosn -> " 134 . str (strtype isByteString) . str " -> " . str toktype 135 . str ")\n" 136 Monad { monadByteString = isByteString, 137 monadTypeInfo = Just (Nothing, toktype) } -> 138 let 139 actintty = if isByteString then "Int64" else "Int" 140 in 141 str actions_nm . str " :: Array Int (AlexInput -> " 142 . str actintty . str " -> Alex(" . str toktype . str "))\n" 143 Monad { monadByteString = isByteString, 144 monadTypeInfo = Just (Just tyclasses, toktype) } -> 145 let 146 actintty = if isByteString then "Int64" else "Int" 147 in 148 str actions_nm . str " :: (" . str tyclasses 149 . str ") => Array Int (AlexInput -> " 150 . str actintty . str " -> Alex(" . str toktype . str "))\n" 151 _ -> 152 -- No type signature: we don't know what the type of the actions is. 153 -- str accept_nm . str " :: Array Int (Accept Code)\n" 154 id 155 156 157 outputSigs 158 = case scheme of 159 Default { defaultTypeInfo = Just (Nothing, toktype) } -> 160 str "alex_scan_tkn :: () -> AlexInput -> " . str intty 161 . str " -> " . str "AlexInput -> " . str intty 162 . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n" 163 . str "alexScanUser :: () -> AlexInput -> Int -> AlexReturn (" 164 . str toktype . str ")\n" 165 . str "alexScan :: AlexInput -> Int -> AlexReturn (" 166 . str toktype . str ")\n" 167 Default { defaultTypeInfo = Just (Just tyclasses, toktype) } -> 168 str "alex_scan_tkn :: () -> AlexInput -> " . str intty 169 . str " -> " . str "AlexInput -> " . str intty 170 . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n" 171 . str "alexScanUser :: (" . str tyclasses 172 . str ") => () -> AlexInput -> Int -> AlexReturn (" 173 . str toktype . str ")\n" 174 . str "alexScan :: (" . str tyclasses 175 . str ") => AlexInput -> Int -> AlexReturn (" 176 . str toktype . str ")\n" 177 GScan { gscanTypeInfo = Just (Nothing, toktype) } -> 178 str "alex_scan_tkn :: () -> AlexInput -> " . str intty 179 . str " -> " . str "AlexInput -> " . str intty 180 . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n" 181 . str "alexScanUser :: () -> AlexInput -> Int -> " 182 . str "AlexReturn (" . gscanActionType toktype . str ")\n" 183 . str "alexScan :: AlexInput -> Int -> AlexReturn (" 184 . gscanActionType toktype . str ")\n" 185 GScan { gscanTypeInfo = Just (Just tyclasses, toktype) } -> 186 str "alex_scan_tkn :: () -> AlexInput -> " . str intty 187 . str " -> " . str "AlexInput -> " . str intty 188 . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n" 189 . str "alexScanUser :: (" . str tyclasses 190 . str ") => () -> AlexInput -> Int -> AlexReturn (" 191 . gscanActionType toktype . str ")\n" 192 . str "alexScan :: (" . str tyclasses 193 . str ") => AlexInput -> Int -> AlexReturn (" 194 . gscanActionType toktype . str ")\n" 195 Basic { basicStrType = strty, 196 basicTypeInfo = Just (Nothing, toktype) } -> 197 str "alex_scan_tkn :: () -> AlexInput -> " . str intty 198 . str " -> " . str "AlexInput -> " . str intty 199 . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n" 200 . str "alexScanUser :: () -> AlexInput -> Int -> AlexReturn (" 201 . str (show strty) . str " -> " . str toktype . str ")\n" 202 . str "alexScan :: AlexInput -> Int -> AlexReturn (" 203 . str (show strty) . str " -> " . str toktype . str ")\n" 204 Basic { basicStrType = strty, 205 basicTypeInfo = Just (Just tyclasses, toktype) } -> 206 str "alex_scan_tkn :: () -> AlexInput -> " . str intty 207 . str " -> " . str "AlexInput -> " . str intty 208 . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n" 209 . str "alexScanUser :: (" . str tyclasses 210 . str ") => () -> AlexInput -> Int -> AlexReturn (" 211 . str (show strty) . str " -> " . str toktype . str ")\n" 212 . str "alexScan :: (" . str tyclasses 213 . str ") => AlexInput -> Int -> AlexReturn (" 214 . str (show strty) . str " -> " . str toktype . str ")\n" 215 Posn { posnByteString = isByteString, 216 posnTypeInfo = Just (Nothing, toktype) } -> 217 str "alex_scan_tkn :: () -> AlexInput -> " . str intty 218 . str " -> " . str "AlexInput -> " . str intty 219 . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n" 220 . str "alexScanUser :: () -> AlexInput -> Int -> AlexReturn (AlexPosn -> " 221 . str (strtype isByteString) . str " -> " . str toktype . str ")\n" 222 . str "alexScan :: AlexInput -> Int -> AlexReturn (AlexPosn -> " 223 . str (strtype isByteString) . str " -> " . str toktype . str ")\n" 224 Posn { posnByteString = isByteString, 225 posnTypeInfo = Just (Just tyclasses, toktype) } -> 226 str "alex_scan_tkn :: () -> AlexInput -> " . str intty 227 . str " -> " . str "AlexInput -> " . str intty 228 . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n" 229 . str "alexScanUser :: (" . str tyclasses 230 . str ") => () -> AlexInput -> Int -> AlexReturn (AlexPosn -> " 231 . str (strtype isByteString) . str " -> " . str toktype . str ")\n" 232 . str "alexScan :: (" . str tyclasses 233 . str ") => AlexInput -> Int -> AlexReturn (AlexPosn -> " 234 . str (strtype isByteString) . str " -> " . str toktype . str ")\n" 235 Monad { monadTypeInfo = Just (Nothing, toktype), 236 monadByteString = isByteString, 237 monadUserState = userState } -> 238 let 239 actintty = if isByteString then "Int64" else "Int" 240 userStateTy | userState = "AlexUserState" 241 | otherwise = "()" 242 in 243 str "alex_scan_tkn :: " . str userStateTy 244 . str " -> AlexInput -> " . str intty 245 . str " -> " . str "AlexInput -> " . str intty 246 . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n" 247 . str "alexScanUser :: " . str userStateTy 248 . str " -> AlexInput -> Int -> AlexReturn (" 249 . str "AlexInput -> " . str actintty . str " -> Alex (" 250 . str toktype . str "))\n" 251 . str "alexScan :: AlexInput -> Int -> AlexReturn (" 252 . str "AlexInput -> " . str actintty 253 . str " -> Alex (" . str toktype . str "))\n" 254 . str "alexMonadScan :: Alex (" . str toktype . str ")\n" 255 Monad { monadTypeInfo = Just (Just tyclasses, toktype), 256 monadByteString = isByteString, 257 monadUserState = userState } -> 258 let 259 actintty = if isByteString then "Int64" else "Int" 260 userStateTy | userState = "AlexUserState" 261 | otherwise = "()" 262 in 263 str "alex_scan_tkn :: " . str userStateTy 264 . str " -> AlexInput -> " . str intty 265 . str " -> " . str "AlexInput -> " . str intty 266 . str " -> AlexLastAcc -> (AlexLastAcc, AlexInput)\n" 267 . str "alexScanUser :: (" . str tyclasses . str ") => " 268 . str userStateTy . str " -> AlexInput -> Int -> AlexReturn (" 269 . str "AlexInput -> " . str actintty 270 . str " -> Alex (" . str toktype . str "))\n" 271 . str "alexScan :: (" . str tyclasses 272 . str ") => AlexInput -> Int -> AlexReturn (" 273 . str "AlexInput -> " . str actintty 274 . str " -> Alex (" . str toktype . str "))\n" 275 . str "alexMonadScan :: (" . str tyclasses 276 . str ") => Alex (" . str toktype . str ")\n" 277 _ -> 278 str "" 279 280 outputAccs :: Int -> [Accept Code] -> (Int, ShowS) 281 outputAccs idx [] = (idx, str "AlexAccNone") 282 outputAccs idx (Acc _ Nothing Nothing NoRightContext : []) 283 = (idx, str "AlexAccSkip") 284 outputAccs idx (Acc _ (Just _) Nothing NoRightContext : []) 285 = (idx + 1, str "AlexAcc " . str (show idx)) 286 outputAccs idx (Acc _ Nothing lctx rctx : rest) 287 = let (idx', rest') = outputAccs idx rest 288 in (idx', str "AlexAccSkipPred" . space 289 . paren (outputPred lctx rctx) 290 . paren rest') 291 outputAccs idx (Acc _ (Just _) lctx rctx : rest) 292 = let (idx', rest') = outputAccs idx rest 293 in (idx' + 1, str "AlexAccPred" . space 294 . str (show idx') . space 295 . paren (outputPred lctx rctx) 296 . paren rest') 297 298 outputActs :: Int -> [Accept Code] -> (Int, [ShowS]) 299 outputActs idx = 300 let 301 outputAct _ (Acc _ Nothing _ _) = error "Shouldn't see this" 302 outputAct inneridx (Acc _ (Just act) _ _) = 303 (inneridx + 1, paren (shows inneridx . str "," . str act)) 304 in 305 mapAccumR outputAct idx . filter (\(Acc _ act _ _) -> isJust act) 306 307 outputPred (Just set) NoRightContext 308 = outputLCtx set 309 outputPred Nothing rctx 310 = outputRCtx rctx 311 outputPred (Just set) rctx 312 = outputLCtx set 313 . str " `alexAndPred` " 314 . outputRCtx rctx 315 316 outputLCtx set = str "alexPrevCharMatches" . str (charSetQuote set) 317 318 outputRCtx NoRightContext = id 319 outputRCtx (RightContextRExp sn) 320 = str "alexRightContext " . shows sn 321 outputRCtx (RightContextCode code) 322 = str code 323 324 -- outputArr arr 325 -- = str "array " . shows (bounds arr) . space 326 -- . shows (assocs arr) 327 328-- ----------------------------------------------------------------------------- 329-- Generating arrays. 330 331-- Here we use the table-compression algorithm described in section 332-- 3.9 of the dragon book, which is a common technique used by lexical 333-- analyser generators. 334 335-- We want to generate: 336-- 337-- base :: Array SNum Int 338-- maps the current state to an offset in the main table 339-- 340-- table :: Array Int SNum 341-- maps (base!state + char) to the next state 342-- 343-- check :: Array Int SNum 344-- maps (base!state + char) to state if table entry is valid, 345-- otherwise we use the default for this state 346-- 347-- default :: Array SNum SNum 348-- default production for this state 349-- 350-- accept :: Array SNum [Accept Code] 351-- maps state to list of accept codes for this state 352-- 353-- For each state, we decide what will be the default symbol (pick the 354-- most common). We now have a mapping Char -> SNum, with one special 355-- state reserved as the default. 356 357 358mkTables :: DFA SNum Code 359 -> ( 360 [Int], -- base 361 [Int], -- table 362 [Int], -- check 363 [Int], -- default 364 [[Accept Code]] -- accept 365 ) 366mkTables dfa = -- trace (show (defaults)) $ 367 -- trace (show (fmap (length . snd) dfa_no_defaults)) $ 368 ( elems base_offs, 369 take max_off (elems table), 370 take max_off (elems check), 371 elems defaults, 372 accept 373 ) 374 where 375 accept = [ as | State as _ <- elems dfa_arr ] 376 377 state_assocs = Map.toAscList (dfa_states dfa) 378 n_states = length state_assocs 379 top_state = n_states - 1 380 381 dfa_arr :: Array SNum (State SNum Code) 382 dfa_arr = array (0,top_state) state_assocs 383 384 -- fill in all the error productions 385 expand_states = 386 [ expand (dfa_arr!state) | state <- [0..top_state] ] 387 388 expand (State _ out) = 389 [(i, lookup' out i) | i <- [0..0xff]] 390 where lookup' out' i = case IntMap.lookup i out' of 391 Nothing -> -1 392 Just s -> s 393 394 defaults :: UArray SNum SNum 395 defaults = listArray (0,top_state) (map best_default expand_states) 396 397 -- find the most common destination state in a given state, and 398 -- make it the default. 399 best_default :: [(Int,SNum)] -> SNum 400 best_default prod_list 401 | null sorted = -1 402 | otherwise = snd (head (maximumBy lengths eq)) 403 where sorted = sortBy compareSnds prod_list 404 compareSnds (_,a) (_,b) = compare a b 405 eq = groupBy (\(_,a) (_,b) -> a == b) sorted 406 lengths a b = length a `compare` length b 407 408 -- remove all the default productions from the DFA 409 dfa_no_defaults = 410 [ (s, prods_without_defaults s out) 411 | (s, out) <- zip [0..] expand_states 412 ] 413 414 prods_without_defaults s out 415 = [ (fromIntegral c, dest) | (c,dest) <- out, dest /= defaults!s ] 416 417 (base_offs, table, check, max_off) 418 = runST (genTables n_states 255 dfa_no_defaults) 419 420 421genTables 422 :: Int -- number of states 423 -> Int -- maximum token no. 424 -> [(SNum,[(Int,SNum)])] -- entries for the table 425 -> ST s (UArray Int Int, -- base 426 UArray Int Int, -- table 427 UArray Int Int, -- check 428 Int -- highest offset in table 429 ) 430 431genTables n_states max_token entries = do 432 433 base <- newArray (0, n_states-1) 0 434 table <- newArray (0, mAX_TABLE_SIZE) 0 435 check <- newArray (0, mAX_TABLE_SIZE) (-1) 436 off_arr <- newArray (-max_token, mAX_TABLE_SIZE) 0 437 438 max_off <- genTables' base table check off_arr entries max_token 439 440 base' <- freeze base 441 table' <- freeze table 442 check' <- freeze check 443 return (base', table',check',max_off+1) 444 445 where mAX_TABLE_SIZE = n_states * (max_token + 1) 446 447 448genTables' 449 :: STUArray s Int Int -- base 450 -> STUArray s Int Int -- table 451 -> STUArray s Int Int -- check 452 -> STUArray s Int Int -- offset array 453 -> [(SNum,[(Int,SNum)])] -- entries for the table 454 -> Int -- maximum token no. 455 -> ST s Int -- highest offset in table 456 457genTables' base table check off_arr entries max_token 458 = fit_all entries 0 1 459 where 460 461 fit_all [] max_off _ = return max_off 462 fit_all (s:ss) max_off fst_zero = do 463 (off, new_max_off, new_fst_zero) <- fit s max_off fst_zero 464 writeArray off_arr off 1 465 fit_all ss new_max_off new_fst_zero 466 467 -- fit a vector into the table. Return the offset of the vector, 468 -- the maximum offset used in the table, and the offset of the first 469 -- entry in the table (used to speed up the lookups a bit). 470 fit (_,[]) max_off fst_zero = return (0,max_off,fst_zero) 471 472 fit (state_no, state@((t,_):_)) max_off fst_zero = do 473 -- start at offset 1 in the table: all the empty states 474 -- (states with just a default reduction) are mapped to 475 -- offset zero. 476 off <- findFreeOffset (-t + fst_zero) check off_arr state 477 let new_max_off | furthest_right > max_off = furthest_right 478 | otherwise = max_off 479 furthest_right = off + max_token 480 481 --trace ("fit: state " ++ show state_no ++ ", off " ++ show off ++ ", elems " ++ show state) $ do 482 483 writeArray base state_no off 484 addState off table check state 485 new_fst_zero <- findFstFreeSlot check fst_zero 486 return (off, new_max_off, new_fst_zero) 487 488 489-- Find a valid offset in the table for this state. 490findFreeOffset :: Int 491 -> STUArray s Int Int 492 -> STUArray s Int Int 493 -> [(Int, Int)] 494 -> ST s Int 495findFreeOffset off check off_arr state = do 496 -- offset 0 isn't allowed 497 if off == 0 then try_next else do 498 499 -- don't use an offset we've used before 500 b <- readArray off_arr off 501 if b /= 0 then try_next else do 502 503 -- check whether the actions for this state fit in the table 504 ok <- fits off state check 505 if ok then return off else try_next 506 where 507 try_next = findFreeOffset (off+1) check off_arr state 508 509-- This is an inner loop, so we use some strictness hacks, and avoid 510-- array bounds checks (unsafeRead instead of readArray) to speed 511-- things up a bit. 512fits :: Int -> [(Int,Int)] -> STUArray s Int Int -> ST s Bool 513fits off [] check = off `seq` check `seq` return True -- strictness hacks 514fits off ((t,_):rest) check = do 515 i <- unsafeRead check (off+t) 516 if i /= -1 then return False 517 else fits off rest check 518 519addState :: Int -> STUArray s Int Int -> STUArray s Int Int -> [(Int, Int)] 520 -> ST s () 521addState _ _ _ [] = return () 522addState off table check ((t,val):state) = do 523 writeArray table (off+t) val 524 writeArray check (off+t) t 525 addState off table check state 526 527findFstFreeSlot :: STUArray s Int Int -> Int -> ST s Int 528findFstFreeSlot table n = do 529 i <- readArray table n 530 if i == -1 then return n 531 else findFstFreeSlot table (n+1) 532 533----------------------------------------------------------------------------- 534-- Convert an integer to a 16-bit number encoded in \xNN\xNN format suitable 535-- for placing in a string (copied from Happy's ProduceCode.lhs) 536 537hexChars16 :: [Int] -> String 538hexChars16 acts = concat (map conv16 acts) 539 where 540 conv16 i | i > 0x7fff || i < -0x8000 541 = error ("Internal error: hexChars16: out of range: " ++ show i) 542 | otherwise 543 = hexChar16 i 544 545hexChars32 :: [Int] -> String 546hexChars32 acts = concat (map conv32 acts) 547 where 548 conv32 i = hexChar16 (i .&. 0xffff) ++ 549 hexChar16 ((i `shiftR` 16) .&. 0xffff) 550 551hexChar16 :: Int -> String 552hexChar16 i = toHex (i .&. 0xff) 553 ++ toHex ((i `shiftR` 8) .&. 0xff) -- force little-endian 554 555toHex :: Int -> String 556toHex i = ['\\','x', hexDig (i `div` 16), hexDig (i `mod` 16)] 557 558hexDig :: Int -> Char 559hexDig i | i <= 9 = chr (i + ord '0') 560 | otherwise = chr (i - 10 + ord 'a') 561