1{-# LANGUAGE CPP #-} 2-- ----------------------------------------------------------------------------- 3-- 4-- Main.hs, part of Alex 5-- 6-- (c) Chris Dornan 1995-2000, Simon Marlow 2003 7-- 8-- ----------------------------------------------------------------------------} 9 10module Main (main) where 11 12import AbsSyn 13import CharSet 14import DFA 15import DFAMin 16import NFA 17import Info 18import Map ( Map ) 19import qualified Map hiding ( Map ) 20import Output 21import ParseMonad ( runP ) 22import Parser 23import Scan 24import Util ( hline ) 25import Paths_alex ( version, getDataDir ) 26 27#if __GLASGOW_HASKELL__ < 610 28import Control.Exception as Exception ( block, unblock, catch, throw ) 29#endif 30#if __GLASGOW_HASKELL__ >= 610 31import Control.Exception ( bracketOnError ) 32#endif 33import Control.Monad ( when, liftM ) 34import Data.Char ( chr ) 35import Data.List ( isSuffixOf, nub ) 36import Data.Maybe ( isJust, fromJust ) 37import Data.Version ( showVersion ) 38import System.Console.GetOpt ( getOpt, usageInfo, ArgOrder(..), OptDescr(..), ArgDescr(..) ) 39import System.Directory ( removeFile ) 40import System.Environment ( getProgName, getArgs ) 41import System.Exit ( ExitCode(..), exitWith ) 42import System.IO ( stderr, Handle, IOMode(..), openFile, hClose, hPutStr, hPutStrLn ) 43#if __GLASGOW_HASKELL__ >= 612 44import System.IO ( hGetContents, hSetEncoding, utf8 ) 45#endif 46 47-- We need to force every file we open to be read in 48-- as UTF8 49alexReadFile :: FilePath -> IO String 50#if __GLASGOW_HASKELL__ >= 612 51alexReadFile file = do 52 h <- alexOpenFile file ReadMode 53 hGetContents h 54#else 55alexReadFile = readFile 56#endif 57 58-- We need to force every file we write to be written 59-- to as UTF8 60alexOpenFile :: FilePath -> IOMode -> IO Handle 61#if __GLASGOW_HASKELL__ >= 612 62alexOpenFile file mode = do 63 h <- openFile file mode 64 hSetEncoding h utf8 65 return h 66#else 67alexOpenFile = openFile 68#endif 69 70-- `main' decodes the command line arguments and calls `alex'. 71 72main:: IO () 73main = do 74 args <- getArgs 75 case getOpt Permute argInfo args of 76 (cli,_,[]) | DumpHelp `elem` cli -> do 77 prog <- getProgramName 78 bye (usageInfo (usageHeader prog) argInfo) 79 (cli,_,[]) | DumpVersion `elem` cli -> 80 bye copyright 81 (cli,[file],[]) -> 82 runAlex cli file 83 (_,_,errors) -> do 84 prog <- getProgramName 85 die (concat errors ++ usageInfo (usageHeader prog) argInfo) 86 87projectVersion :: String 88projectVersion = showVersion version 89 90copyright :: String 91copyright = "Alex version " ++ projectVersion ++ ", (c) 2003 Chris Dornan and Simon Marlow\n" 92 93usageHeader :: String -> String 94usageHeader prog = "Usage: " ++ prog ++ " [OPTION...] file\n" 95 96runAlex :: [CLIFlags] -> FilePath -> IO () 97runAlex cli file = do 98 basename <- case (reverse file) of 99 'x':'.':r -> return (reverse r) 100 _ -> die (file ++ ": filename must end in \'.x\'\n") 101 102 prg <- alexReadFile file 103 script <- parseScript file prg 104 alex cli file basename script 105 106parseScript :: FilePath -> String 107 -> IO (Maybe (AlexPosn,Code), [Directive], Scanner, Maybe (AlexPosn,Code)) 108parseScript file prg = 109 case runP prg initialParserEnv parse of 110 Left (Just (AlexPn _ line col),err) -> 111 die (file ++ ":" ++ show line ++ ":" ++ show col 112 ++ ": " ++ err ++ "\n") 113 Left (Nothing, err) -> 114 die (file ++ ": " ++ err ++ "\n") 115 116 Right script -> return script 117 118alex :: [CLIFlags] -> FilePath -> FilePath 119 -> (Maybe (AlexPosn, Code), [Directive], Scanner, Maybe (AlexPosn, Code)) 120 -> IO () 121alex cli file basename script = do 122 (put_info, finish_info) <- 123 case [ f | OptInfoFile f <- cli ] of 124 [] -> return (\_ -> return (), return ()) 125 [Nothing] -> infoStart file (basename ++ ".info") 126 [Just f] -> infoStart file f 127 _ -> dieAlex "multiple -i/--info options" 128 129 o_file <- case [ f | OptOutputFile f <- cli ] of 130 [] -> return (basename ++ ".hs") 131 [f] -> return f 132 _ -> dieAlex "multiple -o/--outfile options" 133 134 tab_size <- case [ s | OptTabSize s <- cli ] of 135 [] -> return (8 :: Int) 136 [s] -> case reads s of 137 [(n,"")] -> return n 138 _ -> dieAlex "-s/--tab-size option is not a valid integer" 139 _ -> dieAlex "multiple -s/--tab-size options" 140 141 let target 142 | OptGhcTarget `elem` cli = GhcTarget 143 | otherwise = HaskellTarget 144 145 let encodingsCli 146 | OptLatin1 `elem` cli = [Latin1] 147 | otherwise = [] 148 149 template_dir <- templateDir getDataDir cli 150 151 let (maybe_header, directives, scanner1, maybe_footer) = script 152 153 scheme <- getScheme directives 154 155 -- open the output file; remove it if we encounter an error 156 bracketOnError 157 (alexOpenFile o_file WriteMode) 158 (\h -> do hClose h; removeFile o_file) 159 $ \out_h -> do 160 161 let 162 wrapper_name = wrapperFile template_dir scheme 163 (scanner2, scs, sc_hdr) = encodeStartCodes scanner1 164 (scanner_final, actions) = extractActions scheme scanner2 165 encodingsScript = [ e | EncodingDirective e <- directives ] 166 167 encoding <- case nub (encodingsCli ++ encodingsScript) of 168 [] -> return UTF8 -- default 169 [e] -> return e 170 _ | null encodingsCli -> dieAlex "conflicting %encoding directives" 171 | otherwise -> dieAlex "--latin1 flag conflicts with %encoding directive" 172 173 hPutStr out_h (optsToInject target cli) 174 injectCode maybe_header file out_h 175 176 hPutStr out_h (importsToInject target cli) 177 178 -- add the wrapper, if necessary 179 when (isJust wrapper_name) $ 180 do str <- alexReadFile (fromJust wrapper_name) 181 hPutStr out_h str 182 183 -- Inject the tab size 184 hPutStrLn out_h $ "alex_tab_size :: Int" 185 hPutStrLn out_h $ "alex_tab_size = " ++ show (tab_size :: Int) 186 187 let dfa = scanner2dfa encoding scanner_final scs 188 min_dfa = minimizeDFA dfa 189 nm = scannerName scanner_final 190 usespreds = usesPreds min_dfa 191 192 193 put_info "\nStart codes\n" 194 put_info (show $ scs) 195 put_info "\nScanner\n" 196 put_info (show $ scanner_final) 197 put_info "\nNFA\n" 198 put_info (show $ scanner2nfa encoding scanner_final scs) 199 put_info "\nDFA" 200 put_info (infoDFA 1 nm dfa "") 201 put_info "\nMinimized DFA" 202 put_info (infoDFA 1 nm min_dfa "") 203 hPutStr out_h (outputDFA target 1 nm scheme min_dfa "") 204 205 injectCode maybe_footer file out_h 206 207 hPutStr out_h (sc_hdr "") 208 hPutStr out_h (actions "") 209 210 -- add the template 211 let template_name = templateFile template_dir target usespreds cli 212 tmplt <- alexReadFile template_name 213 hPutStr out_h tmplt 214 215 hClose out_h 216 finish_info 217 218getScheme :: [Directive] -> IO Scheme 219getScheme directives = 220 do 221 token <- case [ ty | TokenType ty <- directives ] of 222 [] -> return Nothing 223 [res] -> return (Just res) 224 _ -> dieAlex "multiple %token directives" 225 226 action <- case [ ty | ActionType ty <- directives ] of 227 [] -> return Nothing 228 [res] -> return (Just res) 229 _ -> dieAlex "multiple %action directives" 230 231 typeclass <- case [ tyclass | TypeClass tyclass <- directives ] of 232 [] -> return Nothing 233 [res] -> return (Just res) 234 _ -> dieAlex "multiple %typeclass directives" 235 236 case [ f | WrapperDirective f <- directives ] of 237 [] -> 238 case (typeclass, token, action) of 239 (Nothing, Nothing, Nothing) -> 240 return Default { defaultTypeInfo = Nothing } 241 (Nothing, Nothing, Just actionty) -> 242 return Default { defaultTypeInfo = Just (Nothing, actionty) } 243 (Just _, Nothing, Just actionty) -> 244 return Default { defaultTypeInfo = Just (typeclass, actionty) } 245 (_, Just _, _) -> 246 dieAlex "%token directive only allowed with a wrapper" 247 (Just _, Nothing, Nothing) -> 248 dieAlex "%typeclass directive without %token directive" 249 [single] 250 | single == "gscan" -> 251 case (typeclass, token, action) of 252 (Nothing, Nothing, Nothing) -> 253 return GScan { gscanTypeInfo = Nothing } 254 (Nothing, Just tokenty, Nothing) -> 255 return GScan { gscanTypeInfo = Just (Nothing, tokenty) } 256 (Just _, Just tokenty, Nothing) -> 257 return GScan { gscanTypeInfo = Just (typeclass, tokenty) } 258 (_, _, Just _) -> 259 dieAlex "%action directive not allowed with a wrapper" 260 (Just _, Nothing, Nothing) -> 261 dieAlex "%typeclass directive without %token directive" 262 | single == "basic" || single == "basic-bytestring" || 263 single == "strict-bytestring" -> 264 let 265 strty = case single of 266 "basic" -> Str 267 "basic-bytestring" -> Lazy 268 "strict-bytestring" -> Strict 269 _ -> error "Impossible case" 270 in case (typeclass, token, action) of 271 (Nothing, Nothing, Nothing) -> 272 return Basic { basicStrType = strty, 273 basicTypeInfo = Nothing } 274 (Nothing, Just tokenty, Nothing) -> 275 return Basic { basicStrType = strty, 276 basicTypeInfo = Just (Nothing, tokenty) } 277 (Just _, Just tokenty, Nothing) -> 278 return Basic { basicStrType = strty, 279 basicTypeInfo = Just (typeclass, tokenty) } 280 (_, _, Just _) -> 281 dieAlex "%action directive not allowed with a wrapper" 282 (Just _, Nothing, Nothing) -> 283 dieAlex "%typeclass directive without %token directive" 284 | single == "posn" || single == "posn-bytestring" -> 285 let 286 isByteString = single == "posn-bytestring" 287 in case (typeclass, token, action) of 288 (Nothing, Nothing, Nothing) -> 289 return Posn { posnByteString = isByteString, 290 posnTypeInfo = Nothing } 291 (Nothing, Just tokenty, Nothing) -> 292 return Posn { posnByteString = isByteString, 293 posnTypeInfo = Just (Nothing, tokenty) } 294 (Just _, Just tokenty, Nothing) -> 295 return Posn { posnByteString = isByteString, 296 posnTypeInfo = Just (typeclass, tokenty) } 297 (_, _, Just _) -> 298 dieAlex "%action directive not allowed with a wrapper" 299 (Just _, Nothing, Nothing) -> 300 dieAlex "%typeclass directive without %token directive" 301 | single == "monad" || single == "monad-bytestring" || 302 single == "monadUserState" || 303 single == "monadUserState-bytestring" -> 304 let 305 isByteString = single == "monad-bytestring" || 306 single == "monadUserState-bytestring" 307 userState = single == "monadUserState" || 308 single == "monadUserState-bytestring" 309 in case (typeclass, token, action) of 310 (Nothing, Nothing, Nothing) -> 311 return Monad { monadByteString = isByteString, 312 monadUserState = userState, 313 monadTypeInfo = Nothing } 314 (Nothing, Just tokenty, Nothing) -> 315 return Monad { monadByteString = isByteString, 316 monadUserState = userState, 317 monadTypeInfo = Just (Nothing, tokenty) } 318 (Just _, Just tokenty, Nothing) -> 319 return Monad { monadByteString = isByteString, 320 monadUserState = userState, 321 monadTypeInfo = Just (typeclass, tokenty) } 322 (_, _, Just _) -> 323 dieAlex "%action directive not allowed with a wrapper" 324 (Just _, Nothing, Nothing) -> 325 dieAlex "%typeclass directive without %token directive" 326 | otherwise -> dieAlex ("unknown wrapper type " ++ single) 327 _many -> dieAlex "multiple %wrapper directives" 328 329-- inject some code, and add a {-# LINE #-} pragma at the top 330injectCode :: Maybe (AlexPosn,Code) -> FilePath -> Handle -> IO () 331injectCode Nothing _ _ = return () 332injectCode (Just (AlexPn _ ln _,code)) filename hdl = do 333 hPutStrLn hdl ("{-# LINE " ++ show ln ++ " \"" ++ filename ++ "\" #-}") 334 hPutStrLn hdl code 335 336optsToInject :: Target -> [CLIFlags] -> String 337optsToInject GhcTarget _ = optNoWarnings ++ "{-# LANGUAGE CPP,MagicHash #-}\n" 338optsToInject _ _ = optNoWarnings ++ "{-# LANGUAGE CPP #-}\n" 339 340optNoWarnings :: String 341optNoWarnings = "{-# OPTIONS_GHC -fno-warn-unused-binds -fno-warn-missing-signatures #-}\n" 342 343importsToInject :: Target -> [CLIFlags] -> String 344importsToInject _ cli = always_imports ++ debug_imports ++ glaexts_import 345 where 346 glaexts_import | OptGhcTarget `elem` cli = import_glaexts 347 | otherwise = "" 348 349 debug_imports | OptDebugParser `elem` cli = import_debug 350 | otherwise = "" 351 352-- CPP is turned on for -fglasogw-exts, so we can use conditional 353-- compilation. We need to #include "config.h" to get hold of 354-- WORDS_BIGENDIAN (see GenericTemplate.hs). 355 356always_imports :: String 357always_imports = "#if __GLASGOW_HASKELL__ >= 603\n" ++ 358 "#include \"ghcconfig.h\"\n" ++ 359 "#elif defined(__GLASGOW_HASKELL__)\n" ++ 360 "#include \"config.h\"\n" ++ 361 "#endif\n" ++ 362 "#if __GLASGOW_HASKELL__ >= 503\n" ++ 363 "import Data.Array\n" ++ 364 "#else\n" ++ 365 "import Array\n" ++ 366 "#endif\n" 367 368import_glaexts :: String 369import_glaexts = "#if __GLASGOW_HASKELL__ >= 503\n" ++ 370 "import Data.Array.Base (unsafeAt)\n" ++ 371 "import GHC.Exts\n" ++ 372 "#else\n" ++ 373 "import GlaExts\n" ++ 374 "#endif\n" 375 376import_debug :: String 377import_debug = "#if __GLASGOW_HASKELL__ >= 503\n" ++ 378 "import System.IO\n" ++ 379 "import System.IO.Unsafe\n" ++ 380 "import Debug.Trace\n" ++ 381 "#else\n" ++ 382 "import IO\n" ++ 383 "import IOExts\n" ++ 384 "#endif\n" 385 386templateDir :: IO FilePath -> [CLIFlags] -> IO FilePath 387templateDir def cli 388 = case [ d | OptTemplateDir d <- cli ] of 389 [] -> def 390 ds -> return (last ds) 391 392templateFile :: FilePath -> Target -> UsesPreds -> [CLIFlags] -> FilePath 393templateFile dir target usespreds cli 394 = dir ++ "/AlexTemplate" ++ maybe_ghc ++ maybe_debug ++ maybe_nopred 395 where 396 maybe_ghc = case target of 397 GhcTarget -> "-ghc" 398 _ -> "" 399 400 maybe_debug 401 | OptDebugParser `elem` cli = "-debug" 402 | otherwise = "" 403 404 maybe_nopred = 405 case usespreds of 406 DoesntUsePreds | not (null maybe_ghc) 407 && null maybe_debug -> "-nopred" 408 _ -> "" 409 410wrapperFile :: FilePath -> Scheme -> Maybe FilePath 411wrapperFile dir scheme = 412 do 413 f <- wrapperName scheme 414 return (dir ++ "/AlexWrapper-" ++ f) 415 416infoStart :: FilePath -> FilePath -> IO (String -> IO (), IO ()) 417infoStart x_file info_file = do 418 bracketOnError 419 (alexOpenFile info_file WriteMode) 420 (\h -> do hClose h; removeFile info_file) 421 (\h -> do infoHeader h x_file 422 return (hPutStr h, hClose h) 423 ) 424 425infoHeader :: Handle -> FilePath -> IO () 426infoHeader h file = do 427-- hSetBuffering h NoBuffering 428 hPutStrLn h ("Info file produced by Alex version " ++ projectVersion ++ 429 ", from " ++ file) 430 hPutStrLn h hline 431 hPutStr h "\n" 432 433initialParserEnv :: (Map String CharSet, Map String RExp) 434initialParserEnv = (initSetEnv, initREEnv) 435 436initSetEnv :: Map String CharSet 437initSetEnv = Map.fromList [("white", charSet " \t\n\v\f\r"), 438 ("printable", charSetRange (chr 32) (chr 0x10FFFF)), -- FIXME: Look it up the unicode standard 439 (".", charSetComplement emptyCharSet 440 `charSetMinus` charSetSingleton '\n')] 441 442initREEnv :: Map String RExp 443initREEnv = Map.empty 444 445-- ----------------------------------------------------------------------------- 446-- Command-line flags 447 448data CLIFlags 449 = OptDebugParser 450 | OptGhcTarget 451 | OptOutputFile FilePath 452 | OptInfoFile (Maybe FilePath) 453 | OptTabSize String 454 | OptTemplateDir FilePath 455 | OptLatin1 456 | DumpHelp 457 | DumpVersion 458 deriving Eq 459 460argInfo :: [OptDescr CLIFlags] 461argInfo = [ 462 Option ['o'] ["outfile"] (ReqArg OptOutputFile "FILE") 463 "write the output to FILE (default: file.hs)", 464 Option ['i'] ["info"] (OptArg OptInfoFile "FILE") 465 "put detailed state-machine info in FILE (or file.info)", 466 Option ['t'] ["template"] (ReqArg OptTemplateDir "DIR") 467 "look in DIR for template files", 468 Option ['g'] ["ghc"] (NoArg OptGhcTarget) 469 "use GHC extensions", 470 Option ['l'] ["latin1"] (NoArg OptLatin1) 471 "generated lexer will use the Latin-1 encoding instead of UTF-8", 472 Option ['s'] ["tab-size"] (ReqArg OptTabSize "NUMBER") 473 "set tab size to be used in the generated lexer (default: 8)", 474 Option ['d'] ["debug"] (NoArg OptDebugParser) 475 "produce a debugging scanner", 476 Option ['?'] ["help"] (NoArg DumpHelp) 477 "display this help and exit", 478 Option ['V','v'] ["version"] (NoArg DumpVersion) -- ToDo: -v is deprecated! 479 "output version information and exit" 480 ] 481 482-- ----------------------------------------------------------------------------- 483-- Utils 484 485getProgramName :: IO String 486getProgramName = liftM (`withoutSuffix` ".bin") getProgName 487 where str `withoutSuffix` suff 488 | suff `isSuffixOf` str = take (length str - length suff) str 489 | otherwise = str 490 491bye :: String -> IO a 492bye s = putStr s >> exitWith ExitSuccess 493 494die :: String -> IO a 495die s = hPutStr stderr s >> exitWith (ExitFailure 1) 496 497dieAlex :: String -> IO a 498dieAlex s = getProgramName >>= \prog -> die (prog ++ ": " ++ s) 499 500#if __GLASGOW_HASKELL__ < 610 501bracketOnError 502 :: IO a -- ^ computation to run first (\"acquire resource\") 503 -> (a -> IO b) -- ^ computation to run last (\"release resource\") 504 -> (a -> IO c) -- ^ computation to run in-between 505 -> IO c -- returns the value from the in-between computation 506bracketOnError before after thing = 507 block (do 508 a <- before 509 r <- Exception.catch 510 (unblock (thing a)) 511 (\e -> do { after a; throw e }) 512 return r 513 ) 514#endif 515