1{-# LANGUAGE CPP #-}
2-- -----------------------------------------------------------------------------
3--
4-- Main.hs, part of Alex
5--
6-- (c) Chris Dornan 1995-2000, Simon Marlow 2003
7--
8-- ----------------------------------------------------------------------------}
9
10module Main (main) where
11
12import AbsSyn
13import CharSet
14import DFA
15import DFAMin
16import NFA
17import Info
18import Map ( Map )
19import qualified Map hiding ( Map )
20import Output
21import ParseMonad ( runP )
22import Parser
23import Scan
24import Util ( hline )
25import Paths_alex ( version, getDataDir )
26
27#if __GLASGOW_HASKELL__ < 610
28import Control.Exception as Exception ( block, unblock, catch, throw )
29#endif
30#if __GLASGOW_HASKELL__ >= 610
31import Control.Exception ( bracketOnError )
32#endif
33import Control.Monad ( when, liftM )
34import Data.Char ( chr )
35import Data.List ( isSuffixOf, nub )
36import Data.Maybe ( isJust, fromJust )
37import Data.Version ( showVersion )
38import System.Console.GetOpt ( getOpt, usageInfo, ArgOrder(..), OptDescr(..), ArgDescr(..) )
39import System.Directory ( removeFile )
40import System.Environment ( getProgName, getArgs )
41import System.Exit ( ExitCode(..), exitWith )
42import System.IO ( stderr, Handle, IOMode(..), openFile, hClose, hPutStr, hPutStrLn )
43#if __GLASGOW_HASKELL__ >= 612
44import System.IO ( hGetContents, hSetEncoding, utf8 )
45#endif
46
47-- We need to force every file we open to be read in
48-- as UTF8
49alexReadFile :: FilePath -> IO String
50#if __GLASGOW_HASKELL__ >= 612
51alexReadFile file = do
52  h <- alexOpenFile file ReadMode
53  hGetContents h
54#else
55alexReadFile = readFile
56#endif
57
58-- We need to force every file we write to be written
59-- to as UTF8
60alexOpenFile :: FilePath -> IOMode -> IO Handle
61#if __GLASGOW_HASKELL__ >= 612
62alexOpenFile file mode = do
63  h <- openFile file mode
64  hSetEncoding h utf8
65  return h
66#else
67alexOpenFile = openFile
68#endif
69
70-- `main' decodes the command line arguments and calls `alex'.
71
72main:: IO ()
73main =  do
74 args <- getArgs
75 case getOpt Permute argInfo args of
76    (cli,_,[]) | DumpHelp `elem` cli -> do
77        prog <- getProgramName
78        bye (usageInfo (usageHeader prog) argInfo)
79    (cli,_,[]) | DumpVersion `elem` cli ->
80        bye copyright
81    (cli,[file],[]) ->
82        runAlex cli file
83    (_,_,errors) -> do
84        prog <- getProgramName
85        die (concat errors ++ usageInfo (usageHeader prog) argInfo)
86
87projectVersion :: String
88projectVersion = showVersion version
89
90copyright :: String
91copyright = "Alex version " ++ projectVersion ++ ", (c) 2003 Chris Dornan and Simon Marlow\n"
92
93usageHeader :: String -> String
94usageHeader prog = "Usage: " ++ prog ++ " [OPTION...] file\n"
95
96runAlex :: [CLIFlags] -> FilePath -> IO ()
97runAlex cli file = do
98  basename <- case (reverse file) of
99                'x':'.':r -> return (reverse r)
100                _         -> die (file ++ ": filename must end in \'.x\'\n")
101
102  prg <- alexReadFile file
103  script <- parseScript file prg
104  alex cli file basename script
105
106parseScript :: FilePath -> String
107  -> IO (Maybe (AlexPosn,Code), [Directive], Scanner, Maybe (AlexPosn,Code))
108parseScript file prg =
109  case runP prg initialParserEnv parse of
110        Left (Just (AlexPn _ line col),err) ->
111                die (file ++ ":" ++ show line ++ ":" ++ show col
112                                 ++ ": " ++ err ++ "\n")
113        Left (Nothing, err) ->
114                die (file ++ ": " ++ err ++ "\n")
115
116        Right script -> return script
117
118alex :: [CLIFlags] -> FilePath -> FilePath
119     -> (Maybe (AlexPosn, Code), [Directive], Scanner, Maybe (AlexPosn, Code))
120     -> IO ()
121alex cli file basename script = do
122   (put_info, finish_info) <-
123      case [ f | OptInfoFile f <- cli ] of
124           []  -> return (\_ -> return (), return ())
125           [Nothing] -> infoStart file (basename ++ ".info")
126           [Just f]  -> infoStart file f
127           _   -> dieAlex "multiple -i/--info options"
128
129   o_file <- case [ f | OptOutputFile f <- cli ] of
130                []  -> return (basename ++ ".hs")
131                [f] -> return f
132                _   -> dieAlex "multiple -o/--outfile options"
133
134   tab_size <- case [ s | OptTabSize s <- cli ] of
135                []  -> return (8 :: Int)
136                [s] -> case reads s of
137                        [(n,"")] -> return n
138                        _        -> dieAlex "-s/--tab-size option is not a valid integer"
139                _   -> dieAlex "multiple -s/--tab-size options"
140
141   let target
142        | OptGhcTarget `elem` cli = GhcTarget
143        | otherwise               = HaskellTarget
144
145   let encodingsCli
146        | OptLatin1 `elem` cli = [Latin1]
147        | otherwise            = []
148
149   template_dir  <- templateDir getDataDir cli
150
151   let (maybe_header, directives, scanner1, maybe_footer) = script
152
153   scheme <- getScheme directives
154
155   -- open the output file; remove it if we encounter an error
156   bracketOnError
157        (alexOpenFile o_file WriteMode)
158        (\h -> do hClose h; removeFile o_file)
159        $ \out_h -> do
160
161   let
162         wrapper_name = wrapperFile template_dir scheme
163         (scanner2, scs, sc_hdr) = encodeStartCodes scanner1
164         (scanner_final, actions) = extractActions scheme scanner2
165         encodingsScript = [ e | EncodingDirective e <- directives ]
166
167   encoding <- case nub (encodingsCli ++ encodingsScript) of
168     []  -> return UTF8 -- default
169     [e] -> return e
170     _ | null encodingsCli -> dieAlex "conflicting %encoding directives"
171       | otherwise -> dieAlex "--latin1 flag conflicts with %encoding directive"
172
173   hPutStr out_h (optsToInject target cli)
174   injectCode maybe_header file out_h
175
176   hPutStr out_h (importsToInject target cli)
177
178   -- add the wrapper, if necessary
179   when (isJust wrapper_name) $
180        do str <- alexReadFile (fromJust wrapper_name)
181           hPutStr out_h str
182
183   -- Inject the tab size
184   hPutStrLn out_h $ "alex_tab_size :: Int"
185   hPutStrLn out_h $ "alex_tab_size = " ++ show (tab_size :: Int)
186
187   let dfa = scanner2dfa encoding scanner_final scs
188       min_dfa = minimizeDFA dfa
189       nm  = scannerName scanner_final
190       usespreds = usesPreds min_dfa
191
192
193   put_info "\nStart codes\n"
194   put_info (show $ scs)
195   put_info "\nScanner\n"
196   put_info (show $ scanner_final)
197   put_info "\nNFA\n"
198   put_info (show $ scanner2nfa encoding scanner_final scs)
199   put_info "\nDFA"
200   put_info (infoDFA 1 nm dfa "")
201   put_info "\nMinimized DFA"
202   put_info (infoDFA 1 nm min_dfa "")
203   hPutStr out_h (outputDFA target 1 nm scheme min_dfa "")
204
205   injectCode maybe_footer file out_h
206
207   hPutStr out_h (sc_hdr "")
208   hPutStr out_h (actions "")
209
210   -- add the template
211   let template_name = templateFile template_dir target usespreds cli
212   tmplt <- alexReadFile template_name
213   hPutStr out_h tmplt
214
215   hClose out_h
216   finish_info
217
218getScheme :: [Directive] -> IO Scheme
219getScheme directives =
220  do
221    token <- case [ ty | TokenType ty <- directives ] of
222      [] -> return Nothing
223      [res] -> return (Just res)
224      _ -> dieAlex "multiple %token directives"
225
226    action <- case [ ty | ActionType ty <- directives ] of
227      [] -> return Nothing
228      [res] -> return (Just res)
229      _ -> dieAlex "multiple %action directives"
230
231    typeclass <- case [ tyclass | TypeClass tyclass <- directives ] of
232      [] -> return Nothing
233      [res] -> return (Just res)
234      _ -> dieAlex "multiple %typeclass directives"
235
236    case [ f | WrapperDirective f <- directives ] of
237        []  ->
238          case (typeclass, token, action) of
239            (Nothing, Nothing, Nothing) ->
240              return Default { defaultTypeInfo = Nothing }
241            (Nothing, Nothing, Just actionty) ->
242              return Default { defaultTypeInfo = Just (Nothing, actionty) }
243            (Just _, Nothing, Just actionty) ->
244              return Default { defaultTypeInfo = Just (typeclass, actionty) }
245            (_, Just _, _) ->
246              dieAlex "%token directive only allowed with a wrapper"
247            (Just _, Nothing, Nothing) ->
248              dieAlex "%typeclass directive without %token directive"
249        [single]
250          | single == "gscan" ->
251            case (typeclass, token, action) of
252              (Nothing, Nothing, Nothing) ->
253                return GScan { gscanTypeInfo = Nothing }
254              (Nothing, Just tokenty, Nothing) ->
255                return GScan { gscanTypeInfo = Just (Nothing, tokenty) }
256              (Just _, Just tokenty, Nothing) ->
257                return GScan { gscanTypeInfo = Just (typeclass, tokenty) }
258              (_, _, Just _) ->
259                dieAlex "%action directive not allowed with a wrapper"
260              (Just _, Nothing, Nothing) ->
261                dieAlex "%typeclass directive without %token directive"
262          | single == "basic" || single == "basic-bytestring" ||
263            single == "strict-bytestring" ->
264            let
265              strty = case single of
266                "basic" -> Str
267                "basic-bytestring" -> Lazy
268                "strict-bytestring" -> Strict
269                _ -> error "Impossible case"
270            in case (typeclass, token, action) of
271              (Nothing, Nothing, Nothing) ->
272                return Basic { basicStrType = strty,
273                               basicTypeInfo = Nothing }
274              (Nothing, Just tokenty, Nothing) ->
275                return Basic { basicStrType = strty,
276                               basicTypeInfo = Just (Nothing, tokenty) }
277              (Just _, Just tokenty, Nothing) ->
278                return Basic { basicStrType = strty,
279                               basicTypeInfo = Just (typeclass, tokenty) }
280              (_, _, Just _) ->
281                dieAlex "%action directive not allowed with a wrapper"
282              (Just _, Nothing, Nothing) ->
283                dieAlex "%typeclass directive without %token directive"
284          | single == "posn" || single == "posn-bytestring" ->
285            let
286              isByteString = single == "posn-bytestring"
287            in case (typeclass, token, action) of
288              (Nothing, Nothing, Nothing) ->
289                return Posn { posnByteString = isByteString,
290                              posnTypeInfo = Nothing }
291              (Nothing, Just tokenty, Nothing) ->
292                return Posn { posnByteString = isByteString,
293                              posnTypeInfo = Just (Nothing, tokenty) }
294              (Just _, Just tokenty, Nothing) ->
295                return Posn { posnByteString = isByteString,
296                              posnTypeInfo = Just (typeclass, tokenty) }
297              (_, _, Just _) ->
298                  dieAlex "%action directive not allowed with a wrapper"
299              (Just _, Nothing, Nothing) ->
300                dieAlex "%typeclass directive without %token directive"
301          | single == "monad" || single == "monad-bytestring" ||
302            single == "monadUserState" ||
303            single == "monadUserState-bytestring" ->
304            let
305              isByteString = single == "monad-bytestring" ||
306                             single == "monadUserState-bytestring"
307              userState = single == "monadUserState" ||
308                          single == "monadUserState-bytestring"
309            in case (typeclass, token, action) of
310              (Nothing, Nothing, Nothing) ->
311                return Monad { monadByteString = isByteString,
312                               monadUserState = userState,
313                               monadTypeInfo = Nothing }
314              (Nothing, Just tokenty, Nothing) ->
315                return Monad { monadByteString = isByteString,
316                               monadUserState = userState,
317                               monadTypeInfo = Just (Nothing, tokenty) }
318              (Just _, Just tokenty, Nothing) ->
319                return Monad { monadByteString = isByteString,
320                               monadUserState = userState,
321                               monadTypeInfo = Just (typeclass, tokenty) }
322              (_, _, Just _) ->
323                  dieAlex "%action directive not allowed with a wrapper"
324              (Just _, Nothing, Nothing) ->
325                dieAlex "%typeclass directive without %token directive"
326          | otherwise -> dieAlex ("unknown wrapper type " ++ single)
327        _many -> dieAlex "multiple %wrapper directives"
328
329-- inject some code, and add a {-# LINE #-} pragma at the top
330injectCode :: Maybe (AlexPosn,Code) -> FilePath -> Handle -> IO ()
331injectCode Nothing _ _ = return ()
332injectCode (Just (AlexPn _ ln _,code)) filename hdl = do
333  hPutStrLn hdl ("{-# LINE " ++ show ln ++ " \"" ++ filename ++ "\" #-}")
334  hPutStrLn hdl code
335
336optsToInject :: Target -> [CLIFlags] -> String
337optsToInject GhcTarget _ = optNoWarnings ++ "{-# LANGUAGE CPP,MagicHash #-}\n"
338optsToInject _         _ = optNoWarnings ++ "{-# LANGUAGE CPP #-}\n"
339
340optNoWarnings :: String
341optNoWarnings = "{-# OPTIONS_GHC -fno-warn-unused-binds -fno-warn-missing-signatures #-}\n"
342
343importsToInject :: Target -> [CLIFlags] -> String
344importsToInject _ cli = always_imports ++ debug_imports ++ glaexts_import
345  where
346        glaexts_import | OptGhcTarget `elem` cli    = import_glaexts
347                       | otherwise                  = ""
348
349        debug_imports  | OptDebugParser `elem` cli = import_debug
350                       | otherwise                 = ""
351
352-- CPP is turned on for -fglasogw-exts, so we can use conditional
353-- compilation.  We need to #include "config.h" to get hold of
354-- WORDS_BIGENDIAN (see GenericTemplate.hs).
355
356always_imports :: String
357always_imports = "#if __GLASGOW_HASKELL__ >= 603\n" ++
358                 "#include \"ghcconfig.h\"\n" ++
359                 "#elif defined(__GLASGOW_HASKELL__)\n" ++
360                 "#include \"config.h\"\n" ++
361                 "#endif\n" ++
362                 "#if __GLASGOW_HASKELL__ >= 503\n" ++
363                 "import Data.Array\n" ++
364                 "#else\n" ++
365                 "import Array\n" ++
366                 "#endif\n"
367
368import_glaexts :: String
369import_glaexts = "#if __GLASGOW_HASKELL__ >= 503\n" ++
370                 "import Data.Array.Base (unsafeAt)\n" ++
371                 "import GHC.Exts\n" ++
372                 "#else\n" ++
373                 "import GlaExts\n" ++
374                 "#endif\n"
375
376import_debug :: String
377import_debug   = "#if __GLASGOW_HASKELL__ >= 503\n" ++
378                 "import System.IO\n" ++
379                 "import System.IO.Unsafe\n" ++
380                 "import Debug.Trace\n" ++
381                 "#else\n" ++
382                 "import IO\n" ++
383                 "import IOExts\n" ++
384                 "#endif\n"
385
386templateDir :: IO FilePath -> [CLIFlags] -> IO FilePath
387templateDir def cli
388  = case [ d | OptTemplateDir d <- cli ] of
389      [] -> def
390      ds -> return (last ds)
391
392templateFile :: FilePath -> Target -> UsesPreds -> [CLIFlags] -> FilePath
393templateFile dir target usespreds cli
394  = dir ++ "/AlexTemplate" ++ maybe_ghc ++ maybe_debug ++ maybe_nopred
395  where
396        maybe_ghc = case target of
397                      GhcTarget -> "-ghc"
398                      _         -> ""
399
400        maybe_debug
401          | OptDebugParser `elem` cli  = "-debug"
402          | otherwise                  = ""
403
404        maybe_nopred =
405          case usespreds of
406            DoesntUsePreds | not (null maybe_ghc)
407                          && null maybe_debug -> "-nopred"
408            _                                 -> ""
409
410wrapperFile :: FilePath -> Scheme -> Maybe FilePath
411wrapperFile dir scheme =
412  do
413    f <- wrapperName scheme
414    return (dir ++ "/AlexWrapper-" ++ f)
415
416infoStart :: FilePath -> FilePath -> IO (String -> IO (), IO ())
417infoStart x_file info_file = do
418  bracketOnError
419        (alexOpenFile info_file WriteMode)
420        (\h -> do hClose h; removeFile info_file)
421        (\h -> do infoHeader h x_file
422                  return (hPutStr h, hClose h)
423        )
424
425infoHeader :: Handle -> FilePath -> IO ()
426infoHeader h file = do
427--  hSetBuffering h NoBuffering
428  hPutStrLn h ("Info file produced by Alex version " ++ projectVersion ++
429                ", from " ++ file)
430  hPutStrLn h hline
431  hPutStr h "\n"
432
433initialParserEnv :: (Map String CharSet, Map String RExp)
434initialParserEnv = (initSetEnv, initREEnv)
435
436initSetEnv :: Map String CharSet
437initSetEnv = Map.fromList [("white", charSet " \t\n\v\f\r"),
438                           ("printable", charSetRange (chr 32) (chr 0x10FFFF)), -- FIXME: Look it up the unicode standard
439                           (".", charSetComplement emptyCharSet
440                            `charSetMinus` charSetSingleton '\n')]
441
442initREEnv :: Map String RExp
443initREEnv = Map.empty
444
445-- -----------------------------------------------------------------------------
446-- Command-line flags
447
448data CLIFlags
449  = OptDebugParser
450  | OptGhcTarget
451  | OptOutputFile FilePath
452  | OptInfoFile (Maybe FilePath)
453  | OptTabSize String
454  | OptTemplateDir FilePath
455  | OptLatin1
456  | DumpHelp
457  | DumpVersion
458  deriving Eq
459
460argInfo :: [OptDescr CLIFlags]
461argInfo  = [
462   Option ['o'] ["outfile"] (ReqArg OptOutputFile "FILE")
463        "write the output to FILE (default: file.hs)",
464   Option ['i'] ["info"] (OptArg OptInfoFile "FILE")
465        "put detailed state-machine info in FILE (or file.info)",
466   Option ['t'] ["template"] (ReqArg OptTemplateDir "DIR")
467        "look in DIR for template files",
468   Option ['g'] ["ghc"]    (NoArg OptGhcTarget)
469        "use GHC extensions",
470   Option ['l'] ["latin1"]    (NoArg OptLatin1)
471        "generated lexer will use the Latin-1 encoding instead of UTF-8",
472   Option ['s'] ["tab-size"] (ReqArg OptTabSize "NUMBER")
473        "set tab size to be used in the generated lexer (default: 8)",
474   Option ['d'] ["debug"] (NoArg OptDebugParser)
475        "produce a debugging scanner",
476   Option ['?'] ["help"] (NoArg DumpHelp)
477        "display this help and exit",
478   Option ['V','v'] ["version"] (NoArg DumpVersion)  -- ToDo: -v is deprecated!
479        "output version information and exit"
480  ]
481
482-- -----------------------------------------------------------------------------
483-- Utils
484
485getProgramName :: IO String
486getProgramName = liftM (`withoutSuffix` ".bin") getProgName
487   where str `withoutSuffix` suff
488            | suff `isSuffixOf` str = take (length str - length suff) str
489            | otherwise             = str
490
491bye :: String -> IO a
492bye s = putStr s >> exitWith ExitSuccess
493
494die :: String -> IO a
495die s = hPutStr stderr s >> exitWith (ExitFailure 1)
496
497dieAlex :: String -> IO a
498dieAlex s = getProgramName >>= \prog -> die (prog ++ ": " ++ s)
499
500#if __GLASGOW_HASKELL__ < 610
501bracketOnError
502        :: IO a         -- ^ computation to run first (\"acquire resource\")
503        -> (a -> IO b)  -- ^ computation to run last (\"release resource\")
504        -> (a -> IO c)  -- ^ computation to run in-between
505        -> IO c         -- returns the value from the in-between computation
506bracketOnError before after thing =
507  block (do
508    a <- before
509    r <- Exception.catch
510           (unblock (thing a))
511           (\e -> do { after a; throw e })
512    return r
513 )
514#endif
515