1----------------------------------------------------------------------------- 2-- | 3-- Module : Distribution.Simple.PreProcess.Unlit 4-- Copyright : ... 5-- 6-- Maintainer : cabal-devel@haskell.org 7-- Portability : portable 8-- 9-- Remove the \"literal\" markups from a Haskell source file, including 10-- \"@>@\", \"@\\begin{code}@\", \"@\\end{code}@\", and \"@#@\" 11 12-- This version is interesting because instead of striping comment lines, it 13-- turns them into "-- " style comments. This allows using haddock markup 14-- in literate scripts without having to use "> --" prefix. 15 16module Distribution.Simple.PreProcess.Unlit (unlit,plain) where 17 18import Prelude () 19import Distribution.Compat.Prelude 20import Distribution.Utils.Generic (safeTail, safeLast, safeInit) 21 22import Data.List (mapAccumL) 23 24data Classified = BirdTrack String | Blank String | Ordinary String 25 | Line !Int String | CPP String 26 | BeginCode | EndCode 27 -- output only: 28 | Error String | Comment String 29 30-- | No unliteration. 31plain :: String -> String -> String 32plain _ hs = hs 33 34classify :: String -> Classified 35classify ('>':s) = BirdTrack s 36classify ('#':s) = case tokens s of 37 (line:file@('"':_:_):_) | all isDigit line 38 && safeLast file == Just '"' 39 -- this shouldn't fail as we tested for 'all isDigit' 40 -> Line (fromMaybe (error $ "panic! read @Int " ++ show line) $ readMaybe line) (safeTail (safeInit file)) -- TODO:eradicateNoParse 41 _ -> CPP s 42 where tokens = unfoldr $ \str -> case lex str of 43 (t@(_:_), str'):_ -> Just (t, str') 44 _ -> Nothing 45classify ('\\':s) 46 | "begin{code}" `isPrefixOf` s = BeginCode 47 | "end{code}" `isPrefixOf` s = EndCode 48classify s | all isSpace s = Blank s 49classify s = Ordinary s 50 51-- So the weird exception for comment indenting is to make things work with 52-- haddock, see classifyAndCheckForBirdTracks below. 53unclassify :: Bool -> Classified -> String 54unclassify _ (BirdTrack s) = ' ':s 55unclassify _ (Blank s) = s 56unclassify _ (Ordinary s) = s 57unclassify _ (Line n file) = "# " ++ show n ++ " " ++ show file 58unclassify _ (CPP s) = '#':s 59unclassify True (Comment "") = " --" 60unclassify True (Comment s) = " -- " ++ s 61unclassify False (Comment "") = "--" 62unclassify False (Comment s) = "-- " ++ s 63unclassify _ _ = internalError 64 65-- | 'unlit' takes a filename (for error reports), and transforms the 66-- given string, to eliminate the literate comments from the program text. 67unlit :: FilePath -> String -> Either String String 68unlit file input = 69 let (usesBirdTracks, classified) = classifyAndCheckForBirdTracks 70 . inlines 71 $ input 72 in either (Left . unlines . map (unclassify usesBirdTracks)) 73 Right 74 . checkErrors 75 . reclassify 76 $ classified 77 78 where 79 -- So haddock requires comments and code to align, since it treats comments 80 -- as following the layout rule. This is a pain for us since bird track 81 -- style literate code typically gets indented by two since ">" is replaced 82 -- by " " and people usually use one additional space of indent ie 83 -- "> then the code". On the other hand we cannot just go and indent all 84 -- the comments by two since that does not work for latex style literate 85 -- code. So the hacky solution we use here is that if we see any bird track 86 -- style code then we'll indent all comments by two, otherwise by none. 87 -- Of course this will not work for mixed latex/bird track .lhs files but 88 -- nobody does that, it's silly and specifically recommended against in the 89 -- H98 unlit spec. 90 -- 91 classifyAndCheckForBirdTracks = 92 flip mapAccumL False $ \seenBirdTrack line -> 93 let classification = classify line 94 in (seenBirdTrack || isBirdTrack classification, classification) 95 96 isBirdTrack (BirdTrack _) = True 97 isBirdTrack _ = False 98 99 checkErrors ls = case [ e | Error e <- ls ] of 100 [] -> Left ls 101 (message:_) -> Right (f ++ ":" ++ show n ++ ": " ++ message) 102 where (f, n) = errorPos file 1 ls 103 errorPos f n [] = (f, n) 104 errorPos f n (Error _:_) = (f, n) 105 errorPos _ _ (Line n' f':ls) = errorPos f' n' ls 106 errorPos f n (_ :ls) = errorPos f (n+1) ls 107 108-- Here we model a state machine, with each state represented by 109-- a local function. We only have four states (well, five, 110-- if you count the error state), but the rules 111-- to transition between then are not so simple. 112-- Would it be simpler to have more states? 113-- 114-- Each state represents the type of line that was last read 115-- i.e. are we in a comment section, or a latex-code section, 116-- or a bird-code section, etc? 117reclassify :: [Classified] -> [Classified] 118reclassify = blank -- begin in blank state 119 where 120 latex [] = [] 121 latex (EndCode :ls) = Blank "" : comment ls 122 latex (BeginCode :_ ) = [Error "\\begin{code} in code section"] 123 latex (BirdTrack l:ls) = Ordinary ('>':l) : latex ls 124 latex ( l:ls) = l : latex ls 125 126 blank [] = [] 127 blank (EndCode :_ ) = [Error "\\end{code} without \\begin{code}"] 128 blank (BeginCode :ls) = Blank "" : latex ls 129 blank (BirdTrack l:ls) = BirdTrack l : bird ls 130 blank (Ordinary l:ls) = Comment l : comment ls 131 blank ( l:ls) = l : blank ls 132 133 bird [] = [] 134 bird (EndCode :_ ) = [Error "\\end{code} without \\begin{code}"] 135 bird (BeginCode :ls) = Blank "" : latex ls 136 bird (Blank l :ls) = Blank l : blank ls 137 bird (Ordinary _:_ ) = [Error "program line before comment line"] 138 bird ( l:ls) = l : bird ls 139 140 comment [] = [] 141 comment (EndCode :_ ) = [Error "\\end{code} without \\begin{code}"] 142 comment (BeginCode :ls) = Blank "" : latex ls 143 comment (CPP l :ls) = CPP l : comment ls 144 comment (BirdTrack _:_ ) = [Error "comment line before program line"] 145 -- a blank line and another ordinary line following a comment 146 -- will be treated as continuing the comment. Otherwise it's 147 -- then end of the comment, with a blank line. 148 comment (Blank l:ls@(Ordinary _:_)) = Comment l : comment ls 149 comment (Blank l:ls) = Blank l : blank ls 150 comment (Line n f :ls) = Line n f : comment ls 151 comment (Ordinary l:ls) = Comment l : comment ls 152 comment (Comment _: _) = internalError 153 comment (Error _: _) = internalError 154 155-- Re-implementation of 'lines', for better efficiency (but decreased laziness). 156-- Also, importantly, accepts non-standard DOS and Mac line ending characters. 157inlines :: String -> [String] 158inlines xs = lines' xs id 159 where 160 lines' [] acc = [acc []] 161 lines' ('\^M':'\n':s) acc = acc [] : lines' s id -- DOS 162 lines' ('\^M':s) acc = acc [] : lines' s id -- MacOS 163 lines' ('\n':s) acc = acc [] : lines' s id -- Unix 164 lines' (c:s) acc = lines' s (acc . (c:)) 165 166internalError :: a 167internalError = error "unlit: internal error" 168