1{-# LANGUAGE ViewPatterns #-} 2 3-- | Preprocessors for literate code formats. 4 5module Agda.Syntax.Parser.Literate 6 ( literateProcessors 7 , literateExtsShortList 8 , literateSrcFile 9 , literateTeX 10 , literateRsT 11 , literateMd 12 , literateOrg 13 , illiterate 14 , atomizeLayers 15 , Processor 16 , Layers 17 , Layer(..) 18 , LayerRole(..) 19 , isCode 20 , isCodeLayer 21 ) 22 where 23 24import Prelude hiding (getLine) 25import Control.Monad ((<=<)) 26import Data.Char (isSpace) 27import Data.List (isPrefixOf) 28import Text.Regex.TDFA 29 30import Agda.Syntax.Common 31import Agda.Syntax.Position 32 33import Agda.Utils.List 34import Agda.Utils.Impossible 35 36-- | Role of a character in the file. 37 38data LayerRole = Markup | Comment | Code 39 deriving (Show, Eq) 40 41-- | A sequence of characters in a file playing the same role. 42 43data Layer = Layer 44 { layerRole :: LayerRole 45 , interval :: Interval 46 , layerContent :: String 47 } deriving Show 48 49-- | A list of contiguous layers. 50 51type Layers = [Layer] 52 53instance HasRange Layer where 54 getRange = getRange . interval 55 56-- | Annotates a tokenized string with position information. 57 58mkLayers :: Position -> [(LayerRole, String)] -> Layers 59mkLayers pos [] = emptyLiterate pos 60mkLayers pos ((_,"") : xs) = mkLayers pos xs 61 -- Empty layers are ignored. 62mkLayers pos ((ty,s) : xs) = 63 Layer ty (Interval pos next) s : mkLayers next xs 64 where 65 next = movePosByString pos s 66 67unMkLayers :: Layers -> [(LayerRole, String)] 68unMkLayers = map ((,) <$> layerRole <*> layerContent) 69 70atomizeLayers :: Layers -> [(LayerRole, Char)] 71atomizeLayers = (fmap <$> ((,) . fst) <*> snd) <=< unMkLayers 72 73-- | Type of a literate preprocessor: 74-- Invariants: 75-- 76-- > f : Processor 77-- 78-- prop> f pos s /= [] 79-- 80-- prop> f pos s >>= layerContent == s 81 82type Processor = Position -> String -> [Layer] 83 84literateSrcFile :: [Layer] -> SrcFile 85literateSrcFile [] = __IMPOSSIBLE__ 86literateSrcFile (Layer{interval} : _) = getIntervalFile interval 87 88-- | List of valid extensions for literate Agda files, and their 89-- corresponding preprocessors. 90-- 91-- If you add new extensions, remember to update test/Utils.hs so 92-- that test cases ending in the new extensions are found. 93 94literateProcessors :: [(String, (Processor, FileType))] 95literateProcessors = 96 ((,) <$> (".lagda" ++) . fst <*> snd) <$> 97 [ ("" , (literateTeX, TexFileType)) 98 , (".rst", (literateRsT, RstFileType)) 99 , (".tex", (literateTeX, TexFileType)) 100 , (".md", (literateMd, MdFileType )) 101 , (".org", (literateOrg, OrgFileType)) 102 ] 103 104-- | Returns @True@ if the role corresponds to Agda code. 105 106isCode :: LayerRole -> Bool 107isCode Code = True 108isCode Markup = False 109isCode Comment = False 110 111-- | Returns @True@ if the layer contains Agda code. 112 113isCodeLayer :: Layer -> Bool 114isCodeLayer = isCode . layerRole 115 116-- | Blanks the non-code parts of a given file, preserving positions of 117-- characters corresponding to code. This way, there is a direct 118-- correspondence between source positions and positions in the 119-- processed result. 120 121illiterate :: [Layer] -> String 122illiterate xs = concat 123 [ (if isCode layerRole then id else bleach) layerContent 124 | Layer{layerRole, layerContent} <- xs 125 ] 126 127-- | Replaces non-space characters in a string with spaces. 128 129bleach :: String -> String 130bleach s = map go s 131 where 132 go c | isSpace c = c 133 go _ = ' ' 134 135-- | Check if a character is a blank character. 136 137isBlank :: Char -> Bool 138isBlank = (&&) <$> isSpace <*> (/= '\n') 139 140-- | Short list of extensions for literate Agda files. 141-- For display purposes. 142 143literateExtsShortList :: [String] 144literateExtsShortList = [".lagda"] 145 146-- | Breaks a list just /after/ an element satisfying the predicate is 147-- found. 148-- 149-- >>> break1 even [1,3,5,2,4,7,8] 150-- ([1,3,5,2],[4,7,8]) 151 152break1 :: (a -> Bool) -> [a] -> ([a],[a]) 153break1 _ [] = ([], []) 154break1 p (x:xs) | p x = (x:[],xs) 155break1 p (x:xs) = let (ys,zs) = break1 p xs in (x:ys,zs) 156 157-- | Returns a tuple consisting of the first line of the input, and the rest 158-- of the input. 159 160getLine :: String -> (String, String) 161getLine = break1 (== '\n') 162 163-- | Canonical decomposition of an empty literate file. 164 165emptyLiterate :: Position -> [Layer] 166emptyLiterate pos = [Layer Markup (Interval pos pos) ""] 167 168-- | Create a regular expression that: 169-- - Must match the whole string 170-- - Works across line boundaries 171 172rex :: String -> Regex 173rex s = 174 makeRegexOpts blankCompOpt{newSyntax = True} blankExecOpt $ 175 "\\`" ++ s ++ "\\'" 176 177-- | Preprocessor for literate TeX. 178 179literateTeX :: Position -> String -> [Layer] 180literateTeX pos s = mkLayers pos (tex s) 181 where 182 tex :: String -> [(LayerRole, String)] 183 tex [] = [] 184 tex s = 185 let (line, rest) = getLine s in 186 case r_begin `matchM` line of 187 Just (getAllTextSubmatches -> [_, pre, _, markup, whitespace]) -> 188 (Comment, pre) : (Markup, markup) : 189 (Code, whitespace) : code rest 190 Just _ -> __IMPOSSIBLE__ 191 Nothing -> (Comment, line):tex rest 192 193 r_begin = rex "(([^\\%]|\\\\.)*)(\\\\begin\\{code\\}[^\n]*)(\n)?" 194 195 code :: String -> [(LayerRole, String)] 196 code [] = [] 197 code s = 198 let (line, rest) = getLine s in 199 case r_end `matchM` line of 200 Just (getAllTextSubmatches -> [_, code, markup, post]) -> 201 (Code, code) : (Markup, markup) : (Comment, post) : tex rest 202 Just _ -> __IMPOSSIBLE__ 203 Nothing -> (Code, line) : code rest 204 205 r_end = rex "([[:blank:]]*)(\\\\end\\{code\\})(.*)" 206 207-- | Preprocessor for Markdown. 208 209literateMd :: Position -> String -> [Layer] 210literateMd pos s = mkLayers pos$ md s 211 where 212 md :: String -> [(LayerRole, String)] 213 md [] = [] 214 md s = 215 let (line, rest) = getLine s in 216 case md_begin `matchM` line of 217 Just (getAllTextSubmatches -> [_, pre, markup, _]) -> 218 (Comment, pre) : (Markup, markup) : code rest 219 Just _ -> __IMPOSSIBLE__ 220 Nothing -> 221 (Comment, line) : 222 if md_begin_other `match` line 223 then code_other rest 224 else md rest 225 226 md_begin = rex "(.*)([[:space:]]*```(agda)?[[:space:]]*)" 227 md_begin_other = rex "[[:space:]]*```[a-zA-Z0-9-]*[[:space:]]*" 228 229 code :: String -> [(LayerRole, String)] 230 code [] = [] 231 code s = 232 let (line, rest) = getLine s in 233 case md_end `matchM` line of 234 Just (getAllTextSubmatches -> [_, markup]) -> 235 (Markup, markup) : md rest 236 Just _ -> __IMPOSSIBLE__ 237 Nothing -> (Code, line) : code rest 238 239 -- A non-Agda code block. 240 code_other :: String -> [(LayerRole, String)] 241 code_other [] = [] 242 code_other s = 243 let (line, rest) = getLine s in 244 (Comment, line) : 245 if md_end `match` line 246 then md rest 247 else code_other rest 248 249 md_end = rex "([[:space:]]*```[[:space:]]*)" 250 251-- | Preprocessor for reStructuredText. 252 253literateRsT :: Position -> String -> [Layer] 254literateRsT pos s = mkLayers pos$ rst s 255 where 256 rst :: String -> [(LayerRole, String)] 257 rst [] = [] 258 rst s = maybe_code s 259 260 maybe_code s = 261 if r_comment `match` line then 262 not_code 263 else case r_code `match` line of 264 [] -> not_code 265 [[_, before, "::", after]] -> 266 -- Code starts 267 if maybe True isBlank $ lastMaybe before then 268 (Markup, line) : code rest 269 else 270 (Comment, before ++ ":") : (Markup, ":" ++ after) : code rest 271 _ -> __IMPOSSIBLE__ 272 where 273 (line, rest) = getLine s 274 not_code = (Comment, line) : rst rest 275 276 -- Finds the next indented block in the input. 277 code :: String -> [(LayerRole, String)] 278 code [] = [] 279 code s = 280 let (line, rest) = getLine s in 281 if all isSpace line then 282 (Markup, line) : code rest 283 else 284 let xs = takeWhile isBlank line in 285 if null xs 286 then maybe_code s 287 else (Code, line) : indented xs rest 288 289 -- Process an indented block. 290 indented :: String -> String -> [(LayerRole, String)] 291 indented _ [] = [] 292 indented ind s = 293 let (line, rest) = getLine s 294 in if all isSpace line || (ind `isPrefixOf` line) 295 then (Code, line) : indented ind rest 296 else maybe_code s 297 298 -- Beginning of a code block. 299 r_code = rex "(.*)(::)([[:space:]]*)" 300 301 -- Beginning of a comment block. 302 r_comment = rex "[[:space:]]*\\.\\.([[:space:]].*)?" 303 304-- | Preprocessor for Org mode documents. 305 306literateOrg :: Position -> String -> [Layer] 307literateOrg pos s = mkLayers pos$ org s 308 where 309 org :: String -> [(LayerRole, String)] 310 org [] = [] 311 org s = 312 let (line, rest) = getLine s in 313 if org_begin `match` line then 314 (Markup, line) : code rest 315 else 316 (Comment, line) : org rest 317 318 -- Valid: #+begin_src agda2 :tangle yes 319 -- Valid: #+begin_src agda2 320 -- Invalid: #+begin_src adga2-foo 321 org_begin = rex' "\\`(.*)([[:space:]]*\\#\\+begin_src agda2[[:space:]]+)" 322 323 code :: String -> [(LayerRole, String)] 324 code [] = [] 325 code s = 326 let (line, rest) = getLine s in 327 if org_end `match` line then 328 (Markup, line) : org rest 329 else 330 (Code, line) : code rest 331 332 org_end = rex' "\\`([[:space:]]*\\#\\+end_src[[:space:]]*)(.*)" 333 334 -- Explicit type annotation required to disambiguate source. 335 rex' :: String -> Regex 336 -- Source blocks start with `#+begin_src` but the casing does not matter. 337 rex' = makeRegexOpts blankCompOpt{newSyntax = True, caseSensitive = False} blankExecOpt 338