1{-# LANGUAGE ViewPatterns #-}
2
3-- | Preprocessors for literate code formats.
4
5module Agda.Syntax.Parser.Literate
6  ( literateProcessors
7  , literateExtsShortList
8  , literateSrcFile
9  , literateTeX
10  , literateRsT
11  , literateMd
12  , literateOrg
13  , illiterate
14  , atomizeLayers
15  , Processor
16  , Layers
17  , Layer(..)
18  , LayerRole(..)
19  , isCode
20  , isCodeLayer
21  )
22  where
23
24import Prelude hiding (getLine)
25import Control.Monad ((<=<))
26import Data.Char (isSpace)
27import Data.List (isPrefixOf)
28import Text.Regex.TDFA
29
30import Agda.Syntax.Common
31import Agda.Syntax.Position
32
33import Agda.Utils.List
34import Agda.Utils.Impossible
35
36-- | Role of a character in the file.
37
38data LayerRole = Markup | Comment | Code
39  deriving (Show, Eq)
40
41-- | A sequence of characters in a file playing the same role.
42
43data Layer = Layer
44  { layerRole    :: LayerRole
45  , interval     :: Interval
46  , layerContent :: String
47  } deriving Show
48
49-- | A list of contiguous layers.
50
51type Layers = [Layer]
52
53instance HasRange Layer where
54  getRange = getRange . interval
55
56-- | Annotates a tokenized string with position information.
57
58mkLayers :: Position -> [(LayerRole, String)] -> Layers
59mkLayers pos []            = emptyLiterate pos
60mkLayers pos ((_,"") : xs) = mkLayers pos xs
61                             -- Empty layers are ignored.
62mkLayers pos ((ty,s) : xs) =
63  Layer ty (Interval pos next) s : mkLayers next xs
64  where
65  next = movePosByString pos s
66
67unMkLayers :: Layers -> [(LayerRole, String)]
68unMkLayers = map ((,) <$> layerRole <*> layerContent)
69
70atomizeLayers :: Layers -> [(LayerRole, Char)]
71atomizeLayers = (fmap <$> ((,) . fst) <*> snd) <=< unMkLayers
72
73-- | Type of a literate preprocessor:
74--   Invariants:
75--
76--   > f : Processor
77--
78--   prop> f pos s /= []
79--
80--   prop> f pos s >>= layerContent == s
81
82type Processor = Position -> String -> [Layer]
83
84literateSrcFile :: [Layer] -> SrcFile
85literateSrcFile []                    = __IMPOSSIBLE__
86literateSrcFile (Layer{interval} : _) = getIntervalFile interval
87
88-- | List of valid extensions for literate Agda files, and their
89--   corresponding preprocessors.
90--
91--   If you add new extensions, remember to update test/Utils.hs so
92--   that test cases ending in the new extensions are found.
93
94literateProcessors :: [(String, (Processor, FileType))]
95literateProcessors =
96  ((,) <$> (".lagda" ++) . fst <*> snd) <$>
97    [ (""    , (literateTeX, TexFileType))
98    , (".rst", (literateRsT, RstFileType))
99    , (".tex", (literateTeX, TexFileType))
100    , (".md",  (literateMd,  MdFileType ))
101    , (".org", (literateOrg, OrgFileType))
102    ]
103
104-- | Returns @True@ if the role corresponds to Agda code.
105
106isCode :: LayerRole -> Bool
107isCode Code    = True
108isCode Markup  = False
109isCode Comment = False
110
111-- | Returns @True@ if the layer contains Agda code.
112
113isCodeLayer :: Layer -> Bool
114isCodeLayer = isCode . layerRole
115
116-- | Blanks the non-code parts of a given file, preserving positions of
117--   characters corresponding to code. This way, there is a direct
118--   correspondence between source positions and positions in the
119--   processed result.
120
121illiterate :: [Layer] -> String
122illiterate xs = concat
123  [ (if isCode layerRole then id else bleach) layerContent
124  | Layer{layerRole, layerContent} <- xs
125  ]
126
127-- | Replaces non-space characters in a string with spaces.
128
129bleach :: String -> String
130bleach s = map go s
131  where
132  go c | isSpace c = c
133  go _             = ' '
134
135-- | Check if a character is a blank character.
136
137isBlank :: Char -> Bool
138isBlank = (&&) <$> isSpace <*> (/= '\n')
139
140-- | Short list of extensions for literate Agda files.
141--   For display purposes.
142
143literateExtsShortList :: [String]
144literateExtsShortList = [".lagda"]
145
146-- | Breaks a list just /after/ an element satisfying the predicate is
147--   found.
148--
149--   >>> break1 even [1,3,5,2,4,7,8]
150--   ([1,3,5,2],[4,7,8])
151
152break1 :: (a -> Bool) -> [a] -> ([a],[a])
153break1 _ []           =  ([], [])
154break1 p (x:xs) | p x = (x:[],xs)
155break1 p (x:xs)       = let (ys,zs) = break1 p xs in (x:ys,zs)
156
157-- | Returns a tuple consisting of the first line of the input, and the rest
158--   of the input.
159
160getLine :: String -> (String, String)
161getLine = break1 (== '\n')
162
163-- | Canonical decomposition of an empty literate file.
164
165emptyLiterate :: Position -> [Layer]
166emptyLiterate pos = [Layer Markup (Interval pos pos) ""]
167
168-- | Create a regular expression that:
169--   - Must match the whole string
170--   - Works across line boundaries
171
172rex :: String -> Regex
173rex s =
174  makeRegexOpts blankCompOpt{newSyntax = True} blankExecOpt $
175    "\\`" ++ s ++ "\\'"
176
177-- | Preprocessor for literate TeX.
178
179literateTeX :: Position -> String -> [Layer]
180literateTeX pos s = mkLayers pos (tex s)
181  where
182  tex :: String -> [(LayerRole, String)]
183  tex [] = []
184  tex s  =
185    let (line, rest) = getLine s in
186    case r_begin `matchM` line of
187      Just (getAllTextSubmatches -> [_, pre, _, markup, whitespace]) ->
188        (Comment, pre) : (Markup, markup) :
189        (Code, whitespace) : code rest
190      Just _  -> __IMPOSSIBLE__
191      Nothing -> (Comment, line):tex rest
192
193  r_begin = rex "(([^\\%]|\\\\.)*)(\\\\begin\\{code\\}[^\n]*)(\n)?"
194
195  code :: String -> [(LayerRole, String)]
196  code [] = []
197  code s  =
198    let (line, rest) = getLine s in
199    case r_end `matchM` line of
200      Just (getAllTextSubmatches -> [_, code, markup, post]) ->
201        (Code, code) : (Markup, markup) : (Comment, post) : tex rest
202      Just _  -> __IMPOSSIBLE__
203      Nothing -> (Code, line) : code rest
204
205  r_end = rex "([[:blank:]]*)(\\\\end\\{code\\})(.*)"
206
207-- | Preprocessor for Markdown.
208
209literateMd :: Position -> String -> [Layer]
210literateMd pos s = mkLayers pos$ md s
211  where
212  md :: String -> [(LayerRole, String)]
213  md [] = []
214  md s  =
215    let (line, rest) = getLine s in
216    case md_begin `matchM` line of
217      Just (getAllTextSubmatches -> [_, pre, markup, _]) ->
218        (Comment, pre) : (Markup, markup) : code rest
219      Just _  -> __IMPOSSIBLE__
220      Nothing ->
221        (Comment, line) :
222        if md_begin_other `match` line
223        then code_other rest
224        else md rest
225
226  md_begin       = rex "(.*)([[:space:]]*```(agda)?[[:space:]]*)"
227  md_begin_other = rex "[[:space:]]*```[a-zA-Z0-9-]*[[:space:]]*"
228
229  code :: String -> [(LayerRole, String)]
230  code [] = []
231  code s  =
232    let (line, rest) = getLine s in
233    case md_end `matchM` line of
234      Just (getAllTextSubmatches -> [_, markup]) ->
235        (Markup, markup) : md rest
236      Just _  -> __IMPOSSIBLE__
237      Nothing -> (Code, line) : code rest
238
239  -- A non-Agda code block.
240  code_other :: String -> [(LayerRole, String)]
241  code_other [] = []
242  code_other s  =
243    let (line, rest) = getLine s in
244    (Comment, line) :
245    if md_end `match` line
246    then md rest
247    else code_other rest
248
249  md_end = rex "([[:space:]]*```[[:space:]]*)"
250
251-- | Preprocessor for reStructuredText.
252
253literateRsT :: Position -> String -> [Layer]
254literateRsT pos s = mkLayers pos$ rst s
255  where
256  rst :: String -> [(LayerRole, String)]
257  rst [] = []
258  rst s  = maybe_code s
259
260  maybe_code s =
261    if r_comment `match` line then
262      not_code
263    else case r_code `match` line of
264      []                         -> not_code
265      [[_, before, "::", after]] ->
266        -- Code starts
267        if maybe True isBlank $ lastMaybe before then
268          (Markup, line) : code rest
269        else
270          (Comment, before ++ ":") : (Markup, ":" ++ after) : code rest
271      _ -> __IMPOSSIBLE__
272    where
273    (line, rest) = getLine s
274    not_code     = (Comment, line) : rst rest
275
276  -- Finds the next indented block in the input.
277  code :: String -> [(LayerRole, String)]
278  code [] = []
279  code s  =
280    let (line, rest) = getLine s in
281    if all isSpace line then
282      (Markup, line) : code rest
283    else
284      let xs = takeWhile isBlank line in
285      if null xs
286      then maybe_code s
287      else (Code, line) : indented xs rest
288
289  -- Process an indented block.
290  indented :: String -> String -> [(LayerRole, String)]
291  indented _ [] = []
292  indented ind s =
293    let (line, rest) = getLine s
294    in  if all isSpace line || (ind `isPrefixOf` line)
295          then (Code, line) : indented ind rest
296          else maybe_code s
297
298  -- Beginning of a code block.
299  r_code = rex "(.*)(::)([[:space:]]*)"
300
301  -- Beginning of a comment block.
302  r_comment = rex "[[:space:]]*\\.\\.([[:space:]].*)?"
303
304-- | Preprocessor for Org mode documents.
305
306literateOrg :: Position -> String -> [Layer]
307literateOrg pos s = mkLayers pos$ org s
308  where
309  org :: String -> [(LayerRole, String)]
310  org [] = []
311  org s  =
312    let (line, rest) = getLine s in
313    if org_begin `match` line then
314      (Markup, line) : code rest
315    else
316      (Comment, line) : org rest
317
318  -- Valid: #+begin_src agda2 :tangle yes
319  -- Valid: #+begin_src agda2
320  -- Invalid: #+begin_src adga2-foo
321  org_begin = rex' "\\`(.*)([[:space:]]*\\#\\+begin_src agda2[[:space:]]+)"
322
323  code :: String -> [(LayerRole, String)]
324  code [] = []
325  code s  =
326    let (line, rest) = getLine s in
327    if org_end `match` line then
328      (Markup, line) : org rest
329    else
330      (Code, line) : code rest
331
332  org_end = rex' "\\`([[:space:]]*\\#\\+end_src[[:space:]]*)(.*)"
333
334  -- Explicit type annotation required to disambiguate source.
335  rex' :: String -> Regex
336  -- Source blocks start with `#+begin_src` but the casing does not matter.
337  rex' = makeRegexOpts blankCompOpt{newSyntax = True, caseSensitive = False} blankExecOpt
338