1{-# LANGUAGE OverloadedStrings #-}
2
3-- |
4-- Copyright: © Herbert Valerio Riedel 2018
5-- SPDX-License-Identifier: GPL-2.0-or-later
6--
7-- Incomplete TestML 0.3.0 parser
8module TML
9    ( TML.parse
10
11    , Document(..)
12    , Block(..)
13
14    , Point(..)
15    , PseudoId(..)
16
17    , Code(..)
18    , AssertOp(..)
19    , CodeExpr(..)
20    , CodeObject(..)
21    , FunCall(..)
22    ) where
23
24import           Data.ByteString            (ByteString)
25import qualified Data.ByteString.Char8      as B
26
27import           Control.Applicative        hiding (many, some)
28import           Control.Monad
29import qualified Data.Aeson.Micro           as J
30import qualified Data.ByteString            as BS
31import qualified Data.Char                  as C
32import           Data.List
33import qualified Data.Map                   as Map
34import           Data.Maybe
35import qualified Data.Text                  as T
36import qualified Data.Text.Encoding         as T
37import qualified Data.Text.IO               as T
38import           Data.Void
39import           System.Environment
40import           Text.Megaparsec
41import           Text.Megaparsec.Char
42import qualified Text.Megaparsec.Char.Lexer as L
43import           Text.Megaparsec.Expr
44
45type Parser = Parsec Void T.Text
46
47parse :: String -> T.Text -> Either T.Text Document
48parse fn raw = either (Left . T.pack . parseErrorPretty' raw)
49                      (Right .process_pseudo)
50                      (Text.Megaparsec.parse testml_document fn raw)
51
52
53----------------------------------------------------------------------------
54
55data Document = Document [Code] [Block]
56              deriving Show
57
58instance J.ToJSON Document where
59  toJSON (Document code dat)
60    = J.object [ "testml" J..= ("0.3.0" :: T.Text)
61               , "code" J..= code
62               , "data" J..= dat
63               ]
64
65data Block = Block !T.Text [Point]
66           deriving Show
67
68instance J.ToJSON Block where
69  toJSON (Block label points)
70    = J.object [ "label" J..= label
71               , "point" J..= J.object (map f points)
72               ]
73    where
74      f (PointStr k v)  = k J..= v
75      f (PointPseudo k) = (T.pack (show k)) J..= True
76      f (PointInt k v)  = k J..= v
77
78data Point = PointStr !T.Text !T.Text
79           | PointInt !T.Text !Integer
80           | PointPseudo !PseudoId
81           deriving Show
82
83instance J.ToJSON Code where
84  toJSON (CodeAssignmentStmt lhs rhs)
85    = J.Array [J.String "=", J.String lhs, J.toJSON rhs]
86  toJSON stmt@(CodeExpressionStmt lhs massert)
87    | pobjs@(_:_) <- pointObjsInExpr stmt
88      = J.Array [ J.String "%()"
89                , J.Array [ J.String ("*" `mappend` p) | p <- pobjs ]
90                , expr'
91                ]
92    | otherwise = expr'
93    where
94      expr' = case massert of
95                Just (op,rhs) -> J.toJSON (op,lhs,rhs)
96                Nothing       -> J.toJSON lhs
97
98data Code = CodeAssignmentStmt !T.Text !CodeExpr
99          | CodeExpressionStmt !CodeExpr !(Maybe (AssertOp,CodeExpr))
100          | CodeImportStmt [T.Text]
101          deriving Show
102
103instance J.ToJSON AssertOp where
104  toJSON AssertEq   = J.String "=="
105  toJSON AssertHas  = J.String "~~"
106  toJSON AssertLike = J.String "=~"
107
108data AssertOp = AssertEq | AssertHas | AssertLike
109              deriving Show
110
111instance J.ToJSON CodeExpr where
112  toJSON (CodeExpr obj [])  = J.toJSON obj
113  toJSON (CodeExpr obj fns) = J.Array $ [J.String ".", J.toJSON obj] ++ map J.toJSON fns
114
115data CodeExpr = CodeExpr !CodeObject [FunCall]
116  deriving Show
117
118instance J.ToJSON CodeObject where
119  toJSON (StrObj s)   = J.String s
120  toJSON (NumObj n)   = J.Number n
121  toJSON (PointObj j) = J.Array [J.String "*", J.String j]
122  toJSON (CallObj fn) = J.toJSON fn
123
124data CodeObject = StrObj !T.Text
125                | CallObj !FunCall
126                | NumObj !Double
127                | PointObj !T.Text
128                deriving Show
129
130instance J.ToJSON FunCall where
131  toJSON (FunCall fn args) = J.Array (J.String fn : map J.toJSON args)
132
133data FunCall = FunCall !T.Text [CodeExpr]
134  deriving Show
135
136
137
138pointObjsInExpr :: Code -> [T.Text]
139pointObjsInExpr co = nub $ case co of
140    CodeAssignmentStmt _ expr           -> goExpr expr
141    CodeExpressionStmt e1 Nothing       -> goExpr e1
142    CodeExpressionStmt e1 (Just (_,e2)) -> goExpr e1 ++ goExpr e2
143  where
144    goExpr (CodeExpr obj fns) = goObj obj ++ concatMap goFun fns
145
146    goFun (FunCall _ exprs) = concatMap goExpr exprs
147
148    goObj (PointObj j) = [j]
149    goObj (CallObj fn) = goFun fn
150    goObj (StrObj _)   = []
151    goObj (NumObj _)   = []
152
153
154testml_document :: Parser Document
155testml_document = Document <$> code_section <*> data_section <* eof
156
157pseudo_point_name  :: Parser PseudoId
158pseudo_point_name
159  = choice [ HEAD <$ string "HEAD"
160           , LAST <$ string "LAST"
161           , ONLY <$ string "ONLY"
162           , SKIP <$ string "SKIP"
163           , TODO <$ string "TODO"
164           , DIFF <$ string "DIFF"
165           ]
166
167data PseudoId = HEAD
168              | LAST
169              | ONLY
170              | SKIP
171              | TODO
172              | DIFF
173              deriving (Eq,Show)
174
175process_pseudo :: Document -> Document
176process_pseudo (Document code bs0) = Document code (go bs0)
177  where
178    go blocks
179      | Just b <- find isOnly blocks' = [b]
180      | Just bs <- goHead blocks'     = bs
181      | Just bs <- goLast [] blocks'  = bs
182      | otherwise = blocks'
183      where
184        blocks' = filter (not . isSkip) blocks
185
186        isOnly b = ONLY `elem` pseudos b
187        isSkip b = SKIP `elem` pseudos b
188        isHead b = HEAD `elem` pseudos b
189        isLast b = LAST `elem` pseudos b
190
191        pseudos (Block _ ps) = [ k | PointPseudo k <- ps ]
192
193        goHead [] = Nothing
194        goHead (b:bs)
195          | isHead b = Just (b:bs)
196          | otherwise = goHead bs
197
198        goLast acc [] = Nothing
199        goLast acc (b:bs)
200          | isLast b  = Just $ reverse (b:bs)
201          | otherwise = goLast (b:acc) bs
202
203code_section :: Parser [Code]
204code_section = do
205    xs <- many code_statement
206    pure (catMaybes xs)
207  where
208    code_statement = choice
209      [ Nothing <$ comment_lines
210      , Just <$> import_directive
211      , Just <$> assignment_statement
212      , Just <$> expression_statement
213      ]
214
215    import_directive = do
216      string "%Import"
217      ws
218      mods <- module_name `sepBy1` ws
219      ws0
220      eol
221      pure $! CodeImportStmt mods
222
223    module_name :: Parser T.Text
224    module_name = T.pack <$> some alphaNumChar
225
226    assignment_statement = do
227      v <- try $ do
228        v' <- identifier_name
229        ws
230        void (char '=') <|> void (string "||=") -- FIXME
231        ws
232        pure v'
233      e <- code_expression
234      eol
235      pure (CodeAssignmentStmt v e)
236
237    expression_statement = do
238      -- TODO: expression-label
239      -- optional (double_string >> char ':' >> ws0)
240
241      -- TODO: pick-expression
242
243      lhs <- code_expression
244      ws
245      op <- choice
246            [ AssertEq   <$ string "=="
247            , AssertHas  <$ string "~~"
248            , AssertLike <$ string "=~"
249            ]
250      ws
251      rhs <- code_expression
252
253      optional $ do
254        ws0
255        char ':'
256        double_string
257
258      eol
259
260      pure (CodeExpressionStmt lhs (Just (op,rhs)))
261
262
263code_expression :: Parser CodeExpr
264code_expression = CodeExpr <$> code_object <*> many function_call
265
266
267-- quoted string
268double_string :: Parser T.Text
269double_string = do
270    char '"'
271    str <- many (noneOf ("\n\"\\" :: [Char]) <|> (char '\\' >> (unesc <$> oneOf ("\\\"0nt" :: [Char]))))
272    char '"'
273    pure $! (T.pack str)
274  where
275    unesc '0' = '\0'
276    unesc 'n' = '\n'
277    unesc 't' = '\t'
278    unesc c   = c
279
280single_string :: Parser T.Text
281single_string = do
282    char '\''
283    str <- many (noneOf ("\n'\\" :: [Char]) <|> (char '\\' >> (oneOf ("\\'" :: [Char]))))
284    char '\''
285    pure $! (T.pack str)
286
287function_call :: Parser FunCall
288function_call = do
289  char '.'
290  call_object
291
292call_object :: Parser FunCall
293call_object = FunCall <$> identifier_name
294                      <*> optional' [] (between (char '(') (char ')') $ code_expression `sepBy1` (char ',' >> ws0))
295
296optional' def p = do
297  x <- optional p
298  case x of
299    Nothing -> pure def
300    Just y  -> pure y
301
302code_object :: Parser CodeObject
303code_object
304  = choice
305    [ mkPoint <$> char '*' <*> lowerChar <*> many (lowerChar <|> digitChar <|> char '-' <|> char '_')
306    , mkNum <$> optional (char '-') <*> some digitChar <*> optional (char '.' >> some digitChar)
307    , CallObj <$> call_object
308    , StrObj <$> single_string
309    , StrObj <$> double_string
310    ] <?> "code-object"
311  where
312    mkPoint _ c cs = PointObj $! (T.pack (c:cs))
313    mkNum msign ds1 mds2 = NumObj $! (read $ (maybe id (:) msign) ds1 ++ (maybe "" ('.':) mds2))
314
315data_section :: Parser [Block]
316data_section = many block_definition
317  where
318    block_definition = do
319      -- block_heading
320      string "===" *> ws
321      l <- T.pack <$> manyTill anyChar eol
322
323      -- TODO: user_defined
324      ps <- many point_definition
325      pure (Block l ps)
326
327    point_definition = do
328      string "---" *> ws
329
330      j <- eitherP identifier_user pseudo_point_name
331
332      filters <- maybe [] id <$> optional filter_spec
333
334      let single = do
335            _ <- char ':' *> ws
336            x <- T.pack <$> manyTill anyChar eol
337            -- consume and ignore any point_lines
338            _ <- point_lines
339            pure $! case j of
340                      Left j'  -> mkSinglePointVal j' (transformPoint True filters x)
341                      Right j' -> PointPseudo j' -- is this allowed?
342
343          multi = do
344            ws0 *> eol
345            x <- point_lines
346            pure $! case j of
347                      Left j'  -> PointStr j' (transformPoint False filters x)
348                      Right j' -> PointPseudo j'
349
350      single <|> multi
351
352    filter_spec = between (char '(') (char ')') $ many (oneOf ("<#+-~/@" :: [Char]))
353
354    mkSinglePointVal k v
355      | T.all C.isDigit v = PointInt k (read (T.unpack v))
356      | otherwise         = PointStr k v
357
358point_lines :: Parser T.Text
359point_lines = T.pack . unlines <$> go
360  where
361    go = many (notFollowedBy point_boundary *> manyTill anyChar eol)
362
363    point_boundary :: Parser ()
364    point_boundary = void (string "---") <|> void (string "===") <|> eof
365
366identifier_user :: Parser T.Text
367identifier_user = do
368  x  <- (:) <$> lowerChar <*> many alphaNumChar
369  xs <- many ((:) <$> char '-' <*> some alphaNumChar)
370
371  pure $! T.pack (concat (x:xs))
372
373identifier_name :: Parser T.Text
374identifier_name = do
375  x  <- (:) <$> letterChar <*> many alphaNumChar
376  xs <- many ((:) <$> char '-' <*> some alphaNumChar)
377
378  pure $! T.pack (concat (x:xs))
379
380
381ws :: Parser ()
382ws = void $ takeWhile1P (Just "BLANK") (\c -> c == ' ' || c == '\t')
383
384ws0 :: Parser ()
385ws0 = void $ takeWhileP (Just "BLANK") (\c -> c == ' ' || c == '\t')
386
387blank_line :: Parser ()
388blank_line = (try (ws0 <* eol) <|> try (ws <* eof)) <?> "blank-line"
389
390comment_line :: Parser ()
391comment_line = (char '#' *> takeWhileP Nothing (/= '\n') *> void eol) <?> "comment-line"
392
393comment_lines :: Parser ()
394comment_lines = void (some (comment_line <|> blank_line))
395
396stripTrailEols :: T.Text -> T.Text
397stripTrailEols = go
398  where
399    go t | T.isSuffixOf "\n\n" t     = go (T.init t)
400         | T.isSuffixOf "\r\n\r\n" t = go (T.init (T.init t))
401         | t == "\n"                 = ""
402         | otherwise = t
403
404-- 'undent'
405stripPrefixInd :: T.Text -> T.Text
406stripPrefixInd = T.unlines . map go . T.lines
407  where
408    go t | T.isPrefixOf "    " t = T.drop 4 t
409         | T.isPrefixOf "   "  t = T.drop 3 t
410         | T.isPrefixOf "  "   t = T.drop 2 t
411         | T.isPrefixOf " "    t = T.drop 1 t
412         | otherwise           = t
413
414stripComments :: T.Text -> T.Text
415stripComments = T.unlines . filter (not . T.isPrefixOf "#") . T.lines
416
417transformPoint :: Bool -> [Char] -> T.Text -> T.Text
418transformPoint single mods0 -- TODO: backslash
419  = go mods0 .
420    (if keepBlanks then id else stripTrailEols) .
421    (if keepComments then id else stripComments)
422  where
423    keepBlanks = single || ('+' `elem` mods0)
424    keepComments = single || ('#' `elem` mods0)
425
426    go []       = id
427    go ('<':xs)
428      | single = error "invalid filter for point-single"
429      | otherwise = go xs . stripPrefixInd
430    go ('+':xs) = go xs -- negative flag
431    go ('#':xs) = go xs -- negative flag
432    go ('-':xs) = go xs . T.dropWhileEnd C.isSpace
433    go (c:_)    = error ("unknown filter " ++ show c)
434