1{-# LANGUAGE OverloadedStrings #-} 2 3-- | 4-- Copyright: © Herbert Valerio Riedel 2018 5-- SPDX-License-Identifier: GPL-2.0-or-later 6-- 7-- Incomplete TestML 0.3.0 parser 8module TML 9 ( TML.parse 10 11 , Document(..) 12 , Block(..) 13 14 , Point(..) 15 , PseudoId(..) 16 17 , Code(..) 18 , AssertOp(..) 19 , CodeExpr(..) 20 , CodeObject(..) 21 , FunCall(..) 22 ) where 23 24import Data.ByteString (ByteString) 25import qualified Data.ByteString.Char8 as B 26 27import Control.Applicative hiding (many, some) 28import Control.Monad 29import qualified Data.Aeson.Micro as J 30import qualified Data.ByteString as BS 31import qualified Data.Char as C 32import Data.List 33import qualified Data.Map as Map 34import Data.Maybe 35import qualified Data.Text as T 36import qualified Data.Text.Encoding as T 37import qualified Data.Text.IO as T 38import Data.Void 39import System.Environment 40import Text.Megaparsec 41import Text.Megaparsec.Char 42import qualified Text.Megaparsec.Char.Lexer as L 43import Text.Megaparsec.Expr 44 45type Parser = Parsec Void T.Text 46 47parse :: String -> T.Text -> Either T.Text Document 48parse fn raw = either (Left . T.pack . parseErrorPretty' raw) 49 (Right .process_pseudo) 50 (Text.Megaparsec.parse testml_document fn raw) 51 52 53---------------------------------------------------------------------------- 54 55data Document = Document [Code] [Block] 56 deriving Show 57 58instance J.ToJSON Document where 59 toJSON (Document code dat) 60 = J.object [ "testml" J..= ("0.3.0" :: T.Text) 61 , "code" J..= code 62 , "data" J..= dat 63 ] 64 65data Block = Block !T.Text [Point] 66 deriving Show 67 68instance J.ToJSON Block where 69 toJSON (Block label points) 70 = J.object [ "label" J..= label 71 , "point" J..= J.object (map f points) 72 ] 73 where 74 f (PointStr k v) = k J..= v 75 f (PointPseudo k) = (T.pack (show k)) J..= True 76 f (PointInt k v) = k J..= v 77 78data Point = PointStr !T.Text !T.Text 79 | PointInt !T.Text !Integer 80 | PointPseudo !PseudoId 81 deriving Show 82 83instance J.ToJSON Code where 84 toJSON (CodeAssignmentStmt lhs rhs) 85 = J.Array [J.String "=", J.String lhs, J.toJSON rhs] 86 toJSON stmt@(CodeExpressionStmt lhs massert) 87 | pobjs@(_:_) <- pointObjsInExpr stmt 88 = J.Array [ J.String "%()" 89 , J.Array [ J.String ("*" `mappend` p) | p <- pobjs ] 90 , expr' 91 ] 92 | otherwise = expr' 93 where 94 expr' = case massert of 95 Just (op,rhs) -> J.toJSON (op,lhs,rhs) 96 Nothing -> J.toJSON lhs 97 98data Code = CodeAssignmentStmt !T.Text !CodeExpr 99 | CodeExpressionStmt !CodeExpr !(Maybe (AssertOp,CodeExpr)) 100 | CodeImportStmt [T.Text] 101 deriving Show 102 103instance J.ToJSON AssertOp where 104 toJSON AssertEq = J.String "==" 105 toJSON AssertHas = J.String "~~" 106 toJSON AssertLike = J.String "=~" 107 108data AssertOp = AssertEq | AssertHas | AssertLike 109 deriving Show 110 111instance J.ToJSON CodeExpr where 112 toJSON (CodeExpr obj []) = J.toJSON obj 113 toJSON (CodeExpr obj fns) = J.Array $ [J.String ".", J.toJSON obj] ++ map J.toJSON fns 114 115data CodeExpr = CodeExpr !CodeObject [FunCall] 116 deriving Show 117 118instance J.ToJSON CodeObject where 119 toJSON (StrObj s) = J.String s 120 toJSON (NumObj n) = J.Number n 121 toJSON (PointObj j) = J.Array [J.String "*", J.String j] 122 toJSON (CallObj fn) = J.toJSON fn 123 124data CodeObject = StrObj !T.Text 125 | CallObj !FunCall 126 | NumObj !Double 127 | PointObj !T.Text 128 deriving Show 129 130instance J.ToJSON FunCall where 131 toJSON (FunCall fn args) = J.Array (J.String fn : map J.toJSON args) 132 133data FunCall = FunCall !T.Text [CodeExpr] 134 deriving Show 135 136 137 138pointObjsInExpr :: Code -> [T.Text] 139pointObjsInExpr co = nub $ case co of 140 CodeAssignmentStmt _ expr -> goExpr expr 141 CodeExpressionStmt e1 Nothing -> goExpr e1 142 CodeExpressionStmt e1 (Just (_,e2)) -> goExpr e1 ++ goExpr e2 143 where 144 goExpr (CodeExpr obj fns) = goObj obj ++ concatMap goFun fns 145 146 goFun (FunCall _ exprs) = concatMap goExpr exprs 147 148 goObj (PointObj j) = [j] 149 goObj (CallObj fn) = goFun fn 150 goObj (StrObj _) = [] 151 goObj (NumObj _) = [] 152 153 154testml_document :: Parser Document 155testml_document = Document <$> code_section <*> data_section <* eof 156 157pseudo_point_name :: Parser PseudoId 158pseudo_point_name 159 = choice [ HEAD <$ string "HEAD" 160 , LAST <$ string "LAST" 161 , ONLY <$ string "ONLY" 162 , SKIP <$ string "SKIP" 163 , TODO <$ string "TODO" 164 , DIFF <$ string "DIFF" 165 ] 166 167data PseudoId = HEAD 168 | LAST 169 | ONLY 170 | SKIP 171 | TODO 172 | DIFF 173 deriving (Eq,Show) 174 175process_pseudo :: Document -> Document 176process_pseudo (Document code bs0) = Document code (go bs0) 177 where 178 go blocks 179 | Just b <- find isOnly blocks' = [b] 180 | Just bs <- goHead blocks' = bs 181 | Just bs <- goLast [] blocks' = bs 182 | otherwise = blocks' 183 where 184 blocks' = filter (not . isSkip) blocks 185 186 isOnly b = ONLY `elem` pseudos b 187 isSkip b = SKIP `elem` pseudos b 188 isHead b = HEAD `elem` pseudos b 189 isLast b = LAST `elem` pseudos b 190 191 pseudos (Block _ ps) = [ k | PointPseudo k <- ps ] 192 193 goHead [] = Nothing 194 goHead (b:bs) 195 | isHead b = Just (b:bs) 196 | otherwise = goHead bs 197 198 goLast acc [] = Nothing 199 goLast acc (b:bs) 200 | isLast b = Just $ reverse (b:bs) 201 | otherwise = goLast (b:acc) bs 202 203code_section :: Parser [Code] 204code_section = do 205 xs <- many code_statement 206 pure (catMaybes xs) 207 where 208 code_statement = choice 209 [ Nothing <$ comment_lines 210 , Just <$> import_directive 211 , Just <$> assignment_statement 212 , Just <$> expression_statement 213 ] 214 215 import_directive = do 216 string "%Import" 217 ws 218 mods <- module_name `sepBy1` ws 219 ws0 220 eol 221 pure $! CodeImportStmt mods 222 223 module_name :: Parser T.Text 224 module_name = T.pack <$> some alphaNumChar 225 226 assignment_statement = do 227 v <- try $ do 228 v' <- identifier_name 229 ws 230 void (char '=') <|> void (string "||=") -- FIXME 231 ws 232 pure v' 233 e <- code_expression 234 eol 235 pure (CodeAssignmentStmt v e) 236 237 expression_statement = do 238 -- TODO: expression-label 239 -- optional (double_string >> char ':' >> ws0) 240 241 -- TODO: pick-expression 242 243 lhs <- code_expression 244 ws 245 op <- choice 246 [ AssertEq <$ string "==" 247 , AssertHas <$ string "~~" 248 , AssertLike <$ string "=~" 249 ] 250 ws 251 rhs <- code_expression 252 253 optional $ do 254 ws0 255 char ':' 256 double_string 257 258 eol 259 260 pure (CodeExpressionStmt lhs (Just (op,rhs))) 261 262 263code_expression :: Parser CodeExpr 264code_expression = CodeExpr <$> code_object <*> many function_call 265 266 267-- quoted string 268double_string :: Parser T.Text 269double_string = do 270 char '"' 271 str <- many (noneOf ("\n\"\\" :: [Char]) <|> (char '\\' >> (unesc <$> oneOf ("\\\"0nt" :: [Char])))) 272 char '"' 273 pure $! (T.pack str) 274 where 275 unesc '0' = '\0' 276 unesc 'n' = '\n' 277 unesc 't' = '\t' 278 unesc c = c 279 280single_string :: Parser T.Text 281single_string = do 282 char '\'' 283 str <- many (noneOf ("\n'\\" :: [Char]) <|> (char '\\' >> (oneOf ("\\'" :: [Char])))) 284 char '\'' 285 pure $! (T.pack str) 286 287function_call :: Parser FunCall 288function_call = do 289 char '.' 290 call_object 291 292call_object :: Parser FunCall 293call_object = FunCall <$> identifier_name 294 <*> optional' [] (between (char '(') (char ')') $ code_expression `sepBy1` (char ',' >> ws0)) 295 296optional' def p = do 297 x <- optional p 298 case x of 299 Nothing -> pure def 300 Just y -> pure y 301 302code_object :: Parser CodeObject 303code_object 304 = choice 305 [ mkPoint <$> char '*' <*> lowerChar <*> many (lowerChar <|> digitChar <|> char '-' <|> char '_') 306 , mkNum <$> optional (char '-') <*> some digitChar <*> optional (char '.' >> some digitChar) 307 , CallObj <$> call_object 308 , StrObj <$> single_string 309 , StrObj <$> double_string 310 ] <?> "code-object" 311 where 312 mkPoint _ c cs = PointObj $! (T.pack (c:cs)) 313 mkNum msign ds1 mds2 = NumObj $! (read $ (maybe id (:) msign) ds1 ++ (maybe "" ('.':) mds2)) 314 315data_section :: Parser [Block] 316data_section = many block_definition 317 where 318 block_definition = do 319 -- block_heading 320 string "===" *> ws 321 l <- T.pack <$> manyTill anyChar eol 322 323 -- TODO: user_defined 324 ps <- many point_definition 325 pure (Block l ps) 326 327 point_definition = do 328 string "---" *> ws 329 330 j <- eitherP identifier_user pseudo_point_name 331 332 filters <- maybe [] id <$> optional filter_spec 333 334 let single = do 335 _ <- char ':' *> ws 336 x <- T.pack <$> manyTill anyChar eol 337 -- consume and ignore any point_lines 338 _ <- point_lines 339 pure $! case j of 340 Left j' -> mkSinglePointVal j' (transformPoint True filters x) 341 Right j' -> PointPseudo j' -- is this allowed? 342 343 multi = do 344 ws0 *> eol 345 x <- point_lines 346 pure $! case j of 347 Left j' -> PointStr j' (transformPoint False filters x) 348 Right j' -> PointPseudo j' 349 350 single <|> multi 351 352 filter_spec = between (char '(') (char ')') $ many (oneOf ("<#+-~/@" :: [Char])) 353 354 mkSinglePointVal k v 355 | T.all C.isDigit v = PointInt k (read (T.unpack v)) 356 | otherwise = PointStr k v 357 358point_lines :: Parser T.Text 359point_lines = T.pack . unlines <$> go 360 where 361 go = many (notFollowedBy point_boundary *> manyTill anyChar eol) 362 363 point_boundary :: Parser () 364 point_boundary = void (string "---") <|> void (string "===") <|> eof 365 366identifier_user :: Parser T.Text 367identifier_user = do 368 x <- (:) <$> lowerChar <*> many alphaNumChar 369 xs <- many ((:) <$> char '-' <*> some alphaNumChar) 370 371 pure $! T.pack (concat (x:xs)) 372 373identifier_name :: Parser T.Text 374identifier_name = do 375 x <- (:) <$> letterChar <*> many alphaNumChar 376 xs <- many ((:) <$> char '-' <*> some alphaNumChar) 377 378 pure $! T.pack (concat (x:xs)) 379 380 381ws :: Parser () 382ws = void $ takeWhile1P (Just "BLANK") (\c -> c == ' ' || c == '\t') 383 384ws0 :: Parser () 385ws0 = void $ takeWhileP (Just "BLANK") (\c -> c == ' ' || c == '\t') 386 387blank_line :: Parser () 388blank_line = (try (ws0 <* eol) <|> try (ws <* eof)) <?> "blank-line" 389 390comment_line :: Parser () 391comment_line = (char '#' *> takeWhileP Nothing (/= '\n') *> void eol) <?> "comment-line" 392 393comment_lines :: Parser () 394comment_lines = void (some (comment_line <|> blank_line)) 395 396stripTrailEols :: T.Text -> T.Text 397stripTrailEols = go 398 where 399 go t | T.isSuffixOf "\n\n" t = go (T.init t) 400 | T.isSuffixOf "\r\n\r\n" t = go (T.init (T.init t)) 401 | t == "\n" = "" 402 | otherwise = t 403 404-- 'undent' 405stripPrefixInd :: T.Text -> T.Text 406stripPrefixInd = T.unlines . map go . T.lines 407 where 408 go t | T.isPrefixOf " " t = T.drop 4 t 409 | T.isPrefixOf " " t = T.drop 3 t 410 | T.isPrefixOf " " t = T.drop 2 t 411 | T.isPrefixOf " " t = T.drop 1 t 412 | otherwise = t 413 414stripComments :: T.Text -> T.Text 415stripComments = T.unlines . filter (not . T.isPrefixOf "#") . T.lines 416 417transformPoint :: Bool -> [Char] -> T.Text -> T.Text 418transformPoint single mods0 -- TODO: backslash 419 = go mods0 . 420 (if keepBlanks then id else stripTrailEols) . 421 (if keepComments then id else stripComments) 422 where 423 keepBlanks = single || ('+' `elem` mods0) 424 keepComments = single || ('#' `elem` mods0) 425 426 go [] = id 427 go ('<':xs) 428 | single = error "invalid filter for point-single" 429 | otherwise = go xs . stripPrefixInd 430 go ('+':xs) = go xs -- negative flag 431 go ('#':xs) = go xs -- negative flag 432 go ('-':xs) = go xs . T.dropWhileEnd C.isSpace 433 go (c:_) = error ("unknown filter " ++ show c) 434