1{-# LANGUAGE GeneralizedNewtypeDeriving #-} 2-- | See <http://pubs.opengroup.org/onlinepubs/9699919799/utilities/awk.html> for the 3-- full awk grammar. 4module Test.Tasty.Patterns.Parser 5 ( Parser 6 , runParser 7 , ParseResult(..) 8 , expr 9 , parseAwkExpr 10 ) 11 where 12 13import Prelude hiding (Ordering(..)) 14import Text.ParserCombinators.ReadP hiding (many, optional) 15import Text.ParserCombinators.ReadPrec (readPrec_to_P, minPrec) 16import Text.Read (readPrec) 17import Data.Functor 18import Data.Char 19import Control.Applicative 20import Control.Monad 21import Test.Tasty.Patterns.Types 22import Test.Tasty.Patterns.Expr 23 24type Token = ReadP 25 26-- | A separate 'Parser' data type ensures that we don't forget to skip 27-- spaces. 28newtype Parser a = Parser (ReadP a) 29 deriving (Functor, Applicative, Alternative, Monad, MonadPlus) 30 31data ParseResult a = Success a | Invalid | Ambiguous [a] 32 deriving (Eq, Show) 33 34token :: Token a -> Parser a 35token a = Parser (a <* skipSpaces) 36 37sym :: Char -> Parser () 38sym = void . token . char 39 40str :: String -> Parser () 41str = void . token . string 42 43-- | Run a parser 44runParser 45 :: Parser a 46 -> String -- ^ text to parse 47 -> ParseResult a 48runParser (Parser p) s = 49 case filter (null . snd) $ readP_to_S (skipSpaces *> p) s of 50 [(a, _)] -> Success a 51 [] -> Invalid 52 as -> Ambiguous (fst <$> as) 53 54intP :: Parser Int 55intP = token $ 56 -- we cannot use the standard Int ReadP parser because it recognizes 57 -- negative numbers, making -1 ambiguous 58 read <$> munch1 isDigit 59 60strP :: Parser String 61strP = token $ readPrec_to_P readPrec minPrec 62 -- this deviates somewhat from the awk string literals, by design 63 64-- | An awk ERE token such as @/foo/@. No special characters are recognized 65-- at the moment, except @\@ as an escape character for @/@ and itself. 66patP :: Parser String 67patP = token $ char '/' *> many ch <* char '/' 68 where 69 ch = 70 satisfy (`notElem` "/\\") <|> 71 (char '\\' *> satisfy (`elem` "/\\")) 72 73nfP :: Parser () 74nfP = token $ void $ string "NF" 75 76-- | Built-in functions 77builtin :: Parser Expr 78builtin = msum 79 [ fn "length" $ LengthFn <$> optional expr 80 -- we don't support length without parentheses at all, 81 -- because that makes length($1) ambiguous 82 -- (we don't require spaces for concatenation) 83 , fn "toupper" $ ToUpperFn <$> expr 84 , fn "tolower" $ ToLowerFn <$> expr 85 , fn "match" $ MatchFn <$> expr <* sym ',' <*> patP 86 , fn "substr" $ SubstrFn <$> expr <* sym ',' <*> expr <*> 87 optional (sym ',' *> expr) 88 ] 89 where 90 fn :: String -> Parser a -> Parser a 91 fn name args = token (string name) *> sym '(' *> args <* sym ')' 92 93-- | Atomic expressions 94expr0 :: Parser Expr 95expr0 = 96 (sym '(' *> expr <* sym ')') <|> 97 (IntLit <$> intP) <|> 98 (StringLit <$> strP) <|> 99 (ERE <$> patP) <|> 100 (NF <$ nfP) <|> 101 builtin 102 103-- | Arguments to unary operators: atomic expressions and field 104-- expressions 105expr1 :: Parser Expr 106expr1 = makeExprParser expr0 107 [ [ Prefix (Field <$ sym '$') ] ] 108 109-- | Whether a parser is unary or non-unary. 110-- 111-- This roughly corresponds to the @unary_expr@ and @non_unary_expr@ 112-- non-terminals in the awk grammar. 113-- (Why roughly? See 'expr2'.) 114data Unary = Unary | NonUnary 115 116-- | Arithmetic expressions. 117-- 118-- Unlike awk, non-unary expressions disallow unary operators everywhere, 119-- not just in the leading position, to avoid extra complexity in 120-- 'makeExprParser'. 121-- 122-- For example, the expression 123-- 124-- >1 3 + -4 125-- 126-- is valid in awk because @3 + -4@ is non-unary, but we disallow it here 127-- because 'makeExprParser' does not allow us to distinguish it from 128-- 129-- >1 -4 + 3 130-- 131-- which is ambiguous. 132expr2 :: Unary -> Parser Expr 133expr2 unary = makeExprParser expr1 134 [ [ Prefix (Not <$ sym '!') ] ++ 135 (case unary of 136 Unary -> [ Prefix (Neg <$ sym '-') ] 137 NonUnary -> [] 138 ) 139 , [ InfixL (Add <$ sym '+') 140 , InfixL (Sub <$ sym '-') 141 ] 142 ] 143 144-- | Expressions that may include string concatenation 145expr3 :: Parser Expr 146expr3 = concatExpr <|> expr2 Unary 147 where 148 -- The awk spec mandates that concatenation associates to the left. 149 -- But concatenation is associative, so why would we care. 150 concatExpr = Concat <$> nonUnary <*> (nonUnary <|> concatExpr) 151 nonUnary = expr2 NonUnary 152 153-- | Everything with lower precedence than concatenation 154expr4 :: Parser Expr 155expr4 = makeExprParser expr3 156 [ [ InfixN (LT <$ sym '<') 157 , InfixN (GT <$ sym '>') 158 , InfixN (LE <$ str "<=") 159 , InfixN (GE <$ str ">=") 160 , InfixN (EQ <$ str "==") 161 , InfixN (NE <$ str "!=") 162 ] 163 , [ Postfix (flip Match <$ sym '~' <*> patP) 164 , Postfix (flip NoMatch <$ str "!~" <*> patP) 165 ] 166 , [ InfixL (And <$ str "&&") ] 167 , [ InfixL (Or <$ str "||") ] 168 , [ TernR ((If <$ sym ':') <$ sym '?') ] 169 ] 170 171-- | The awk-like expression parser 172expr :: Parser Expr 173expr = expr4 174 175-- | Parse an awk expression 176parseAwkExpr :: String -> Maybe Expr 177parseAwkExpr s = 178 case runParser expr s of 179 Success e -> Just e 180 _ -> Nothing 181