1{-# LANGUAGE CPP #-}
2{-# LANGUAGE DeriveDataTypeable #-}
3{-# LANGUAGE PatternGuards #-}
4{-# LANGUAGE OverloadedStrings #-}
5{-# LANGUAGE BangPatterns #-}
6module Data.Yaml.Internal
7    (
8      ParseException(..)
9    , prettyPrintParseException
10    , Warning(..)
11    , parse
12    , decodeHelper
13    , decodeHelper_
14    , textToScientific
15    , stringScalar
16    , defaultStringStyle
17    , isSpecialString
18    , specialStrings
19    , isNumeric
20    , objToStream
21    , objToEvents
22    ) where
23
24#if !MIN_VERSION_base(4,8,0)
25import Control.Applicative ((<$>), Applicative(..))
26#endif
27import Control.Applicative ((<|>))
28import Control.Exception
29import Control.Monad (when, unless)
30import Control.Monad.Trans.Resource (ResourceT, runResourceT)
31import Control.Monad.State.Strict
32import Control.Monad.Reader
33import Data.Aeson
34import Data.Aeson.Internal (JSONPath, JSONPathElement(..), formatError)
35import Data.Aeson.Types hiding (parse)
36import qualified Data.Attoparsec.Text as Atto
37import Data.Bits (shiftL, (.|.))
38import Data.ByteString (ByteString)
39import qualified Data.ByteString.Builder as BB
40import qualified Data.ByteString.Lazy as BL
41import Data.ByteString.Builder.Scientific (scientificBuilder)
42import Data.Char (toUpper, ord)
43import Data.List
44import Data.Conduit ((.|), ConduitM, runConduit)
45import qualified Data.Conduit.List as CL
46import qualified Data.HashMap.Strict as M
47import qualified Data.HashSet as HashSet
48import           Data.Map (Map)
49import qualified Data.Map as Map
50import           Data.Set (Set)
51import qualified Data.Set as Set
52import Data.Scientific (Scientific, base10Exponent, coefficient)
53import Data.Text (Text, pack)
54import qualified Data.Text as T
55import Data.Text.Encoding (decodeUtf8With, encodeUtf8)
56import Data.Text.Encoding.Error (lenientDecode)
57import Data.Typeable
58import qualified Data.Vector as V
59
60import qualified Text.Libyaml as Y
61import Text.Libyaml hiding (encode, decode, encodeFile, decodeFile)
62
63data ParseException = NonScalarKey
64                    | UnknownAlias { _anchorName :: Y.AnchorName }
65                    | UnexpectedEvent { _received :: Maybe Event
66                                      , _expected :: Maybe Event
67                                      }
68                    | InvalidYaml (Maybe YamlException)
69                    | AesonException String
70                    | OtherParseException SomeException
71                    | NonStringKey JSONPath
72                    | NonStringKeyAlias Y.AnchorName Value
73                    | CyclicIncludes
74                    | LoadSettingsException FilePath ParseException
75    deriving (Show, Typeable)
76
77instance Exception ParseException where
78#if MIN_VERSION_base(4, 8, 0)
79  displayException = prettyPrintParseException
80#endif
81
82-- | Alternative to 'show' to display a 'ParseException' on the screen.
83--   Instead of displaying the data constructors applied to their arguments,
84--   a more textual output is returned. For example, instead of printing:
85--
86-- > InvalidYaml (Just (YamlParseException {yamlProblem = "did not find expected ',' or '}'", yamlContext = "while parsing a flow mapping", yamlProblemMark = YamlMark {yamlIndex = 42, yamlLine = 2, yamlColumn = 12}})))
87--
88--   It looks more pleasant to print:
89--
90-- > YAML parse exception at line 2, column 12,
91-- > while parsing a flow mapping:
92-- > did not find expected ',' or '}'
93--
94-- Since 0.8.11
95prettyPrintParseException :: ParseException -> String
96prettyPrintParseException pe = case pe of
97  NonScalarKey -> "Non scalar key"
98  UnknownAlias anchor -> "Unknown alias `" ++ anchor ++ "`"
99  UnexpectedEvent { _expected = mbExpected, _received = mbUnexpected } -> unlines
100    [ "Unexpected event: expected"
101    , "  " ++ show mbExpected
102    , "but received"
103    , "  " ++ show mbUnexpected
104    ]
105  InvalidYaml mbYamlError -> case mbYamlError of
106    Nothing -> "Unspecified YAML error"
107    Just yamlError -> case yamlError of
108      YamlException s -> "YAML exception:\n" ++ s
109      YamlParseException problem context mark -> concat
110        [ "YAML parse exception at line " ++ show (yamlLine mark) ++
111          ", column " ++ show (yamlColumn mark)
112        , case context of
113            "" -> ":\n"
114            -- The context seems to include a leading "while" or similar.
115            _  -> ",\n" ++ context ++ ":\n"
116        , problem
117        ]
118  AesonException s -> "Aeson exception:\n" ++ s
119  OtherParseException exc -> "Generic parse exception:\n" ++ show exc
120  NonStringKey path -> formatError path "Non-string keys are not supported"
121  NonStringKeyAlias anchor value -> unlines
122    [ "Non-string key alias:"
123    , "  Anchor name: " ++ anchor
124    , "  Value: " ++ show value
125    ]
126  CyclicIncludes -> "Cyclic includes"
127  LoadSettingsException fp exc -> "Could not parse file as YAML: " ++ fp ++ "\n" ++ prettyPrintParseException exc
128
129defineAnchor :: Value -> String -> ReaderT JSONPath (ConduitM e o Parse) ()
130defineAnchor value name = modify (modifyAnchors $ Map.insert name value)
131  where
132    modifyAnchors :: (Map String Value -> Map String Value) -> ParseState -> ParseState
133    modifyAnchors f st =  st {parseStateAnchors = f (parseStateAnchors st)}
134
135lookupAnchor :: String -> ReaderT JSONPath (ConduitM e o Parse) (Maybe Value)
136lookupAnchor name = gets (Map.lookup name . parseStateAnchors)
137
138data Warning = DuplicateKey JSONPath
139    deriving (Eq, Show)
140
141addWarning :: Warning -> ReaderT JSONPath (ConduitM e o Parse) ()
142addWarning w = modify (modifyWarnings (w :))
143  where
144    modifyWarnings :: ([Warning] -> [Warning]) -> ParseState -> ParseState
145    modifyWarnings f st =  st {parseStateWarnings = f (parseStateWarnings st)}
146
147data ParseState = ParseState {
148  parseStateAnchors :: Map String Value
149, parseStateWarnings :: [Warning]
150}
151
152type Parse = StateT ParseState (ResourceT IO)
153
154requireEvent :: Event -> ReaderT JSONPath (ConduitM Event o Parse) ()
155requireEvent e = do
156    f <- lift CL.head
157    unless (f == Just e) $ liftIO $ throwIO $ UnexpectedEvent f $ Just e
158
159parse :: ReaderT JSONPath (ConduitM Event o Parse) Value
160parse = do
161    streamStart <- lift CL.head
162    case streamStart of
163        Nothing ->
164            -- empty string input
165            return Null
166        Just EventStreamStart -> do
167            documentStart <- lift CL.head
168            case documentStart of
169                Just EventStreamEnd ->
170                    -- empty file input, comment only string/file input
171                    return Null
172                Just EventDocumentStart -> do
173                    res <- parseO
174                    requireEvent EventDocumentEnd
175                    requireEvent EventStreamEnd
176                    return res
177                _ -> liftIO $ throwIO $ UnexpectedEvent documentStart Nothing
178        _ -> liftIO $ throwIO $ UnexpectedEvent streamStart Nothing
179
180parseScalar :: ByteString -> Anchor -> Style -> Tag
181            -> ReaderT JSONPath (ConduitM Event o Parse) Text
182parseScalar v a style tag = do
183    let res = decodeUtf8With lenientDecode v
184    mapM_ (defineAnchor (textToValue style tag res)) a
185    return res
186
187textToValue :: Style -> Tag -> Text -> Value
188textToValue SingleQuoted _ t = String t
189textToValue DoubleQuoted _ t = String t
190textToValue _ StrTag t = String t
191textToValue Folded _ t = String t
192textToValue _ _ t
193    | t `elem` ["null", "Null", "NULL", "~", ""] = Null
194    | any (t `isLike`) ["y", "yes", "on", "true"] = Bool True
195    | any (t `isLike`) ["n", "no", "off", "false"] = Bool False
196    | Right x <- textToScientific t = Number x
197    | otherwise = String t
198  where x `isLike` ref = x `elem` [ref, T.toUpper ref, titleCased]
199          where titleCased = toUpper (T.head ref) `T.cons` T.tail ref
200
201textToScientific :: Text -> Either String Scientific
202textToScientific = Atto.parseOnly (num <* Atto.endOfInput)
203  where
204    num = (fromInteger <$> ("0x" *> Atto.hexadecimal))
205      <|> (fromInteger <$> ("0o" *> octal))
206      <|> Atto.scientific
207
208    octal = T.foldl' step 0 <$> Atto.takeWhile1 isOctalDigit
209      where
210        isOctalDigit c = (c >= '0' && c <= '7')
211        step a c = (a `shiftL` 3) .|. fromIntegral (ord c - 48)
212
213parseO :: ReaderT JSONPath (ConduitM Event o Parse) Value
214parseO = do
215    me <- lift CL.head
216    case me of
217        Just (EventScalar v tag style a) -> textToValue style tag <$> parseScalar v a style tag
218        Just (EventSequenceStart _ _ a) -> parseS 0 a id
219        Just (EventMappingStart _ _ a) -> parseM mempty a M.empty
220        Just (EventAlias an) -> do
221            m <- lookupAnchor an
222            case m of
223                Nothing -> liftIO $ throwIO $ UnknownAlias an
224                Just v -> return v
225        _ -> liftIO $ throwIO $ UnexpectedEvent me Nothing
226
227parseS :: Int
228       -> Y.Anchor
229       -> ([Value] -> [Value])
230       -> ReaderT JSONPath (ConduitM Event o Parse) Value
231parseS !n a front = do
232    me <- lift CL.peek
233    case me of
234        Just EventSequenceEnd -> do
235            lift $ CL.drop 1
236            let res = Array $ V.fromList $ front []
237            mapM_ (defineAnchor res) a
238            return res
239        _ -> do
240            o <- local (Index n :) parseO
241            parseS (succ n) a $ front . (:) o
242
243parseM :: Set Text
244       -> Y.Anchor
245       -> M.HashMap Text Value
246       -> ReaderT JSONPath (ConduitM Event o Parse) Value
247parseM mergedKeys a front = do
248    me <- lift CL.head
249    case me of
250        Just EventMappingEnd -> do
251            let res = Object front
252            mapM_ (defineAnchor res) a
253            return res
254        _ -> do
255            s <- case me of
256                    Just (EventScalar v tag style a') -> parseScalar v a' style tag
257                    Just (EventAlias an) -> do
258                        m <- lookupAnchor an
259                        case m of
260                            Nothing -> liftIO $ throwIO $ UnknownAlias an
261                            Just (String t) -> return t
262                            Just v -> liftIO $ throwIO $ NonStringKeyAlias an v
263                    _ -> do
264                        path <- ask
265                        liftIO $ throwIO $ NonStringKey path
266
267            (mergedKeys', al') <- local (Key s :) $ do
268              o <- parseO
269              let al = do
270                      when (M.member s front && Set.notMember s mergedKeys) $ do
271                          path <- reverse <$> ask
272                          addWarning (DuplicateKey path)
273                      return (Set.delete s mergedKeys, M.insert s o front)
274              if s == pack "<<"
275                         then case o of
276                                  Object l  -> return (merge l)
277                                  Array l -> return $ merge $ foldl' mergeObjects M.empty $ V.toList l
278                                  _          -> al
279                         else al
280            parseM mergedKeys' a al'
281    where mergeObjects al (Object om) = M.union al om
282          mergeObjects al _           = al
283
284          merge xs = (Set.fromList (M.keys xs \\ M.keys front), M.union front xs)
285
286decodeHelper :: FromJSON a
287             => ConduitM () Y.Event Parse ()
288             -> IO (Either ParseException ([Warning], Either String a))
289decodeHelper src = do
290    -- This used to be tryAny, but the fact is that catching async
291    -- exceptions is fine here. We'll rethrow them immediately in the
292    -- otherwise clause.
293    x <- try $ runResourceT $ runStateT (runConduit $ src .| runReaderT parse []) (ParseState Map.empty [])
294    case x of
295        Left e
296            | Just pe <- fromException e -> return $ Left pe
297            | Just ye <- fromException e -> return $ Left $ InvalidYaml $ Just (ye :: YamlException)
298            | otherwise -> throwIO e
299        Right (y, st) -> return $ Right (parseStateWarnings st, parseEither parseJSON y)
300
301decodeHelper_ :: FromJSON a
302              => ConduitM () Event Parse ()
303              -> IO (Either ParseException ([Warning], a))
304decodeHelper_ src = do
305    x <- try $ runResourceT $ runStateT (runConduit $ src .| runReaderT parse []) (ParseState Map.empty [])
306    case x of
307        Left e
308            | Just pe <- fromException e -> return $ Left pe
309            | Just ye <- fromException e -> return $ Left $ InvalidYaml $ Just (ye :: YamlException)
310            | Just sae <- fromException e -> throwIO (sae :: SomeAsyncException)
311            | otherwise -> return $ Left $ OtherParseException e
312        Right (y, st) -> return $ either
313            (Left . AesonException)
314            Right
315            ((,) (parseStateWarnings st) <$> parseEither parseJSON y)
316
317type StringStyle = Text -> ( Tag, Style )
318
319-- | Encodes a string with the supplied style. This function handles the empty
320-- string case properly to avoid https://github.com/snoyberg/yaml/issues/24
321--
322-- @since 0.11.2.0
323stringScalar :: StringStyle -> Maybe Text -> Text -> Event
324stringScalar _ anchor "" = EventScalar "" NoTag SingleQuoted (T.unpack <$> anchor)
325stringScalar stringStyle anchor s = EventScalar (encodeUtf8 s) tag style (T.unpack <$> anchor)
326  where
327    ( tag, style ) = stringStyle s
328
329-- |
330-- @since 0.11.2.0
331defaultStringStyle :: StringStyle
332defaultStringStyle = \s ->
333    case () of
334      ()
335        | "\n" `T.isInfixOf` s -> ( NoTag, Literal )
336        | isSpecialString s -> ( NoTag, SingleQuoted )
337        | otherwise -> ( NoTag, PlainNoTag )
338
339-- | Determine whether a string must be quoted in YAML and can't appear as plain text.
340-- Useful if you want to use 'setStringStyle'.
341--
342-- @since 0.10.2.0
343isSpecialString :: Text -> Bool
344isSpecialString s = s `HashSet.member` specialStrings || isNumeric s
345
346-- | Strings which must be escaped so as not to be treated as non-string scalars.
347--
348-- @since 0.8.32
349specialStrings :: HashSet.HashSet Text
350specialStrings = HashSet.fromList $ T.words
351    "y Y yes Yes YES n N no No NO true True TRUE false False FALSE on On ON off Off OFF null Null NULL ~ *"
352
353-- |
354-- @since 0.8.32
355isNumeric :: Text -> Bool
356isNumeric = either (const False) (const True) . textToScientific
357
358-- | Encode a value as a YAML document stream.
359--
360-- @since 0.11.2.0
361objToStream :: ToJSON a => StringStyle -> a -> [Y.Event]
362objToStream stringStyle o =
363      (:) EventStreamStart
364    . (:) EventDocumentStart
365    $ objToEvents stringStyle o
366        [ EventDocumentEnd
367        , EventStreamEnd
368        ]
369
370-- | Encode a value as a list of 'Event's.
371--
372-- @since 0.11.2.0
373objToEvents :: ToJSON a => StringStyle -> a -> [Y.Event] -> [Y.Event]
374objToEvents stringStyle = objToEvents' . toJSON
375  where
376    objToEvents' (Array list) rest =
377        EventSequenceStart NoTag AnySequence Nothing
378      : foldr objToEvents' (EventSequenceEnd : rest) (V.toList list)
379
380    objToEvents' (Object o) rest =
381        EventMappingStart NoTag AnyMapping Nothing
382      : foldr pairToEvents (EventMappingEnd : rest) (M.toList o)
383      where
384        pairToEvents :: Pair -> [Y.Event] -> [Y.Event]
385        pairToEvents (k, v) = objToEvents' (String k) . objToEvents' v
386
387    objToEvents' (String s) rest = stringScalar stringStyle Nothing s : rest
388
389    objToEvents' Null rest = EventScalar "null" NullTag PlainNoTag Nothing : rest
390
391    objToEvents' (Bool True) rest = EventScalar "true" BoolTag PlainNoTag Nothing : rest
392    objToEvents' (Bool False) rest = EventScalar "false" BoolTag PlainNoTag Nothing : rest
393
394    objToEvents' (Number s) rest =
395      let builder
396            -- Special case the 0 exponent to remove the trailing .0
397            | base10Exponent s == 0 = BB.integerDec $ coefficient s
398            | otherwise = scientificBuilder s
399          lbs = BB.toLazyByteString builder
400          bs = BL.toStrict lbs
401       in EventScalar bs IntTag PlainNoTag Nothing : rest
402