1-- | 2-- Module : Text.Megaparsec.Error 3-- Copyright : © 2015–2019 Megaparsec contributors 4-- License : FreeBSD 5-- 6-- Maintainer : Mark Karpov <markkarpov92@gmail.com> 7-- Stability : experimental 8-- Portability : portable 9-- 10-- Parse errors. The current version of Megaparsec supports well-typed 11-- errors instead of 'String'-based ones. This gives a lot of flexibility in 12-- describing what exactly went wrong as well as a way to return arbitrary 13-- data in case of failure. 14-- 15-- You probably do not want to import this module directly because 16-- "Text.Megaparsec" re-exports it anyway. 17 18{-# LANGUAGE BangPatterns #-} 19{-# LANGUAGE CPP #-} 20{-# LANGUAGE DeriveDataTypeable #-} 21{-# LANGUAGE DeriveFunctor #-} 22{-# LANGUAGE DeriveGeneric #-} 23{-# LANGUAGE FlexibleContexts #-} 24{-# LANGUAGE FlexibleInstances #-} 25{-# LANGUAGE LambdaCase #-} 26{-# LANGUAGE RecordWildCards #-} 27{-# LANGUAGE ScopedTypeVariables #-} 28{-# LANGUAGE StandaloneDeriving #-} 29{-# LANGUAGE UndecidableInstances#-} 30 31module Text.Megaparsec.Error 32 ( -- * Parse error type 33 ErrorItem (..) 34 , ErrorFancy (..) 35 , ParseError (..) 36 , mapParseError 37 , errorOffset 38 , ParseErrorBundle (..) 39 , attachSourcePos 40 -- * Pretty-printing 41 , ShowErrorComponent (..) 42 , errorBundlePretty 43 , parseErrorPretty 44 , parseErrorTextPretty ) 45where 46 47import Control.DeepSeq 48import Control.Exception 49import Control.Monad.State.Strict 50import Data.Data (Data) 51import Data.List (intercalate) 52import Data.List.NonEmpty (NonEmpty (..)) 53import Data.Maybe (isNothing) 54import Data.Proxy 55import Data.Set (Set) 56import Data.Typeable (Typeable) 57import Data.Void 58import GHC.Generics 59import Text.Megaparsec.Pos 60import Text.Megaparsec.State 61import Text.Megaparsec.Stream 62import qualified Data.List.NonEmpty as NE 63import qualified Data.Set as E 64 65#if !MIN_VERSION_base(4,11,0) 66import Data.Semigroup 67#endif 68 69---------------------------------------------------------------------------- 70-- Parse error type 71 72-- | Data type that is used to represent “unexpected\/expected” items in 73-- 'ParseError'. The data type is parametrized over the token type @t@. 74-- 75-- @since 5.0.0 76 77data ErrorItem t 78 = Tokens (NonEmpty t) -- ^ Non-empty stream of tokens 79 | Label (NonEmpty Char) -- ^ Label (cannot be empty) 80 | EndOfInput -- ^ End of input 81 deriving (Show, Read, Eq, Ord, Data, Typeable, Generic, Functor) 82 83instance NFData t => NFData (ErrorItem t) 84 85-- | Additional error data, extendable by user. When no custom data is 86-- necessary, the type is typically indexed by 'Void' to “cancel” the 87-- 'ErrorCustom' constructor. 88-- 89-- @since 6.0.0 90 91data ErrorFancy e 92 = ErrorFail String 93 -- ^ 'fail' has been used in parser monad 94 | ErrorIndentation Ordering Pos Pos 95 -- ^ Incorrect indentation error: desired ordering between reference 96 -- level and actual level, reference indentation level, actual 97 -- indentation level 98 | ErrorCustom e 99 -- ^ Custom error data 100 deriving (Show, Read, Eq, Ord, Data, Typeable, Generic, Functor) 101 102instance NFData a => NFData (ErrorFancy a) where 103 rnf (ErrorFail str) = rnf str 104 rnf (ErrorIndentation ord ref act) = ord `seq` rnf ref `seq` rnf act 105 rnf (ErrorCustom a) = rnf a 106 107-- | @'ParseError' s e@ represents a parse error parametrized over the 108-- stream type @s@ and the custom data @e@. 109-- 110-- 'Semigroup' and 'Monoid' instances of the data type allow to merge parse 111-- errors from different branches of parsing. When merging two 112-- 'ParseError's, the longest match is preferred; if positions are the same, 113-- custom data sets and collections of message items are combined. Note that 114-- fancy errors take precedence over trivial errors in merging. 115-- 116-- @since 7.0.0 117 118data ParseError s e 119 = TrivialError Int (Maybe (ErrorItem (Token s))) (Set (ErrorItem (Token s))) 120 -- ^ Trivial errors, generated by Megaparsec's machinery. The data 121 -- constructor includes the offset of error, unexpected token (if any), 122 -- and expected tokens. 123 -- 124 -- Type of the first argument was changed in the version /7.0.0/. 125 | FancyError Int (Set (ErrorFancy e)) 126 -- ^ Fancy, custom errors. 127 -- 128 -- Type of the first argument was changed in the version /7.0.0/. 129 deriving (Typeable, Generic) 130 131deriving instance ( Show (Token s) 132 , Show e 133 ) => Show (ParseError s e) 134 135deriving instance ( Eq (Token s) 136 , Eq e 137 ) => Eq (ParseError s e) 138 139deriving instance ( Data s 140 , Data (Token s) 141 , Ord (Token s) 142 , Data e 143 , Ord e 144 ) => Data (ParseError s e) 145 146instance ( NFData (Token s) 147 , NFData e 148 ) => NFData (ParseError s e) 149 150instance (Stream s, Ord e) => Semigroup (ParseError s e) where 151 (<>) = mergeError 152 {-# INLINE (<>) #-} 153 154instance (Stream s, Ord e) => Monoid (ParseError s e) where 155 mempty = TrivialError 0 Nothing E.empty 156 mappend = (<>) 157 {-# INLINE mappend #-} 158 159instance ( Show s 160 , Show (Token s) 161 , Show e 162 , ShowErrorComponent e 163 , Stream s 164 , Typeable s 165 , Typeable e ) 166 => Exception (ParseError s e) where 167 displayException = parseErrorPretty 168 169-- | Modify the custom data component in a parse error. This could be done 170-- via 'fmap' if not for the 'Ord' constraint. 171-- 172-- @since 7.0.0 173 174mapParseError :: Ord e' 175 => (e -> e') 176 -> ParseError s e 177 -> ParseError s e' 178mapParseError _ (TrivialError o u p) = TrivialError o u p 179mapParseError f (FancyError o x) = FancyError o (E.map (fmap f) x) 180 181-- | Get offset of given 'ParseError'. 182-- 183-- @since 7.0.0 184 185errorOffset :: ParseError s e -> Int 186errorOffset (TrivialError o _ _) = o 187errorOffset (FancyError o _) = o 188 189-- | Merge two error data structures into one joining their collections of 190-- message items and preferring the longest match. In other words, earlier 191-- error message is discarded. This may seem counter-intuitive, but 192-- 'mergeError' is only used to merge error messages of alternative branches 193-- of parsing and in this case longest match should be preferred. 194 195mergeError :: (Stream s, Ord e) 196 => ParseError s e 197 -> ParseError s e 198 -> ParseError s e 199mergeError e1 e2 = 200 case errorOffset e1 `compare` errorOffset e2 of 201 LT -> e2 202 EQ -> 203 case (e1, e2) of 204 (TrivialError s1 u1 p1, TrivialError _ u2 p2) -> 205 TrivialError s1 (n u1 u2) (E.union p1 p2) 206 (FancyError {}, TrivialError {}) -> e1 207 (TrivialError {}, FancyError {}) -> e2 208 (FancyError s1 x1, FancyError _ x2) -> 209 FancyError s1 (E.union x1 x2) 210 GT -> e1 211 where 212 -- NOTE The logic behind this merging is that since we only combine 213 -- parse errors that happen at exactly the same position, all the 214 -- unexpected items will be prefixes of input stream at that position or 215 -- labels referring to the same thing. Our aim here is to choose the 216 -- longest prefix (merging with labels and end of input is somewhat 217 -- arbitrary, but is necessary because otherwise we can't make 218 -- ParseError lawful Monoid and have nice parse errors at the same 219 -- time). 220 n Nothing Nothing = Nothing 221 n (Just x) Nothing = Just x 222 n Nothing (Just y) = Just y 223 n (Just x) (Just y) = Just (max x y) 224{-# INLINE mergeError #-} 225 226-- | A non-empty collection of 'ParseError's equipped with 'PosState' that 227-- allows to pretty-print the errors efficiently and correctly. 228-- 229-- @since 7.0.0 230 231data ParseErrorBundle s e = ParseErrorBundle 232 { bundleErrors :: NonEmpty (ParseError s e) 233 -- ^ A collection of 'ParseError's that is sorted by parse error offsets 234 , bundlePosState :: PosState s 235 -- ^ State that is used for line\/column calculation 236 } deriving (Generic) 237 238deriving instance ( Show s 239 , Show (Token s) 240 , Show e 241 ) => Show (ParseErrorBundle s e) 242 243deriving instance ( Eq s 244 , Eq (Token s) 245 , Eq e 246 ) => Eq (ParseErrorBundle s e) 247 248deriving instance ( Typeable s 249 , Typeable (Token s) 250 , Typeable e 251 ) => Typeable (ParseErrorBundle s e) 252 253deriving instance ( Data s 254 , Data (Token s) 255 , Ord (Token s) 256 , Data e 257 , Ord e 258 ) => Data (ParseErrorBundle s e) 259 260instance ( NFData s 261 , NFData (Token s) 262 , NFData e 263 ) => NFData (ParseErrorBundle s e) 264 265instance ( Show s 266 , Show (Token s) 267 , Show e 268 , ShowErrorComponent e 269 , Stream s 270 , Typeable s 271 , Typeable e 272 ) => Exception (ParseErrorBundle s e) where 273 displayException = errorBundlePretty 274 275-- | Attach 'SourcePos'es to items in a 'Traversable' container given that 276-- there is a projection allowing to get an offset per item. 277-- 278-- Items must be in ascending order with respect to their offsets. 279-- 280-- @since 7.0.0 281 282attachSourcePos 283 :: (Traversable t, Stream s) 284 => (a -> Int) -- ^ How to project offset from an item (e.g. 'errorOffset') 285 -> t a -- ^ The collection of items 286 -> PosState s -- ^ Initial 'PosState' 287 -> (t (a, SourcePos), PosState s) -- ^ The collection with 'SourcePos'es 288 -- added and the final 'PosState' 289attachSourcePos projectOffset xs = runState (traverse f xs) 290 where 291 f a = do 292 pst <- get 293 let (spos, pst') = reachOffsetNoLine (projectOffset a) pst 294 put pst' 295 return (a, spos) 296{-# INLINEABLE attachSourcePos #-} 297 298---------------------------------------------------------------------------- 299-- Pretty-printing 300 301-- | The type class defines how to print a custom component of 'ParseError'. 302-- 303-- @since 5.0.0 304 305class Ord a => ShowErrorComponent a where 306 307 -- | Pretty-print a component of 'ParseError'. 308 309 showErrorComponent :: a -> String 310 311 -- | Length of the error component in characters, used for highlighting of 312 -- parse errors in input string. 313 -- 314 -- @since 7.0.0 315 316 errorComponentLen :: a -> Int 317 errorComponentLen _ = 1 318 319instance ShowErrorComponent Void where 320 showErrorComponent = absurd 321 322-- | Pretty-print a 'ParseErrorBundle'. All 'ParseError's in the bundle will 323-- be pretty-printed in order together with the corresponding offending 324-- lines by doing a single efficient pass over the input stream. The 325-- rendered 'String' always ends with a newline. 326-- 327-- @since 7.0.0 328 329errorBundlePretty 330 :: forall s e. ( Stream s 331 , ShowErrorComponent e 332 ) 333 => ParseErrorBundle s e -- ^ Parse error bundle to display 334 -> String -- ^ Textual rendition of the bundle 335errorBundlePretty ParseErrorBundle {..} = 336 let (r, _) = foldl f (id, bundlePosState) bundleErrors 337 in drop 1 (r "") 338 where 339 f :: (ShowS, PosState s) 340 -> ParseError s e 341 -> (ShowS, PosState s) 342 f (o, !pst) e = (o . (outChunk ++), pst') 343 where 344 (epos, sline, pst') = reachOffset (errorOffset e) pst 345 outChunk = 346 "\n" <> sourcePosPretty epos <> ":\n" <> 347 padding <> "|\n" <> 348 lineNumber <> " | " <> sline <> "\n" <> 349 padding <> "| " <> rpadding <> pointer <> "\n" <> 350 parseErrorTextPretty e 351 lineNumber = (show . unPos . sourceLine) epos 352 padding = replicate (length lineNumber + 1) ' ' 353 rpadding = 354 if pointerLen > 0 355 then replicate rpshift ' ' 356 else "" 357 rpshift = unPos (sourceColumn epos) - 1 358 pointer = replicate pointerLen '^' 359 pointerLen = 360 if rpshift + elen > slineLen 361 then slineLen - rpshift + 1 362 else elen 363 slineLen = length sline 364 elen = 365 case e of 366 TrivialError _ Nothing _ -> 1 367 TrivialError _ (Just x) _ -> errorItemLength x 368 FancyError _ xs -> 369 E.foldl' (\a b -> max a (errorFancyLength b)) 1 xs 370 371-- | Pretty-print a 'ParseError'. The rendered 'String' always ends with a 372-- newline. 373-- 374-- @since 5.0.0 375 376parseErrorPretty 377 :: (Stream s, ShowErrorComponent e) 378 => ParseError s e -- ^ Parse error to render 379 -> String -- ^ Result of rendering 380parseErrorPretty e = 381 "offset=" <> show (errorOffset e) <> ":\n" <> parseErrorTextPretty e 382 383-- | Pretty-print a textual part of a 'ParseError', that is, everything 384-- except for its position. The rendered 'String' always ends with a 385-- newline. 386-- 387-- @since 5.1.0 388 389parseErrorTextPretty 390 :: forall s e. (Stream s, ShowErrorComponent e) 391 => ParseError s e -- ^ Parse error to render 392 -> String -- ^ Result of rendering 393parseErrorTextPretty (TrivialError _ us ps) = 394 if isNothing us && E.null ps 395 then "unknown parse error\n" 396 else messageItemsPretty "unexpected " (showErrorItem pxy `E.map` maybe E.empty E.singleton us) <> 397 messageItemsPretty "expecting " (showErrorItem pxy `E.map` ps) 398 where 399 pxy = Proxy :: Proxy s 400parseErrorTextPretty (FancyError _ xs) = 401 if E.null xs 402 then "unknown fancy parse error\n" 403 else unlines (showErrorFancy <$> E.toAscList xs) 404 405---------------------------------------------------------------------------- 406-- Helpers 407 408-- | Pretty-print an 'ErrorItem'. 409 410showErrorItem :: Stream s => Proxy s -> ErrorItem (Token s) -> String 411showErrorItem pxy = \case 412 Tokens ts -> showTokens pxy ts 413 Label label -> NE.toList label 414 EndOfInput -> "end of input" 415 416-- | Get length of the “pointer” to display under a given 'ErrorItem'. 417 418errorItemLength :: ErrorItem t -> Int 419errorItemLength = \case 420 Tokens ts -> NE.length ts 421 _ -> 1 422 423-- | Pretty-print an 'ErrorFancy'. 424 425showErrorFancy :: ShowErrorComponent e => ErrorFancy e -> String 426showErrorFancy = \case 427 ErrorFail msg -> msg 428 ErrorIndentation ord ref actual -> 429 "incorrect indentation (got " <> show (unPos actual) <> 430 ", should be " <> p <> show (unPos ref) <> ")" 431 where 432 p = case ord of 433 LT -> "less than " 434 EQ -> "equal to " 435 GT -> "greater than " 436 ErrorCustom a -> showErrorComponent a 437 438-- | Get length of the “pointer” to display under a given 'ErrorFancy'. 439 440errorFancyLength :: ShowErrorComponent e => ErrorFancy e -> Int 441errorFancyLength = \case 442 ErrorCustom a -> errorComponentLen a 443 _ -> 1 444 445-- | Transforms a list of error messages into their textual representation. 446 447messageItemsPretty 448 :: String -- ^ Prefix to prepend 449 -> Set String -- ^ Collection of messages 450 -> String -- ^ Result of rendering 451messageItemsPretty prefix ts 452 | E.null ts = "" 453 | otherwise = 454 prefix <> (orList . NE.fromList . E.toAscList) ts <> "\n" 455 456-- | Print a pretty list where items are separated with commas and the word 457-- “or” according to the rules of English punctuation. 458 459orList :: NonEmpty String -> String 460orList (x:|[]) = x 461orList (x:|[y]) = x <> " or " <> y 462orList xs = intercalate ", " (NE.init xs) <> ", or " <> NE.last xs 463