1{-# LANGUAGE CPP #-}
2{-# LANGUAGE FlexibleContexts #-}
3{-# LANGUAGE FlexibleInstances #-}
4{-# LANGUAGE GeneralizedNewtypeDeriving #-}
5{-# LANGUAGE MultiParamTypeClasses #-}
6{-# LANGUAGE ScopedTypeVariables #-}
7{-# LANGUAGE UndecidableInstances #-}
8#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__ >= 702
9{-# LANGUAGE Trustworthy #-}
10#endif
11{-# OPTIONS_GHC -fspec-constr -fspec-constr-count=8 #-}
12-----------------------------------------------------------------------------
13-- |
14-- Module      :  Text.Parser.Token
15-- Copyright   :  (c) Edward Kmett 2011
16--                (c) Daan Leijen 1999-2001
17-- License     :  BSD3
18--
19-- Maintainer  :  ekmett@gmail.com
20-- Stability   :  experimental
21-- Portability :  non-portable
22--
23-- Parsers that comprehend whitespace and identifier styles
24--
25-- > idStyle    = haskellIdents { styleReserved = ... }
26-- > identifier = ident idStyle
27-- > reserved   = reserve idStyle
28--
29-----------------------------------------------------------------------------
30module Text.Parser.Token
31  (
32  -- * Token Parsing
33    whiteSpace      -- :: TokenParsing m => m ()
34  , charLiteral     -- :: TokenParsing m => m Char
35  , stringLiteral   -- :: (TokenParsing m, IsString s) => m s
36  , stringLiteral'  -- :: (TokenParsing m, IsString s) => m s
37  , natural         -- :: TokenParsing m => m Integer
38  , integer         -- :: TokenParsing m => m Integer
39  , double          -- :: TokenParsing m => m Double
40  , naturalOrDouble -- :: TokenParsing m => m (Either Integer Double)
41  , integerOrDouble -- :: TokenParsing m => m (Either Integer Double)
42  , scientific      -- :: TokenParsing m => m Scientific
43  , naturalOrScientific -- :: TokenParsing m => m (Either Integer Scientific)
44  , integerOrScientific -- :: TokenParsing m => m (Either Integer Scientific)
45  , symbol          -- :: TokenParsing m => String -> m String
46  , textSymbol      -- :: TokenParsing m => Text -> m Text
47  , symbolic        -- :: TokenParsing m => Char -> m Char
48  , parens          -- :: TokenParsing m => m a -> m a
49  , braces          -- :: TokenParsing m => m a -> m a
50  , angles          -- :: TokenParsing m => m a -> m a
51  , brackets        -- :: TokenParsing m => m a -> m a
52  , comma           -- :: TokenParsing m => m Char
53  , colon           -- :: TokenParsing m => m Char
54  , dot             -- :: TokenParsing m => m Char
55  , semiSep         -- :: TokenParsing m => m a -> m [a]
56  , semiSep1        -- :: TokenParsing m => m a -> m [a]
57  , commaSep        -- :: TokenParsing m => m a -> m [a]
58  , commaSep1       -- :: TokenParsing m => m a -> m [a]
59  -- ** Token Parsing Class
60  , TokenParsing(..)
61  -- ** Token Parsing Transformers
62  , Unspaced(..)
63  , Unlined(..)
64  , Unhighlighted(..)
65  -- ** /Non-Token/ Parsers
66  , decimal       -- :: TokenParsing m => m Integer
67  , hexadecimal   -- :: TokenParsing m => m Integer
68  , octal         -- :: TokenParsing m => m Integer
69  , characterChar -- :: TokenParsing m => m Char
70  , integer'      -- :: TokenParsing m => m Integer
71  -- * Identifiers
72  , IdentifierStyle(..)
73  , liftIdentifierStyle -- :: (MonadTrans t, Monad m) => IdentifierStyle m -> IdentifierStyle (t m)
74  , ident           -- :: (TokenParsing m, IsString s) => IdentifierStyle m -> m s
75  , reserve         -- :: TokenParsing m => IdentifierStyle m -> String -> m ()
76  , reserveText     -- :: TokenParsing m => IdentifierStyle m -> Text -> m ()
77  -- ** Lenses and Traversals
78  , styleName
79  , styleStart
80  , styleLetter
81  , styleChars
82  , styleReserved
83  , styleHighlight
84  , styleReservedHighlight
85  , styleHighlights
86  ) where
87
88import Control.Applicative
89import Control.Monad (MonadPlus(..), when)
90import Control.Monad.Trans.Class
91import Control.Monad.Trans.State.Lazy as Lazy
92import Control.Monad.Trans.State.Strict as Strict
93import Control.Monad.Trans.Writer.Lazy as Lazy
94import Control.Monad.Trans.Writer.Strict as Strict
95import Control.Monad.Trans.RWS.Lazy as Lazy
96import Control.Monad.Trans.RWS.Strict as Strict
97import Control.Monad.Trans.Reader
98import Control.Monad.Trans.Identity
99import Control.Monad.State.Class as Class
100import Control.Monad.Reader.Class as Class
101import Control.Monad.Writer.Class as Class
102import Data.Char
103import Data.Functor.Identity
104import qualified Data.HashSet as HashSet
105import Data.HashSet (HashSet)
106import Data.List (foldl', transpose)
107#if __GLASGOW_HASKELL__ < 710
108import Data.Monoid
109#endif
110import Data.Scientific ( Scientific )
111import qualified Data.Scientific as Sci
112import Data.String
113import Data.Text hiding (empty,zip,foldl',take,map,length,splitAt,null,transpose)
114import Numeric (showIntAtBase)
115import qualified Text.ParserCombinators.ReadP as ReadP
116import Text.Parser.Char
117import Text.Parser.Combinators
118import Text.Parser.Token.Highlight
119
120#ifdef MIN_VERSION_parsec
121import qualified Text.Parsec as Parsec
122#endif
123
124#ifdef MIN_VERSION_attoparsec
125import qualified Data.Attoparsec.Types as Att
126#endif
127
128-- | Skip zero or more bytes worth of white space. More complex parsers are
129-- free to consider comments as white space.
130whiteSpace :: TokenParsing m => m ()
131whiteSpace = someSpace <|> pure ()
132{-# INLINE whiteSpace #-}
133
134-- | This token parser parses a single literal character. Returns the
135-- literal character value. This parsers deals correctly with escape
136-- sequences. The literal character is parsed according to the grammar
137-- rules defined in the Haskell report (which matches most programming
138-- languages quite closely).
139charLiteral :: forall m. TokenParsing m => m Char
140charLiteral = token (highlight CharLiteral lit) where
141  lit :: m Char
142  lit = between (char '\'') (char '\'' <?> "end of character") characterChar
143    <?> "character"
144{-# INLINE charLiteral #-}
145
146-- | This token parser parses a literal string. Returns the literal
147-- string value. This parsers deals correctly with escape sequences and
148-- gaps. The literal string is parsed according to the grammar rules
149-- defined in the Haskell report (which matches most programming
150-- languages quite closely).
151stringLiteral :: forall m s. (TokenParsing m, IsString s) => m s
152stringLiteral = fromString <$> token (highlight StringLiteral lit) where
153  lit :: m [Char]
154  lit = Prelude.foldr (maybe id (:)) ""
155    <$> between (char '"') (char '"' <?> "end of string") (many stringChar)
156    <?> "string"
157
158  stringChar :: m (Maybe Char)
159  stringChar = Just <$> stringLetter
160           <|> stringEscape
161       <?> "string character"
162
163  stringLetter :: m Char
164  stringLetter = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026'))
165
166  stringEscape :: m (Maybe Char)
167  stringEscape = highlight EscapeCode $ char '\\' *> esc where
168    esc :: m (Maybe Char)
169    esc = Nothing <$ escapeGap
170      <|> Nothing <$ escapeEmpty
171      <|> Just <$> escapeCode
172
173  escapeEmpty, escapeGap :: m Char
174  escapeEmpty = char '&'
175  escapeGap = skipSome space *> (char '\\' <?> "end of string gap")
176{-# INLINE stringLiteral #-}
177
178-- | This token parser behaves as 'stringLiteral', but for single-quoted
179-- strings.
180stringLiteral' :: forall m s. (TokenParsing m, IsString s) => m s
181stringLiteral' = fromString <$> token (highlight StringLiteral lit) where
182  lit :: m [Char]
183  lit = Prelude.foldr (maybe id (:)) ""
184    <$> between (char '\'') (char '\'' <?> "end of string") (many stringChar)
185    <?> "string"
186
187  stringChar :: m (Maybe Char)
188  stringChar = Just <$> stringLetter
189           <|> stringEscape
190       <?> "string character"
191
192  stringLetter :: m Char
193  stringLetter = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026'))
194
195  stringEscape :: m (Maybe Char)
196  stringEscape = highlight EscapeCode $ char '\\' *> esc where
197    esc :: m (Maybe Char)
198    esc = Nothing <$ escapeGap
199      <|> Nothing <$ escapeEmpty
200      <|> Just <$> escapeCode
201
202  escapeEmpty, escapeGap :: m Char
203  escapeEmpty = char '&'
204  escapeGap = skipSome space *> (char '\\' <?> "end of string gap")
205{-# INLINE stringLiteral' #-}
206
207-- | This token parser parses a natural number (a non-negative whole
208-- number). Returns the value of the number. The number can be
209-- specified in 'decimal', 'hexadecimal' or
210-- 'octal'. The number is parsed according to the grammar
211-- rules in the Haskell report.
212natural :: TokenParsing m => m Integer
213natural = token natural'
214{-# INLINE natural #-}
215
216-- | This token parser parses an integer (a whole number). This parser
217-- is like 'natural' except that it can be prefixed with
218-- sign (i.e. \'-\' or \'+\'). Returns the value of the number. The
219-- number can be specified in 'decimal', 'hexadecimal'
220-- or 'octal'. The number is parsed according
221-- to the grammar rules in the Haskell report.
222integer :: forall m. TokenParsing m => m Integer
223integer = token (token (highlight Operator sgn <*> natural')) <?> "integer"
224  where
225  sgn :: m (Integer -> Integer)
226  sgn = negate <$ char '-'
227    <|> id <$ char '+'
228    <|> pure id
229{-# INLINE integer #-}
230
231-- | This token parser parses a floating point value. Returns the value
232-- of the number. The number is parsed according to the grammar rules
233-- defined in the Haskell report.
234double :: TokenParsing m => m Double
235double = token (highlight Number (Sci.toRealFloat <$> floating) <?> "double")
236{-# INLINE double #-}
237
238-- | This token parser parses either 'natural' or a 'float'.
239-- Returns the value of the number. This parsers deals with
240-- any overlap in the grammar rules for naturals and floats. The number
241-- is parsed according to the grammar rules defined in the Haskell report.
242naturalOrDouble :: TokenParsing m => m (Either Integer Double)
243naturalOrDouble = fmap Sci.toRealFloat <$> naturalOrScientific
244{-# INLINE naturalOrDouble #-}
245
246-- | This token parser is like 'naturalOrDouble', but handles
247-- leading @-@ or @+@.
248integerOrDouble :: TokenParsing m => m (Either Integer Double)
249integerOrDouble = fmap Sci.toRealFloat <$> integerOrScientific
250{-# INLINE integerOrDouble #-}
251
252-- | This token parser parses a floating point value. Returns the value
253-- of the number. The number is parsed according to the grammar rules
254-- defined in the Haskell report.
255scientific :: TokenParsing m => m Scientific
256scientific = token (highlight Number floating <?> "scientific")
257{-# INLINE scientific #-}
258
259-- | This token parser parses either 'natural' or a 'scientific'.
260-- Returns the value of the number. This parsers deals with
261-- any overlap in the grammar rules for naturals and floats. The number
262-- is parsed according to the grammar rules defined in the Haskell report.
263naturalOrScientific :: TokenParsing m => m (Either Integer Scientific)
264naturalOrScientific = token (highlight Number natFloating <?> "number")
265{-# INLINE naturalOrScientific #-}
266
267-- | This token parser is like 'naturalOrScientific', but handles
268-- leading @-@ or @+@.
269integerOrScientific :: forall m. TokenParsing m => m (Either Integer Scientific)
270integerOrScientific = token (highlight Number ios <?> "number")
271  where ios :: m (Either Integer Scientific)
272        ios = mneg <$> optional (oneOf "+-") <*> natFloating
273
274        mneg (Just '-') nd = either (Left . negate) (Right . negate) nd
275        mneg _          nd = nd
276{-# INLINE integerOrScientific #-}
277
278
279-- | Token parser @symbol s@ parses 'string' @s@ and skips
280-- trailing white space.
281symbol :: TokenParsing m => String -> m String
282symbol name = token (highlight Symbol (string name))
283{-# INLINE symbol #-}
284
285-- | Token parser @textSymbol t@ parses 'text' @s@ and skips
286-- trailing white space.
287textSymbol :: TokenParsing m => Text -> m Text
288textSymbol name = token (highlight Symbol (text name))
289{-# INLINE textSymbol #-}
290
291-- | Token parser @symbolic s@ parses 'char' @s@ and skips
292-- trailing white space.
293symbolic :: TokenParsing m => Char -> m Char
294symbolic name = token (highlight Symbol (char name))
295{-# INLINE symbolic #-}
296
297-- | Token parser @parens p@ parses @p@ enclosed in parenthesis,
298-- returning the value of @p@.
299parens :: TokenParsing m => m a -> m a
300parens = nesting . between (symbolic '(') (symbolic ')')
301{-# INLINE parens #-}
302
303-- | Token parser @braces p@ parses @p@ enclosed in braces (\'{\' and
304-- \'}\'), returning the value of @p@.
305braces :: TokenParsing m => m a -> m a
306braces = nesting . between (symbolic '{') (symbolic '}')
307{-# INLINE braces #-}
308
309-- | Token parser @angles p@ parses @p@ enclosed in angle brackets (\'\<\'
310-- and \'>\'), returning the value of @p@.
311angles :: TokenParsing m => m a -> m a
312angles = nesting . between (symbolic '<') (symbolic '>')
313{-# INLINE angles #-}
314
315-- | Token parser @brackets p@ parses @p@ enclosed in brackets (\'[\'
316-- and \']\'), returning the value of @p@.
317brackets :: TokenParsing m => m a -> m a
318brackets = nesting . between (symbolic '[') (symbolic ']')
319{-# INLINE brackets #-}
320
321-- | Token parser @comma@ parses the character \',\' and skips any
322-- trailing white space. Returns the string \",\".
323comma :: TokenParsing m => m Char
324comma = symbolic ','
325{-# INLINE comma #-}
326
327-- | Token parser @colon@ parses the character \':\' and skips any
328-- trailing white space. Returns the string \":\".
329colon :: TokenParsing m => m Char
330colon = symbolic ':'
331{-# INLINE colon #-}
332
333-- | Token parser @dot@ parses the character \'.\' and skips any
334-- trailing white space. Returns the string \".\".
335dot :: TokenParsing m => m Char
336dot = symbolic '.'
337{-# INLINE dot #-}
338
339-- | Token parser @semiSep p@ parses /zero/ or more occurrences of @p@
340-- separated by 'semi'. Returns a list of values returned by @p@.
341semiSep :: TokenParsing m => m a -> m [a]
342semiSep p = sepBy p semi
343{-# INLINE semiSep #-}
344
345-- | Token parser @semiSep1 p@ parses /one/ or more occurrences of @p@
346-- separated by 'semi'. Returns a list of values returned by @p@.
347semiSep1 :: TokenParsing m => m a -> m [a]
348semiSep1 p = sepBy1 p semi
349{-# INLINE semiSep1 #-}
350
351-- | Token parser @commaSep p@ parses /zero/ or more occurrences of
352-- @p@ separated by 'comma'. Returns a list of values returned
353-- by @p@.
354commaSep :: TokenParsing m => m a -> m [a]
355commaSep p = sepBy p comma
356{-# INLINE commaSep #-}
357
358-- | Token parser @commaSep1 p@ parses /one/ or more occurrences of
359-- @p@ separated by 'comma'. Returns a list of values returned
360-- by @p@.
361commaSep1 :: TokenParsing m => m a -> m [a]
362commaSep1 p = sepBy1 p comma
363{-# INLINE commaSep1 #-}
364
365-- | Additional functionality that is needed to tokenize input while ignoring whitespace.
366class CharParsing m => TokenParsing m where
367  -- | Usually, someSpace consists of /one/ or more occurrences of a 'space'.
368  -- Some parsers may choose to recognize line comments or block (multi line)
369  -- comments as white space as well.
370  someSpace :: m ()
371  someSpace = skipSome (satisfy isSpace)
372  {-# INLINE someSpace #-}
373
374  -- | Called when we enter a nested pair of symbols.
375  -- Overloadable to enable disabling layout
376  nesting :: m a -> m a
377  nesting = id
378  {-# INLINE nesting #-}
379
380  -- | The token parser |semi| parses the character \';\' and skips
381  -- any trailing white space. Returns the character \';\'. Overloadable to
382  -- permit automatic semicolon insertion or Haskell-style layout.
383  semi :: m Char
384  semi = token (satisfy (';'==) <?> ";")
385  {-# INLINE semi #-}
386
387  -- | Tag a region of parsed text with a bit of semantic information.
388  -- Most parsers won't use this, but it is indispensible for highlighters.
389  highlight :: Highlight -> m a -> m a
390  highlight _ a = a
391  {-# INLINE highlight #-}
392
393  -- | @token p@ first applies parser @p@ and then the 'whiteSpace'
394  -- parser, returning the value of @p@. Every lexical
395  -- token (token) is defined using @token@, this way every parse
396  -- starts at a point without white space. Parsers that use @token@ are
397  -- called /token/ parsers in this document.
398  --
399  -- The only point where the 'whiteSpace' parser should be
400  -- called explicitly is the start of the main parser in order to skip
401  -- any leading white space.
402  --
403  -- Alternatively, one might define 'token' as first parsing 'whiteSpace'
404  -- and then parser @p@.  By parsing whiteSpace first, the parser is able
405  -- to return before parsing additional whiteSpace, improving laziness.
406  --
407  -- > mainParser  = sum <$ whiteSpace <*> many (token digit) <* eof
408  token :: m a -> m a
409  token p = p <* (someSpace <|> pure ())
410
411instance (TokenParsing m, MonadPlus m) => TokenParsing (Lazy.StateT s m) where
412  nesting (Lazy.StateT m) = Lazy.StateT $ nesting . m
413  {-# INLINE nesting #-}
414  someSpace = lift someSpace
415  {-# INLINE someSpace #-}
416  semi      = lift semi
417  {-# INLINE semi #-}
418  highlight h (Lazy.StateT m) = Lazy.StateT $ highlight h . m
419  {-# INLINE highlight #-}
420
421instance (TokenParsing m, MonadPlus m) => TokenParsing (Strict.StateT s m) where
422  nesting (Strict.StateT m) = Strict.StateT $ nesting . m
423  {-# INLINE nesting #-}
424  someSpace = lift someSpace
425  {-# INLINE someSpace #-}
426  semi      = lift semi
427  {-# INLINE semi #-}
428  highlight h (Strict.StateT m) = Strict.StateT $ highlight h . m
429  {-# INLINE highlight #-}
430
431instance (TokenParsing m, MonadPlus m) => TokenParsing (ReaderT e m) where
432  nesting (ReaderT m) = ReaderT $ nesting . m
433  {-# INLINE nesting #-}
434  someSpace = lift someSpace
435  {-# INLINE someSpace #-}
436  semi      = lift semi
437  {-# INLINE semi #-}
438  highlight h (ReaderT m) = ReaderT $ highlight h . m
439  {-# INLINE highlight #-}
440
441instance (TokenParsing m, MonadPlus m, Monoid w) => TokenParsing (Strict.WriterT w m) where
442  nesting (Strict.WriterT m) = Strict.WriterT $ nesting m
443  {-# INLINE nesting #-}
444  someSpace = lift someSpace
445  {-# INLINE someSpace #-}
446  semi      = lift semi
447  {-# INLINE semi #-}
448  highlight h (Strict.WriterT m) = Strict.WriterT $ highlight h m
449  {-# INLINE highlight #-}
450
451instance (TokenParsing m, MonadPlus m, Monoid w) => TokenParsing (Lazy.WriterT w m) where
452  nesting (Lazy.WriterT m) = Lazy.WriterT $ nesting m
453  {-# INLINE nesting #-}
454  someSpace = lift someSpace
455  {-# INLINE someSpace #-}
456  semi      = lift semi
457  {-# INLINE semi #-}
458  highlight h (Lazy.WriterT m) = Lazy.WriterT $ highlight h m
459  {-# INLINE highlight #-}
460
461instance (TokenParsing m, MonadPlus m, Monoid w) => TokenParsing (Lazy.RWST r w s m) where
462  nesting (Lazy.RWST m) = Lazy.RWST $ \r s -> nesting (m r s)
463  {-# INLINE nesting #-}
464  someSpace = lift someSpace
465  {-# INLINE someSpace #-}
466  semi      = lift semi
467  {-# INLINE semi #-}
468  highlight h (Lazy.RWST m) = Lazy.RWST $ \r s -> highlight h (m r s)
469  {-# INLINE highlight #-}
470
471instance (TokenParsing m, MonadPlus m, Monoid w) => TokenParsing (Strict.RWST r w s m) where
472  nesting (Strict.RWST m) = Strict.RWST $ \r s -> nesting (m r s)
473  {-# INLINE nesting #-}
474  someSpace = lift someSpace
475  {-# INLINE someSpace #-}
476  semi      = lift semi
477  {-# INLINE semi #-}
478  highlight h (Strict.RWST m) = Strict.RWST $ \r s -> highlight h (m r s)
479  {-# INLINE highlight #-}
480
481instance (TokenParsing m, MonadPlus m) => TokenParsing (IdentityT m) where
482  nesting = IdentityT . nesting . runIdentityT
483  {-# INLINE nesting #-}
484  someSpace = lift someSpace
485  {-# INLINE someSpace #-}
486  semi      = lift semi
487  {-# INLINE semi #-}
488  highlight h = IdentityT . highlight h . runIdentityT
489  {-# INLINE highlight #-}
490
491-- | Used to describe an input style for constructors, values, operators, etc.
492data IdentifierStyle m = IdentifierStyle
493  { _styleName              :: String
494  , _styleStart             :: m Char
495  , _styleLetter            :: m Char
496  , _styleReserved          :: HashSet String
497  , _styleHighlight         :: Highlight
498  , _styleReservedHighlight :: Highlight
499  }
500
501-- | This lens can be used to update the name for this style of identifier.
502--
503-- @'styleName' :: Lens' ('IdentifierStyle' m) 'String'@
504styleName :: Functor f => (String -> f String) -> IdentifierStyle m -> f (IdentifierStyle m)
505styleName f is = (\n -> is { _styleName = n }) <$> f (_styleName is)
506{-# INLINE styleName #-}
507
508-- | This lens can be used to update the action used to recognize the first letter in an identifier.
509--
510-- @'styleStart' :: Lens' ('IdentifierStyle' m) (m 'Char')@
511styleStart :: Functor f => (m Char -> f (m Char)) -> IdentifierStyle m -> f (IdentifierStyle m)
512styleStart f is = (\n -> is { _styleStart = n }) <$> f (_styleStart is)
513{-# INLINE styleStart #-}
514
515-- | This lens can be used to update the action used to recognize subsequent letters in an identifier.
516--
517-- @'styleLetter' :: Lens' ('IdentifierStyle' m) (m 'Char')@
518styleLetter :: Functor f => (m Char -> f (m Char)) -> IdentifierStyle m -> f (IdentifierStyle m)
519styleLetter f is = (\n -> is { _styleLetter = n }) <$> f (_styleLetter is)
520{-# INLINE styleLetter #-}
521
522-- | This is a traversal of both actions in contained in an 'IdentifierStyle'.
523--
524-- @'styleChars' :: Traversal ('IdentifierStyle' m) ('IdentifierStyle' n) (m 'Char') (n 'Char')@
525styleChars :: Applicative f => (m Char -> f (n Char)) -> IdentifierStyle m -> f (IdentifierStyle n)
526styleChars f is = (\n m -> is { _styleStart = n, _styleLetter = m }) <$> f (_styleStart is) <*> f (_styleLetter is)
527{-# INLINE styleChars #-}
528
529-- | This is a lens that can be used to modify the reserved identifier set.
530--
531-- @'styleReserved' :: Lens' ('IdentifierStyle' m) ('HashSet' 'String')@
532styleReserved :: Functor f => (HashSet String -> f (HashSet String)) -> IdentifierStyle m -> f (IdentifierStyle m)
533styleReserved f is = (\n -> is { _styleReserved = n }) <$> f (_styleReserved is)
534{-# INLINE styleReserved #-}
535
536-- | This is a lens that can be used to modify the highlight used for this identifier set.
537--
538-- @'styleHighlight' :: Lens' ('IdentifierStyle' m) 'Highlight'@
539styleHighlight :: Functor f => (Highlight -> f Highlight) -> IdentifierStyle m -> f (IdentifierStyle m)
540styleHighlight f is = (\n -> is { _styleHighlight = n }) <$> f (_styleHighlight is)
541{-# INLINE styleHighlight #-}
542
543-- | This is a lens that can be used to modify the highlight used for reserved identifiers in this identifier set.
544--
545-- @'styleReservedHighlight' :: Lens' ('IdentifierStyle' m) 'Highlight'@
546styleReservedHighlight :: Functor f => (Highlight -> f Highlight) -> IdentifierStyle m -> f (IdentifierStyle m)
547styleReservedHighlight f is = (\n -> is { _styleReservedHighlight = n }) <$> f (_styleReservedHighlight is)
548{-# INLINE styleReservedHighlight #-}
549
550-- | This is a traversal that can be used to modify the highlights used for both non-reserved and reserved identifiers in this identifier set.
551--
552-- @'styleHighlights' :: Traversal' ('IdentifierStyle' m) 'Highlight'@
553styleHighlights :: Applicative f => (Highlight -> f Highlight) -> IdentifierStyle m -> f (IdentifierStyle m)
554styleHighlights f is = (\n m -> is { _styleHighlight = n, _styleReservedHighlight = m }) <$> f (_styleHighlight is) <*> f (_styleReservedHighlight is)
555{-# INLINE styleHighlights #-}
556
557-- | Lift an identifier style into a monad transformer
558--
559-- Using @over@ from the @lens@ package:
560--
561-- @'liftIdentifierStyle' = over 'styleChars' 'lift'@
562liftIdentifierStyle :: (MonadTrans t, Monad m) => IdentifierStyle m -> IdentifierStyle (t m)
563liftIdentifierStyle = runIdentity . styleChars (Identity . lift)
564{-# INLINE liftIdentifierStyle #-}
565
566-- | parse a reserved operator or identifier using a given style
567reserve :: (TokenParsing m, Monad m) => IdentifierStyle m -> String -> m ()
568reserve s name = token $ try $ do
569   _ <- highlight (_styleReservedHighlight s) $ string name
570   notFollowedBy (_styleLetter s) <?> "end of " ++ show name
571{-# INLINE reserve #-}
572
573-- | parse a reserved operator or identifier using a given style given 'Text'.
574reserveText :: (TokenParsing m, Monad m) => IdentifierStyle m -> Text -> m ()
575reserveText s name = token $ try $ do
576   _ <- highlight (_styleReservedHighlight s) $ text name
577   notFollowedBy (_styleLetter s) <?> "end of " ++ show name
578{-# INLINE reserveText #-}
579
580-- | Parse a non-reserved identifier or symbol
581ident :: (TokenParsing m, Monad m, IsString s) => IdentifierStyle m -> m s
582ident s = fmap fromString $ token $ try $ do
583  name <- highlight (_styleHighlight s)
584          ((:) <$> _styleStart s <*> many (_styleLetter s) <?> _styleName s)
585  when (HashSet.member name (_styleReserved s)) $ unexpected $ "reserved " ++ _styleName s ++ " " ++ show name
586  return name
587{-# INLINE ident #-}
588
589-- * Utilities
590
591-- | This parser parses a character literal without the surrounding quotation marks.
592--
593-- This parser does NOT swallow trailing whitespace
594
595characterChar :: TokenParsing m => m Char
596
597charEscape, charLetter :: TokenParsing m => m Char
598characterChar = charLetter <|> charEscape <?> "literal character"
599{-# INLINE characterChar #-}
600
601charEscape = highlight EscapeCode $ char '\\' *> escapeCode
602{-# INLINE charEscape #-}
603
604charLetter = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026'))
605{-# INLINE charLetter #-}
606
607-- | This parser parses a literal string. Returns the literal
608-- string value. This parsers deals correctly with escape sequences and
609-- gaps. The literal string is parsed according to the grammar rules
610-- defined in the Haskell report (which matches most programming
611-- languages quite closely).
612--
613-- This parser does NOT swallow trailing whitespace
614
615escapeCode :: forall m. TokenParsing m => m Char
616escapeCode = (charEsc <|> charNum <|> charAscii <|> charControl) <?> "escape code"
617  where
618  charControl, charNum :: m Char
619  charControl = (\c -> toEnum (fromEnum c - fromEnum '@')) <$> (char '^' *> (upper <|> char '@'))
620  charNum = toEnum <$> num
621    where
622      num :: m Int
623      num = bounded 10 maxchar
624        <|> (char 'o' *> bounded 8 maxchar)
625        <|> (char 'x' *> bounded 16 maxchar)
626      maxchar = fromEnum (maxBound :: Char)
627
628  bounded :: Int -> Int -> m Int
629  bounded base bnd = foldl' (\x d -> base * x + digitToInt d) 0
630                 <$> bounded' (take base thedigits) (map digitToInt $ showIntAtBase base intToDigit bnd "")
631    where
632      thedigits :: [m Char]
633      thedigits = map char ['0'..'9'] ++ map oneOf (transpose [['A'..'F'],['a'..'f']])
634
635      toomuch :: m a
636      toomuch = unexpected "out-of-range numeric escape sequence"
637
638      bounded', bounded'' :: [m Char] -> [Int] -> m [Char]
639      bounded' dps@(zero:_) bds = skipSome zero *> ([] <$ notFollowedBy (choice dps) <|> bounded'' dps bds)
640                              <|> bounded'' dps bds
641      bounded' []           _   = error "bounded called with base 0"
642      bounded'' dps []         = [] <$ notFollowedBy (choice dps) <|> toomuch
643      bounded'' dps (bd : bds) = let anyd :: m Char
644                                     anyd = choice dps
645
646                                     nomore :: m ()
647                                     nomore = notFollowedBy anyd <|> toomuch
648                                     (low, ex : high) = splitAt bd dps
649                                  in ((:) <$> choice low <*> atMost (length bds) anyd) <* nomore
650                                     <|> ((:) <$> ex <*> ([] <$ nomore <|> bounded'' dps bds))
651                                     <|> if not (null bds)
652                                            then (:) <$> choice high <*> atMost (length bds - 1) anyd <* nomore
653                                            else empty
654      atMost n p | n <= 0    = pure []
655                 | otherwise = ((:) <$> p <*> atMost (n - 1) p) <|> pure []
656
657  charEsc :: m Char
658  charEsc = choice $ parseEsc <$> escMap
659
660  parseEsc (c,code) = code <$ char c
661  escMap = zip "abfnrtv\\\"\'" "\a\b\f\n\r\t\v\\\"\'"
662
663  charAscii :: m Char
664  charAscii = choice $ parseAscii <$> asciiMap
665
666  parseAscii (asc,code) = try $ code <$ string asc
667  asciiMap = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2)
668  ascii2codes, ascii3codes :: [String]
669  ascii2codes = [ "BS","HT","LF","VT","FF","CR","SO"
670                , "SI","EM","FS","GS","RS","US","SP"]
671  ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK"
672                ,"BEL","DLE","DC1","DC2","DC3","DC4","NAK"
673                ,"SYN","ETB","CAN","SUB","ESC","DEL"]
674  ascii2, ascii3 :: String
675  ascii2 = "\BS\HT\LF\VT\FF\CR\SO\SI\EM\FS\GS\RS\US\SP"
676  ascii3 = "\NUL\SOH\STX\ETX\EOT\ENQ\ACK\BEL\DLE\DC1\DC2\DC3\DC4\NAK\SYN\ETB\CAN\SUB\ESC\DEL"
677
678-- | This parser parses a natural number (a non-negative whole
679-- number). Returns the value of the number. The number can be
680-- specified in 'decimal', 'hexadecimal' or
681-- 'octal'. The number is parsed according to the grammar
682-- rules in the Haskell report.
683--
684-- This parser does NOT swallow trailing whitespace.
685natural' :: TokenParsing m => m Integer
686natural' = highlight Number nat <?> "natural"
687
688number :: TokenParsing m => Integer -> m Char -> m Integer
689number base baseDigit =
690  foldl' (\x d -> base*x + toInteger (digitToInt d)) 0 <$> some baseDigit
691
692-- | This parser parses an integer (a whole number). This parser
693-- is like 'natural' except that it can be prefixed with
694-- sign (i.e. \'-\' or \'+\'). Returns the value of the number. The
695-- number can be specified in 'decimal', 'hexadecimal'
696-- or 'octal'. The number is parsed according
697-- to the grammar rules in the Haskell report.
698--
699-- This parser does NOT swallow trailing whitespace.
700--
701-- Also, unlike the 'integer' parser, this parser does not admit spaces
702-- between the sign and the number.
703
704integer' :: TokenParsing m => m Integer
705integer' = int <?> "integer"
706{-# INLINE integer' #-}
707
708sign :: TokenParsing m => m (Integer -> Integer)
709sign = highlight Operator
710     $ negate <$ char '-'
711   <|> id <$ char '+'
712   <|> pure id
713
714int :: TokenParsing m => m Integer
715int = {-token-} sign <*> highlight Number nat
716nat, zeroNumber :: TokenParsing m => m Integer
717nat = zeroNumber <|> decimal
718zeroNumber = char '0' *> (hexadecimal <|> octal <|> decimal <|> pure 0) <?> ""
719
720floating :: TokenParsing m => m Scientific
721floating = decimal <**> fractExponent
722{-# INLINE floating #-}
723
724fractExponent :: forall m. TokenParsing m => m (Integer -> Scientific)
725fractExponent = (\fract expo n -> (fromInteger n + fract) * expo) <$> fraction <*> option 1 exponent'
726            <|> (\expo n -> fromInteger n * expo) <$> exponent'
727 where
728  fraction :: m Scientific
729  fraction = foldl' op 0 <$> (char '.' *> (some digit <?> "fraction"))
730
731  op f d = f + Sci.scientific (fromIntegral (digitToInt d)) (Sci.base10Exponent f - 1)
732
733  exponent' :: m Scientific
734  exponent' = ((\f e -> power (f e)) <$ oneOf "eE" <*> sign <*> (decimal <?> "exponent")) <?> "exponent"
735
736  power = Sci.scientific 1 . fromInteger
737
738
739natFloating, zeroNumFloat, decimalFloat :: TokenParsing m => m (Either Integer Scientific)
740natFloating
741    = char '0' *> zeroNumFloat
742  <|> decimalFloat
743zeroNumFloat
744    = Left <$> (hexadecimal <|> octal)
745  <|> decimalFloat
746  <|> pure 0 <**> try fractFloat
747  <|> pure (Left 0)
748decimalFloat = decimal <**> option Left (try fractFloat)
749
750fractFloat :: TokenParsing m => m (Integer -> Either Integer Scientific)
751fractFloat = (Right .) <$> fractExponent
752{-# INLINE fractFloat #-}
753
754-- | Parses a non-negative whole number in the decimal system. Returns the
755-- value of the number.
756--
757-- This parser does NOT swallow trailing whitespace
758decimal :: TokenParsing m => m Integer
759decimal = number 10 digit
760{-# INLINE decimal #-}
761
762-- | Parses a non-negative whole number in the hexadecimal system. The number
763-- should be prefixed with \"x\" or \"X\". Returns the value of the
764-- number.
765--
766-- This parser does NOT swallow trailing whitespace
767hexadecimal :: TokenParsing m => m Integer
768hexadecimal = oneOf "xX" *> number 16 hexDigit
769{-# INLINE hexadecimal #-}
770
771-- | Parses a non-negative whole number in the octal system. The number
772-- should be prefixed with \"o\" or \"O\". Returns the value of the
773-- number.
774--
775-- This parser does NOT swallow trailing whitespace
776octal :: TokenParsing m => m Integer
777octal = oneOf "oO" *> number 8 octDigit
778{-# INLINE octal #-}
779
780-- | This is a parser transformer you can use to disable syntax highlighting
781-- over a range of text you are parsing.
782newtype Unhighlighted m a = Unhighlighted { runUnhighlighted :: m a }
783  deriving (Functor,Applicative,Alternative,Monad,MonadPlus,CharParsing)
784
785instance Parsing m => Parsing (Unhighlighted m) where
786  try (Unhighlighted m) = Unhighlighted $ try m
787  {-# INLINE try #-}
788  Unhighlighted m <?> l = Unhighlighted $ m <?> l
789  {-# INLINE (<?>) #-}
790  unexpected = Unhighlighted . unexpected
791  {-# INLINE unexpected #-}
792  eof = Unhighlighted eof
793  {-# INLINE eof #-}
794  notFollowedBy (Unhighlighted m) = Unhighlighted $ notFollowedBy m
795  {-# INLINE notFollowedBy #-}
796
797
798instance MonadTrans Unhighlighted where
799  lift = Unhighlighted
800  {-# INLINE lift #-}
801
802instance MonadState s m => MonadState s (Unhighlighted m) where
803  get = lift Class.get
804  {-# INLINE get #-}
805  put = lift . Class.put
806  {-# INLINE put #-}
807
808instance MonadReader e m => MonadReader e (Unhighlighted m) where
809  ask = lift Class.ask
810  {-# INLINE ask #-}
811  local f = Unhighlighted . Class.local f . runUnhighlighted
812  {-# INLINE local #-}
813
814instance MonadWriter e m => MonadWriter e (Unhighlighted m) where
815  tell = lift . Class.tell
816  {-# INLINE tell #-}
817  listen = Unhighlighted . Class.listen . runUnhighlighted
818  {-# INLINE listen #-}
819  pass = Unhighlighted . Class.pass . runUnhighlighted
820  {-# INLINE pass #-}
821
822instance TokenParsing m => TokenParsing (Unhighlighted m) where
823  nesting (Unhighlighted m) = Unhighlighted (nesting m)
824  {-# INLINE nesting #-}
825  someSpace = Unhighlighted someSpace
826  {-# INLINE someSpace #-}
827  semi      = Unhighlighted semi
828  {-# INLINE semi #-}
829  highlight _ m = m
830  {-# INLINE highlight #-}
831
832-- | This is a parser transformer you can use to disable the automatic trailing
833-- space consumption of a Token parser.
834newtype Unspaced m a = Unspaced { runUnspaced :: m a }
835  deriving (Functor,Applicative,Alternative,Monad,MonadPlus,CharParsing)
836
837instance Parsing m => Parsing (Unspaced m) where
838  try (Unspaced m) = Unspaced $ try m
839  {-# INLINE try #-}
840  Unspaced m <?> l = Unspaced $ m <?> l
841  {-# INLINE (<?>) #-}
842  unexpected = Unspaced . unexpected
843  {-# INLINE unexpected #-}
844  eof = Unspaced eof
845  {-# INLINE eof #-}
846  notFollowedBy (Unspaced m) = Unspaced $ notFollowedBy m
847  {-# INLINE notFollowedBy #-}
848
849instance MonadTrans Unspaced where
850  lift = Unspaced
851  {-# INLINE lift #-}
852
853instance MonadState s m => MonadState s (Unspaced m) where
854  get = lift Class.get
855  {-# INLINE get #-}
856  put = lift . Class.put
857  {-# INLINE put #-}
858
859instance MonadReader e m => MonadReader e (Unspaced m) where
860  ask = lift Class.ask
861  {-# INLINE ask #-}
862  local f = Unspaced . Class.local f . runUnspaced
863  {-# INLINE local #-}
864
865instance MonadWriter e m => MonadWriter e (Unspaced m) where
866  tell = lift . Class.tell
867  {-# INLINE tell #-}
868  listen = Unspaced . Class.listen . runUnspaced
869  {-# INLINE listen #-}
870  pass = Unspaced . Class.pass . runUnspaced
871  {-# INLINE pass #-}
872
873instance TokenParsing m => TokenParsing (Unspaced m) where
874  nesting (Unspaced m) = Unspaced (nesting m)
875  {-# INLINE nesting #-}
876  someSpace = empty
877  {-# INLINE someSpace #-}
878  semi      = Unspaced semi
879  {-# INLINE semi #-}
880  highlight h (Unspaced m) = Unspaced (highlight h m)
881  {-# INLINE highlight #-}
882
883-- | This is a parser transformer you can use to disable the automatic trailing
884-- newline (but not whitespace-in-general) consumption of a Token parser.
885newtype Unlined m a = Unlined { runUnlined :: m a }
886  deriving (Functor,Applicative,Alternative,Monad,MonadPlus,CharParsing)
887
888instance Parsing m => Parsing (Unlined m) where
889  try (Unlined m) = Unlined $ try m
890  {-# INLINE try #-}
891  Unlined m <?> l = Unlined $ m <?> l
892  {-# INLINE (<?>) #-}
893  unexpected = Unlined . unexpected
894  {-# INLINE unexpected #-}
895  eof = Unlined eof
896  {-# INLINE eof #-}
897  notFollowedBy (Unlined m) = Unlined $ notFollowedBy m
898  {-# INLINE notFollowedBy #-}
899
900instance MonadTrans Unlined where
901  lift = Unlined
902  {-# INLINE lift #-}
903
904instance MonadState s m => MonadState s (Unlined m) where
905  get = lift Class.get
906  {-# INLINE get #-}
907  put = lift . Class.put
908  {-# INLINE put #-}
909
910instance MonadReader e m => MonadReader e (Unlined m) where
911  ask = lift Class.ask
912  {-# INLINE ask #-}
913  local f = Unlined . Class.local f . runUnlined
914  {-# INLINE local #-}
915
916instance MonadWriter e m => MonadWriter e (Unlined m) where
917  tell = lift . Class.tell
918  {-# INLINE tell #-}
919  listen = Unlined . Class.listen . runUnlined
920  {-# INLINE listen #-}
921  pass = Unlined . Class.pass . runUnlined
922  {-# INLINE pass #-}
923
924instance TokenParsing m => TokenParsing (Unlined m) where
925  nesting (Unlined m) = Unlined (nesting m)
926  {-# INLINE nesting #-}
927  someSpace = skipMany (satisfy $ \c -> c /= '\n' && isSpace c)
928  {-# INLINE someSpace #-}
929  semi      = Unlined semi
930  {-# INLINE semi #-}
931  highlight h (Unlined m) = Unlined (highlight h m)
932  {-# INLINE highlight #-}
933
934#ifdef MIN_VERSION_parsec
935instance Parsec.Stream s m Char => TokenParsing (Parsec.ParsecT s u m)
936#endif
937
938#ifdef MIN_VERSION_attoparsec
939instance Att.Chunk t => TokenParsing (Att.Parser t)
940#endif
941
942instance TokenParsing ReadP.ReadP
943