1{-# LANGUAGE BangPatterns #-}
2{-# LANGUAGE RankNTypes #-}
3{-# LANGUAGE ViewPatterns #-}
4
5-- Module      : Data.Text.Manipulate.Internal.Fusion
6-- Copyright   : (c) 2014-2020 Brendan Hay <brendan.g.hay@gmail.com>
7-- License     : This Source Code Form is subject to the terms of
8--               the Mozilla Public License, v. 2.0.
9--               A copy of the MPL can be found in the LICENSE file or
10--               you can obtain it at http://mozilla.org/MPL/2.0/.
11-- Maintainer  : Brendan Hay <brendan.g.hay@gmail.com>
12-- Stability   : experimental
13-- Portability : non-portable (GHC extensions)
14
15module Data.Text.Manipulate.Internal.Fusion where
16
17import qualified Data.Char as Char
18import Data.Text (Text)
19import qualified Data.Text.Internal.Fusion as Fusion
20import Data.Text.Internal.Fusion.CaseMapping (lowerMapping, upperMapping)
21import Data.Text.Internal.Fusion.Common
22import Data.Text.Internal.Fusion.Types
23import qualified Data.Text.Internal.Lazy.Fusion as LFusion
24import qualified Data.Text.Lazy as LText
25import Data.Text.Manipulate.Internal.Types
26
27takeWord :: Stream Char -> Stream Char
28takeWord = transform (const Done) yield . tokenise
29{-# INLINE [0] takeWord #-}
30
31dropWord :: Stream Char -> Stream Char
32dropWord (tokenise -> Stream next0 s0 len) = Stream next (True :*: s0) len
33  where
34    next (skip :*: s) =
35      case next0 s of
36        Done -> Done
37        Skip s' -> Skip (skip :*: s')
38        Yield t s' ->
39          case t of
40            B '\0' -> Skip (False :*: s')
41            B _ | skip -> Skip (False :*: s')
42            B c -> Yield c (False :*: s')
43            _ | skip -> Skip (skip :*: s')
44            U c -> Yield c (skip :*: s')
45            L c -> Yield c (skip :*: s')
46{-# INLINE [0] dropWord #-}
47
48toTitle :: Stream Char -> Stream Char
49toTitle = mapHead toUpper . transformWith (yield ' ') upper lower . tokenise
50{-# INLINE [0] toTitle #-}
51
52toCamel :: Stream Char -> Stream Char
53toCamel = mapHead toLower . transformWith skip' upper lower . tokenise
54{-# INLINE [0] toCamel #-}
55
56toPascal :: Stream Char -> Stream Char
57toPascal = mapHead toUpper . transformWith skip' upper lower . tokenise
58{-# INLINE [0] toPascal #-}
59
60toSnake :: Stream Char -> Stream Char
61toSnake = transform (yield '_') lower . tokenise
62{-# INLINE [0] toSnake #-}
63
64toSpinal :: Stream Char -> Stream Char
65toSpinal = transform (yield '-') lower . tokenise
66{-# INLINE [0] toSpinal #-}
67
68toTrain :: Stream Char -> Stream Char
69toTrain = mapHead toUpper . transformWith (yield '-') upper lower . tokenise
70{-# INLINE [0] toTrain #-}
71
72strict :: (Stream Char -> Stream Char) -> Text -> Text
73strict f t = Fusion.unstream (f (Fusion.stream t))
74{-# INLINE [0] strict #-}
75
76lazy :: (Stream Char -> Stream Char) -> LText.Text -> LText.Text
77lazy f t = LFusion.unstream (f (LFusion.stream t))
78{-# INLINE [0] lazy #-}
79
80skip' :: forall s. s -> Step (CC s) Char
81skip' s = Skip (CC s '\0' '\0')
82
83yield, upper, lower :: forall s. Char -> s -> Step (CC s) Char
84yield !c s = Yield c (CC s '\0' '\0')
85upper !c s = upperMapping c s
86lower !c s = lowerMapping c s
87
88-- | Step across word boundaries using a custom action, and transform
89-- both subsequent uppercase and lowercase characters uniformly.
90--
91-- /See:/ 'transformWith'
92transform ::
93  -- | Boundary action.
94  (forall s. s -> Step (CC s) Char) ->
95  -- | Character mapping.
96  (forall s. Char -> s -> Step (CC s) Char) ->
97  -- | Input stream.
98  Stream Token ->
99  Stream Char
100transform s m = transformWith s m m
101{-# INLINE [0] transform #-}
102
103-- | Step across word boundaries using a custom action, and transform
104-- subsequent characters after the word boundary is encountered with a mapping
105-- depending on case.
106transformWith ::
107  -- | Boundary action.
108  (forall s. s -> Step (CC s) Char) ->
109  -- | Boundary mapping.
110  (forall s. Char -> s -> Step (CC s) Char) ->
111  -- | Subsequent character mapping.
112  (forall s. Char -> s -> Step (CC s) Char) ->
113  -- | Input stream.
114  Stream Token ->
115  Stream Char
116transformWith md mu mc (Stream next0 s0 len) =
117  -- HINT: len incorrect when the boundary replacement yields a char.
118  Stream next (CC (False :*: False :*: s0) '\0' '\0') len
119  where
120    next (CC (up :*: prev :*: s) '\0' _) =
121      case next0 s of
122        Done -> Done
123        Skip s' -> Skip (CC (up :*: prev :*: s') '\0' '\0')
124        Yield t s' ->
125          case t of
126            B _ -> md (False :*: True :*: s')
127            U c | prev -> mu c (True :*: False :*: s')
128            L c | prev -> mu c (False :*: False :*: s')
129            U c | up -> mu c (True :*: False :*: s')
130            U c -> mc c (True :*: False :*: s')
131            L c -> mc c (False :*: False :*: s')
132    next (CC s a b) = Yield a (CC s b '\0')
133{-# INLINE [0] transformWith #-}
134
135-- | A token representing characters and boundaries in a stream.
136data Token
137  = -- | Word boundary.
138    B {-# UNPACK #-} !Char
139  | -- | Upper case character.
140    U {-# UNPACK #-} !Char
141  | -- | Lower case character.
142    L {-# UNPACK #-} !Char
143  deriving (Show)
144
145-- | Tokenise a character stream using the default 'isBoundary' predicate.
146--
147-- /See:/ 'tokeniseWith'
148tokenise ::
149  -- | Input stream.
150  Stream Char ->
151  Stream Token
152tokenise = tokeniseWith isBoundary
153{-# INLINE [0] tokenise #-}
154
155-- | Tokenise a character stream using a custom boundary predicate.
156tokeniseWith ::
157  -- | Boundary predicate.
158  (Char -> Bool) ->
159  -- | Input stream.
160  Stream Char ->
161  Stream Token
162tokeniseWith f (Stream next0 s0 len) =
163  -- HINT: len incorrect if there are adjacent boundaries, which are skipped.
164  Stream next (CC (True :*: False :*: False :*: s0) '\0' '\0') len
165  where
166    next (CC (start :*: up :*: prev :*: s) '\0' _) =
167      case next0 s of
168        Done -> Done
169        Skip s' -> Skip (CC (start :*: up :*: prev :*: s') '\0' '\0')
170        Yield c s'
171          | not b, start -> push
172          | up -> push
173          | b, prev -> Skip (step start)
174          | otherwise -> push
175          where
176            push
177              | b = Yield (B c) (step False)
178              | u, skip = Yield (U c) (step False)
179              | u = Yield (B '\0') (CC (False :*: u :*: b :*: s') c '\0')
180              | otherwise = Yield (L c) (step False)
181
182            step p = CC (p :*: u :*: b :*: s') '\0' '\0'
183
184            skip = up || start || prev
185
186            b = f c
187            u = Char.isUpper c
188    next (CC s a b) = Yield (U a) (CC s b '\0')
189{-# INLINE [0] tokeniseWith #-}
190
191mapHead :: (Stream Char -> Stream Char) -> Stream Char -> Stream Char
192mapHead f s = maybe s (\(x, s') -> f (singleton x) `append` s') (uncons s)
193{-# INLINE [0] mapHead #-}
194