1{-# LANGUAGE BangPatterns #-} 2{-# LANGUAGE RankNTypes #-} 3{-# LANGUAGE ViewPatterns #-} 4 5-- Module : Data.Text.Manipulate.Internal.Fusion 6-- Copyright : (c) 2014-2020 Brendan Hay <brendan.g.hay@gmail.com> 7-- License : This Source Code Form is subject to the terms of 8-- the Mozilla Public License, v. 2.0. 9-- A copy of the MPL can be found in the LICENSE file or 10-- you can obtain it at http://mozilla.org/MPL/2.0/. 11-- Maintainer : Brendan Hay <brendan.g.hay@gmail.com> 12-- Stability : experimental 13-- Portability : non-portable (GHC extensions) 14 15module Data.Text.Manipulate.Internal.Fusion where 16 17import qualified Data.Char as Char 18import Data.Text (Text) 19import qualified Data.Text.Internal.Fusion as Fusion 20import Data.Text.Internal.Fusion.CaseMapping (lowerMapping, upperMapping) 21import Data.Text.Internal.Fusion.Common 22import Data.Text.Internal.Fusion.Types 23import qualified Data.Text.Internal.Lazy.Fusion as LFusion 24import qualified Data.Text.Lazy as LText 25import Data.Text.Manipulate.Internal.Types 26 27takeWord :: Stream Char -> Stream Char 28takeWord = transform (const Done) yield . tokenise 29{-# INLINE [0] takeWord #-} 30 31dropWord :: Stream Char -> Stream Char 32dropWord (tokenise -> Stream next0 s0 len) = Stream next (True :*: s0) len 33 where 34 next (skip :*: s) = 35 case next0 s of 36 Done -> Done 37 Skip s' -> Skip (skip :*: s') 38 Yield t s' -> 39 case t of 40 B '\0' -> Skip (False :*: s') 41 B _ | skip -> Skip (False :*: s') 42 B c -> Yield c (False :*: s') 43 _ | skip -> Skip (skip :*: s') 44 U c -> Yield c (skip :*: s') 45 L c -> Yield c (skip :*: s') 46{-# INLINE [0] dropWord #-} 47 48toTitle :: Stream Char -> Stream Char 49toTitle = mapHead toUpper . transformWith (yield ' ') upper lower . tokenise 50{-# INLINE [0] toTitle #-} 51 52toCamel :: Stream Char -> Stream Char 53toCamel = mapHead toLower . transformWith skip' upper lower . tokenise 54{-# INLINE [0] toCamel #-} 55 56toPascal :: Stream Char -> Stream Char 57toPascal = mapHead toUpper . transformWith skip' upper lower . tokenise 58{-# INLINE [0] toPascal #-} 59 60toSnake :: Stream Char -> Stream Char 61toSnake = transform (yield '_') lower . tokenise 62{-# INLINE [0] toSnake #-} 63 64toSpinal :: Stream Char -> Stream Char 65toSpinal = transform (yield '-') lower . tokenise 66{-# INLINE [0] toSpinal #-} 67 68toTrain :: Stream Char -> Stream Char 69toTrain = mapHead toUpper . transformWith (yield '-') upper lower . tokenise 70{-# INLINE [0] toTrain #-} 71 72strict :: (Stream Char -> Stream Char) -> Text -> Text 73strict f t = Fusion.unstream (f (Fusion.stream t)) 74{-# INLINE [0] strict #-} 75 76lazy :: (Stream Char -> Stream Char) -> LText.Text -> LText.Text 77lazy f t = LFusion.unstream (f (LFusion.stream t)) 78{-# INLINE [0] lazy #-} 79 80skip' :: forall s. s -> Step (CC s) Char 81skip' s = Skip (CC s '\0' '\0') 82 83yield, upper, lower :: forall s. Char -> s -> Step (CC s) Char 84yield !c s = Yield c (CC s '\0' '\0') 85upper !c s = upperMapping c s 86lower !c s = lowerMapping c s 87 88-- | Step across word boundaries using a custom action, and transform 89-- both subsequent uppercase and lowercase characters uniformly. 90-- 91-- /See:/ 'transformWith' 92transform :: 93 -- | Boundary action. 94 (forall s. s -> Step (CC s) Char) -> 95 -- | Character mapping. 96 (forall s. Char -> s -> Step (CC s) Char) -> 97 -- | Input stream. 98 Stream Token -> 99 Stream Char 100transform s m = transformWith s m m 101{-# INLINE [0] transform #-} 102 103-- | Step across word boundaries using a custom action, and transform 104-- subsequent characters after the word boundary is encountered with a mapping 105-- depending on case. 106transformWith :: 107 -- | Boundary action. 108 (forall s. s -> Step (CC s) Char) -> 109 -- | Boundary mapping. 110 (forall s. Char -> s -> Step (CC s) Char) -> 111 -- | Subsequent character mapping. 112 (forall s. Char -> s -> Step (CC s) Char) -> 113 -- | Input stream. 114 Stream Token -> 115 Stream Char 116transformWith md mu mc (Stream next0 s0 len) = 117 -- HINT: len incorrect when the boundary replacement yields a char. 118 Stream next (CC (False :*: False :*: s0) '\0' '\0') len 119 where 120 next (CC (up :*: prev :*: s) '\0' _) = 121 case next0 s of 122 Done -> Done 123 Skip s' -> Skip (CC (up :*: prev :*: s') '\0' '\0') 124 Yield t s' -> 125 case t of 126 B _ -> md (False :*: True :*: s') 127 U c | prev -> mu c (True :*: False :*: s') 128 L c | prev -> mu c (False :*: False :*: s') 129 U c | up -> mu c (True :*: False :*: s') 130 U c -> mc c (True :*: False :*: s') 131 L c -> mc c (False :*: False :*: s') 132 next (CC s a b) = Yield a (CC s b '\0') 133{-# INLINE [0] transformWith #-} 134 135-- | A token representing characters and boundaries in a stream. 136data Token 137 = -- | Word boundary. 138 B {-# UNPACK #-} !Char 139 | -- | Upper case character. 140 U {-# UNPACK #-} !Char 141 | -- | Lower case character. 142 L {-# UNPACK #-} !Char 143 deriving (Show) 144 145-- | Tokenise a character stream using the default 'isBoundary' predicate. 146-- 147-- /See:/ 'tokeniseWith' 148tokenise :: 149 -- | Input stream. 150 Stream Char -> 151 Stream Token 152tokenise = tokeniseWith isBoundary 153{-# INLINE [0] tokenise #-} 154 155-- | Tokenise a character stream using a custom boundary predicate. 156tokeniseWith :: 157 -- | Boundary predicate. 158 (Char -> Bool) -> 159 -- | Input stream. 160 Stream Char -> 161 Stream Token 162tokeniseWith f (Stream next0 s0 len) = 163 -- HINT: len incorrect if there are adjacent boundaries, which are skipped. 164 Stream next (CC (True :*: False :*: False :*: s0) '\0' '\0') len 165 where 166 next (CC (start :*: up :*: prev :*: s) '\0' _) = 167 case next0 s of 168 Done -> Done 169 Skip s' -> Skip (CC (start :*: up :*: prev :*: s') '\0' '\0') 170 Yield c s' 171 | not b, start -> push 172 | up -> push 173 | b, prev -> Skip (step start) 174 | otherwise -> push 175 where 176 push 177 | b = Yield (B c) (step False) 178 | u, skip = Yield (U c) (step False) 179 | u = Yield (B '\0') (CC (False :*: u :*: b :*: s') c '\0') 180 | otherwise = Yield (L c) (step False) 181 182 step p = CC (p :*: u :*: b :*: s') '\0' '\0' 183 184 skip = up || start || prev 185 186 b = f c 187 u = Char.isUpper c 188 next (CC s a b) = Yield (U a) (CC s b '\0') 189{-# INLINE [0] tokeniseWith #-} 190 191mapHead :: (Stream Char -> Stream Char) -> Stream Char -> Stream Char 192mapHead f s = maybe s (\(x, s') -> f (singleton x) `append` s') (uncons s) 193{-# INLINE [0] mapHead #-} 194