1-- | 2-- Module : Basement.String.Encoding.Encoding 3-- License : BSD-style 4-- Maintainer : Foundation 5-- Stability : experimental 6-- Portability : portable 7-- 8 9{-# LANGUAGE FlexibleContexts #-} 10 11module Basement.String.Encoding.Encoding 12 ( Encoding(..) 13 , convertFromTo 14 ) where 15 16import Basement.Compat.Base 17import Basement.Types.OffsetSize 18import Basement.Monad 19import Basement.PrimType 20import Basement.MutableBuilder 21import Basement.Numerical.Additive 22import Basement.UArray (UArray) 23import Basement.UArray.Mutable (MUArray) 24import qualified Basement.UArray as Vec 25 26class Encoding encoding where 27 -- | the unit element use for the encoding. 28 -- i.e. Word8 for ASCII7 or UTF8, Word16 for UTF16... 29 -- 30 type Unit encoding 31 32 -- | define the type of error handling you want to use for the 33 -- next function. 34 -- 35 -- > type Error UTF8 = Either UTF8_Invalid 36 -- 37 type Error encoding 38 39 -- | consume an `Unit encoding` and return the Unicode point and the position 40 -- of the next possible `Unit encoding` 41 -- 42 encodingNext :: encoding 43 -- ^ only used for type deduction 44 -> (Offset (Unit encoding) -> Unit encoding) 45 -- ^ method to access a given `Unit encoding` 46 -- (see `unsafeIndexer`) 47 -> Offset (Unit encoding) 48 -- ^ offset of the `Unit encoding` where starts the 49 -- encoding of a given unicode 50 -> Either (Error encoding) (Char, Offset (Unit encoding)) -- ^ either successfully validated the `Unit encoding` 51 -- and returned the next offset or fail with an 52 -- `Error encoding` 53 54 -- Write a unicode point encoded into one or multiple `Unit encoding` 55 -- 56 -- > build 64 $ sequence_ (write UTF8) "this is a simple list of char..." 57 -- 58 encodingWrite :: (PrimMonad st, Monad st) 59 => encoding 60 -- ^ only used for type deduction 61 -> Char 62 -- ^ the unicode character to encode 63 -> Builder (UArray (Unit encoding)) 64 (MUArray (Unit encoding)) 65 (Unit encoding) st err () 66 67-- | helper to convert a given Array in a given encoding into an array 68-- with another encoding. 69-- 70-- This is a helper to convert from one String encoding to another. 71-- This function is (quite) slow and needs some work. 72-- 73-- ``` 74-- let s16 = ... -- string in UTF16 75-- -- create s8, a UTF8 String 76-- let s8 = runST $ convertWith UTF16 UTF8 (toBytes s16) 77-- 78-- print s8 79-- ``` 80-- 81convertFromTo :: ( PrimMonad st, Monad st 82 , Encoding input, PrimType (Unit input) 83 , Encoding output, PrimType (Unit output) 84 ) 85 => input 86 -- ^ Input's encoding type 87 -> output 88 -- ^ Output's encoding type 89 -> UArray (Unit input) 90 -- ^ the input raw array 91 -> st (Either (Offset (Unit input), Error input) (UArray (Unit output))) 92convertFromTo inputEncodingTy outputEncodingTy bytes 93 | Vec.null bytes = return . return $ mempty 94 | otherwise = Vec.unsafeIndexer bytes $ \t -> Vec.builderBuild 64 (loop azero t) 95 where 96 lastUnit = Vec.length bytes 97 98 loop off getter 99 | off .==# lastUnit = return () 100 | otherwise = case encodingNext inputEncodingTy getter off of 101 Left err -> mFail (off, err) 102 Right (c, noff) -> encodingWrite outputEncodingTy c >> loop noff getter 103