1-- | 2-- Module : Basement.String.Encoding.ASCII7 3-- License : BSD-style 4-- Maintainer : Foundation 5-- Stability : experimental 6-- Portability : portable 7-- 8 9{-# LANGUAGE MagicHash #-} 10 11module Basement.String.Encoding.ASCII7 12 ( ASCII7(..) 13 , ASCII7_Invalid(..) 14 ) where 15 16import Basement.Compat.Base 17import Basement.Types.OffsetSize 18import Basement.Numerical.Additive 19import Basement.Monad 20 21import GHC.Prim 22import GHC.Word 23import GHC.Types 24import Basement.UArray 25import Basement.UArray.Mutable (MUArray) 26import Basement.MutableBuilder 27 28import Basement.String.Encoding.Encoding 29 30-- | validate a given byte is within ASCII characters encoring size 31-- 32-- This function check the 8th bit is set to 0 33-- 34isAscii :: Word8 -> Bool 35isAscii (W8# w) = W8# (and# w 0x80## ) == 0 36{-# INLINE isAscii #-} 37 38data ASCII7_Invalid 39 = ByteOutOfBound Word8 40 | CharNotAscii Char 41 deriving (Typeable, Show, Eq) 42instance Exception ASCII7_Invalid 43 44data ASCII7 = ASCII7 45 46instance Encoding ASCII7 where 47 type Unit ASCII7 = Word8 48 type Error ASCII7 = ASCII7_Invalid 49 encodingNext _ = next 50 encodingWrite _ = write 51 52-- | consume an Ascii7 char and return the Unicode point and the position 53-- of the next possible Ascii7 char 54-- 55next :: (Offset Word8 -> Word8) 56 -- ^ method to access a given byte 57 -> Offset Word8 58 -- ^ index of the byte 59 -> Either ASCII7_Invalid (Char, Offset Word8) 60 -- ^ either successfully validated the ASCII char and returned the 61 -- next index or fail with an error 62next getter off 63 | isAscii w8 = Right (toChar w, off + 1) 64 | otherwise = Left $ ByteOutOfBound w8 65 where 66 !w8@(W8# w) = getter off 67 toChar :: Word# -> Char 68 toChar a = C# (chr# (word2Int# a)) 69 70-- Write ascii char 71-- 72-- > build 64 $ sequence_ write "this is a simple list of char..." 73-- 74write :: (PrimMonad st, Monad st) 75 => Char 76 -- ^ expecting it to be a valid Ascii character. 77 -- otherwise this function will throw an exception 78 -> Builder (UArray Word8) (MUArray Word8) Word8 st err () 79write c 80 | c < toEnum 0x80 = builderAppend $ w8 c 81 | otherwise = throw $ CharNotAscii c 82 where 83 w8 :: Char -> Word8 84 w8 (C# ch) = W8# (int2Word# (ord# ch)) 85