1-- |
2-- Module      : Basement.String.Encoding.ASCII7
3-- License     : BSD-style
4-- Maintainer  : Foundation
5-- Stability   : experimental
6-- Portability : portable
7--
8
9{-# LANGUAGE MagicHash #-}
10
11module Basement.String.Encoding.ASCII7
12    ( ASCII7(..)
13    , ASCII7_Invalid(..)
14    ) where
15
16import Basement.Compat.Base
17import Basement.Types.OffsetSize
18import Basement.Numerical.Additive
19import Basement.Monad
20
21import GHC.Prim
22import GHC.Word
23import GHC.Types
24import Basement.UArray
25import Basement.UArray.Mutable (MUArray)
26import Basement.MutableBuilder
27
28import Basement.String.Encoding.Encoding
29
30-- | validate a given byte is within ASCII characters encoring size
31--
32-- This function check the 8th bit is set to 0
33--
34isAscii :: Word8 -> Bool
35isAscii (W8# w) = W8# (and# w 0x80## ) == 0
36{-# INLINE isAscii #-}
37
38data ASCII7_Invalid
39    = ByteOutOfBound Word8
40    | CharNotAscii   Char
41  deriving (Typeable, Show, Eq)
42instance Exception ASCII7_Invalid
43
44data ASCII7 = ASCII7
45
46instance Encoding ASCII7 where
47    type Unit ASCII7 = Word8
48    type Error ASCII7 = ASCII7_Invalid
49    encodingNext  _ = next
50    encodingWrite _ = write
51
52-- | consume an Ascii7 char and return the Unicode point and the position
53-- of the next possible Ascii7 char
54--
55next :: (Offset Word8 -> Word8)
56          -- ^ method to access a given byte
57     -> Offset Word8
58          -- ^ index of the byte
59     -> Either ASCII7_Invalid (Char, Offset Word8)
60          -- ^ either successfully validated the ASCII char and returned the
61          -- next index or fail with an error
62next getter off
63    | isAscii w8 = Right (toChar w, off + 1)
64    | otherwise  = Left $ ByteOutOfBound w8
65  where
66    !w8@(W8# w) = getter off
67    toChar :: Word# -> Char
68    toChar a = C# (chr# (word2Int# a))
69
70-- Write ascii char
71--
72-- > build 64 $ sequence_ write "this is a simple list of char..."
73--
74write :: (PrimMonad st, Monad st)
75      => Char
76           -- ^ expecting it to be a valid Ascii character.
77           -- otherwise this function will throw an exception
78      -> Builder (UArray Word8) (MUArray Word8) Word8 st err ()
79write c
80    | c < toEnum 0x80 = builderAppend $ w8 c
81    | otherwise       = throw $ CharNotAscii c
82  where
83    w8 :: Char -> Word8
84    w8 (C# ch) = W8# (int2Word# (ord# ch))
85