1module UTF8 where
2
3import Data.Word
4import Data.Bits
5import Data.Char
6
7{-
8-- Could also be imported:
9
10import Codec.Binary.UTF8.Light as UTF8
11
12encode :: Char -> [Word8]
13encode c = head (UTF8.encodeUTF8' [UTF8.c2w c])
14
15-}
16
-- | Encode a single 'Char' as its UTF-8 byte sequence.
--
-- The code point (obtained with 'ord') is split into one to four bytes,
-- depending on its magnitude:
--
-- * up to @0x7f@: one byte, the code point itself
-- * up to @0x7ff@: a @0xc0@ lead byte plus one continuation byte
-- * up to @0xffff@: a @0xe0@ lead byte plus two continuation bytes
-- * anything larger: a @0xf0@ lead byte plus three continuation bytes
--
-- No special treatment is given to surrogate code points
-- (@0xd800@ to @0xdfff@); they fall into the three-byte case.
encode :: Char -> [Word8]
encode ch = map fromIntegral bytes
 where
  -- Numeric code point of the character being encoded.
  cp :: Int
  cp = ord ch

  -- Lead byte: the length marker combined with the bits left over after
  -- shifting out everything carried by the continuation bytes. For every
  -- range below the shifted value never overlaps the marker bits, so
  -- bitwise OR and addition agree.
  lead :: Int -> Int -> Int
  lead marker n = marker .|. (cp `shiftR` n)

  -- Continuation byte: the @10xxxxxx@ marker carrying the six bits of the
  -- code point found at bit offset @n@.
  cont :: Int -> Int
  cont n = 0x80 .|. ((cp `shiftR` n) .&. 0x3f)

  bytes :: [Int]
  bytes
    | cp < 0x80    = [cp]
    | cp < 0x800   = [lead 0xc0 6, cont 0]
    | cp < 0x10000 = [lead 0xe0 12, cont 6, cont 0]
    | otherwise    = [lead 0xf0 18, cont 12, cont 6, cont 0]
37