1{-# LANGUAGE BangPatterns #-}
2
3-- |
4-- Module      : Data.Text.Internal.Encoding.Fusion.Common
5-- Copyright   : (c) Tom Harper 2008-2009,
6--               (c) Bryan O'Sullivan 2009,
7--               (c) Duncan Coutts 2009,
8--               (c) Jasper Van der Jeugt 2011
9--
10-- License     : BSD-style
11-- Maintainer  : bos@serpentine.com
12-- Stability   : experimental
13-- Portability : portable
14--
15-- /Warning/: this is an internal module, and does not have a stable
16-- API or name. Use at your own risk!
17--
18-- Fusible 'Stream'-oriented functions for converting between 'Text'
19-- and several common encodings.
20
21module Data.Text.Internal.Encoding.Fusion.Common
22    (
23    -- * Restreaming
24    -- Restreaming is the act of converting from one 'Stream'
25    -- representation to another.
26      restreamUtf16LE
27    , restreamUtf16BE
28    , restreamUtf32LE
29    , restreamUtf32BE
30    ) where
31
32import Data.Bits ((.&.))
33import Data.Text.Internal.Fusion (Step(..), Stream(..))
34import Data.Text.Internal.Fusion.Types (RS(..))
35import Data.Text.Internal.Unsafe.Char (ord)
36import Data.Text.Internal.Unsafe.Shift (shiftR)
37import Data.Word (Word8)
38
39restreamUtf16BE :: Stream Char -> Stream Word8
40restreamUtf16BE (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2)
41  where
42    next (RS0 s) = case next0 s of
43        Done -> Done
44        Skip s' -> Skip (RS0 s')
45        Yield x s'
46            | n < 0x10000 -> Yield (fromIntegral $ n `shiftR` 8) $
47                             RS1 s' (fromIntegral n)
48            | otherwise   -> Yield c1 $ RS3 s' c2 c3 c4
49            where
50              n  = ord x
51              n1 = n - 0x10000
52              c1 = fromIntegral (n1 `shiftR` 18 + 0xD8)
53              c2 = fromIntegral (n1 `shiftR` 10)
54              n2 = n1 .&. 0x3FF
55              c3 = fromIntegral (n2 `shiftR` 8 + 0xDC)
56              c4 = fromIntegral n2
57    next (RS1 s x2)       = Yield x2 (RS0 s)
58    next (RS2 s x2 x3)    = Yield x2 (RS1 s x3)
59    next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4)
60    {-# INLINE next #-}
61{-# INLINE restreamUtf16BE #-}
62
63restreamUtf16LE :: Stream Char -> Stream Word8
64restreamUtf16LE (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2)
65  where
66    next (RS0 s) = case next0 s of
67        Done -> Done
68        Skip s' -> Skip (RS0 s')
69        Yield x s'
70            | n < 0x10000 -> Yield (fromIntegral n) $
71                             RS1 s' (fromIntegral $ shiftR n 8)
72            | otherwise   -> Yield c1 $ RS3 s' c2 c3 c4
73          where
74            n  = ord x
75            n1 = n - 0x10000
76            c2 = fromIntegral (shiftR n1 18 + 0xD8)
77            c1 = fromIntegral (shiftR n1 10)
78            n2 = n1 .&. 0x3FF
79            c4 = fromIntegral (shiftR n2 8 + 0xDC)
80            c3 = fromIntegral n2
81    next (RS1 s x2)       = Yield x2 (RS0 s)
82    next (RS2 s x2 x3)    = Yield x2 (RS1 s x3)
83    next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4)
84    {-# INLINE next #-}
85{-# INLINE restreamUtf16LE #-}
86
87restreamUtf32BE :: Stream Char -> Stream Word8
88restreamUtf32BE (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2)
89  where
90    next (RS0 s) = case next0 s of
91        Done       -> Done
92        Skip s'    -> Skip (RS0 s')
93        Yield x s' -> Yield c1 (RS3 s' c2 c3 c4)
94          where
95            n  = ord x
96            c1 = fromIntegral $ shiftR n 24
97            c2 = fromIntegral $ shiftR n 16
98            c3 = fromIntegral $ shiftR n 8
99            c4 = fromIntegral n
100    next (RS1 s x2)       = Yield x2 (RS0 s)
101    next (RS2 s x2 x3)    = Yield x2 (RS1 s x3)
102    next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4)
103    {-# INLINE next #-}
104{-# INLINE restreamUtf32BE #-}
105
106restreamUtf32LE :: Stream Char -> Stream Word8
107restreamUtf32LE (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2)
108  where
109    next (RS0 s) = case next0 s of
110        Done       -> Done
111        Skip s'    -> Skip (RS0 s')
112        Yield x s' -> Yield c1 (RS3 s' c2 c3 c4)
113          where
114            n  = ord x
115            c4 = fromIntegral $ shiftR n 24
116            c3 = fromIntegral $ shiftR n 16
117            c2 = fromIntegral $ shiftR n 8
118            c1 = fromIntegral n
119    next (RS1 s x2)       = Yield x2 (RS0 s)
120    next (RS2 s x2 x3)    = Yield x2 (RS1 s x3)
121    next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4)
122    {-# INLINE next #-}
123{-# INLINE restreamUtf32LE #-}
124