1-- |
2-- Module      : Data.Memory.Encoding.Base32
3-- License     : BSD-style
4-- Maintainer  : Nicolas DI PRIMA <nicolas@di-prima.fr>
5-- Stability   : experimental
6-- Portability : unknown
7--
8-- Low-level Base32 encoding and decoding.
9--
10-- If you just want to encode or decode some bytes, you probably want to use
11-- the "Data.ByteArray.Encoding" module.
12--
13{-# LANGUAGE MagicHash #-}
14{-# LANGUAGE UnboxedTuples #-}
15{-# LANGUAGE OverloadedStrings #-}
16{-# LANGUAGE BangPatterns #-}
17{-# LANGUAGE Rank2Types #-}
18module Data.Memory.Encoding.Base32
19    ( toBase32
20    , unBase32Length
21    , fromBase32
22    ) where
23
24import           Data.Memory.Internal.Compat
25import           Data.Memory.Internal.CompatPrim
26import           Data.Word
27import           Data.Bits ((.|.))
28import           GHC.Prim
29import           GHC.Word
30import           Control.Monad
31import           Foreign.Storable
32import           Foreign.Ptr (Ptr)
33
34-- | Transform a number of bytes pointed by.@src in the base32 binary representation in @dst
35--
36-- destination memory need to be of correct size, otherwise it will lead
37-- to really bad things.
38toBase32 :: Ptr Word8 -- ^ input
39         -> Ptr Word8 -- ^ output
40         -> Int       -- ^ input len
41         -> IO ()
42toBase32 dst src len = loop 0 0
43  where
44    eqChar :: Word8
45    eqChar = 0x3d
46
47    peekOrZero :: Int -> IO Word8
48    peekOrZero i
49        | i >= len  = return 0
50        | otherwise = peekByteOff src i
51
52    pokeOrPadding :: Int -- for the test
53                  -> Int -- src index
54                  -> Word8 -- the value
55                  -> IO ()
56    pokeOrPadding i di v
57        | i <  len  = pokeByteOff dst di v
58        | otherwise = pokeByteOff dst di eqChar
59
60    loop :: Int -- index input
61         -> Int -- index output
62         -> IO ()
63    loop i di
64        | i >= len  = return ()
65        | otherwise = do
66            i1 <- peekByteOff src i
67            i2 <- peekOrZero (i + 1)
68            i3 <- peekOrZero (i + 2)
69            i4 <- peekOrZero (i + 3)
70            i5 <- peekOrZero (i + 4)
71
72            let (o1,o2,o3,o4,o5,o6,o7,o8) = toBase32Per5Bytes (i1, i2, i3, i4, i5)
73
74            pokeByteOff dst di o1
75            pokeByteOff dst (di + 1) o2
76            pokeOrPadding (i + 1) (di + 2) o3
77            pokeOrPadding (i + 1) (di + 3) o4
78            pokeOrPadding (i + 2) (di + 4) o5
79            pokeOrPadding (i + 3) (di + 5) o6
80            pokeOrPadding (i + 3) (di + 6) o7
81            pokeOrPadding (i + 4) (di + 7) o8
82
83            loop (i+5) (di+8)
84
85toBase32Per5Bytes :: (Word8, Word8, Word8, Word8, Word8)
86                  -> (Word8, Word8, Word8, Word8, Word8, Word8, Word8, Word8)
87toBase32Per5Bytes (W8# i1, W8# i2, W8# i3, W8# i4, W8# i5) =
88    (index o1, index o2, index o3, index o4, index o5, index o6, index o7, index o8)
89  where
90    -- 1111 1000 >> 3
91    !o1 =     (uncheckedShiftRL# (and# i1 0xF8##) 3#)
92    -- 0000 0111 << 2 | 1100 0000 >> 6
93    !o2 = or# (uncheckedShiftL#  (and# i1 0x07##) 2#) (uncheckedShiftRL# (and# i2 0xC0##) 6#)
94    -- 0011 1110 >> 1
95    !o3 =     (uncheckedShiftRL# (and# i2 0x3E##) 1#)
96    -- 0000 0001 << 4 | 1111 0000 >> 4
97    !o4 = or# (uncheckedShiftL#  (and# i2 0x01##) 4#) (uncheckedShiftRL# (and# i3 0xF0##) 4#)
98    -- 0000 1111 << 1 | 1000 0000 >> 7
99    !o5 = or# (uncheckedShiftL#  (and# i3 0x0F##) 1#) (uncheckedShiftRL# (and# i4 0x80##) 7#)
100    -- 0111 1100 >> 2
101    !o6 =     (uncheckedShiftRL# (and# i4 0x7C##) 2#)
102    -- 0000 0011 << 3 | 1110 0000 >> 5
103    !o7 = or# (uncheckedShiftL#  (and# i4 0x03##) 3#) (uncheckedShiftRL# (and# i5 0xE0##) 5#)
104    -- 0001 1111
105    !o8 =     ((and# i5 0x1F##))
106
107    !set = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"#
108
109    index :: Word# -> Word8
110    index idx = W8# (indexWord8OffAddr# set (word2Int# idx))
111
112-- | Get the length needed for the destination buffer for a base32 decoding.
113--
114-- if the length is not a multiple of 8, Nothing is returned
115unBase32Length :: Ptr Word8 -> Int -> IO (Maybe Int)
116unBase32Length src len
117    | len < 1            = return $ Just 0
118    | (len `mod` 8) /= 0 = return Nothing
119    | otherwise          = do
120        last1Byte <- peekByteOff src (len - 1)
121        last2Byte <- peekByteOff src (len - 2)
122        last3Byte <- peekByteOff src (len - 3)
123        last4Byte <- peekByteOff src (len - 4)
124        last5Byte <- peekByteOff src (len - 5)
125        last6Byte <- peekByteOff src (len - 6)
126
127        let dstLen = caseByte last1Byte last2Byte last3Byte last4Byte last5Byte last6Byte
128        return $ Just $ (len `div` 8) * 5 - dstLen
129  where
130    caseByte :: Word8 -> Word8 -> Word8 -> Word8 -> Word8 -> Word8 -> Int
131    caseByte last1 last2 last3 last4 last5 last6
132        | last6 == eqAscii = 4
133        | last5 == eqAscii = 3 -- error this padding is not expected (error will be detected in fromBase32)
134        | last4 == eqAscii = 3
135        | last3 == eqAscii = 2
136        | last2 == eqAscii = 1 -- error this padding is not expected (error will be detected in fromBase32)
137        | last1 == eqAscii = 1
138        | otherwise        = 0
139
140    eqAscii :: Word8
141    eqAscii = 0x3D
142
143-- | convert from base32 in @src to binary in @dst, using the number of bytes specified
144--
145-- the user should use unBase32Length to compute the correct length, or check that
146-- the length specification is proper. no check is done here.
147fromBase32 :: Ptr Word8 -> Ptr Word8 -> Int -> IO (Maybe Int)
148fromBase32 dst src len
149    | len == 0  = return Nothing
150    | otherwise = loop 0 0
151  where
152    loop :: Int -- the index dst
153         -> Int -- the index src
154         -> IO (Maybe Int)
155    loop di i
156        | i == (len - 8) = do
157            i1 <- peekByteOff src i
158            i2 <- peekByteOff src (i + 1)
159            i3 <- peekByteOff src (i + 2)
160            i4 <- peekByteOff src (i + 3)
161            i5 <- peekByteOff src (i + 4)
162            i6 <- peekByteOff src (i + 5)
163            i7 <- peekByteOff src (i + 6)
164            i8 <- peekByteOff src (i + 7)
165
166            let (nbBytes, i3', i4', i5', i6', i7', i8') =
167                    case (i3, i4, i5, i6, i7, i8) of
168                        (0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D) -> (6, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41)
169                        (0x3D, _   , _   , _   , _   , _   ) -> (0, i3, i4, i5, i6, i7, i8) -- invalid
170                        (_   , 0x3D, 0x3D, 0x3D, 0x3D, 0x3D) -> (5, i3  , 0x41, 0x41, 0x41, 0x41, 0x41)
171                        (_   , 0x3D, _   , _   , _   , _   ) -> (0, i3, i4, i5, i6, i7, i8) -- invalid
172                        (_   , _   , 0x3D, 0x3D, 0x3D, 0x3D) -> (4, i3  , i4  , 0x41, 0x41, 0x41, 0x41)
173                        (_   , _   , 0x3D, _   , _   , _   ) -> (0, i3, i4, i5, i6, i7, i8) -- invalid
174                        (_   , _   , _   , 0x3D, 0x3D, 0x3D) -> (3, i3  , i4  , i5  , 0x41, 0x41, 0x41)
175                        (_   , _   , _   , 0x3D, _   , _   ) -> (0, i3, i4, i5, i6, i7, i8) -- invalid
176                        (_   , _   , _   , _   , 0x3D, 0x3D) -> (2, i3  , i4  , i5  , i6  , 0x41, 0x41)
177                        (_   , _   , _   , _   , 0x3D, _   ) -> (0, i3, i4, i5, i6, i7, i8) -- invalid
178                        (_   , _   , _   , _   , _   , 0x3D) -> (1, i3  , i4  , i5  , i6  , i7  , 0x41)
179                        (_   , _   , _   , _   , _   , _   ) -> (0 :: Int, i3, i4, i5, i6, i7, i8)
180
181            case fromBase32Per8Bytes (i1, i2, i3', i4', i5', i6', i7', i8') of
182                Left  ofs                  -> return $ Just (i + ofs)
183                Right (o1, o2, o3, o4, o5) -> do
184                    pokeByteOff dst  di    o1
185                    pokeByteOff dst (di+1) o2
186                    when (nbBytes < 5) $ pokeByteOff dst (di+2) o3
187                    when (nbBytes < 4) $ pokeByteOff dst (di+3) o4
188                    when (nbBytes < 2) $ pokeByteOff dst (di+4) o5
189                    return Nothing
190
191        | otherwise = do
192            i1 <- peekByteOff src i
193            i2 <- peekByteOff src (i + 1)
194            i3 <- peekByteOff src (i + 2)
195            i4 <- peekByteOff src (i + 3)
196            i5 <- peekByteOff src (i + 4)
197            i6 <- peekByteOff src (i + 5)
198            i7 <- peekByteOff src (i + 6)
199            i8 <- peekByteOff src (i + 7)
200
201            case fromBase32Per8Bytes (i1, i2, i3, i4, i5, i6, i7, i8) of
202                Left  ofs                  -> return $ Just (i + ofs)
203                Right (o1, o2, o3, o4, o5) -> do
204                    pokeByteOff dst  di    o1
205                    pokeByteOff dst (di+1) o2
206                    pokeByteOff dst (di+2) o3
207                    pokeByteOff dst (di+3) o4
208                    pokeByteOff dst (di+4) o5
209                    loop (di+5) (i+8)
210
211fromBase32Per8Bytes :: (Word8, Word8, Word8, Word8, Word8, Word8, Word8, Word8)
212                    -> Either Int (Word8, Word8, Word8, Word8, Word8)
213fromBase32Per8Bytes (i1, i2, i3, i4, i5, i6, i7, i8) =
214    case (rset i1, rset i2, rset i3, rset i4, rset i5, rset i6, rset i7, rset i8) of
215        (0xFF, _   , _   , _   , _   , _   , _   , _   ) -> Left 0
216        (_   , 0xFF, _   , _   , _   , _   , _   , _   ) -> Left 1
217        (_   , _   , 0xFF, _   , _   , _   , _   , _   ) -> Left 2
218        (_   , _   , _   , 0xFF, _   , _   , _   , _   ) -> Left 3
219        (_   , _   , _   , _   , 0xFF, _   , _   , _   ) -> Left 4
220        (_   , _   , _   , _   , _   , 0xFF, _   , _   ) -> Left 5
221        (_   , _   , _   , _   , _   , _   , 0xFF, _   ) -> Left 6
222        (_   , _   , _   , _   , _   , _   , _   , 0xFF) -> Left 7
223        (ri1 , ri2 , ri3 , ri4 , ri5 , ri6 , ri7 , ri8 ) ->
224                -- 0001 1111 << 3 | 0001 11xx >> 2
225            let o1 = (ri1 `unsafeShiftL` 3) .|. (ri2 `unsafeShiftR` 2)
226                -- 000x xx11 << 6 | 0001 1111 << 1 | 0001 xxxx >> 4
227                o2 = (ri2 `unsafeShiftL` 6) .|. (ri3 `unsafeShiftL` 1) .|. (ri4 `unsafeShiftR` 4)
228                -- 000x 1111 << 4 | 0001 111x >> 1
229                o3 = (ri4 `unsafeShiftL` 4) .|. (ri5 `unsafeShiftR` 1)
230                -- 000x xxx1 << 7 | 0001 1111 << 2 | 0001 1xxx >> 3
231                o4 = (ri5 `unsafeShiftL` 7) .|. (ri6 `unsafeShiftL` 2) .|. (ri7 `unsafeShiftR` 3)
232                -- 000x x111 << 5 | 0001 1111
233                o5 = (ri7 `unsafeShiftL` 5) .|. ri8
234             in Right (o1, o2, o3, o4, o5)
235  where
236    rset :: Word8 -> Word8
237    rset (W8# w)
238        | booleanPrim (w `leWord#` 0xff##) = W8# (indexWord8OffAddr# rsetTable (word2Int# w))
239        | otherwise                        = 0xff
240
241    !rsetTable = "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
242                 \\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
243                 \\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
244                 \\xFF\xFF\x1A\x1B\x1C\x1D\x1E\x1F\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
245                 \\xFF\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\
246                 \\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\xFF\xFF\xFF\xFF\xFF\
247                 \\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
248                 \\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
249                 \\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
250                 \\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
251                 \\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
252                 \\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
253                 \\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
254                 \\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
255                 \\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\
256                 \\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"#
257