1-- |
2-- Module      : Data.Memory.Encoding.Base64
3-- License     : BSD-style
4-- Maintainer  : Vincent Hanquez <vincent@snarc.org>
5-- Stability   : experimental
6-- Portability : unknown
7--
8-- Low-level Base64 encoding and decoding.
9--
10-- If you just want to encode or decode some bytes, you probably want to use
11-- the "Data.ByteArray.Encoding" module.
12--
13{-# LANGUAGE MagicHash         #-}
14{-# LANGUAGE UnboxedTuples     #-}
15{-# LANGUAGE OverloadedStrings #-}
16{-# LANGUAGE BangPatterns      #-}
17{-# LANGUAGE Rank2Types        #-}
18module Data.Memory.Encoding.Base64
19    ( toBase64
20    , toBase64URL
21    , toBase64OpenBSD
22    , unBase64Length
23    , unBase64LengthUnpadded
24    , fromBase64
25    , fromBase64URLUnpadded
26    , fromBase64OpenBSD
27    ) where
28
29import           Control.Monad
30import           Data.Memory.Internal.Compat
31import           Data.Memory.Internal.CompatPrim
32import           Data.Memory.Internal.Imports
33import           Data.Bits ((.|.))
34import           GHC.Prim
35import           GHC.Word
36import           Foreign.Storable
37import           Foreign.Ptr (Ptr)
38
39-- | Transform a number of bytes pointed by @src@ to base64 binary representation in @dst@
40--
41-- The destination memory need to be of correct size, otherwise it will lead
42-- to really bad things.
43toBase64 :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
44toBase64 dst src len = toBase64Internal set dst src len True
45  where
46        !set = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"#
47
48-- | Transform a number of bytes pointed by @src@ to, URL-safe base64 binary
49-- representation in @dst@. The result will be either padded or unpadded,
50-- depending on the boolean @padded@ argument.
51--
52-- The destination memory need to be of correct size, otherwise it will lead
53-- to really bad things.
54toBase64URL :: Bool -> Ptr Word8 -> Ptr Word8 -> Int -> IO ()
55toBase64URL padded dst src len = toBase64Internal set dst src len padded
56  where
57        !set = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"#
58
59toBase64OpenBSD :: Ptr Word8 -> Ptr Word8 -> Int -> IO ()
60toBase64OpenBSD dst src len = toBase64Internal set dst src len False
61  where
62        !set = "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"#
63
64toBase64Internal :: Addr# -> Ptr Word8 -> Ptr Word8 -> Int -> Bool -> IO ()
65toBase64Internal table dst src len padded = loop 0 0
66  where
67        eqChar = 0x3d :: Word8
68
69        loop i di
70            | i >= len  = return ()
71            | otherwise = do
72                a <- peekByteOff src i
73                b <- if i + 1 >= len then return 0 else peekByteOff src (i+1)
74                c <- if i + 2 >= len then return 0 else peekByteOff src (i+2)
75
76                let (w,x,y,z) = convert3 table a b c
77
78                pokeByteOff dst di     w
79                pokeByteOff dst (di+1) x
80
81                if i + 1 < len
82                    then
83                        pokeByteOff dst (di+2) y
84                    else
85                        when padded (pokeByteOff dst (di+2) eqChar)
86                if i + 2 < len
87                    then
88                        pokeByteOff dst (di+3) z
89                    else
90                        when padded (pokeByteOff dst (di+3) eqChar)
91
92                loop (i+3) (di+4)
93
94convert3 :: Addr# -> Word8 -> Word8 -> Word8 -> (Word8, Word8, Word8, Word8)
95convert3 table (W8# a) (W8# b) (W8# c) =
96    let !w = narrow8Word# (uncheckedShiftRL# a 2#)
97        !x = or# (and# (uncheckedShiftL# a 4#) 0x30##) (uncheckedShiftRL# b 4#)
98        !y = or# (and# (uncheckedShiftL# b 2#) 0x3c##) (uncheckedShiftRL# c 6#)
99        !z = and# c 0x3f##
100     in (index w, index x, index y, index z)
101  where
102        index :: Word# -> Word8
103        index idx = W8# (indexWord8OffAddr# table (word2Int# idx))
104
105-- | Get the length needed for the destination buffer for a base64 decoding.
106--
107-- if the length is not a multiple of 4, Nothing is returned
108unBase64Length :: Ptr Word8 -> Int -> IO (Maybe Int)
109unBase64Length src len
110    | len < 1            = return $ Just 0
111    | (len `mod` 4) /= 0 = return Nothing
112    | otherwise          = do
113        last1Byte <- peekByteOff src (len - 1)
114        last2Byte <- peekByteOff src (len - 2)
115        let dstLen = if last1Byte == eqAscii
116                        then if last2Byte == eqAscii then 2 else 1
117                        else 0
118        return $ Just $ (len `div` 4) * 3 - dstLen
119  where
120        eqAscii :: Word8
121        eqAscii = fromIntegral (fromEnum '=')
122
123-- | Get the length needed for the destination buffer for an
124-- <http://tools.ietf.org/html/rfc4648#section-3.2 unpadded> base64 decoding.
125--
126-- If the length of the encoded string is a multiple of 4, plus one, Nothing is
127-- returned. Any other value can be valid without padding.
128unBase64LengthUnpadded :: Int -> Maybe Int
129unBase64LengthUnpadded len = case r of
130    0 -> Just (3*q)
131    2 -> Just (3*q + 1)
132    3 -> Just (3*q + 2)
133    _ -> Nothing
134  where (q, r) = len `divMod` 4
135
136fromBase64OpenBSD :: Ptr Word8 -> Ptr Word8 -> Int -> IO (Maybe Int)
137fromBase64OpenBSD dst src len = fromBase64Unpadded rsetOpenBSD dst src len
138
139fromBase64URLUnpadded :: Ptr Word8 -> Ptr Word8 -> Int -> IO (Maybe Int)
140fromBase64URLUnpadded dst src len = fromBase64Unpadded rsetURL dst src len
141
142fromBase64Unpadded :: (Word8 -> Word8) -> Ptr Word8 -> Ptr Word8 -> Int -> IO (Maybe Int)
143fromBase64Unpadded rset dst src len = loop 0 0
144  where loop di i
145            | i == len       = return Nothing
146            | i == len - 1   = return Nothing -- Shouldn't happen if len is valid
147            | i == len - 2   = do
148                a <- peekByteOff src i
149                b <- peekByteOff src (i+1)
150
151                case decode2 a b of
152                    Left ofs -> return $ Just (i + ofs)
153                    Right x  -> do
154                        pokeByteOff dst di x
155                        return Nothing
156            | i == len - 3   = do
157                a <- peekByteOff src i
158                b <- peekByteOff src (i+1)
159                c <- peekByteOff src (i+2)
160
161                case decode3 a b c of
162                    Left ofs    -> return $ Just (i + ofs)
163                    Right (x,y) -> do
164                        pokeByteOff dst di     x
165                        pokeByteOff dst (di+1) y
166                        return Nothing
167            | otherwise      = do
168                a <- peekByteOff src i
169                b <- peekByteOff src (i+1)
170                c <- peekByteOff src (i+2)
171                d <- peekByteOff src (i+3)
172
173                case decode4 a b c d of
174                    Left ofs      -> return $ Just (i + ofs)
175                    Right (x,y,z) -> do
176                        pokeByteOff dst di     x
177                        pokeByteOff dst (di+1) y
178                        pokeByteOff dst (di+2) z
179                        loop (di + 3) (i + 4)
180
181        decode2 :: Word8 -> Word8 -> Either Int Word8
182        decode2 a b =
183            case (rset a, rset b) of
184                (0xff, _   ) -> Left 0
185                (_   , 0xff) -> Left 1
186                (ra  , rb  ) -> Right ((ra `unsafeShiftL` 2) .|. (rb `unsafeShiftR` 4))
187
188        decode3 :: Word8 -> Word8 -> Word8 -> Either Int (Word8, Word8)
189        decode3 a b c =
190            case (rset a, rset b, rset c) of
191                (0xff, _   , _   ) -> Left 0
192                (_   , 0xff, _   ) -> Left 1
193                (_   , _   , 0xff) -> Left 2
194                (ra  , rb  , rc  ) ->
195                    let x = (ra `unsafeShiftL` 2) .|. (rb `unsafeShiftR` 4)
196                        y = (rb `unsafeShiftL` 4) .|. (rc `unsafeShiftR` 2)
197                     in Right (x,y)
198
199
200        decode4 :: Word8 -> Word8 -> Word8 -> Word8 -> Either Int (Word8, Word8, Word8)
201        decode4 a b c d =
202            case (rset a, rset b, rset c, rset d) of
203                (0xff, _   , _   , _   ) -> Left 0
204                (_   , 0xff, _   , _   ) -> Left 1
205                (_   , _   , 0xff, _   ) -> Left 2
206                (_   , _   , _   , 0xff) -> Left 3
207                (ra  , rb  , rc  , rd  ) ->
208                    let x = (ra `unsafeShiftL` 2) .|. (rb `unsafeShiftR` 4)
209                        y = (rb `unsafeShiftL` 4) .|. (rc `unsafeShiftR` 2)
210                        z = (rc `unsafeShiftL` 6) .|. rd
211                     in Right (x,y,z)
212
213rsetURL :: Word8 -> Word8
214rsetURL (W8# w)
215    | booleanPrim (w `leWord#` 0xff##) = W8# (indexWord8OffAddr# rsetTable (word2Int# w))
216    | otherwise                        = 0xff
217  where !rsetTable = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
218                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
219                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x3e\xff\xff\
220                     \\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\xff\xff\xff\xff\xff\xff\
221                     \\xff\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\
222                     \\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\xff\xff\xff\xff\x3f\
223                     \\xff\x1a\x1b\x1c\x1d\x1e\x1f\x20\x21\x22\x23\x24\x25\x26\x27\x28\
224                     \\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33\xff\xff\xff\xff\xff\
225                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
226                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
227                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
228                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
229                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
230                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
231                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
232                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"#
233
234rsetOpenBSD :: Word8 -> Word8
235rsetOpenBSD (W8# w)
236    | booleanPrim (w `leWord#` 0xff##) = W8# (indexWord8OffAddr# rsetTable (word2Int# w))
237    | otherwise                        = 0xff
238  where !rsetTable = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
239                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
240                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00\x01\
241                     \\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f\xff\xff\xff\xff\xff\xff\
242                     \\xff\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\
243                     \\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\xff\xff\xff\xff\xff\
244                     \\xff\x1c\x1d\x1e\x1f\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\
245                     \\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33\x34\x35\xff\xff\xff\xff\xff\
246                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
247                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
248                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
249                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
250                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
251                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
252                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
253                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"#
254
255
256-- | convert from base64 in @src@ to binary in @dst@, using the number of bytes specified
257--
258-- the user should use unBase64Length to compute the correct length, or check that
259-- the length specification is proper. no check is done here.
260fromBase64 :: Ptr Word8 -> Ptr Word8 -> Int -> IO (Maybe Int)
261fromBase64 dst src len
262    | len == 0  = return Nothing
263    | otherwise = loop 0 0
264  where loop di i
265            | i == (len-4) = do
266                a <- peekByteOff src i
267                b <- peekByteOff src (i+1)
268                c <- peekByteOff src (i+2)
269                d <- peekByteOff src (i+3)
270
271                let (nbBytes, c',d') =
272                        case (c,d) of
273                            (0x3d, 0x3d) -> (2, 0x30, 0x30)
274                            (0x3d, _   ) -> (0, c, d) -- invalid: automatically 'c' will make it error out
275                            (_   , 0x3d) -> (1, c, 0x30)
276                            (_   , _   ) -> (0 :: Int, c, d)
277                case decode4 a b c' d' of
278                    Left ofs -> return $ Just (i + ofs)
279                    Right (x,y,z) -> do
280                        pokeByteOff dst di x
281                        when (nbBytes < 2) $ pokeByteOff dst (di+1) y
282                        when (nbBytes < 1) $ pokeByteOff dst (di+2) z
283                        return Nothing
284            | otherwise    = do
285                a <- peekByteOff src i
286                b <- peekByteOff src (i+1)
287                c <- peekByteOff src (i+2)
288                d <- peekByteOff src (i+3)
289
290                case decode4 a b c d of
291                    Left ofs      -> return $ Just (i + ofs)
292                    Right (x,y,z) -> do
293                        pokeByteOff dst di     x
294                        pokeByteOff dst (di+1) y
295                        pokeByteOff dst (di+2) z
296                        loop (di + 3) (i + 4)
297
298        decode4 :: Word8 -> Word8 -> Word8 -> Word8 -> Either Int (Word8, Word8, Word8)
299        decode4 a b c d =
300            case (rset a, rset b, rset c, rset d) of
301                (0xff, _   , _   , _   ) -> Left 0
302                (_   , 0xff, _   , _   ) -> Left 1
303                (_   , _   , 0xff, _   ) -> Left 2
304                (_   , _   , _   , 0xff) -> Left 3
305                (ra  , rb  , rc  , rd  ) ->
306                    let x = (ra `unsafeShiftL` 2) .|. (rb `unsafeShiftR` 4)
307                        y = (rb `unsafeShiftL` 4) .|. (rc `unsafeShiftR` 2)
308                        z = (rc `unsafeShiftL` 6) .|. rd
309                     in Right (x,y,z)
310
311        rset :: Word8 -> Word8
312        rset (W8# w)
313            | booleanPrim (w `leWord#` 0xff##) = W8# (indexWord8OffAddr# rsetTable (word2Int# w))
314            | otherwise                        = 0xff
315
316        !rsetTable = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
317                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
318                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x3e\xff\xff\xff\x3f\
319                     \\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\xff\xff\xff\xff\xff\xff\
320                     \\xff\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\
321                     \\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\xff\xff\xff\xff\xff\
322                     \\xff\x1a\x1b\x1c\x1d\x1e\x1f\x20\x21\x22\x23\x24\x25\x26\x27\x28\
323                     \\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33\xff\xff\xff\xff\xff\
324                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
325                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
326                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
327                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
328                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
329                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
330                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\
331                     \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"#
332