1-- | 2-- Module : Data.Memory.Encoding.Base64 3-- License : BSD-style 4-- Maintainer : Vincent Hanquez <vincent@snarc.org> 5-- Stability : experimental 6-- Portability : unknown 7-- 8-- Low-level Base64 encoding and decoding. 9-- 10-- If you just want to encode or decode some bytes, you probably want to use 11-- the "Data.ByteArray.Encoding" module. 12-- 13{-# LANGUAGE MagicHash #-} 14{-# LANGUAGE UnboxedTuples #-} 15{-# LANGUAGE OverloadedStrings #-} 16{-# LANGUAGE BangPatterns #-} 17{-# LANGUAGE Rank2Types #-} 18module Data.Memory.Encoding.Base64 19 ( toBase64 20 , toBase64URL 21 , toBase64OpenBSD 22 , unBase64Length 23 , unBase64LengthUnpadded 24 , fromBase64 25 , fromBase64URLUnpadded 26 , fromBase64OpenBSD 27 ) where 28 29import Control.Monad 30import Data.Memory.Internal.Compat 31import Data.Memory.Internal.CompatPrim 32import Data.Memory.Internal.Imports 33import Data.Bits ((.|.)) 34import GHC.Prim 35import GHC.Word 36import Foreign.Storable 37import Foreign.Ptr (Ptr) 38 39-- | Transform a number of bytes pointed by @src@ to base64 binary representation in @dst@ 40-- 41-- The destination memory need to be of correct size, otherwise it will lead 42-- to really bad things. 43toBase64 :: Ptr Word8 -> Ptr Word8 -> Int -> IO () 44toBase64 dst src len = toBase64Internal set dst src len True 45 where 46 !set = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"# 47 48-- | Transform a number of bytes pointed by @src@ to, URL-safe base64 binary 49-- representation in @dst@. The result will be either padded or unpadded, 50-- depending on the boolean @padded@ argument. 51-- 52-- The destination memory need to be of correct size, otherwise it will lead 53-- to really bad things. 54toBase64URL :: Bool -> Ptr Word8 -> Ptr Word8 -> Int -> IO () 55toBase64URL padded dst src len = toBase64Internal set dst src len padded 56 where 57 !set = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"# 58 59toBase64OpenBSD :: Ptr Word8 -> Ptr Word8 -> Int -> IO () 60toBase64OpenBSD dst src len = toBase64Internal set dst src len False 61 where 62 !set = "./ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"# 63 64toBase64Internal :: Addr# -> Ptr Word8 -> Ptr Word8 -> Int -> Bool -> IO () 65toBase64Internal table dst src len padded = loop 0 0 66 where 67 eqChar = 0x3d :: Word8 68 69 loop i di 70 | i >= len = return () 71 | otherwise = do 72 a <- peekByteOff src i 73 b <- if i + 1 >= len then return 0 else peekByteOff src (i+1) 74 c <- if i + 2 >= len then return 0 else peekByteOff src (i+2) 75 76 let (w,x,y,z) = convert3 table a b c 77 78 pokeByteOff dst di w 79 pokeByteOff dst (di+1) x 80 81 if i + 1 < len 82 then 83 pokeByteOff dst (di+2) y 84 else 85 when padded (pokeByteOff dst (di+2) eqChar) 86 if i + 2 < len 87 then 88 pokeByteOff dst (di+3) z 89 else 90 when padded (pokeByteOff dst (di+3) eqChar) 91 92 loop (i+3) (di+4) 93 94convert3 :: Addr# -> Word8 -> Word8 -> Word8 -> (Word8, Word8, Word8, Word8) 95convert3 table (W8# a) (W8# b) (W8# c) = 96 let !w = narrow8Word# (uncheckedShiftRL# a 2#) 97 !x = or# (and# (uncheckedShiftL# a 4#) 0x30##) (uncheckedShiftRL# b 4#) 98 !y = or# (and# (uncheckedShiftL# b 2#) 0x3c##) (uncheckedShiftRL# c 6#) 99 !z = and# c 0x3f## 100 in (index w, index x, index y, index z) 101 where 102 index :: Word# -> Word8 103 index idx = W8# (indexWord8OffAddr# table (word2Int# idx)) 104 105-- | Get the length needed for the destination buffer for a base64 decoding. 106-- 107-- if the length is not a multiple of 4, Nothing is returned 108unBase64Length :: Ptr Word8 -> Int -> IO (Maybe Int) 109unBase64Length src len 110 | len < 1 = return $ Just 0 111 | (len `mod` 4) /= 0 = return Nothing 112 | otherwise = do 113 last1Byte <- peekByteOff src (len - 1) 114 last2Byte <- peekByteOff src (len - 2) 115 let dstLen = if last1Byte == eqAscii 116 then if last2Byte == eqAscii then 2 else 1 117 else 0 118 return $ Just $ (len `div` 4) * 3 - dstLen 119 where 120 eqAscii :: Word8 121 eqAscii = fromIntegral (fromEnum '=') 122 123-- | Get the length needed for the destination buffer for an 124-- <http://tools.ietf.org/html/rfc4648#section-3.2 unpadded> base64 decoding. 125-- 126-- If the length of the encoded string is a multiple of 4, plus one, Nothing is 127-- returned. Any other value can be valid without padding. 128unBase64LengthUnpadded :: Int -> Maybe Int 129unBase64LengthUnpadded len = case r of 130 0 -> Just (3*q) 131 2 -> Just (3*q + 1) 132 3 -> Just (3*q + 2) 133 _ -> Nothing 134 where (q, r) = len `divMod` 4 135 136fromBase64OpenBSD :: Ptr Word8 -> Ptr Word8 -> Int -> IO (Maybe Int) 137fromBase64OpenBSD dst src len = fromBase64Unpadded rsetOpenBSD dst src len 138 139fromBase64URLUnpadded :: Ptr Word8 -> Ptr Word8 -> Int -> IO (Maybe Int) 140fromBase64URLUnpadded dst src len = fromBase64Unpadded rsetURL dst src len 141 142fromBase64Unpadded :: (Word8 -> Word8) -> Ptr Word8 -> Ptr Word8 -> Int -> IO (Maybe Int) 143fromBase64Unpadded rset dst src len = loop 0 0 144 where loop di i 145 | i == len = return Nothing 146 | i == len - 1 = return Nothing -- Shouldn't happen if len is valid 147 | i == len - 2 = do 148 a <- peekByteOff src i 149 b <- peekByteOff src (i+1) 150 151 case decode2 a b of 152 Left ofs -> return $ Just (i + ofs) 153 Right x -> do 154 pokeByteOff dst di x 155 return Nothing 156 | i == len - 3 = do 157 a <- peekByteOff src i 158 b <- peekByteOff src (i+1) 159 c <- peekByteOff src (i+2) 160 161 case decode3 a b c of 162 Left ofs -> return $ Just (i + ofs) 163 Right (x,y) -> do 164 pokeByteOff dst di x 165 pokeByteOff dst (di+1) y 166 return Nothing 167 | otherwise = do 168 a <- peekByteOff src i 169 b <- peekByteOff src (i+1) 170 c <- peekByteOff src (i+2) 171 d <- peekByteOff src (i+3) 172 173 case decode4 a b c d of 174 Left ofs -> return $ Just (i + ofs) 175 Right (x,y,z) -> do 176 pokeByteOff dst di x 177 pokeByteOff dst (di+1) y 178 pokeByteOff dst (di+2) z 179 loop (di + 3) (i + 4) 180 181 decode2 :: Word8 -> Word8 -> Either Int Word8 182 decode2 a b = 183 case (rset a, rset b) of 184 (0xff, _ ) -> Left 0 185 (_ , 0xff) -> Left 1 186 (ra , rb ) -> Right ((ra `unsafeShiftL` 2) .|. (rb `unsafeShiftR` 4)) 187 188 decode3 :: Word8 -> Word8 -> Word8 -> Either Int (Word8, Word8) 189 decode3 a b c = 190 case (rset a, rset b, rset c) of 191 (0xff, _ , _ ) -> Left 0 192 (_ , 0xff, _ ) -> Left 1 193 (_ , _ , 0xff) -> Left 2 194 (ra , rb , rc ) -> 195 let x = (ra `unsafeShiftL` 2) .|. (rb `unsafeShiftR` 4) 196 y = (rb `unsafeShiftL` 4) .|. (rc `unsafeShiftR` 2) 197 in Right (x,y) 198 199 200 decode4 :: Word8 -> Word8 -> Word8 -> Word8 -> Either Int (Word8, Word8, Word8) 201 decode4 a b c d = 202 case (rset a, rset b, rset c, rset d) of 203 (0xff, _ , _ , _ ) -> Left 0 204 (_ , 0xff, _ , _ ) -> Left 1 205 (_ , _ , 0xff, _ ) -> Left 2 206 (_ , _ , _ , 0xff) -> Left 3 207 (ra , rb , rc , rd ) -> 208 let x = (ra `unsafeShiftL` 2) .|. (rb `unsafeShiftR` 4) 209 y = (rb `unsafeShiftL` 4) .|. (rc `unsafeShiftR` 2) 210 z = (rc `unsafeShiftL` 6) .|. rd 211 in Right (x,y,z) 212 213rsetURL :: Word8 -> Word8 214rsetURL (W8# w) 215 | booleanPrim (w `leWord#` 0xff##) = W8# (indexWord8OffAddr# rsetTable (word2Int# w)) 216 | otherwise = 0xff 217 where !rsetTable = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 218 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 219 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x3e\xff\xff\ 220 \\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\xff\xff\xff\xff\xff\xff\ 221 \\xff\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\ 222 \\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\xff\xff\xff\xff\x3f\ 223 \\xff\x1a\x1b\x1c\x1d\x1e\x1f\x20\x21\x22\x23\x24\x25\x26\x27\x28\ 224 \\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33\xff\xff\xff\xff\xff\ 225 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 226 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 227 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 228 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 229 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 230 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 231 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 232 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"# 233 234rsetOpenBSD :: Word8 -> Word8 235rsetOpenBSD (W8# w) 236 | booleanPrim (w `leWord#` 0xff##) = W8# (indexWord8OffAddr# rsetTable (word2Int# w)) 237 | otherwise = 0xff 238 where !rsetTable = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 239 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 240 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00\x01\ 241 \\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f\xff\xff\xff\xff\xff\xff\ 242 \\xff\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\ 243 \\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\xff\xff\xff\xff\xff\ 244 \\xff\x1c\x1d\x1e\x1f\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\ 245 \\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33\x34\x35\xff\xff\xff\xff\xff\ 246 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 247 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 248 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 249 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 250 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 251 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 252 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 253 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"# 254 255 256-- | convert from base64 in @src@ to binary in @dst@, using the number of bytes specified 257-- 258-- the user should use unBase64Length to compute the correct length, or check that 259-- the length specification is proper. no check is done here. 260fromBase64 :: Ptr Word8 -> Ptr Word8 -> Int -> IO (Maybe Int) 261fromBase64 dst src len 262 | len == 0 = return Nothing 263 | otherwise = loop 0 0 264 where loop di i 265 | i == (len-4) = do 266 a <- peekByteOff src i 267 b <- peekByteOff src (i+1) 268 c <- peekByteOff src (i+2) 269 d <- peekByteOff src (i+3) 270 271 let (nbBytes, c',d') = 272 case (c,d) of 273 (0x3d, 0x3d) -> (2, 0x30, 0x30) 274 (0x3d, _ ) -> (0, c, d) -- invalid: automatically 'c' will make it error out 275 (_ , 0x3d) -> (1, c, 0x30) 276 (_ , _ ) -> (0 :: Int, c, d) 277 case decode4 a b c' d' of 278 Left ofs -> return $ Just (i + ofs) 279 Right (x,y,z) -> do 280 pokeByteOff dst di x 281 when (nbBytes < 2) $ pokeByteOff dst (di+1) y 282 when (nbBytes < 1) $ pokeByteOff dst (di+2) z 283 return Nothing 284 | otherwise = do 285 a <- peekByteOff src i 286 b <- peekByteOff src (i+1) 287 c <- peekByteOff src (i+2) 288 d <- peekByteOff src (i+3) 289 290 case decode4 a b c d of 291 Left ofs -> return $ Just (i + ofs) 292 Right (x,y,z) -> do 293 pokeByteOff dst di x 294 pokeByteOff dst (di+1) y 295 pokeByteOff dst (di+2) z 296 loop (di + 3) (i + 4) 297 298 decode4 :: Word8 -> Word8 -> Word8 -> Word8 -> Either Int (Word8, Word8, Word8) 299 decode4 a b c d = 300 case (rset a, rset b, rset c, rset d) of 301 (0xff, _ , _ , _ ) -> Left 0 302 (_ , 0xff, _ , _ ) -> Left 1 303 (_ , _ , 0xff, _ ) -> Left 2 304 (_ , _ , _ , 0xff) -> Left 3 305 (ra , rb , rc , rd ) -> 306 let x = (ra `unsafeShiftL` 2) .|. (rb `unsafeShiftR` 4) 307 y = (rb `unsafeShiftL` 4) .|. (rc `unsafeShiftR` 2) 308 z = (rc `unsafeShiftL` 6) .|. rd 309 in Right (x,y,z) 310 311 rset :: Word8 -> Word8 312 rset (W8# w) 313 | booleanPrim (w `leWord#` 0xff##) = W8# (indexWord8OffAddr# rsetTable (word2Int# w)) 314 | otherwise = 0xff 315 316 !rsetTable = "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 317 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 318 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x3e\xff\xff\xff\x3f\ 319 \\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\xff\xff\xff\xff\xff\xff\ 320 \\xff\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\ 321 \\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\xff\xff\xff\xff\xff\ 322 \\xff\x1a\x1b\x1c\x1d\x1e\x1f\x20\x21\x22\x23\x24\x25\x26\x27\x28\ 323 \\x29\x2a\x2b\x2c\x2d\x2e\x2f\x30\x31\x32\x33\xff\xff\xff\xff\xff\ 324 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 325 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 326 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 327 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 328 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 329 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 330 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\ 331 \\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"# 332