1-- | 2-- Module : Data.Memory.Encoding.Base32 3-- License : BSD-style 4-- Maintainer : Nicolas DI PRIMA <nicolas@di-prima.fr> 5-- Stability : experimental 6-- Portability : unknown 7-- 8-- Low-level Base32 encoding and decoding. 9-- 10-- If you just want to encode or decode some bytes, you probably want to use 11-- the "Data.ByteArray.Encoding" module. 12-- 13{-# LANGUAGE MagicHash #-} 14{-# LANGUAGE UnboxedTuples #-} 15{-# LANGUAGE OverloadedStrings #-} 16{-# LANGUAGE BangPatterns #-} 17{-# LANGUAGE Rank2Types #-} 18module Data.Memory.Encoding.Base32 19 ( toBase32 20 , unBase32Length 21 , fromBase32 22 ) where 23 24import Data.Memory.Internal.Compat 25import Data.Memory.Internal.CompatPrim 26import Data.Word 27import Data.Bits ((.|.)) 28import GHC.Prim 29import GHC.Word 30import Control.Monad 31import Foreign.Storable 32import Foreign.Ptr (Ptr) 33 34-- | Transform a number of bytes pointed by.@src in the base32 binary representation in @dst 35-- 36-- destination memory need to be of correct size, otherwise it will lead 37-- to really bad things. 38toBase32 :: Ptr Word8 -- ^ input 39 -> Ptr Word8 -- ^ output 40 -> Int -- ^ input len 41 -> IO () 42toBase32 dst src len = loop 0 0 43 where 44 eqChar :: Word8 45 eqChar = 0x3d 46 47 peekOrZero :: Int -> IO Word8 48 peekOrZero i 49 | i >= len = return 0 50 | otherwise = peekByteOff src i 51 52 pokeOrPadding :: Int -- for the test 53 -> Int -- src index 54 -> Word8 -- the value 55 -> IO () 56 pokeOrPadding i di v 57 | i < len = pokeByteOff dst di v 58 | otherwise = pokeByteOff dst di eqChar 59 60 loop :: Int -- index input 61 -> Int -- index output 62 -> IO () 63 loop i di 64 | i >= len = return () 65 | otherwise = do 66 i1 <- peekByteOff src i 67 i2 <- peekOrZero (i + 1) 68 i3 <- peekOrZero (i + 2) 69 i4 <- peekOrZero (i + 3) 70 i5 <- peekOrZero (i + 4) 71 72 let (o1,o2,o3,o4,o5,o6,o7,o8) = toBase32Per5Bytes (i1, i2, i3, i4, i5) 73 74 pokeByteOff dst di o1 75 pokeByteOff dst (di + 1) o2 76 pokeOrPadding (i + 1) (di + 2) o3 77 pokeOrPadding (i + 1) (di + 3) o4 78 pokeOrPadding (i + 2) (di + 4) o5 79 pokeOrPadding (i + 3) (di + 5) o6 80 pokeOrPadding (i + 3) (di + 6) o7 81 pokeOrPadding (i + 4) (di + 7) o8 82 83 loop (i+5) (di+8) 84 85toBase32Per5Bytes :: (Word8, Word8, Word8, Word8, Word8) 86 -> (Word8, Word8, Word8, Word8, Word8, Word8, Word8, Word8) 87toBase32Per5Bytes (W8# i1, W8# i2, W8# i3, W8# i4, W8# i5) = 88 (index o1, index o2, index o3, index o4, index o5, index o6, index o7, index o8) 89 where 90 -- 1111 1000 >> 3 91 !o1 = (uncheckedShiftRL# (and# i1 0xF8##) 3#) 92 -- 0000 0111 << 2 | 1100 0000 >> 6 93 !o2 = or# (uncheckedShiftL# (and# i1 0x07##) 2#) (uncheckedShiftRL# (and# i2 0xC0##) 6#) 94 -- 0011 1110 >> 1 95 !o3 = (uncheckedShiftRL# (and# i2 0x3E##) 1#) 96 -- 0000 0001 << 4 | 1111 0000 >> 4 97 !o4 = or# (uncheckedShiftL# (and# i2 0x01##) 4#) (uncheckedShiftRL# (and# i3 0xF0##) 4#) 98 -- 0000 1111 << 1 | 1000 0000 >> 7 99 !o5 = or# (uncheckedShiftL# (and# i3 0x0F##) 1#) (uncheckedShiftRL# (and# i4 0x80##) 7#) 100 -- 0111 1100 >> 2 101 !o6 = (uncheckedShiftRL# (and# i4 0x7C##) 2#) 102 -- 0000 0011 << 3 | 1110 0000 >> 5 103 !o7 = or# (uncheckedShiftL# (and# i4 0x03##) 3#) (uncheckedShiftRL# (and# i5 0xE0##) 5#) 104 -- 0001 1111 105 !o8 = ((and# i5 0x1F##)) 106 107 !set = "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567"# 108 109 index :: Word# -> Word8 110 index idx = W8# (indexWord8OffAddr# set (word2Int# idx)) 111 112-- | Get the length needed for the destination buffer for a base32 decoding. 113-- 114-- if the length is not a multiple of 8, Nothing is returned 115unBase32Length :: Ptr Word8 -> Int -> IO (Maybe Int) 116unBase32Length src len 117 | len < 1 = return $ Just 0 118 | (len `mod` 8) /= 0 = return Nothing 119 | otherwise = do 120 last1Byte <- peekByteOff src (len - 1) 121 last2Byte <- peekByteOff src (len - 2) 122 last3Byte <- peekByteOff src (len - 3) 123 last4Byte <- peekByteOff src (len - 4) 124 last5Byte <- peekByteOff src (len - 5) 125 last6Byte <- peekByteOff src (len - 6) 126 127 let dstLen = caseByte last1Byte last2Byte last3Byte last4Byte last5Byte last6Byte 128 return $ Just $ (len `div` 8) * 5 - dstLen 129 where 130 caseByte :: Word8 -> Word8 -> Word8 -> Word8 -> Word8 -> Word8 -> Int 131 caseByte last1 last2 last3 last4 last5 last6 132 | last6 == eqAscii = 4 133 | last5 == eqAscii = 3 -- error this padding is not expected (error will be detected in fromBase32) 134 | last4 == eqAscii = 3 135 | last3 == eqAscii = 2 136 | last2 == eqAscii = 1 -- error this padding is not expected (error will be detected in fromBase32) 137 | last1 == eqAscii = 1 138 | otherwise = 0 139 140 eqAscii :: Word8 141 eqAscii = 0x3D 142 143-- | convert from base32 in @src to binary in @dst, using the number of bytes specified 144-- 145-- the user should use unBase32Length to compute the correct length, or check that 146-- the length specification is proper. no check is done here. 147fromBase32 :: Ptr Word8 -> Ptr Word8 -> Int -> IO (Maybe Int) 148fromBase32 dst src len 149 | len == 0 = return Nothing 150 | otherwise = loop 0 0 151 where 152 loop :: Int -- the index dst 153 -> Int -- the index src 154 -> IO (Maybe Int) 155 loop di i 156 | i == (len - 8) = do 157 i1 <- peekByteOff src i 158 i2 <- peekByteOff src (i + 1) 159 i3 <- peekByteOff src (i + 2) 160 i4 <- peekByteOff src (i + 3) 161 i5 <- peekByteOff src (i + 4) 162 i6 <- peekByteOff src (i + 5) 163 i7 <- peekByteOff src (i + 6) 164 i8 <- peekByteOff src (i + 7) 165 166 let (nbBytes, i3', i4', i5', i6', i7', i8') = 167 case (i3, i4, i5, i6, i7, i8) of 168 (0x3D, 0x3D, 0x3D, 0x3D, 0x3D, 0x3D) -> (6, 0x41, 0x41, 0x41, 0x41, 0x41, 0x41) 169 (0x3D, _ , _ , _ , _ , _ ) -> (0, i3, i4, i5, i6, i7, i8) -- invalid 170 (_ , 0x3D, 0x3D, 0x3D, 0x3D, 0x3D) -> (5, i3 , 0x41, 0x41, 0x41, 0x41, 0x41) 171 (_ , 0x3D, _ , _ , _ , _ ) -> (0, i3, i4, i5, i6, i7, i8) -- invalid 172 (_ , _ , 0x3D, 0x3D, 0x3D, 0x3D) -> (4, i3 , i4 , 0x41, 0x41, 0x41, 0x41) 173 (_ , _ , 0x3D, _ , _ , _ ) -> (0, i3, i4, i5, i6, i7, i8) -- invalid 174 (_ , _ , _ , 0x3D, 0x3D, 0x3D) -> (3, i3 , i4 , i5 , 0x41, 0x41, 0x41) 175 (_ , _ , _ , 0x3D, _ , _ ) -> (0, i3, i4, i5, i6, i7, i8) -- invalid 176 (_ , _ , _ , _ , 0x3D, 0x3D) -> (2, i3 , i4 , i5 , i6 , 0x41, 0x41) 177 (_ , _ , _ , _ , 0x3D, _ ) -> (0, i3, i4, i5, i6, i7, i8) -- invalid 178 (_ , _ , _ , _ , _ , 0x3D) -> (1, i3 , i4 , i5 , i6 , i7 , 0x41) 179 (_ , _ , _ , _ , _ , _ ) -> (0 :: Int, i3, i4, i5, i6, i7, i8) 180 181 case fromBase32Per8Bytes (i1, i2, i3', i4', i5', i6', i7', i8') of 182 Left ofs -> return $ Just (i + ofs) 183 Right (o1, o2, o3, o4, o5) -> do 184 pokeByteOff dst di o1 185 pokeByteOff dst (di+1) o2 186 when (nbBytes < 5) $ pokeByteOff dst (di+2) o3 187 when (nbBytes < 4) $ pokeByteOff dst (di+3) o4 188 when (nbBytes < 2) $ pokeByteOff dst (di+4) o5 189 return Nothing 190 191 | otherwise = do 192 i1 <- peekByteOff src i 193 i2 <- peekByteOff src (i + 1) 194 i3 <- peekByteOff src (i + 2) 195 i4 <- peekByteOff src (i + 3) 196 i5 <- peekByteOff src (i + 4) 197 i6 <- peekByteOff src (i + 5) 198 i7 <- peekByteOff src (i + 6) 199 i8 <- peekByteOff src (i + 7) 200 201 case fromBase32Per8Bytes (i1, i2, i3, i4, i5, i6, i7, i8) of 202 Left ofs -> return $ Just (i + ofs) 203 Right (o1, o2, o3, o4, o5) -> do 204 pokeByteOff dst di o1 205 pokeByteOff dst (di+1) o2 206 pokeByteOff dst (di+2) o3 207 pokeByteOff dst (di+3) o4 208 pokeByteOff dst (di+4) o5 209 loop (di+5) (i+8) 210 211fromBase32Per8Bytes :: (Word8, Word8, Word8, Word8, Word8, Word8, Word8, Word8) 212 -> Either Int (Word8, Word8, Word8, Word8, Word8) 213fromBase32Per8Bytes (i1, i2, i3, i4, i5, i6, i7, i8) = 214 case (rset i1, rset i2, rset i3, rset i4, rset i5, rset i6, rset i7, rset i8) of 215 (0xFF, _ , _ , _ , _ , _ , _ , _ ) -> Left 0 216 (_ , 0xFF, _ , _ , _ , _ , _ , _ ) -> Left 1 217 (_ , _ , 0xFF, _ , _ , _ , _ , _ ) -> Left 2 218 (_ , _ , _ , 0xFF, _ , _ , _ , _ ) -> Left 3 219 (_ , _ , _ , _ , 0xFF, _ , _ , _ ) -> Left 4 220 (_ , _ , _ , _ , _ , 0xFF, _ , _ ) -> Left 5 221 (_ , _ , _ , _ , _ , _ , 0xFF, _ ) -> Left 6 222 (_ , _ , _ , _ , _ , _ , _ , 0xFF) -> Left 7 223 (ri1 , ri2 , ri3 , ri4 , ri5 , ri6 , ri7 , ri8 ) -> 224 -- 0001 1111 << 3 | 0001 11xx >> 2 225 let o1 = (ri1 `unsafeShiftL` 3) .|. (ri2 `unsafeShiftR` 2) 226 -- 000x xx11 << 6 | 0001 1111 << 1 | 0001 xxxx >> 4 227 o2 = (ri2 `unsafeShiftL` 6) .|. (ri3 `unsafeShiftL` 1) .|. (ri4 `unsafeShiftR` 4) 228 -- 000x 1111 << 4 | 0001 111x >> 1 229 o3 = (ri4 `unsafeShiftL` 4) .|. (ri5 `unsafeShiftR` 1) 230 -- 000x xxx1 << 7 | 0001 1111 << 2 | 0001 1xxx >> 3 231 o4 = (ri5 `unsafeShiftL` 7) .|. (ri6 `unsafeShiftL` 2) .|. (ri7 `unsafeShiftR` 3) 232 -- 000x x111 << 5 | 0001 1111 233 o5 = (ri7 `unsafeShiftL` 5) .|. ri8 234 in Right (o1, o2, o3, o4, o5) 235 where 236 rset :: Word8 -> Word8 237 rset (W8# w) 238 | booleanPrim (w `leWord#` 0xff##) = W8# (indexWord8OffAddr# rsetTable (word2Int# w)) 239 | otherwise = 0xff 240 241 !rsetTable = "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\ 242 \\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\ 243 \\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\ 244 \\xFF\xFF\x1A\x1B\x1C\x1D\x1E\x1F\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\ 245 \\xFF\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0A\x0B\x0C\x0D\x0E\ 246 \\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\xFF\xFF\xFF\xFF\xFF\ 247 \\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\ 248 \\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\ 249 \\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\ 250 \\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\ 251 \\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\ 252 \\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\ 253 \\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\ 254 \\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\ 255 \\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\ 256 \\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF"# 257