module Bytes.Encode exposing
    ( encode
    , Encoder
    , signedInt8, signedInt16, signedInt32
    , unsignedInt8, unsignedInt16, unsignedInt32
    , float32, float64
    , bytes
    , string, getStringWidth
    , sequence
    )

{-|

# Encoders
@docs encode, Encoder, sequence

# Integers
@docs signedInt8, signedInt16, signedInt32,
  unsignedInt8, unsignedInt16, unsignedInt32

# Floats
@docs float32, float64

# Bytes
@docs bytes

# Strings
@docs string, getStringWidth

-}

import Bytes exposing (Bytes, Endianness(..))



-- ENCODER


{-| A description of how to produce a sequence of bytes.

Encoders compose: build small ones with the functions in this module and
combine them with [`sequence`](#sequence) into the encoding of a larger value.
Nothing is written until the finished `Encoder` is handed to
[`encode`](#encode).
-}
type Encoder
    = I8 Int
    | I16 Endianness Int
    | I32 Endianness Int
    | U8 Int
    | U16 Endianness Int
    | U32 Endianness Int
    | F32 Endianness Float
    | F64 Endianness Float
    | Seq Int (List Encoder)
    | Utf8 Int String
    | Bytes Bytes



-- ENCODE


{-| Turn an `Encoder` into `Bytes`.

    encode (unsignedInt8     7) -- <07>
    encode (unsignedInt16 BE 7) -- <0007>
    encode (unsignedInt16 LE 7) -- <0700>

The `encode` function is designed to minimize allocation. It works out the
exact width needed to hold everything and then generates the `Bytes` value
directly. That matters when you are encoding more elaborate data:

    import Bytes exposing (Endianness(..))
    import Bytes.Encode as Encode

    type alias Person =
      { age : Int
      , name : String
      }

    toEncoder : Person -> Encode.Encoder
    toEncoder person =
      Encode.sequence
        [ Encode.unsignedInt16 BE person.age
        , Encode.unsignedInt16 BE (Encode.getStringWidth person.name)
        , Encode.string person.name
        ]

    -- encode (toEncoder (Person 33 "Tom")) == <00210003546F6D>

Did you know it was going to be seven bytes? How about when you have a hundred
people to serialize? And when some have Japanese and Norwegian names? Going
through this intermediate `Encoder` can reduce allocation quite a lot!
-}
encode : Encoder -> Bytes
encode =
    Elm.Kernel.Bytes.encode



-- INTEGERS


{-| Encode an integer from `-128` to `127` in one byte.
-}
signedInt8 : Int -> Encoder
signedInt8 n =
    I8 n


{-| Encode an integer from `-32768` to `32767` in two bytes, using the given
byte order.
-}
signedInt16 : Endianness -> Int -> Encoder
signedInt16 endianness n =
    I16 endianness n


{-| Encode an integer from `-2147483648` to `2147483647` in four bytes, using
the given byte order.
-}
signedInt32 : Endianness -> Int -> Encoder
signedInt32 endianness n =
    I32 endianness n


{-| Encode an integer from `0` to `255` in one byte.
-}
unsignedInt8 : Int -> Encoder
unsignedInt8 n =
    U8 n


{-| Encode an integer from `0` to `65535` in two bytes, using the given byte
order.
-}
unsignedInt16 : Endianness -> Int -> Encoder
unsignedInt16 endianness n =
    U16 endianness n


{-| Encode an integer from `0` to `4294967295` in four bytes, using the given
byte order.
-}
unsignedInt32 : Endianness -> Int -> Encoder
unsignedInt32 endianness n =
    U32 endianness n



-- FLOATS


{-| Encode a 32-bit floating point number in four bytes, using the given byte
order.
-}
float32 : Endianness -> Float -> Encoder
float32 endianness x =
    F32 endianness x


{-| Encode a 64-bit floating point number in eight bytes, using the given byte
order.
-}
float64 : Endianness -> Float -> Encoder
float64 endianness x =
    F64 endianness x



-- BYTES


{-| Copy existing bytes directly into the new `Bytes` sequence. The width is
_not_ recorded for you, so you usually want to write it yourself first:

    import Bytes exposing (Bytes, Endianness(..))
    import Bytes.Encode as Encode

    png : Bytes -> Encode.Encoder
    png imageData =
      Encode.sequence
        [ Encode.unsignedInt32 BE (Bytes.width imageData)
        , Encode.bytes imageData
        ]

This lets you represent the width in whatever way your protocol requires.
For example, you can use [Base 128 Varints][pb] for ProtoBuf,
[Variable-Length Integers][sql] for SQLite, or whatever else they dream up.

[pb]: https://developers.google.com/protocol-buffers/docs/encoding#varints
[sql]: https://www.sqlite.org/src4/doc/trunk/www/varint.wiki
-}
bytes : Bytes -> Encoder
bytes chunk =
    Bytes chunk



-- STRINGS


{-| Encode a `String` as a run of UTF-8 bytes.

    encode (string "$20")   -- <24 32 30>
    encode (string "£20")   -- <C2A3 32 30>
    encode (string "€20")   -- <E282AC 32 30>
    encode (string "bread") -- <62 72 65 61 64>
    encode (string "brød")  -- <62 72 C3B8 64>

Some characters take one byte, while others can take up to four. Read more
about [UTF-8](https://en.wikipedia.org/wiki/UTF-8) to learn the details!

If you only write the UTF-8 bytes, a decoder has no way to tell where the
string ends. So most protocols put an integer in front saying how many bytes
follow, like this:

    sizedString : String -> Encoder
    sizedString str =
      sequence
        [ unsignedInt32 BE (getStringWidth str)
        , string str
        ]

You can choose whatever representation you want for the width, which is helpful
because many protocols use different integer representations to save space. For
example:

- ProtoBuf uses [Base 128 Varints](https://developers.google.com/protocol-buffers/docs/encoding#varints)
- SQLite uses [Variable-Length Integers](https://www.sqlite.org/src4/doc/trunk/www/varint.wiki)

In both cases, small numbers can fit in just one byte, saving some space. (The
SQLite encoding has the benefit that the first byte tells you how long the
number is, making it faster to decode.) In both cases, it is sort of tricky
to make negative numbers small.
-}
string : String -> Encoder
string str =
    -- The UTF-8 width is computed once here and stored in the encoder, so
    -- `getWidth` can read it back without measuring the string again.
    Utf8 (Elm.Kernel.Bytes.getStringWidth str) str


{-| Get the width of a `String` in UTF-8 bytes.

    getStringWidth "$20"   == 3
    getStringWidth "£20"   == 4
    getStringWidth "€20"   == 5
    getStringWidth "bread" == 5
    getStringWidth "brød"  == 5

Most protocols need this number to come directly before a chunk of UTF-8 bytes
so that a decoder knows where the string ends!

Read more about how UTF-8 works [here](https://en.wikipedia.org/wiki/UTF-8).
-}
getStringWidth : String -> Int
getStringWidth =
    Elm.Kernel.Bytes.getStringWidth



-- SEQUENCE


{-| Combine a list of encoders into one that writes them in order. So if you
wanted to encode three `Float` values for the position of a ball in 3D space,
you could say:

    import Bytes exposing (Endianness(..))
    import Bytes.Encode as Encode

    type alias Ball = { x : Float, y : Float, z : Float }

    ball : Ball -> Encode.Encoder
    ball {x,y,z} =
      Encode.sequence
        [ Encode.float32 BE x
        , Encode.float32 BE y
        , Encode.float32 BE z
        ]

-}
sequence : List Encoder -> Encoder
sequence encoders =
    -- The combined width is summed once here and cached in the `Seq` node.
    Seq (getWidths 0 encoders) encoders



-- WRITE
--
-- NOTE(review): `write` and `getWidth` are not called by `encode` anywhere in
-- this module; presumably the `Elm.Kernel.Bytes` kernel code calls them by
-- name. Confirm against the kernel source before renaming either of them or
-- changing their argument order.


-- Write a single encoder into `buffer` starting at `offset`. The returned
-- `Int` is used by `writeSequence` as the offset for the next encoder, so each
-- kernel writer is expected to return the position just past what it wrote.
-- Multi-byte writers receive `True` as their last argument for little-endian.
write : Encoder -> Bytes -> Int -> Int
write encoder buffer offset =
    case encoder of
        I8 n ->
            Elm.Kernel.Bytes.write_i8 buffer offset n

        I16 endianness n ->
            Elm.Kernel.Bytes.write_i16 buffer offset n (endianness == LE)

        I32 endianness n ->
            Elm.Kernel.Bytes.write_i32 buffer offset n (endianness == LE)

        U8 n ->
            Elm.Kernel.Bytes.write_u8 buffer offset n

        U16 endianness n ->
            Elm.Kernel.Bytes.write_u16 buffer offset n (endianness == LE)

        U32 endianness n ->
            Elm.Kernel.Bytes.write_u32 buffer offset n (endianness == LE)

        F32 endianness x ->
            Elm.Kernel.Bytes.write_f32 buffer offset x (endianness == LE)

        F64 endianness x ->
            Elm.Kernel.Bytes.write_f64 buffer offset x (endianness == LE)

        Seq _ encoders ->
            writeSequence encoders buffer offset

        Utf8 _ str ->
            Elm.Kernel.Bytes.write_string buffer offset str

        Bytes chunk ->
            Elm.Kernel.Bytes.write_bytes buffer offset chunk


-- Write every encoder in the list, left to right, feeding the offset returned
-- by one write into the next. Returns the offset after the last write, or the
-- incoming offset unchanged for an empty list.
writeSequence : List Encoder -> Bytes -> Int -> Int
writeSequence encoders buffer offset =
    case encoders of
        [] ->
            offset

        first :: rest ->
            writeSequence rest buffer (write first buffer offset)



-- WIDTHS


-- Number of bytes an encoder occupies. Fixed-size numbers report a constant,
-- `Seq` and `Utf8` report the width cached when they were constructed, and
-- raw `Bytes` ask the kernel for the width of the wrapped value.
getWidth : Encoder -> Int
getWidth encoder =
    case encoder of
        I8 _ ->
            1

        I16 _ _ ->
            2

        I32 _ _ ->
            4

        U8 _ ->
            1

        U16 _ _ ->
            2

        U32 _ _ ->
            4

        F32 _ _ ->
            4

        F64 _ _ ->
            8

        Seq width _ ->
            width

        Utf8 width _ ->
            width

        Bytes chunk ->
            Elm.Kernel.Bytes.width chunk


-- Add the widths of all encoders in the list to the running total `width`.
getWidths : Int -> List Encoder -> Int
getWidths width encoders =
    case encoders of
        [] ->
            width

        first :: rest ->
            getWidths (width + getWidth first) rest