1module Bytes.Encode exposing
2  ( encode
3  , Encoder
4  , signedInt8, signedInt16, signedInt32
5  , unsignedInt8, unsignedInt16, unsignedInt32
6  , float32, float64
7  , bytes
8  , string, getStringWidth
9  , sequence
10  )
11
12
13{-|
14
15# Encoders
16@docs encode, Encoder, sequence
17
18# Integers
19@docs signedInt8, signedInt16, signedInt32,
20  unsignedInt8, unsignedInt16, unsignedInt32
21
22# Floats
23@docs float32, float64
24
25# Bytes
26@docs bytes
27
28# Strings
29@docs string, getStringWidth
30
31-}
32
33
34import Bytes exposing (Bytes, Endianness(..))
35
36
37
38-- ENCODER
39
40
{-| Describes how to generate a sequence of bytes.

These encoders snap together with [`sequence`](#sequence) so you can start with
small building blocks and put them together into a more complex encoding.
-}
type Encoder
  -- Fixed-width integers: 1, 2 and 4 bytes respectively (see `getWidth`).
  = I8 Int
  | I16 Endianness Int
  | I32 Endianness Int
  | U8 Int
  | U16 Endianness Int
  | U32 Endianness Int
  -- Floating point numbers: 4 and 8 bytes respectively (see `getWidth`).
  | F32 Endianness Float
  | F64 Endianness Float
  -- The `Int` is the combined width of the list, computed once by `sequence`
  -- so that `getWidth` does not need to walk the list again.
  | Seq Int (List Encoder)
  -- The `Int` is the UTF-8 width of the string, computed once by `string`.
  | Utf8 Int String
  -- Raw bytes; their width is looked up when needed (see `getWidth`).
  | Bytes Bytes
58
59
60
61-- ENCODE
62
63
{-| Turn an `Encoder` into `Bytes`.

    encode (unsignedInt8     7) -- <07>
    encode (unsignedInt16 BE 7) -- <0007>
    encode (unsignedInt16 LE 7) -- <0700>

The `encode` function is designed to minimize allocation. It works out the
exact width needed to fit everything in `Bytes` and then generates that value
directly. This pays off when you are encoding more elaborate data:

    import Bytes exposing (Endianness(..))
    import Bytes.Encode as Encode

    type alias Person =
      { age : Int
      , name : String
      }

    toEncoder : Person -> Encode.Encoder
    toEncoder person =
      Encode.sequence
        [ Encode.unsignedInt16 BE person.age
        , Encode.unsignedInt16 BE (Encode.getStringWidth person.name)
        , Encode.string person.name
        ]

    -- encode (toEncoder (Person 33 "Tom")) == <00210003546F6D>

Did you know that would be seven bytes? What about when you have a hundred
people to serialize, some of them with Japanese or Norwegian names? Going
through this intermediate `Encoder` can reduce allocation quite a lot!
-}
encode : Encoder -> Bytes
encode encoder =
  Elm.Kernel.Bytes.encode encoder
99
100
101
102-- INTEGERS
103
104
{-| Encode an integer in the range `-128` to `127` as a single byte.
-}
signedInt8 : Int -> Encoder
signedInt8 n =
  I8 n
110
111
{-| Encode an integer in the range `-32768` to `32767` as two bytes, in the
given byte order.
-}
signedInt16 : Endianness -> Int -> Encoder
signedInt16 endianness n =
  I16 endianness n
117
118
{-| Encode an integer in the range `-2147483648` to `2147483647` as four bytes,
in the given byte order.
-}
signedInt32 : Endianness -> Int -> Encoder
signedInt32 endianness n =
  I32 endianness n
124
125
{-| Encode an integer in the range `0` to `255` as a single byte.
-}
unsignedInt8 : Int -> Encoder
unsignedInt8 n =
  U8 n
131
132
{-| Encode an integer in the range `0` to `65535` as two bytes, in the given
byte order.
-}
unsignedInt16 : Endianness -> Int -> Encoder
unsignedInt16 endianness n =
  U16 endianness n
138
139
{-| Encode an integer in the range `0` to `4294967295` as four bytes, in the
given byte order.
-}
unsignedInt32 : Endianness -> Int -> Encoder
unsignedInt32 endianness n =
  U32 endianness n
145
146
147
148-- FLOATS
149
150
{-| Encode a 32-bit floating point number as four bytes, in the given byte
order.
-}
float32 : Endianness -> Float -> Encoder
float32 endianness n =
  F32 endianness n
156
157
{-| Encode a 64-bit floating point number as eight bytes, in the given byte
order.
-}
float64 : Endianness -> Float -> Encoder
float64 endianness n =
  F64 endianness n
163
164
165
166-- BYTES
167
168
{-| Copy existing bytes directly into the new `Bytes` sequence. Note that this
does not record the width! You usually want to write the width yourself, like
this:

    import Bytes exposing (Bytes, Endianness(..))
    import Bytes.Encode as Encode

    png : Bytes -> Encode.Encoder
    png imageData =
      Encode.sequence
        [ Encode.unsignedInt32 BE (Bytes.width imageData)
        , Encode.bytes imageData
        ]

That way you can represent the width in whatever form your protocol requires.
For example, you can use [Base 128 Varints][pb] for ProtoBuf,
[Variable-Length Integers][sql] for SQLite, or whatever else they dream up.

[pb]: https://developers.google.com/protocol-buffers/docs/encoding#varints
[sql]: https://www.sqlite.org/src4/doc/trunk/www/varint.wiki
-}
bytes : Bytes -> Encoder
bytes chunk =
  Bytes chunk
192
193
194
195-- STRINGS
196
197
{-| Encode a `String` as a run of UTF-8 bytes.

    encode (string "$20")   -- <24 32 30>
    encode (string "£20")   -- <C2A3 32 30>
    encode (string "€20")   -- <E282AC 32 30>
    encode (string "bread") -- <62 72 65 61 64>
    encode (string "brød")  -- <62 72 C3B8 64>

Some characters take one byte, while others can take up to four. Read more
about [UTF-8](https://en.wikipedia.org/wiki/UTF-8) to learn the details!

If you only write the UTF-8 bytes, though, a decoder has no way of knowing
where the string ends. So most protocols put an integer in front that says how
many bytes follow, like this:

    sizedString : String -> Encoder
    sizedString str =
      sequence
        [ unsignedInt32 BE (getStringWidth str)
        , string str
        ]

You are free to pick the representation of the width, which matters because
many protocols use different integer representations to save space. For
example:

- ProtoBuf uses [Base 128 Varints](https://developers.google.com/protocol-buffers/docs/encoding#varints)
- SQLite uses [Variable-Length Integers](https://www.sqlite.org/src4/doc/trunk/www/varint.wiki)

In both cases, small numbers can fit in just one byte, saving some space. (The
SQLite encoding has the benefit that the first byte tells you how long the
number is, making it faster to decode.) In both cases, it is sort of tricky
to make negative numbers small.
-}
string : String -> Encoder
string str =
  let
    -- Measured once here and stored in the encoder, so `getWidth` can read
    -- it back instead of measuring the string again.
    utf8Width =
      Elm.Kernel.Bytes.getStringWidth str
  in
  Utf8 utf8Width str
235
236
{-| Get the number of bytes a `String` occupies when written as UTF-8.

    getStringWidth "$20"   == 3
    getStringWidth "£20"   == 4
    getStringWidth "€20"   == 5
    getStringWidth "bread" == 5
    getStringWidth "brød"  == 5

Most protocols need this number to come directly before a chunk of UTF-8 bytes
so that a decoder knows where the string ends!

Read more about how UTF-8 works [here](https://en.wikipedia.org/wiki/UTF-8).
-}
getStringWidth : String -> Int
getStringWidth str =
  Elm.Kernel.Bytes.getStringWidth str
253
254
255
256-- SEQUENCE
257
258
{-| Combine a list of encoders into one, written out in list order. So if you
wanted to encode three `Float` values for the position of a ball in 3D space,
you could say:

    import Bytes exposing (Endianness(..))
    import Bytes.Encode as Encode

    type alias Ball = { x : Float, y : Float, z : Float }

    ball : Ball -> Encode.Encoder
    ball {x,y,z} =
      Encode.sequence
        [ Encode.float32 BE x
        , Encode.float32 BE y
        , Encode.float32 BE z
        ]

-}
sequence : List Encoder -> Encoder
sequence builders =
  let
    -- Summed once here and stored in the encoder, so `getWidth` can read it
    -- back instead of walking the list again.
    totalWidth =
      getWidths 0 builders
  in
  Seq totalWidth builders
279
280
281
282-- WRITE
283
284
-- Write a single encoder into `buffer` starting at `offset` by dispatching to
-- the matching kernel writer. The resulting `Int` is what `writeSequence`
-- passes along as the offset for the next encoder.
write : Encoder -> Bytes -> Int -> Int
write encoder buffer offset =
  let
    -- The kernel writers for multi-byte numbers take a Bool that is True
    -- for little-endian output.
    isLittleEndian endianness =
      endianness == LE
  in
  case encoder of
    I8 n ->
      Elm.Kernel.Bytes.write_i8 buffer offset n

    I16 endianness n ->
      Elm.Kernel.Bytes.write_i16 buffer offset n (isLittleEndian endianness)

    I32 endianness n ->
      Elm.Kernel.Bytes.write_i32 buffer offset n (isLittleEndian endianness)

    U8 n ->
      Elm.Kernel.Bytes.write_u8 buffer offset n

    U16 endianness n ->
      Elm.Kernel.Bytes.write_u16 buffer offset n (isLittleEndian endianness)

    U32 endianness n ->
      Elm.Kernel.Bytes.write_u32 buffer offset n (isLittleEndian endianness)

    F32 endianness n ->
      Elm.Kernel.Bytes.write_f32 buffer offset n (isLittleEndian endianness)

    F64 endianness n ->
      Elm.Kernel.Bytes.write_f64 buffer offset n (isLittleEndian endianness)

    Seq _ encoders ->
      writeSequence encoders buffer offset

    Utf8 _ str ->
      Elm.Kernel.Bytes.write_string buffer offset str

    Bytes chunk ->
      Elm.Kernel.Bytes.write_bytes buffer offset chunk
299
300
-- Write every encoder in list order, feeding the offset returned by each
-- `write` call into the next one. Returns the offset after the last write, or
-- the starting offset unchanged for an empty list.
writeSequence : List Encoder -> Bytes -> Int -> Int
writeSequence builders mb offset =
  List.foldl
    (\encoder position -> write encoder mb position)
    offset
    builders
309
310
311
312-- WIDTHS
313
314
-- Width in bytes of a single encoder. Numbers have a fixed width, `Seq` and
-- `Utf8` carry a width stored at construction time, and raw `Bytes` are
-- measured through the kernel.
getWidth : Encoder -> Int
getWidth encoder =
  case encoder of
    I8 _ ->
      1

    I16 _ _ ->
      2

    I32 _ _ ->
      4

    U8 _ ->
      1

    U16 _ _ ->
      2

    U32 _ _ ->
      4

    F32 _ _ ->
      4

    F64 _ _ ->
      8

    Seq storedWidth _ ->
      storedWidth

    Utf8 storedWidth _ ->
      storedWidth

    Bytes chunk ->
      Elm.Kernel.Bytes.width chunk
329
330
-- Add the width of every encoder in the list to the starting `width`.
getWidths : Int -> List Encoder -> Int
getWidths width builders =
  List.foldl
    (\encoder total -> total + getWidth encoder)
    width
    builders
339