1%%
2%% %CopyrightBegin%
3%%
4%% Copyright Ericsson AB 2007-2017. All Rights Reserved.
5%%
6%% Licensed under the Apache License, Version 2.0 (the "License");
7%% you may not use this file except in compliance with the License.
8%% You may obtain a copy of the License at
9%%
10%%     http://www.apache.org/licenses/LICENSE-2.0
11%%
12%% Unless required by applicable law or agreed to in writing, software
13%% distributed under the License is distributed on an "AS IS" BASIS,
14%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15%% See the License for the specific language governing permissions and
16%% limitations under the License.
17%%
18%% %CopyrightEnd%
19%%
20%% Description: Implements base 64 encode and decode. See RFC4648.
21
22-module(base64).
23
24-export([encode/1, decode/1, mime_decode/1,
25	 encode_to_string/1, decode_to_string/1, mime_decode_to_string/1]).
26
27
28%% RFC 4648: Base 64 Encoding alphabet
29-type base64_alphabet() :: $A..$Z | $a..$z | $0..$9 | $+ | $/ | $=.
30
31%% The following type is a subtype of string() for return values
32%% of encoding functions.
33-type base64_string() :: [base64_alphabet()].
34-type base64_binary() :: binary().
35
36%% Decoded sequence of octets
37-type byte_string() :: [byte()].
38
39-spec encode_to_string(Data) -> Base64String when
40      Data :: byte_string() | binary(),
41      Base64String :: base64_string().
42
43encode_to_string(Bin) when is_binary(Bin) ->
44    encode_to_string(binary_to_list(Bin));
45encode_to_string(List) when is_list(List) ->
46    encode_list_to_string(List).
47
48-spec encode(Data) -> Base64 when
49      Data :: byte_string() | binary(),
50      Base64 :: base64_binary().
51
52encode(Bin) when is_binary(Bin) ->
53    encode_binary(Bin, <<>>);
54encode(List) when is_list(List) ->
55    encode_list(List, <<>>).
56
57encode_list_to_string([]) ->
58    [];
59encode_list_to_string([B1]) ->
60    [b64e(B1 bsr 2),
61     b64e((B1 band 3) bsl 4), $=, $=];
62encode_list_to_string([B1,B2]) ->
63    [b64e(B1 bsr 2),
64     b64e(((B1 band 3) bsl 4) bor (B2 bsr 4)),
65     b64e((B2 band 15) bsl 2), $=];
66encode_list_to_string([B1,B2,B3|Ls]) ->
67    BB = (B1 bsl 16) bor (B2 bsl 8) bor B3,
68    [b64e(BB bsr 18),
69     b64e((BB bsr 12) band 63),
70     b64e((BB bsr 6) band 63),
71     b64e(BB band 63) | encode_list_to_string(Ls)].
72
73encode_binary(<<>>, A) ->
74    A;
75encode_binary(<<B1:8>>, A) ->
76    <<A/bits,(b64e(B1 bsr 2)):8,(b64e((B1 band 3) bsl 4)):8,$=:8,$=:8>>;
77encode_binary(<<B1:8, B2:8>>, A) ->
78    <<A/bits,(b64e(B1 bsr 2)):8,
79      (b64e(((B1 band 3) bsl 4) bor (B2 bsr 4))):8,
80      (b64e((B2 band 15) bsl 2)):8, $=:8>>;
81encode_binary(<<B1:8, B2:8, B3:8, Ls/bits>>, A) ->
82    BB = (B1 bsl 16) bor (B2 bsl 8) bor B3,
83    encode_binary(Ls,
84                  <<A/bits,(b64e(BB bsr 18)):8,
85                    (b64e((BB bsr 12) band 63)):8,
86                    (b64e((BB bsr 6) band 63)):8,
87                    (b64e(BB band 63)):8>>).
88
89encode_list([], A) ->
90    A;
91encode_list([B1], A) ->
92    <<A/bits,(b64e(B1 bsr 2)):8,(b64e((B1 band 3) bsl 4)):8,$=:8,$=:8>>;
93encode_list([B1,B2], A) ->
94    <<A/bits,(b64e(B1 bsr 2)):8,
95      (b64e(((B1 band 3) bsl 4) bor (B2 bsr 4))):8,
96      (b64e((B2 band 15) bsl 2)):8, $=:8>>;
97encode_list([B1,B2,B3|Ls], A) ->
98    BB = (B1 bsl 16) bor (B2 bsl 8) bor B3,
99    encode_list(Ls,
100                <<A/bits,(b64e(BB bsr 18)):8,
101                  (b64e((BB bsr 12) band 63)):8,
102                  (b64e((BB bsr 6) band 63)):8,
103                  (b64e(BB band 63)):8>>).
104
105%% mime_decode strips away all characters not Base64 before
106%% converting, whereas decode crashes if an illegal character is found
107
108-spec decode(Base64) -> Data when
109      Base64 :: base64_string() | base64_binary(),
110      Data :: binary().
111
112decode(Bin) when is_binary(Bin) ->
113    decode_binary(Bin, <<>>);
114decode(List) when is_list(List) ->
115    decode_list(List, <<>>).
116
117-spec mime_decode(Base64) -> Data when
118      Base64 :: base64_string() | base64_binary(),
119      Data :: binary().
120
121mime_decode(Bin) when is_binary(Bin) ->
122    mime_decode_binary(Bin, <<>>);
123mime_decode(List) when is_list(List) ->
124    mime_decode_list(List, <<>>).
125
126%% mime_decode_to_string strips away all characters not Base64 before
127%% converting, whereas decode_to_string crashes if an illegal
128%% character is found
129
130-spec decode_to_string(Base64) -> DataString when
131      Base64 :: base64_string() | base64_binary(),
132      DataString :: byte_string().
133
134decode_to_string(Bin) when is_binary(Bin) ->
135    decode_to_string(binary_to_list(Bin));
136decode_to_string(List) when is_list(List) ->
137    decode_list_to_string(List).
138
139-spec mime_decode_to_string(Base64) -> DataString when
140      Base64 :: base64_string() | base64_binary(),
141      DataString :: byte_string().
142
143mime_decode_to_string(Bin) when is_binary(Bin) ->
144    mime_decode_to_string(binary_to_list(Bin));
145mime_decode_to_string(List) when is_list(List) ->
146    mime_decode_list_to_string(List).
147
148%% Skipping pad character if not at end of string. Also liberal about
149%% excess padding and skipping of other illegal (non-base64 alphabet)
150%% characters. See section 3.3 of RFC4648
151mime_decode_list([0 | Cs], A) ->
152    mime_decode_list(Cs, A);
153mime_decode_list([C1 | Cs], A) ->
154    case b64d(C1) of
155        B1 when is_integer(B1) -> mime_decode_list(Cs, A, B1);
156        _ -> mime_decode_list(Cs, A)  % eq is padding
157    end;
158mime_decode_list([], A) ->
159    A.
160
161mime_decode_list([0 | Cs], A, B1) ->
162    mime_decode_list(Cs, A, B1);
163mime_decode_list([C2 | Cs], A, B1) ->
164    case b64d(C2) of
165        B2 when is_integer(B2) ->
166            mime_decode_list(Cs, A, B1, B2);
167        _ -> mime_decode_list(Cs, A, B1) % eq is padding
168    end.
169
170mime_decode_list([0 | Cs], A, B1, B2) ->
171    mime_decode_list(Cs, A, B1, B2);
172mime_decode_list([C3 | Cs], A, B1, B2) ->
173    case b64d(C3) of
174        B3 when is_integer(B3) ->
175            mime_decode_list(Cs, A, B1, B2, B3);
176        eq=B3 ->
177            mime_decode_list_after_eq(Cs, A, B1, B2, B3);
178        _ -> mime_decode_list(Cs, A, B1, B2)
179    end.
180
181mime_decode_list([0 | Cs], A, B1, B2, B3) ->
182    mime_decode_list(Cs, A, B1, B2, B3);
183mime_decode_list([C4 | Cs], A, B1, B2, B3) ->
184    case b64d(C4) of
185        B4 when is_integer(B4) ->
186            mime_decode_list(Cs, <<A/bits,B1:6,B2:6,B3:6,B4:6>>);
187        eq ->
188            mime_decode_list_after_eq(Cs, A, B1, B2, B3);
189        _ -> mime_decode_list(Cs, A, B1, B2, B3)
190    end.
191
192mime_decode_list_after_eq([0 | Cs], A, B1, B2, B3) ->
193    mime_decode_list_after_eq(Cs, A, B1, B2, B3);
194mime_decode_list_after_eq([C | Cs], A, B1, B2, B3) ->
195    case b64d(C) of
196        B when is_integer(B) ->
197            %% More valid data, skip the eq as invalid
198            case B3 of
199                eq -> mime_decode_list(Cs, A, B1, B2, B);
200                _ -> mime_decode_list(Cs, <<A/bits,B1:6,B2:6,B3:6,B:6>>)
201            end;
202        _ -> mime_decode_list_after_eq(Cs, A, B1, B2, B3)
203    end;
204mime_decode_list_after_eq([], A, B1, B2, eq) ->
205    <<A/bits,B1:6,(B2 bsr 4):2>>;
206mime_decode_list_after_eq([], A, B1, B2, B3) ->
207    <<A/bits,B1:6,B2:6,(B3 bsr 2):4>>.
208
209mime_decode_binary(<<0:8, Cs/bits>>, A) ->
210    mime_decode_binary(Cs, A);
211mime_decode_binary(<<C1:8, Cs/bits>>, A) ->
212    case b64d(C1) of
213        B1 when is_integer(B1) -> mime_decode_binary(Cs, A, B1);
214        _ -> mime_decode_binary(Cs, A)  % eq is padding
215    end;
216mime_decode_binary(<<>>, A) ->
217    A.
218
219mime_decode_binary(<<0:8, Cs/bits>>, A, B1) ->
220    mime_decode_binary(Cs, A, B1);
221mime_decode_binary(<<C2:8, Cs/bits>>, A, B1) ->
222    case b64d(C2) of
223        B2 when is_integer(B2) ->
224            mime_decode_binary(Cs, A, B1, B2);
225        _ -> mime_decode_binary(Cs, A, B1) % eq is padding
226    end.
227
228mime_decode_binary(<<0:8, Cs/bits>>, A, B1, B2) ->
229    mime_decode_binary(Cs, A, B1, B2);
230mime_decode_binary(<<C3:8, Cs/bits>>, A, B1, B2) ->
231    case b64d(C3) of
232        B3 when is_integer(B3) ->
233            mime_decode_binary(Cs, A, B1, B2, B3);
234        eq=B3 ->
235            mime_decode_binary_after_eq(Cs, A, B1, B2, B3);
236        _ -> mime_decode_binary(Cs, A, B1, B2)
237    end.
238
239mime_decode_binary(<<0:8, Cs/bits>>, A, B1, B2, B3) ->
240    mime_decode_binary(Cs, A, B1, B2, B3);
241mime_decode_binary(<<C4:8, Cs/bits>>, A, B1, B2, B3) ->
242    case b64d(C4) of
243        B4 when is_integer(B4) ->
244            mime_decode_binary(Cs, <<A/bits,B1:6,B2:6,B3:6,B4:6>>);
245        eq ->
246            mime_decode_binary_after_eq(Cs, A, B1, B2, B3);
247        _ -> mime_decode_binary(Cs, A, B1, B2, B3)
248    end.
249
250mime_decode_binary_after_eq(<<0:8, Cs/bits>>, A, B1, B2, B3) ->
251    mime_decode_binary_after_eq(Cs, A, B1, B2, B3);
252mime_decode_binary_after_eq(<<C:8, Cs/bits>>, A, B1, B2, B3) ->
253    case b64d(C) of
254        B when is_integer(B) ->
255            %% More valid data, skip the eq as invalid
256            case B3 of
257                eq -> mime_decode_binary(Cs, A, B1, B2, B);
258                _ -> mime_decode_binary(Cs, <<A/bits,B1:6,B2:6,B3:6,B:6>>)
259            end;
260        _ -> mime_decode_binary_after_eq(Cs, A, B1, B2, B3)
261    end;
262mime_decode_binary_after_eq(<<>>, A, B1, B2, eq) ->
263    <<A/bits,B1:6,(B2 bsr 4):2>>;
264mime_decode_binary_after_eq(<<>>, A, B1, B2, B3) ->
265    <<A/bits,B1:6,B2:6,(B3 bsr 2):4>>.
266
267mime_decode_list_to_string([0 | Cs]) ->
268    mime_decode_list_to_string(Cs);
269mime_decode_list_to_string([C1 | Cs]) ->
270    case b64d(C1) of
271        B1 when is_integer(B1) -> mime_decode_list_to_string(Cs, B1);
272        _ -> mime_decode_list_to_string(Cs) % eq is padding
273    end;
274mime_decode_list_to_string([]) ->
275    [].
276
277mime_decode_list_to_string([0 | Cs], B1) ->
278    mime_decode_list_to_string(Cs, B1);
279mime_decode_list_to_string([C2 | Cs], B1) ->
280    case b64d(C2) of
281        B2 when is_integer(B2) ->
282            mime_decode_list_to_string(Cs, B1, B2);
283        _ -> mime_decode_list_to_string(Cs, B1) % eq is padding
284    end.
285
286mime_decode_list_to_string([0 | Cs], B1, B2) ->
287    mime_decode_list_to_string(Cs, B1, B2);
288mime_decode_list_to_string([C3 | Cs], B1, B2) ->
289    case b64d(C3) of
290        B3 when is_integer(B3) ->
291            mime_decode_list_to_string(Cs, B1, B2, B3);
292        eq=B3 -> mime_decode_list_to_string_after_eq(Cs, B1, B2, B3);
293        _ -> mime_decode_list_to_string(Cs, B1, B2)
294    end.
295
296mime_decode_list_to_string([0 | Cs], B1, B2, B3) ->
297    mime_decode_list_to_string(Cs, B1, B2, B3);
298mime_decode_list_to_string([C4 | Cs], B1, B2, B3) ->
299    case b64d(C4) of
300        B4 when is_integer(B4) ->
301            Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B4,
302            Octet1 = Bits4x6 bsr 16,
303            Octet2 = (Bits4x6 bsr 8) band 16#ff,
304            Octet3 = Bits4x6 band 16#ff,
305            [Octet1, Octet2, Octet3 | mime_decode_list_to_string(Cs)];
306        eq ->
307            mime_decode_list_to_string_after_eq(Cs, B1, B2, B3);
308        _ -> mime_decode_list_to_string(Cs, B1, B2, B3)
309    end.
310
311mime_decode_list_to_string_after_eq([0 | Cs], B1, B2, B3) ->
312    mime_decode_list_to_string_after_eq(Cs, B1, B2, B3);
313mime_decode_list_to_string_after_eq([C | Cs], B1, B2, B3) ->
314    case b64d(C) of
315        B when is_integer(B) ->
316            %% More valid data, skip the eq as invalid
317            case B3 of
318                eq -> mime_decode_list_to_string(Cs, B1, B2, B);
319                _ ->
320                    Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B,
321                    Octet1 = Bits4x6 bsr 16,
322                    Octet2 = (Bits4x6 bsr 8) band 16#ff,
323                    Octet3 = Bits4x6 band 16#ff,
324                    [Octet1, Octet2, Octet3 | mime_decode_list_to_string(Cs)]
325            end;
326        _ -> mime_decode_list_to_string_after_eq(Cs, B1, B2, B3)
327    end;
328mime_decode_list_to_string_after_eq([], B1, B2, eq) ->
329    binary_to_list(<<B1:6,(B2 bsr 4):2>>);
330mime_decode_list_to_string_after_eq([], B1, B2, B3) ->
331    binary_to_list(<<B1:6,B2:6,(B3 bsr 2):4>>).
332
333decode_list([C1 | Cs], A) ->
334    case b64d(C1) of
335        ws -> decode_list(Cs, A);
336        B1 -> decode_list(Cs, A, B1)
337    end;
338decode_list([], A) ->
339    A.
340
341decode_list([C2 | Cs], A, B1) ->
342    case b64d(C2) of
343        ws -> decode_list(Cs, A, B1);
344        B2 -> decode_list(Cs, A, B1, B2)
345    end.
346
347decode_list([C3 | Cs], A, B1, B2) ->
348    case b64d(C3) of
349        ws -> decode_list(Cs, A, B1, B2);
350        B3 -> decode_list(Cs, A, B1, B2, B3)
351    end.
352
353decode_list([C4 | Cs], A, B1, B2, B3) ->
354    case b64d(C4) of
355        ws                -> decode_list(Cs, A, B1, B2, B3);
356        eq when B3 =:= eq -> only_ws(Cs, <<A/bits,B1:6,(B2 bsr 4):2>>);
357        eq                -> only_ws(Cs, <<A/bits,B1:6,B2:6,(B3 bsr 2):4>>);
358        B4                -> decode_list(Cs, <<A/bits,B1:6,B2:6,B3:6,B4:6>>)
359    end.
360
361decode_binary(<<C1:8, Cs/bits>>, A) ->
362    case b64d(C1) of
363        ws -> decode_binary(Cs, A);
364        B1 -> decode_binary(Cs, A, B1)
365    end;
366decode_binary(<<>>, A) ->
367    A.
368
369decode_binary(<<C2:8, Cs/bits>>, A, B1) ->
370    case b64d(C2) of
371        ws -> decode_binary(Cs, A, B1);
372        B2 -> decode_binary(Cs, A, B1, B2)
373    end.
374
375decode_binary(<<C3:8, Cs/bits>>, A, B1, B2) ->
376    case b64d(C3) of
377        ws -> decode_binary(Cs, A, B1, B2);
378        B3 -> decode_binary(Cs, A, B1, B2, B3)
379    end.
380
381decode_binary(<<C4:8, Cs/bits>>, A, B1, B2, B3) ->
382    case b64d(C4) of
383        ws                -> decode_binary(Cs, A, B1, B2, B3);
384        eq when B3 =:= eq -> only_ws_binary(Cs, <<A/bits,B1:6,(B2 bsr 4):2>>);
385        eq                -> only_ws_binary(Cs, <<A/bits,B1:6,B2:6,(B3 bsr 2):4>>);
386        B4                -> decode_binary(Cs, <<A/bits,B1:6,B2:6,B3:6,B4:6>>)
387    end.
388
389only_ws_binary(<<>>, A) ->
390    A;
391only_ws_binary(<<C:8, Cs/bits>>, A) ->
392    case b64d(C) of
393        ws -> only_ws_binary(Cs, A)
394    end.
395
396decode_list_to_string([C1 | Cs]) ->
397    case b64d(C1) of
398        ws -> decode_list_to_string(Cs);
399        B1 -> decode_list_to_string(Cs, B1)
400    end;
401decode_list_to_string([]) ->
402    [].
403
404decode_list_to_string([C2 | Cs], B1) ->
405    case b64d(C2) of
406        ws -> decode_list_to_string(Cs, B1);
407        B2 -> decode_list_to_string(Cs, B1, B2)
408    end.
409
410decode_list_to_string([C3 | Cs], B1, B2) ->
411    case b64d(C3) of
412        ws -> decode_list_to_string(Cs, B1, B2);
413        B3 -> decode_list_to_string(Cs, B1, B2, B3)
414    end.
415
416decode_list_to_string([C4 | Cs], B1, B2, B3) ->
417    case b64d(C4) of
418        ws ->
419            decode_list_to_string(Cs, B1, B2, B3);
420        eq when B3 =:= eq ->
421            only_ws(Cs, binary_to_list(<<B1:6,(B2 bsr 4):2>>));
422        eq ->
423            only_ws(Cs, binary_to_list(<<B1:6,B2:6,(B3 bsr 2):4>>));
424        B4 ->
425            Bits4x6 = (B1 bsl 18) bor (B2 bsl 12) bor (B3 bsl 6) bor B4,
426            Octet1 = Bits4x6 bsr 16,
427            Octet2 = (Bits4x6 bsr 8) band 16#ff,
428            Octet3 = Bits4x6 band 16#ff,
429            [Octet1, Octet2, Octet3 | decode_list_to_string(Cs)]
430    end.
431
432only_ws([], A) ->
433    A;
434only_ws([C | Cs], A) ->
435    case b64d(C) of
436        ws -> only_ws(Cs, A)
437    end.
438
439%%%========================================================================
440%%% Internal functions
441%%%========================================================================
442
443%% accessors
444-compile({inline, [{b64d, 1}]}).
445%% One-based decode map.
446b64d(X) ->
447    element(X,
448            {bad,bad,bad,bad,bad,bad,bad,bad,ws,ws,bad,bad,ws,bad,bad, %1-15
449             bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad, %16-31
450             ws,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,62,bad,bad,bad,63, %32-47
451             52,53,54,55,56,57,58,59,60,61,bad,bad,bad,eq,bad,bad, %48-63
452             bad,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
453             15,16,17,18,19,20,21,22,23,24,25,bad,bad,bad,bad,bad,
454             bad,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
455             41,42,43,44,45,46,47,48,49,50,51,bad,bad,bad,bad,bad,
456             bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
457             bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
458             bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
459             bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
460             bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
461             bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
462             bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,
463             bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad,bad}).
464
465-compile({inline, [{b64e, 1}]}).
466b64e(X) ->
467    element(X+1,
468	    {$A, $B, $C, $D, $E, $F, $G, $H, $I, $J, $K, $L, $M, $N,
469	     $O, $P, $Q, $R, $S, $T, $U, $V, $W, $X, $Y, $Z,
470	     $a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n,
471	     $o, $p, $q, $r, $s, $t, $u, $v, $w, $x, $y, $z,
472	     $0, $1, $2, $3, $4, $5, $6, $7, $8, $9, $+, $/}).
473