1defmodule Base do
2  import Bitwise
3
4  @moduledoc """
5  This module provides data encoding and decoding functions
6  according to [RFC 4648](https://tools.ietf.org/html/rfc4648).
7
  RFC 4648 defines the commonly used base 16, base 32, and base
  64 encoding schemes.
10
11  ## Base 16 alphabet
12
13  | Value | Encoding | Value | Encoding | Value | Encoding | Value | Encoding |
14  |------:|:---------|------:|:---------|------:|:---------|------:|:---------|
15  |     0 | 0        |     4 | 4        |     8 | 8        |    12 | C        |
16  |     1 | 1        |     5 | 5        |     9 | 9        |    13 | D        |
17  |     2 | 2        |     6 | 6        |    10 | A        |    14 | E        |
18  |     3 | 3        |     7 | 7        |    11 | B        |    15 | F        |
19
20  ## Base 32 alphabet
21
22  | Value | Encoding | Value | Encoding | Value | Encoding | Value | Encoding |
23  |------:|:---------|------:|:---------|------:|:---------|------:|:---------|
24  |     0 | A        |     9 | J        |    18 | S        |    27 | 3        |
25  |     1 | B        |    10 | K        |    19 | T        |    28 | 4        |
26  |     2 | C        |    11 | L        |    20 | U        |    29 | 5        |
27  |     3 | D        |    12 | M        |    21 | V        |    30 | 6        |
28  |     4 | E        |    13 | N        |    22 | W        |    31 | 7        |
29  |     5 | F        |    14 | O        |    23 | X        |       |          |
30  |     6 | G        |    15 | P        |    24 | Y        | (pad) | =        |
31  |     7 | H        |    16 | Q        |    25 | Z        |       |          |
32  |     8 | I        |    17 | R        |    26 | 2        |       |          |
33
34
35  ## Base 32 (extended hex) alphabet
36
37  | Value | Encoding | Value | Encoding | Value | Encoding | Value | Encoding |
38  |------:|:---------|------:|:---------|------:|:---------|------:|:---------|
39  |     0 | 0        |     9 | 9        |    18 | I        |    27 | R        |
40  |     1 | 1        |    10 | A        |    19 | J        |    28 | S        |
41  |     2 | 2        |    11 | B        |    20 | K        |    29 | T        |
42  |     3 | 3        |    12 | C        |    21 | L        |    30 | U        |
43  |     4 | 4        |    13 | D        |    22 | M        |    31 | V        |
44  |     5 | 5        |    14 | E        |    23 | N        |       |          |
45  |     6 | 6        |    15 | F        |    24 | O        | (pad) | =        |
46  |     7 | 7        |    16 | G        |    25 | P        |       |          |
47  |     8 | 8        |    17 | H        |    26 | Q        |       |          |
48
49  ## Base 64 alphabet
50
51  | Value |  Encoding | Value | Encoding | Value | Encoding | Value | Encoding |
52  |------:|:----------|------:|:---------|------:|:---------|------:|:---------|
53  |     0 | A         |    17 | R        |    34 | i        |    51 | z        |
54  |     1 | B         |    18 | S        |    35 | j        |    52 | 0        |
55  |     2 | C         |    19 | T        |    36 | k        |    53 | 1        |
56  |     3 | D         |    20 | U        |    37 | l        |    54 | 2        |
57  |     4 | E         |    21 | V        |    38 | m        |    55 | 3        |
58  |     5 | F         |    22 | W        |    39 | n        |    56 | 4        |
59  |     6 | G         |    23 | X        |    40 | o        |    57 | 5        |
60  |     7 | H         |    24 | Y        |    41 | p        |    58 | 6        |
61  |     8 | I         |    25 | Z        |    42 | q        |    59 | 7        |
62  |     9 | J         |    26 | a        |    43 | r        |    60 | 8        |
63  |    10 | K         |    27 | b        |    44 | s        |    61 | 9        |
64  |    11 | L         |    28 | c        |    45 | t        |    62 | +        |
65  |    12 | M         |    29 | d        |    46 | u        |    63 | /        |
66  |    13 | N         |    30 | e        |    47 | v        |       |          |
67  |    14 | O         |    31 | f        |    48 | w        | (pad) | =        |
68  |    15 | P         |    32 | g        |    49 | x        |       |          |
69  |    16 | Q         |    33 | h        |    50 | y        |       |          |
70
71  ## Base 64 (URL and filename safe) alphabet
72
73  | Value | Encoding | Value | Encoding | Value | Encoding | Value | Encoding |
74  |------:|:---------|------:|:---------|------:|:---------|------:|:---------|
75  |     0 | A        |    17 | R        |    34 | i        |    51 | z        |
76  |     1 | B        |    18 | S        |    35 | j        |    52 | 0        |
77  |     2 | C        |    19 | T        |    36 | k        |    53 | 1        |
78  |     3 | D        |    20 | U        |    37 | l        |    54 | 2        |
79  |     4 | E        |    21 | V        |    38 | m        |    55 | 3        |
80  |     5 | F        |    22 | W        |    39 | n        |    56 | 4        |
81  |     6 | G        |    23 | X        |    40 | o        |    57 | 5        |
82  |     7 | H        |    24 | Y        |    41 | p        |    58 | 6        |
83  |     8 | I        |    25 | Z        |    42 | q        |    59 | 7        |
84  |     9 | J        |    26 | a        |    43 | r        |    60 | 8        |
85  |    10 | K        |    27 | b        |    44 | s        |    61 | 9        |
86  |    11 | L        |    28 | c        |    45 | t        |    62 | -        |
87  |    12 | M        |    29 | d        |    46 | u        |    63 | _        |
88  |    13 | N        |    30 | e        |    47 | v        |       |          |
89  |    14 | O        |    31 | f        |    48 | w        | (pad) | =        |
90  |    15 | P        |    32 | g        |    49 | x        |       |          |
91  |    16 | Q        |    33 | h        |    50 | y        |       |          |
92
93  """
94
95  @type encode_case :: :upper | :lower
96  @type decode_case :: :upper | :lower | :mixed
97
98  b16_alphabet = '0123456789ABCDEF'
99  b64_alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
100  b64url_alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
101  b32_alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
102  b32hex_alphabet = '0123456789ABCDEFGHIJKLMNOPQRSTUV'
103
104  defmacrop encode_pair(alphabet, case, value) do
105    quote do
106      case unquote(value) do
107        unquote(encode_pair_clauses(alphabet, case))
108      end
109    end
110  end
111
112  defp encode_pair_clauses(alphabet, case) when case in [:sensitive, :upper] do
113    shift = shift(alphabet)
114
115    alphabet
116    |> Enum.with_index()
117    |> encode_clauses(shift)
118  end
119
120  defp encode_pair_clauses(alphabet, :lower) do
121    shift = shift(alphabet)
122
123    alphabet
124    |> Stream.map(fn c -> if c in ?A..?Z, do: c - ?A + ?a, else: c end)
125    |> Enum.with_index()
126    |> encode_clauses(shift)
127  end
128
129  defp shift(alphabet) do
130    alphabet
131    |> length()
132    |> :math.log2()
133    |> round()
134  end
135
136  defp encode_clauses(alphabet, shift) do
137    for {encoding1, value1} <- alphabet,
138        {encoding2, value2} <- alphabet do
139      encoding = bsl(encoding1, 8) + encoding2
140      value = bsl(value1, shift) + value2
141      [clause] = quote(do: (unquote(value) -> unquote(encoding)))
142      clause
143    end
144  end
145
146  defmacrop decode_char(alphabet, case, encoding) do
147    quote do
148      case unquote(encoding) do
149        unquote(decode_char_clauses(alphabet, case))
150      end
151    end
152  end
153
154  defp decode_char_clauses(alphabet, case) when case in [:sensitive, :upper] do
155    clauses =
156      alphabet
157      |> Enum.with_index()
158      |> decode_clauses()
159
160    clauses ++ bad_digit_clause()
161  end
162
163  defp decode_char_clauses(alphabet, :lower) do
164    {uppers, rest} =
165      alphabet
166      |> Stream.with_index()
167      |> Enum.split_with(fn {encoding, _} -> encoding in ?A..?Z end)
168
169    lowers = Enum.map(uppers, fn {encoding, value} -> {encoding - ?A + ?a, value} end)
170
171    if length(uppers) > length(rest) do
172      decode_mixed_clauses(lowers, rest)
173    else
174      decode_mixed_clauses(rest, lowers)
175    end
176  end
177
178  defp decode_char_clauses(alphabet, :mixed) when length(alphabet) == 16 do
179    alphabet = Enum.with_index(alphabet)
180
181    lowers =
182      alphabet
183      |> Stream.filter(fn {encoding, _} -> encoding in ?A..?Z end)
184      |> Enum.map(fn {encoding, value} -> {encoding - ?A + ?a, value} end)
185
186    decode_mixed_clauses(alphabet, lowers)
187  end
188
189  defp decode_char_clauses(alphabet, :mixed) when length(alphabet) == 32 do
190    clauses =
191      alphabet
192      |> Stream.with_index()
193      |> Enum.flat_map(fn {encoding, value} = pair ->
194        if encoding in ?A..?Z do
195          [pair, {encoding - ?A + ?a, value}]
196        else
197          [pair]
198        end
199      end)
200      |> decode_clauses()
201
202    clauses ++ bad_digit_clause()
203  end
204
205  defp decode_mixed_clauses(first, second) do
206    first_clauses = decode_clauses(first)
207    second_clauses = decode_clauses(second) ++ bad_digit_clause()
208
209    join_clause =
210      quote do
211        encoding ->
212          case encoding do
213            unquote(second_clauses)
214          end
215      end
216
217    first_clauses ++ join_clause
218  end
219
220  defp decode_clauses(alphabet) do
221    for {encoding, value} <- alphabet do
222      [clause] = quote(do: (unquote(encoding) -> unquote(value)))
223      clause
224    end
225  end
226
227  defp bad_digit_clause() do
228    quote do
229      c ->
230        raise ArgumentError,
231              "non-alphabet digit found: #{inspect(<<c>>, binaries: :as_strings)} (byte #{c})"
232    end
233  end
234
235  defp maybe_pad(body, "", _, _), do: body
236  defp maybe_pad(body, tail, false, _), do: body <> tail
237
238  defp maybe_pad(body, tail, _, group_size) do
239    case group_size - rem(byte_size(tail), group_size) do
240      ^group_size -> body <> tail
241      6 -> body <> tail <> "======"
242      5 -> body <> tail <> "====="
243      4 -> body <> tail <> "===="
244      3 -> body <> tail <> "==="
245      2 -> body <> tail <> "=="
246      1 -> body <> tail <> "="
247    end
248  end
249
250  @doc """
251  Encodes a binary string into a base 16 encoded string.
252
253  ## Options
254
255  The accepted options are:
256
257    * `:case` - specifies the character case to use when encoding
258
259  The values for `:case` can be:
260
261    * `:upper` - uses upper case characters (default)
262    * `:lower` - uses lower case characters
263
264  ## Examples
265
266      iex> Base.encode16("foobar")
267      "666F6F626172"
268
269      iex> Base.encode16("foobar", case: :lower)
270      "666f6f626172"
271
272  """
273  @spec encode16(binary, case: encode_case) :: binary
274  def encode16(data, opts \\ []) when is_binary(data) do
275    case = Keyword.get(opts, :case, :upper)
276    do_encode16(case, data)
277  end
278
279  @doc """
280  Decodes a base 16 encoded string into a binary string.
281
282  ## Options
283
284  The accepted options are:
285
286    * `:case` - specifies the character case to accept when decoding
287
288  The values for `:case` can be:
289
290    * `:upper` - only allows upper case characters (default)
291    * `:lower` - only allows lower case characters
292    * `:mixed` - allows mixed case characters
293
294  ## Examples
295
296      iex> Base.decode16("666F6F626172")
297      {:ok, "foobar"}
298
299      iex> Base.decode16("666f6f626172", case: :lower)
300      {:ok, "foobar"}
301
302      iex> Base.decode16("666f6F626172", case: :mixed)
303      {:ok, "foobar"}
304
305  """
306  @spec decode16(binary, case: decode_case) :: {:ok, binary} | :error
307  def decode16(string, opts \\ []) do
308    {:ok, decode16!(string, opts)}
309  rescue
310    ArgumentError -> :error
311  end
312
313  @doc """
314  Decodes a base 16 encoded string into a binary string.
315
316  ## Options
317
318  The accepted options are:
319
320    * `:case` - specifies the character case to accept when decoding
321
322  The values for `:case` can be:
323
324    * `:upper` - only allows upper case characters (default)
325    * `:lower` - only allows lower case characters
326    * `:mixed` - allows mixed case characters
327
328  An `ArgumentError` exception is raised if the padding is incorrect or
329  a non-alphabet character is present in the string.
330
331  ## Examples
332
333      iex> Base.decode16!("666F6F626172")
334      "foobar"
335
336      iex> Base.decode16!("666f6f626172", case: :lower)
337      "foobar"
338
339      iex> Base.decode16!("666f6F626172", case: :mixed)
340      "foobar"
341
342  """
343  @spec decode16!(binary, case: encode_case) :: binary
344  def decode16!(string, opts \\ [])
345
346  def decode16!(string, opts) when is_binary(string) and rem(byte_size(string), 2) == 0 do
347    case = Keyword.get(opts, :case, :upper)
348    do_decode16(case, string)
349  end
350
351  def decode16!(string, _opts) when is_binary(string) do
352    raise ArgumentError, "odd-length string"
353  end
354
355  @doc """
356  Encodes a binary string into a base 64 encoded string.
357
358  Accepts `padding: false` option which will omit padding from
359  the output string.
360
361  ## Examples
362
363      iex> Base.encode64("foobar")
364      "Zm9vYmFy"
365
366      iex> Base.encode64("foob")
367      "Zm9vYg=="
368
369      iex> Base.encode64("foob", padding: false)
370      "Zm9vYg"
371
372  """
373  @spec encode64(binary, padding: boolean) :: binary
374  def encode64(data, opts \\ []) when is_binary(data) do
375    pad? = Keyword.get(opts, :padding, true)
376    do_encode64(data, pad?)
377  end
378
379  @doc """
380  Decodes a base 64 encoded string into a binary string.
381
382  Accepts `ignore: :whitespace` option which will ignore all the
383  whitespace characters in the input string.
384
385  Accepts `padding: false` option which will ignore padding from
386  the input string.
387
388  ## Examples
389
390      iex> Base.decode64("Zm9vYmFy")
391      {:ok, "foobar"}
392
393      iex> Base.decode64("Zm9vYmFy\\n", ignore: :whitespace)
394      {:ok, "foobar"}
395
396      iex> Base.decode64("Zm9vYg==")
397      {:ok, "foob"}
398
399      iex> Base.decode64("Zm9vYg", padding: false)
400      {:ok, "foob"}
401
402  """
403  @spec decode64(binary, ignore: :whitespace, padding: boolean) :: {:ok, binary} | :error
404  def decode64(string, opts \\ []) when is_binary(string) do
405    {:ok, decode64!(string, opts)}
406  rescue
407    ArgumentError -> :error
408  end
409
410  @doc """
411  Decodes a base 64 encoded string into a binary string.
412
413  Accepts `ignore: :whitespace` option which will ignore all the
414  whitespace characters in the input string.
415
416  Accepts `padding: false` option which will ignore padding from
417  the input string.
418
419  An `ArgumentError` exception is raised if the padding is incorrect or
420  a non-alphabet character is present in the string.
421
422  ## Examples
423
424      iex> Base.decode64!("Zm9vYmFy")
425      "foobar"
426
427      iex> Base.decode64!("Zm9vYmFy\\n", ignore: :whitespace)
428      "foobar"
429
430      iex> Base.decode64!("Zm9vYg==")
431      "foob"
432
433      iex> Base.decode64!("Zm9vYg", padding: false)
434      "foob"
435
436  """
437  @spec decode64!(binary, ignore: :whitespace, padding: boolean) :: binary
438  def decode64!(string, opts \\ []) when is_binary(string) do
439    pad? = Keyword.get(opts, :padding, true)
440    string |> remove_ignored(opts[:ignore]) |> do_decode64(pad?)
441  end
442
443  @doc """
444  Encodes a binary string into a base 64 encoded string with URL and filename
445  safe alphabet.
446
447  Accepts `padding: false` option which will omit padding from
448  the output string.
449
450  ## Examples
451
452      iex> Base.url_encode64(<<255, 127, 254, 252>>)
453      "_3_-_A=="
454
455      iex> Base.url_encode64(<<255, 127, 254, 252>>, padding: false)
456      "_3_-_A"
457
458  """
459  @spec url_encode64(binary, padding: boolean) :: binary
460  def url_encode64(data, opts \\ []) when is_binary(data) do
461    pad? = Keyword.get(opts, :padding, true)
462    do_encode64url(data, pad?)
463  end
464
465  @doc """
466  Decodes a base 64 encoded string with URL and filename safe alphabet
467  into a binary string.
468
469  Accepts `ignore: :whitespace` option which will ignore all the
470  whitespace characters in the input string.
471
472  Accepts `padding: false` option which will ignore padding from
473  the input string.
474
475  ## Examples
476
477      iex> Base.url_decode64("_3_-_A==")
478      {:ok, <<255, 127, 254, 252>>}
479
480      iex> Base.url_decode64("_3_-_A==\\n", ignore: :whitespace)
481      {:ok, <<255, 127, 254, 252>>}
482
483      iex> Base.url_decode64("_3_-_A", padding: false)
484      {:ok, <<255, 127, 254, 252>>}
485
486  """
487  @spec url_decode64(binary, ignore: :whitespace, padding: boolean) :: {:ok, binary} | :error
488  def url_decode64(string, opts \\ []) when is_binary(string) do
489    {:ok, url_decode64!(string, opts)}
490  rescue
491    ArgumentError -> :error
492  end
493
494  @doc """
495  Decodes a base 64 encoded string with URL and filename safe alphabet
496  into a binary string.
497
498  Accepts `ignore: :whitespace` option which will ignore all the
499  whitespace characters in the input string.
500
501  Accepts `padding: false` option which will ignore padding from
502  the input string.
503
504  An `ArgumentError` exception is raised if the padding is incorrect or
505  a non-alphabet character is present in the string.
506
507  ## Examples
508
509      iex> Base.url_decode64!("_3_-_A==")
510      <<255, 127, 254, 252>>
511
512      iex> Base.url_decode64!("_3_-_A==\\n", ignore: :whitespace)
513      <<255, 127, 254, 252>>
514
515      iex> Base.url_decode64!("_3_-_A", padding: false)
516      <<255, 127, 254, 252>>
517
518  """
519  @spec url_decode64!(binary, ignore: :whitespace, padding: boolean) :: binary
520  def url_decode64!(string, opts \\ []) when is_binary(string) do
521    pad? = Keyword.get(opts, :padding, true)
522    string |> remove_ignored(opts[:ignore]) |> do_decode64url(pad?)
523  end
524
525  @doc """
526  Encodes a binary string into a base 32 encoded string.
527
528  ## Options
529
530  The accepted options are:
531
532    * `:case` - specifies the character case to use when encoding
533    * `:padding` - specifies whether to apply padding
534
535  The values for `:case` can be:
536
537    * `:upper` - uses upper case characters (default)
538    * `:lower` - uses lower case characters
539
540  The values for `:padding` can be:
541
542    * `true` - pad the output string to the nearest multiple of 8 (default)
543    * `false` - omit padding from the output string
544
545  ## Examples
546
547      iex> Base.encode32("foobar")
548      "MZXW6YTBOI======"
549
550      iex> Base.encode32("foobar", case: :lower)
551      "mzxw6ytboi======"
552
553      iex> Base.encode32("foobar", padding: false)
554      "MZXW6YTBOI"
555
556  """
557  @spec encode32(binary, case: encode_case, padding: boolean) :: binary
558  def encode32(data, opts \\ []) when is_binary(data) do
559    case = Keyword.get(opts, :case, :upper)
560    pad? = Keyword.get(opts, :padding, true)
561    do_encode32(case, data, pad?)
562  end
563
564  @doc """
565  Decodes a base 32 encoded string into a binary string.
566
567  ## Options
568
569  The accepted options are:
570
571    * `:case` - specifies the character case to accept when decoding
572    * `:padding` - specifies whether to require padding
573
574  The values for `:case` can be:
575
576    * `:upper` - only allows  upper case characters (default)
577    * `:lower` - only allows lower case characters
578    * `:mixed` - allows mixed case characters
579
580  The values for `:padding` can be:
581
582    * `true` - requires the input string to be padded to the nearest multiple of 8 (default)
583    * `false` - ignores padding from the input string
584
585  ## Examples
586
587      iex> Base.decode32("MZXW6YTBOI======")
588      {:ok, "foobar"}
589
590      iex> Base.decode32("mzxw6ytboi======", case: :lower)
591      {:ok, "foobar"}
592
593      iex> Base.decode32("mzXW6ytBOi======", case: :mixed)
594      {:ok, "foobar"}
595
596      iex> Base.decode32("MZXW6YTBOI", padding: false)
597      {:ok, "foobar"}
598
599  """
600  @spec decode32(binary, case: decode_case, padding: boolean) :: {:ok, binary} | :error
601  def decode32(string, opts \\ []) do
602    {:ok, decode32!(string, opts)}
603  rescue
604    ArgumentError -> :error
605  end
606
607  @doc """
608  Decodes a base 32 encoded string into a binary string.
609
610  An `ArgumentError` exception is raised if the padding is incorrect or
611  a non-alphabet character is present in the string.
612
613  ## Options
614
615  The accepted options are:
616
617    * `:case` - specifies the character case to accept when decoding
618    * `:padding` - specifies whether to require padding
619
620  The values for `:case` can be:
621
622    * `:upper` - only allows upper case characters (default)
623    * `:lower` - only allows lower case characters
624    * `:mixed` - allows mixed case characters
625
626  The values for `:padding` can be:
627
628    * `true` - requires the input string to be padded to the nearest multiple of 8 (default)
629    * `false` - ignores padding from the input string
630
631  ## Examples
632
633      iex> Base.decode32!("MZXW6YTBOI======")
634      "foobar"
635
636      iex> Base.decode32!("mzxw6ytboi======", case: :lower)
637      "foobar"
638
639      iex> Base.decode32!("mzXW6ytBOi======", case: :mixed)
640      "foobar"
641
642      iex> Base.decode32!("MZXW6YTBOI", padding: false)
643      "foobar"
644
645  """
646  @spec decode32!(binary, case: decode_case, padding: boolean) :: binary
647  def decode32!(string, opts \\ []) when is_binary(string) do
648    case = Keyword.get(opts, :case, :upper)
649    pad? = Keyword.get(opts, :padding, true)
650    do_decode32(case, string, pad?)
651  end
652
653  @doc """
654  Encodes a binary string into a base 32 encoded string with an
655  extended hexadecimal alphabet.
656
657  ## Options
658
659  The accepted options are:
660
661    * `:case` - specifies the character case to use when encoding
662    * `:padding` - specifies whether to apply padding
663
664  The values for `:case` can be:
665
666    * `:upper` - uses upper case characters (default)
667    * `:lower` - uses lower case characters
668
669  The values for `:padding` can be:
670
671    * `true` - pad the output string to the nearest multiple of 8 (default)
672    * `false` - omit padding from the output string
673
674  ## Examples
675
676      iex> Base.hex_encode32("foobar")
677      "CPNMUOJ1E8======"
678
679      iex> Base.hex_encode32("foobar", case: :lower)
680      "cpnmuoj1e8======"
681
682      iex> Base.hex_encode32("foobar", padding: false)
683      "CPNMUOJ1E8"
684
685  """
686  @spec hex_encode32(binary, case: encode_case, padding: boolean) :: binary
687  def hex_encode32(data, opts \\ []) when is_binary(data) do
688    case = Keyword.get(opts, :case, :upper)
689    pad? = Keyword.get(opts, :padding, true)
690    do_encode32hex(case, data, pad?)
691  end
692
693  @doc """
694  Decodes a base 32 encoded string with extended hexadecimal alphabet
695  into a binary string.
696
697  ## Options
698
699  The accepted options are:
700
701    * `:case` - specifies the character case to accept when decoding
702    * `:padding` - specifies whether to require padding
703
704  The values for `:case` can be:
705
706    * `:upper` - only allows upper case characters (default)
707    * `:lower` - only allows lower case characters
708    * `:mixed` - allows mixed case characters
709
710  The values for `:padding` can be:
711
712    * `true` - requires the input string to be padded to the nearest multiple of 8 (default)
713    * `false` - ignores padding from the input string
714
715  ## Examples
716
717      iex> Base.hex_decode32("CPNMUOJ1E8======")
718      {:ok, "foobar"}
719
720      iex> Base.hex_decode32("cpnmuoj1e8======", case: :lower)
721      {:ok, "foobar"}
722
723      iex> Base.hex_decode32("cpnMuOJ1E8======", case: :mixed)
724      {:ok, "foobar"}
725
726      iex> Base.hex_decode32("CPNMUOJ1E8", padding: false)
727      {:ok, "foobar"}
728
729  """
730  @spec hex_decode32(binary, case: decode_case, padding: boolean) :: {:ok, binary} | :error
731  def hex_decode32(string, opts \\ []) do
732    {:ok, hex_decode32!(string, opts)}
733  rescue
734    ArgumentError -> :error
735  end
736
737  @doc """
738  Decodes a base 32 encoded string with extended hexadecimal alphabet
739  into a binary string.
740
741  An `ArgumentError` exception is raised if the padding is incorrect or
742  a non-alphabet character is present in the string.
743
744  ## Options
745
746  The accepted options are:
747
748    * `:case` - specifies the character case to accept when decoding
749    * `:padding` - specifies whether to require padding
750
751  The values for `:case` can be:
752
753    * `:upper` - only allows upper case characters (default)
754    * `:lower` - only allows lower case characters
755    * `:mixed` - allows mixed case characters
756
757  The values for `:padding` can be:
758
759    * `true` - requires the input string to be padded to the nearest multiple of 8 (default)
760    * `false` - ignores padding from the input string
761
762  ## Examples
763
764      iex> Base.hex_decode32!("CPNMUOJ1E8======")
765      "foobar"
766
767      iex> Base.hex_decode32!("cpnmuoj1e8======", case: :lower)
768      "foobar"
769
770      iex> Base.hex_decode32!("cpnMuOJ1E8======", case: :mixed)
771      "foobar"
772
773      iex> Base.hex_decode32!("CPNMUOJ1E8", padding: false)
774      "foobar"
775
776  """
777  @spec hex_decode32!(binary, case: decode_case, padding: boolean) :: binary
778  def hex_decode32!(string, opts \\ []) when is_binary(string) do
779    case = Keyword.get(opts, :case, :upper)
780    pad? = Keyword.get(opts, :padding, true)
781    do_decode32hex(case, string, pad?)
782  end
783
784  defp remove_ignored(string, nil), do: string
785
786  defp remove_ignored(string, :whitespace) do
787    for <<char::8 <- string>>, char not in '\s\t\r\n', into: <<>>, do: <<char::8>>
788  end
789
790  enc16 = [upper: :enc16_upper, lower: :enc16_lower]
791
792  for {case, fun} <- enc16 do
793    defp unquote(fun)(char) do
794      encode_pair(unquote(b16_alphabet), unquote(case), char)
795    end
796  end
797
798  defp do_encode16(_, <<>>), do: <<>>
799
800  for {case, fun} <- enc16 do
801    defp do_encode16(unquote(case), data) do
802      split = 8 * div(byte_size(data), 8)
803      <<main::size(split)-binary, rest::binary>> = data
804
805      main =
806        for <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8, c8::8 <- main>>, into: <<>> do
807          <<
808            unquote(fun)(c1)::16,
809            unquote(fun)(c2)::16,
810            unquote(fun)(c3)::16,
811            unquote(fun)(c4)::16,
812            unquote(fun)(c5)::16,
813            unquote(fun)(c6)::16,
814            unquote(fun)(c7)::16,
815            unquote(fun)(c8)::16
816          >>
817        end
818
819      case rest do
820        <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8>> ->
821          <<
822            main::binary,
823            unquote(fun)(c1)::16,
824            unquote(fun)(c2)::16,
825            unquote(fun)(c3)::16,
826            unquote(fun)(c4)::16,
827            unquote(fun)(c5)::16,
828            unquote(fun)(c6)::16,
829            unquote(fun)(c7)::16
830          >>
831
832        <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8>> ->
833          <<
834            main::binary,
835            unquote(fun)(c1)::16,
836            unquote(fun)(c2)::16,
837            unquote(fun)(c3)::16,
838            unquote(fun)(c4)::16,
839            unquote(fun)(c5)::16,
840            unquote(fun)(c6)::16
841          >>
842
843        <<c1::8, c2::8, c3::8, c4::8, c5::8>> ->
844          <<
845            main::binary,
846            unquote(fun)(c1)::16,
847            unquote(fun)(c2)::16,
848            unquote(fun)(c3)::16,
849            unquote(fun)(c4)::16,
850            unquote(fun)(c5)::16
851          >>
852
853        <<c1::8, c2::8, c3::8, c4::8>> ->
854          <<
855            main::binary,
856            unquote(fun)(c1)::16,
857            unquote(fun)(c2)::16,
858            unquote(fun)(c3)::16,
859            unquote(fun)(c4)::16
860          >>
861
862        <<c1::8, c2::8, c3::8>> ->
863          <<main::binary, unquote(fun)(c1)::16, unquote(fun)(c2)::16, unquote(fun)(c3)::16>>
864
865        <<c1::8, c2::8>> ->
866          <<main::binary, unquote(fun)(c1)::16, unquote(fun)(c2)::16>>
867
868        <<c1::8>> ->
869          <<main::binary, unquote(fun)(c1)::16>>
870
871        <<>> ->
872          main
873      end
874    end
875  end
876
877  dec16 = [upper: :dec16_upper, lower: :dec16_lower, mixed: :dec16_mixed]
878
879  for {case, fun} <- dec16 do
880    defp unquote(fun)(encoding) do
881      decode_char(unquote(b16_alphabet), unquote(case), encoding)
882    end
883  end
884
885  defp do_decode16(_, <<>>), do: <<>>
886
887  for {case, fun} <- dec16 do
888    defp do_decode16(unquote(case), string) do
889      split = 8 * div(byte_size(string), 8)
890      <<main::size(split)-binary, rest::binary>> = string
891
892      main =
893        for <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8, c8::8 <- main>>, into: <<>> do
894          <<
895            unquote(fun)(c1)::4,
896            unquote(fun)(c2)::4,
897            unquote(fun)(c3)::4,
898            unquote(fun)(c4)::4,
899            unquote(fun)(c5)::4,
900            unquote(fun)(c6)::4,
901            unquote(fun)(c7)::4,
902            unquote(fun)(c8)::4
903          >>
904        end
905
906      case rest do
907        <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8>> ->
908          <<
909            main::bits,
910            unquote(fun)(c1)::4,
911            unquote(fun)(c2)::4,
912            unquote(fun)(c3)::4,
913            unquote(fun)(c4)::4,
914            unquote(fun)(c5)::4,
915            unquote(fun)(c6)::4
916          >>
917
918        <<c1::8, c2::8, c3::8, c4::8>> ->
919          <<
920            main::bits,
921            unquote(fun)(c1)::4,
922            unquote(fun)(c2)::4,
923            unquote(fun)(c3)::4,
924            unquote(fun)(c4)::4
925          >>
926
927        <<c1::8, c2::8>> ->
928          <<main::bits, unquote(fun)(c1)::4, unquote(fun)(c2)::4>>
929
930        <<_::8>> ->
931          raise ArgumentError, "odd-length string"
932
933        <<>> ->
934          main
935      end
936    end
937  end
938
939  for {base, alphabet} <- ["64": b64_alphabet, "64url": b64url_alphabet] do
940    pair = :"enc#{base}_pair"
941    char = :"enc#{base}_char"
942    do_encode = :"do_encode#{base}"
943
944    defp unquote(pair)(value) do
945      encode_pair(unquote(alphabet), :sensitive, value)
946    end
947
948    defp unquote(char)(value) do
949      value
950      |> unquote(pair)()
951      |> band(0x00FF)
952    end
953
954    defp unquote(do_encode)(<<>>, _), do: <<>>
955
956    defp unquote(do_encode)(data, pad?) do
957      split = 6 * div(byte_size(data), 6)
958      <<main::size(split)-binary, rest::binary>> = data
959
960      main =
961        for <<c1::12, c2::12, c3::12, c4::12 <- main>>, into: <<>> do
962          <<
963            unquote(pair)(c1)::16,
964            unquote(pair)(c2)::16,
965            unquote(pair)(c3)::16,
966            unquote(pair)(c4)::16
967          >>
968        end
969
970      tail =
971        case rest do
972          <<c1::12, c2::12, c3::12, c::4>> ->
973            <<
974              unquote(pair)(c1)::16,
975              unquote(pair)(c2)::16,
976              unquote(pair)(c3)::16,
977              unquote(char)(bsl(c, 2))::8
978            >>
979
980          <<c1::12, c2::12, c3::8>> ->
981            <<unquote(pair)(c1)::16, unquote(pair)(c2)::16, unquote(pair)(bsl(c3, 4))::16>>
982
983          <<c1::12, c2::12>> ->
984            <<unquote(pair)(c1)::16, unquote(pair)(c2)::16>>
985
986          <<c1::12, c2::4>> ->
987            <<unquote(pair)(c1)::16, unquote(char)(bsl(c2, 2))::8>>
988
989          <<c1::8>> ->
990            <<unquote(pair)(bsl(c1, 4))::16>>
991
992          <<>> ->
993            <<>>
994        end
995
996      maybe_pad(main, tail, pad?, 4)
997    end
998  end
999
1000  for {base, alphabet} <- ["64": b64_alphabet, "64url": b64url_alphabet] do
1001    fun = :"dec#{base}"
1002    do_decode = :"do_decode#{base}"
1003
1004    defp unquote(fun)(encoding) do
1005      decode_char(unquote(alphabet), :sensitive, encoding)
1006    end
1007
1008    defp unquote(do_decode)(<<>>, _), do: <<>>
1009
1010    defp unquote(do_decode)(string, pad?) do
1011      segs = div(byte_size(string) + 7, 8) - 1
1012      <<main::size(segs)-binary-unit(64), rest::binary>> = string
1013
1014      main =
1015        for <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8, c8::8 <- main>>, into: <<>> do
1016          <<
1017            unquote(fun)(c1)::6,
1018            unquote(fun)(c2)::6,
1019            unquote(fun)(c3)::6,
1020            unquote(fun)(c4)::6,
1021            unquote(fun)(c5)::6,
1022            unquote(fun)(c6)::6,
1023            unquote(fun)(c7)::6,
1024            unquote(fun)(c8)::6
1025          >>
1026        end
1027
1028      case rest do
1029        <<c1::8, c2::8, ?=, ?=>> ->
1030          <<main::bits, unquote(fun)(c1)::6, bsr(unquote(fun)(c2), 4)::2>>
1031
1032        <<c1::8, c2::8, c3::8, ?=>> ->
1033          <<main::bits, unquote(fun)(c1)::6, unquote(fun)(c2)::6, bsr(unquote(fun)(c3), 2)::4>>
1034
1035        <<c1::8, c2::8, c3::8, c4::8>> ->
1036          <<
1037            main::bits,
1038            unquote(fun)(c1)::6,
1039            unquote(fun)(c2)::6,
1040            unquote(fun)(c3)::6,
1041            unquote(fun)(c4)::6
1042          >>
1043
1044        <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, ?=, ?=>> ->
1045          <<
1046            main::bits,
1047            unquote(fun)(c1)::6,
1048            unquote(fun)(c2)::6,
1049            unquote(fun)(c3)::6,
1050            unquote(fun)(c4)::6,
1051            unquote(fun)(c5)::6,
1052            bsr(unquote(fun)(c6), 4)::2
1053          >>
1054
1055        <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8, ?=>> ->
1056          <<
1057            main::bits,
1058            unquote(fun)(c1)::6,
1059            unquote(fun)(c2)::6,
1060            unquote(fun)(c3)::6,
1061            unquote(fun)(c4)::6,
1062            unquote(fun)(c5)::6,
1063            unquote(fun)(c6)::6,
1064            bsr(unquote(fun)(c7), 2)::4
1065          >>
1066
1067        <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8, c8::8>> ->
1068          <<
1069            main::bits,
1070            unquote(fun)(c1)::6,
1071            unquote(fun)(c2)::6,
1072            unquote(fun)(c3)::6,
1073            unquote(fun)(c4)::6,
1074            unquote(fun)(c5)::6,
1075            unquote(fun)(c6)::6,
1076            unquote(fun)(c7)::6,
1077            unquote(fun)(c8)::6
1078          >>
1079
1080        <<c1::8, c2::8>> when not pad? ->
1081          <<main::bits, unquote(fun)(c1)::6, bsr(unquote(fun)(c2), 4)::2>>
1082
1083        <<c1::8, c2::8, c3::8>> when not pad? ->
1084          <<main::bits, unquote(fun)(c1)::6, unquote(fun)(c2)::6, bsr(unquote(fun)(c3), 2)::4>>
1085
1086        <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8>> when not pad? ->
1087          <<
1088            main::bits,
1089            unquote(fun)(c1)::6,
1090            unquote(fun)(c2)::6,
1091            unquote(fun)(c3)::6,
1092            unquote(fun)(c4)::6,
1093            unquote(fun)(c5)::6,
1094            bsr(unquote(fun)(c6), 4)::2
1095          >>
1096
1097        <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8>> when not pad? ->
1098          <<
1099            main::bits,
1100            unquote(fun)(c1)::6,
1101            unquote(fun)(c2)::6,
1102            unquote(fun)(c3)::6,
1103            unquote(fun)(c4)::6,
1104            unquote(fun)(c5)::6,
1105            unquote(fun)(c6)::6,
1106            bsr(unquote(fun)(c7), 2)::4
1107          >>
1108
1109        _ ->
1110          raise ArgumentError, "incorrect padding"
1111      end
1112    end
1113  end
1114
1115  for {base, alphabet} <- ["32": b32_alphabet, "32hex": b32hex_alphabet],
1116      case <- [:upper, :lower] do
1117    pair = :"enc#{base}_#{case}_pair"
1118    char = :"enc#{base}_#{case}_char"
1119    do_encode = :"do_encode#{base}"
1120
1121    defp unquote(pair)(value) do
1122      encode_pair(unquote(alphabet), unquote(case), value)
1123    end
1124
1125    defp unquote(char)(value) do
1126      value
1127      |> unquote(pair)()
1128      |> band(0x00FF)
1129    end
1130
1131    defp unquote(do_encode)(_, <<>>, _), do: <<>>
1132
1133    defp unquote(do_encode)(unquote(case), data, pad?) do
1134      split = 5 * div(byte_size(data), 5)
1135      <<main::size(split)-binary, rest::binary>> = data
1136
1137      main =
1138        for <<c1::10, c2::10, c3::10, c4::10 <- main>>, into: <<>> do
1139          <<
1140            unquote(pair)(c1)::16,
1141            unquote(pair)(c2)::16,
1142            unquote(pair)(c3)::16,
1143            unquote(pair)(c4)::16
1144          >>
1145        end
1146
1147      tail =
1148        case rest do
1149          <<c1::10, c2::10, c3::10, c4::2>> ->
1150            <<
1151              unquote(pair)(c1)::16,
1152              unquote(pair)(c2)::16,
1153              unquote(pair)(c3)::16,
1154              unquote(char)(bsl(c4, 3))::8
1155            >>
1156
1157          <<c1::10, c2::10, c3::4>> ->
1158            <<unquote(pair)(c1)::16, unquote(pair)(c2)::16, unquote(char)(bsl(c3, 1))::8>>
1159
1160          <<c1::10, c2::6>> ->
1161            <<unquote(pair)(c1)::16, unquote(pair)(bsl(c2, 4))::16>>
1162
1163          <<c1::8>> ->
1164            <<unquote(pair)(bsl(c1, 2))::16>>
1165
1166          <<>> ->
1167            <<>>
1168        end
1169
1170      maybe_pad(main, tail, pad?, 8)
1171    end
1172  end
1173
1174  for {base, alphabet} <- ["32": b32_alphabet, "32hex": b32hex_alphabet],
1175      case <- [:upper, :lower, :mixed] do
1176    fun = :"dec#{base}_#{case}"
1177    do_decode = :"do_decode#{base}"
1178
1179    defp unquote(fun)(encoding) do
1180      decode_char(unquote(alphabet), unquote(case), encoding)
1181    end
1182
1183    defp unquote(do_decode)(_, <<>>, _), do: <<>>
1184
1185    defp unquote(do_decode)(unquote(case), string, pad?) do
1186      segs = div(byte_size(string) + 7, 8) - 1
1187      <<main::size(segs)-binary-unit(64), rest::binary>> = string
1188
1189      main =
1190        for <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8, c8::8 <- main>>, into: <<>> do
1191          <<
1192            unquote(fun)(c1)::5,
1193            unquote(fun)(c2)::5,
1194            unquote(fun)(c3)::5,
1195            unquote(fun)(c4)::5,
1196            unquote(fun)(c5)::5,
1197            unquote(fun)(c6)::5,
1198            unquote(fun)(c7)::5,
1199            unquote(fun)(c8)::5
1200          >>
1201        end
1202
1203      case rest do
1204        <<c1::8, c2::8, ?=, ?=, ?=, ?=, ?=, ?=>> ->
1205          <<main::bits, unquote(fun)(c1)::5, bsr(unquote(fun)(c2), 2)::3>>
1206
1207        <<c1::8, c2::8, c3::8, c4::8, ?=, ?=, ?=, ?=>> ->
1208          <<
1209            main::bits,
1210            unquote(fun)(c1)::5,
1211            unquote(fun)(c2)::5,
1212            unquote(fun)(c3)::5,
1213            bsr(unquote(fun)(c4), 4)::1
1214          >>
1215
1216        <<c1::8, c2::8, c3::8, c4::8, c5::8, ?=, ?=, ?=>> ->
1217          <<
1218            main::bits,
1219            unquote(fun)(c1)::5,
1220            unquote(fun)(c2)::5,
1221            unquote(fun)(c3)::5,
1222            unquote(fun)(c4)::5,
1223            bsr(unquote(fun)(c5), 1)::4
1224          >>
1225
1226        <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8, ?=>> ->
1227          <<
1228            main::bits,
1229            unquote(fun)(c1)::5,
1230            unquote(fun)(c2)::5,
1231            unquote(fun)(c3)::5,
1232            unquote(fun)(c4)::5,
1233            unquote(fun)(c5)::5,
1234            unquote(fun)(c6)::5,
1235            bsr(unquote(fun)(c7), 3)::2
1236          >>
1237
1238        <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8, c8::8>> ->
1239          <<
1240            main::bits,
1241            unquote(fun)(c1)::5,
1242            unquote(fun)(c2)::5,
1243            unquote(fun)(c3)::5,
1244            unquote(fun)(c4)::5,
1245            unquote(fun)(c5)::5,
1246            unquote(fun)(c6)::5,
1247            unquote(fun)(c7)::5,
1248            unquote(fun)(c8)::5
1249          >>
1250
1251        <<c1::8, c2::8>> when not pad? ->
1252          <<main::bits, unquote(fun)(c1)::5, bsr(unquote(fun)(c2), 2)::3>>
1253
1254        <<c1::8, c2::8, c3::8, c4::8>> when not pad? ->
1255          <<
1256            main::bits,
1257            unquote(fun)(c1)::5,
1258            unquote(fun)(c2)::5,
1259            unquote(fun)(c3)::5,
1260            bsr(unquote(fun)(c4), 4)::1
1261          >>
1262
1263        <<c1::8, c2::8, c3::8, c4::8, c5::8>> when not pad? ->
1264          <<
1265            main::bits,
1266            unquote(fun)(c1)::5,
1267            unquote(fun)(c2)::5,
1268            unquote(fun)(c3)::5,
1269            unquote(fun)(c4)::5,
1270            bsr(unquote(fun)(c5), 1)::4
1271          >>
1272
1273        <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8>> when not pad? ->
1274          <<
1275            main::bits,
1276            unquote(fun)(c1)::5,
1277            unquote(fun)(c2)::5,
1278            unquote(fun)(c3)::5,
1279            unquote(fun)(c4)::5,
1280            unquote(fun)(c5)::5,
1281            unquote(fun)(c6)::5,
1282            bsr(unquote(fun)(c7), 3)::2
1283          >>
1284
1285        _ ->
1286          raise ArgumentError, "incorrect padding"
1287      end
1288    end
1289  end
1290end
1291