1defmodule Base do
2  import Bitwise
3
4  @moduledoc """
5  This module provides data encoding and decoding functions
6  according to [RFC 4648](https://tools.ietf.org/html/rfc4648).
7
  RFC 4648 defines the commonly used base 16, base 32, and base
  64 encoding schemes.
10
11  ## Base 16 alphabet
12
13  | Value | Encoding | Value | Encoding | Value | Encoding | Value | Encoding |
14  |------:|:---------|------:|:---------|------:|:---------|------:|:---------|
15  |     0 | 0        |     4 | 4        |     8 | 8        |    12 | C        |
16  |     1 | 1        |     5 | 5        |     9 | 9        |    13 | D        |
17  |     2 | 2        |     6 | 6        |    10 | A        |    14 | E        |
18  |     3 | 3        |     7 | 7        |    11 | B        |    15 | F        |
19
20  ## Base 32 alphabet
21
22  | Value | Encoding | Value | Encoding | Value | Encoding | Value | Encoding |
23  |------:|:---------|------:|:---------|------:|:---------|------:|:---------|
24  |     0 | A        |     9 | J        |    18 | S        |    27 | 3        |
25  |     1 | B        |    10 | K        |    19 | T        |    28 | 4        |
26  |     2 | C        |    11 | L        |    20 | U        |    29 | 5        |
27  |     3 | D        |    12 | M        |    21 | V        |    30 | 6        |
28  |     4 | E        |    13 | N        |    22 | W        |    31 | 7        |
29  |     5 | F        |    14 | O        |    23 | X        |       |          |
30  |     6 | G        |    15 | P        |    24 | Y        | (pad) | =        |
31  |     7 | H        |    16 | Q        |    25 | Z        |       |          |
32  |     8 | I        |    17 | R        |    26 | 2        |       |          |
33
34
35  ## Base 32 (extended hex) alphabet
36
37  | Value | Encoding | Value | Encoding | Value | Encoding | Value | Encoding |
38  |------:|:---------|------:|:---------|------:|:---------|------:|:---------|
39  |     0 | 0        |     9 | 9        |    18 | I        |    27 | R        |
40  |     1 | 1        |    10 | A        |    19 | J        |    28 | S        |
41  |     2 | 2        |    11 | B        |    20 | K        |    29 | T        |
42  |     3 | 3        |    12 | C        |    21 | L        |    30 | U        |
43  |     4 | 4        |    13 | D        |    22 | M        |    31 | V        |
44  |     5 | 5        |    14 | E        |    23 | N        |       |          |
45  |     6 | 6        |    15 | F        |    24 | O        | (pad) | =        |
46  |     7 | 7        |    16 | G        |    25 | P        |       |          |
47  |     8 | 8        |    17 | H        |    26 | Q        |       |          |
48
49  ## Base 64 alphabet
50
51  | Value |  Encoding | Value | Encoding | Value | Encoding | Value | Encoding |
52  |------:|:----------|------:|:---------|------:|:---------|------:|:---------|
53  |     0 | A         |    17 | R        |    34 | i        |    51 | z        |
54  |     1 | B         |    18 | S        |    35 | j        |    52 | 0        |
55  |     2 | C         |    19 | T        |    36 | k        |    53 | 1        |
56  |     3 | D         |    20 | U        |    37 | l        |    54 | 2        |
57  |     4 | E         |    21 | V        |    38 | m        |    55 | 3        |
58  |     5 | F         |    22 | W        |    39 | n        |    56 | 4        |
59  |     6 | G         |    23 | X        |    40 | o        |    57 | 5        |
60  |     7 | H         |    24 | Y        |    41 | p        |    58 | 6        |
61  |     8 | I         |    25 | Z        |    42 | q        |    59 | 7        |
62  |     9 | J         |    26 | a        |    43 | r        |    60 | 8        |
63  |    10 | K         |    27 | b        |    44 | s        |    61 | 9        |
64  |    11 | L         |    28 | c        |    45 | t        |    62 | +        |
65  |    12 | M         |    29 | d        |    46 | u        |    63 | /        |
66  |    13 | N         |    30 | e        |    47 | v        |       |          |
67  |    14 | O         |    31 | f        |    48 | w        | (pad) | =        |
68  |    15 | P         |    32 | g        |    49 | x        |       |          |
69  |    16 | Q         |    33 | h        |    50 | y        |       |          |
70
71  ## Base 64 (URL and filename safe) alphabet
72
73  | Value | Encoding | Value | Encoding | Value | Encoding | Value | Encoding |
74  |------:|:---------|------:|:---------|------:|:---------|------:|:---------|
75  |     0 | A        |    17 | R        |    34 | i        |    51 | z        |
76  |     1 | B        |    18 | S        |    35 | j        |    52 | 0        |
77  |     2 | C        |    19 | T        |    36 | k        |    53 | 1        |
78  |     3 | D        |    20 | U        |    37 | l        |    54 | 2        |
79  |     4 | E        |    21 | V        |    38 | m        |    55 | 3        |
80  |     5 | F        |    22 | W        |    39 | n        |    56 | 4        |
81  |     6 | G        |    23 | X        |    40 | o        |    57 | 5        |
82  |     7 | H        |    24 | Y        |    41 | p        |    58 | 6        |
83  |     8 | I        |    25 | Z        |    42 | q        |    59 | 7        |
84  |     9 | J        |    26 | a        |    43 | r        |    60 | 8        |
85  |    10 | K        |    27 | b        |    44 | s        |    61 | 9        |
86  |    11 | L        |    28 | c        |    45 | t        |    62 | -        |
87  |    12 | M        |    29 | d        |    46 | u        |    63 | _        |
88  |    13 | N        |    30 | e        |    47 | v        |       |          |
89  |    14 | O        |    31 | f        |    48 | w        | (pad) | =        |
90  |    15 | P        |    32 | g        |    49 | x        |       |          |
91  |    16 | Q        |    33 | h        |    50 | y        |       |          |
92
93  """
94
  # Encoding alphabets as charlists. The position of a character in its
  # alphabet is the value it encodes (the clause builders below pair each
  # character with its index).
  b16_alphabet = '0123456789ABCDEF'
  b64_alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'
  b64url_alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_'
  b32_alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
  b32hex_alphabet = '0123456789ABCDEFGHIJKLMNOPQRSTUV'
100
  # Expands into a `case` on `value` whose clauses are generated at compile
  # time by encode_pair_clauses/2: one clause per ordered pair of alphabet
  # characters, mapping the combined value of two digits to the two
  # characters packed into a single integer.
  defmacrop encode_pair(alphabet, case, value) do
    quote do
      case unquote(value) do
        unquote(encode_pair_clauses(alphabet, case))
      end
    end
  end
108
109  defp encode_pair_clauses(alphabet, case) when case in [:sensitive, :upper] do
110    shift = shift(alphabet)
111
112    alphabet
113    |> Enum.with_index()
114    |> encode_clauses(shift)
115  end
116
117  defp encode_pair_clauses(alphabet, :lower) do
118    shift = shift(alphabet)
119
120    alphabet
121    |> Stream.map(fn c -> if c in ?A..?Z, do: c - ?A + ?a, else: c end)
122    |> Enum.with_index()
123    |> encode_clauses(shift)
124  end
125
126  defp shift(alphabet) do
127    alphabet
128    |> length()
129    |> :math.log2()
130    |> round()
131  end
132
133  defp encode_clauses(alphabet, shift) do
134    for {encoding1, value1} <- alphabet,
135        {encoding2, value2} <- alphabet do
136      encoding = bsl(encoding1, 8) + encoding2
137      value = bsl(value1, shift) + value2
138      [clause] = quote(do: (unquote(value) -> unquote(encoding)))
139      clause
140    end
141  end
142
  # Expands into a `case` on `encoding` whose clauses are generated at compile
  # time by decode_char_clauses/2, mapping each accepted character to its
  # value and raising for anything else.
  defmacrop decode_char(alphabet, case, encoding) do
    quote do
      case unquote(encoding) do
        unquote(decode_char_clauses(alphabet, case))
      end
    end
  end
150
  # Builds the `character -> value` clauses that decode_char/3 splices into
  # its `case`. For `:sensitive` and `:upper` the alphabet is used as given,
  # followed by a final clause that raises for any other byte.
  defp decode_char_clauses(alphabet, case) when case in [:sensitive, :upper] do
    clauses =
      alphabet
      |> Enum.with_index()
      |> decode_clauses()

    clauses ++ bad_digit_clause()
  end

  # `:lower`: the upper case letters are replaced by their lower case
  # counterparts (keeping their values). Letters and remaining characters are
  # handed to decode_mixed_clauses/2 as two groups, the larger group first.
  defp decode_char_clauses(alphabet, :lower) do
    {uppers, rest} =
      alphabet
      |> Stream.with_index()
      |> Enum.split_with(fn {encoding, _} -> encoding in ?A..?Z end)

    lowers = Enum.map(uppers, fn {encoding, value} -> {encoding - ?A + ?a, value} end)

    if length(uppers) > length(rest) do
      decode_mixed_clauses(lowers, rest)
    else
      decode_mixed_clauses(rest, lowers)
    end
  end

  # `:mixed` for a 16-character alphabet: the alphabet as given forms the
  # first group, the lower case variants of its letters the second group.
  defp decode_char_clauses(alphabet, :mixed) when length(alphabet) == 16 do
    alphabet = Enum.with_index(alphabet)

    lowers =
      alphabet
      |> Stream.filter(fn {encoding, _} -> encoding in ?A..?Z end)
      |> Enum.map(fn {encoding, value} -> {encoding - ?A + ?a, value} end)

    decode_mixed_clauses(alphabet, lowers)
  end

  # `:mixed` for a 32-character alphabet: every letter is accepted in both
  # cases, emitted as one flat list of clauses ending with the raising clause.
  defp decode_char_clauses(alphabet, :mixed) when length(alphabet) == 32 do
    clauses =
      alphabet
      |> Stream.with_index()
      |> Enum.flat_map(fn {encoding, value} = pair ->
        if encoding in ?A..?Z do
          [pair, {encoding - ?A + ?a, value}]
        else
          [pair]
        end
      end)
      |> decode_clauses()

    clauses ++ bad_digit_clause()
  end
201
  # Emits the clauses for `first`, followed by a catch-all clause that matches
  # the same byte again in a nested `case` over the clauses for `second`; the
  # nested `case` ends with the clause that raises for non-alphabet digits.
  defp decode_mixed_clauses(first, second) do
    first_clauses = decode_clauses(first)
    second_clauses = decode_clauses(second) ++ bad_digit_clause()

    join_clause =
      quote do
        encoding ->
          case encoding do
            unquote(second_clauses)
          end
      end

    first_clauses ++ join_clause
  end
216
217  defp decode_clauses(alphabet) do
218    for {encoding, value} <- alphabet do
219      [clause] = quote(do: (unquote(encoding) -> unquote(value)))
220      clause
221    end
222  end
223
  # Quoted catch-all clause that raises ArgumentError for a byte that matched
  # none of the preceding clauses, reporting it both as a string and as a
  # byte value.
  defp bad_digit_clause() do
    quote do
      c ->
        raise ArgumentError,
              "non-alphabet digit found: #{inspect(<<c>>, binaries: :as_strings)} (byte #{c})"
    end
  end
231
232  defp maybe_pad(body, "", _, _), do: body
233  defp maybe_pad(body, tail, false, _), do: body <> tail
234
235  defp maybe_pad(body, tail, _, group_size) do
236    case group_size - rem(byte_size(tail), group_size) do
237      ^group_size -> body <> tail
238      6 -> body <> tail <> "======"
239      5 -> body <> tail <> "====="
240      4 -> body <> tail <> "===="
241      3 -> body <> tail <> "==="
242      2 -> body <> tail <> "=="
243      1 -> body <> tail <> "="
244    end
245  end
246
247  @doc """
248  Encodes a binary string into a base 16 encoded string.
249
250  ## Options
251
252  The accepted options are:
253
254    * `:case` - specifies the character case to use when encoding
255
256  The values for `:case` can be:
257
258    * `:upper` - uses upper case characters (default)
259    * `:lower` - uses lower case characters
260
261  ## Examples
262
263      iex> Base.encode16("foobar")
264      "666F6F626172"
265
266      iex> Base.encode16("foobar", case: :lower)
267      "666f6f626172"
268
269  """
270  @spec encode16(binary, keyword) :: binary
271  def encode16(data, opts \\ []) when is_binary(data) do
272    case = Keyword.get(opts, :case, :upper)
273    do_encode16(case, data)
274  end
275
276  @doc """
277  Decodes a base 16 encoded string into a binary string.
278
279  ## Options
280
281  The accepted options are:
282
283    * `:case` - specifies the character case to accept when decoding
284
285  The values for `:case` can be:
286
287    * `:upper` - only allows upper case characters (default)
288    * `:lower` - only allows lower case characters
289    * `:mixed` - allows mixed case characters
290
291  ## Examples
292
293      iex> Base.decode16("666F6F626172")
294      {:ok, "foobar"}
295
296      iex> Base.decode16("666f6f626172", case: :lower)
297      {:ok, "foobar"}
298
299      iex> Base.decode16("666f6F626172", case: :mixed)
300      {:ok, "foobar"}
301
302  """
303  @spec decode16(binary, keyword) :: {:ok, binary} | :error
304  def decode16(string, opts \\ []) do
305    {:ok, decode16!(string, opts)}
306  rescue
307    ArgumentError -> :error
308  end
309
310  @doc """
311  Decodes a base 16 encoded string into a binary string.
312
313  ## Options
314
315  The accepted options are:
316
317    * `:case` - specifies the character case to accept when decoding
318
319  The values for `:case` can be:
320
321    * `:upper` - only allows upper case characters (default)
322    * `:lower` - only allows lower case characters
323    * `:mixed` - allows mixed case characters
324
  An `ArgumentError` exception is raised if the string has an odd length or
  a non-alphabet character is present in the string.
327
328  ## Examples
329
330      iex> Base.decode16!("666F6F626172")
331      "foobar"
332
333      iex> Base.decode16!("666f6f626172", case: :lower)
334      "foobar"
335
336      iex> Base.decode16!("666f6F626172", case: :mixed)
337      "foobar"
338
339  """
340  @spec decode16!(binary, keyword) :: binary
341  def decode16!(string, opts \\ [])
342
343  def decode16!(string, opts) when is_binary(string) and rem(byte_size(string), 2) == 0 do
344    case = Keyword.get(opts, :case, :upper)
345    do_decode16(case, string)
346  end
347
348  def decode16!(string, _opts) when is_binary(string) do
349    raise ArgumentError, "odd-length string"
350  end
351
352  @doc """
353  Encodes a binary string into a base 64 encoded string.
354
355  Accepts `padding: false` option which will omit padding from
356  the output string.
357
358  ## Examples
359
360      iex> Base.encode64("foobar")
361      "Zm9vYmFy"
362
363      iex> Base.encode64("foob")
364      "Zm9vYg=="
365
366      iex> Base.encode64("foob", padding: false)
367      "Zm9vYg"
368
369  """
370  @spec encode64(binary, keyword) :: binary
371  def encode64(data, opts \\ []) when is_binary(data) do
372    pad? = Keyword.get(opts, :padding, true)
373    do_encode64(data, pad?)
374  end
375
376  @doc """
377  Decodes a base 64 encoded string into a binary string.
378
379  Accepts `ignore: :whitespace` option which will ignore all the
380  whitespace characters in the input string.
381
382  Accepts `padding: false` option which will ignore padding from
383  the input string.
384
385  ## Examples
386
387      iex> Base.decode64("Zm9vYmFy")
388      {:ok, "foobar"}
389
390      iex> Base.decode64("Zm9vYmFy\\n", ignore: :whitespace)
391      {:ok, "foobar"}
392
393      iex> Base.decode64("Zm9vYg==")
394      {:ok, "foob"}
395
396      iex> Base.decode64("Zm9vYg", padding: false)
397      {:ok, "foob"}
398
399  """
400  @spec decode64(binary, keyword) :: {:ok, binary} | :error
401  def decode64(string, opts \\ []) when is_binary(string) do
402    {:ok, decode64!(string, opts)}
403  rescue
404    ArgumentError -> :error
405  end
406
407  @doc """
408  Decodes a base 64 encoded string into a binary string.
409
410  Accepts `ignore: :whitespace` option which will ignore all the
411  whitespace characters in the input string.
412
413  Accepts `padding: false` option which will ignore padding from
414  the input string.
415
416  An `ArgumentError` exception is raised if the padding is incorrect or
417  a non-alphabet character is present in the string.
418
419  ## Examples
420
421      iex> Base.decode64!("Zm9vYmFy")
422      "foobar"
423
424      iex> Base.decode64!("Zm9vYmFy\\n", ignore: :whitespace)
425      "foobar"
426
427      iex> Base.decode64!("Zm9vYg==")
428      "foob"
429
430      iex> Base.decode64!("Zm9vYg", padding: false)
431      "foob"
432
433  """
434  @spec decode64!(binary, keyword) :: binary
435  def decode64!(string, opts \\ []) when is_binary(string) do
436    pad? = Keyword.get(opts, :padding, true)
437    string |> remove_ignored(opts[:ignore]) |> do_decode64(pad?)
438  end
439
440  @doc """
441  Encodes a binary string into a base 64 encoded string with URL and filename
442  safe alphabet.
443
444  Accepts `padding: false` option which will omit padding from
445  the output string.
446
447  ## Examples
448
449      iex> Base.url_encode64(<<255, 127, 254, 252>>)
450      "_3_-_A=="
451
452      iex> Base.url_encode64(<<255, 127, 254, 252>>, padding: false)
453      "_3_-_A"
454
455  """
456  @spec url_encode64(binary, keyword) :: binary
457  def url_encode64(data, opts \\ []) when is_binary(data) do
458    pad? = Keyword.get(opts, :padding, true)
459    do_encode64url(data, pad?)
460  end
461
462  @doc """
463  Decodes a base 64 encoded string with URL and filename safe alphabet
464  into a binary string.
465
466  Accepts `ignore: :whitespace` option which will ignore all the
467  whitespace characters in the input string.
468
469  Accepts `padding: false` option which will ignore padding from
470  the input string.
471
472  ## Examples
473
474      iex> Base.url_decode64("_3_-_A==")
475      {:ok, <<255, 127, 254, 252>>}
476
477      iex> Base.url_decode64("_3_-_A==\\n", ignore: :whitespace)
478      {:ok, <<255, 127, 254, 252>>}
479
480      iex> Base.url_decode64("_3_-_A", padding: false)
481      {:ok, <<255, 127, 254, 252>>}
482
483  """
484  @spec url_decode64(binary, keyword) :: {:ok, binary} | :error
485  def url_decode64(string, opts \\ []) when is_binary(string) do
486    {:ok, url_decode64!(string, opts)}
487  rescue
488    ArgumentError -> :error
489  end
490
491  @doc """
492  Decodes a base 64 encoded string with URL and filename safe alphabet
493  into a binary string.
494
495  Accepts `ignore: :whitespace` option which will ignore all the
496  whitespace characters in the input string.
497
498  Accepts `padding: false` option which will ignore padding from
499  the input string.
500
501  An `ArgumentError` exception is raised if the padding is incorrect or
502  a non-alphabet character is present in the string.
503
504  ## Examples
505
506      iex> Base.url_decode64!("_3_-_A==")
507      <<255, 127, 254, 252>>
508
509      iex> Base.url_decode64!("_3_-_A==\\n", ignore: :whitespace)
510      <<255, 127, 254, 252>>
511
512      iex> Base.url_decode64!("_3_-_A", padding: false)
513      <<255, 127, 254, 252>>
514
515  """
516  @spec url_decode64!(binary, keyword) :: binary
517  def url_decode64!(string, opts \\ []) when is_binary(string) do
518    pad? = Keyword.get(opts, :padding, true)
519    string |> remove_ignored(opts[:ignore]) |> do_decode64url(pad?)
520  end
521
522  @doc """
523  Encodes a binary string into a base 32 encoded string.
524
525  ## Options
526
527  The accepted options are:
528
529    * `:case` - specifies the character case to use when encoding
530    * `:padding` - specifies whether to apply padding
531
532  The values for `:case` can be:
533
534    * `:upper` - uses upper case characters (default)
535    * `:lower` - uses lower case characters
536
537  The values for `:padding` can be:
538
539    * `true` - pad the output string to the nearest multiple of 8 (default)
540    * `false` - omit padding from the output string
541
542  ## Examples
543
544      iex> Base.encode32("foobar")
545      "MZXW6YTBOI======"
546
547      iex> Base.encode32("foobar", case: :lower)
548      "mzxw6ytboi======"
549
550      iex> Base.encode32("foobar", padding: false)
551      "MZXW6YTBOI"
552
553  """
554  @spec encode32(binary, keyword) :: binary
555  def encode32(data, opts \\ []) when is_binary(data) do
556    case = Keyword.get(opts, :case, :upper)
557    pad? = Keyword.get(opts, :padding, true)
558    do_encode32(case, data, pad?)
559  end
560
561  @doc """
562  Decodes a base 32 encoded string into a binary string.
563
564  ## Options
565
566  The accepted options are:
567
568    * `:case` - specifies the character case to accept when decoding
569    * `:padding` - specifies whether to require padding
570
571  The values for `:case` can be:
572
    * `:upper` - only allows upper case characters (default)
574    * `:lower` - only allows lower case characters
575    * `:mixed` - allows mixed case characters
576
577  The values for `:padding` can be:
578
579    * `true` - requires the input string to be padded to the nearest multiple of 8 (default)
580    * `false` - ignores padding from the input string
581
582  ## Examples
583
584      iex> Base.decode32("MZXW6YTBOI======")
585      {:ok, "foobar"}
586
587      iex> Base.decode32("mzxw6ytboi======", case: :lower)
588      {:ok, "foobar"}
589
590      iex> Base.decode32("mzXW6ytBOi======", case: :mixed)
591      {:ok, "foobar"}
592
593      iex> Base.decode32("MZXW6YTBOI", padding: false)
594      {:ok, "foobar"}
595
596  """
597  @spec decode32(binary, keyword) :: {:ok, binary} | :error
598  def decode32(string, opts \\ []) do
599    {:ok, decode32!(string, opts)}
600  rescue
601    ArgumentError -> :error
602  end
603
604  @doc """
605  Decodes a base 32 encoded string into a binary string.
606
607  An `ArgumentError` exception is raised if the padding is incorrect or
608  a non-alphabet character is present in the string.
609
610  ## Options
611
612  The accepted options are:
613
614    * `:case` - specifies the character case to accept when decoding
615    * `:padding` - specifies whether to require padding
616
617  The values for `:case` can be:
618
619    * `:upper` - only allows upper case characters (default)
620    * `:lower` - only allows lower case characters
621    * `:mixed` - allows mixed case characters
622
623  The values for `:padding` can be:
624
625    * `true` - requires the input string to be padded to the nearest multiple of 8 (default)
626    * `false` - ignores padding from the input string
627
628  ## Examples
629
630      iex> Base.decode32!("MZXW6YTBOI======")
631      "foobar"
632
633      iex> Base.decode32!("mzxw6ytboi======", case: :lower)
634      "foobar"
635
636      iex> Base.decode32!("mzXW6ytBOi======", case: :mixed)
637      "foobar"
638
639      iex> Base.decode32!("MZXW6YTBOI", padding: false)
640      "foobar"
641
642  """
643  @spec decode32!(binary, keyword) :: binary
644  def decode32!(string, opts \\ []) when is_binary(string) do
645    case = Keyword.get(opts, :case, :upper)
646    pad? = Keyword.get(opts, :padding, true)
647    do_decode32(case, string, pad?)
648  end
649
650  @doc """
651  Encodes a binary string into a base 32 encoded string with an
652  extended hexadecimal alphabet.
653
654  ## Options
655
656  The accepted options are:
657
658    * `:case` - specifies the character case to use when encoding
659    * `:padding` - specifies whether to apply padding
660
661  The values for `:case` can be:
662
663    * `:upper` - uses upper case characters (default)
664    * `:lower` - uses lower case characters
665
666  The values for `:padding` can be:
667
668    * `true` - pad the output string to the nearest multiple of 8 (default)
669    * `false` - omit padding from the output string
670
671  ## Examples
672
673      iex> Base.hex_encode32("foobar")
674      "CPNMUOJ1E8======"
675
676      iex> Base.hex_encode32("foobar", case: :lower)
677      "cpnmuoj1e8======"
678
679      iex> Base.hex_encode32("foobar", padding: false)
680      "CPNMUOJ1E8"
681
682  """
683  @spec hex_encode32(binary, keyword) :: binary
684  def hex_encode32(data, opts \\ []) when is_binary(data) do
685    case = Keyword.get(opts, :case, :upper)
686    pad? = Keyword.get(opts, :padding, true)
687    do_encode32hex(case, data, pad?)
688  end
689
690  @doc """
691  Decodes a base 32 encoded string with extended hexadecimal alphabet
692  into a binary string.
693
694  ## Options
695
696  The accepted options are:
697
698    * `:case` - specifies the character case to accept when decoding
699    * `:padding` - specifies whether to require padding
700
701  The values for `:case` can be:
702
703    * `:upper` - only allows upper case characters (default)
704    * `:lower` - only allows lower case characters
705    * `:mixed` - allows mixed case characters
706
707  The values for `:padding` can be:
708
709    * `true` - requires the input string to be padded to the nearest multiple of 8 (default)
710    * `false` - ignores padding from the input string
711
712  ## Examples
713
714      iex> Base.hex_decode32("CPNMUOJ1E8======")
715      {:ok, "foobar"}
716
717      iex> Base.hex_decode32("cpnmuoj1e8======", case: :lower)
718      {:ok, "foobar"}
719
720      iex> Base.hex_decode32("cpnMuOJ1E8======", case: :mixed)
721      {:ok, "foobar"}
722
723      iex> Base.hex_decode32("CPNMUOJ1E8", padding: false)
724      {:ok, "foobar"}
725
726  """
727  @spec hex_decode32(binary, keyword) :: {:ok, binary} | :error
728  def hex_decode32(string, opts \\ []) do
729    {:ok, hex_decode32!(string, opts)}
730  rescue
731    ArgumentError -> :error
732  end
733
734  @doc """
735  Decodes a base 32 encoded string with extended hexadecimal alphabet
736  into a binary string.
737
738  An `ArgumentError` exception is raised if the padding is incorrect or
739  a non-alphabet character is present in the string.
740
741  ## Options
742
743  The accepted options are:
744
745    * `:case` - specifies the character case to accept when decoding
746    * `:padding` - specifies whether to require padding
747
748  The values for `:case` can be:
749
750    * `:upper` - only allows upper case characters (default)
751    * `:lower` - only allows lower case characters
752    * `:mixed` - allows mixed case characters
753
754  The values for `:padding` can be:
755
756    * `true` - requires the input string to be padded to the nearest multiple of 8 (default)
757    * `false` - ignores padding from the input string
758
759  ## Examples
760
761      iex> Base.hex_decode32!("CPNMUOJ1E8======")
762      "foobar"
763
764      iex> Base.hex_decode32!("cpnmuoj1e8======", case: :lower)
765      "foobar"
766
767      iex> Base.hex_decode32!("cpnMuOJ1E8======", case: :mixed)
768      "foobar"
769
770      iex> Base.hex_decode32!("CPNMUOJ1E8", padding: false)
771      "foobar"
772
773  """
774  @spec hex_decode32!(binary, keyword) :: binary
775  def hex_decode32!(string, opts \\ []) when is_binary(string) do
776    case = Keyword.get(opts, :case, :upper)
777    pad? = Keyword.get(opts, :padding, true)
778    do_decode32hex(case, string, pad?)
779  end
780
781  defp remove_ignored(string, nil), do: string
782
783  defp remove_ignored(string, :whitespace) do
784    for <<char::8 <- string>>, char not in '\s\t\r\n', into: <<>>, do: <<char::8>>
785  end
786
787  enc16 = [upper: :enc16_upper, lower: :enc16_lower]
788
789  for {case, fun} <- enc16 do
790    defp unquote(fun)(char) do
791      encode_pair(unquote(b16_alphabet), unquote(case), char)
792    end
793  end
794
  # Empty input encodes to the empty binary, for any `:case`.
  defp do_encode16(_, <<>>), do: <<>>

  # One clause per supported case, each calling its own generated lookup
  # function. Every input byte becomes a 16-bit pair of digits.
  for {case, fun} <- enc16 do
    defp do_encode16(unquote(case), data) do
      # `main` holds the largest prefix whose size is a multiple of 8 bytes;
      # `rest` holds the remaining 0..7 bytes.
      split = 8 * div(byte_size(data), 8)
      <<main::size(split)-binary, rest::binary>> = data

      # Encode `main` eight bytes per iteration.
      main =
        for <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8, c8::8 <- main>>, into: <<>> do
          <<
            unquote(fun)(c1)::16,
            unquote(fun)(c2)::16,
            unquote(fun)(c3)::16,
            unquote(fun)(c4)::16,
            unquote(fun)(c5)::16,
            unquote(fun)(c6)::16,
            unquote(fun)(c7)::16,
            unquote(fun)(c8)::16
          >>
        end

      # Encode the remaining bytes, one clause per possible remainder size.
      case rest do
        <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8>> ->
          <<
            main::binary,
            unquote(fun)(c1)::16,
            unquote(fun)(c2)::16,
            unquote(fun)(c3)::16,
            unquote(fun)(c4)::16,
            unquote(fun)(c5)::16,
            unquote(fun)(c6)::16,
            unquote(fun)(c7)::16
          >>

        <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8>> ->
          <<
            main::binary,
            unquote(fun)(c1)::16,
            unquote(fun)(c2)::16,
            unquote(fun)(c3)::16,
            unquote(fun)(c4)::16,
            unquote(fun)(c5)::16,
            unquote(fun)(c6)::16
          >>

        <<c1::8, c2::8, c3::8, c4::8, c5::8>> ->
          <<
            main::binary,
            unquote(fun)(c1)::16,
            unquote(fun)(c2)::16,
            unquote(fun)(c3)::16,
            unquote(fun)(c4)::16,
            unquote(fun)(c5)::16
          >>

        <<c1::8, c2::8, c3::8, c4::8>> ->
          <<
            main::binary,
            unquote(fun)(c1)::16,
            unquote(fun)(c2)::16,
            unquote(fun)(c3)::16,
            unquote(fun)(c4)::16
          >>

        <<c1::8, c2::8, c3::8>> ->
          <<main::binary, unquote(fun)(c1)::16, unquote(fun)(c2)::16, unquote(fun)(c3)::16>>

        <<c1::8, c2::8>> ->
          <<main::binary, unquote(fun)(c1)::16, unquote(fun)(c2)::16>>

        <<c1::8>> ->
          <<main::binary, unquote(fun)(c1)::16>>

        <<>> ->
          main
      end
    end
  end
873
874  dec16 = [upper: :dec16_upper, lower: :dec16_lower, mixed: :dec16_mixed]
875
876  for {case, fun} <- dec16 do
877    defp unquote(fun)(encoding) do
878      decode_char(unquote(b16_alphabet), unquote(case), encoding)
879    end
880  end
881
882  defp do_decode16(_, <<>>), do: <<>>
883
884  for {case, fun} <- dec16 do
885    defp do_decode16(unquote(case), string) do
886      split = 8 * div(byte_size(string), 8)
887      <<main::size(split)-binary, rest::binary>> = string
888
889      main =
890        for <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8, c8::8 <- main>>, into: <<>> do
891          <<
892            unquote(fun)(c1)::4,
893            unquote(fun)(c2)::4,
894            unquote(fun)(c3)::4,
895            unquote(fun)(c4)::4,
896            unquote(fun)(c5)::4,
897            unquote(fun)(c6)::4,
898            unquote(fun)(c7)::4,
899            unquote(fun)(c8)::4
900          >>
901        end
902
903      case rest do
904        <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8>> ->
905          <<
906            main::bits,
907            unquote(fun)(c1)::4,
908            unquote(fun)(c2)::4,
909            unquote(fun)(c3)::4,
910            unquote(fun)(c4)::4,
911            unquote(fun)(c5)::4,
912            unquote(fun)(c6)::4
913          >>
914
915        <<c1::8, c2::8, c3::8, c4::8>> ->
916          <<
917            main::bits,
918            unquote(fun)(c1)::4,
919            unquote(fun)(c2)::4,
920            unquote(fun)(c3)::4,
921            unquote(fun)(c4)::4
922          >>
923
924        <<c1::8, c2::8>> ->
925          <<main::bits, unquote(fun)(c1)::4, unquote(fun)(c2)::4>>
926
927        <<_::8>> ->
928          raise ArgumentError, "odd-length string"
929
930        <<>> ->
931          main
932      end
933    end
934  end
935
  # Generates, for both base 64 alphabets, a pair lookup (12 bits in, two
  # characters out), a single-character lookup and the encoder itself.
  for {base, alphabet} <- ["64": b64_alphabet, "64url": b64url_alphabet] do
    pair = :"enc#{base}_pair"
    char = :"enc#{base}_char"
    do_encode = :"do_encode#{base}"

    defp unquote(pair)(value) do
      encode_pair(unquote(alphabet), :sensitive, value)
    end

    # Encodes a single 6-bit value by keeping only the low byte (the second
    # character) of the pair lookup result.
    defp unquote(char)(value) do
      value
      |> unquote(pair)()
      |> band(0x00FF)
    end

    defp unquote(do_encode)(<<>>, _), do: <<>>

    defp unquote(do_encode)(data, pad?) do
      # `main` holds the largest prefix whose size is a multiple of 6 bytes;
      # `rest` holds the remaining 0..5 bytes.
      split = 6 * div(byte_size(data), 6)
      <<main::size(split)-binary, rest::binary>> = data

      # Encode `main` in 12-bit steps, each producing two characters.
      main =
        for <<c1::12, c2::12, c3::12, c4::12 <- main>>, into: <<>> do
          <<
            unquote(pair)(c1)::16,
            unquote(pair)(c2)::16,
            unquote(pair)(c3)::16,
            unquote(pair)(c4)::16
          >>
        end

      # Encode the remaining bytes; leftover bits that do not fill a whole
      # character or pair are shifted left so that they are zero-filled on
      # the right.
      tail =
        case rest do
          <<c1::12, c2::12, c3::12, c::4>> ->
            <<
              unquote(pair)(c1)::16,
              unquote(pair)(c2)::16,
              unquote(pair)(c3)::16,
              unquote(char)(bsl(c, 2))::8
            >>

          <<c1::12, c2::12, c3::8>> ->
            <<unquote(pair)(c1)::16, unquote(pair)(c2)::16, unquote(pair)(bsl(c3, 4))::16>>

          <<c1::12, c2::12>> ->
            <<unquote(pair)(c1)::16, unquote(pair)(c2)::16>>

          <<c1::12, c2::4>> ->
            <<unquote(pair)(c1)::16, unquote(char)(bsl(c2, 2))::8>>

          <<c1::8>> ->
            <<unquote(pair)(bsl(c1, 4))::16>>

          <<>> ->
            <<>>
        end

      # Pad the tail to a group of 4 characters when padding is requested.
      maybe_pad(main, tail, pad?, 4)
    end
  end
996
997  for {base, alphabet} <- ["64": b64_alphabet, "64url": b64url_alphabet] do
998    fun = :"dec#{base}"
999    do_decode = :"do_decode#{base}"
1000
1001    defp unquote(fun)(encoding) do
1002      decode_char(unquote(alphabet), :sensitive, encoding)
1003    end
1004
1005    defp unquote(do_decode)(<<>>, _), do: <<>>
1006
1007    defp unquote(do_decode)(string, pad?) do
1008      segs = div(byte_size(string) + 7, 8) - 1
1009      <<main::size(segs)-binary-unit(64), rest::binary>> = string
1010
1011      main =
1012        for <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8, c8::8 <- main>>, into: <<>> do
1013          <<
1014            unquote(fun)(c1)::6,
1015            unquote(fun)(c2)::6,
1016            unquote(fun)(c3)::6,
1017            unquote(fun)(c4)::6,
1018            unquote(fun)(c5)::6,
1019            unquote(fun)(c6)::6,
1020            unquote(fun)(c7)::6,
1021            unquote(fun)(c8)::6
1022          >>
1023        end
1024
1025      case rest do
1026        <<c1::8, c2::8, ?=, ?=>> ->
1027          <<main::bits, unquote(fun)(c1)::6, bsr(unquote(fun)(c2), 4)::2>>
1028
1029        <<c1::8, c2::8, c3::8, ?=>> ->
1030          <<main::bits, unquote(fun)(c1)::6, unquote(fun)(c2)::6, bsr(unquote(fun)(c3), 2)::4>>
1031
1032        <<c1::8, c2::8, c3::8, c4::8>> ->
1033          <<
1034            main::bits,
1035            unquote(fun)(c1)::6,
1036            unquote(fun)(c2)::6,
1037            unquote(fun)(c3)::6,
1038            unquote(fun)(c4)::6
1039          >>
1040
1041        <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, ?=, ?=>> ->
1042          <<
1043            main::bits,
1044            unquote(fun)(c1)::6,
1045            unquote(fun)(c2)::6,
1046            unquote(fun)(c3)::6,
1047            unquote(fun)(c4)::6,
1048            unquote(fun)(c5)::6,
1049            bsr(unquote(fun)(c6), 4)::2
1050          >>
1051
1052        <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8, ?=>> ->
1053          <<
1054            main::bits,
1055            unquote(fun)(c1)::6,
1056            unquote(fun)(c2)::6,
1057            unquote(fun)(c3)::6,
1058            unquote(fun)(c4)::6,
1059            unquote(fun)(c5)::6,
1060            unquote(fun)(c6)::6,
1061            bsr(unquote(fun)(c7), 2)::4
1062          >>
1063
1064        <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8, c8::8>> ->
1065          <<
1066            main::bits,
1067            unquote(fun)(c1)::6,
1068            unquote(fun)(c2)::6,
1069            unquote(fun)(c3)::6,
1070            unquote(fun)(c4)::6,
1071            unquote(fun)(c5)::6,
1072            unquote(fun)(c6)::6,
1073            unquote(fun)(c7)::6,
1074            unquote(fun)(c8)::6
1075          >>
1076
1077        <<c1::8, c2::8>> when not pad? ->
1078          <<main::bits, unquote(fun)(c1)::6, bsr(unquote(fun)(c2), 4)::2>>
1079
1080        <<c1::8, c2::8, c3::8>> when not pad? ->
1081          <<main::bits, unquote(fun)(c1)::6, unquote(fun)(c2)::6, bsr(unquote(fun)(c3), 2)::4>>
1082
1083        <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8>> when not pad? ->
1084          <<
1085            main::bits,
1086            unquote(fun)(c1)::6,
1087            unquote(fun)(c2)::6,
1088            unquote(fun)(c3)::6,
1089            unquote(fun)(c4)::6,
1090            unquote(fun)(c5)::6,
1091            bsr(unquote(fun)(c6), 4)::2
1092          >>
1093
1094        <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8>> when not pad? ->
1095          <<
1096            main::bits,
1097            unquote(fun)(c1)::6,
1098            unquote(fun)(c2)::6,
1099            unquote(fun)(c3)::6,
1100            unquote(fun)(c4)::6,
1101            unquote(fun)(c5)::6,
1102            unquote(fun)(c6)::6,
1103            bsr(unquote(fun)(c7), 2)::4
1104          >>
1105
1106        _ ->
1107          raise ArgumentError, "incorrect padding"
1108      end
1109    end
1110  end
1111
1112  for {base, alphabet} <- ["32": b32_alphabet, "32hex": b32hex_alphabet],
1113      case <- [:upper, :lower] do
1114    pair = :"enc#{base}_#{case}_pair"
1115    char = :"enc#{base}_#{case}_char"
1116    do_encode = :"do_encode#{base}"
1117
1118    defp unquote(pair)(value) do
1119      encode_pair(unquote(alphabet), unquote(case), value)
1120    end
1121
1122    defp unquote(char)(value) do
1123      value
1124      |> unquote(pair)()
1125      |> band(0x00FF)
1126    end
1127
1128    defp unquote(do_encode)(_, <<>>, _), do: <<>>
1129
1130    defp unquote(do_encode)(unquote(case), data, pad?) do
1131      split = 5 * div(byte_size(data), 5)
1132      <<main::size(split)-binary, rest::binary>> = data
1133
1134      main =
1135        for <<c1::10, c2::10, c3::10, c4::10 <- main>>, into: <<>> do
1136          <<
1137            unquote(pair)(c1)::16,
1138            unquote(pair)(c2)::16,
1139            unquote(pair)(c3)::16,
1140            unquote(pair)(c4)::16
1141          >>
1142        end
1143
1144      tail =
1145        case rest do
1146          <<c1::10, c2::10, c3::10, c4::2>> ->
1147            <<
1148              unquote(pair)(c1)::16,
1149              unquote(pair)(c2)::16,
1150              unquote(pair)(c3)::16,
1151              unquote(char)(bsl(c4, 3))::8
1152            >>
1153
1154          <<c1::10, c2::10, c3::4>> ->
1155            <<unquote(pair)(c1)::16, unquote(pair)(c2)::16, unquote(char)(bsl(c3, 1))::8>>
1156
1157          <<c1::10, c2::6>> ->
1158            <<unquote(pair)(c1)::16, unquote(pair)(bsl(c2, 4))::16>>
1159
1160          <<c1::8>> ->
1161            <<unquote(pair)(bsl(c1, 2))::16>>
1162
1163          <<>> ->
1164            <<>>
1165        end
1166
1167      maybe_pad(main, tail, pad?, 8)
1168    end
1169  end
1170
1171  for {base, alphabet} <- ["32": b32_alphabet, "32hex": b32hex_alphabet],
1172      case <- [:upper, :lower, :mixed] do
1173    fun = :"dec#{base}_#{case}"
1174    do_decode = :"do_decode#{base}"
1175
1176    defp unquote(fun)(encoding) do
1177      decode_char(unquote(alphabet), unquote(case), encoding)
1178    end
1179
1180    defp unquote(do_decode)(_, <<>>, _), do: <<>>
1181
1182    defp unquote(do_decode)(unquote(case), string, pad?) do
1183      segs = div(byte_size(string) + 7, 8) - 1
1184      <<main::size(segs)-binary-unit(64), rest::binary>> = string
1185
1186      main =
1187        for <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8, c8::8 <- main>>, into: <<>> do
1188          <<
1189            unquote(fun)(c1)::5,
1190            unquote(fun)(c2)::5,
1191            unquote(fun)(c3)::5,
1192            unquote(fun)(c4)::5,
1193            unquote(fun)(c5)::5,
1194            unquote(fun)(c6)::5,
1195            unquote(fun)(c7)::5,
1196            unquote(fun)(c8)::5
1197          >>
1198        end
1199
1200      case rest do
1201        <<c1::8, c2::8, ?=, ?=, ?=, ?=, ?=, ?=>> ->
1202          <<main::bits, unquote(fun)(c1)::5, bsr(unquote(fun)(c2), 2)::3>>
1203
1204        <<c1::8, c2::8, c3::8, c4::8, ?=, ?=, ?=, ?=>> ->
1205          <<
1206            main::bits,
1207            unquote(fun)(c1)::5,
1208            unquote(fun)(c2)::5,
1209            unquote(fun)(c3)::5,
1210            bsr(unquote(fun)(c4), 4)::1
1211          >>
1212
1213        <<c1::8, c2::8, c3::8, c4::8, c5::8, ?=, ?=, ?=>> ->
1214          <<
1215            main::bits,
1216            unquote(fun)(c1)::5,
1217            unquote(fun)(c2)::5,
1218            unquote(fun)(c3)::5,
1219            unquote(fun)(c4)::5,
1220            bsr(unquote(fun)(c5), 1)::4
1221          >>
1222
1223        <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8, ?=>> ->
1224          <<
1225            main::bits,
1226            unquote(fun)(c1)::5,
1227            unquote(fun)(c2)::5,
1228            unquote(fun)(c3)::5,
1229            unquote(fun)(c4)::5,
1230            unquote(fun)(c5)::5,
1231            unquote(fun)(c6)::5,
1232            bsr(unquote(fun)(c7), 3)::2
1233          >>
1234
1235        <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8, c8::8>> ->
1236          <<
1237            main::bits,
1238            unquote(fun)(c1)::5,
1239            unquote(fun)(c2)::5,
1240            unquote(fun)(c3)::5,
1241            unquote(fun)(c4)::5,
1242            unquote(fun)(c5)::5,
1243            unquote(fun)(c6)::5,
1244            unquote(fun)(c7)::5,
1245            unquote(fun)(c8)::5
1246          >>
1247
1248        <<c1::8, c2::8>> when not pad? ->
1249          <<main::bits, unquote(fun)(c1)::5, bsr(unquote(fun)(c2), 2)::3>>
1250
1251        <<c1::8, c2::8, c3::8, c4::8>> when not pad? ->
1252          <<
1253            main::bits,
1254            unquote(fun)(c1)::5,
1255            unquote(fun)(c2)::5,
1256            unquote(fun)(c3)::5,
1257            bsr(unquote(fun)(c4), 4)::1
1258          >>
1259
1260        <<c1::8, c2::8, c3::8, c4::8, c5::8>> when not pad? ->
1261          <<
1262            main::bits,
1263            unquote(fun)(c1)::5,
1264            unquote(fun)(c2)::5,
1265            unquote(fun)(c3)::5,
1266            unquote(fun)(c4)::5,
1267            bsr(unquote(fun)(c5), 1)::4
1268          >>
1269
1270        <<c1::8, c2::8, c3::8, c4::8, c5::8, c6::8, c7::8>> when not pad? ->
1271          <<
1272            main::bits,
1273            unquote(fun)(c1)::5,
1274            unquote(fun)(c2)::5,
1275            unquote(fun)(c3)::5,
1276            unquote(fun)(c4)::5,
1277            unquote(fun)(c5)::5,
1278            unquote(fun)(c6)::5,
1279            bsr(unquote(fun)(c7), 3)::2
1280          >>
1281
1282        _ ->
1283          raise ArgumentError, "incorrect padding"
1284      end
1285    end
1286  end
1287end
1288