1 /***
2  * Copyright (C) Microsoft. All rights reserved.
3  * Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
4  *
5  * =+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
6  *
7  * For the latest on this and related APIs, please see: https://github.com/Microsoft/cpprestsdk
8  *
9  * =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
10  ****/
11 #include "stdafx.h"
12 
13 using namespace web;
14 using namespace utility;
15 
16 std::vector<unsigned char> _from_base64(const utility::string_t& str);
17 utility::string_t _to_base64(const unsigned char* ptr, size_t size);
18 
from_base64(const utility::string_t & str)19 std::vector<unsigned char> __cdecl conversions::from_base64(const utility::string_t& str) { return _from_base64(str); }
20 
to_base64(const std::vector<unsigned char> & input)21 utility::string_t __cdecl conversions::to_base64(const std::vector<unsigned char>& input)
22 {
23     if (input.size() == 0)
24     {
25         // return empty string
26         return utility::string_t();
27     }
28 
29     return _to_base64(&input[0], input.size());
30 }
31 
to_base64(uint64_t input)32 utility::string_t __cdecl conversions::to_base64(uint64_t input)
33 {
34     return _to_base64(reinterpret_cast<const unsigned char*>(&input), sizeof(input));
35 }
36 
// Encoding alphabet: position i holds the character for the 6-bit value i.
static const char* _base64_enctbl = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Decoding table indexed by 7-bit character code (16 entries per row).
//   0..63 : the 6-bit value of an alphabet character
//   254   : the '=' padding character
//   255   : a character that is not part of the alphabet
const std::array<unsigned char, 128> _base64_dectbl = {{
    // 0x00 - 0x0F
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    // 0x10 - 0x1F
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
    // 0x20 - 0x2F   ('+' = 0x2B, '/' = 0x2F)
    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 255, 255, 255, 63,
    // 0x30 - 0x3F   ('0'..'9', '=' = 0x3D)
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255,
    // 0x40 - 0x4F   ('A'..'O' from 0x41)
    255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
    // 0x50 - 0x5F   ('P'..'Z')
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 255,
    // 0x60 - 0x6F   ('a'..'o' from 0x61)
    255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    // 0x70 - 0x7F   ('p'..'z')
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255}};
45 
// Bit-field view of three bytes as four 6-bit base64 digits.
// _0 and _3 are whole digits; digits 1 and 2 straddle a byte boundary and are
// split into a high part (_N_1) and a low part (_N_2).
// Fields are grouped one declaration per byte; their order is significant and
// the mapping onto physical bits depends on the compiler's bit-field layout.
struct _triple_byte
{
    // first byte
    unsigned char _1_1 : 2, _0 : 6;
    // second byte
    unsigned char _2_1 : 4, _1_2 : 4;
    // third byte
    unsigned char _3 : 6, _2_2 : 2;
};
55 
// Bit-field view of two bytes: one whole 6-bit digit (_0), both parts of
// digit 1 (_1_1 high, _1_2 low) and the high part of digit 2 (_2_1).
// Field order is significant; the physical bit positions depend on the
// compiler's bit-field layout.
struct _double_byte
{
    // first byte
    unsigned char _1_1 : 2, _0 : 6;
    // second byte
    unsigned char _2_1 : 4, _1_2 : 4;
};
63 
// Bit-field view of one byte: one whole 6-bit digit (_0) and the high two
// bits of the following digit (_1_1).
// Field order is significant; the physical bit positions depend on the
// compiler's bit-field layout.
struct _single_byte
{
    unsigned char _1_1 : 2, _0 : 6;
};
69 
70 //
71 // A note on the implementation of BASE64 encoding and decoding:
72 //
73 // This is a fairly basic and naive implementation; there is probably a lot of room for
74 // performance improvement, as well as for adding options such as support for URI-safe base64,
75 // ignoring CRLF, relaxed validation rules, etc. The decoder is currently pretty strict.
76 //
77 
78 #ifdef __GNUC__
79 // gcc is concerned about the bitfield uses in the code, something we simply need to ignore.
80 #pragma GCC diagnostic ignored "-Wconversion"
81 #endif
_from_base64(const utility::string_t & input)82 std::vector<unsigned char> _from_base64(const utility::string_t& input)
83 {
84     std::vector<unsigned char> result;
85 
86     if (input.empty()) return result;
87 
88     size_t padding = 0;
89 
90     // Validation
91     {
92         auto size = input.size();
93 
94         if ((size % 4) != 0)
95         {
96             throw std::runtime_error("length of base64 string is not an even multiple of 4");
97         }
98 
99         for (auto iter = input.begin(); iter != input.end(); ++iter, --size)
100         {
101             const size_t ch_sz = static_cast<size_t>(*iter);
102             if (ch_sz >= _base64_dectbl.size() || _base64_dectbl[ch_sz] == 255)
103             {
104                 throw std::runtime_error("invalid character found in base64 string");
105             }
106             if (_base64_dectbl[ch_sz] == 254)
107             {
108                 padding++;
109                 // padding only at the end
110                 if (size > 2)
111                 {
112                     throw std::runtime_error("invalid padding character found in base64 string");
113                 }
114                 if (size == 2)
115                 {
116                     const size_t ch2_sz = static_cast<size_t>(*(iter + 1));
117                     if (ch2_sz >= _base64_dectbl.size() || _base64_dectbl[ch2_sz] != 254)
118                     {
119                         throw std::runtime_error("invalid padding character found in base64 string");
120                     }
121                 }
122             }
123         }
124     }
125 
126     auto size = input.size();
127     const char_t* ptr = &input[0];
128 
129     auto outsz = (size / 4) * 3;
130     outsz -= padding;
131 
132     result.resize(outsz);
133 
134     size_t idx = 0;
135     for (; size > 4; ++idx)
136     {
137         unsigned char target[3];
138         memset(target, 0, sizeof(target));
139         _triple_byte* record = reinterpret_cast<_triple_byte*>(target);
140 
141         unsigned char val0 = _base64_dectbl[ptr[0]];
142         unsigned char val1 = _base64_dectbl[ptr[1]];
143         unsigned char val2 = _base64_dectbl[ptr[2]];
144         unsigned char val3 = _base64_dectbl[ptr[3]];
145 
146         record->_0 = val0;
147         record->_1_1 = val1 >> 4;
148         result[idx] = target[0];
149 
150         record->_1_2 = val1 & 0xF;
151         record->_2_1 = val2 >> 2;
152         result[++idx] = target[1];
153 
154         record->_2_2 = val2 & 0x3;
155         record->_3 = val3 & 0x3F;
156         result[++idx] = target[2];
157 
158         ptr += 4;
159         size -= 4;
160     }
161 
162     // Handle the last four bytes separately, to avoid having the conditional statements
163     // in all the iterations (a performance issue).
164 
165     {
166         unsigned char target[3];
167         memset(target, 0, sizeof(target));
168         _triple_byte* record = reinterpret_cast<_triple_byte*>(target);
169 
170         unsigned char val0 = _base64_dectbl[ptr[0]];
171         unsigned char val1 = _base64_dectbl[ptr[1]];
172         unsigned char val2 = _base64_dectbl[ptr[2]];
173         unsigned char val3 = _base64_dectbl[ptr[3]];
174 
175         record->_0 = val0;
176         record->_1_1 = val1 >> 4;
177         result[idx] = target[0];
178 
179         record->_1_2 = val1 & 0xF;
180         if (val2 != 254)
181         {
182             record->_2_1 = val2 >> 2;
183             result[++idx] = target[1];
184         }
185         else
186         {
187             // There shouldn't be any information (ones) in the unused bits,
188             if (record->_1_2 != 0)
189             {
190                 throw std::runtime_error("Invalid end of base64 string");
191             }
192             return result;
193         }
194 
195         record->_2_2 = val2 & 0x3;
196         if (val3 != 254)
197         {
198             record->_3 = val3 & 0x3F;
199             result[++idx] = target[2];
200         }
201         else
202         {
203             // There shouldn't be any information (ones) in the unused bits.
204             if (record->_2_2 != 0)
205             {
206                 throw std::runtime_error("Invalid end of base64 string");
207             }
208             return result;
209         }
210     }
211 
212     return result;
213 }
214 
_to_base64(const unsigned char * ptr,size_t size)215 utility::string_t _to_base64(const unsigned char* ptr, size_t size)
216 {
217     utility::string_t result;
218 
219     for (; size >= 3;)
220     {
221         const _triple_byte* record = reinterpret_cast<const _triple_byte*>(ptr);
222         unsigned char idx0 = record->_0;
223         unsigned char idx1 = (record->_1_1 << 4) | record->_1_2;
224         unsigned char idx2 = (record->_2_1 << 2) | record->_2_2;
225         unsigned char idx3 = record->_3;
226         result.push_back(char_t(_base64_enctbl[idx0]));
227         result.push_back(char_t(_base64_enctbl[idx1]));
228         result.push_back(char_t(_base64_enctbl[idx2]));
229         result.push_back(char_t(_base64_enctbl[idx3]));
230         size -= 3;
231         ptr += 3;
232     }
233     switch (size)
234     {
235         case 1:
236         {
237             const _single_byte* record = reinterpret_cast<const _single_byte*>(ptr);
238             unsigned char idx0 = record->_0;
239             unsigned char idx1 = (record->_1_1 << 4);
240             result.push_back(char_t(_base64_enctbl[idx0]));
241             result.push_back(char_t(_base64_enctbl[idx1]));
242             result.push_back('=');
243             result.push_back('=');
244             break;
245         }
246         case 2:
247         {
248             const _double_byte* record = reinterpret_cast<const _double_byte*>(ptr);
249             unsigned char idx0 = record->_0;
250             unsigned char idx1 = (record->_1_1 << 4) | record->_1_2;
251             unsigned char idx2 = (record->_2_1 << 2);
252             result.push_back(char_t(_base64_enctbl[idx0]));
253             result.push_back(char_t(_base64_enctbl[idx1]));
254             result.push_back(char_t(_base64_enctbl[idx2]));
255             result.push_back('=');
256             break;
257         }
258     }
259     return result;
260 }
261