1 #ifndef BOOST_NETWORK_UTILS_BASE64_ENCODE_HPP
2 #define BOOST_NETWORK_UTILS_BASE64_ENCODE_HPP
3
#include <boost/range/begin.hpp>
#include <boost/range/end.hpp>
#include <algorithm>
#include <cstring>
#include <iterator>
#include <string>
9
10 namespace boost {
11 namespace network {
12 namespace utils {
13
// Implements a BASE64 converter working on an iterator range.
// If the input sequence does not end at the three-byte boundary, the last
// encoded value part is remembered in an encoding state to be able to
// continue with the next chunk; the BASE64 encoding processes the input
// by byte-triplets.
//
// Summarized interface:
//
// struct state<Value> {
//     bool empty() const;
//     void clear();
// }
//
// OutputIterator encode(InputIterator begin, InputIterator end,
//                       OutputIterator output, State & rest)
// OutputIterator encode_rest(OutputIterator output, State & rest)
// OutputIterator encode(InputRange const & input, OutputIterator output,
//                       State & rest)
// OutputIterator encode(char const * value, OutputIterator output,
//                       state<char> & rest)
// std::basic_string<Char> encode(InputRange const & value, State & rest)
// std::basic_string<Char> encode(char const * value, state<char> & rest)
//
// OutputIterator encode(InputIterator begin, InputIterator end,
//                       OutputIterator output)
// OutputIterator encode(InputRange const & input, OutputIterator output)
// OutputIterator encode(char const * value, OutputIterator output)
// std::basic_string<Char> encode(InputRange const & value)
// std::basic_string<Char> encode(char const * value)
//
// See also http://libb64.sourceforge.net, which served as inspiration.
// See also http://tools.ietf.org/html/rfc4648 for the specification.
46
47 namespace base64 {
48
namespace detail {

// Picks the character of the BASE64 output alphabet ("A-Z", "a-z",
// "0-9", "+", "/") that represents one 6-bit value taken from the input
// sequence.
//
// value - the 6-bit quantity to encode. The callers in this header always
//         pass a value in the range 0..63; no range check is performed
//         here, the value is used directly as an index into the alphabet.
//
// Returns the alphabet character found at that index.
template <typename Value>
char encode_value(Value value) {
  static char const encoding[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
      "+/";
  return encoding[static_cast<unsigned int>(value)];
}

}  // namespace detail
62
// Stores the state after processing the last chunk by the encoder. If the
// chunk byte-length is not divisible by three, the last (incomplete) value
// quantum cannot be encoded right away; it has to wait for the next chunk
// of octets, which will be processed joined (as if the trailing rest from
// the previous one was at its beginning).
//
// Value is the type used to keep the partially encoded bits between two
// encode calls.
//
// Copying and assignment use the implicitly generated member-wise
// operations; the state holds nothing but two plain values.
template <typename Value>
struct state {
  // Creates an empty state: no pending octets and a zeroed bit buffer.
  state() : triplet_index(0), last_encoded_value(0) {}

  // Returns true if no incomplete quantum is pending, that is if the
  // input processed so far ended on a three-octet boundary (or nothing
  // has been processed yet).
  bool empty() const { return triplet_index == 0; }

  // Resets the state so that the next encoding starts from scratch.
  void clear() {
    // indicate that no rest has been left in the last encoded value
    // and no padding is needed for the encoded output
    triplet_index = 0;
    // the bits left over from the last encoding step must be zeroed too;
    // the next encoding or-s new bits into this value and therefore has
    // to start with zero
    last_encoded_value = 0;
  }

 protected:
  // number of octets of the incomplete quantum which have been processed
  // so far; 0 means that the previous quantum was a complete triplet,
  // 1 that just one octet was available and 2 that two octets were
  // available
  unsigned char triplet_index;
  // the bits of the previously processed octets which were not yet
  // emitted as a 6-bit code, because the last quantum did not stop on
  // the boundary of three octets; they are already shifted to their
  // position within the next 6-bit code
  Value last_encoded_value;

  // encoding of an input chunk needs to read and update the state
  template <typename InputIterator, typename OutputIterator, typename State>
  friend OutputIterator encode(InputIterator begin, InputIterator end,
                               OutputIterator output, State& rest);

  // finishing the encoding needs to read and clear the state
  template <typename OutputIterator, typename State>
  friend OutputIterator encode_rest(OutputIterator output, State& rest);
};
113
114 // Encodes an input sequence to BASE64 writing it to the output iterator
115 // and stopping if the last input tree-octet quantum was not complete,
116 // in
117 // which case it stores the state for the later continuation, when
118 // another
119 // input chunk is ready for the encoding. The encoding must be finished
120 // by calling the encode_rest after processing the last chunk.
121 //
122 // std::vector<unsigned char> buffer = ...;
123 // std::basic_string<Char> result;
124 // std::back_insert_iterator<std::basic_string<Char> > appender(result);
125 // base64::state<unsigned char> rest;
126 // base64::encode(buffer.begin(), buffer.end(), appender, rest);
127 // ...
128 // base64::encode_rest(appender, rest);
129 template <typename InputIterator, typename OutputIterator, typename State>
130 OutputIterator encode(InputIterator begin, InputIterator end,
131 OutputIterator output, State& rest) {
132 typedef typename iterator_value<InputIterator>::type value_type;
133 // continue with the rest of the last chunk - 2 or 4 bits which
134 // are already shifted to the left and need to be or-ed with the
135 // continuing data up to the target 6 bits
136 value_type encoded_value = rest.last_encoded_value;
137 // if the previous chunk stopped at encoding the first (1) or the
138 // second
139 // (2) octet of the three-byte quantum, jump to the right place,
140 // otherwise start the loop with an empty encoded value buffer
141 switch (rest.triplet_index) {
142 // this loop processes the input sequence of bit-octets by bits,
143 // shifting the current_value (used as a cyclic buffer) left and
144 // or-ing next bits there, while pulling the bit-sextets from the
145 // high word of the current_value
146 for (value_type current_value;;) {
147 case 0:
148 // if the input sequence is empty or reached its end at the
149 // 3-byte boundary, finish with an empty encoding state
150 if (begin == end) {
151 rest.triplet_index = 0;
152 // the last encoded value is not interesting - it would not
153 // be used, because processing of the next chunk will start
154 // at the 3-byte boundary
155 rest.last_encoded_value = 0;
156 return output;
157 }
158 // read the first octet from the current triplet
159 current_value = *begin++;
160 // use just the upper 6 bits to encode it to the target alphabet
161 encoded_value = (current_value & 0xfc) >> 2;
162 *output++ = detail::encode_value(encoded_value);
163 // shift the remaining two bits up to make place for the upoming
164 // part of the next octet
165 encoded_value = (current_value & 0x03) << 4;
166 case 1:
167 // if the input sequence reached its end after the first octet
168 // from the quantum triplet, store the encoding state and finish
169 if (begin == end) {
170 rest.triplet_index = 1;
171 rest.last_encoded_value = encoded_value;
172 return output;
173 }
174 // read the second first octet from the current triplet
175 current_value = *begin++;
176 // combine the upper four bits (as the lower part) with the
177 // previous two bits to encode it to the target alphabet
178 encoded_value |= (current_value & 0xf0) >> 4;
179 *output++ = detail::encode_value(encoded_value);
180 // shift the remaining four bits up to make place for the
181 // upoming
182 // part of the next octet
183 encoded_value = (current_value & 0x0f) << 2;
184 case 2:
185 // if the input sequence reached its end after the second octet
186 // from the quantum triplet, store the encoding state and finish
187 if (begin == end) {
188 rest.triplet_index = 2;
189 rest.last_encoded_value = encoded_value;
190 return output;
191 }
192 // read the third octet from the current triplet
193 current_value = *begin++;
194 // combine the upper two bits (as the lower part) with the
195 // previous four bits to encode it to the target alphabet
196 encoded_value |= (current_value & 0xc0) >> 6;
197 *output++ = detail::encode_value(encoded_value);
198 // encode the remaining 6 bits to the target alphabet
199 encoded_value = current_value & 0x3f;
200 *output++ = detail::encode_value(encoded_value);
201 }
202 }
203 return output;
204 }
205
206 // Finishes encoding of the previously processed chunks. If their total
207 // byte-length was divisible by three, nothing is needed, if not, the
208 // last
209 // quantum will be encoded as if padded with zeroes, which will be
210 // indicated
211 // by appending '=' characters to the output. This method must be
212 // always
213 // used at the end of encoding, if the previous chunks were encoded by
214 // the
215 // method overload accepting the encoding state.
216 //
217 // std::vector<unsigned char> buffer = ...;
218 // std::basic_string<Char> result;
219 // std::back_insert_iterator<std::basic_string<Char> > appender(result);
220 // base64::state<unsigned char> rest;
221 // base64::encode(buffer.begin(), buffer.end(), appender, rest);
222 // ...
223 // base64::encode_rest(appender, rest);
224 template <typename OutputIterator, typename State>
225 OutputIterator encode_rest(OutputIterator output, State& rest) {
226 if (!rest.empty()) {
227 // process the last part of the trailing octet (either 4 or 2 bits)
228 // as if the input was padded with zeros - without or-ing the next
229 // input value to it; it has been already shifted to the left
230 *output++ = detail::encode_value(rest.last_encoded_value);
231 // at least one padding '=' will be always needed - at least two
232 // bits are missing in the finally encoded 6-bit value
233 *output++ = '=';
234 // if the last octet was the first in the triplet (the index was
235 // 1), four bits are missing in the finally encoded 6-bit value;
236 // another '=' character is needed for the another two bits
237 if (rest.triplet_index < 2) *output++ = '=';
238 // clear the state all the time to make sure that another call to
239 // the encode_rest would not cause damage; the last encoded value,
240 // which may have been left there, must be zeroed too; it is
241 // important before the next encoding begins, because it works as
242 // a cyclic buffer and must start empty - with zero
243 rest.clear();
244 }
245 return output;
246 }
247
248 // Encodes a part of an input sequence specified by the pair of begin
249 // and
250 // end iterators.to BASE64 writing it to the output iterator. If its
251 // total
252 // byte-length was not divisible by three, the output will be padded by
253 // the
254 // '=' characters. If you encode an input consisting of mutiple chunks,
255 // use the method overload maintaining the encoding state.
256 //
257 // std::vector<unsigned char> buffer = ...;
258 // std::basic_string<Char> result;
259 // base64::encode(buffer.begin(), buffer.end(),
260 // std::back_inserter(result));
261 template <typename InputIterator, typename OutputIterator>
encode(InputIterator begin,InputIterator end,OutputIterator output)262 OutputIterator encode(InputIterator begin, InputIterator end,
263 OutputIterator output) {
264 state<typename iterator_value<InputIterator>::type> rest;
265 output = encode(begin, end, output, rest);
266 return encode_rest(output, rest);
267 }
268
// Encodes an entire input sequence to BASE64, whose boundaries are
// obtained by std::begin and std::end, writing it to the output iterator
// and stopping if the last input three-octet quantum was not complete, in
// which case it stores the state for the later continuation, when another
// input chunk is ready for the encoding. The encoding must be finished
// by calling encode_rest after processing the last chunk.
//
// Warning: a built-in array passed to this overload is processed in its
// full length, including a terminating zero character if it has one.
// This is unexpected at least for string literals; the overload taking
// char const* is provided to avoid it.
//
// input  - the chunk of input octets to encode
// output - receives the characters of the BASE64 alphabet
// rest   - encoding state carried over between the chunks
//
// Returns the output iterator advanced past the last written character.
//
//   std::vector<unsigned char> buffer = ...;
//   std::basic_string<Char> result;
//   std::back_insert_iterator<std::basic_string<Char> > appender(result);
//   base64::state<unsigned char> rest;
//   base64::encode(buffer, appender, rest);
//   ...
//   base64::encode_rest(appender, rest);
template <typename InputRange, typename OutputIterator, typename State>
OutputIterator encode(InputRange const& input, OutputIterator output,
                      State& rest) {
  return encode(std::begin(input), std::end(input), output, rest);
}
300
301 // Encodes an entire string literal to BASE64, writing it to the output
302 // iterator and stopping if the last input tree-octet quantum was not
303 // complete, in which case it stores the state for the later
304 // continuation,
305 // when another input chunk is ready for the encoding. The encoding
306 // must
307 // be finished by calling the encode_rest after processing the last
308 // chunk.
309 //
310 // The string literal is encoded without processing its terminating zero
311 // character, which is the usual expectation.
312 //
313 // std::basic_string<Char> result;
314 // std::back_insert_iterator<std::basic_string<Char> > appender(result);
315 // base64::state<char> rest;
316 // base64::encode("ab", appender, rest);
317 // ...
318 // base64::encode_rest(appender, rest);
319 template <typename OutputIterator>
encode(char const * value,OutputIterator output,state<char> & rest)320 OutputIterator encode(char const* value, OutputIterator output,
321 state<char>& rest) {
322 return encode(value, value + strlen(value), output, rest);
323 }
324
// Encodes an entire input sequence, whose boundaries are obtained by
// std::begin and std::end, to BASE64 writing it to the output iterator.
// If its total byte-length is not divisible by three, the output is padded
// by the '=' characters. If you encode an input consisting of multiple
// chunks, use the function overload maintaining the encoding state.
//
// Warning: a built-in array passed to this overload is processed in its
// full length, including a terminating zero character if it has one.
// This is unexpected at least for string literals; the overload taking
// char const* is provided to avoid it.
//
// value  - the complete input sequence to encode
// output - receives the encoded characters including the padding
//
// Returns the output iterator advanced past the last written character.
//
//   std::vector<unsigned char> buffer = ...;
//   std::basic_string<Char> result;
//   base64::encode(buffer, std::back_inserter(result));
template <typename InputRange, typename OutputIterator>
OutputIterator encode(InputRange const& value, OutputIterator output) {
  return encode(std::begin(value), std::end(value), output);
}
347
// Encodes an entire zero-terminated string (typically a string literal)
// to BASE64 writing it to the output iterator. If its total length
// (without the trailing zero) is not divisible by three, the output is
// padded by the '=' characters. If you encode an input consisting of
// multiple chunks, use the function overload maintaining the encoding
// state.
//
// The string is encoded without processing its terminating zero
// character, which is the usual expectation.
//
// value  - zero-terminated string to encode; must not be a null pointer,
//          because its length is determined by std::strlen
// output - receives the encoded characters including the padding
//
// Returns the output iterator advanced past the last written character.
//
//   std::basic_string<Char> result;
//   base64::encode("ab", std::back_inserter(result));
template <typename OutputIterator>
OutputIterator encode(char const* value, OutputIterator output) {
  return encode(value, value + std::strlen(value), output);
}
363
// Encodes an entire input sequence to BASE64 returning the result as a
// string. The input is handed over to the overload writing to an output
// iterator, which appends to the returned string. If the total
// byte-length of the input is not divisible by three, the output is padded
// by the '=' characters. If you encode an input consisting of multiple
// chunks, use the functions maintaining the encoding state and writing
// to an output iterator.
//
// Warning: a built-in array passed to this overload is processed in its
// full length, including a terminating zero character if it has one.
// This is unexpected at least for string literals; the overload taking
// char const* is provided to avoid it.
//
// Char  - character type of the returned string; has to be specified
//         explicitly by the caller
// value - the complete input sequence to encode
//
// Returns the BASE64 text including the padding.
//
//   std::vector<unsigned char> buffer = ...;
//   std::basic_string<Char> result = base64::encode<Char>(buffer);
template <typename Char, typename InputRange>
std::basic_string<Char> encode(InputRange const& value) {
  std::basic_string<Char> result;
  encode(value, std::back_inserter(result));
  return result;
}
386
// Encodes an entire zero-terminated string (typically a string literal)
// to BASE64 returning the result as a string. If its total byte-length is
// not divisible by three, the output is padded by the '=' characters. If
// you encode an input consisting of multiple chunks, use the functions
// maintaining the encoding state and writing to an output iterator.
//
// The string is encoded without processing its terminating zero
// character, which is the usual expectation.
//
// Char  - character type of the returned string; has to be specified
//         explicitly by the caller
// value - zero-terminated string to encode; it is passed on to the
//         char const* overload writing to an output iterator
//
// Returns the BASE64 text including the padding.
//
//   std::basic_string<Char> result = base64::encode<Char>("ab");
template <typename Char>
std::basic_string<Char> encode(char const* value) {
  std::basic_string<Char> result;
  encode(value, std::back_inserter(result));
  return result;
}
403
// The function overloads for string literals encode the input without
// the terminating zero, which is usually expected, because the trailing
// zero byte is not considered a part of the string value; the overloads
// for an input range would treat the string literal as a character array
// and encode the full memory occupied by the string literal - including
// the unwanted last zero byte.
411
412 } // namespace base64
413
414 } // namespace utils
415 } // namespace network
416 } // namespace boost
417
418 #endif // BOOST_NETWORK_UTILS_BASE64_ENCODE_HPP
419