1 #ifndef BOOST_NETWORK_UTILS_BASE64_ENCODE_HPP
2 #define BOOST_NETWORK_UTILS_BASE64_ENCODE_HPP
3 
#include <boost/range/begin.hpp>
#include <boost/range/end.hpp>
#include <algorithm>
#include <cstring>
#include <iterator>
#include <string>
9 
10 namespace boost {
11 namespace network {
12 namespace utils {
13 
14 // Implements a BASE64 converter working on an iterator range.
15 // If the input sequence does not end at the three-byte boundary, the last
16 // encoded value part is remembered in an encoding state to be able to
17 // continue with the next chunk; the BASE64 encoding processes the input
18 // by byte-triplets.
19 //
20 // Summarized interface:
21 //
22 // struct state<Value>  {
23 //     bool empty () const;
24 //     void clear();
25 // }
26 //
27 // OutputIterator encode(InputIterator begin, InputIterator end,
28 //                       OutputIterator output, State & rest)
29 // OutputIterator encode_rest(OutputIterator output, State & rest)
30 // OutputIterator encode(InputRange const & input, OutputIterator output,
31 //                       State & rest)
32 // OutputIterator encode(char const * value, OutputIterator output,
33 //                       state<char> & rest)
34 // std::basic_string<Char> encode(InputRange const & value, State & rest)
35 // std::basic_string<Char> encode(char const * value, state<char> & rest)
36 //
37 // OutputIterator encode(InputIterator begin, InputIterator end,
38 //                       OutputIterator output)
39 // OutputIterator encode(InputRange const & input, OutputIterator output)
40 // OutputIterator encode(char const * value, OutputIterator output)
41 // std::basic_string<Char> encode(InputRange const & value)
// std::basic_string<Char> encode(char const * value)
43 //
44 // See also http://libb64.sourceforge.net, which served as inspiration.
45 // See also http://tools.ietf.org/html/rfc4648 for the specification.
46 
47 namespace base64 {
48 
49 namespace detail {
50 
// Translates one 6-bit value taken from the input bit stream to the
// matching character of the BASE64 output alphabet.  The value is used
// directly as an index to the alphabet table; callers are expected to
// pass values from the range 0..63 only.
template <typename Value>
char encode_value(Value value) {
  static char const alphabet[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz"
      "0123456789+/";
  unsigned int const index = static_cast<unsigned int>(value);
  return alphabet[index];
}
60 
61 }  // namespace detail
62 
// Keeps the encoder state between two chunks of input.  If the
// byte-length of a chunk is not divisible by three, the last
// (incomplete) quantum cannot be encoded right away; its remaining bits
// wait here for the next chunk of octets, which is then processed as if
// the trailing rest of the previous chunk was at its beginning.
template <typename Value>
struct state {
 protected:
  // position within the incomplete quantum reached by the last
  // encoding step: 0 means that the previous quantum was complete
  // (3 octets), 1 that just one octet was available and 2 that two
  // octets were available
  unsigned char triplet_index;
  // bits of the previously processed octets which have not been
  // emitted as a 6-bit code yet, because the last quantum did not stop
  // on the boundary of three octets; they are stored already shifted
  // to the position they take in the next 6-bit code
  Value last_encoded_value;

  // encoding of an input chunk needs to read and update the state
  template <typename InputIterator, typename OutputIterator, typename State>
  friend OutputIterator encode(InputIterator begin, InputIterator end,
                               OutputIterator output, State& rest);

  // finishing the encoding needs to read and clear the state
  template <typename OutputIterator, typename State>
  friend OutputIterator encode_rest(OutputIterator output, State& rest);

 public:
  // Creates an empty state - nothing is pending from a previous chunk.
  state() : triplet_index(0), last_encoded_value(0) {}

  // Creates a copy of another state, including its pending bits.
  state(state const& other)
      : triplet_index(other.triplet_index),
        last_encoded_value(other.last_encoded_value) {}

  // Returns true if the input processed so far ended on the three-octet
  // boundary, which means that no bits are pending.
  bool empty() const { return 0 == triplet_index; }

  // Resets the state to empty.
  void clear() {
    // no rest is pending and no padding is needed for the output
    triplet_index = 0;
    // the pending bits are or-ed with the bits of the next octet when
    // the encoding continues, so they have to be zeroed before another
    // encoding begins
    last_encoded_value = 0;
  }
};
113 
114 // Encodes an input sequence to BASE64 writing it to the output iterator
115 // and stopping if the last input tree-octet quantum was not complete,
116 // in
117 // which case it stores the state for the later continuation, when
118 // another
119 // input chunk is ready for the encoding.  The encoding must be finished
120 // by calling the encode_rest after processing the last chunk.
121 //
122 // std::vector<unsigned char> buffer = ...;
123 // std::basic_string<Char> result;
124 // std::back_insert_iterator<std::basic_string<Char> > appender(result);
125 // base64::state<unsigned char> rest;
126 // base64::encode(buffer.begin(), buffer.end(), appender, rest);
127 // ...
128 // base64::encode_rest(appender, rest);
129 template <typename InputIterator, typename OutputIterator, typename State>
130 OutputIterator encode(InputIterator begin, InputIterator end,
131                       OutputIterator output, State& rest) {
132   typedef typename iterator_value<InputIterator>::type value_type;
133   // continue with the rest of the last chunk - 2 or 4 bits which
134   // are already shifted to the left and need to be or-ed with the
135   // continuing data up to the target 6 bits
136   value_type encoded_value = rest.last_encoded_value;
137   // if the previous chunk stopped at encoding the first (1) or the
138   // second
139   // (2) octet of the three-byte quantum, jump to the right place,
140   // otherwise start the loop with an empty encoded value buffer
141   switch (rest.triplet_index) {
142     // this loop processes the input sequence of bit-octets by bits,
143     // shifting the current_value (used as a cyclic buffer) left and
144     // or-ing next bits there, while pulling the bit-sextets from the
145     // high word of the current_value
146     for (value_type current_value;;) {
147       case 0:
148         // if the input sequence is empty or reached its end at the
149         // 3-byte boundary, finish with an empty encoding state
150         if (begin == end) {
151           rest.triplet_index = 0;
152           // the last encoded value is not interesting - it would not
153           // be used, because processing of the next chunk will start
154           // at the 3-byte boundary
155           rest.last_encoded_value = 0;
156           return output;
157         }
158         // read the first octet from the current triplet
159         current_value = *begin++;
160         // use just the upper 6 bits to encode it to the target alphabet
161         encoded_value = (current_value & 0xfc) >> 2;
162         *output++ = detail::encode_value(encoded_value);
163         // shift the remaining two bits up to make place for the upoming
164         // part of the next octet
165         encoded_value = (current_value & 0x03) << 4;
166       case 1:
167         // if the input sequence reached its end after the first octet
168         // from the quantum triplet, store the encoding state and finish
169         if (begin == end) {
170           rest.triplet_index = 1;
171           rest.last_encoded_value = encoded_value;
172           return output;
173         }
174         // read the second first octet from the current triplet
175         current_value = *begin++;
176         // combine the upper four bits (as the lower part) with the
177         // previous two bits to encode it to the target alphabet
178         encoded_value |= (current_value & 0xf0) >> 4;
179         *output++ = detail::encode_value(encoded_value);
180         // shift the remaining four bits up to make place for the
181         // upoming
182         // part of the next octet
183         encoded_value = (current_value & 0x0f) << 2;
184       case 2:
185         // if the input sequence reached its end after the second octet
186         // from the quantum triplet, store the encoding state and finish
187         if (begin == end) {
188           rest.triplet_index = 2;
189           rest.last_encoded_value = encoded_value;
190           return output;
191         }
192         // read the third octet from the current triplet
193         current_value = *begin++;
194         // combine the upper two bits (as the lower part) with the
195         // previous four bits to encode it to the target alphabet
196         encoded_value |= (current_value & 0xc0) >> 6;
197         *output++ = detail::encode_value(encoded_value);
198         // encode the remaining 6 bits to the target alphabet
199         encoded_value = current_value & 0x3f;
200         *output++ = detail::encode_value(encoded_value);
201     }
202   }
203   return output;
204 }
205 
206 // Finishes encoding of the previously processed chunks.  If their total
207 // byte-length was divisible by three, nothing is needed, if not, the
208 // last
209 // quantum will be encoded as if padded with zeroes, which will be
210 // indicated
211 // by appending '=' characters to the output.  This method must be
212 // always
213 // used at the end of encoding, if the previous chunks were encoded by
214 // the
215 // method overload accepting the encoding state.
216 //
217 // std::vector<unsigned char> buffer = ...;
218 // std::basic_string<Char> result;
219 // std::back_insert_iterator<std::basic_string<Char> > appender(result);
220 // base64::state<unsigned char> rest;
221 // base64::encode(buffer.begin(), buffer.end(), appender, rest);
222 // ...
223 // base64::encode_rest(appender, rest);
224 template <typename OutputIterator, typename State>
225 OutputIterator encode_rest(OutputIterator output, State& rest) {
226   if (!rest.empty()) {
227     // process the last part of the trailing octet (either 4 or 2 bits)
228     // as if the input was padded with zeros - without or-ing the next
229     // input value to it; it has been already shifted to the left
230     *output++ = detail::encode_value(rest.last_encoded_value);
231     // at least one padding '=' will be always needed - at least two
232     // bits are missing in the finally encoded 6-bit value
233     *output++ = '=';
234     // if the last octet was the first in the triplet (the index was
235     // 1), four bits are missing in the finally encoded 6-bit value;
236     // another '=' character is needed for the another two bits
237     if (rest.triplet_index < 2) *output++ = '=';
238     // clear the state all the time to make sure that another call to
239     // the encode_rest would not cause damage; the last encoded value,
240     // which may have been left there, must be zeroed too; it is
241     // important before the next encoding begins, because it works as
242     // a cyclic buffer and must start empty - with zero
243     rest.clear();
244   }
245   return output;
246 }
247 
248 // Encodes a part of an input sequence specified by the pair of begin
249 // and
250 // end iterators.to BASE64 writing it to the output iterator. If its
251 // total
252 // byte-length was not divisible by three, the output will be padded by
253 // the
254 // '=' characters.  If you encode an input consisting of mutiple chunks,
255 // use the method overload maintaining the encoding state.
256 //
257 // std::vector<unsigned char> buffer = ...;
258 // std::basic_string<Char> result;
259 // base64::encode(buffer.begin(), buffer.end(),
260 // std::back_inserter(result));
261 template <typename InputIterator, typename OutputIterator>
encode(InputIterator begin,InputIterator end,OutputIterator output)262 OutputIterator encode(InputIterator begin, InputIterator end,
263                       OutputIterator output) {
264   state<typename iterator_value<InputIterator>::type> rest;
265   output = encode(begin, end, output, rest);
266   return encode_rest(output, rest);
267 }
268 
// Encodes an entire input range, whose boundaries are obtained by
// std::begin and std::end, to BASE64, writing it to the output
// iterator.  If the range does not end on the three-octet boundary,
// the not yet emitted bits are stored in the encoding state, so that
// the encoding can continue when another input chunk is ready.  The
// encoding must be finished by calling encode_rest after processing the
// last chunk.
//
// Warning: if the input is a built-in array holding a zero-terminated
// string, the terminating zero is inside the boundaries of the array
// and is encoded too.  This is usually not expected; an overload
// accepting char const* is provided to encode such strings without the
// terminating zero.
//
// Returns the output iterator advanced past the written characters.
//
// std::vector<unsigned char> buffer = ...;
// std::basic_string<Char> result;
// std::back_insert_iterator<std::basic_string<Char> > appender(result);
// base64::state<unsigned char> rest;
// base64::encode(buffer, appender, rest);
// ...
// base64::encode_rest(appender, rest);
template <typename InputRange, typename OutputIterator, typename State>
OutputIterator encode(InputRange const& input, OutputIterator output,
                      State& rest) {
  auto first = std::begin(input);
  auto last = std::end(input);
  return encode(first, last, output, rest);
}
300 
301 // Encodes an entire string literal to BASE64, writing it to the output
302 // iterator and stopping if the last input tree-octet quantum was not
303 // complete, in which case it stores the state for the later
304 // continuation,
305 // when another input chunk is ready for the encoding.  The encoding
306 // must
307 // be finished by calling the encode_rest after processing the last
308 // chunk.
309 //
310 // The string literal is encoded without processing its terminating zero
311 // character, which is the usual expectation.
312 //
313 // std::basic_string<Char> result;
314 // std::back_insert_iterator<std::basic_string<Char> > appender(result);
315 // base64::state<char> rest;
316 // base64::encode("ab", appender, rest);
317 // ...
318 // base64::encode_rest(appender, rest);
319 template <typename OutputIterator>
encode(char const * value,OutputIterator output,state<char> & rest)320 OutputIterator encode(char const* value, OutputIterator output,
321                       state<char>& rest) {
322   return encode(value, value + strlen(value), output, rest);
323 }
324 
// Encodes an entire input range, whose boundaries are obtained by
// std::begin and std::end, to BASE64, writing it to the output
// iterator.  If its byte-length is not divisible by three, the output
// is padded by the '=' characters.  To encode an input consisting of
// multiple chunks, use the overload maintaining the encoding state
// instead.
//
// Warning: if the input is a built-in array holding a zero-terminated
// string, the terminating zero is inside the boundaries of the array
// and is encoded too.  This is usually not expected; an overload
// accepting char const* is provided to encode such strings without the
// terminating zero.
//
// Returns the output iterator advanced past the written characters.
//
// std::vector<unsigned char> buffer = ...;
// std::basic_string<Char> result;
// base64::encode(buffer, std::back_inserter(result));
template <typename InputRange, typename OutputIterator>
OutputIterator encode(InputRange const& value, OutputIterator output) {
  auto first = std::begin(value);
  auto last = std::end(value);
  return encode(first, last, output);
}
347 
// Encodes an entire zero-terminated string (typically a string literal)
// to BASE64, writing it to the output iterator.  If its length (without
// the terminating zero) is not divisible by three, the output is padded
// by the '=' characters.  To encode an input consisting of multiple
// chunks, use the overload maintaining the encoding state instead.
//
// The string is encoded without its terminating zero character, which
// is the usual expectation.  The value must point to a zero-terminated
// string; its length is computed by std::strlen.
//
// Returns the output iterator advanced past the written characters.
//
// std::basic_string<Char> result;
// base64::encode("ab", std::back_inserter(result));
template <typename OutputIterator>
OutputIterator encode(char const* value, OutputIterator output) {
  // the end of the input is the position of the terminating zero
  return encode(value, value + std::strlen(value), output);
}
363 
// Encodes an entire input range to BASE64 and returns the result as a
// string of the requested character type.  The range is passed to the
// overload writing to an output iterator, which appends the encoded
// characters to the returned string.  If the byte-length of the input
// is not divisible by three, the output is padded by the '='
// characters.  To encode an input consisting of multiple chunks, use
// the overloads maintaining the encoding state and writing to an output
// iterator.
//
// Warning: if the input is a built-in array holding a zero-terminated
// string, the terminating zero is inside the boundaries of the array
// and is encoded too.  This is usually not expected; an overload
// accepting char const* is provided to encode such strings without the
// terminating zero.
//
// std::vector<unsigned char> buffer = ...;
// std::basic_string<Char> result = base64::encode<Char>(buffer);
template <typename Char, typename InputRange>
std::basic_string<Char> encode(InputRange const& value) {
  typedef std::basic_string<Char> string_type;
  string_type encoded;
  std::back_insert_iterator<string_type> appender(encoded);
  encode(value, appender);
  return encoded;
}
386 
// Encodes an entire zero-terminated string (typically a string literal)
// to BASE64 and returns the result as a string of the requested
// character type.  The string is passed to the overload writing to an
// output iterator, which appends the encoded characters to the returned
// string.  If the byte-length of the input is not divisible by three,
// the output is padded by the '=' characters.  To encode an input
// consisting of multiple chunks, use the overloads maintaining the
// encoding state and writing to an output iterator.
//
// The string is encoded without its terminating zero character, which
// is the usual expectation.
//
// std::basic_string<Char> result = base64::encode<Char>("ab");
template <typename Char>
std::basic_string<Char> encode(char const* value) {
  typedef std::basic_string<Char> string_type;
  string_type encoded;
  std::back_insert_iterator<string_type> appender(encoded);
  encode(value, appender);
  return encoded;
}
403 
// The function overloads for string literals encode the input without
// the terminating zero, which is usually expected, because the trailing
// zero byte is not considered a part of the string value; the overloads
// for an input range would take the boundaries of the whole array of
// the string literal and encode the full memory occupied by it -
// including the unwanted last zero byte.
411 
412 }  // namespace base64
413 
414 }  // namespace utils
415 }  // namespace network
416 }  // namespace boost
417 
418 #endif  // BOOST_NETWORK_UTILS_BASE64_ENCODE_HPP
419