1 #ifndef BOOST_NETWORK_UTILS_BASE64_STANDALONE_HPP
2 #define BOOST_NETWORK_UTILS_BASE64_STANDALONE_HPP
3 
4 #include <boost/range/begin.hpp>
5 #include <boost/range/end.hpp>
6 #include <algorithm>
7 #include <iterator>
8 #include <string>
9 
10 namespace boost {
11 namespace network {
12 namespace utils {
13 
// Implements a BASE64 converter working on an iterator range from
// scratch.  If the input sequence does not end on a three-byte boundary,
// the last partially encoded value is remembered in an encoding state,
// so that the encoding can continue with the next chunk; the BASE64
// encoding processes the input by byte-triplets.
22 //
23 // Summarized interface:
24 //
25 // struct state<Value>  {
26 //     bool empty () const;
27 //     void clear();
28 // }
29 //
30 // OutputIterator encode(InputIterator begin, InputIterator end,
31 //                       OutputIterator output, State & rest)
32 // OutputIterator encode_rest(OutputIterator output, State & rest)
33 // OutputIterator encode(InputRange const & input, OutputIterator output,
34 //                       State & rest)
35 // OutputIterator encode(char const * value, OutputIterator output,
36 //                       state<char> & rest)
37 // std::basic_string<Char> encode(InputRange const & value, State & rest)
38 // std::basic_string<Char> encode(char const * value, state<char> & rest)
39 //
40 // OutputIterator encode(InputIterator begin, InputIterator end,
41 //                       OutputIterator output)
42 // OutputIterator encode(InputRange const & input, OutputIterator output)
43 // OutputIterator encode(char const * value, OutputIterator output)
44 // std::basic_string<Char> encode(InputRange const & value)
// std::basic_string<Char> encode(char const * value)
46 
47 namespace base64_standalone {
48 
49 namespace detail {
50 
// Translates a single 6-bit value into the matching character of the
// BASE64 output alphabet.  The value is used directly as an index into
// the alphabet; no range check is performed here.
template <typename Value>
char encode_value(Value value) {
  static char const alphabet[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz"
      "0123456789+/";
  unsigned int const index = static_cast<unsigned int>(value);
  return alphabet[index];
}
60 
61 }  // namespace detail
62 
// Keeps what the encoder has to remember between two input chunks.  If
// the byte-length of a chunk is not divisible by three, the trailing
// (incomplete) quantum cannot be encoded completely right away; its
// leftover bits wait here for the next chunk of octets, which is then
// processed as if the trailing rest of the previous chunk was at its
// beginning.
template <typename Value>
struct state {
  // a fresh state has no pending octets and a zeroed bit buffer
  state() : triplet_index(0), last_encoded_value(0) {}

  // copies both the position within the triplet and the pending bits
  state(state const& source)
      : triplet_index(source.triplet_index),
        last_encoded_value(source.last_encoded_value) {}

  // true if no incomplete quantum is waiting to be finished
  bool empty() const { return 0 == triplet_index; }

  void clear() {
    // the pending bits, which may have been left from the last encoding
    // step, are zeroed, because the next encoding or-s new bits into
    // this value and must start from zero
    last_encoded_value = 0;
    // no rest is left from the last quantum and no padding is needed
    // for the encoded output
    triplet_index = 0;
  }

  unsigned short padding_length() const {
    // a complete quantum needs no padding
    if (triplet_index == 0) return 0;
    // the fewer octets of the triplet were processed, the more padding
    // characters are needed - that is why the complement is returned
    return static_cast<unsigned short>(3 - triplet_index);
  }

 protected:
  // number of octets of the incomplete quantum which were processed the
  // last time; 0 means that the previous quantum was complete (three
  // octets), 1 that just one octet was available and 2 that two octets
  // were available
  unsigned char triplet_index;
  // the bits of the previously processed octets which were not emitted
  // as a 6-bit code yet, because the last quantum did not stop on the
  // boundary of three octets; they are already shifted to their final
  // position
  Value last_encoded_value;

  // encoding of an input chunk needs to read and update the state
  template <typename InputIterator, typename OutputIterator, typename State>
  friend OutputIterator encode(InputIterator begin, InputIterator end,
                               OutputIterator output, State& rest);

  // finishing the encoding needs to read and clear the state
  template <typename OutputIterator, typename State>
  friend OutputIterator encode_rest(OutputIterator output, State& rest);
};
119 
120 // Encodes an input sequence to BASE64 writing it to the output iterator
121 // and stopping if the last input tree-octet quantum was not complete,
122 // in
123 // which case it stores the state for the later continuation, when
124 // another
125 // input chunk is ready for the encoding.  The encoding must be finished
126 // by calling the encode_rest after processing the last chunk.
127 //
128 // std::vector<unsigned char> buffer = ...;
129 // std::basic_string<Char> result;
130 // std::back_insert_iterator<std::basic_string<Char> > appender(result);
131 // base64::state<unsigned char> rest;
132 // base64::encode(buffer.begin(), buffer.end(), appender, rest);
133 // ...
134 // base64::encode_rest(appender, rest);
135 template <typename InputIterator, typename OutputIterator, typename State>
136 OutputIterator encode(InputIterator begin, InputIterator end,
137                       OutputIterator output, State& rest) {
138   typedef typename iterator_value<InputIterator>::type value_type;
139   // continue with the rest of the last chunk - 2 or 4 bits which
140   // are already shifted to the left and need to be or-ed with the
141   // continuing data up to the target 6 bits
142   value_type encoded_value = rest.last_encoded_value;
143   // if the previous chunk stopped at encoding the first (1) or the
144   // second
145   // (2) octet of the three-byte quantum, jump to the right place,
146   // otherwise start the loop with an empty encoded value buffer
147   switch (rest.triplet_index) {
148     // this loop processes the input sequence of bit-octets by bits,
149     // shifting the current_value (used as a cyclic buffer) left and
150     // or-ing next bits there, while pulling the bit-sextets from the
151     // high word of the current_value
152     for (value_type current_value;;) {
153       case 0:
154         // if the input sequence is empty or reached its end at the
155         // 3-byte boundary, finish with an empty encoding state
156         if (begin == end) {
157           rest.triplet_index = 0;
158           // the last encoded value is not interesting - it would not
159           // be used, because processing of the next chunk will start
160           // at the 3-byte boundary
161           rest.last_encoded_value = 0;
162           return output;
163         }
164         // read the first octet from the current triplet
165         current_value = *begin++;
166         // use just the upper 6 bits to encode it to the target alphabet
167         encoded_value = (current_value & 0xfc) >> 2;
168         *output++ = detail::encode_value(encoded_value);
169         // shift the remaining two bits up to make place for the upoming
170         // part of the next octet
171         encoded_value = (current_value & 0x03) << 4;
172       case 1:
173         // if the input sequence reached its end after the first octet
174         // from the quantum triplet, store the encoding state and finish
175         if (begin == end) {
176           rest.triplet_index = 1;
177           rest.last_encoded_value = encoded_value;
178           return output;
179         }
180         // read the second first octet from the current triplet
181         current_value = *begin++;
182         // combine the upper four bits (as the lower part) with the
183         // previous two bits to encode it to the target alphabet
184         encoded_value |= (current_value & 0xf0) >> 4;
185         *output++ = detail::encode_value(encoded_value);
186         // shift the remaining four bits up to make place for the
187         // upoming
188         // part of the next octet
189         encoded_value = (current_value & 0x0f) << 2;
190       case 2:
191         // if the input sequence reached its end after the second octet
192         // from the quantum triplet, store the encoding state and finish
193         if (begin == end) {
194           rest.triplet_index = 2;
195           rest.last_encoded_value = encoded_value;
196           return output;
197         }
198         // read the third octet from the current triplet
199         current_value = *begin++;
200         // combine the upper two bits (as the lower part) with the
201         // previous four bits to encode it to the target alphabet
202         encoded_value |= (current_value & 0xc0) >> 6;
203         *output++ = detail::encode_value(encoded_value);
204         // encode the remaining 6 bits to the target alphabet
205         encoded_value = current_value & 0x3f;
206         *output++ = detail::encode_value(encoded_value);
207     }
208   }
209   return output;
210 }
211 
212 // Finishes encoding of the previously processed chunks.  If their total
213 // byte-length was divisible by three, nothing is needed, if not, the
214 // last
215 // quantum will be encoded as if padded with zeroes, which will be
216 // indicated
217 // by appending '=' characters to the output.  This method must be
218 // always
219 // used at the end of encoding, if the previous chunks were encoded by
220 // the
221 // method overload accepting the encoding state.
222 //
223 // std::vector<unsigned char> buffer = ...;
224 // std::basic_string<Char> result;
225 // std::back_insert_iterator<std::basic_string<Char> > appender(result);
226 // base64::state<unsigned char> rest;
227 // base64::encode(buffer.begin(), buffer.end(), appender, rest);
228 // ...
229 // base64::encode_rest(appender, rest);
230 template <typename OutputIterator, typename State>
231 OutputIterator encode_rest(OutputIterator output, State& rest) {
232   if (!rest.empty()) {
233     // process the last part of the trailing octet (either 4 or 2 bits)
234     // as if the input was padded with zeros - without or-ing the next
235     // input value to it; it has been already shifted to the left
236     *output++ = detail::encode_value(rest.last_encoded_value);
237     // at least one padding '=' will be always needed - at least two
238     // bits are missing in the finally encoded 6-bit value
239     *output++ = '=';
240     // if the last octet was the first in the triplet (the index was
241     // 1), four bits are missing in the finally encoded 6-bit value;
242     // another '=' character is needed for the another two bits
243     if (rest.triplet_index < 2) *output++ = '=';
244     // clear the state all the time to make sure that another call to
245     // the encode_rest would not cause damage; the last encoded value,
246     // which may have been left there, must be zeroed too; it is
247     // important before the next encoding begins, because it works as
248     // a cyclic buffer and must start empty - with zero
249     rest.clear();
250   }
251   return output;
252 }
253 
254 // Encodes a part of an input sequence specified by the pair of begin
255 // and
256 // end iterators.to BASE64 writing it to the output iterator. If its
257 // total
258 // byte-length was not divisible by three, the output will be padded by
259 // the
260 // '=' characters.  If you encode an input consisting of mutiple chunks,
261 // use the method overload maintaining the encoding state.
262 //
263 // std::vector<unsigned char> buffer = ...;
264 // std::basic_string<Char> result;
265 // base64::encode(buffer.begin(), buffer.end(),
266 // std::back_inserter(result));
267 template <typename InputIterator, typename OutputIterator>
encode(InputIterator begin,InputIterator end,OutputIterator output)268 OutputIterator encode(InputIterator begin, InputIterator end,
269                       OutputIterator output) {
270   state<typename iterator_value<InputIterator>::type> rest;
271   output = encode(begin, end, output, rest);
272   return encode_rest(output, rest);
273 }
274 
// Encodes an entire input sequence to BASE64, writing it to the output
// iterator.  The boundaries of the sequence are obtained by boost::begin
// and boost::end.  If the last input three-octet quantum is not
// complete, its state is stored in rest for the later continuation,
// when another input chunk is ready for the encoding.  The encoding
// must be finished by calling encode_rest after processing the last
// chunk.
//
// Warning: Buffers identified by C-pointers are processed including
// their termination character, if they have any.  This is unexpected at
// least for string literals, which have a dedicated overload here to
// avoid it.
//
// std::vector<unsigned char> buffer = ...;
// std::basic_string<Char> result;
// std::back_insert_iterator<std::basic_string<Char> > appender(result);
// base64_standalone::state<unsigned char> rest;
// base64_standalone::encode(buffer, appender, rest);
// ...
// base64_standalone::encode_rest(appender, rest);
template <typename InputRange, typename OutputIterator, typename State>
OutputIterator encode(InputRange const& input, OutputIterator output,
                      State& rest) {
  // forward to the iterator-pair overload maintaining the encoding state
  return encode(boost::begin(input), boost::end(input), output, rest);
}
306 
307 // Encodes an entire string literal to BASE64, writing it to the output
308 // iterator and stopping if the last input tree-octet quantum was not
309 // complete, in which case it stores the state for the later
310 // continuation,
311 // when another input chunk is ready for the encoding.  The encoding
312 // must
313 // be finished by calling the encode_rest after processing the last
314 // chunk.
315 //
316 // The string literal is encoded without processing its terminating zero
317 // character, which is the usual expectation.
318 //
319 // std::basic_string<Char> result;
320 // std::back_insert_iterator<std::basic_string<Char> > appender(result);
321 // base64::state<char> rest;
322 // base64::encode("ab", appender, rest);
323 // ...
324 // base64::encode_rest(appender, rest);
325 template <typename OutputIterator>
encode(char const * value,OutputIterator output,state<char> & rest)326 OutputIterator encode(char const* value, OutputIterator output,
327                       state<char>& rest) {
328   return encode(value, value + strlen(value), output, rest);
329 }
330 
// Encodes an entire input sequence to BASE64, writing it to the output
// iterator.  The boundaries of the sequence are obtained by boost::begin
// and boost::end.  If its total byte-length is not divisible by three,
// the output is padded with '=' characters.  If you encode an input
// consisting of multiple chunks, use the overload maintaining the
// encoding state instead.
//
// Warning: Buffers identified by C-pointers are processed including
// their termination character, if they have any.  This is unexpected at
// least for string literals, which have a dedicated overload here to
// avoid it.
//
// std::vector<unsigned char> buffer = ...;
// std::basic_string<Char> result;
// base64_standalone::encode(buffer, std::back_inserter(result));
template <typename InputRange, typename OutputIterator>
OutputIterator encode(InputRange const& value, OutputIterator output) {
  // forward to the iterator-pair overload, which also adds the padding
  return encode(boost::begin(value), boost::end(value), output);
}
353 
// Encodes an entire zero-terminated string to BASE64, writing it to the
// output iterator.  If its total length (without the trailing zero) is
// not divisible by three, the output is padded with '=' characters.
// If you encode an input consisting of multiple chunks, use the
// overload maintaining the encoding state instead.
//
// The string is encoded without processing its terminating zero
// character, which is the usual expectation.  The value must point to
// a zero-terminated string.
//
// Returns the output iterator advanced past the last written character.
//
// std::basic_string<Char> result;
// base64_standalone::encode("ab", std::back_inserter(result));
template <typename OutputIterator>
OutputIterator encode(char const* value, OutputIterator output) {
  // std::char_traits<char>::length is declared by <string>; it counts
  // the characters in front of the terminating zero
  char const* const last = value + std::char_traits<char>::length(value);
  return encode(value, last, output);
}
369 
// Encodes an entire input sequence to BASE64 and returns the result as
// a string of the requested character type.  The input is handed over
// to the overload writing to an output iterator, which pads the output
// with '=' characters if the total byte-length is not divisible by
// three.  If you encode an input consisting of multiple chunks, use the
// overloads maintaining the encoding state and writing to an output
// iterator instead.
//
// Warning: Buffers identified by C-pointers are processed including
// their termination character, if they have any.  This is unexpected at
// least for string literals, which have a dedicated overload here to
// avoid it.
//
// std::vector<unsigned char> buffer = ...;
// std::basic_string<Char> result =
//     base64_standalone::encode<Char>(buffer);
template <typename Char, typename InputRange>
std::basic_string<Char> encode(InputRange const& value) {
  std::basic_string<Char> encoded;
  // every written character is appended to the end of the string
  std::back_insert_iterator<std::basic_string<Char> > appender(encoded);
  encode(value, appender);
  return encoded;
}
392 
// Encodes an entire zero-terminated string to BASE64 and returns the
// result as a string of the requested character type.  The input is
// handed over to the overload writing to an output iterator, which pads
// the output with '=' characters if the total byte-length is not
// divisible by three.  If you encode an input consisting of multiple
// chunks, use the overloads maintaining the encoding state and writing
// to an output iterator instead.
//
// The string is encoded without processing its terminating zero
// character, which is the usual expectation.
//
// std::basic_string<Char> result = base64_standalone::encode<Char>("ab");
template <typename Char>
std::basic_string<Char> encode(char const* value) {
  std::basic_string<Char> encoded;
  // every written character is appended to the end of the string
  std::back_insert_iterator<std::basic_string<Char> > appender(encoded);
  encode(value, appender);
  return encoded;
}
409 
// The function overloads for string literals encode the input without
// the terminating zero, which is usually expected, because the trailing
// zero byte is not considered a part of the string value; the overloads
// for an input range would wrap the string literal by Boost.Range and
// encode the full memory occupied by the string literal - including the
// unwanted last zero byte.
416 
417 }  // namespace base64_standalone
418 
419 }  // namespace utils
420 }  // namespace network
421 }  // namespace boost
422 
423 #endif  // BOOST_NETWORK_UTILS_BASE64_STANDALONE_HPP
424