1 /*
2  * base64.c:  base64 encoding and decoding functions
3  *
4  * ====================================================================
5  *    Licensed to the Apache Software Foundation (ASF) under one
6  *    or more contributor license agreements.  See the NOTICE file
7  *    distributed with this work for additional information
8  *    regarding copyright ownership.  The ASF licenses this file
9  *    to you under the Apache License, Version 2.0 (the
10  *    "License"); you may not use this file except in compliance
11  *    with the License.  You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  *    Unless required by applicable law or agreed to in writing,
16  *    software distributed under the License is distributed on an
17  *    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18  *    KIND, either express or implied.  See the License for the
19  *    specific language governing permissions and limitations
20  *    under the License.
21  * ====================================================================
22  */
23 
24 
25 
26 #include <string.h>
27 
28 #include <apr.h>
29 #include <apr_pools.h>
30 #include <apr_general.h>        /* for APR_INLINE */
31 
32 #include "svn_pools.h"
33 #include "svn_io.h"
34 #include "svn_error.h"
35 #include "svn_base64.h"
36 #include "private/svn_string_private.h"
37 #include "private/svn_subr_private.h"
38 
/* When asked to format the base64-encoded output as multiple lines,
   we put this many chars in each line (plus one new line char) unless
   we run out of data.
   It is vital for some of the optimizations below that this value is
   a multiple of 4, because whole lines are encoded and decoded in
   4-char groups without any further checks. */
#define BASE64_LINELEN 76

/* This number of bytes is encoded in a line of base64 chars: every
   4 output chars carry 3 input bytes, so this evaluates to 57. */
#define BYTES_PER_LINE (BASE64_LINELEN / 4 * 3)
48 
/* Value -> base64 char mapping table (2^6 entries).  The table is
   indexed with a 6-bit value (0..63); the padding char '=' is not part
   of it and is added separately by encode_partial_group(). */
static const char base64tab[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
                                "abcdefghijklmnopqrstuvwxyz0123456789+/";
52 
53 
54 /* Binary input --> base64-encoded output */
55 
/* State of an encoding stream created by svn_base64_encode2().  It is
   handed to encode_data() and finish_encoding_data() as their baton. */
struct encode_baton {
  svn_stream_t *output;         /* Stream the encoded chars are written to */
  unsigned char buf[3];         /* Bytes waiting to be encoded */
  size_t buflen;                /* Number of bytes waiting */
  size_t linelen;               /* Bytes output so far on this line */
  svn_boolean_t break_lines;    /* Passed on to the encoding helpers */
  apr_pool_t *scratch_pool;     /* Cleared after every write, destroyed
                                   when the stream gets closed */
};
64 
65 
66 /* Base64-encode a group.  IN needs to have three bytes and OUT needs
67    to have room for four bytes.  The input group is treated as four
68    six-bit units which are treated as lookups into base64tab for the
69    bytes of the output group.  */
70 static APR_INLINE void
encode_group(const unsigned char * in,char * out)71 encode_group(const unsigned char *in, char *out)
72 {
73   /* Expand input bytes to machine word length (with zero extra cost
74      on x86/x64) ... */
75   apr_size_t part0 = in[0];
76   apr_size_t part1 = in[1];
77   apr_size_t part2 = in[2];
78 
79   /* ... to prevent these arithmetic operations from being limited to
80      byte size.  This saves non-zero cost conversions of the result when
81      calculating the addresses within base64tab. */
82   out[0] = base64tab[part0 >> 2];
83   out[1] = base64tab[((part0 & 3) << 4) | (part1 >> 4)];
84   out[2] = base64tab[((part1 & 0xf) << 2) | (part2 >> 6)];
85   out[3] = base64tab[part2 & 0x3f];
86 }
87 
88 /* Base64-encode a line, i.e. BYTES_PER_LINE bytes from DATA into
89    BASE64_LINELEN chars and append it to STR.  It does not assume that
90    a new line char will be appended, though.
91    The code in this function will simply transform the data without
92    performing any boundary checks.  Therefore, DATA must have at least
93    BYTES_PER_LINE left and space for at least another BASE64_LINELEN
94    chars must have been pre-allocated in STR before calling this
95    function. */
96 static void
encode_line(svn_stringbuf_t * str,const char * data)97 encode_line(svn_stringbuf_t *str, const char *data)
98 {
99   /* Translate directly from DATA to STR->DATA. */
100   const unsigned char *in = (const unsigned char *)data;
101   char *out = str->data + str->len;
102   char *end = out + BASE64_LINELEN;
103 
104   /* We assume that BYTES_PER_LINE is a multiple of 3 and BASE64_LINELEN
105      a multiple of 4. */
106   for ( ; out != end; in += 3, out += 4)
107     encode_group(in, out);
108 
109   /* Expand and terminate the string. */
110   *out = '\0';
111   str->len += BASE64_LINELEN;
112 }
113 
114 /* (Continue to) Base64-encode the byte string DATA (of length LEN)
115    into STR. Include newlines every so often if BREAK_LINES is true.
116    INBUF, INBUFLEN, and LINELEN are used internally; the caller shall
117    make INBUF have room for three characters and initialize *INBUFLEN
118    and *LINELEN to 0.
119 
120    INBUF and *INBUFLEN carry the leftover data from call to call, and
121    *LINELEN carries the length of the current output line. */
122 static void
encode_bytes(svn_stringbuf_t * str,const void * data,apr_size_t len,unsigned char * inbuf,size_t * inbuflen,size_t * linelen,svn_boolean_t break_lines)123 encode_bytes(svn_stringbuf_t *str, const void *data, apr_size_t len,
124              unsigned char *inbuf, size_t *inbuflen, size_t *linelen,
125              svn_boolean_t break_lines)
126 {
127   char group[4];
128   const char *p = data, *end = p + len;
129   apr_size_t buflen;
130 
131   /* Resize the stringbuf to make room for the (approximate) size of
132      output, to avoid repeated resizes later.
133      Please note that our optimized code relies on the fact that STR
134      never needs to be resized until we leave this function. */
135   buflen = len * 4 / 3 + 4;
136   if (break_lines)
137     {
138       /* Add an extra space for line breaks. */
139       buflen += buflen / BASE64_LINELEN;
140     }
141   svn_stringbuf_ensure(str, str->len + buflen);
142 
143   /* Keep encoding three-byte groups until we run out.  */
144   while ((end - p) >= (3 - *inbuflen))
145     {
146       /* May we encode BYTES_PER_LINE bytes without caring about
147          line breaks, data in the temporary INBUF or running out
148          of data? */
149       if (   *inbuflen == 0
150           && (*linelen == 0 || !break_lines)
151           && (end - p >= BYTES_PER_LINE))
152         {
153           /* Yes, we can encode a whole chunk of data at once. */
154           encode_line(str, p);
155           p += BYTES_PER_LINE;
156           *linelen += BASE64_LINELEN;
157         }
158       else
159         {
160           /* No, this is one of a number of special cases.
161              Encode the data byte by byte. */
162           memcpy(inbuf + *inbuflen, p, 3 - *inbuflen);
163           p += (3 - *inbuflen);
164           encode_group(inbuf, group);
165           svn_stringbuf_appendbytes(str, group, 4);
166           *inbuflen = 0;
167           *linelen += 4;
168         }
169 
170       /* Add line breaks as necessary. */
171       if (break_lines && *linelen == BASE64_LINELEN)
172         {
173           svn_stringbuf_appendbyte(str, '\n');
174           *linelen = 0;
175         }
176     }
177 
178   /* Tack any extra input onto *INBUF.  */
179   memcpy(inbuf + *inbuflen, p, end - p);
180   *inbuflen += (end - p);
181 }
182 
183 
184 /* Encode leftover data, if any, and possibly a final newline (if
185    there has been any data and BREAK_LINES is set), appending to STR.
186    LEN must be in the range 0..2.  */
187 static void
encode_partial_group(svn_stringbuf_t * str,const unsigned char * extra,size_t len,size_t linelen,svn_boolean_t break_lines)188 encode_partial_group(svn_stringbuf_t *str, const unsigned char *extra,
189                      size_t len, size_t linelen, svn_boolean_t break_lines)
190 {
191   unsigned char ingroup[3];
192   char outgroup[4];
193 
194   if (len > 0)
195     {
196       memcpy(ingroup, extra, len);
197       memset(ingroup + len, 0, 3 - len);
198       encode_group(ingroup, outgroup);
199       memset(outgroup + (len + 1), '=', 4 - (len + 1));
200       svn_stringbuf_appendbytes(str, outgroup, 4);
201       linelen += 4;
202     }
203   if (break_lines && linelen > 0)
204     svn_stringbuf_appendbyte(str, '\n');
205 }
206 
207 
208 /* Write handler for svn_base64_encode.  */
209 static svn_error_t *
encode_data(void * baton,const char * data,apr_size_t * len)210 encode_data(void *baton, const char *data, apr_size_t *len)
211 {
212   struct encode_baton *eb = baton;
213   svn_stringbuf_t *encoded = svn_stringbuf_create_empty(eb->scratch_pool);
214   apr_size_t enclen;
215   svn_error_t *err = SVN_NO_ERROR;
216 
217   /* Encode this block of data and write it out.  */
218   encode_bytes(encoded, data, *len, eb->buf, &eb->buflen, &eb->linelen,
219                eb->break_lines);
220   enclen = encoded->len;
221   if (enclen != 0)
222     err = svn_stream_write(eb->output, encoded->data, &enclen);
223   svn_pool_clear(eb->scratch_pool);
224   return err;
225 }
226 
227 
228 /* Close handler for svn_base64_encode().  */
229 static svn_error_t *
finish_encoding_data(void * baton)230 finish_encoding_data(void *baton)
231 {
232   struct encode_baton *eb = baton;
233   svn_stringbuf_t *encoded = svn_stringbuf_create_empty(eb->scratch_pool);
234   apr_size_t enclen;
235   svn_error_t *err = SVN_NO_ERROR;
236 
237   /* Encode a partial group at the end if necessary, and write it out.  */
238   encode_partial_group(encoded, eb->buf, eb->buflen, eb->linelen,
239                        eb->break_lines);
240   enclen = encoded->len;
241   if (enclen != 0)
242     err = svn_stream_write(eb->output, encoded->data, &enclen);
243 
244   /* Pass on the close request and clean up the baton.  */
245   if (err == SVN_NO_ERROR)
246     err = svn_stream_close(eb->output);
247   svn_pool_destroy(eb->scratch_pool);
248   return err;
249 }
250 
251 
252 svn_stream_t *
svn_base64_encode2(svn_stream_t * output,svn_boolean_t break_lines,apr_pool_t * pool)253 svn_base64_encode2(svn_stream_t *output,
254                    svn_boolean_t break_lines,
255                    apr_pool_t *pool)
256 {
257   struct encode_baton *eb = apr_palloc(pool, sizeof(*eb));
258   svn_stream_t *stream;
259 
260   eb->output = output;
261   eb->buflen = 0;
262   eb->linelen = 0;
263   eb->break_lines = break_lines;
264   eb->scratch_pool = svn_pool_create(pool);
265   stream = svn_stream_create(eb, pool);
266   svn_stream_set_write(stream, encode_data);
267   svn_stream_set_close(stream, finish_encoding_data);
268   return stream;
269 }
270 
271 
272 const svn_string_t *
svn_base64_encode_string2(const svn_string_t * str,svn_boolean_t break_lines,apr_pool_t * pool)273 svn_base64_encode_string2(const svn_string_t *str,
274                           svn_boolean_t break_lines,
275                           apr_pool_t *pool)
276 {
277   svn_stringbuf_t *encoded = svn_stringbuf_create_empty(pool);
278   unsigned char ingroup[3];
279   size_t ingrouplen = 0;
280   size_t linelen = 0;
281 
282   encode_bytes(encoded, str->data, str->len, ingroup, &ingrouplen, &linelen,
283                break_lines);
284   encode_partial_group(encoded, ingroup, ingrouplen, linelen,
285                        break_lines);
286   return svn_stringbuf__morph_into_string(encoded);
287 }
288 
289 const svn_string_t *
svn_base64_encode_string(const svn_string_t * str,apr_pool_t * pool)290 svn_base64_encode_string(const svn_string_t *str, apr_pool_t *pool)
291 {
292   return svn_base64_encode_string2(str, TRUE, pool);
293 }
294 
295 
296 
297 /* Base64-encoded input --> binary output */
298 
/* State of a decoding stream created by svn_base64_decode().  It is
   handed to decode_data() and finish_decoding_data() as their baton. */
struct decode_baton {
  svn_stream_t *output;         /* Stream the decoded bytes are written to */
  unsigned char buf[4];         /* Bytes waiting to be decoded */
  int buflen;                   /* Number of bytes waiting */
  svn_boolean_t done;           /* True if we already saw an '='; all
                                   further input is ignored then */
  apr_pool_t *scratch_pool;     /* Cleared after every write, destroyed
                                   when the stream gets closed */
};
306 
307 
308 /* Base64-decode a group.  IN needs to have four bytes and OUT needs
309    to have room for three bytes.  The input bytes must already have
310    been decoded from base64tab into the range 0..63.  The four
311    six-bit values are pasted together to form three eight-bit bytes.  */
312 static APR_INLINE void
decode_group(const unsigned char * in,char * out)313 decode_group(const unsigned char *in, char *out)
314 {
315   out[0] = (char)((in[0] << 2) | (in[1] >> 4));
316   out[1] = (char)(((in[1] & 0xf) << 4) | (in[2] >> 2));
317   out[2] = (char)(((in[2] & 0x3) << 6) | in[3]);
318 }
319 
/* Lookup table for base64 characters; reverse_base64[ch] gives a
   negative value if ch is not a valid base64 character, or otherwise
   the value of the byte represented; 'A' => 0 etc.
   This is the inverse of base64tab.  The table is indexed with the
   char converted to unsigned char and has one row per 16 char codes;
   the row comments below assume ASCII.  Note that the padding char
   '=' and the new line char map to a negative value as well. */
static const signed char reverse_base64[256] = {
/* 0x00 - 0x0f */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 0x10 - 0x1f */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
/* 0x20 - 0x2f: '+' => 62, '/' => 63 */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
/* 0x30 - 0x3f: '0'..'9' => 52..61 */
52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
/* 0x40 - 0x4f: 'A'..'O' => 0..14 */
-1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
/* 0x50 - 0x5f: 'P'..'Z' => 15..25 */
15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
/* 0x60 - 0x6f: 'a'..'o' => 26..40 */
-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
/* 0x70 - 0x7f: 'p'..'z' => 41..51 */
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
/* 0x80 - 0xff: no valid base64 chars */
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
341 
342 /* Similar to decode_group but this function also translates the
343    6-bit values from the IN buffer before translating them.
344    Return FALSE if a non-base64 char (e.g. '=' or new line)
345    has been encountered. */
346 static APR_INLINE svn_boolean_t
decode_group_directly(const unsigned char * in,char * out)347 decode_group_directly(const unsigned char *in, char *out)
348 {
349   /* Translate the base64 chars in values [0..63, 0xff] */
350   apr_size_t part0 = (unsigned char)reverse_base64[(unsigned char)in[0]];
351   apr_size_t part1 = (unsigned char)reverse_base64[(unsigned char)in[1]];
352   apr_size_t part2 = (unsigned char)reverse_base64[(unsigned char)in[2]];
353   apr_size_t part3 = (unsigned char)reverse_base64[(unsigned char)in[3]];
354 
355   /* Pack 4x6 bits into 3x8.*/
356   out[0] = (char)((part0 << 2) | (part1 >> 4));
357   out[1] = (char)(((part1 & 0xf) << 4) | (part2 >> 2));
358   out[2] = (char)(((part2 & 0x3) << 6) | part3);
359 
360   /* FALSE, iff any part is 0xff. */
361   return (part0 | part1 | part2 | part3) != (unsigned char)(-1);
362 }
363 
364 /* Base64-encode up to BASE64_LINELEN chars from *DATA and append it to
365    STR.  After the function returns, *DATA will point to the first char
366    that has not been translated, yet.  Returns TRUE if all BASE64_LINELEN
367    chars could be translated, i.e. no special char has been encountered
368    in between.
369    The code in this function will simply transform the data without
370    performing any boundary checks.  Therefore, DATA must have at least
371    BASE64_LINELEN left and space for at least another BYTES_PER_LINE
372    chars must have been pre-allocated in STR before calling this
373    function. */
374 static svn_boolean_t
decode_line(svn_stringbuf_t * str,const char ** data)375 decode_line(svn_stringbuf_t *str, const char **data)
376 {
377   /* Decode up to BYTES_PER_LINE bytes directly from *DATA into STR->DATA. */
378   const unsigned char *p = *(const unsigned char **)data;
379   char *out = str->data + str->len;
380   char *end = out + BYTES_PER_LINE;
381 
382   /* We assume that BYTES_PER_LINE is a multiple of 3 and BASE64_LINELEN
383      a multiple of 4.  Stop translation as soon as we encounter a special
384      char.  Leave the entire group untouched in that case. */
385   for (; out < end; p += 4, out += 3)
386     if (!decode_group_directly(p, out))
387       break;
388 
389   /* Update string sizes and positions. */
390   str->len = out - str->data;
391   *out = '\0';
392   *data = (const char *)p;
393 
394   /* Return FALSE, if the caller should continue the decoding process
395      using the slow standard method. */
396   return out == end;
397 }
398 
399 
400 /* (Continue to) Base64-decode the byte string DATA (of length LEN)
401    into STR. INBUF, INBUFLEN, and DONE are used internally; the
402    caller shall have room for four bytes in INBUF and initialize
403    *INBUFLEN to 0 and *DONE to FALSE.
404 
405    INBUF and *INBUFLEN carry the leftover bytes from call to call, and
406    *DONE keeps track of whether we've seen an '=' which terminates the
407    encoded data. */
408 static void
decode_bytes(svn_stringbuf_t * str,const char * data,apr_size_t len,unsigned char * inbuf,int * inbuflen,svn_boolean_t * done)409 decode_bytes(svn_stringbuf_t *str, const char *data, apr_size_t len,
410              unsigned char *inbuf, int *inbuflen, svn_boolean_t *done)
411 {
412   const char *p = data;
413   char group[3];
414   signed char find;
415   const char *end = data + len;
416 
417   /* Resize the stringbuf to make room for the maximum size of output,
418      to avoid repeated resizes later.  The optimizations in
419      decode_line rely on no resizes being necessary!
420 
421      (*inbuflen+len) is encoded data length
422      (*inbuflen+len)/4 is the number of complete 4-bytes sets
423      (*inbuflen+len)/4*3 is the number of decoded bytes
424      svn_stringbuf_ensure will add an additional byte for the terminating 0.
425   */
426   svn_stringbuf_ensure(str, str->len + ((*inbuflen + len) / 4) * 3);
427 
428   while ( !*done && p < end )
429     {
430       /* If no data is left in temporary INBUF and there is at least
431          one line-sized chunk left to decode, we may use the optimized
432          code path. */
433       if ((*inbuflen == 0) && (end - p >= BASE64_LINELEN))
434         if (decode_line(str, &p))
435           continue;
436 
437       /* A special case or decode_line encountered a special char. */
438       if (*p == '=')
439         {
440           /* We are at the end and have to decode a partial group.  */
441           if (*inbuflen >= 2)
442             {
443               memset(inbuf + *inbuflen, 0, 4 - *inbuflen);
444               decode_group(inbuf, group);
445               svn_stringbuf_appendbytes(str, group, *inbuflen - 1);
446             }
447           *done = TRUE;
448         }
449       else
450         {
451           find = reverse_base64[(unsigned char)*p];
452           ++p;
453 
454           if (find >= 0)
455             inbuf[(*inbuflen)++] = find;
456           if (*inbuflen == 4)
457             {
458               decode_group(inbuf, group);
459               svn_stringbuf_appendbytes(str, group, 3);
460               *inbuflen = 0;
461             }
462         }
463     }
464 }
465 
466 
467 /* Write handler for svn_base64_decode.  */
468 static svn_error_t *
decode_data(void * baton,const char * data,apr_size_t * len)469 decode_data(void *baton, const char *data, apr_size_t *len)
470 {
471   struct decode_baton *db = baton;
472   svn_stringbuf_t *decoded;
473   apr_size_t declen;
474   svn_error_t *err = SVN_NO_ERROR;
475 
476   /* Decode this block of data.  */
477   decoded = svn_stringbuf_create_empty(db->scratch_pool);
478   decode_bytes(decoded, data, *len, db->buf, &db->buflen, &db->done);
479 
480   /* Write the output, clean up, go home.  */
481   declen = decoded->len;
482   if (declen != 0)
483     err = svn_stream_write(db->output, decoded->data, &declen);
484   svn_pool_clear(db->scratch_pool);
485   return err;
486 }
487 
488 
489 /* Close handler for svn_base64_decode().  */
490 static svn_error_t *
finish_decoding_data(void * baton)491 finish_decoding_data(void *baton)
492 {
493   struct decode_baton *db = baton;
494   svn_error_t *err;
495 
496   /* Pass on the close request and clean up the baton.  */
497   err = svn_stream_close(db->output);
498   svn_pool_destroy(db->scratch_pool);
499   return err;
500 }
501 
502 
503 svn_stream_t *
svn_base64_decode(svn_stream_t * output,apr_pool_t * pool)504 svn_base64_decode(svn_stream_t *output, apr_pool_t *pool)
505 {
506   struct decode_baton *db = apr_palloc(pool, sizeof(*db));
507   svn_stream_t *stream;
508 
509   db->output = output;
510   db->buflen = 0;
511   db->done = FALSE;
512   db->scratch_pool = svn_pool_create(pool);
513   stream = svn_stream_create(db, pool);
514   svn_stream_set_write(stream, decode_data);
515   svn_stream_set_close(stream, finish_decoding_data);
516   return stream;
517 }
518 
519 
520 const svn_string_t *
svn_base64_decode_string(const svn_string_t * str,apr_pool_t * pool)521 svn_base64_decode_string(const svn_string_t *str, apr_pool_t *pool)
522 {
523   svn_stringbuf_t *decoded = svn_stringbuf_create_empty(pool);
524   unsigned char ingroup[4];
525   int ingrouplen = 0;
526   svn_boolean_t done = FALSE;
527 
528   decode_bytes(decoded, str->data, str->len, ingroup, &ingrouplen, &done);
529   return svn_stringbuf__morph_into_string(decoded);
530 }
531 
532 
533 /* Return a base64-encoded representation of CHECKSUM, allocated in POOL.
534    If CHECKSUM->kind is not recognized, return NULL.
535    ### That 'NULL' claim was in the header file when this was public, but
536    doesn't look true in the implementation.
537 
538    ### This is now only used as a new implementation of svn_base64_from_md5();
539    it would probably be safer to revert that to its old implementation. */
540 static svn_stringbuf_t *
base64_from_checksum(const svn_checksum_t * checksum,apr_pool_t * pool)541 base64_from_checksum(const svn_checksum_t *checksum, apr_pool_t *pool)
542 {
543   svn_stringbuf_t *checksum_str;
544   unsigned char ingroup[3];
545   size_t ingrouplen = 0;
546   size_t linelen = 0;
547   checksum_str = svn_stringbuf_create_empty(pool);
548 
549   encode_bytes(checksum_str, checksum->digest,
550                svn_checksum_size(checksum), ingroup, &ingrouplen,
551                &linelen, TRUE);
552   encode_partial_group(checksum_str, ingroup, ingrouplen, linelen, TRUE);
553 
554   /* Our base64-encoding routines append a final newline if any data
555      was created at all, so let's hack that off. */
556   if (checksum_str->len)
557     {
558       checksum_str->len--;
559       checksum_str->data[checksum_str->len] = 0;
560     }
561 
562   return checksum_str;
563 }
564 
565 
566 svn_stringbuf_t *
svn_base64_from_md5(unsigned char digest[],apr_pool_t * pool)567 svn_base64_from_md5(unsigned char digest[], apr_pool_t *pool)
568 {
569   svn_checksum_t *checksum
570     = svn_checksum__from_digest_md5(digest, pool);
571 
572   return base64_from_checksum(checksum, pool);
573 }
574