1 /*
2 * base64.c: base64 encoding and decoding functions
3 *
4 * ====================================================================
5 * Licensed to the Apache Software Foundation (ASF) under one
6 * or more contributor license agreements. See the NOTICE file
7 * distributed with this work for additional information
8 * regarding copyright ownership. The ASF licenses this file
9 * to you under the Apache License, Version 2.0 (the
10 * "License"); you may not use this file except in compliance
11 * with the License. You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing,
16 * software distributed under the License is distributed on an
17 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 * KIND, either express or implied. See the License for the
19 * specific language governing permissions and limitations
20 * under the License.
21 * ====================================================================
22 */
23
24
25
26 #include <string.h>
27
28 #include <apr.h>
29 #include <apr_pools.h>
30 #include <apr_general.h> /* for APR_INLINE */
31
32 #include "svn_pools.h"
33 #include "svn_io.h"
34 #include "svn_error.h"
35 #include "svn_base64.h"
36 #include "private/svn_string_private.h"
37 #include "private/svn_subr_private.h"
38
/* Number of base64 characters placed on each output line when line
   breaking is requested; the newline that follows is not counted.
   The last line is shorter if the data runs out first.
   This value has to stay a multiple of 4, because the whole-line
   fast paths below only ever handle complete four-character groups. */
#define BASE64_LINELEN 76

/* Number of raw bytes that encode to exactly one full line: every
   3 input bytes turn into 4 base64 characters. */
#define BYTES_PER_LINE ((BASE64_LINELEN / 4) * 3)
48
/* Encoding alphabet: base64tab[v] is the output character for the
   six-bit value v (0..63). */
static const char base64tab[] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  "abcdefghijklmnopqrstuvwxyz"
  "0123456789"
  "+/";
52
53
54 /* Binary input --> base64-encoded output */
55
56 struct encode_baton {
57 svn_stream_t *output;
58 unsigned char buf[3]; /* Bytes waiting to be encoded */
59 size_t buflen; /* Number of bytes waiting */
60 size_t linelen; /* Bytes output so far on this line */
61 svn_boolean_t break_lines;
62 apr_pool_t *scratch_pool;
63 };
64
65
66 /* Base64-encode a group. IN needs to have three bytes and OUT needs
67 to have room for four bytes. The input group is treated as four
68 six-bit units which are treated as lookups into base64tab for the
69 bytes of the output group. */
70 static APR_INLINE void
encode_group(const unsigned char * in,char * out)71 encode_group(const unsigned char *in, char *out)
72 {
73 /* Expand input bytes to machine word length (with zero extra cost
74 on x86/x64) ... */
75 apr_size_t part0 = in[0];
76 apr_size_t part1 = in[1];
77 apr_size_t part2 = in[2];
78
79 /* ... to prevent these arithmetic operations from being limited to
80 byte size. This saves non-zero cost conversions of the result when
81 calculating the addresses within base64tab. */
82 out[0] = base64tab[part0 >> 2];
83 out[1] = base64tab[((part0 & 3) << 4) | (part1 >> 4)];
84 out[2] = base64tab[((part1 & 0xf) << 2) | (part2 >> 6)];
85 out[3] = base64tab[part2 & 0x3f];
86 }
87
88 /* Base64-encode a line, i.e. BYTES_PER_LINE bytes from DATA into
89 BASE64_LINELEN chars and append it to STR. It does not assume that
90 a new line char will be appended, though.
91 The code in this function will simply transform the data without
92 performing any boundary checks. Therefore, DATA must have at least
93 BYTES_PER_LINE left and space for at least another BASE64_LINELEN
94 chars must have been pre-allocated in STR before calling this
95 function. */
96 static void
encode_line(svn_stringbuf_t * str,const char * data)97 encode_line(svn_stringbuf_t *str, const char *data)
98 {
99 /* Translate directly from DATA to STR->DATA. */
100 const unsigned char *in = (const unsigned char *)data;
101 char *out = str->data + str->len;
102 char *end = out + BASE64_LINELEN;
103
104 /* We assume that BYTES_PER_LINE is a multiple of 3 and BASE64_LINELEN
105 a multiple of 4. */
106 for ( ; out != end; in += 3, out += 4)
107 encode_group(in, out);
108
109 /* Expand and terminate the string. */
110 *out = '\0';
111 str->len += BASE64_LINELEN;
112 }
113
114 /* (Continue to) Base64-encode the byte string DATA (of length LEN)
115 into STR. Include newlines every so often if BREAK_LINES is true.
116 INBUF, INBUFLEN, and LINELEN are used internally; the caller shall
117 make INBUF have room for three characters and initialize *INBUFLEN
118 and *LINELEN to 0.
119
120 INBUF and *INBUFLEN carry the leftover data from call to call, and
121 *LINELEN carries the length of the current output line. */
122 static void
encode_bytes(svn_stringbuf_t * str,const void * data,apr_size_t len,unsigned char * inbuf,size_t * inbuflen,size_t * linelen,svn_boolean_t break_lines)123 encode_bytes(svn_stringbuf_t *str, const void *data, apr_size_t len,
124 unsigned char *inbuf, size_t *inbuflen, size_t *linelen,
125 svn_boolean_t break_lines)
126 {
127 char group[4];
128 const char *p = data, *end = p + len;
129 apr_size_t buflen;
130
131 /* Resize the stringbuf to make room for the (approximate) size of
132 output, to avoid repeated resizes later.
133 Please note that our optimized code relies on the fact that STR
134 never needs to be resized until we leave this function. */
135 buflen = len * 4 / 3 + 4;
136 if (break_lines)
137 {
138 /* Add an extra space for line breaks. */
139 buflen += buflen / BASE64_LINELEN;
140 }
141 svn_stringbuf_ensure(str, str->len + buflen);
142
143 /* Keep encoding three-byte groups until we run out. */
144 while ((end - p) >= (3 - *inbuflen))
145 {
146 /* May we encode BYTES_PER_LINE bytes without caring about
147 line breaks, data in the temporary INBUF or running out
148 of data? */
149 if ( *inbuflen == 0
150 && (*linelen == 0 || !break_lines)
151 && (end - p >= BYTES_PER_LINE))
152 {
153 /* Yes, we can encode a whole chunk of data at once. */
154 encode_line(str, p);
155 p += BYTES_PER_LINE;
156 *linelen += BASE64_LINELEN;
157 }
158 else
159 {
160 /* No, this is one of a number of special cases.
161 Encode the data byte by byte. */
162 memcpy(inbuf + *inbuflen, p, 3 - *inbuflen);
163 p += (3 - *inbuflen);
164 encode_group(inbuf, group);
165 svn_stringbuf_appendbytes(str, group, 4);
166 *inbuflen = 0;
167 *linelen += 4;
168 }
169
170 /* Add line breaks as necessary. */
171 if (break_lines && *linelen == BASE64_LINELEN)
172 {
173 svn_stringbuf_appendbyte(str, '\n');
174 *linelen = 0;
175 }
176 }
177
178 /* Tack any extra input onto *INBUF. */
179 memcpy(inbuf + *inbuflen, p, end - p);
180 *inbuflen += (end - p);
181 }
182
183
184 /* Encode leftover data, if any, and possibly a final newline (if
185 there has been any data and BREAK_LINES is set), appending to STR.
186 LEN must be in the range 0..2. */
187 static void
encode_partial_group(svn_stringbuf_t * str,const unsigned char * extra,size_t len,size_t linelen,svn_boolean_t break_lines)188 encode_partial_group(svn_stringbuf_t *str, const unsigned char *extra,
189 size_t len, size_t linelen, svn_boolean_t break_lines)
190 {
191 unsigned char ingroup[3];
192 char outgroup[4];
193
194 if (len > 0)
195 {
196 memcpy(ingroup, extra, len);
197 memset(ingroup + len, 0, 3 - len);
198 encode_group(ingroup, outgroup);
199 memset(outgroup + (len + 1), '=', 4 - (len + 1));
200 svn_stringbuf_appendbytes(str, outgroup, 4);
201 linelen += 4;
202 }
203 if (break_lines && linelen > 0)
204 svn_stringbuf_appendbyte(str, '\n');
205 }
206
207
208 /* Write handler for svn_base64_encode. */
209 static svn_error_t *
encode_data(void * baton,const char * data,apr_size_t * len)210 encode_data(void *baton, const char *data, apr_size_t *len)
211 {
212 struct encode_baton *eb = baton;
213 svn_stringbuf_t *encoded = svn_stringbuf_create_empty(eb->scratch_pool);
214 apr_size_t enclen;
215 svn_error_t *err = SVN_NO_ERROR;
216
217 /* Encode this block of data and write it out. */
218 encode_bytes(encoded, data, *len, eb->buf, &eb->buflen, &eb->linelen,
219 eb->break_lines);
220 enclen = encoded->len;
221 if (enclen != 0)
222 err = svn_stream_write(eb->output, encoded->data, &enclen);
223 svn_pool_clear(eb->scratch_pool);
224 return err;
225 }
226
227
228 /* Close handler for svn_base64_encode(). */
229 static svn_error_t *
finish_encoding_data(void * baton)230 finish_encoding_data(void *baton)
231 {
232 struct encode_baton *eb = baton;
233 svn_stringbuf_t *encoded = svn_stringbuf_create_empty(eb->scratch_pool);
234 apr_size_t enclen;
235 svn_error_t *err = SVN_NO_ERROR;
236
237 /* Encode a partial group at the end if necessary, and write it out. */
238 encode_partial_group(encoded, eb->buf, eb->buflen, eb->linelen,
239 eb->break_lines);
240 enclen = encoded->len;
241 if (enclen != 0)
242 err = svn_stream_write(eb->output, encoded->data, &enclen);
243
244 /* Pass on the close request and clean up the baton. */
245 if (err == SVN_NO_ERROR)
246 err = svn_stream_close(eb->output);
247 svn_pool_destroy(eb->scratch_pool);
248 return err;
249 }
250
251
252 svn_stream_t *
svn_base64_encode2(svn_stream_t * output,svn_boolean_t break_lines,apr_pool_t * pool)253 svn_base64_encode2(svn_stream_t *output,
254 svn_boolean_t break_lines,
255 apr_pool_t *pool)
256 {
257 struct encode_baton *eb = apr_palloc(pool, sizeof(*eb));
258 svn_stream_t *stream;
259
260 eb->output = output;
261 eb->buflen = 0;
262 eb->linelen = 0;
263 eb->break_lines = break_lines;
264 eb->scratch_pool = svn_pool_create(pool);
265 stream = svn_stream_create(eb, pool);
266 svn_stream_set_write(stream, encode_data);
267 svn_stream_set_close(stream, finish_encoding_data);
268 return stream;
269 }
270
271
272 const svn_string_t *
svn_base64_encode_string2(const svn_string_t * str,svn_boolean_t break_lines,apr_pool_t * pool)273 svn_base64_encode_string2(const svn_string_t *str,
274 svn_boolean_t break_lines,
275 apr_pool_t *pool)
276 {
277 svn_stringbuf_t *encoded = svn_stringbuf_create_empty(pool);
278 unsigned char ingroup[3];
279 size_t ingrouplen = 0;
280 size_t linelen = 0;
281
282 encode_bytes(encoded, str->data, str->len, ingroup, &ingrouplen, &linelen,
283 break_lines);
284 encode_partial_group(encoded, ingroup, ingrouplen, linelen,
285 break_lines);
286 return svn_stringbuf__morph_into_string(encoded);
287 }
288
289 const svn_string_t *
svn_base64_encode_string(const svn_string_t * str,apr_pool_t * pool)290 svn_base64_encode_string(const svn_string_t *str, apr_pool_t *pool)
291 {
292 return svn_base64_encode_string2(str, TRUE, pool);
293 }
294
295
296
297 /* Base64-encoded input --> binary output */
298
299 struct decode_baton {
300 svn_stream_t *output;
301 unsigned char buf[4]; /* Bytes waiting to be decoded */
302 int buflen; /* Number of bytes waiting */
303 svn_boolean_t done; /* True if we already saw an '=' */
304 apr_pool_t *scratch_pool;
305 };
306
307
308 /* Base64-decode a group. IN needs to have four bytes and OUT needs
309 to have room for three bytes. The input bytes must already have
310 been decoded from base64tab into the range 0..63. The four
311 six-bit values are pasted together to form three eight-bit bytes. */
312 static APR_INLINE void
decode_group(const unsigned char * in,char * out)313 decode_group(const unsigned char *in, char *out)
314 {
315 out[0] = (char)((in[0] << 2) | (in[1] >> 4));
316 out[1] = (char)(((in[1] & 0xf) << 4) | (in[2] >> 2));
317 out[2] = (char)(((in[2] & 0x3) << 6) | in[3]);
318 }
319
/* Decoding table, indexed by character code.  reverse_base64[ch] is
   the six-bit value that the base64 character ch stands for ('A' => 0,
   ..., '/' => 63), or a negative number if ch does not belong to the
   base64 alphabet. */
static const signed char reverse_base64[256] = {
  /* 0x00 - 0x0f */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x10 - 0x1f */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x20 - 0x2f: '+' and '/' */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
  /* 0x30 - 0x3f: '0' .. '9' */
  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
  /* 0x40 - 0x4f: 'A' .. 'O' */
  -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
  /* 0x50 - 0x5f: 'P' .. 'Z' */
  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
  /* 0x60 - 0x6f: 'a' .. 'o' */
  -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
  /* 0x70 - 0x7f: 'p' .. 'z' */
  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
  /* 0x80 - 0x8f */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0x90 - 0x9f */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xa0 - 0xaf */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xb0 - 0xbf */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xc0 - 0xcf */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xd0 - 0xdf */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xe0 - 0xef */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  /* 0xf0 - 0xff */
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
341
342 /* Similar to decode_group but this function also translates the
343 6-bit values from the IN buffer before translating them.
344 Return FALSE if a non-base64 char (e.g. '=' or new line)
345 has been encountered. */
346 static APR_INLINE svn_boolean_t
decode_group_directly(const unsigned char * in,char * out)347 decode_group_directly(const unsigned char *in, char *out)
348 {
349 /* Translate the base64 chars in values [0..63, 0xff] */
350 apr_size_t part0 = (unsigned char)reverse_base64[(unsigned char)in[0]];
351 apr_size_t part1 = (unsigned char)reverse_base64[(unsigned char)in[1]];
352 apr_size_t part2 = (unsigned char)reverse_base64[(unsigned char)in[2]];
353 apr_size_t part3 = (unsigned char)reverse_base64[(unsigned char)in[3]];
354
355 /* Pack 4x6 bits into 3x8.*/
356 out[0] = (char)((part0 << 2) | (part1 >> 4));
357 out[1] = (char)(((part1 & 0xf) << 4) | (part2 >> 2));
358 out[2] = (char)(((part2 & 0x3) << 6) | part3);
359
360 /* FALSE, iff any part is 0xff. */
361 return (part0 | part1 | part2 | part3) != (unsigned char)(-1);
362 }
363
364 /* Base64-encode up to BASE64_LINELEN chars from *DATA and append it to
365 STR. After the function returns, *DATA will point to the first char
366 that has not been translated, yet. Returns TRUE if all BASE64_LINELEN
367 chars could be translated, i.e. no special char has been encountered
368 in between.
369 The code in this function will simply transform the data without
370 performing any boundary checks. Therefore, DATA must have at least
371 BASE64_LINELEN left and space for at least another BYTES_PER_LINE
372 chars must have been pre-allocated in STR before calling this
373 function. */
374 static svn_boolean_t
decode_line(svn_stringbuf_t * str,const char ** data)375 decode_line(svn_stringbuf_t *str, const char **data)
376 {
377 /* Decode up to BYTES_PER_LINE bytes directly from *DATA into STR->DATA. */
378 const unsigned char *p = *(const unsigned char **)data;
379 char *out = str->data + str->len;
380 char *end = out + BYTES_PER_LINE;
381
382 /* We assume that BYTES_PER_LINE is a multiple of 3 and BASE64_LINELEN
383 a multiple of 4. Stop translation as soon as we encounter a special
384 char. Leave the entire group untouched in that case. */
385 for (; out < end; p += 4, out += 3)
386 if (!decode_group_directly(p, out))
387 break;
388
389 /* Update string sizes and positions. */
390 str->len = out - str->data;
391 *out = '\0';
392 *data = (const char *)p;
393
394 /* Return FALSE, if the caller should continue the decoding process
395 using the slow standard method. */
396 return out == end;
397 }
398
399
400 /* (Continue to) Base64-decode the byte string DATA (of length LEN)
401 into STR. INBUF, INBUFLEN, and DONE are used internally; the
402 caller shall have room for four bytes in INBUF and initialize
403 *INBUFLEN to 0 and *DONE to FALSE.
404
405 INBUF and *INBUFLEN carry the leftover bytes from call to call, and
406 *DONE keeps track of whether we've seen an '=' which terminates the
407 encoded data. */
408 static void
decode_bytes(svn_stringbuf_t * str,const char * data,apr_size_t len,unsigned char * inbuf,int * inbuflen,svn_boolean_t * done)409 decode_bytes(svn_stringbuf_t *str, const char *data, apr_size_t len,
410 unsigned char *inbuf, int *inbuflen, svn_boolean_t *done)
411 {
412 const char *p = data;
413 char group[3];
414 signed char find;
415 const char *end = data + len;
416
417 /* Resize the stringbuf to make room for the maximum size of output,
418 to avoid repeated resizes later. The optimizations in
419 decode_line rely on no resizes being necessary!
420
421 (*inbuflen+len) is encoded data length
422 (*inbuflen+len)/4 is the number of complete 4-bytes sets
423 (*inbuflen+len)/4*3 is the number of decoded bytes
424 svn_stringbuf_ensure will add an additional byte for the terminating 0.
425 */
426 svn_stringbuf_ensure(str, str->len + ((*inbuflen + len) / 4) * 3);
427
428 while ( !*done && p < end )
429 {
430 /* If no data is left in temporary INBUF and there is at least
431 one line-sized chunk left to decode, we may use the optimized
432 code path. */
433 if ((*inbuflen == 0) && (end - p >= BASE64_LINELEN))
434 if (decode_line(str, &p))
435 continue;
436
437 /* A special case or decode_line encountered a special char. */
438 if (*p == '=')
439 {
440 /* We are at the end and have to decode a partial group. */
441 if (*inbuflen >= 2)
442 {
443 memset(inbuf + *inbuflen, 0, 4 - *inbuflen);
444 decode_group(inbuf, group);
445 svn_stringbuf_appendbytes(str, group, *inbuflen - 1);
446 }
447 *done = TRUE;
448 }
449 else
450 {
451 find = reverse_base64[(unsigned char)*p];
452 ++p;
453
454 if (find >= 0)
455 inbuf[(*inbuflen)++] = find;
456 if (*inbuflen == 4)
457 {
458 decode_group(inbuf, group);
459 svn_stringbuf_appendbytes(str, group, 3);
460 *inbuflen = 0;
461 }
462 }
463 }
464 }
465
466
467 /* Write handler for svn_base64_decode. */
468 static svn_error_t *
decode_data(void * baton,const char * data,apr_size_t * len)469 decode_data(void *baton, const char *data, apr_size_t *len)
470 {
471 struct decode_baton *db = baton;
472 svn_stringbuf_t *decoded;
473 apr_size_t declen;
474 svn_error_t *err = SVN_NO_ERROR;
475
476 /* Decode this block of data. */
477 decoded = svn_stringbuf_create_empty(db->scratch_pool);
478 decode_bytes(decoded, data, *len, db->buf, &db->buflen, &db->done);
479
480 /* Write the output, clean up, go home. */
481 declen = decoded->len;
482 if (declen != 0)
483 err = svn_stream_write(db->output, decoded->data, &declen);
484 svn_pool_clear(db->scratch_pool);
485 return err;
486 }
487
488
489 /* Close handler for svn_base64_decode(). */
490 static svn_error_t *
finish_decoding_data(void * baton)491 finish_decoding_data(void *baton)
492 {
493 struct decode_baton *db = baton;
494 svn_error_t *err;
495
496 /* Pass on the close request and clean up the baton. */
497 err = svn_stream_close(db->output);
498 svn_pool_destroy(db->scratch_pool);
499 return err;
500 }
501
502
503 svn_stream_t *
svn_base64_decode(svn_stream_t * output,apr_pool_t * pool)504 svn_base64_decode(svn_stream_t *output, apr_pool_t *pool)
505 {
506 struct decode_baton *db = apr_palloc(pool, sizeof(*db));
507 svn_stream_t *stream;
508
509 db->output = output;
510 db->buflen = 0;
511 db->done = FALSE;
512 db->scratch_pool = svn_pool_create(pool);
513 stream = svn_stream_create(db, pool);
514 svn_stream_set_write(stream, decode_data);
515 svn_stream_set_close(stream, finish_decoding_data);
516 return stream;
517 }
518
519
520 const svn_string_t *
svn_base64_decode_string(const svn_string_t * str,apr_pool_t * pool)521 svn_base64_decode_string(const svn_string_t *str, apr_pool_t *pool)
522 {
523 svn_stringbuf_t *decoded = svn_stringbuf_create_empty(pool);
524 unsigned char ingroup[4];
525 int ingrouplen = 0;
526 svn_boolean_t done = FALSE;
527
528 decode_bytes(decoded, str->data, str->len, ingroup, &ingrouplen, &done);
529 return svn_stringbuf__morph_into_string(decoded);
530 }
531
532
533 /* Return a base64-encoded representation of CHECKSUM, allocated in POOL.
534 If CHECKSUM->kind is not recognized, return NULL.
535 ### That 'NULL' claim was in the header file when this was public, but
536 doesn't look true in the implementation.
537
538 ### This is now only used as a new implementation of svn_base64_from_md5();
539 it would probably be safer to revert that to its old implementation. */
540 static svn_stringbuf_t *
base64_from_checksum(const svn_checksum_t * checksum,apr_pool_t * pool)541 base64_from_checksum(const svn_checksum_t *checksum, apr_pool_t *pool)
542 {
543 svn_stringbuf_t *checksum_str;
544 unsigned char ingroup[3];
545 size_t ingrouplen = 0;
546 size_t linelen = 0;
547 checksum_str = svn_stringbuf_create_empty(pool);
548
549 encode_bytes(checksum_str, checksum->digest,
550 svn_checksum_size(checksum), ingroup, &ingrouplen,
551 &linelen, TRUE);
552 encode_partial_group(checksum_str, ingroup, ingrouplen, linelen, TRUE);
553
554 /* Our base64-encoding routines append a final newline if any data
555 was created at all, so let's hack that off. */
556 if (checksum_str->len)
557 {
558 checksum_str->len--;
559 checksum_str->data[checksum_str->len] = 0;
560 }
561
562 return checksum_str;
563 }
564
565
566 svn_stringbuf_t *
svn_base64_from_md5(unsigned char digest[],apr_pool_t * pool)567 svn_base64_from_md5(unsigned char digest[], apr_pool_t *pool)
568 {
569 svn_checksum_t *checksum
570 = svn_checksum__from_digest_md5(digest, pool);
571
572 return base64_from_checksum(checksum, pool);
573 }
574