xref: /openbsd/lib/libkeynote/base64.c (revision db3296cf)
1 /* $OpenBSD: base64.c,v 1.9 2002/02/16 21:27:26 millert Exp $ */
2 /*
3  * Copyright (c) 1996 by Internet Software Consortium.
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
10  * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
11  * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
12  * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
13  * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
14  * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
15  * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
16  * SOFTWARE.
17  */
18 
19 /*
20  * Portions Copyright (c) 1995 by International Business Machines, Inc.
21  *
22  * International Business Machines, Inc. (hereinafter called IBM) grants
23  * permission under its copyrights to use, copy, modify, and distribute this
24  * Software with or without fee, provided that the above copyright notice and
25  * all paragraphs of this notice appear in all copies, and that the name of IBM
26  * not be used in connection with the marketing of any product incorporating
27  * the Software or modifications thereof, without specific, written prior
28  * permission.
29  *
30  * To the extent it has a right to do so, IBM grants an immunity from suit
31  * under its patents, if any, for the use, sale or manufacture of products to
32  * the extent that such products are used for performing Domain Name System
33  * dynamic updates in TCP/IP networks by means of the Software.  No immunity is
34  * granted for any product per se or for any other function of any product.
35  *
36  * THE SOFTWARE IS PROVIDED "AS IS", AND IBM DISCLAIMS ALL WARRANTIES,
37  * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
38  * PARTICULAR PURPOSE.  IN NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL,
39  * DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER ARISING
40  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE, EVEN
41  * IF IBM IS APPRISED OF THE POSSIBILITY OF SUCH DAMAGES.
42  */
43 
44 #if HAVE_CONFIG_H
45 #include "config.h"
46 #endif /* HAVE_CONFIG_H */
47 
48 #include <sys/types.h>
49 #include <stdlib.h>
50 #include <stdio.h>
51 #include <ctype.h>
52 
53 #if STDC_HEADERS
54 #include <string.h>
55 #if !defined(HAVE_STRCHR)
56 #define strchr index
57 #endif /* !HAVE_STRCHR */
58 #endif /* STDC_HEADERS */
59 
60 #include "header.h"
61 #include "keynote.h"
62 
63 #if defined(HAVE___B64_NTOP)
/*
 * Base64 primitives wrapped by the kn_*_base64() functions below.
 * NOTE(review): presumably supplied by the platform's C library or
 * resolver library when HAVE___B64_NTOP is defined -- confirm.
 */
int __b64_ntop(const unsigned char *src, size_t srclength,
	       char *target, size_t targsize);
int __b64_pton(const char *src, unsigned char *target, size_t targsize);
66 
67 int
68 kn_encode_base64(src, srclength, target, targsize)
69 unsigned char const *src;
70 unsigned int srclength;
71 char *target;
72 unsigned int targsize;
73 {
74     int i;
75 
76     i = __b64_ntop(src, srclength, target, targsize);
77     if (i == -1)
78       keynote_errno = ERROR_SYNTAX;
79     return i;
80 }
81 
82 int
83 kn_decode_base64(src, target, targsize)
84 char const *src;
85 unsigned char *target;
86 unsigned int targsize;
87 {
88     int i;
89 
90     i = __b64_pton(src, target, targsize);
91     if (i == -1)
92       keynote_errno = ERROR_SYNTAX;
93     return i;
94 }
95 #else /* HAVE___B64_NTOP */
/*
 * Leave the enclosing function with -1 and keynote_errno set to
 * ERROR_SYNTAX when Cond is false.  The do/while (0) wrapper makes
 * "Assert(x);" a single statement, so it can be used as the body of an
 * unbraced if/else without detaching the else.
 */
#define Assert(Cond) do { \
	if (!(Cond)) { keynote_errno = ERROR_SYNTAX; return -1; } \
    } while (0)
97 
/* The base64 alphabet; the array index of a character is its 6-bit value. */
static const char Base64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "+/";

/* Character that fills out an incomplete final group of four. */
static const char Pad64 = '=';
101 
102 /* (From RFC1521 and draft-ietf-dnssec-secext-03.txt)
103    The following encoding technique is taken from RFC 1521 by Borenstein
104    and Freed.  It is reproduced here in a slightly edited form for
105    convenience.
106 
107    A 65-character subset of US-ASCII is used, enabling 6 bits to be
108    represented per printable character. (The extra 65th character, "=",
109    is used to signify a special processing function.)
110 
111    The encoding process represents 24-bit groups of input bits as output
112    strings of 4 encoded characters. Proceeding from left to right, a
113    24-bit input group is formed by concatenating 3 8-bit input groups.
114    These 24 bits are then treated as 4 concatenated 6-bit groups, each
115    of which is translated into a single digit in the base64 alphabet.
116 
117    Each 6-bit group is used as an index into an array of 64 printable
118    characters. The character referenced by the index is placed in the
119    output string.
120 
121                          Table 1: The Base64 Alphabet
122 
123       Value Encoding  Value Encoding  Value Encoding  Value Encoding
124           0 A            17 R            34 i            51 z
125           1 B            18 S            35 j            52 0
126           2 C            19 T            36 k            53 1
127           3 D            20 U            37 l            54 2
128           4 E            21 V            38 m            55 3
129           5 F            22 W            39 n            56 4
130           6 G            23 X            40 o            57 5
131           7 H            24 Y            41 p            58 6
132           8 I            25 Z            42 q            59 7
133           9 J            26 a            43 r            60 8
134          10 K            27 b            44 s            61 9
135          11 L            28 c            45 t            62 +
136          12 M            29 d            46 u            63 /
137          13 N            30 e            47 v
138          14 O            31 f            48 w         (pad) =
139          15 P            32 g            49 x
140          16 Q            33 h            50 y
141 
142    Special processing is performed if fewer than 24 bits are available
143    at the end of the data being encoded.  A full encoding quantum is
144    always completed at the end of a quantity.  When fewer than 24 input
145    bits are available in an input group, zero bits are added (on the
146    right) to form an integral number of 6-bit groups.  Padding at the
147    end of the data is performed using the '=' character.
148 
149    Since all base64 input is an integral number of octets, only the
150          -------------------------------------------------
151    following cases can arise:
152 
153        (1) the final quantum of encoding input is an integral
154            multiple of 24 bits; here, the final unit of encoded
155 	   output will be an integral multiple of 4 characters
156 	   with no "=" padding,
157        (2) the final quantum of encoding input is exactly 8 bits;
158            here, the final unit of encoded output will be two
159 	   characters followed by two "=" padding characters, or
160        (3) the final quantum of encoding input is exactly 16 bits;
161            here, the final unit of encoded output will be three
162 	   characters followed by one "=" padding character.
163    */
164 
165 int
166 kn_encode_base64(src, srclength, target, targsize)
167 unsigned char const *src;
168 unsigned int srclength;
169 char *target;
170 unsigned int targsize;
171 {
172     unsigned int datalength = 0;
173     unsigned char input[3];
174     unsigned char output[4];
175     int i;
176 
177     keynote_errno = 0;
178     while (2 < srclength) {
179 	input[0] = *src++;
180 	input[1] = *src++;
181 	input[2] = *src++;
182 	srclength -= 3;
183 
184 	output[0] = input[0] >> 2;
185 	output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
186 	output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
187 	output[3] = input[2] & 0x3f;
188 	Assert(output[0] < 64);
189 	Assert(output[1] < 64);
190 	Assert(output[2] < 64);
191 	Assert(output[3] < 64);
192 
193 	if (datalength + 4 > targsize)
194 	{
195 	  keynote_errno = ERROR_SYNTAX;
196 	  return (-1);
197 	}
198 	target[datalength++] = Base64[output[0]];
199 	target[datalength++] = Base64[output[1]];
200 	target[datalength++] = Base64[output[2]];
201 	target[datalength++] = Base64[output[3]];
202     }
203 
204     /* Now we worry about padding. */
205     if (0 != srclength) {
206 	/* Get what's left. */
207 	input[0] = input[1] = input[2] = '\0';
208 	for (i = 0; i < srclength; i++)
209 	  input[i] = *src++;
210 
211 	output[0] = input[0] >> 2;
212 	output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
213 	output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
214 	Assert(output[0] < 64);
215 	Assert(output[1] < 64);
216 	Assert(output[2] < 64);
217 
218 	if (datalength + 4 > targsize)
219 	{
220 	  keynote_errno = ERROR_SYNTAX;
221 	  return (-1);
222 	}
223 	target[datalength++] = Base64[output[0]];
224 	target[datalength++] = Base64[output[1]];
225 	if (srclength == 1)
226 	  target[datalength++] = Pad64;
227 	else
228 	  target[datalength++] = Base64[output[2]];
229 	target[datalength++] = Pad64;
230     }
231     if (datalength >= targsize)
232     {
233       keynote_errno = ERROR_SYNTAX;
234       return (-1);
235     }
236     target[datalength] = '\0';	/* Returned value doesn't count \0. */
237     return (datalength);
238 }
239 
240 /* skips all whitespace anywhere.
241    converts characters, four at a time, starting at (or after)
242    src from base - 64 numbers into three 8 bit bytes in the target area.
243    it returns the number of data bytes stored at the target, or -1 on error.
244  */
245 
246 int
247 kn_decode_base64(src, target, targsize)
248 char const *src;
249 unsigned char *target;
250 unsigned int targsize;
251 {
252     int tarindex, state, ch;
253     char *pos;
254 
255     keynote_errno = 0;
256     state = 0;
257     tarindex = 0;
258 
259     while ((ch = *src++) != '\0') {
260 	if (isspace((int) ch))	/* Skip whitespace anywhere. */
261 	  continue;
262 
263 	if (ch == Pad64)
264 	  break;
265 
266 	pos = strchr(Base64, ch);
267 	if (pos == 0) 		/* A non-base64 character. */
268 	{
269 	  keynote_errno = ERROR_SYNTAX;
270 	  return (-1);
271 	}
272 	switch (state) {
273 	    case 0:
274 		if (target) {
275 		    if (tarindex >= targsize)
276 		    {
277 		      keynote_errno = ERROR_SYNTAX;
278 		      return (-1);
279 		    }
280 		    target[tarindex] = (pos - Base64) << 2;
281 		}
282 		state = 1;
283 		break;
284 	    case 1:
285 		if (target) {
286 		    if (tarindex + 1 >= targsize)
287 		    {
288 		      keynote_errno = ERROR_SYNTAX;
289 		      return (-1);
290 		    }
291 		    target[tarindex]   |=  (pos - Base64) >> 4;
292 		    target[tarindex+1]  = ((pos - Base64) & 0x0f)
293 					  << 4 ;
294 		}
295 		tarindex++;
296 		state = 2;
297 		break;
298 	    case 2:
299 		if (target) {
300 		    if (tarindex + 1 >= targsize)
301 		    {
302 		      keynote_errno = ERROR_SYNTAX;
303 		      return (-1);
304 		    }
305 		    target[tarindex]   |=  (pos - Base64) >> 2;
306 		    target[tarindex+1]  = ((pos - Base64) & 0x03)
307 					  << 6;
308 		}
309 		tarindex++;
310 		state = 3;
311 		break;
312 	    case 3:
313 		if (target) {
314 		    if (tarindex >= targsize)
315 		    {
316 		      keynote_errno = ERROR_SYNTAX;
317 		      return (-1);
318 		    }
319 		    target[tarindex] |= (pos - Base64);
320 		}
321 		tarindex++;
322 		state = 0;
323 		break;
324 	}
325     }
326 
327     /*
328      * We are done decoding Base-64 chars.  Let's see if we ended
329      * on a byte boundary, and/or with erroneous trailing characters.
330      */
331 
332     if (ch == Pad64) {		/* We got a pad char. */
333 	ch = *src++;		/* Skip it, get next. */
334 	switch (state) {
335 	    case 0:		/* Invalid = in first position */
336 	    case 1:		/* Invalid = in second position */
337 	 	keynote_errno = ERROR_SYNTAX;
338 		return (-1);
339 
340 	    case 2:		/* Valid, means one byte of info */
341 		/* Skip any number of spaces. */
342 		for (; ch != '\0'; ch = *src++)
343 		  if (!isspace((int) ch))
344 		    break;
345 		/* Make sure there is another trailing = sign. */
346 		if (ch != Pad64)
347 		{
348 		  keynote_errno = ERROR_SYNTAX;
349 		  return (-1);
350 		}
351 		ch = *src++;		/* Skip the = */
352 		/* Fall through to "single trailing =" case. */
353 		/* FALLTHROUGH */
354 
355 	    case 3:		/* Valid, means two bytes of info */
356 		/*
357 		 * We know this char is an =.  Is there anything but
358 		 * whitespace after it?
359 		 */
360 		for (; ch != '\0'; ch = *src++)
361 		  if (!isspace(ch))
362 		  {
363 		    keynote_errno = ERROR_SYNTAX;
364 		    return (-1);
365 		  }
366 
367 		/*
368 		 * Now make sure for cases 2 and 3 that the "extra"
369 		 * bits that slopped past the last full byte were
370 		 * zeros.  If we don't check them, they become a
371 		 * subliminal channel.
372 		 */
373 		if (target && target[tarindex] != 0)
374 		{
375 		  keynote_errno = ERROR_SYNTAX;
376 		  return (-1);
377 		}
378 	}
379     } else {
380 	/*
381 	 * We ended by seeing the end of the string.  Make sure we
382 	 * have no partial bytes lying around.
383 	 */
384 	if (state != 0)
385 	{
386 	  keynote_errno = ERROR_SYNTAX;
387 	  return (-1);
388 	}
389     }
390 
391     return (tarindex);
392 }
393 #endif /* HAVE___B64_NTOP */
394