1 /*
2  * Copyright (C) 2009 J.A.Bezemer@opensourcepartners.nl
3  *
4  * Permission to use, copy, modify, and distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15  */
16 
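/*
 * Bit layout of one block: 7 raw bytes (8 bits each) are repacked into
 * 8 encoded symbols of 7 bits each.  The digits are bit numbers within a
 * raw byte (top row) or within an encoded symbol (second row); '^' marks
 * the most significant bit of each encoded symbol.
 *
 * raw	76543210 76543210 76543210 76543210 76543210 76543210 76543210
 * enc	65432106 54321065 43210654 32106543 21065432 10654321 06543210
 *	^      ^       ^       ^       ^       ^       ^       ^
 *
 * Nibble patterns (binary, hex) that make up the bit masks:
 *
 *	0001 1  0001 1
 *	0011 3  0011 3
 *	0111 7  0111 7
 *	1111 f  0110 6
 *	1110 e  0100 4
 *	1100 c
 *	1000 8
 */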
30 
31 #include <stdio.h>
32 #include <stdlib.h>
33 #include <string.h>
34 
35 #include "encoding.h"
36 #include "base128.h"
37 
/*
 * Block geometry: 7 raw bytes (7 * 8 = 56 bits) are carried by
 * 8 encoded characters of 7 bits each (8 * 7 = 56 bits).
 */
#define BLKSIZE_RAW 7	/* raw bytes per block */
#define BLKSIZE_ENC 8	/* encoded characters per block */
40 
/*
 * Encoding alphabet: 128 symbols, indexed by their 7-bit value
 * (a-z, A-Z, 0-9, then bytes 0xBC..0xFD).
 *
 * Don't use '-' (restricted to the middle of labels); prefer ISO 8859-1
 * accented characters, since they might readily be entered in normal use;
 * don't use 254-255 because of possible function overloading in DNS systems.
 */
static const unsigned char cb128[] =
	"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
	"\274\275\276\277"
	"\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317"
	"\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337"
	"\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357"
	"\360\361\362\363\364\365\366\367\370\371\372\373\374\375";

/*
 * Reverse lookup: encoded byte -> 7-bit value.  Filled on first use by
 * base128_reverse_init(); bytes that are not in cb128[] stay 0.
 */
static unsigned char rev128[256];

/* Non-zero once rev128[] has been filled. */
static int reverse_init = 0;
54 
/*
 * Forward declarations of the callbacks stored in base128_encoder.
 * The parameterless ones are declared with (void) so that they are real
 * prototypes and calls with stray arguments are diagnosed.
 */
static int base128_encode(char *buf, size_t *buflen, const void *data, size_t size);
static int base128_decode(void *buf, size_t *buflen, const char *str, size_t slen);
static int base128_handles_dots(void);
static int base128_blksize_raw(void);
static int base128_blksize_enc(void);
60 
61 static struct encoder base128_encoder =
62 {
63 	"Base128",
64 	base128_encode,
65 	base128_decode,
66 	base128_handles_dots,
67 	base128_handles_dots,
68 	base128_blksize_raw,
69 	base128_blksize_enc
70 };
71 
72 struct encoder
get_base128_encoder()73 *get_base128_encoder()
74 {
75 	return &base128_encoder;
76 }
77 
/*
 * Dot-handling callback of the Base128 encoder: always reports 0.
 * (Presumably "this encoder neither inserts nor consumes dots" -- the
 * exact meaning is defined by the users of struct encoder.)
 */
static int
base128_handles_dots(void)
{
	return 0;
}
83 
84 static int
base128_blksize_raw()85 base128_blksize_raw()
86 {
87 	return BLKSIZE_RAW;
88 }
89 
90 static int
base128_blksize_enc()91 base128_blksize_enc()
92 {
93 	return BLKSIZE_ENC;
94 }
95 
96 inline static void
base128_reverse_init()97 base128_reverse_init()
98 {
99 	int i;
100 	unsigned char c;
101 
102 	if (!reverse_init) {
103 		memset (rev128, 0, 256);
104 		for (i = 0; i < 128; i++) {
105 			c = cb128[i];
106 			rev128[(int) c] = i;
107 		}
108 		reverse_init = 1;
109 	}
110 }
111 
112 static int
base128_encode(char * buf,size_t * buflen,const void * data,size_t size)113 base128_encode(char *buf, size_t *buflen, const void *data, size_t size)
114 /*
115  * Fills *buf with max. *buflen characters, encoding size bytes of *data.
116  *
117  * NOTE: *buf space should be at least 1 byte _more_ than *buflen
118  * to hold the trailing '\0'.
119  *
120  * return value    : #bytes filled in buf   (excluding \0)
121  * sets *buflen to : #bytes encoded from data
122  */
123 {
124 	unsigned char *ubuf = (unsigned char *) buf;
125 	unsigned char *udata = (unsigned char *) data;
126 	int iout = 0;	/* to-be-filled output char */
127 	int iin = 0;	/* one more than last input byte that can be
128 			   successfully decoded */
129 
130 	/* Note: Don't bother to optimize manually. GCC optimizes
131 	   better(!) when using simplistic array indexing. */
132 
133 	while (1) {
134 		if (iout >= *buflen || iin >= size)
135 			break;
136 		ubuf[iout] = cb128[((udata[iin] & 0xfe) >> 1)];
137 		iout++;
138 
139 		if (iout >= *buflen || iin >= size) {
140 			iout--; 	/* previous char is useless */
141 			break;
142 		}
143 		ubuf[iout] = cb128[((udata[iin] & 0x01) << 6) |
144 				   ((iin + 1 < size) ?
145 				    ((udata[iin + 1] & 0xfc) >> 2) : 0)];
146 		iin++;			/* 0 complete, iin=1 */
147 		iout++;
148 
149 		if (iout >= *buflen || iin >= size)
150 			break;
151 		ubuf[iout] = cb128[((udata[iin] & 0x03) << 5) |
152 				   ((iin + 1 < size) ?
153 				    ((udata[iin + 1] & 0xf8) >> 3) : 0)];
154 		iin++;			/* 1 complete, iin=2 */
155 		iout++;
156 
157 		if (iout >= *buflen || iin >= size)
158 			break;
159 		ubuf[iout] = cb128[((udata[iin] & 0x07) << 4) |
160 				   ((iin + 1 < size) ?
161 				    ((udata[iin + 1] & 0xf0) >> 4) : 0)];
162 		iin++;			/* 2 complete, iin=3 */
163 		iout++;
164 
165 		if (iout >= *buflen || iin >= size)
166 			break;
167 		ubuf[iout] = cb128[((udata[iin] & 0x0f) << 3) |
168 				   ((iin + 1 < size) ?
169 				    ((udata[iin + 1] & 0xe0) >> 5) : 0)];
170 		iin++;			/* 3 complete, iin=4 */
171 		iout++;
172 
173 		if (iout >= *buflen || iin >= size)
174 			break;
175 		ubuf[iout] = cb128[((udata[iin] & 0x1f) << 2) |
176 				   ((iin + 1 < size) ?
177 				    ((udata[iin + 1] & 0xc0) >> 6) : 0)];
178 		iin++;			/* 4 complete, iin=5 */
179 		iout++;
180 
181 		if (iout >= *buflen || iin >= size)
182 			break;
183 		ubuf[iout] = cb128[((udata[iin] & 0x3f) << 1) |
184 				   ((iin + 1 < size) ?
185 				    ((udata[iin + 1] & 0x80) >> 7) : 0)];
186 		iin++;			/* 5 complete, iin=6 */
187 		iout++;
188 
189 		if (iout >= *buflen || iin >= size)
190 			break;
191 		ubuf[iout] = cb128[(udata[iin] & 0x7f)];
192 		iin++;			/* 6 complete, iin=7 */
193 		iout++;
194 	}
195 
196 	ubuf[iout] = '\0';
197 
198 	/* store number of bytes from data that was used */
199 	*buflen = iin;
200 
201 	return iout;
202 }
203 
/*
 * Reverse-table lookup for one encoded byte.  The argument is converted
 * to unsigned char first, so a plain (possibly negative) char can never
 * index before the start of rev128[]; the expansion is parenthesised so
 * it can be used inside larger expressions.
 */
#define REV128(x) (rev128[(unsigned char) (x)])
205 
206 static int
base128_decode(void * buf,size_t * buflen,const char * str,size_t slen)207 base128_decode(void *buf, size_t *buflen, const char *str, size_t slen)
208 /*
209  * Fills *buf with max. *buflen bytes, decoded from slen chars in *str.
210  * Decoding stops early when *str contains \0.
211  * Illegal encoded chars are assumed to decode to zero.
212  *
213  * NOTE: *buf space should be at least 1 byte _more_ than *buflen
214  * to hold a trailing '\0' that is added (though *buf will usually
215  * contain full-binary data).
216  *
217  * return value    : #bytes filled in buf   (excluding \0)
218  */
219 {
220 	unsigned char *ustr = (unsigned char *) str;
221 	unsigned char *ubuf = (unsigned char *) buf;
222 	int iout = 0;	/* to-be-filled output byte */
223 	int iin = 0;	/* next input char to use in decoding */
224 
225 	base128_reverse_init ();
226 
227 	/* Note: Don't bother to optimize manually. GCC optimizes
228 	   better(!) when using simplistic array indexing. */
229 
230 	while (1) {
231 		if (iout >= *buflen || iin + 1 >= slen ||
232 		    str[iin] == '\0' || str[iin + 1] == '\0')
233 			break;
234 		ubuf[iout] = ((REV128(ustr[iin]) & 0x7f) << 1) |
235 			     ((REV128(ustr[iin + 1]) & 0x40) >> 6);
236 		iin++;  		/* 0 used up, iin=1 */
237 		iout++;
238 
239 		if (iout >= *buflen || iin + 1 >= slen ||
240 		    str[iin] == '\0' || str[iin + 1] == '\0')
241 			break;
242 		ubuf[iout] = ((REV128(ustr[iin]) & 0x3f) << 2) |
243 			     ((REV128(ustr[iin + 1]) & 0x60) >> 5);
244 		iin++;  		/* 1 used up, iin=2 */
245 		iout++;
246 
247 		if (iout >= *buflen || iin + 1 >= slen ||
248 		    str[iin] == '\0' || str[iin + 1] == '\0')
249 			break;
250 		ubuf[iout] = ((REV128(ustr[iin]) & 0x1f) << 3) |
251 			     ((REV128(ustr[iin + 1]) & 0x70) >> 4);
252 		iin++;  		/* 2 used up, iin=3 */
253 		iout++;
254 
255 		if (iout >= *buflen || iin + 1 >= slen ||
256 		    str[iin] == '\0' || str[iin + 1] == '\0')
257 			break;
258 		ubuf[iout] = ((REV128(ustr[iin]) & 0x0f) << 4) |
259 			     ((REV128(ustr[iin + 1]) & 0x78) >> 3);
260 		iin++;  		/* 3 used up, iin=4 */
261 		iout++;
262 
263 		if (iout >= *buflen || iin + 1 >= slen ||
264 		    str[iin] == '\0' || str[iin + 1] == '\0')
265 			break;
266 		ubuf[iout] = ((REV128(ustr[iin]) & 0x07) << 5) |
267 			     ((REV128(ustr[iin + 1]) & 0x7c) >> 2);
268 		iin++;  		/* 4 used up, iin=5 */
269 		iout++;
270 
271 		if (iout >= *buflen || iin + 1 >= slen ||
272 		    str[iin] == '\0' || str[iin + 1] == '\0')
273 			break;
274 		ubuf[iout] = ((REV128(ustr[iin]) & 0x03) << 6) |
275 			     ((REV128(ustr[iin + 1]) & 0x7e) >> 1);
276 		iin++;  		/* 5 used up, iin=6 */
277 		iout++;
278 
279 		if (iout >= *buflen || iin + 1 >= slen ||
280 		    str[iin] == '\0' || str[iin + 1] == '\0')
281 			break;
282 		ubuf[iout] = ((REV128(ustr[iin]) & 0x01) << 7) |
283 			     ((REV128(ustr[iin + 1]) & 0x7f));
284 		iin += 2;  		/* 6,7 used up, iin=8 */
285 		iout++;
286 	}
287 
288 	ubuf[iout] = '\0';
289 
290 	return iout;
291 }
292