1 /*
2 Copyright 2011-2012 David Anderson. All rights reserved.
3 Portions Copyright 2012 SN Systems Ltd. All rights reserved.
4
5 This program is free software; you can redistribute it and/or modify it
6 under the terms of version 2 of the GNU General Public License as
7 published by the Free Software Foundation.
8
9 This program is distributed in the hope that it would be useful, but
10 WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12
13 Further, this software is distributed without any warranty that it is
14 free of the rightful claim of any third person regarding infringement
15 or the like. Any license provided herein, whether implied or
16 otherwise, applies only to this software file. Patent licenses, if
17 any, provided herein do not apply to combinations of this program with
18 other software, or any other product whatsoever.
19
20 You should have received a copy of the GNU General Public License along
21 with this program; if not, write the Free Software Foundation, Inc., 51
22 Franklin Street - Fifth Floor, Boston MA 02110-1301, USA.
23
24 */
25
26 #include "globals.h"
27 #include "esb.h"
28 #include "uri.h"
29 #include <stdio.h>
30 #include <ctype.h>
31
/*  dwarfdump_ctype table. See uritablebuild.c
    Indexed by a byte value 0x00 through 0xff.
    A nonzero entry means translate_to_uri() copies the byte
    to its output unchanged; a zero entry means the byte
    is emitted in %xx form instead.
    The table is only ever read, so it is const. */
static const char dwarfdump_ctype_table[256] = {
    /* 0x00-0x0f: NUL, control and whitespace characters. */
    0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x10-0x1f: control characters. */
    0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
    /* 0x20-0x2f: space and punctuation.
       The double quote (0x22), percent (0x25) and
       single quote (0x27) are 0. */
    1,1,0,1, 1,0,1,0, 1,1,1,1, 1,1,1,1,
    /* 0x30-0x3f: digits and punctuation.
       The semicolon (0x3b) is 0. */
    1,1,1,1, 1,1,1,1, 1,1,1,0, 1,1,1,1,
    /* 0x40-0x4f: at-sign and 'A' through 'O'. */
    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
    /* 0x50-0x5f: 'P' through 'Z' and punctuation. */
    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
    /* 0x60-0x6f: the backquote (0x60) is 0,
       then 'a' through 'o'. */
    0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
    /* 0x70-0x7f: 'p' through 'z' and punctuation.
       DEL (0x7f) is 0. */
    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,0,
    /* 0x80-0x8f */
    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
    /* 0x90-0x9f */
    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
    /* 0xa0-0xaf: 0xa0 is 0. */
    0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
    /* 0xb0-0xbf */
    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
    /* 0xc0-0xcf */
    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
    /* 0xd0-0xdf */
    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
    /* 0xe0-0xef */
    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
    /* 0xf0-0xff: 0xff is 0. */
    1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,0
};
/*  Format one byte value as a URI escape: a percent sign
    followed by two lowercase hexadecimal digits.
    c is the byte to escape; only its low 8 bits are used,
    so the result is always exactly three characters even
    if the caller passes a negative or oversized value.
    buf receives the NUL-terminated text and size is the
    number of bytes available in buf (4 or more is needed
    to hold the full escape).
    Returns buf. */
static char *
xchar(int c, char *buf, int size)
{
    unsigned byte = (unsigned)c & 0xffu;

    snprintf(buf, (size_t)size, "%%%02x", byte);
    return buf;
}
297
298 /* Translate dangerous and some other characters to safe
299 %xx form.
300 */
301 void
translate_to_uri(const char * filename,struct esb_s * out)302 translate_to_uri(const char * filename, struct esb_s *out)
303 {
304 char buf[8];
305 const char *cp = 0;
306 for (cp = filename ; *cp; ++cp) {
307 char v[2];
308 int c = 0xff & (unsigned char)*cp;
309 if (dwarfdump_ctype_table[c]) {
310 v[0] = c;
311 v[1] = 0;
312 esb_append(out,v);
313 } else {
314 char *b = xchar(c,buf,sizeof(buf));
315 esb_append(out,b);
316 }
317 }
318 }
319
/*  Convert one hexadecimal digit character to its value.
    Accepts 0-9, a-f and A-F and returns 0 through 15.
    Any other character is an input botch: a message is
    written to stderr and 0 is returned.
    This is not very efficient, but it is seldom called. */
static char
hexdig(char c)
{
    if (c >= '0' && c <= '9') {
        return (char)(c - '0');
    }
    if (c >= 'a' && c <= 'f') {
        return (char)((c - 'a') + 10);
    }
    if (c >= 'A' && c <= 'F') {
        return (char)((c - 'A') + 10);
    }
    /*  We have an input botch here.
        Go through unsigned char so a byte with the high
        bit set is reported as its two-digit value, not
        sign-extended, and so %x gets the unsigned
        argument it requires. */
    fprintf(stderr,"Translating from uri: "
        "A supposed hexadecimal input character is "
        "not 0-9 or a-f or A-F, it is (shown as hex here): %x\n",
        (unsigned)(unsigned char)c);
    return 0;
}
343
/*  Combine two hexadecimal digit characters into one
    char: c1 supplies the high four bits and c2 the low
    four bits. Each digit is converted by hexdig(). */
static char
tohex(char c1, char c2)
{
    char high = hexdig(c1);
    char low = hexdig(c2);

    return (char)((high << 4) | low);
}
/*  Decode the two characters at cp (the text following
    a '%') into a single char stored through myochar.
    Returns the number of input characters consumed:
    2 for a complete pair,
    1 if the input ends after one character (that
      character itself is stored),
    0 if the input ends immediately ('%' is stored).
    Both short cases write a message to stderr. */
static int
hexpairtochar(const char *cp, char*myochar)
{
    char first = cp[0];
    char second = 0;

    if (!first) {
        /* botched input. */
        fprintf(stderr,"Translating from uri: "
            "A supposed hexadecimal input character pair "
            "runs off the end of the input.\n");
        *myochar = '%';
        return 0;
    }
    /* Only look at cp[1] once cp[0] is known not to be NUL. */
    second = cp[1];
    if (!second) {
        fprintf(stderr,"Translating from uri: "
            "A supposed hexadecimal input character pair "
            "runs off the end of the input after 1 hex digit.\n");
        /* botched input. */
        *myochar = first;
        return 1;
    }
    *myochar = tohex(first,second);
    return 2;
}
379
/*  Append the decoded form of input to out.
    "%%" decodes to a single '%'.
    Any other '%' is followed by what should be two hex
    digits, which hexpairtochar() turns into one char.
    All other characters are appended unchanged. */
void
translate_from_uri(const char * input, struct esb_s* out)
{
    char piece[2];
    const char *cur = input;

    /* piece is a one-character string; only piece[0] varies. */
    piece[1] = 0;
    while (*cur) {
        char ch = *cur;

        if (ch == '%') {
            if (cur[1] == '%') {
                /*  Doubled percent: emit one '%' and
                    step over the extra one. */
                ++cur;
            } else {
                /*  hexpairtochar deals with the next
                    character being NUL. It reports how
                    many characters after the '%' it used. */
                int used = hexpairtochar(cur+1,&ch);

                cur += used;
            }
        }
        piece[0] = ch;
        esb_append(out,piece);
        ++cur;
    }
}
411
412
413
414
415 #ifdef TEST
416
417 unsigned errcnt = 0;
418
419 static void
mytestfrom(const char * in,const char * expected,int testnum)420 mytestfrom(const char * in,const char *expected,int testnum)
421 {
422 struct esb_s out;
423 esb_constructor(&out);
424 translate_from_uri(in, &out);
425 if (strcmp(expected, esb_get_string(&out))) {
426 printf(" Fail test %d expected \"%s\" got \"%s\"\n",
427 testnum,expected,esb_get_string(&out));
428 ++errcnt;
429 }
430 esb_destructor(&out);
431 }
432
433
434 static void
mytest(char * in,char * expected,int testnum)435 mytest(char *in,char *expected,int testnum)
436 {
437 struct esb_s out;
438 esb_constructor(&out);
439 translate_to_uri(in, &out);
440 if (strcmp(expected, esb_get_string(&out))) {
441 printf(" Fail test %d expected %s got %s\n",testnum,expected,esb_get_string(&out));
442 ++errcnt;
443 }
444 esb_destructor(&out);
445 }
446
447
448 int
main()449 main()
450 {
451 /* We no longer translate space to %20, that
452 turns out not to help all that much. */
453 mytest("aaa","aaa",1);
454 mytest(" bc"," bc",2);
455 mytest(";bc","%3bbc",3);
456 mytest(" bc\n"," bc%0a",4);
457 mytest(";bc\n","%3bbc%0a",5);
458 mytest(" bc\r"," bc%0d",6);
459 mytest(";bc\r","%3bbc%0d",7);
460 mytest(" \x01"," %01",8);
461 mytest(";\x01","%3b%01",9);
462 mytestfrom("abc","abc",10);
463 mytestfrom("a%20bc","a bc",11);
464 mytestfrom("a%%20bc","a%20bc",12);
465 mytestfrom("a%%%20bc","a% bc",13);
466 mytestfrom("a%%%%20bc","a%%20bc",14);
467 mytestfrom("a%20","a ",15);
468 /* The following is mistaken input. */
469 mytestfrom("a%2","a2",16);
470 mytestfrom("a%","a%",17);
471 mytest("%bc","%25bc",18);
472
473 if (errcnt) {
474 printf("uri errcount ",errcnt);
475 }
476 return errcnt? 1:0;
477 }
478 #endif
479
480