1 /*
2     Copyright 2011-2012 David Anderson. All rights reserved.
3     Portions Copyright 2012 SN Systems Ltd. All rights reserved.
4 
5     This program is free software; you can redistribute it and/or modify it
6     under the terms of version 2 of the GNU General Public License as
7     published by the Free Software Foundation.
8 
9     This program is distributed in the hope that it would be useful, but
10     WITHOUT ANY WARRANTY; without even the implied warranty of
11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12 
13     Further, this software is distributed without any warranty that it is
14     free of the rightful claim of any third person regarding infringement
15     or the like.  Any license provided herein, whether implied or
16     otherwise, applies only to this software file.  Patent licenses, if
17     any, provided herein do not apply to combinations of this program with
18     other software, or any other product whatsoever.
19 
20     You should have received a copy of the GNU General Public License along
21     with this program; if not, write the Free Software Foundation, Inc., 51
22     Franklin Street - Fifth Floor, Boston MA 02110-1301, USA.
23 
24 */
25 
26 #include "globals.h"
27 #include "esb.h"
28 #include "uri.h"
29 #include <stdio.h>
30 #include <ctype.h>
31 
/*  dwarfdump_ctype table. See uritablebuild.c
    Indexed by a byte value 0x00 through 0xff.
    A nonzero entry means the byte is appended to the
    output unchanged by translate_to_uri(), a zero
    entry means the byte is emitted in %xx form.
    The table is only read, never written, so it is const.
    Each row below holds 16 entries. */
static const char dwarfdump_ctype_table[256] = {
/* 0x00-0x0f: NUL, control, whitespace: all escaped. */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
/* 0x10-0x1f: control: all escaped. */
0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
/* 0x20-0x2f: ' ' through '/'.
   Escaped: '"' 0x22, '%' 0x25, ''' 0x27 */
1,1,0,1, 1,0,1,0, 1,1,1,1, 1,1,1,1,
/* 0x30-0x3f: '0' through '?'. Escaped: ';' 0x3b */
1,1,1,1, 1,1,1,1, 1,1,1,0, 1,1,1,1,
/* 0x40-0x4f: '@' through 'O' */
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
/* 0x50-0x5f: 'P' through '_' */
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
/* 0x60-0x6f: '`' through 'o'. Escaped: '`' 0x60 */
0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
/* 0x70-0x7f: 'p' through DEL. Escaped: DEL 0x7f */
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,0,
/* 0x80-0x8f */
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
/* 0x90-0x9f */
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
/* 0xa0-0xaf. Escaped: other: 0xa0 */
0,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
/* 0xb0-0xbf */
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
/* 0xc0-0xcf */
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
/* 0xd0-0xdf */
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
/* 0xe0-0xef */
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1,
/* 0xf0-0xff. Escaped: other: 0xff */
1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,0
};
/*  Format the byte value c into buf as the URI escape
    "%xx": a percent sign followed by two lowercase
    hexadecimal digits.
    buf has room for size bytes, counting the terminating NUL.
    Only the low 8 bits of c are used, so a negative
    (sign-extended) value still yields exactly two digits.
    If size is not positive nothing is written: a negative
    int would otherwise turn into a huge size_t for snprintf.
    Returns buf. */
static char *
xchar(int c, char *buf, int size)
{
    if (size <= 0) {
        return buf;
    }
    snprintf(buf, (size_t)size, "%%%02x", (unsigned)c & 0xffu);
    return buf;
}
297 
298 /* Translate dangerous and some other characters to safe
299    %xx form.
300 */
301 void
translate_to_uri(const char * filename,struct esb_s * out)302 translate_to_uri(const char * filename, struct esb_s *out)
303 {
304     char buf[8];
305     const char *cp = 0;
306     for (cp = filename  ; *cp; ++cp) {
307         char v[2];
308         int c = 0xff & (unsigned char)*cp;
309         if (dwarfdump_ctype_table[c]) {
310             v[0] = c;
311             v[1] = 0;
312             esb_append(out,v);
313         } else {
314             char *b = xchar(c,buf,sizeof(buf));
315             esb_append(out,b);
316         }
317     }
318 }
319 
/*  Return the numeric value (0 through 15) of the
    hexadecimal digit character c, accepting 0-9, a-f
    and A-F.
    For any other character a diagnostic is written to
    stderr and 0 is returned.
    The offending character is converted through
    unsigned char before printing so that a byte with
    the high bit set shows as two hex digits instead of
    a sign-extended value.
    This is not very efficient, but it is seldom called. */
static char
hexdig(char c)
{
    if (c >= '0' && c <= '9') {
        return (char)(c - '0');
    }
    if (c >= 'a' && c <= 'f') {
        return (char)((c - 'a') + 10);
    }
    if (c >= 'A' && c <= 'F') {
        return (char)((c - 'A') + 10);
    }
    /* We have an input botch here. */
    fprintf(stderr,"Translating from uri: "
        "A supposed hexadecimal input character is "
        "not 0-9 or a-f or A-F, it is (shown as hex here): %x\n",
        (unsigned)(unsigned char)c);
    return 0;
}
343 
/*  Combine two hexadecimal digit characters into one
    char value: c1 supplies the high four bits and
    c2 the low four bits. Each digit is converted
    by hexdig(). */
static char
tohex(char c1, char c2)
{
    int high = hexdig(c1);
    int low = hexdig(c2);

    return (char)((high << 4) | low);
}
/*  Decode the two characters at cp as a hexadecimal pair.
    The caller in this file passes the address just after a '%'.
    The resulting character is stored through myochar and
    the number of input characters used is returned:
    2   Both characters present, *myochar is their
        combined value from tohex().
    1   The input ends after one character. A message
        goes to stderr and *myochar is that one
        character, unchanged.
    0   The input ends immediately. A message goes to
        stderr and *myochar is '%'. */
static int
hexpairtochar(const char *cp, char*myochar)
{
    char first = cp[0];
    char second = 0;

    if (!first) {
        /* botched input. */
        fprintf(stderr,"Translating from uri: "
            "A supposed hexadecimal input character pair "
            "runs off the end of the input.\n");
        *myochar = '%';
        return 0;
    }
    second = cp[1];
    if (!second) {
        fprintf(stderr,"Translating from uri: "
            "A supposed hexadecimal input character pair "
            "runs off the end of the input after 1 hex digit.\n");
        /* botched input. */
        *myochar = first;
        return 1;
    }
    *myochar = tohex(first, second);
    return 2;
}
379 
/*  Append input to out, undoing %xx escapes.
    "%%" in the input becomes a single '%'.
    '%' followed by anything else is handed to
    hexpairtochar(), which also copes with the input
    ending right after the '%'.
    All other characters are appended unchanged. */
void
translate_from_uri(const char * input, struct esb_s* out)
{
    const char *pos = input;
    char piece[2];

    while (*pos) {
        char ch = *pos;
        /* Input characters used beyond the one at pos. */
        int extra = 0;

        if (ch == '%') {
            if (pos[1] == '%') {
                /* ch is already the '%' to emit. */
                extra = 1;
            } else {
                extra = hexpairtochar(pos + 1, &ch);
            }
        }
        piece[0] = ch;
        piece[1] = 0;
        esb_append(out, piece);
        pos += extra + 1;
    }
}
411 
412 
413 
414 
415 #ifdef TEST
416 
/* Count of failed self-test cases, incremented by the test helpers. */
unsigned int errcnt = 0U;
418 
419 static void
mytestfrom(const char * in,const char * expected,int testnum)420 mytestfrom(const char * in,const char *expected,int testnum)
421 {
422     struct esb_s out;
423     esb_constructor(&out);
424     translate_from_uri(in, &out);
425     if (strcmp(expected, esb_get_string(&out))) {
426         printf(" Fail test %d expected \"%s\" got \"%s\"\n",
427             testnum,expected,esb_get_string(&out));
428         ++errcnt;
429     }
430     esb_destructor(&out);
431 }
432 
433 
434 static void
mytest(char * in,char * expected,int testnum)435 mytest(char *in,char *expected,int testnum)
436 {
437     struct esb_s out;
438     esb_constructor(&out);
439     translate_to_uri(in, &out);
440     if (strcmp(expected, esb_get_string(&out))) {
441         printf(" Fail test %d expected %s got %s\n",testnum,expected,esb_get_string(&out));
442         ++errcnt;
443     }
444     esb_destructor(&out);
445 }
446 
447 
448 int
main()449 main()
450 {
451     /* We no longer translate space to %20, that
452     turns out not to help all that much. */
453     mytest("aaa","aaa",1);
454     mytest(" bc"," bc",2);
455     mytest(";bc","%3bbc",3);
456     mytest(" bc\n"," bc%0a",4);
457     mytest(";bc\n","%3bbc%0a",5);
458     mytest(" bc\r"," bc%0d",6);
459     mytest(";bc\r","%3bbc%0d",7);
460     mytest(" \x01"," %01",8);
461     mytest(";\x01","%3b%01",9);
462     mytestfrom("abc","abc",10);
463     mytestfrom("a%20bc","a bc",11);
464     mytestfrom("a%%20bc","a%20bc",12);
465     mytestfrom("a%%%20bc","a% bc",13);
466     mytestfrom("a%%%%20bc","a%%20bc",14);
467     mytestfrom("a%20","a ",15);
468     /* The following is mistaken input. */
469     mytestfrom("a%2","a2",16);
470     mytestfrom("a%","a%",17);
471     mytest("%bc","%25bc",18);
472 
473     if (errcnt) {
474         printf("uri errcount ",errcnt);
475     }
476     return errcnt? 1:0;
477 }
478 #endif
479 
480