/* Copyright 2011-2012 David Anderson. All rights reserved. Portions Copyright 2012 SN Systems Ltd. All rights reserved. This program is free software; you can redistribute it and/or modify it under the terms of version 2 of the GNU General Public License as published by the Free Software Foundation. This program is distributed in the hope that it would be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. Further, this software is distributed without any warranty that it is free of the rightful claim of any third person regarding infringement or the like. Any license provided herein, whether implied or otherwise, applies only to this software file. Patent licenses, if any, provided herein do not apply to combinations of this program with other software, or any other product whatsoever. You should have received a copy of the GNU General Public License along with this program; if not, write the Free Software Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston MA 02110-1301, USA. */ #include "globals.h" #include "esb.h" #include "uri.h" #include #include /* dwarfdump_ctype table. See uritablebuild.c */ static char dwarfdump_ctype_table[256] = { 0, /* NUL 0x00 */ 0, /* control 0x01 */ 0, /* control 0x02 */ 0, /* control 0x03 */ 0, /* control 0x04 */ 0, /* control 0x05 */ 0, /* control 0x06 */ 0, /* control 0x07 */ 0, /* control 0x08 */ 0, /* whitespace 0x09 */ 0, /* whitespace 0x0a */ 0, /* whitespace 0x0b */ 0, /* whitespace 0x0c */ 0, /* whitespace 0x0d */ 0, /* control 0x0e */ 0, /* control 0x0f */ 0, /* control 0x10 */ 0, /* control 0x11 */ 0, /* control 0x12 */ 0, /* control 0x13 */ 0, /* control 0x14 */ 0, /* control 0x15 */ 0, /* control 0x16 */ 0, /* control 0x17 */ 0, /* control 0x18 */ 0, /* control 0x19 */ 0, /* control 0x1a */ 0, /* control 0x1b */ 0, /* control 0x1c */ 0, /* control 0x1d */ 0, /* control 0x1e */ 0, /* control 0x1f */ 1, /* ' ' 0x20 */ 1, /* '!' 0x21 */ 0, /* '"' 0x22 */ 1, /* '#' 0x23 */ 1, /* '$' 0x24 */ 0, /* '%' 0x25 */ 1, /* '&' 0x26 */ 0, /* ''' 0x27 */ 1, /* '(' 0x28 */ 1, /* ')' 0x29 */ 1, /* '*' 0x2a */ 1, /* '+' 0x2b */ 1, /* ',' 0x2c */ 1, /* '-' 0x2d */ 1, /* '.' 0x2e */ 1, /* '/' 0x2f */ 1, /* '0' 0x30 */ 1, /* '1' 0x31 */ 1, /* '2' 0x32 */ 1, /* '3' 0x33 */ 1, /* '4' 0x34 */ 1, /* '5' 0x35 */ 1, /* '6' 0x36 */ 1, /* '7' 0x37 */ 1, /* '8' 0x38 */ 1, /* '9' 0x39 */ 1, /* ':' 0x3a */ 0, /* ';' 0x3b */ 1, /* '<' 0x3c */ 1, /* '=' 0x3d */ 1, /* '>' 0x3e */ 1, /* '?' 0x3f */ 1, /* '@' 0x40 */ 1, /* 'A' 0x41 */ 1, /* 'B' 0x42 */ 1, /* 'C' 0x43 */ 1, /* 'D' 0x44 */ 1, /* 'E' 0x45 */ 1, /* 'F' 0x46 */ 1, /* 'G' 0x47 */ 1, /* 'H' 0x48 */ 1, /* 'I' 0x49 */ 1, /* 'J' 0x4a */ 1, /* 'K' 0x4b */ 1, /* 'L' 0x4c */ 1, /* 'M' 0x4d */ 1, /* 'N' 0x4e */ 1, /* 'O' 0x4f */ 1, /* 'P' 0x50 */ 1, /* 'Q' 0x51 */ 1, /* 'R' 0x52 */ 1, /* 'S' 0x53 */ 1, /* 'T' 0x54 */ 1, /* 'U' 0x55 */ 1, /* 'V' 0x56 */ 1, /* 'W' 0x57 */ 1, /* 'X' 0x58 */ 1, /* 'Y' 0x59 */ 1, /* 'Z' 0x5a */ 1, /* '[' 0x5b */ 1, /* '\' 0x5c */ 1, /* ']' 0x5d */ 1, /* '^' 0x5e */ 1, /* '_' 0x5f */ 0, /* '`' 0x60 */ 1, /* 'a' 0x61 */ 1, /* 'b' 0x62 */ 1, /* 'c' 0x63 */ 1, /* 'd' 0x64 */ 1, /* 'e' 0x65 */ 1, /* 'f' 0x66 */ 1, /* 'g' 0x67 */ 1, /* 'h' 0x68 */ 1, /* 'i' 0x69 */ 1, /* 'j' 0x6a */ 1, /* 'k' 0x6b */ 1, /* 'l' 0x6c */ 1, /* 'm' 0x6d */ 1, /* 'n' 0x6e */ 1, /* 'o' 0x6f */ 1, /* 'p' 0x70 */ 1, /* 'q' 0x71 */ 1, /* 'r' 0x72 */ 1, /* 's' 0x73 */ 1, /* 't' 0x74 */ 1, /* 'u' 0x75 */ 1, /* 'v' 0x76 */ 1, /* 'w' 0x77 */ 1, /* 'x' 0x78 */ 1, /* 'y' 0x79 */ 1, /* 'z' 0x7a */ 1, /* '{' 0x7b */ 1, /* '|' 0x7c */ 1, /* '}' 0x7d */ 1, /* '~' 0x7e */ 0, /* DEL 0x7f */ 1, /* 0x80 */ 1, /* 0x81 */ 1, /* 0x82 */ 1, /* 0x83 */ 1, /* 0x84 */ 1, /* 0x85 */ 1, /* 0x86 */ 1, /* 0x87 */ 1, /* 0x88 */ 1, /* 0x89 */ 1, /* 0x8a */ 1, /* 0x8b */ 1, /* 0x8c */ 1, /* 0x8d */ 1, /* 0x8e */ 1, /* 0x8f */ 1, /* 0x90 */ 1, /* 0x91 */ 1, /* 0x92 */ 1, /* 0x93 */ 1, /* 0x94 */ 1, /* 0x95 */ 1, /* 0x96 */ 1, /* 0x97 */ 1, /* 0x98 */ 1, /* 0x99 */ 1, /* 0x9a */ 1, /* 0x9b */ 1, /* 0x9c */ 1, /* 0x9d */ 1, /* 0x9e */ 1, /* 0x9f */ 0, /* other: 0xa0 */ 1, /* 0xa1 */ 1, /* 0xa2 */ 1, /* 0xa3 */ 1, /* 0xa4 */ 1, /* 0xa5 */ 1, /* 0xa6 */ 1, /* 0xa7 */ 1, /* 0xa8 */ 1, /* 0xa9 */ 1, /* 0xaa */ 1, /* 0xab */ 1, /* 0xac */ 1, /* 0xad */ 1, /* 0xae */ 1, /* 0xaf */ 1, /* 0xb0 */ 1, /* 0xb1 */ 1, /* 0xb2 */ 1, /* 0xb3 */ 1, /* 0xb4 */ 1, /* 0xb5 */ 1, /* 0xb6 */ 1, /* 0xb7 */ 1, /* 0xb8 */ 1, /* 0xb9 */ 1, /* 0xba */ 1, /* 0xbb */ 1, /* 0xbc */ 1, /* 0xbd */ 1, /* 0xbe */ 1, /* 0xbf */ 1, /* 0xc0 */ 1, /* 0xc1 */ 1, /* 0xc2 */ 1, /* 0xc3 */ 1, /* 0xc4 */ 1, /* 0xc5 */ 1, /* 0xc6 */ 1, /* 0xc7 */ 1, /* 0xc8 */ 1, /* 0xc9 */ 1, /* 0xca */ 1, /* 0xcb */ 1, /* 0xcc */ 1, /* 0xcd */ 1, /* 0xce */ 1, /* 0xcf */ 1, /* 0xd0 */ 1, /* 0xd1 */ 1, /* 0xd2 */ 1, /* 0xd3 */ 1, /* 0xd4 */ 1, /* 0xd5 */ 1, /* 0xd6 */ 1, /* 0xd7 */ 1, /* 0xd8 */ 1, /* 0xd9 */ 1, /* 0xda */ 1, /* 0xdb */ 1, /* 0xdc */ 1, /* 0xdd */ 1, /* 0xde */ 1, /* 0xdf */ 1, /* 0xe0 */ 1, /* 0xe1 */ 1, /* 0xe2 */ 1, /* 0xe3 */ 1, /* 0xe4 */ 1, /* 0xe5 */ 1, /* 0xe6 */ 1, /* 0xe7 */ 1, /* 0xe8 */ 1, /* 0xe9 */ 1, /* 0xea */ 1, /* 0xeb */ 1, /* 0xec */ 1, /* 0xed */ 1, /* 0xee */ 1, /* 0xef */ 1, /* 0xf0 */ 1, /* 0xf1 */ 1, /* 0xf2 */ 1, /* 0xf3 */ 1, /* 0xf4 */ 1, /* 0xf5 */ 1, /* 0xf6 */ 1, /* 0xf7 */ 1, /* 0xf8 */ 1, /* 0xf9 */ 1, /* 0xfa */ 1, /* 0xfb */ 1, /* 0xfc */ 1, /* 0xfd */ 1, /* 0xfe */ 0, /* other: 0xff */ }; static char * xchar(int c, char *buf, int size) { snprintf(buf, size,"%%%02x",c); return buf; } /* Translate dangerous and some other characters to safe %xx form. */ void translate_to_uri(const char * filename, struct esb_s *out) { char buf[8]; const char *cp = 0; for (cp = filename ; *cp; ++cp) { char v[2]; int c = 0xff & (unsigned char)*cp; if (dwarfdump_ctype_table[c]) { v[0] = c; v[1] = 0; esb_append(out,v); } else { char *b = xchar(c,buf,sizeof(buf)); esb_append(out,b); } } } /* This is not very efficient, but it is seldom called. */ static char hexdig(char c) { char ochar = 0; if (c >= '0' && c <= '9') { ochar = (c - '0'); return ochar; } if (c >= 'a' && c <= 'f') { ochar = (c - 'a')+10; return ochar; } if (c >= 'A' && c <= 'F') { ochar = (c - 'A')+10; return ochar; } /* We have an input botch here. */ fprintf(stderr,"Translating from uri: " "A supposed hexadecimal input character is " "not 0-9 or a-f or A-F, it is (shown as hex here): %x\n",c); return ochar; } static char tohex(char c1, char c2) { char out = (hexdig(c1) << 4) | hexdig(c2); return out; } static int hexpairtochar(const char *cp, char*myochar) { char ochar = 0; int olen = 0; char c = cp[0]; if (c) { char c2 = cp[1]; if (c2) { ochar = tohex(c,c2); olen = 2; } else { fprintf(stderr,"Translating from uri: " "A supposed hexadecimal input character pair " "runs off the end of the input after 1 hex digit.\n"); /* botched input. */ ochar = c; olen = 1; } } else { /* botched input. */ fprintf(stderr,"Translating from uri: " "A supposed hexadecimal input character pair " "runs off the end of the input.\n"); ochar = '%'; olen = 0; } *myochar = ochar; return olen; } void translate_from_uri(const char * input, struct esb_s* out) { const char *cp = input; char tempstr[2]; for (; *cp; ++cp) { char c = *cp; if (c == '%') { int increment = 0; char c2 = cp[1]; /* hexpairtochar deals with c2 being NUL. */ if (c2 == '%') { tempstr[0] = c; tempstr[1] = 0; esb_append(out,tempstr); ++cp; continue; } increment = hexpairtochar(cp+1,&c); tempstr[0] = c; tempstr[1] = 0; esb_append(out,tempstr); cp +=increment; continue; } tempstr[0] = c; tempstr[1] = 0; esb_append(out,tempstr); } } #ifdef TEST unsigned errcnt = 0; static void mytestfrom(const char * in,const char *expected,int testnum) { struct esb_s out; esb_constructor(&out); translate_from_uri(in, &out); if (strcmp(expected, esb_get_string(&out))) { printf(" Fail test %d expected \"%s\" got \"%s\"\n", testnum,expected,esb_get_string(&out)); ++errcnt; } esb_destructor(&out); } static void mytest(char *in,char *expected,int testnum) { struct esb_s out; esb_constructor(&out); translate_to_uri(in, &out); if (strcmp(expected, esb_get_string(&out))) { printf(" Fail test %d expected %s got %s\n",testnum,expected,esb_get_string(&out)); ++errcnt; } esb_destructor(&out); } int main() { /* We no longer translate space to %20, that turns out not to help all that much. */ mytest("aaa","aaa",1); mytest(" bc"," bc",2); mytest(";bc","%3bbc",3); mytest(" bc\n"," bc%0a",4); mytest(";bc\n","%3bbc%0a",5); mytest(" bc\r"," bc%0d",6); mytest(";bc\r","%3bbc%0d",7); mytest(" \x01"," %01",8); mytest(";\x01","%3b%01",9); mytestfrom("abc","abc",10); mytestfrom("a%20bc","a bc",11); mytestfrom("a%%20bc","a%20bc",12); mytestfrom("a%%%20bc","a% bc",13); mytestfrom("a%%%%20bc","a%%20bc",14); mytestfrom("a%20","a ",15); /* The following is mistaken input. */ mytestfrom("a%2","a2",16); mytestfrom("a%","a%",17); mytest("%bc","%25bc",18); if (errcnt) { printf("uri errcount ",errcnt); } return errcnt? 1:0; } #endif