1 /*
2  *  Licensed to the Apache Software Foundation (ASF) under one or more
3  *  contributor license agreements.  See the NOTICE file distributed with
4  *  this work for additional information regarding copyright ownership.
5  *  The ASF licenses this file to You under the Apache License, Version 2.0
6  *  (the "License"); you may not use this file except in compliance with
7  *  the License.  You may obtain a copy of the License at
8  *
9  *      http://www.apache.org/licenses/LICENSE-2.0
10  *
11  *  Unless required by applicable law or agreed to in writing, software
12  *  distributed under the License is distributed on an "AS IS" BASIS,
13  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *  See the License for the specific language governing permissions and
15  *  limitations under the License.
16  */
17 
18 /***************************************************************************
19  * Description: URL manipulation subroutines. (ported from mod_proxy).     *
20  * Version:     $Revision: 531816 $                                        *
21  ***************************************************************************/
22 
23 #include "jk_global.h"
24 #include "jk_url.h"
25 
/*
 * Write the percent-escape for one character: '%' followed by two
 * upper-case hexadecimal digits.
 *
 * ch  character to escape; only its low eight bits are used.
 * x   destination; exactly three bytes (x[0]..x[2]) are written.
 *
 * No terminating '\0' is stored, so the caller only needs to reserve
 * three bytes and must terminate the string itself.
 */
static void jk_c2hex(int ch, char *x)
{
    static const char hex_digits[] = "0123456789ABCDEF";
    unsigned int byte = (unsigned int)ch & 0xFFu;

#if CHARSET_EBCDIC
    {
        /* The escape must carry the ASCII code of the character, so
         * translate the native byte before splitting it into nibbles. */
        char native[1];

        native[0] = (char)byte;
        jk_xlate_to_ascii(native, 1);
        byte = (unsigned char)native[0];
    }
#endif /*CHARSET_EBCDIC*/

    x[0] = '%';
    x[1] = hex_digits[(byte >> 4) & 0x0Fu];
    x[2] = hex_digits[byte & 0x0Fu];
}
62 
63 /*
64  * Convert a URL-encoded string to canonical form.
65  * It encodes those which must be encoded, and does not touch
66  * those which must not be touched.
67  * String x must be '\0'-terminated.
68  * String y must be pre-allocated with len maxlen
69  * (including the terminating '\0').
70  */
jk_canonenc(const char * x,char * y,int maxlen)71 int jk_canonenc(const char *x, char *y, int maxlen)
72 {
73     int i, j;
74     int ch = x[0];
75     char *allowed;  /* characters which should not be encoded */
76     char *reserved; /* characters which much not be en/de-coded */
77 
78 /*
79  * N.B. in addition to :@&=, this allows ';' in an http path
80  * and '?' in an ftp path -- this may be revised
81  */
82     allowed = "~$-_.+!*'(),;:@&=";
83     reserved = "/";
84 
85     for (i = 0, j = 0; ch != '\0' && j < maxlen; i++, j++, ch=x[i]) {
86 /* always handle '/' first */
87         if (strchr(reserved, ch)) {
88             y[j] = ch;
89             continue;
90         }
91 /* recode it, if necessary */
92         if (!jk_isalnum(ch) && !strchr(allowed, ch)) {
93             if (j+2<maxlen) {
94                 jk_c2hex(ch, &y[j]);
95                 j += 2;
96             }
97             else {
98                 return JK_FALSE;
99             }
100         }
101         else {
102             y[j] = ch;
103         }
104     }
105     if (j<maxlen) {
106         y[j] = '\0';
107         return JK_TRUE;
108     }
109     else {
110         return JK_FALSE;
111     }
112 }
113 
/*
 * ASCII-to-EBCDIC translation table, compiled only when
 * USE_CHARSET_EBCDIC is set. The index is a byte value in the ASCII
 * based character set and the entry is the corresponding EBCDIC code
 * (e.g. 0x20 -> 0x40, 0x30 -> 0xF0, 0x41 -> 0xC1). x2c() uses it to
 * turn the value of a %XX escape into a native character.
 * NOTE(review): the bracket mappings (0x5B -> 0xAD, 0x5D -> 0xBD) look
 * like code page IBM-1047 -- confirm before relying on it.
 */
#if USE_CHARSET_EBCDIC
static int convert_a2e[256] = {
  0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
  0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
  0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
  0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
  0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
  0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
  0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
  0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
  0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
  0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
  0x41, 0xAA, 0x4A, 0xB1, 0x9F, 0xB2, 0x6A, 0xB5, 0xBB, 0xB4, 0x9A, 0x8A, 0xB0, 0xCA, 0xAF, 0xBC,
  0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB,
  0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
  0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xFD, 0xFE, 0xFB, 0xFC, 0xBA, 0xAE, 0x59,
  0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
  0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xDD, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF };
#endif /*USE_CHARSET_EBCDIC*/
133 
/*
 * Turn the two hexadecimal digits at what[0] and what[1] into the
 * character they encode. The digits are not validated; the caller is
 * expected to have checked them. Letter digits are accepted in either
 * case (bit 0x20 is masked off before comparing against 'A').
 */
static char x2c(const char *what)
{
#if USE_CHARSET_EBCDIC
    /* Parse the pair as a "0xHH" literal, then map the resulting
     * value to the native character set. */
    char hex_literal[5];

    hex_literal[0] = '0';
    hex_literal[1] = 'x';
    hex_literal[2] = what[0];
    hex_literal[3] = what[1];
    hex_literal[4] = '\0';
    return (char)convert_a2e[0xFF & strtol(hex_literal, NULL, 16)];
#else /*!USE_CHARSET_EBCDIC*/
    char value = 0;
    int pos;

    /* High nibble first: shift the accumulator, then add the digit. */
    for (pos = 0; pos < 2; pos++) {
        const char c = what[pos];

        value *= 16;
        value += (c >= 'A') ? ((c & 0xdf) - 'A') + 10 : (c - '0');
    }
    return value;
#endif /*USE_CHARSET_EBCDIC*/
}
154 
155 #define jk_isxdigit(c) (isxdigit(((unsigned char)(c))))
156 
157 /**
158  * Unescapes a URL, leaving reserved characters intact.
159  * @param unescaped Optional buffer to write the encoded string, can be
160  * NULL, in which case the URL decoding does not actually take place
161  * but the result length of the decoded URL will be returned.
162  * @param url String to be unescaped
163  * @param slen The length of the original url, or -1 to decode until
164  * a terminating '\0' is seen
165  * @param forbid Optional list of forbidden characters, in addition to
166  * 0x00
167  * @param reserved Optional list of reserved characters that will be
168  * left unescaped
169  * @param plus If non zero, '+' is converted to ' ' as per
170  * application/x-www-form-urlencoded encoding
171  * @param len If set, the length of the unescaped string will be returned
172  * @return JK_TRUE on success, JK_FALSE if no characters are
173  * decoded or the string is NULL, if a bad escape sequence is
174  * found, or if a character on the forbid list is found.
175  * Implementation copied from APR 1.5.x.
176  */
jk_unescape_url(char * const unescaped,const char * const url,size_t slen,const char * const forbid,const char * const reserved,const int plus,size_t * len)177 int jk_unescape_url(char *const unescaped,
178                     const char *const url,
179                     size_t slen,
180                     const char *const forbid,
181                     const char *const reserved,
182                     const int plus,
183                     size_t *len)
184 {
185     size_t size = 1;
186     int found = 0;
187     const char *s = (const char *) url;
188     char *d = (char *) unescaped;
189     register int badesc, badpath;
190 
191     if (!url) {
192         return JK_FALSE;
193     }
194 
195     badesc = 0;
196     badpath = 0;
197     if (s) {
198         if (d) {
199             for (; *s && slen; ++s, d++, slen--) {
200                 if (plus && *s == '+') {
201                     *d = ' ';
202                     found = 1;
203                 }
204                 else if (*s != '%') {
205                     *d = *s;
206                 }
207                 else {
208                     if (!jk_isxdigit(*(s + 1)) || !jk_isxdigit(*(s + 2))) {
209                         badesc = 1;
210                         *d = '%';
211                     }
212                     else {
213                         char decoded;
214                         decoded = x2c(s + 1);
215                         if ((decoded == '\0')
216                                 || (forbid && strchr(forbid, decoded))) {
217                             badpath = 1;
218                             *d = decoded;
219                             s += 2;
220                             slen -= 2;
221                         }
222                         else if (reserved && strchr(reserved, decoded)) {
223                             *d++ = *s++;
224                             *d++ = *s++;
225                             *d = *s;
226                             size += 2;
227                         }
228                         else {
229                             *d = decoded;
230                             s += 2;
231                             slen -= 2;
232                             found = 1;
233                         }
234                     }
235                 }
236                 size++;
237             }
238             *d = '\0';
239         }
240         else {
241             for (; *s && slen; ++s, slen--) {
242                 if (plus && *s == '+') {
243                     found = 1;
244                 }
245                 else if (*s != '%') {
246                     /* character unchanged */
247                 }
248                 else {
249                     if (!jk_isxdigit(*(s + 1)) || !jk_isxdigit(*(s + 2))) {
250                         badesc = 1;
251                     }
252                     else {
253                         char decoded;
254                         decoded = x2c(s + 1);
255                         if ((decoded == '\0')
256                                 || (forbid && strchr(forbid, decoded))) {
257                             badpath = 1;
258                             s += 2;
259                             slen -= 2;
260                         }
261                         else if (reserved && strchr(reserved, decoded)) {
262                             s += 2;
263                             slen -= 2;
264                             size += 2;
265                         }
266                         else {
267                             s += 2;
268                             slen -= 2;
269                             found = 1;
270                         }
271                     }
272                 }
273                 size++;
274             }
275         }
276     }
277 
278     if (len) {
279         *len = size;
280     }
281     if (badesc) {
282         return JK_FALSE;
283     }
284     else if (badpath) {
285         return JK_FALSE;
286     }
287     else if (!found) {
288         return JK_TRUE;
289     }
290 
291     return JK_TRUE;
292 }
293