1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 /***************************************************************************
19 * Description: URL manipulation subroutines. (ported from mod_proxy). *
20 * Version: $Revision: 531816 $ *
21 ***************************************************************************/
22
23 #include "jk_global.h"
24 #include "jk_url.h"
25
/*
 * Write the three character escape "%XX" for the low 8 bits of ch into
 * x[0], x[1] and x[2], using upper-case hexadecimal digits.
 *
 * Exactly three bytes are stored and no '\0' terminator is appended:
 * jk_canonenc() only checks that three bytes are available before calling
 * this function and terminates the output itself, so a fourth store could
 * land one byte past the end of its buffer.
 */
static void jk_c2hex(int ch, char *x)
{
    static const char hex_digits[] = "0123456789ABCDEF";
#if !CHARSET_EBCDIC
    x[0] = '%';
    x[1] = hex_digits[(ch & 0xF0) >> 4];
    x[2] = hex_digits[ch & 0x0F];
#else /*CHARSET_EBCDIC*/
    char buf[1];

    /*
     * The escape has to carry the ASCII code of the character, so the
     * byte is run through jk_xlate_to_ascii() before it is split into
     * nibbles (presumably an in-place native-to-ASCII conversion --
     * confirm against its definition).
     */
    buf[0] = (char)(ch & 0xFF);
    jk_xlate_to_ascii(buf, 1);

    x[0] = '%';
    x[1] = hex_digits[(buf[0] >> 4) & 0x0F];
    x[2] = hex_digits[buf[0] & 0x0F];
#endif /*CHARSET_EBCDIC*/
}
62
63 /*
64 * Convert a URL-encoded string to canonical form.
65 * It encodes those which must be encoded, and does not touch
66 * those which must not be touched.
67 * String x must be '\0'-terminated.
68 * String y must be pre-allocated with len maxlen
69 * (including the terminating '\0').
70 */
jk_canonenc(const char * x,char * y,int maxlen)71 int jk_canonenc(const char *x, char *y, int maxlen)
72 {
73 int i, j;
74 int ch = x[0];
75 char *allowed; /* characters which should not be encoded */
76 char *reserved; /* characters which much not be en/de-coded */
77
78 /*
79 * N.B. in addition to :@&=, this allows ';' in an http path
80 * and '?' in an ftp path -- this may be revised
81 */
82 allowed = "~$-_.+!*'(),;:@&=";
83 reserved = "/";
84
85 for (i = 0, j = 0; ch != '\0' && j < maxlen; i++, j++, ch=x[i]) {
86 /* always handle '/' first */
87 if (strchr(reserved, ch)) {
88 y[j] = ch;
89 continue;
90 }
91 /* recode it, if necessary */
92 if (!jk_isalnum(ch) && !strchr(allowed, ch)) {
93 if (j+2<maxlen) {
94 jk_c2hex(ch, &y[j]);
95 j += 2;
96 }
97 else {
98 return JK_FALSE;
99 }
100 }
101 else {
102 y[j] = ch;
103 }
104 }
105 if (j<maxlen) {
106 y[j] = '\0';
107 return JK_TRUE;
108 }
109 else {
110 return JK_FALSE;
111 }
112 }
113
#if USE_CHARSET_EBCDIC
/*
 * ASCII to EBCDIC translation table, indexed by ASCII code
 * (one row per high nibble, one column per low nibble).
 * Only x2c() below reads it.
 */
static int convert_a2e[256] = {
  /* 0x0_ */ 0x00, 0x01, 0x02, 0x03, 0x37, 0x2D, 0x2E, 0x2F, 0x16, 0x05, 0x15, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
  /* 0x1_ */ 0x10, 0x11, 0x12, 0x13, 0x3C, 0x3D, 0x32, 0x26, 0x18, 0x19, 0x3F, 0x27, 0x1C, 0x1D, 0x1E, 0x1F,
  /* 0x2_ */ 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D, 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
  /* 0x3_ */ 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
  /* 0x4_ */ 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
  /* 0x5_ */ 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
  /* 0x6_ */ 0x79, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
  /* 0x7_ */ 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xC0, 0x4F, 0xD0, 0xA1, 0x07,
  /* 0x8_ */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x06, 0x17, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x09, 0x0A, 0x1B,
  /* 0x9_ */ 0x30, 0x31, 0x1A, 0x33, 0x34, 0x35, 0x36, 0x08, 0x38, 0x39, 0x3A, 0x3B, 0x04, 0x14, 0x3E, 0xFF,
  /* 0xA_ */ 0x41, 0xAA, 0x4A, 0xB1, 0x9F, 0xB2, 0x6A, 0xB5, 0xBB, 0xB4, 0x9A, 0x8A, 0xB0, 0xCA, 0xAF, 0xBC,
  /* 0xB_ */ 0x90, 0x8F, 0xEA, 0xFA, 0xBE, 0xA0, 0xB6, 0xB3, 0x9D, 0xDA, 0x9B, 0x8B, 0xB7, 0xB8, 0xB9, 0xAB,
  /* 0xC_ */ 0x64, 0x65, 0x62, 0x66, 0x63, 0x67, 0x9E, 0x68, 0x74, 0x71, 0x72, 0x73, 0x78, 0x75, 0x76, 0x77,
  /* 0xD_ */ 0xAC, 0x69, 0xED, 0xEE, 0xEB, 0xEF, 0xEC, 0xBF, 0x80, 0xFD, 0xFE, 0xFB, 0xFC, 0xBA, 0xAE, 0x59,
  /* 0xE_ */ 0x44, 0x45, 0x42, 0x46, 0x43, 0x47, 0x9C, 0x48, 0x54, 0x51, 0x52, 0x53, 0x58, 0x55, 0x56, 0x57,
  /* 0xF_ */ 0x8C, 0x49, 0xCD, 0xCE, 0xCB, 0xCF, 0xCC, 0xE1, 0x70, 0xDD, 0xDE, 0xDB, 0xDC, 0x8D, 0x8E, 0xDF };
#endif /*USE_CHARSET_EBCDIC*/

/*
 * Turn the two hexadecimal digits what[0] and what[1] into the character
 * they encode. The digits are not validated here; callers are expected to
 * have checked them (e.g. with isxdigit) before calling.
 *
 * On non-EBCDIC builds the value is computed arithmetically. On EBCDIC
 * builds the pair is parsed with strtol() and the resulting ASCII code is
 * mapped through convert_a2e[].
 */
static char x2c(const char *what)
{
    char value;

#if !USE_CHARSET_EBCDIC
    int nibble;

    /*
     * High nibble. Anything at or above 'A' is taken to be a letter; the
     * 0xdf mask folds lower case onto upper case before the offset from
     * 'A' is taken.
     */
    if (what[0] >= 'A') {
        nibble = ((what[0] & 0xdf) - 'A') + 10;
    }
    else {
        nibble = what[0] - '0';
    }
    value = nibble;
    value *= 16;

    /* Low nibble, same rule. */
    if (what[1] >= 'A') {
        nibble = ((what[1] & 0xdf) - 'A') + 10;
    }
    else {
        nibble = what[1] - '0';
    }
    value += nibble;
#else /*USE_CHARSET_EBCDIC*/
    char hex_literal[5];

    /* Build "0xNN" so that strtol() can parse the pair in base 16. */
    hex_literal[0] = '0';
    hex_literal[1] = 'x';
    hex_literal[2] = what[0];
    hex_literal[3] = what[1];
    hex_literal[4] = '\0';
    value = convert_a2e[0xFF & strtol(hex_literal, NULL, 16)];
#endif /*USE_CHARSET_EBCDIC*/

    return value;
}
154
155 #define jk_isxdigit(c) (isxdigit(((unsigned char)(c))))
156
157 /**
158 * Unescapes a URL, leaving reserved characters intact.
159 * @param unescaped Optional buffer to write the encoded string, can be
160 * NULL, in which case the URL decoding does not actually take place
161 * but the result length of the decoded URL will be returned.
162 * @param url String to be unescaped
163 * @param slen The length of the original url, or -1 to decode until
164 * a terminating '\0' is seen
165 * @param forbid Optional list of forbidden characters, in addition to
166 * 0x00
167 * @param reserved Optional list of reserved characters that will be
168 * left unescaped
169 * @param plus If non zero, '+' is converted to ' ' as per
170 * application/x-www-form-urlencoded encoding
171 * @param len If set, the length of the unescaped string will be returned
172 * @return JK_TRUE on success, JK_FALSE if no characters are
173 * decoded or the string is NULL, if a bad escape sequence is
174 * found, or if a character on the forbid list is found.
175 * Implementation copied from APR 1.5.x.
176 */
jk_unescape_url(char * const unescaped,const char * const url,size_t slen,const char * const forbid,const char * const reserved,const int plus,size_t * len)177 int jk_unescape_url(char *const unescaped,
178 const char *const url,
179 size_t slen,
180 const char *const forbid,
181 const char *const reserved,
182 const int plus,
183 size_t *len)
184 {
185 size_t size = 1;
186 int found = 0;
187 const char *s = (const char *) url;
188 char *d = (char *) unescaped;
189 register int badesc, badpath;
190
191 if (!url) {
192 return JK_FALSE;
193 }
194
195 badesc = 0;
196 badpath = 0;
197 if (s) {
198 if (d) {
199 for (; *s && slen; ++s, d++, slen--) {
200 if (plus && *s == '+') {
201 *d = ' ';
202 found = 1;
203 }
204 else if (*s != '%') {
205 *d = *s;
206 }
207 else {
208 if (!jk_isxdigit(*(s + 1)) || !jk_isxdigit(*(s + 2))) {
209 badesc = 1;
210 *d = '%';
211 }
212 else {
213 char decoded;
214 decoded = x2c(s + 1);
215 if ((decoded == '\0')
216 || (forbid && strchr(forbid, decoded))) {
217 badpath = 1;
218 *d = decoded;
219 s += 2;
220 slen -= 2;
221 }
222 else if (reserved && strchr(reserved, decoded)) {
223 *d++ = *s++;
224 *d++ = *s++;
225 *d = *s;
226 size += 2;
227 }
228 else {
229 *d = decoded;
230 s += 2;
231 slen -= 2;
232 found = 1;
233 }
234 }
235 }
236 size++;
237 }
238 *d = '\0';
239 }
240 else {
241 for (; *s && slen; ++s, slen--) {
242 if (plus && *s == '+') {
243 found = 1;
244 }
245 else if (*s != '%') {
246 /* character unchanged */
247 }
248 else {
249 if (!jk_isxdigit(*(s + 1)) || !jk_isxdigit(*(s + 2))) {
250 badesc = 1;
251 }
252 else {
253 char decoded;
254 decoded = x2c(s + 1);
255 if ((decoded == '\0')
256 || (forbid && strchr(forbid, decoded))) {
257 badpath = 1;
258 s += 2;
259 slen -= 2;
260 }
261 else if (reserved && strchr(reserved, decoded)) {
262 s += 2;
263 slen -= 2;
264 size += 2;
265 }
266 else {
267 s += 2;
268 slen -= 2;
269 found = 1;
270 }
271 }
272 }
273 size++;
274 }
275 }
276 }
277
278 if (len) {
279 *len = size;
280 }
281 if (badesc) {
282 return JK_FALSE;
283 }
284 else if (badpath) {
285 return JK_FALSE;
286 }
287 else if (!found) {
288 return JK_TRUE;
289 }
290
291 return JK_TRUE;
292 }
293