1 /*
2 * uriparser - RFC 3986 URI parsing library
3 *
4 * Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
5 * Copyright (C) 2007, Sebastian Pipping <sebastian@pipping.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above
13 * copyright notice, this list of conditions and the following
14 * disclaimer.
15 *
16 * 2. Redistributions in binary form must reproduce the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer in the documentation and/or other materials
19 * provided with the distribution.
20 *
21 * 3. Neither the name of the copyright holder nor the names of
22 * its contributors may be used to endorse or promote products
23 * derived from this software without specific prior written
24 * permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
29 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
30 * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
31 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
32 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
33 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
35 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
37 * OF THE POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 /* What encodings are enabled? */
41 #include <uriparser/UriDefsConfig.h>
42 #if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
43 /* Include SELF twice */
44 # ifdef URI_ENABLE_ANSI
45 # define URI_PASS_ANSI 1
46 # include "UriEscape.c"
47 # undef URI_PASS_ANSI
48 # endif
49 # ifdef URI_ENABLE_UNICODE
50 # define URI_PASS_UNICODE 1
51 # include "UriEscape.c"
52 # undef URI_PASS_UNICODE
53 # endif
54 #else
55 # ifdef URI_PASS_ANSI
56 # include <uriparser/UriDefsAnsi.h>
57 # else
58 # include <uriparser/UriDefsUnicode.h>
59 # include <wchar.h>
60 # endif
61
62
63
64 #ifndef URI_DOXYGEN
65 # include <uriparser/Uri.h>
66 # include "UriCommon.h"
67 #endif
68
69
70
URI_FUNC(Escape)71 URI_CHAR * URI_FUNC(Escape)(const URI_CHAR * in, URI_CHAR * out,
72 UriBool spaceToPlus, UriBool normalizeBreaks) {
73 return URI_FUNC(EscapeEx)(in, NULL, out, spaceToPlus, normalizeBreaks);
74 }
75
76
77
URI_FUNC(EscapeEx)78 URI_CHAR * URI_FUNC(EscapeEx)(const URI_CHAR * inFirst,
79 const URI_CHAR * inAfterLast, URI_CHAR * out,
80 UriBool spaceToPlus, UriBool normalizeBreaks) {
81 const URI_CHAR * read = inFirst;
82 URI_CHAR * write = out;
83 UriBool prevWasCr = URI_FALSE;
84 if ((out == NULL) || (inFirst == out)) {
85 return NULL;
86 } else if (inFirst == NULL) {
87 if (out != NULL) {
88 out[0] = _UT('\0');
89 }
90 return out;
91 }
92
93 for (;;) {
94 if ((inAfterLast != NULL) && (read >= inAfterLast)) {
95 write[0] = _UT('\0');
96 return write;
97 }
98
99 switch (read[0]) {
100 case _UT('\0'):
101 write[0] = _UT('\0');
102 return write;
103
104 case _UT(' '):
105 if (spaceToPlus) {
106 write[0] = _UT('+');
107 write++;
108 } else {
109 write[0] = _UT('%');
110 write[1] = _UT('2');
111 write[2] = _UT('0');
112 write += 3;
113 }
114 prevWasCr = URI_FALSE;
115 break;
116
117 case _UT('a'): /* ALPHA */
118 case _UT('A'):
119 case _UT('b'):
120 case _UT('B'):
121 case _UT('c'):
122 case _UT('C'):
123 case _UT('d'):
124 case _UT('D'):
125 case _UT('e'):
126 case _UT('E'):
127 case _UT('f'):
128 case _UT('F'):
129 case _UT('g'):
130 case _UT('G'):
131 case _UT('h'):
132 case _UT('H'):
133 case _UT('i'):
134 case _UT('I'):
135 case _UT('j'):
136 case _UT('J'):
137 case _UT('k'):
138 case _UT('K'):
139 case _UT('l'):
140 case _UT('L'):
141 case _UT('m'):
142 case _UT('M'):
143 case _UT('n'):
144 case _UT('N'):
145 case _UT('o'):
146 case _UT('O'):
147 case _UT('p'):
148 case _UT('P'):
149 case _UT('q'):
150 case _UT('Q'):
151 case _UT('r'):
152 case _UT('R'):
153 case _UT('s'):
154 case _UT('S'):
155 case _UT('t'):
156 case _UT('T'):
157 case _UT('u'):
158 case _UT('U'):
159 case _UT('v'):
160 case _UT('V'):
161 case _UT('w'):
162 case _UT('W'):
163 case _UT('x'):
164 case _UT('X'):
165 case _UT('y'):
166 case _UT('Y'):
167 case _UT('z'):
168 case _UT('Z'):
169 case _UT('0'): /* DIGIT */
170 case _UT('1'):
171 case _UT('2'):
172 case _UT('3'):
173 case _UT('4'):
174 case _UT('5'):
175 case _UT('6'):
176 case _UT('7'):
177 case _UT('8'):
178 case _UT('9'):
179 case _UT('-'): /* "-" / "." / "_" / "~" */
180 case _UT('.'):
181 case _UT('_'):
182 case _UT('~'):
183 /* Copy unmodified */
184 write[0] = read[0];
185 write++;
186
187 prevWasCr = URI_FALSE;
188 break;
189
190 case _UT('\x0a'):
191 if (normalizeBreaks) {
192 if (!prevWasCr) {
193 write[0] = _UT('%');
194 write[1] = _UT('0');
195 write[2] = _UT('D');
196 write[3] = _UT('%');
197 write[4] = _UT('0');
198 write[5] = _UT('A');
199 write += 6;
200 }
201 } else {
202 write[0] = _UT('%');
203 write[1] = _UT('0');
204 write[2] = _UT('A');
205 write += 3;
206 }
207 prevWasCr = URI_FALSE;
208 break;
209
210 case _UT('\x0d'):
211 if (normalizeBreaks) {
212 write[0] = _UT('%');
213 write[1] = _UT('0');
214 write[2] = _UT('D');
215 write[3] = _UT('%');
216 write[4] = _UT('0');
217 write[5] = _UT('A');
218 write += 6;
219 } else {
220 write[0] = _UT('%');
221 write[1] = _UT('0');
222 write[2] = _UT('D');
223 write += 3;
224 }
225 prevWasCr = URI_TRUE;
226 break;
227
228 default:
229 /* Percent encode */
230 {
231 const unsigned char code = (unsigned char)read[0];
232 write[0] = _UT('%');
233 write[1] = URI_FUNC(HexToLetter)(code >> 4);
234 write[2] = URI_FUNC(HexToLetter)(code & 0x0f);
235 write += 3;
236 }
237 prevWasCr = URI_FALSE;
238 break;
239 }
240
241 read++;
242 }
243 }
244
245
246
URI_FUNC(UnescapeInPlace)247 const URI_CHAR * URI_FUNC(UnescapeInPlace)(URI_CHAR * inout) {
248 return URI_FUNC(UnescapeInPlaceEx)(inout, URI_FALSE, URI_BR_DONT_TOUCH);
249 }
250
251
252
URI_FUNC(UnescapeInPlaceEx)253 const URI_CHAR * URI_FUNC(UnescapeInPlaceEx)(URI_CHAR * inout,
254 UriBool plusToSpace, UriBreakConversion breakConversion) {
255 URI_CHAR * read = inout;
256 URI_CHAR * write = inout;
257 UriBool prevWasCr = URI_FALSE;
258
259 if (inout == NULL) {
260 return NULL;
261 }
262
263 for (;;) {
264 switch (read[0]) {
265 case _UT('\0'):
266 if (read > write) {
267 write[0] = _UT('\0');
268 }
269 return write;
270
271 case _UT('%'):
272 switch (read[1]) {
273 case _UT('0'):
274 case _UT('1'):
275 case _UT('2'):
276 case _UT('3'):
277 case _UT('4'):
278 case _UT('5'):
279 case _UT('6'):
280 case _UT('7'):
281 case _UT('8'):
282 case _UT('9'):
283 case _UT('a'):
284 case _UT('b'):
285 case _UT('c'):
286 case _UT('d'):
287 case _UT('e'):
288 case _UT('f'):
289 case _UT('A'):
290 case _UT('B'):
291 case _UT('C'):
292 case _UT('D'):
293 case _UT('E'):
294 case _UT('F'):
295 switch (read[2]) {
296 case _UT('0'):
297 case _UT('1'):
298 case _UT('2'):
299 case _UT('3'):
300 case _UT('4'):
301 case _UT('5'):
302 case _UT('6'):
303 case _UT('7'):
304 case _UT('8'):
305 case _UT('9'):
306 case _UT('a'):
307 case _UT('b'):
308 case _UT('c'):
309 case _UT('d'):
310 case _UT('e'):
311 case _UT('f'):
312 case _UT('A'):
313 case _UT('B'):
314 case _UT('C'):
315 case _UT('D'):
316 case _UT('E'):
317 case _UT('F'):
318 {
319 /* Percent group found */
320 const unsigned char left = URI_FUNC(HexdigToInt)(read[1]);
321 const unsigned char right = URI_FUNC(HexdigToInt)(read[2]);
322 const int code = 16 * left + right;
323 switch (code) {
324 case 10:
325 switch (breakConversion) {
326 case URI_BR_TO_LF:
327 if (!prevWasCr) {
328 write[0] = (URI_CHAR)10;
329 write++;
330 }
331 break;
332
333 case URI_BR_TO_CRLF:
334 if (!prevWasCr) {
335 write[0] = (URI_CHAR)13;
336 write[1] = (URI_CHAR)10;
337 write += 2;
338 }
339 break;
340
341 case URI_BR_TO_CR:
342 if (!prevWasCr) {
343 write[0] = (URI_CHAR)13;
344 write++;
345 }
346 break;
347
348 case URI_BR_DONT_TOUCH:
349 default:
350 write[0] = (URI_CHAR)10;
351 write++;
352
353 }
354 prevWasCr = URI_FALSE;
355 break;
356
357 case 13:
358 switch (breakConversion) {
359 case URI_BR_TO_LF:
360 write[0] = (URI_CHAR)10;
361 write++;
362 break;
363
364 case URI_BR_TO_CRLF:
365 write[0] = (URI_CHAR)13;
366 write[1] = (URI_CHAR)10;
367 write += 2;
368 break;
369
370 case URI_BR_TO_CR:
371 write[0] = (URI_CHAR)13;
372 write++;
373 break;
374
375 case URI_BR_DONT_TOUCH:
376 default:
377 write[0] = (URI_CHAR)13;
378 write++;
379
380 }
381 prevWasCr = URI_TRUE;
382 break;
383
384 default:
385 write[0] = (URI_CHAR)(code);
386 write++;
387
388 prevWasCr = URI_FALSE;
389
390 }
391 read += 3;
392 }
393 break;
394
395 default:
396 /* Copy two chars unmodified and */
397 /* look at this char again */
398 if (read > write) {
399 write[0] = read[0];
400 write[1] = read[1];
401 }
402 read += 2;
403 write += 2;
404
405 prevWasCr = URI_FALSE;
406 }
407 break;
408
409 default:
410 /* Copy one char unmodified and */
411 /* look at this char again */
412 if (read > write) {
413 write[0] = read[0];
414 }
415 read++;
416 write++;
417
418 prevWasCr = URI_FALSE;
419 }
420 break;
421
422 case _UT('+'):
423 if (plusToSpace) {
424 /* Convert '+' to ' ' */
425 write[0] = _UT(' ');
426 } else {
427 /* Copy one char unmodified */
428 if (read > write) {
429 write[0] = read[0];
430 }
431 }
432 read++;
433 write++;
434
435 prevWasCr = URI_FALSE;
436 break;
437
438 default:
439 /* Copy one char unmodified */
440 if (read > write) {
441 write[0] = read[0];
442 }
443 read++;
444 write++;
445
446 prevWasCr = URI_FALSE;
447 }
448 }
449 }
450
451
452
453 #endif
454