1 /*
2 * uriparser - RFC 3986 URI parsing library
3 *
4 * Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
5 * Copyright (C) 2007, Sebastian Pipping <sebastian@pipping.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * * Redistributions of source code must retain the above
13 * copyright notice, this list of conditions and the following
14 * disclaimer.
15 *
16 * * Redistributions in binary form must reproduce the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer in the documentation and/or other materials
19 * provided with the distribution.
20 *
21 * * Neither the name of the <ORGANIZATION> nor the names of its
22 * contributors may be used to endorse or promote products
23 * derived from this software without specific prior written
24 * permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
29 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
30 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
31 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
32 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
33 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
35 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
37 * OF THE POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 /* What encodings are enabled? */
41 #include "UriDefsConfig.h"
42 #if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
43 /* Include SELF twice */
44 # ifdef URI_ENABLE_ANSI
45 # define URI_PASS_ANSI 1
46 # include "UriCommon.c"
47 # undef URI_PASS_ANSI
48 # endif
49 # ifdef URI_ENABLE_UNICODE
50 # define URI_PASS_UNICODE 1
51 # include "UriCommon.c"
52 # undef URI_PASS_UNICODE
53 # endif
54 #else
55 # ifdef URI_PASS_ANSI
56 # include "UriDefsAnsi.h"
57 # else
58 # include "UriDefsUnicode.h"
59 # include <wchar.h>
60 # endif
61
62
63
64 #ifndef URI_DOXYGEN
65 # include "Uri.h"
66 # include "UriCommon.h"
67 #endif
68
69
70
71 /*extern*/ const URI_CHAR * const URI_FUNC(SafeToPointTo) = _UT("X");
72 /*extern*/ const URI_CHAR * const URI_FUNC(ConstPwd) = _UT(".");
73 /*extern*/ const URI_CHAR * const URI_FUNC(ConstParent) = _UT("..");
74
75
76
URI_FUNC(ResetUri)77 void URI_FUNC(ResetUri)(URI_TYPE(Uri) * uri) {
78 if (uri == NULL) {
79 return;
80 }
81 memset(uri, 0, sizeof(URI_TYPE(Uri)));
82 }
83
84
85
86 /* Compares two text ranges for equal text content */
URI_FUNC(CompareRange)87 int URI_FUNC(CompareRange)(
88 const URI_TYPE(TextRange) * a,
89 const URI_TYPE(TextRange) * b) {
90 int diff;
91
92 /* NOTE: Both NULL means equal! */
93 if ((a == NULL) || (b == NULL)) {
94 return ((a == NULL) ? 0 : 1) - ((b == NULL) ? 0 : 1);
95 }
96
97 /* NOTE: Both NULL means equal! */
98 if ((a->first == NULL) || (b->first == NULL)) {
99 return ((a->first == NULL) ? 0 : 1) - ((b->first == NULL) ? 0 : 1);
100 }
101
102 diff = ((int)(a->afterLast - a->first) - (int)(b->afterLast - b->first));
103 if (diff > 0) {
104 return 1;
105 } else if (diff < 0) {
106 return -1;
107 }
108
109 diff = URI_STRNCMP(a->first, b->first, (a->afterLast - a->first));
110
111 if (diff > 0) {
112 return 1;
113 } else if (diff < 0) {
114 return -1;
115 }
116
117 return diff;
118 }
119
120
121
122 /* Properly removes "." and ".." path segments */
URI_FUNC(RemoveDotSegments)123 UriBool URI_FUNC(RemoveDotSegments)(URI_TYPE(Uri) * uri,
124 UriBool relative, UriMemoryManager * memory) {
125 if (uri == NULL) {
126 return URI_TRUE;
127 }
128 return URI_FUNC(RemoveDotSegmentsEx)(uri, relative, uri->owner, memory);
129 }
130
131
132
URI_FUNC(RemoveDotSegmentsEx)133 UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri,
134 UriBool relative, UriBool pathOwned, UriMemoryManager * memory) {
135 URI_TYPE(PathSegment) * walker;
136 if ((uri == NULL) || (uri->pathHead == NULL)) {
137 return URI_TRUE;
138 }
139
140 walker = uri->pathHead;
141 walker->reserved = NULL; /* Prev pointer */
142 do {
143 UriBool removeSegment = URI_FALSE;
144 int len = (int)(walker->text.afterLast - walker->text.first);
145 switch (len) {
146 case 1:
147 if ((walker->text.first)[0] == _UT('.')) {
148 /* "." segment -> remove if not essential */
149 URI_TYPE(PathSegment) * const prev = walker->reserved;
150 URI_TYPE(PathSegment) * const nextBackup = walker->next;
151
152 /* Is this dot segment essential? */
153 removeSegment = URI_TRUE;
154 if (relative && (walker == uri->pathHead) && (walker->next != NULL)) {
155 const URI_CHAR * ch = walker->next->text.first;
156 for (; ch < walker->next->text.afterLast; ch++) {
157 if (*ch == _UT(':')) {
158 removeSegment = URI_FALSE;
159 break;
160 }
161 }
162 }
163
164 if (removeSegment) {
165 /* Last segment? */
166 if (walker->next != NULL) {
167 /* Not last segment */
168 walker->next->reserved = prev;
169
170 if (prev == NULL) {
171 /* First but not last segment */
172 uri->pathHead = walker->next;
173 } else {
174 /* Middle segment */
175 prev->next = walker->next;
176 }
177
178 if (pathOwned && (walker->text.first != walker->text.afterLast)) {
179 memory->free(memory, (URI_CHAR *)walker->text.first);
180 }
181 memory->free(memory, walker);
182 } else {
183 /* Last segment */
184 if (pathOwned && (walker->text.first != walker->text.afterLast)) {
185 memory->free(memory, (URI_CHAR *)walker->text.first);
186 }
187
188 if (prev == NULL) {
189 /* Last and first */
190 if (URI_FUNC(IsHostSet)(uri)) {
191 /* Replace "." with empty segment to represent trailing slash */
192 walker->text.first = URI_FUNC(SafeToPointTo);
193 walker->text.afterLast = URI_FUNC(SafeToPointTo);
194 } else {
195 memory->free(memory, walker);
196
197 uri->pathHead = NULL;
198 uri->pathTail = NULL;
199 }
200 } else {
201 /* Last but not first, replace "." with empty segment to represent trailing slash */
202 walker->text.first = URI_FUNC(SafeToPointTo);
203 walker->text.afterLast = URI_FUNC(SafeToPointTo);
204 }
205 }
206
207 walker = nextBackup;
208 }
209 }
210 break;
211
212 case 2:
213 if (((walker->text.first)[0] == _UT('.'))
214 && ((walker->text.first)[1] == _UT('.'))) {
215 /* Path ".." -> remove this and the previous segment */
216 URI_TYPE(PathSegment) * const prev = walker->reserved;
217 URI_TYPE(PathSegment) * prevPrev;
218 URI_TYPE(PathSegment) * const nextBackup = walker->next;
219
220 removeSegment = URI_TRUE;
221 if (relative) {
222 if (prev == NULL) {
223 removeSegment = URI_FALSE;
224 } else if ((prev != NULL)
225 && ((prev->text.afterLast - prev->text.first) == 2)
226 && ((prev->text.first)[0] == _UT('.'))
227 && ((prev->text.first)[1] == _UT('.'))) {
228 removeSegment = URI_FALSE;
229 }
230 }
231
232 if (removeSegment) {
233 if (prev != NULL) {
234 /* Not first segment */
235 prevPrev = prev->reserved;
236 if (prevPrev != NULL) {
237 /* Not even prev is the first one */
238 prevPrev->next = walker->next;
239 if (walker->next != NULL) {
240 walker->next->reserved = prevPrev;
241 } else {
242 /* Last segment -> insert "" segment to represent trailing slash, update tail */
243 URI_TYPE(PathSegment) * const segment = memory->calloc(memory, 1, sizeof(URI_TYPE(PathSegment)));
244 if (segment == NULL) {
245 if (pathOwned && (walker->text.first != walker->text.afterLast)) {
246 memory->free(memory, (URI_CHAR *)walker->text.first);
247 }
248 memory->free(memory, walker);
249
250 if (pathOwned && (prev->text.first != prev->text.afterLast)) {
251 memory->free(memory, (URI_CHAR *)prev->text.first);
252 }
253 memory->free(memory, prev);
254
255 return URI_FALSE; /* Raises malloc error */
256 }
257 segment->text.first = URI_FUNC(SafeToPointTo);
258 segment->text.afterLast = URI_FUNC(SafeToPointTo);
259 prevPrev->next = segment;
260 uri->pathTail = segment;
261 }
262
263 if (pathOwned && (walker->text.first != walker->text.afterLast)) {
264 memory->free(memory, (URI_CHAR *)walker->text.first);
265 }
266 memory->free(memory, walker);
267
268 if (pathOwned && (prev->text.first != prev->text.afterLast)) {
269 memory->free(memory, (URI_CHAR *)prev->text.first);
270 }
271 memory->free(memory, prev);
272
273 walker = nextBackup;
274 } else {
275 /* Prev is the first segment */
276 if (walker->next != NULL) {
277 uri->pathHead = walker->next;
278 walker->next->reserved = NULL;
279
280 if (pathOwned && (walker->text.first != walker->text.afterLast)) {
281 memory->free(memory, (URI_CHAR *)walker->text.first);
282 }
283 memory->free(memory, walker);
284 } else {
285 /* Re-use segment for "" path segment to represent trailing slash, update tail */
286 URI_TYPE(PathSegment) * const segment = walker;
287 if (pathOwned && (segment->text.first != segment->text.afterLast)) {
288 memory->free(memory, (URI_CHAR *)segment->text.first);
289 }
290 segment->text.first = URI_FUNC(SafeToPointTo);
291 segment->text.afterLast = URI_FUNC(SafeToPointTo);
292 uri->pathHead = segment;
293 uri->pathTail = segment;
294 }
295
296 if (pathOwned && (prev->text.first != prev->text.afterLast)) {
297 memory->free(memory, (URI_CHAR *)prev->text.first);
298 }
299 memory->free(memory, prev);
300
301 walker = nextBackup;
302 }
303 } else {
304 URI_TYPE(PathSegment) * const anotherNextBackup = walker->next;
305 /* First segment -> update head pointer */
306 uri->pathHead = walker->next;
307 if (walker->next != NULL) {
308 walker->next->reserved = NULL;
309 } else {
310 /* Last segment -> update tail */
311 uri->pathTail = NULL;
312 }
313
314 if (pathOwned && (walker->text.first != walker->text.afterLast)) {
315 memory->free(memory, (URI_CHAR *)walker->text.first);
316 }
317 memory->free(memory, walker);
318
319 walker = anotherNextBackup;
320 }
321 }
322 }
323 break;
324
325 }
326
327 if (!removeSegment) {
328 if (walker->next != NULL) {
329 walker->next->reserved = walker;
330 } else {
331 /* Last segment -> update tail */
332 uri->pathTail = walker;
333 }
334 walker = walker->next;
335 }
336 } while (walker != NULL);
337
338 return URI_TRUE;
339 }
340
341
342
343 /* Properly removes "." and ".." path segments */
URI_FUNC(RemoveDotSegmentsAbsolute)344 UriBool URI_FUNC(RemoveDotSegmentsAbsolute)(URI_TYPE(Uri) * uri,
345 UriMemoryManager * memory) {
346 const UriBool ABSOLUTE = URI_FALSE;
347 return URI_FUNC(RemoveDotSegments)(uri, ABSOLUTE, memory);
348 }
349
350
351
URI_FUNC(HexdigToInt)352 unsigned char URI_FUNC(HexdigToInt)(URI_CHAR hexdig) {
353 switch (hexdig) {
354 case _UT('0'):
355 case _UT('1'):
356 case _UT('2'):
357 case _UT('3'):
358 case _UT('4'):
359 case _UT('5'):
360 case _UT('6'):
361 case _UT('7'):
362 case _UT('8'):
363 case _UT('9'):
364 return (unsigned char)(9 + hexdig - _UT('9'));
365
366 case _UT('a'):
367 case _UT('b'):
368 case _UT('c'):
369 case _UT('d'):
370 case _UT('e'):
371 case _UT('f'):
372 return (unsigned char)(15 + hexdig - _UT('f'));
373
374 case _UT('A'):
375 case _UT('B'):
376 case _UT('C'):
377 case _UT('D'):
378 case _UT('E'):
379 case _UT('F'):
380 return (unsigned char)(15 + hexdig - _UT('F'));
381
382 default:
383 return 0;
384 }
385 }
386
387
388
URI_FUNC(HexToLetter)389 URI_CHAR URI_FUNC(HexToLetter)(unsigned int value) {
390 /* Uppercase recommended in section 2.1. of RFC 3986 *
391 * http://tools.ietf.org/html/rfc3986#section-2.1 */
392 return URI_FUNC(HexToLetterEx)(value, URI_TRUE);
393 }
394
395
396
URI_FUNC(HexToLetterEx)397 URI_CHAR URI_FUNC(HexToLetterEx)(unsigned int value, UriBool uppercase) {
398 switch (value) {
399 case 0: return _UT('0');
400 case 1: return _UT('1');
401 case 2: return _UT('2');
402 case 3: return _UT('3');
403 case 4: return _UT('4');
404 case 5: return _UT('5');
405 case 6: return _UT('6');
406 case 7: return _UT('7');
407 case 8: return _UT('8');
408 case 9: return _UT('9');
409
410 case 10: return (uppercase == URI_TRUE) ? _UT('A') : _UT('a');
411 case 11: return (uppercase == URI_TRUE) ? _UT('B') : _UT('b');
412 case 12: return (uppercase == URI_TRUE) ? _UT('C') : _UT('c');
413 case 13: return (uppercase == URI_TRUE) ? _UT('D') : _UT('d');
414 case 14: return (uppercase == URI_TRUE) ? _UT('E') : _UT('e');
415 default: return (uppercase == URI_TRUE) ? _UT('F') : _UT('f');
416 }
417 }
418
419
420
421 /* Checks if a URI has the host component set. */
URI_FUNC(IsHostSet)422 UriBool URI_FUNC(IsHostSet)(const URI_TYPE(Uri) * uri) {
423 return (uri != NULL)
424 && ((uri->hostText.first != NULL)
425 || (uri->hostData.ip4 != NULL)
426 || (uri->hostData.ip6 != NULL)
427 || (uri->hostData.ipFuture.first != NULL)
428 );
429 }
430
431
432
433 /* Copies the path segment list from one URI to another. */
URI_FUNC(CopyPath)434 UriBool URI_FUNC(CopyPath)(URI_TYPE(Uri) * dest,
435 const URI_TYPE(Uri) * source, UriMemoryManager * memory) {
436 if (source->pathHead == NULL) {
437 /* No path component */
438 dest->pathHead = NULL;
439 dest->pathTail = NULL;
440 } else {
441 /* Copy list but not the text contained */
442 URI_TYPE(PathSegment) * sourceWalker = source->pathHead;
443 URI_TYPE(PathSegment) * destPrev = NULL;
444 do {
445 URI_TYPE(PathSegment) * cur = memory->malloc(memory, sizeof(URI_TYPE(PathSegment)));
446 if (cur == NULL) {
447 /* Fix broken list */
448 if (destPrev != NULL) {
449 destPrev->next = NULL;
450 }
451 return URI_FALSE; /* Raises malloc error */
452 }
453
454 /* From this functions usage we know that *
455 * the dest URI cannot be uri->owner */
456 cur->text = sourceWalker->text;
457 if (destPrev == NULL) {
458 /* First segment ever */
459 dest->pathHead = cur;
460 } else {
461 destPrev->next = cur;
462 }
463 destPrev = cur;
464 sourceWalker = sourceWalker->next;
465 } while (sourceWalker != NULL);
466 dest->pathTail = destPrev;
467 dest->pathTail->next = NULL;
468 }
469
470 dest->absolutePath = source->absolutePath;
471 return URI_TRUE;
472 }
473
474
475
476 /* Copies the authority part of an URI over to another. */
URI_FUNC(CopyAuthority)477 UriBool URI_FUNC(CopyAuthority)(URI_TYPE(Uri) * dest,
478 const URI_TYPE(Uri) * source, UriMemoryManager * memory) {
479 /* From this functions usage we know that *
480 * the dest URI cannot be uri->owner */
481
482 /* Copy userInfo */
483 dest->userInfo = source->userInfo;
484
485 /* Copy hostText */
486 dest->hostText = source->hostText;
487
488 /* Copy hostData */
489 if (source->hostData.ip4 != NULL) {
490 dest->hostData.ip4 = memory->malloc(memory, sizeof(UriIp4));
491 if (dest->hostData.ip4 == NULL) {
492 return URI_FALSE; /* Raises malloc error */
493 }
494 *(dest->hostData.ip4) = *(source->hostData.ip4);
495 dest->hostData.ip6 = NULL;
496 dest->hostData.ipFuture.first = NULL;
497 dest->hostData.ipFuture.afterLast = NULL;
498 } else if (source->hostData.ip6 != NULL) {
499 dest->hostData.ip4 = NULL;
500 dest->hostData.ip6 = memory->malloc(memory, sizeof(UriIp6));
501 if (dest->hostData.ip6 == NULL) {
502 return URI_FALSE; /* Raises malloc error */
503 }
504 *(dest->hostData.ip6) = *(source->hostData.ip6);
505 dest->hostData.ipFuture.first = NULL;
506 dest->hostData.ipFuture.afterLast = NULL;
507 } else {
508 dest->hostData.ip4 = NULL;
509 dest->hostData.ip6 = NULL;
510 dest->hostData.ipFuture = source->hostData.ipFuture;
511 }
512
513 /* Copy portText */
514 dest->portText = source->portText;
515
516 return URI_TRUE;
517 }
518
519
520
URI_FUNC(FixAmbiguity)521 UriBool URI_FUNC(FixAmbiguity)(URI_TYPE(Uri) * uri,
522 UriMemoryManager * memory) {
523 URI_TYPE(PathSegment) * segment;
524
525 if ( /* Case 1: absolute path, empty first segment */
526 (uri->absolutePath
527 && (uri->pathHead != NULL)
528 && (uri->pathHead->text.afterLast == uri->pathHead->text.first))
529
530 /* Case 2: relative path, empty first and second segment */
531 || (!uri->absolutePath
532 && (uri->pathHead != NULL)
533 && (uri->pathHead->next != NULL)
534 && (uri->pathHead->text.afterLast == uri->pathHead->text.first)
535 && (uri->pathHead->next->text.afterLast == uri->pathHead->next->text.first))) {
536 /* NOOP */
537 } else {
538 return URI_TRUE;
539 }
540
541 segment = memory->malloc(memory, 1 * sizeof(URI_TYPE(PathSegment)));
542 if (segment == NULL) {
543 return URI_FALSE; /* Raises malloc error */
544 }
545
546 /* Insert "." segment in front */
547 segment->next = uri->pathHead;
548 segment->text.first = URI_FUNC(ConstPwd);
549 segment->text.afterLast = URI_FUNC(ConstPwd) + 1;
550 uri->pathHead = segment;
551 return URI_TRUE;
552 }
553
554
555
URI_FUNC(FixEmptyTrailSegment)556 void URI_FUNC(FixEmptyTrailSegment)(URI_TYPE(Uri) * uri,
557 UriMemoryManager * memory) {
558 /* Fix path if only one empty segment */
559 if (!uri->absolutePath
560 && !URI_FUNC(IsHostSet)(uri)
561 && (uri->pathHead != NULL)
562 && (uri->pathHead->next == NULL)
563 && (uri->pathHead->text.first == uri->pathHead->text.afterLast)) {
564 memory->free(memory, uri->pathHead);
565 uri->pathHead = NULL;
566 uri->pathTail = NULL;
567 }
568 }
569
570
571
572 #endif
573