1 /*
2  * uriparser - RFC 3986 URI parsing library
3  *
4  * Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
5  * Copyright (C) 2007, Sebastian Pipping <sebastian@pipping.org>
6  * All rights reserved.
7  *
8  * Redistribution  and use in source and binary forms, with or without
9  * modification,  are permitted provided that the following conditions
10  * are met:
11  *
12  *     * Redistributions   of  source  code  must  retain  the   above
13  *       copyright  notice, this list of conditions and the  following
14  *       disclaimer.
15  *
16  *     * Redistributions  in  binary  form must  reproduce  the  above
17  *       copyright  notice, this list of conditions and the  following
18  *       disclaimer   in  the  documentation  and/or  other  materials
19  *       provided with the distribution.
20  *
21  *     * Neither  the name of the <ORGANIZATION> nor the names of  its
22  *       contributors  may  be  used to endorse  or  promote  products
23  *       derived  from  this software without specific  prior  written
24  *       permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27  * "AS  IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT  NOT
28  * LIMITED  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND  FITNESS
29  * FOR  A  PARTICULAR  PURPOSE ARE DISCLAIMED. IN NO EVENT  SHALL  THE
30  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
31  * INCIDENTAL,    SPECIAL,   EXEMPLARY,   OR   CONSEQUENTIAL   DAMAGES
32  * (INCLUDING,  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
33  * SERVICES;  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
35  * STRICT  LIABILITY,  OR  TORT (INCLUDING  NEGLIGENCE  OR  OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
37  * OF THE POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 /* What encodings are enabled? */
41 #include "UriDefsConfig.h"
42 #if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
43 /* Include SELF twice */
44 # ifdef URI_ENABLE_ANSI
45 #  define URI_PASS_ANSI 1
46 #  include "UriCommon.c"
47 #  undef URI_PASS_ANSI
48 # endif
49 # ifdef URI_ENABLE_UNICODE
50 #  define URI_PASS_UNICODE 1
51 #  include "UriCommon.c"
52 #  undef URI_PASS_UNICODE
53 # endif
54 #else
55 # ifdef URI_PASS_ANSI
56 #  include "UriDefsAnsi.h"
57 # else
58 #  include "UriDefsUnicode.h"
59 #  include <wchar.h>
60 # endif
61 
62 
63 
64 #ifndef URI_DOXYGEN
65 # include "Uri.h"
66 # include "UriCommon.h"
67 #endif
68 
69 
70 
/* Dummy text for empty ranges to point to (first == afterLast) */
/*extern*/ const URI_CHAR * const URI_FUNC(SafeToPointTo) = _UT("X");
/* Text of a "." path segment */
/*extern*/ const URI_CHAR * const URI_FUNC(ConstPwd) = _UT(".");
/* Text of a ".." path segment */
/*extern*/ const URI_CHAR * const URI_FUNC(ConstParent) = _UT("..");
74 
75 
76 
URI_FUNC(ResetUri)77 void URI_FUNC(ResetUri)(URI_TYPE(Uri) * uri) {
78 	if (uri == NULL) {
79 		return;
80 	}
81 	memset(uri, 0, sizeof(URI_TYPE(Uri)));
82 }
83 
84 
85 
86 /* Compares two text ranges for equal text content */
URI_FUNC(CompareRange)87 int URI_FUNC(CompareRange)(
88 		const URI_TYPE(TextRange) * a,
89 		const URI_TYPE(TextRange) * b) {
90 	int diff;
91 
92 	/* NOTE: Both NULL means equal! */
93 	if ((a == NULL) || (b == NULL)) {
94 		return ((a == NULL) ? 0 : 1) - ((b == NULL) ? 0 : 1);
95 	}
96 
97 	/* NOTE: Both NULL means equal! */
98 	if ((a->first == NULL) || (b->first == NULL)) {
99 		return ((a->first == NULL) ? 0 : 1) - ((b->first == NULL) ? 0 : 1);
100 	}
101 
102 	diff = ((int)(a->afterLast - a->first) - (int)(b->afterLast - b->first));
103 	if (diff > 0) {
104 		return 1;
105 	} else if (diff < 0) {
106 		return -1;
107 	}
108 
109 	diff = URI_STRNCMP(a->first, b->first, (a->afterLast - a->first));
110 
111 	if (diff > 0) {
112 		return 1;
113 	} else if (diff < 0) {
114 		return -1;
115 	}
116 
117 	return diff;
118 }
119 
120 
121 
122 /* Properly removes "." and ".." path segments */
URI_FUNC(RemoveDotSegments)123 UriBool URI_FUNC(RemoveDotSegments)(URI_TYPE(Uri) * uri,
124 		UriBool relative, UriMemoryManager * memory) {
125 	if (uri == NULL) {
126 		return URI_TRUE;
127 	}
128 	return URI_FUNC(RemoveDotSegmentsEx)(uri, relative, uri->owner, memory);
129 }
130 
131 
132 
URI_FUNC(RemoveDotSegmentsEx)133 UriBool URI_FUNC(RemoveDotSegmentsEx)(URI_TYPE(Uri) * uri,
134 		UriBool relative, UriBool pathOwned, UriMemoryManager * memory) {
135 	URI_TYPE(PathSegment) * walker;
136 	if ((uri == NULL) || (uri->pathHead == NULL)) {
137 		return URI_TRUE;
138 	}
139 
140 	walker = uri->pathHead;
141 	walker->reserved = NULL; /* Prev pointer */
142 	do {
143 		UriBool removeSegment = URI_FALSE;
144 		int len = (int)(walker->text.afterLast - walker->text.first);
145 		switch (len) {
146 		case 1:
147 			if ((walker->text.first)[0] == _UT('.')) {
148 				/* "." segment -> remove if not essential */
149 				URI_TYPE(PathSegment) * const prev = walker->reserved;
150 				URI_TYPE(PathSegment) * const nextBackup = walker->next;
151 
152 				/* Is this dot segment essential? */
153 				removeSegment = URI_TRUE;
154 				if (relative && (walker == uri->pathHead) && (walker->next != NULL)) {
155 					const URI_CHAR * ch = walker->next->text.first;
156 					for (; ch < walker->next->text.afterLast; ch++) {
157 						if (*ch == _UT(':')) {
158 							removeSegment = URI_FALSE;
159 							break;
160 						}
161 					}
162 				}
163 
164 				if (removeSegment) {
165 					/* Last segment? */
166 					if (walker->next != NULL) {
167 						/* Not last segment */
168 						walker->next->reserved = prev;
169 
170 						if (prev == NULL) {
171 							/* First but not last segment */
172 							uri->pathHead = walker->next;
173 						} else {
174 							/* Middle segment */
175 							prev->next = walker->next;
176 						}
177 
178 						if (pathOwned && (walker->text.first != walker->text.afterLast)) {
179 							memory->free(memory, (URI_CHAR *)walker->text.first);
180 						}
181 						memory->free(memory, walker);
182 					} else {
183 						/* Last segment */
184 						if (pathOwned && (walker->text.first != walker->text.afterLast)) {
185 							memory->free(memory, (URI_CHAR *)walker->text.first);
186 						}
187 
188 						if (prev == NULL) {
189 							/* Last and first */
190 							if (URI_FUNC(IsHostSet)(uri)) {
191 								/* Replace "." with empty segment to represent trailing slash */
192 								walker->text.first = URI_FUNC(SafeToPointTo);
193 								walker->text.afterLast = URI_FUNC(SafeToPointTo);
194 							} else {
195 								memory->free(memory, walker);
196 
197 								uri->pathHead = NULL;
198 								uri->pathTail = NULL;
199 							}
200 						} else {
201 							/* Last but not first, replace "." with empty segment to represent trailing slash */
202 							walker->text.first = URI_FUNC(SafeToPointTo);
203 							walker->text.afterLast = URI_FUNC(SafeToPointTo);
204 						}
205 					}
206 
207 					walker = nextBackup;
208 				}
209 			}
210 			break;
211 
212 		case 2:
213 			if (((walker->text.first)[0] == _UT('.'))
214 					&& ((walker->text.first)[1] == _UT('.'))) {
215 				/* Path ".." -> remove this and the previous segment */
216 				URI_TYPE(PathSegment) * const prev = walker->reserved;
217 				URI_TYPE(PathSegment) * prevPrev;
218 				URI_TYPE(PathSegment) * const nextBackup = walker->next;
219 
220 				removeSegment = URI_TRUE;
221 				if (relative) {
222 					if (prev == NULL) {
223 						removeSegment = URI_FALSE;
224 					} else if ((prev != NULL)
225 							&& ((prev->text.afterLast - prev->text.first) == 2)
226 							&& ((prev->text.first)[0] == _UT('.'))
227 							&& ((prev->text.first)[1] == _UT('.'))) {
228 						removeSegment = URI_FALSE;
229 					}
230 				}
231 
232 				if (removeSegment) {
233 					if (prev != NULL) {
234 						/* Not first segment */
235 						prevPrev = prev->reserved;
236 						if (prevPrev != NULL) {
237 							/* Not even prev is the first one */
238 							prevPrev->next = walker->next;
239 							if (walker->next != NULL) {
240 								walker->next->reserved = prevPrev;
241 							} else {
242 								/* Last segment -> insert "" segment to represent trailing slash, update tail */
243 								URI_TYPE(PathSegment) * const segment = memory->calloc(memory, 1, sizeof(URI_TYPE(PathSegment)));
244 								if (segment == NULL) {
245 									if (pathOwned && (walker->text.first != walker->text.afterLast)) {
246 										memory->free(memory, (URI_CHAR *)walker->text.first);
247 									}
248 									memory->free(memory, walker);
249 
250 									if (pathOwned && (prev->text.first != prev->text.afterLast)) {
251 										memory->free(memory, (URI_CHAR *)prev->text.first);
252 									}
253 									memory->free(memory, prev);
254 
255 									return URI_FALSE; /* Raises malloc error */
256 								}
257 								segment->text.first = URI_FUNC(SafeToPointTo);
258 								segment->text.afterLast = URI_FUNC(SafeToPointTo);
259 								prevPrev->next = segment;
260 								uri->pathTail = segment;
261 							}
262 
263 							if (pathOwned && (walker->text.first != walker->text.afterLast)) {
264 								memory->free(memory, (URI_CHAR *)walker->text.first);
265 							}
266 							memory->free(memory, walker);
267 
268 							if (pathOwned && (prev->text.first != prev->text.afterLast)) {
269 								memory->free(memory, (URI_CHAR *)prev->text.first);
270 							}
271 							memory->free(memory, prev);
272 
273 							walker = nextBackup;
274 						} else {
275 							/* Prev is the first segment */
276 							if (walker->next != NULL) {
277 								uri->pathHead = walker->next;
278 								walker->next->reserved = NULL;
279 
280 								if (pathOwned && (walker->text.first != walker->text.afterLast)) {
281 									memory->free(memory, (URI_CHAR *)walker->text.first);
282 								}
283 								memory->free(memory, walker);
284 							} else {
285 								/* Re-use segment for "" path segment to represent trailing slash, update tail */
286 								URI_TYPE(PathSegment) * const segment = walker;
287 								if (pathOwned && (segment->text.first != segment->text.afterLast)) {
288 									memory->free(memory, (URI_CHAR *)segment->text.first);
289 								}
290 								segment->text.first = URI_FUNC(SafeToPointTo);
291 								segment->text.afterLast = URI_FUNC(SafeToPointTo);
292 								uri->pathHead = segment;
293 								uri->pathTail = segment;
294 							}
295 
296 							if (pathOwned && (prev->text.first != prev->text.afterLast)) {
297 								memory->free(memory, (URI_CHAR *)prev->text.first);
298 							}
299 							memory->free(memory, prev);
300 
301 							walker = nextBackup;
302 						}
303 					} else {
304 						URI_TYPE(PathSegment) * const anotherNextBackup = walker->next;
305 						/* First segment -> update head pointer */
306 						uri->pathHead = walker->next;
307 						if (walker->next != NULL) {
308 							walker->next->reserved = NULL;
309 						} else {
310 							/* Last segment -> update tail */
311 							uri->pathTail = NULL;
312 						}
313 
314 						if (pathOwned && (walker->text.first != walker->text.afterLast)) {
315 							memory->free(memory, (URI_CHAR *)walker->text.first);
316 						}
317 						memory->free(memory, walker);
318 
319 						walker = anotherNextBackup;
320 					}
321 				}
322 			}
323 			break;
324 
325 		}
326 
327 		if (!removeSegment) {
328 			if (walker->next != NULL) {
329 				walker->next->reserved = walker;
330 			} else {
331 				/* Last segment -> update tail */
332 				uri->pathTail = walker;
333 			}
334 			walker = walker->next;
335 		}
336 	} while (walker != NULL);
337 
338 	return URI_TRUE;
339 }
340 
341 
342 
343 /* Properly removes "." and ".." path segments */
URI_FUNC(RemoveDotSegmentsAbsolute)344 UriBool URI_FUNC(RemoveDotSegmentsAbsolute)(URI_TYPE(Uri) * uri,
345 		UriMemoryManager * memory) {
346 	const UriBool ABSOLUTE = URI_FALSE;
347 	return URI_FUNC(RemoveDotSegments)(uri, ABSOLUTE, memory);
348 }
349 
350 
351 
URI_FUNC(HexdigToInt)352 unsigned char URI_FUNC(HexdigToInt)(URI_CHAR hexdig) {
353 	switch (hexdig) {
354 	case _UT('0'):
355 	case _UT('1'):
356 	case _UT('2'):
357 	case _UT('3'):
358 	case _UT('4'):
359 	case _UT('5'):
360 	case _UT('6'):
361 	case _UT('7'):
362 	case _UT('8'):
363 	case _UT('9'):
364 		return (unsigned char)(9 + hexdig - _UT('9'));
365 
366 	case _UT('a'):
367 	case _UT('b'):
368 	case _UT('c'):
369 	case _UT('d'):
370 	case _UT('e'):
371 	case _UT('f'):
372 		return (unsigned char)(15 + hexdig - _UT('f'));
373 
374 	case _UT('A'):
375 	case _UT('B'):
376 	case _UT('C'):
377 	case _UT('D'):
378 	case _UT('E'):
379 	case _UT('F'):
380 		return (unsigned char)(15 + hexdig - _UT('F'));
381 
382 	default:
383 		return 0;
384 	}
385 }
386 
387 
388 
URI_FUNC(HexToLetter)389 URI_CHAR URI_FUNC(HexToLetter)(unsigned int value) {
390 	/* Uppercase recommended in section 2.1. of RFC 3986 *
391 	 * http://tools.ietf.org/html/rfc3986#section-2.1    */
392 	return URI_FUNC(HexToLetterEx)(value, URI_TRUE);
393 }
394 
395 
396 
URI_FUNC(HexToLetterEx)397 URI_CHAR URI_FUNC(HexToLetterEx)(unsigned int value, UriBool uppercase) {
398 	switch (value) {
399 	case  0: return _UT('0');
400 	case  1: return _UT('1');
401 	case  2: return _UT('2');
402 	case  3: return _UT('3');
403 	case  4: return _UT('4');
404 	case  5: return _UT('5');
405 	case  6: return _UT('6');
406 	case  7: return _UT('7');
407 	case  8: return _UT('8');
408 	case  9: return _UT('9');
409 
410 	case 10: return (uppercase == URI_TRUE) ? _UT('A') : _UT('a');
411 	case 11: return (uppercase == URI_TRUE) ? _UT('B') : _UT('b');
412 	case 12: return (uppercase == URI_TRUE) ? _UT('C') : _UT('c');
413 	case 13: return (uppercase == URI_TRUE) ? _UT('D') : _UT('d');
414 	case 14: return (uppercase == URI_TRUE) ? _UT('E') : _UT('e');
415 	default: return (uppercase == URI_TRUE) ? _UT('F') : _UT('f');
416 	}
417 }
418 
419 
420 
421 /* Checks if a URI has the host component set. */
URI_FUNC(IsHostSet)422 UriBool URI_FUNC(IsHostSet)(const URI_TYPE(Uri) * uri) {
423 	return (uri != NULL)
424 			&& ((uri->hostText.first != NULL)
425 				|| (uri->hostData.ip4 != NULL)
426 				|| (uri->hostData.ip6 != NULL)
427 				|| (uri->hostData.ipFuture.first != NULL)
428 			);
429 }
430 
431 
432 
433 /* Copies the path segment list from one URI to another. */
URI_FUNC(CopyPath)434 UriBool URI_FUNC(CopyPath)(URI_TYPE(Uri) * dest,
435 		const URI_TYPE(Uri) * source, UriMemoryManager * memory) {
436 	if (source->pathHead == NULL) {
437 		/* No path component */
438 		dest->pathHead = NULL;
439 		dest->pathTail = NULL;
440 	} else {
441 		/* Copy list but not the text contained */
442 		URI_TYPE(PathSegment) * sourceWalker = source->pathHead;
443 		URI_TYPE(PathSegment) * destPrev = NULL;
444 		do {
445 			URI_TYPE(PathSegment) * cur = memory->malloc(memory, sizeof(URI_TYPE(PathSegment)));
446 			if (cur == NULL) {
447 				/* Fix broken list */
448 				if (destPrev != NULL) {
449 					destPrev->next = NULL;
450 				}
451 				return URI_FALSE; /* Raises malloc error */
452 			}
453 
454 			/* From this functions usage we know that *
455 			 * the dest URI cannot be uri->owner      */
456 			cur->text = sourceWalker->text;
457 			if (destPrev == NULL) {
458 				/* First segment ever */
459 				dest->pathHead = cur;
460 			} else {
461 				destPrev->next = cur;
462 			}
463 			destPrev = cur;
464 			sourceWalker = sourceWalker->next;
465 		} while (sourceWalker != NULL);
466 		dest->pathTail = destPrev;
467 		dest->pathTail->next = NULL;
468 	}
469 
470 	dest->absolutePath = source->absolutePath;
471 	return URI_TRUE;
472 }
473 
474 
475 
476 /* Copies the authority part of an URI over to another. */
URI_FUNC(CopyAuthority)477 UriBool URI_FUNC(CopyAuthority)(URI_TYPE(Uri) * dest,
478 		const URI_TYPE(Uri) * source, UriMemoryManager * memory) {
479 	/* From this functions usage we know that *
480 	 * the dest URI cannot be uri->owner      */
481 
482 	/* Copy userInfo */
483 	dest->userInfo = source->userInfo;
484 
485 	/* Copy hostText */
486 	dest->hostText = source->hostText;
487 
488 	/* Copy hostData */
489 	if (source->hostData.ip4 != NULL) {
490 		dest->hostData.ip4 = memory->malloc(memory, sizeof(UriIp4));
491 		if (dest->hostData.ip4 == NULL) {
492 			return URI_FALSE; /* Raises malloc error */
493 		}
494 		*(dest->hostData.ip4) = *(source->hostData.ip4);
495 		dest->hostData.ip6 = NULL;
496 		dest->hostData.ipFuture.first = NULL;
497 		dest->hostData.ipFuture.afterLast = NULL;
498 	} else if (source->hostData.ip6 != NULL) {
499 		dest->hostData.ip4 = NULL;
500 		dest->hostData.ip6 = memory->malloc(memory, sizeof(UriIp6));
501 		if (dest->hostData.ip6 == NULL) {
502 			return URI_FALSE; /* Raises malloc error */
503 		}
504 		*(dest->hostData.ip6) = *(source->hostData.ip6);
505 		dest->hostData.ipFuture.first = NULL;
506 		dest->hostData.ipFuture.afterLast = NULL;
507 	} else {
508 		dest->hostData.ip4 = NULL;
509 		dest->hostData.ip6 = NULL;
510 		dest->hostData.ipFuture = source->hostData.ipFuture;
511 	}
512 
513 	/* Copy portText */
514 	dest->portText = source->portText;
515 
516 	return URI_TRUE;
517 }
518 
519 
520 
URI_FUNC(FixAmbiguity)521 UriBool URI_FUNC(FixAmbiguity)(URI_TYPE(Uri) * uri,
522 		UriMemoryManager * memory) {
523 	URI_TYPE(PathSegment) * segment;
524 
525 	if (	/* Case 1: absolute path, empty first segment */
526 			(uri->absolutePath
527 			&& (uri->pathHead != NULL)
528 			&& (uri->pathHead->text.afterLast == uri->pathHead->text.first))
529 
530 			/* Case 2: relative path, empty first and second segment */
531 			|| (!uri->absolutePath
532 			&& (uri->pathHead != NULL)
533 			&& (uri->pathHead->next != NULL)
534 			&& (uri->pathHead->text.afterLast == uri->pathHead->text.first)
535 			&& (uri->pathHead->next->text.afterLast == uri->pathHead->next->text.first))) {
536 		/* NOOP */
537 	} else {
538 		return URI_TRUE;
539 	}
540 
541 	segment = memory->malloc(memory, 1 * sizeof(URI_TYPE(PathSegment)));
542 	if (segment == NULL) {
543 		return URI_FALSE; /* Raises malloc error */
544 	}
545 
546 	/* Insert "." segment in front */
547 	segment->next = uri->pathHead;
548 	segment->text.first = URI_FUNC(ConstPwd);
549 	segment->text.afterLast = URI_FUNC(ConstPwd) + 1;
550 	uri->pathHead = segment;
551 	return URI_TRUE;
552 }
553 
554 
555 
URI_FUNC(FixEmptyTrailSegment)556 void URI_FUNC(FixEmptyTrailSegment)(URI_TYPE(Uri) * uri,
557 		UriMemoryManager * memory) {
558 	/* Fix path if only one empty segment */
559 	if (!uri->absolutePath
560 			&& !URI_FUNC(IsHostSet)(uri)
561 			&& (uri->pathHead != NULL)
562 			&& (uri->pathHead->next == NULL)
563 			&& (uri->pathHead->text.first == uri->pathHead->text.afterLast)) {
564 		memory->free(memory, uri->pathHead);
565 		uri->pathHead = NULL;
566 		uri->pathTail = NULL;
567 	}
568 }
569 
570 
571 
572 #endif
573