1 /*
2 * uriparser - RFC 3986 URI parsing library
3 *
4 * Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
5 * Copyright (C) 2007, Sebastian Pipping <sebastian@pipping.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * * Redistributions of source code must retain the above
13 * copyright notice, this list of conditions and the following
14 * disclaimer.
15 *
16 * * Redistributions in binary form must reproduce the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer in the documentation and/or other materials
19 * provided with the distribution.
20 *
21 * * Neither the name of the <ORGANIZATION> nor the names of its
22 * contributors may be used to endorse or promote products
23 * derived from this software without specific prior written
24 * permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
29 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
30 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
31 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
32 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
33 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
35 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
37 * OF THE POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 /**
41 * @file UriNormalize.c
42 * Holds the RFC 3986 %URI normalization implementation.
43 * NOTE: This source file includes itself twice.
44 */
45
46 /* What encodings are enabled? */
47 #include "UriDefsConfig.h"
48 #if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
49 /* Include SELF twice */
50 # ifdef URI_ENABLE_ANSI
51 # define URI_PASS_ANSI 1
52 # include "UriNormalize.c"
53 # undef URI_PASS_ANSI
54 # endif
55 # ifdef URI_ENABLE_UNICODE
56 # define URI_PASS_UNICODE 1
57 # include "UriNormalize.c"
58 # undef URI_PASS_UNICODE
59 # endif
60 #else
61 # ifdef URI_PASS_ANSI
62 # include "UriDefsAnsi.h"
63 # else
64 # include "UriDefsUnicode.h"
65 # include <wchar.h>
66 # endif
67
68
69
70 #ifndef URI_DOXYGEN
71 # include "Uri.h"
72 # include "UriNormalizeBase.h"
73 # include "UriCommon.h"
74 # include "UriMemory.h"
75 #endif
76
77
78
79 #include <assert.h>
80
81
82
83 static int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri, unsigned int inMask,
84 unsigned int * outMask, UriMemoryManager * memory);
85
86 static UriBool URI_FUNC(MakeRangeOwner)(unsigned int * doneMask,
87 unsigned int maskTest, URI_TYPE(TextRange) * range,
88 UriMemoryManager * memory);
89 static UriBool URI_FUNC(MakeOwner)(URI_TYPE(Uri) * uri,
90 unsigned int * doneMask, UriMemoryManager * memory);
91
92 static void URI_FUNC(FixPercentEncodingInplace)(const URI_CHAR * first,
93 const URI_CHAR ** afterLast);
94 static UriBool URI_FUNC(FixPercentEncodingMalloc)(const URI_CHAR ** first,
95 const URI_CHAR ** afterLast, UriMemoryManager * memory);
96 static void URI_FUNC(FixPercentEncodingEngine)(
97 const URI_CHAR * inFirst, const URI_CHAR * inAfterLast,
98 const URI_CHAR * outFirst, const URI_CHAR ** outAfterLast);
99
100 static UriBool URI_FUNC(ContainsUppercaseLetters)(const URI_CHAR * first,
101 const URI_CHAR * afterLast);
102 static UriBool URI_FUNC(ContainsUglyPercentEncoding)(const URI_CHAR * first,
103 const URI_CHAR * afterLast);
104
105 static void URI_FUNC(LowercaseInplace)(const URI_CHAR * first,
106 const URI_CHAR * afterLast);
107 static UriBool URI_FUNC(LowercaseMalloc)(const URI_CHAR ** first,
108 const URI_CHAR ** afterLast, UriMemoryManager * memory);
109
110 static void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri,
111 unsigned int revertMask, UriMemoryManager * memory);
112
113
114
URI_FUNC(PreventLeakage)115 static URI_INLINE void URI_FUNC(PreventLeakage)(URI_TYPE(Uri) * uri,
116 unsigned int revertMask, UriMemoryManager * memory) {
117 if (revertMask & URI_NORMALIZE_SCHEME) {
118 memory->free(memory, (URI_CHAR *)uri->scheme.first);
119 uri->scheme.first = NULL;
120 uri->scheme.afterLast = NULL;
121 }
122
123 if (revertMask & URI_NORMALIZE_USER_INFO) {
124 memory->free(memory, (URI_CHAR *)uri->userInfo.first);
125 uri->userInfo.first = NULL;
126 uri->userInfo.afterLast = NULL;
127 }
128
129 if (revertMask & URI_NORMALIZE_HOST) {
130 if (uri->hostData.ipFuture.first != NULL) {
131 /* IPvFuture */
132 memory->free(memory, (URI_CHAR *)uri->hostData.ipFuture.first);
133 uri->hostData.ipFuture.first = NULL;
134 uri->hostData.ipFuture.afterLast = NULL;
135 uri->hostText.first = NULL;
136 uri->hostText.afterLast = NULL;
137 } else if ((uri->hostText.first != NULL)
138 && (uri->hostData.ip4 == NULL)
139 && (uri->hostData.ip6 == NULL)) {
140 /* Regname */
141 memory->free(memory, (URI_CHAR *)uri->hostText.first);
142 uri->hostText.first = NULL;
143 uri->hostText.afterLast = NULL;
144 }
145 }
146
147 /* NOTE: Port cannot happen! */
148
149 if (revertMask & URI_NORMALIZE_PATH) {
150 URI_TYPE(PathSegment) * walker = uri->pathHead;
151 while (walker != NULL) {
152 URI_TYPE(PathSegment) * const next = walker->next;
153 if (walker->text.afterLast > walker->text.first) {
154 memory->free(memory, (URI_CHAR *)walker->text.first);
155 }
156 memory->free(memory, walker);
157 walker = next;
158 }
159 uri->pathHead = NULL;
160 uri->pathTail = NULL;
161 }
162
163 if (revertMask & URI_NORMALIZE_QUERY) {
164 memory->free(memory, (URI_CHAR *)uri->query.first);
165 uri->query.first = NULL;
166 uri->query.afterLast = NULL;
167 }
168
169 if (revertMask & URI_NORMALIZE_FRAGMENT) {
170 memory->free(memory, (URI_CHAR *)uri->fragment.first);
171 uri->fragment.first = NULL;
172 uri->fragment.afterLast = NULL;
173 }
174 }
175
176
177
URI_FUNC(ContainsUppercaseLetters)178 static URI_INLINE UriBool URI_FUNC(ContainsUppercaseLetters)(const URI_CHAR * first,
179 const URI_CHAR * afterLast) {
180 if ((first != NULL) && (afterLast != NULL) && (afterLast > first)) {
181 const URI_CHAR * i = first;
182 for (; i < afterLast; i++) {
183 /* 6.2.2.1 Case Normalization: uppercase letters in scheme or host */
184 if ((*i >= _UT('A')) && (*i <= _UT('Z'))) {
185 return URI_TRUE;
186 }
187 }
188 }
189 return URI_FALSE;
190 }
191
192
193
URI_FUNC(ContainsUglyPercentEncoding)194 static URI_INLINE UriBool URI_FUNC(ContainsUglyPercentEncoding)(const URI_CHAR * first,
195 const URI_CHAR * afterLast) {
196 if ((first != NULL) && (afterLast != NULL) && (afterLast > first)) {
197 const URI_CHAR * i = first;
198 for (; i + 2 < afterLast; i++) {
199 if (i[0] == _UT('%')) {
200 /* 6.2.2.1 Case Normalization: *
201 * lowercase percent-encodings */
202 if (((i[1] >= _UT('a')) && (i[1] <= _UT('f')))
203 || ((i[2] >= _UT('a')) && (i[2] <= _UT('f')))) {
204 return URI_TRUE;
205 } else {
206 /* 6.2.2.2 Percent-Encoding Normalization: *
207 * percent-encoded unreserved characters */
208 const unsigned char left = URI_FUNC(HexdigToInt)(i[1]);
209 const unsigned char right = URI_FUNC(HexdigToInt)(i[2]);
210 const int code = 16 * left + right;
211 if (uriIsUnreserved(code)) {
212 return URI_TRUE;
213 }
214 }
215 }
216 }
217 }
218 return URI_FALSE;
219 }
220
221
222
URI_FUNC(LowercaseInplace)223 static URI_INLINE void URI_FUNC(LowercaseInplace)(const URI_CHAR * first,
224 const URI_CHAR * afterLast) {
225 if ((first != NULL) && (afterLast != NULL) && (afterLast > first)) {
226 URI_CHAR * i = (URI_CHAR *)first;
227 const int lowerUpperDiff = (_UT('a') - _UT('A'));
228 for (; i < afterLast; i++) {
229 if ((*i >= _UT('A')) && (*i <=_UT('Z'))) {
230 *i = (URI_CHAR)(*i + lowerUpperDiff);
231 }
232 }
233 }
234 }
235
236
237
URI_FUNC(LowercaseMalloc)238 static URI_INLINE UriBool URI_FUNC(LowercaseMalloc)(const URI_CHAR ** first,
239 const URI_CHAR ** afterLast, UriMemoryManager * memory) {
240 int lenInChars;
241 const int lowerUpperDiff = (_UT('a') - _UT('A'));
242 URI_CHAR * buffer;
243 int i = 0;
244
245 if ((first == NULL) || (afterLast == NULL) || (*first == NULL)
246 || (*afterLast == NULL)) {
247 return URI_FALSE;
248 }
249
250 lenInChars = (int)(*afterLast - *first);
251 if (lenInChars == 0) {
252 return URI_TRUE;
253 } else if (lenInChars < 0) {
254 return URI_FALSE;
255 }
256
257 buffer = memory->malloc(memory, lenInChars * sizeof(URI_CHAR));
258 if (buffer == NULL) {
259 return URI_FALSE;
260 }
261
262 for (; i < lenInChars; i++) {
263 if (((*first)[i] >= _UT('A')) && ((*first)[i] <=_UT('Z'))) {
264 buffer[i] = (URI_CHAR)((*first)[i] + lowerUpperDiff);
265 } else {
266 buffer[i] = (*first)[i];
267 }
268 }
269
270 *first = buffer;
271 *afterLast = buffer + lenInChars;
272 return URI_TRUE;
273 }
274
275
276
277 /* NOTE: Implementation must stay inplace-compatible */
URI_FUNC(FixPercentEncodingEngine)278 static URI_INLINE void URI_FUNC(FixPercentEncodingEngine)(
279 const URI_CHAR * inFirst, const URI_CHAR * inAfterLast,
280 const URI_CHAR * outFirst, const URI_CHAR ** outAfterLast) {
281 URI_CHAR * write = (URI_CHAR *)outFirst;
282 const int lenInChars = (int)(inAfterLast - inFirst);
283 int i = 0;
284
285 /* All but last two */
286 for (; i + 2 < lenInChars; i++) {
287 if (inFirst[i] != _UT('%')) {
288 write[0] = inFirst[i];
289 write++;
290 } else {
291 /* 6.2.2.2 Percent-Encoding Normalization: *
292 * percent-encoded unreserved characters */
293 const URI_CHAR one = inFirst[i + 1];
294 const URI_CHAR two = inFirst[i + 2];
295 const unsigned char left = URI_FUNC(HexdigToInt)(one);
296 const unsigned char right = URI_FUNC(HexdigToInt)(two);
297 const int code = 16 * left + right;
298 if (uriIsUnreserved(code)) {
299 write[0] = (URI_CHAR)(code);
300 write++;
301 } else {
302 /* 6.2.2.1 Case Normalization: *
303 * lowercase percent-encodings */
304 write[0] = _UT('%');
305 write[1] = URI_FUNC(HexToLetter)(left);
306 write[2] = URI_FUNC(HexToLetter)(right);
307 write += 3;
308 }
309
310 i += 2; /* For the two chars of the percent group we just ate */
311 }
312 }
313
314 /* Last two */
315 for (; i < lenInChars; i++) {
316 write[0] = inFirst[i];
317 write++;
318 }
319
320 *outAfterLast = write;
321 }
322
323
324
URI_FUNC(FixPercentEncodingInplace)325 static URI_INLINE void URI_FUNC(FixPercentEncodingInplace)(const URI_CHAR * first,
326 const URI_CHAR ** afterLast) {
327 /* Death checks */
328 if ((first == NULL) || (afterLast == NULL) || (*afterLast == NULL)) {
329 return;
330 }
331
332 /* Fix inplace */
333 URI_FUNC(FixPercentEncodingEngine)(first, *afterLast, first, afterLast);
334 }
335
336
337
URI_FUNC(FixPercentEncodingMalloc)338 static URI_INLINE UriBool URI_FUNC(FixPercentEncodingMalloc)(const URI_CHAR ** first,
339 const URI_CHAR ** afterLast, UriMemoryManager * memory) {
340 int lenInChars;
341 URI_CHAR * buffer;
342
343 /* Death checks */
344 if ((first == NULL) || (afterLast == NULL)
345 || (*first == NULL) || (*afterLast == NULL)) {
346 return URI_FALSE;
347 }
348
349 /* Old text length */
350 lenInChars = (int)(*afterLast - *first);
351 if (lenInChars == 0) {
352 return URI_TRUE;
353 } else if (lenInChars < 0) {
354 return URI_FALSE;
355 }
356
357 /* New buffer */
358 buffer = memory->malloc(memory, lenInChars * sizeof(URI_CHAR));
359 if (buffer == NULL) {
360 return URI_FALSE;
361 }
362
363 /* Fix on copy */
364 URI_FUNC(FixPercentEncodingEngine)(*first, *afterLast, buffer, afterLast);
365 *first = buffer;
366 return URI_TRUE;
367 }
368
369
370
URI_FUNC(MakeRangeOwner)371 static URI_INLINE UriBool URI_FUNC(MakeRangeOwner)(unsigned int * doneMask,
372 unsigned int maskTest, URI_TYPE(TextRange) * range,
373 UriMemoryManager * memory) {
374 if (((*doneMask & maskTest) == 0)
375 && (range->first != NULL)
376 && (range->afterLast != NULL)
377 && (range->afterLast > range->first)) {
378 const int lenInChars = (int)(range->afterLast - range->first);
379 const int lenInBytes = lenInChars * sizeof(URI_CHAR);
380 URI_CHAR * dup = memory->malloc(memory, lenInBytes);
381 if (dup == NULL) {
382 return URI_FALSE; /* Raises malloc error */
383 }
384 memcpy(dup, range->first, lenInBytes);
385 range->first = dup;
386 range->afterLast = dup + lenInChars;
387 *doneMask |= maskTest;
388 }
389 return URI_TRUE;
390 }
391
392
393
URI_FUNC(MakeOwner)394 static URI_INLINE UriBool URI_FUNC(MakeOwner)(URI_TYPE(Uri) * uri,
395 unsigned int * doneMask, UriMemoryManager * memory) {
396 URI_TYPE(PathSegment) * walker = uri->pathHead;
397 if (!URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_SCHEME,
398 &(uri->scheme), memory)
399 || !URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_USER_INFO,
400 &(uri->userInfo), memory)
401 || !URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_QUERY,
402 &(uri->query), memory)
403 || !URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_FRAGMENT,
404 &(uri->fragment), memory)) {
405 return URI_FALSE; /* Raises malloc error */
406 }
407
408 /* Host */
409 if ((*doneMask & URI_NORMALIZE_HOST) == 0) {
410 if ((uri->hostData.ip4 == NULL)
411 && (uri->hostData.ip6 == NULL)) {
412 if (uri->hostData.ipFuture.first != NULL) {
413 /* IPvFuture */
414 if (!URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_HOST,
415 &(uri->hostData.ipFuture), memory)) {
416 return URI_FALSE; /* Raises malloc error */
417 }
418 uri->hostText.first = uri->hostData.ipFuture.first;
419 uri->hostText.afterLast = uri->hostData.ipFuture.afterLast;
420 } else if (uri->hostText.first != NULL) {
421 /* Regname */
422 if (!URI_FUNC(MakeRangeOwner)(doneMask, URI_NORMALIZE_HOST,
423 &(uri->hostText), memory)) {
424 return URI_FALSE; /* Raises malloc error */
425 }
426 }
427 }
428 }
429
430 /* Path */
431 if ((*doneMask & URI_NORMALIZE_PATH) == 0) {
432 while (walker != NULL) {
433 if (!URI_FUNC(MakeRangeOwner)(doneMask, 0, &(walker->text), memory)) {
434 /* Free allocations done so far and kill path */
435
436 /* Kill path to one before walker (if any) */
437 URI_TYPE(PathSegment) * ranger = uri->pathHead;
438 while (ranger != walker) {
439 URI_TYPE(PathSegment) * const next = ranger->next;
440 if ((ranger->text.first != NULL)
441 && (ranger->text.afterLast != NULL)
442 && (ranger->text.afterLast > ranger->text.first)) {
443 memory->free(memory, (URI_CHAR *)ranger->text.first);
444 }
445 memory->free(memory, ranger);
446 ranger = next;
447 }
448
449 /* Kill path from walker */
450 while (walker != NULL) {
451 URI_TYPE(PathSegment) * const next = walker->next;
452 memory->free(memory, walker);
453 walker = next;
454 }
455
456 uri->pathHead = NULL;
457 uri->pathTail = NULL;
458 return URI_FALSE; /* Raises malloc error */
459 }
460 walker = walker->next;
461 }
462 *doneMask |= URI_NORMALIZE_PATH;
463 }
464
465 /* Port text, must come last so we don't have to undo that one if it fails. *
466 * Otherwise we would need and extra enum flag for it although the port *
467 * cannot go unnormalized... */
468 if (!URI_FUNC(MakeRangeOwner)(doneMask, 0, &(uri->portText), memory)) {
469 return URI_FALSE; /* Raises malloc error */
470 }
471
472 return URI_TRUE;
473 }
474
475
476
URI_FUNC(NormalizeSyntaxMaskRequired)477 unsigned int URI_FUNC(NormalizeSyntaxMaskRequired)(const URI_TYPE(Uri) * uri) {
478 unsigned int outMask = URI_NORMALIZED; /* for NULL uri */
479 URI_FUNC(NormalizeSyntaxMaskRequiredEx)(uri, &outMask);
480 return outMask;
481 }
482
483
484
URI_FUNC(NormalizeSyntaxMaskRequiredEx)485 int URI_FUNC(NormalizeSyntaxMaskRequiredEx)(const URI_TYPE(Uri) * uri,
486 unsigned int * outMask) {
487 UriMemoryManager * const memory = NULL; /* no use of memory manager */
488
489 #if defined(__GNUC__) && ((__GNUC__ > 4) \
490 || ((__GNUC__ == 4) && defined(__GNUC_MINOR__) && (__GNUC_MINOR__ >= 2)))
491 /* Slower code that fixes a warning, not sure if this is a smart idea */
492 URI_TYPE(Uri) writeableClone;
493 #endif
494
495 if ((uri == NULL) || (outMask == NULL)) {
496 return URI_ERROR_NULL;
497 }
498
499 #if defined(__GNUC__) && ((__GNUC__ > 4) \
500 || ((__GNUC__ == 4) && defined(__GNUC_MINOR__) && (__GNUC_MINOR__ >= 2)))
501 /* Slower code that fixes a warning, not sure if this is a smart idea */
502 memcpy(&writeableClone, uri, 1 * sizeof(URI_TYPE(Uri)));
503 URI_FUNC(NormalizeSyntaxEngine)(&writeableClone, 0, outMask, memory);
504 #else
505 URI_FUNC(NormalizeSyntaxEngine)((URI_TYPE(Uri) *)uri, 0, outMask, memory);
506 #endif
507 return URI_SUCCESS;
508 }
509
510
511
URI_FUNC(NormalizeSyntaxEx)512 int URI_FUNC(NormalizeSyntaxEx)(URI_TYPE(Uri) * uri, unsigned int mask) {
513 return URI_FUNC(NormalizeSyntaxExMm)(uri, mask, NULL);
514 }
515
516
517
URI_FUNC(NormalizeSyntaxExMm)518 int URI_FUNC(NormalizeSyntaxExMm)(URI_TYPE(Uri) * uri, unsigned int mask,
519 UriMemoryManager * memory) {
520 URI_CHECK_MEMORY_MANAGER(memory); /* may return */
521 return URI_FUNC(NormalizeSyntaxEngine)(uri, mask, NULL, memory);
522 }
523
524
525
URI_FUNC(NormalizeSyntax)526 int URI_FUNC(NormalizeSyntax)(URI_TYPE(Uri) * uri) {
527 return URI_FUNC(NormalizeSyntaxEx)(uri, (unsigned int)-1);
528 }
529
530
531
URI_FUNC(NormalizeSyntaxEngine)532 static URI_INLINE int URI_FUNC(NormalizeSyntaxEngine)(URI_TYPE(Uri) * uri,
533 unsigned int inMask, unsigned int * outMask,
534 UriMemoryManager * memory) {
535 unsigned int doneMask = URI_NORMALIZED;
536
537 /* Not just doing inspection? -> memory manager required! */
538 if (outMask == NULL) {
539 assert(memory != NULL);
540 }
541
542 if (uri == NULL) {
543 if (outMask != NULL) {
544 *outMask = URI_NORMALIZED;
545 return URI_SUCCESS;
546 } else {
547 return URI_ERROR_NULL;
548 }
549 }
550
551 if (outMask != NULL) {
552 /* Reset mask */
553 *outMask = URI_NORMALIZED;
554 } else if (inMask == URI_NORMALIZED) {
555 /* Nothing to do */
556 return URI_SUCCESS;
557 }
558
559 /* Scheme, host */
560 if (outMask != NULL) {
561 const UriBool normalizeScheme = URI_FUNC(ContainsUppercaseLetters)(
562 uri->scheme.first, uri->scheme.afterLast);
563 const UriBool normalizeHostCase = URI_FUNC(ContainsUppercaseLetters)(
564 uri->hostText.first, uri->hostText.afterLast);
565 if (normalizeScheme) {
566 *outMask |= URI_NORMALIZE_SCHEME;
567 }
568
569 if (normalizeHostCase) {
570 *outMask |= URI_NORMALIZE_HOST;
571 } else {
572 const UriBool normalizeHostPrecent = URI_FUNC(ContainsUglyPercentEncoding)(
573 uri->hostText.first, uri->hostText.afterLast);
574 if (normalizeHostPrecent) {
575 *outMask |= URI_NORMALIZE_HOST;
576 }
577 }
578 } else {
579 /* Scheme */
580 if ((inMask & URI_NORMALIZE_SCHEME) && (uri->scheme.first != NULL)) {
581 if (uri->owner) {
582 URI_FUNC(LowercaseInplace)(uri->scheme.first, uri->scheme.afterLast);
583 } else {
584 if (!URI_FUNC(LowercaseMalloc)(&(uri->scheme.first), &(uri->scheme.afterLast), memory)) {
585 URI_FUNC(PreventLeakage)(uri, doneMask, memory);
586 return URI_ERROR_MALLOC;
587 }
588 doneMask |= URI_NORMALIZE_SCHEME;
589 }
590 }
591
592 /* Host */
593 if (inMask & URI_NORMALIZE_HOST) {
594 if (uri->hostData.ipFuture.first != NULL) {
595 /* IPvFuture */
596 if (uri->owner) {
597 URI_FUNC(LowercaseInplace)(uri->hostData.ipFuture.first,
598 uri->hostData.ipFuture.afterLast);
599 } else {
600 if (!URI_FUNC(LowercaseMalloc)(&(uri->hostData.ipFuture.first),
601 &(uri->hostData.ipFuture.afterLast), memory)) {
602 URI_FUNC(PreventLeakage)(uri, doneMask, memory);
603 return URI_ERROR_MALLOC;
604 }
605 doneMask |= URI_NORMALIZE_HOST;
606 }
607 uri->hostText.first = uri->hostData.ipFuture.first;
608 uri->hostText.afterLast = uri->hostData.ipFuture.afterLast;
609 } else if ((uri->hostText.first != NULL)
610 && (uri->hostData.ip4 == NULL)
611 && (uri->hostData.ip6 == NULL)) {
612 /* Regname */
613 if (uri->owner) {
614 URI_FUNC(FixPercentEncodingInplace)(uri->hostText.first,
615 &(uri->hostText.afterLast));
616 } else {
617 if (!URI_FUNC(FixPercentEncodingMalloc)(
618 &(uri->hostText.first),
619 &(uri->hostText.afterLast),
620 memory)) {
621 URI_FUNC(PreventLeakage)(uri, doneMask, memory);
622 return URI_ERROR_MALLOC;
623 }
624 doneMask |= URI_NORMALIZE_HOST;
625 }
626
627 URI_FUNC(LowercaseInplace)(uri->hostText.first,
628 uri->hostText.afterLast);
629 }
630 }
631 }
632
633 /* User info */
634 if (outMask != NULL) {
635 const UriBool normalizeUserInfo = URI_FUNC(ContainsUglyPercentEncoding)(
636 uri->userInfo.first, uri->userInfo.afterLast);
637 if (normalizeUserInfo) {
638 *outMask |= URI_NORMALIZE_USER_INFO;
639 }
640 } else {
641 if ((inMask & URI_NORMALIZE_USER_INFO) && (uri->userInfo.first != NULL)) {
642 if (uri->owner) {
643 URI_FUNC(FixPercentEncodingInplace)(uri->userInfo.first, &(uri->userInfo.afterLast));
644 } else {
645 if (!URI_FUNC(FixPercentEncodingMalloc)(&(uri->userInfo.first),
646 &(uri->userInfo.afterLast), memory)) {
647 URI_FUNC(PreventLeakage)(uri, doneMask, memory);
648 return URI_ERROR_MALLOC;
649 }
650 doneMask |= URI_NORMALIZE_USER_INFO;
651 }
652 }
653 }
654
655 /* Path */
656 if (outMask != NULL) {
657 const URI_TYPE(PathSegment) * walker = uri->pathHead;
658 while (walker != NULL) {
659 const URI_CHAR * const first = walker->text.first;
660 const URI_CHAR * const afterLast = walker->text.afterLast;
661 if ((first != NULL)
662 && (afterLast != NULL)
663 && (afterLast > first)
664 && (
665 (((afterLast - first) == 1)
666 && (first[0] == _UT('.')))
667 ||
668 (((afterLast - first) == 2)
669 && (first[0] == _UT('.'))
670 && (first[1] == _UT('.')))
671 ||
672 URI_FUNC(ContainsUglyPercentEncoding)(first, afterLast)
673 )) {
674 *outMask |= URI_NORMALIZE_PATH;
675 break;
676 }
677 walker = walker->next;
678 }
679 } else if (inMask & URI_NORMALIZE_PATH) {
680 URI_TYPE(PathSegment) * walker;
681 const UriBool relative = ((uri->scheme.first == NULL)
682 && !uri->absolutePath) ? URI_TRUE : URI_FALSE;
683
684 /* Fix percent-encoding for each segment */
685 walker = uri->pathHead;
686 if (uri->owner) {
687 while (walker != NULL) {
688 URI_FUNC(FixPercentEncodingInplace)(walker->text.first, &(walker->text.afterLast));
689 walker = walker->next;
690 }
691 } else {
692 while (walker != NULL) {
693 if (!URI_FUNC(FixPercentEncodingMalloc)(&(walker->text.first),
694 &(walker->text.afterLast), memory)) {
695 URI_FUNC(PreventLeakage)(uri, doneMask, memory);
696 return URI_ERROR_MALLOC;
697 }
698 walker = walker->next;
699 }
700 doneMask |= URI_NORMALIZE_PATH;
701 }
702
703 /* 6.2.2.3 Path Segment Normalization */
704 if (!URI_FUNC(RemoveDotSegmentsEx)(uri, relative,
705 (uri->owner == URI_TRUE)
706 || ((doneMask & URI_NORMALIZE_PATH) != 0),
707 memory)) {
708 URI_FUNC(PreventLeakage)(uri, doneMask, memory);
709 return URI_ERROR_MALLOC;
710 }
711 URI_FUNC(FixEmptyTrailSegment)(uri, memory);
712 }
713
714 /* Query, fragment */
715 if (outMask != NULL) {
716 const UriBool normalizeQuery = URI_FUNC(ContainsUglyPercentEncoding)(
717 uri->query.first, uri->query.afterLast);
718 const UriBool normalizeFragment = URI_FUNC(ContainsUglyPercentEncoding)(
719 uri->fragment.first, uri->fragment.afterLast);
720 if (normalizeQuery) {
721 *outMask |= URI_NORMALIZE_QUERY;
722 }
723
724 if (normalizeFragment) {
725 *outMask |= URI_NORMALIZE_FRAGMENT;
726 }
727 } else {
728 /* Query */
729 if ((inMask & URI_NORMALIZE_QUERY) && (uri->query.first != NULL)) {
730 if (uri->owner) {
731 URI_FUNC(FixPercentEncodingInplace)(uri->query.first, &(uri->query.afterLast));
732 } else {
733 if (!URI_FUNC(FixPercentEncodingMalloc)(&(uri->query.first),
734 &(uri->query.afterLast), memory)) {
735 URI_FUNC(PreventLeakage)(uri, doneMask, memory);
736 return URI_ERROR_MALLOC;
737 }
738 doneMask |= URI_NORMALIZE_QUERY;
739 }
740 }
741
742 /* Fragment */
743 if ((inMask & URI_NORMALIZE_FRAGMENT) && (uri->fragment.first != NULL)) {
744 if (uri->owner) {
745 URI_FUNC(FixPercentEncodingInplace)(uri->fragment.first, &(uri->fragment.afterLast));
746 } else {
747 if (!URI_FUNC(FixPercentEncodingMalloc)(&(uri->fragment.first),
748 &(uri->fragment.afterLast), memory)) {
749 URI_FUNC(PreventLeakage)(uri, doneMask, memory);
750 return URI_ERROR_MALLOC;
751 }
752 doneMask |= URI_NORMALIZE_FRAGMENT;
753 }
754 }
755 }
756
757 /* Dup all not duped yet */
758 if ((outMask == NULL) && !uri->owner) {
759 if (!URI_FUNC(MakeOwner)(uri, &doneMask, memory)) {
760 URI_FUNC(PreventLeakage)(uri, doneMask, memory);
761 return URI_ERROR_MALLOC;
762 }
763 uri->owner = URI_TRUE;
764 }
765
766 return URI_SUCCESS;
767 }
768
769
770
771 #endif
772