1 /*
2 * uriparser - RFC 3986 URI parsing library
3 *
4 * Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
5 * Copyright (C) 2007, Sebastian Pipping <sebastian@pipping.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * 1. Redistributions of source code must retain the above
13 * copyright notice, this list of conditions and the following
14 * disclaimer.
15 *
16 * 2. Redistributions in binary form must reproduce the above
17 * copyright notice, this list of conditions and the following
18 * disclaimer in the documentation and/or other materials
19 * provided with the distribution.
20 *
21 * 3. Neither the name of the copyright holder nor the names of
22 * its contributors may be used to endorse or promote products
23 * derived from this software without specific prior written
24 * permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
29 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
30 * THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
31 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
32 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
33 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
35 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
37 * OF THE POSSIBILITY OF SUCH DAMAGE.
38 */
39
40 /**
41 * @file UriParse.c
42 * Holds the RFC 3986 %URI parsing implementation.
43 * NOTE: This source file includes itself twice.
44 */
45
46 /* What encodings are enabled? */
47 #include <uriparser/UriDefsConfig.h>
48 #if (!defined(URI_PASS_ANSI) && !defined(URI_PASS_UNICODE))
49 /* Include SELF twice */
50 # ifdef URI_ENABLE_ANSI
51 # define URI_PASS_ANSI 1
52 # include "UriParse.c"
53 # undef URI_PASS_ANSI
54 # endif
55 # ifdef URI_ENABLE_UNICODE
56 # define URI_PASS_UNICODE 1
57 # include "UriParse.c"
58 # undef URI_PASS_UNICODE
59 # endif
60 #else
61 # ifdef URI_PASS_ANSI
62 # include <uriparser/UriDefsAnsi.h>
63 # else
64 # include <uriparser/UriDefsUnicode.h>
65 # include <wchar.h>
66 # endif
67
68
69
70 #ifndef URI_DOXYGEN
71 # include <uriparser/Uri.h>
72 # include <uriparser/UriIp4.h>
73 # include "UriCommon.h"
74 # include "UriMemory.h"
75 # include "UriParseBase.h"
76 #endif
77
78
79
/*
 * Case-label fragment covering the decimal digits '0'..'9'.
 *
 * Meant to be written as "case URI_SET_DIGIT:" inside a switch: the
 * expansion therefore omits the leading "case" keyword of the first label
 * and the trailing colon of the last one.
 */
#define URI_SET_DIGIT \
	     _UT('0'): \
	case _UT('1'): \
	case _UT('2'): \
	case _UT('3'): \
	case _UT('4'): \
	case _UT('5'): \
	case _UT('6'): \
	case _UT('7'): \
	case _UT('8'): \
	case _UT('9')
91
/*
 * Case-label fragment covering the upper-case hex letters 'A'..'F'.
 * Use as "case URI_SET_HEX_LETTER_UPPER:" (leading "case" and trailing
 * colon are supplied at the point of use).
 */
#define URI_SET_HEX_LETTER_UPPER \
	     _UT('A'): \
	case _UT('B'): \
	case _UT('C'): \
	case _UT('D'): \
	case _UT('E'): \
	case _UT('F')
99
/*
 * Case-label fragment covering the lower-case hex letters 'a'..'f'.
 * Use as "case URI_SET_HEX_LETTER_LOWER:" (leading "case" and trailing
 * colon are supplied at the point of use).
 */
#define URI_SET_HEX_LETTER_LOWER \
	     _UT('a'): \
	case _UT('b'): \
	case _UT('c'): \
	case _UT('d'): \
	case _UT('e'): \
	case _UT('f')
107
/*
 * Case-label fragment covering every hexadecimal digit: '0'..'9',
 * 'A'..'F' and 'a'..'f'. Composed from the three sets above; use as
 * "case URI_SET_HEXDIG:".
 */
#define URI_SET_HEXDIG \
	     URI_SET_DIGIT: \
	case URI_SET_HEX_LETTER_UPPER: \
	case URI_SET_HEX_LETTER_LOWER
112
/*
 * Case-label fragment covering the letters 'a'..'z' and 'A'..'Z'.
 *
 * The letters a-f / A-F come from the hex letter sets; the remaining
 * letters g-z / G-Z are listed explicitly. Use as "case URI_SET_ALPHA:".
 * Because the labels overlap with URI_SET_HEXDIG, the two sets cannot be
 * used together in the same switch statement.
 */
#define URI_SET_ALPHA \
	     URI_SET_HEX_LETTER_UPPER: \
	case URI_SET_HEX_LETTER_LOWER: \
	case _UT('g'): \
	case _UT('G'): \
	case _UT('h'): \
	case _UT('H'): \
	case _UT('i'): \
	case _UT('I'): \
	case _UT('j'): \
	case _UT('J'): \
	case _UT('k'): \
	case _UT('K'): \
	case _UT('l'): \
	case _UT('L'): \
	case _UT('m'): \
	case _UT('M'): \
	case _UT('n'): \
	case _UT('N'): \
	case _UT('o'): \
	case _UT('O'): \
	case _UT('p'): \
	case _UT('P'): \
	case _UT('q'): \
	case _UT('Q'): \
	case _UT('r'): \
	case _UT('R'): \
	case _UT('s'): \
	case _UT('S'): \
	case _UT('t'): \
	case _UT('T'): \
	case _UT('u'): \
	case _UT('U'): \
	case _UT('v'): \
	case _UT('V'): \
	case _UT('w'): \
	case _UT('W'): \
	case _UT('x'): \
	case _UT('X'): \
	case _UT('y'): \
	case _UT('Y'): \
	case _UT('z'): \
	case _UT('Z')
156
157
158
/*
 * Forward declarations of the file-local parser routines.
 *
 * The Parse* functions each take the current position (first) and the end of
 * input (afterLast) and return a position in the input, or NULL after an
 * error has been recorded in the parser state.
 */
static const URI_CHAR * URI_FUNC(ParseAuthority)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParseAuthorityTwo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
static const URI_CHAR * URI_FUNC(ParseHexZero)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
static const URI_CHAR * URI_FUNC(ParseHierPart)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParseIpFutLoop)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParseIpFutStopGo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParseIpLit2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParseIPv6address2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParseMustBeSegmentNzNc)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParseOwnHost)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParseOwnHost2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParseOwnHostUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParseOwnHostUserInfoNz)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParseOwnPortUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParseOwnUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParsePartHelperTwo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParsePathAbsEmpty)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParsePathAbsNoLeadSlash)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParsePathRootless)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParsePchar)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParsePctEncoded)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParsePctSubUnres)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParsePort)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast);
static const URI_CHAR * URI_FUNC(ParseQueryFrag)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParseSegment)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParseSegmentNz)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParseSegmentNzNcOrScheme2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParseUriReference)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParseUriTail)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParseUriTailTwo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);
static const URI_CHAR * URI_FUNC(ParseZeroMoreSlashSegs)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast, UriMemoryManager * memory);

/* Hooks run when a grammar rule is left; the UriBool ones report failure with URI_FALSE */
static UriBool URI_FUNC(OnExitOwnHost2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, UriMemoryManager * memory);
static UriBool URI_FUNC(OnExitOwnHostUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, UriMemoryManager * memory);
static UriBool URI_FUNC(OnExitOwnPortUserInfo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, UriMemoryManager * memory);
static UriBool URI_FUNC(OnExitSegmentNzNcOrScheme2)(URI_TYPE(ParserState) * state, const URI_CHAR * first, UriMemoryManager * memory);
static void URI_FUNC(OnExitPartHelperTwo)(URI_TYPE(ParserState) * state);

/* Parser state helpers */
static void URI_FUNC(ResetParserStateExceptUri)(URI_TYPE(ParserState) * state);

static UriBool URI_FUNC(PushPathSegment)(URI_TYPE(ParserState) * state,
		const URI_CHAR * first, const URI_CHAR * afterLast,
		UriMemoryManager * memory);

/* Error exits: free the URI members and record the error in the state */
static void URI_FUNC(StopSyntax)(URI_TYPE(ParserState) * state, const URI_CHAR * errorPos, UriMemoryManager * memory);
static void URI_FUNC(StopMalloc)(URI_TYPE(ParserState) * state, UriMemoryManager * memory);

/* Entry point used by the public parsing functions */
static int URI_FUNC(ParseUriExMm)(URI_TYPE(ParserState) * state,
		const URI_CHAR * first, const URI_CHAR * afterLast,
		UriMemoryManager * memory);
209
210
211
URI_FUNC(StopSyntax)212 static URI_INLINE void URI_FUNC(StopSyntax)(URI_TYPE(ParserState) * state,
213 const URI_CHAR * errorPos, UriMemoryManager * memory) {
214 URI_FUNC(FreeUriMembersMm)(state->uri, memory);
215 state->errorPos = errorPos;
216 state->errorCode = URI_ERROR_SYNTAX;
217 }
218
219
220
URI_FUNC(StopMalloc)221 static URI_INLINE void URI_FUNC(StopMalloc)(URI_TYPE(ParserState) * state, UriMemoryManager * memory) {
222 URI_FUNC(FreeUriMembersMm)(state->uri, memory);
223 state->errorPos = NULL;
224 state->errorCode = URI_ERROR_MALLOC;
225 }
226
227
228
229 /*
230 * [authority]-><[>[ipLit2][authorityTwo]
231 * [authority]->[ownHostUserInfoNz]
232 * [authority]-><NULL>
233 */
URI_FUNC(ParseAuthority)234 static URI_INLINE const URI_CHAR * URI_FUNC(ParseAuthority)(
235 URI_TYPE(ParserState) * state, const URI_CHAR * first,
236 const URI_CHAR * afterLast, UriMemoryManager * memory) {
237 if (first >= afterLast) {
238 /* "" regname host */
239 state->uri->hostText.first = URI_FUNC(SafeToPointTo);
240 state->uri->hostText.afterLast = URI_FUNC(SafeToPointTo);
241 return afterLast;
242 }
243
244 switch (*first) {
245 case _UT('['):
246 {
247 const URI_CHAR * const afterIpLit2
248 = URI_FUNC(ParseIpLit2)(state, first + 1, afterLast, memory);
249 if (afterIpLit2 == NULL) {
250 return NULL;
251 }
252 state->uri->hostText.first = first + 1; /* HOST BEGIN */
253 return URI_FUNC(ParseAuthorityTwo)(state, afterIpLit2, afterLast);
254 }
255
256 case _UT('!'):
257 case _UT('$'):
258 case _UT('%'):
259 case _UT('&'):
260 case _UT('('):
261 case _UT(')'):
262 case _UT('-'):
263 case _UT('*'):
264 case _UT(','):
265 case _UT('.'):
266 case _UT(':'):
267 case _UT(';'):
268 case _UT('@'):
269 case _UT('\''):
270 case _UT('_'):
271 case _UT('~'):
272 case _UT('+'):
273 case _UT('='):
274 case URI_SET_DIGIT:
275 case URI_SET_ALPHA:
276 state->uri->userInfo.first = first; /* USERINFO BEGIN */
277 return URI_FUNC(ParseOwnHostUserInfoNz)(state, first, afterLast, memory);
278
279 default:
280 /* "" regname host */
281 state->uri->hostText.first = URI_FUNC(SafeToPointTo);
282 state->uri->hostText.afterLast = URI_FUNC(SafeToPointTo);
283 return first;
284 }
285 }
286
287
288
289 /*
290 * [authorityTwo]-><:>[port]
291 * [authorityTwo]-><NULL>
292 */
URI_FUNC(ParseAuthorityTwo)293 static URI_INLINE const URI_CHAR * URI_FUNC(ParseAuthorityTwo)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
294 if (first >= afterLast) {
295 return afterLast;
296 }
297
298 switch (*first) {
299 case _UT(':'):
300 {
301 const URI_CHAR * const afterPort = URI_FUNC(ParsePort)(state, first + 1, afterLast);
302 if (afterPort == NULL) {
303 return NULL;
304 }
305 state->uri->portText.first = first + 1; /* PORT BEGIN */
306 state->uri->portText.afterLast = afterPort; /* PORT END */
307 return afterPort;
308 }
309
310 default:
311 return first;
312 }
313 }
314
315
316
317 /*
318 * [hexZero]->[HEXDIG][hexZero]
319 * [hexZero]-><NULL>
320 */
URI_FUNC(ParseHexZero)321 static const URI_CHAR * URI_FUNC(ParseHexZero)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
322 if (first >= afterLast) {
323 return afterLast;
324 }
325
326 switch (*first) {
327 case URI_SET_HEXDIG:
328 return URI_FUNC(ParseHexZero)(state, first + 1, afterLast);
329
330 default:
331 return first;
332 }
333 }
334
335
336
337 /*
338 * [hierPart]->[pathRootless]
339 * [hierPart]-></>[partHelperTwo]
340 * [hierPart]-><NULL>
341 */
URI_FUNC(ParseHierPart)342 static URI_INLINE const URI_CHAR * URI_FUNC(ParseHierPart)(
343 URI_TYPE(ParserState) * state, const URI_CHAR * first,
344 const URI_CHAR * afterLast, UriMemoryManager * memory) {
345 if (first >= afterLast) {
346 return afterLast;
347 }
348
349 switch (*first) {
350 case _UT('!'):
351 case _UT('$'):
352 case _UT('%'):
353 case _UT('&'):
354 case _UT('('):
355 case _UT(')'):
356 case _UT('-'):
357 case _UT('*'):
358 case _UT(','):
359 case _UT('.'):
360 case _UT(':'):
361 case _UT(';'):
362 case _UT('@'):
363 case _UT('\''):
364 case _UT('_'):
365 case _UT('~'):
366 case _UT('+'):
367 case _UT('='):
368 case URI_SET_DIGIT:
369 case URI_SET_ALPHA:
370 return URI_FUNC(ParsePathRootless)(state, first, afterLast, memory);
371
372 case _UT('/'):
373 return URI_FUNC(ParsePartHelperTwo)(state, first + 1, afterLast, memory);
374
375 default:
376 return first;
377 }
378 }
379
380
381
382 /*
383 * [ipFutLoop]->[subDelims][ipFutStopGo]
384 * [ipFutLoop]->[unreserved][ipFutStopGo]
385 * [ipFutLoop]-><:>[ipFutStopGo]
386 */
URI_FUNC(ParseIpFutLoop)387 static const URI_CHAR * URI_FUNC(ParseIpFutLoop)(URI_TYPE(ParserState) * state,
388 const URI_CHAR * first, const URI_CHAR * afterLast,
389 UriMemoryManager * memory) {
390 if (first >= afterLast) {
391 URI_FUNC(StopSyntax)(state, afterLast, memory);
392 return NULL;
393 }
394
395 switch (*first) {
396 case _UT('!'):
397 case _UT('$'):
398 case _UT('&'):
399 case _UT('('):
400 case _UT(')'):
401 case _UT('-'):
402 case _UT('*'):
403 case _UT(','):
404 case _UT('.'):
405 case _UT(':'):
406 case _UT(';'):
407 case _UT('\''):
408 case _UT('_'):
409 case _UT('~'):
410 case _UT('+'):
411 case _UT('='):
412 case URI_SET_DIGIT:
413 case URI_SET_ALPHA:
414 return URI_FUNC(ParseIpFutStopGo)(state, first + 1, afterLast, memory);
415
416 default:
417 URI_FUNC(StopSyntax)(state, first, memory);
418 return NULL;
419 }
420 }
421
422
423
424 /*
425 * [ipFutStopGo]->[ipFutLoop]
426 * [ipFutStopGo]-><NULL>
427 */
URI_FUNC(ParseIpFutStopGo)428 static const URI_CHAR * URI_FUNC(ParseIpFutStopGo)(
429 URI_TYPE(ParserState) * state,
430 const URI_CHAR * first, const URI_CHAR * afterLast,
431 UriMemoryManager * memory) {
432 if (first >= afterLast) {
433 return afterLast;
434 }
435
436 switch (*first) {
437 case _UT('!'):
438 case _UT('$'):
439 case _UT('&'):
440 case _UT('('):
441 case _UT(')'):
442 case _UT('-'):
443 case _UT('*'):
444 case _UT(','):
445 case _UT('.'):
446 case _UT(':'):
447 case _UT(';'):
448 case _UT('\''):
449 case _UT('_'):
450 case _UT('~'):
451 case _UT('+'):
452 case _UT('='):
453 case URI_SET_DIGIT:
454 case URI_SET_ALPHA:
455 return URI_FUNC(ParseIpFutLoop)(state, first, afterLast, memory);
456
457 default:
458 return first;
459 }
460 }
461
462
463
464 /*
465 * [ipFuture]-><v>[HEXDIG][hexZero]<.>[ipFutLoop]
466 */
URI_FUNC(ParseIpFuture)467 static const URI_CHAR * URI_FUNC(ParseIpFuture)(URI_TYPE(ParserState) * state,
468 const URI_CHAR * first, const URI_CHAR * afterLast,
469 UriMemoryManager * memory) {
470 if (first >= afterLast) {
471 URI_FUNC(StopSyntax)(state, afterLast, memory);
472 return NULL;
473 }
474
475 /*
476 First character has already been
477 checked before entering this rule.
478
479 switch (*first) {
480 case _UT('v'):
481 */
482 if (first + 1 >= afterLast) {
483 URI_FUNC(StopSyntax)(state, afterLast, memory);
484 return NULL;
485 }
486
487 switch (first[1]) {
488 case URI_SET_HEXDIG:
489 {
490 const URI_CHAR * afterIpFutLoop;
491 const URI_CHAR * const afterHexZero
492 = URI_FUNC(ParseHexZero)(state, first + 2, afterLast);
493 if (afterHexZero == NULL) {
494 return NULL;
495 }
496 if (afterHexZero >= afterLast) {
497 URI_FUNC(StopSyntax)(state, afterLast, memory);
498 return NULL;
499 }
500 if (*afterHexZero != _UT('.')) {
501 URI_FUNC(StopSyntax)(state, afterHexZero, memory);
502 return NULL;
503 }
504 state->uri->hostText.first = first; /* HOST BEGIN */
505 state->uri->hostData.ipFuture.first = first; /* IPFUTURE BEGIN */
506 afterIpFutLoop = URI_FUNC(ParseIpFutLoop)(state, afterHexZero + 1, afterLast, memory);
507 if (afterIpFutLoop == NULL) {
508 return NULL;
509 }
510 state->uri->hostText.afterLast = afterIpFutLoop; /* HOST END */
511 state->uri->hostData.ipFuture.afterLast = afterIpFutLoop; /* IPFUTURE END */
512 return afterIpFutLoop;
513 }
514
515 default:
516 URI_FUNC(StopSyntax)(state, first + 1, memory);
517 return NULL;
518 }
519
520 /*
521 default:
522 URI_FUNC(StopSyntax)(state, first, memory);
523 return NULL;
524 }
525 */
526 }
527
528
529
530 /*
531 * [ipLit2]->[ipFuture]<]>
532 * [ipLit2]->[IPv6address2]
533 */
URI_FUNC(ParseIpLit2)534 static URI_INLINE const URI_CHAR * URI_FUNC(ParseIpLit2)(
535 URI_TYPE(ParserState) * state, const URI_CHAR * first,
536 const URI_CHAR * afterLast, UriMemoryManager * memory) {
537 if (first >= afterLast) {
538 URI_FUNC(StopSyntax)(state, afterLast, memory);
539 return NULL;
540 }
541
542 switch (*first) {
543 case _UT('v'):
544 {
545 const URI_CHAR * const afterIpFuture
546 = URI_FUNC(ParseIpFuture)(state, first, afterLast, memory);
547 if (afterIpFuture == NULL) {
548 return NULL;
549 }
550 if (afterIpFuture >= afterLast) {
551 URI_FUNC(StopSyntax)(state, afterLast, memory);
552 return NULL;
553 }
554 if (*afterIpFuture != _UT(']')) {
555 URI_FUNC(StopSyntax)(state, afterIpFuture, memory);
556 return NULL;
557 }
558 return afterIpFuture + 1;
559 }
560
561 case _UT(':'):
562 case _UT(']'):
563 case URI_SET_HEXDIG:
564 state->uri->hostData.ip6 = memory->malloc(memory, 1 * sizeof(UriIp6)); /* Freed when stopping on parse error */
565 if (state->uri->hostData.ip6 == NULL) {
566 URI_FUNC(StopMalloc)(state, memory);
567 return NULL;
568 }
569 return URI_FUNC(ParseIPv6address2)(state, first, afterLast, memory);
570
571 default:
572 URI_FUNC(StopSyntax)(state, first, memory);
573 return NULL;
574 }
575 }
576
577
578
579 /*
580 * [IPv6address2]->..<]>
581 */
URI_FUNC(ParseIPv6address2)582 static const URI_CHAR * URI_FUNC(ParseIPv6address2)(
583 URI_TYPE(ParserState) * state,
584 const URI_CHAR * first, const URI_CHAR * afterLast,
585 UriMemoryManager * memory) {
586 int zipperEver = 0;
587 int quadsDone = 0;
588 int digitCount = 0;
589 unsigned char digitHistory[4];
590 int ip4OctetsDone = 0;
591
592 unsigned char quadsAfterZipper[14];
593 int quadsAfterZipperCount = 0;
594
595
596 for (;;) {
597 if (first >= afterLast) {
598 URI_FUNC(StopSyntax)(state, afterLast, memory);
599 return NULL;
600 }
601
602 /* Inside IPv4 part? */
603 if (ip4OctetsDone > 0) {
604 /* Eat rest of IPv4 address */
605 for (;;) {
606 switch (*first) {
607 case URI_SET_DIGIT:
608 if (digitCount == 4) {
609 URI_FUNC(StopSyntax)(state, first, memory);
610 return NULL;
611 }
612 digitHistory[digitCount++] = (unsigned char)(9 + *first - _UT('9'));
613 break;
614
615 case _UT('.'):
616 if ((ip4OctetsDone == 4) /* NOTE! */
617 || (digitCount == 0)
618 || (digitCount == 4)) {
619 /* Invalid digit or octet count */
620 URI_FUNC(StopSyntax)(state, first, memory);
621 return NULL;
622 } else if ((digitCount > 1)
623 && (digitHistory[0] == 0)) {
624 /* Leading zero */
625 URI_FUNC(StopSyntax)(state, first - digitCount, memory);
626 return NULL;
627 } else if ((digitCount > 2)
628 && (digitHistory[1] == 0)) {
629 /* Leading zero */
630 URI_FUNC(StopSyntax)(state, first - digitCount + 1, memory);
631 return NULL;
632 } else if ((digitCount == 3)
633 && (100 * digitHistory[0]
634 + 10 * digitHistory[1]
635 + digitHistory[2] > 255)) {
636 /* Octet value too large */
637 if (digitHistory[0] > 2) {
638 URI_FUNC(StopSyntax)(state, first - 3, memory);
639 } else if (digitHistory[1] > 5) {
640 URI_FUNC(StopSyntax)(state, first - 2, memory);
641 } else {
642 URI_FUNC(StopSyntax)(state, first - 1, memory);
643 }
644 return NULL;
645 }
646
647 /* Copy IPv4 octet */
648 state->uri->hostData.ip6->data[16 - 4 + ip4OctetsDone] = uriGetOctetValue(digitHistory, digitCount);
649 digitCount = 0;
650 ip4OctetsDone++;
651 break;
652
653 case _UT(']'):
654 if ((ip4OctetsDone != 3) /* NOTE! */
655 || (digitCount == 0)
656 || (digitCount == 4)) {
657 /* Invalid digit or octet count */
658 URI_FUNC(StopSyntax)(state, first, memory);
659 return NULL;
660 } else if ((digitCount > 1)
661 && (digitHistory[0] == 0)) {
662 /* Leading zero */
663 URI_FUNC(StopSyntax)(state, first - digitCount, memory);
664 return NULL;
665 } else if ((digitCount > 2)
666 && (digitHistory[1] == 0)) {
667 /* Leading zero */
668 URI_FUNC(StopSyntax)(state, first - digitCount + 1, memory);
669 return NULL;
670 } else if ((digitCount == 3)
671 && (100 * digitHistory[0]
672 + 10 * digitHistory[1]
673 + digitHistory[2] > 255)) {
674 /* Octet value too large */
675 if (digitHistory[0] > 2) {
676 URI_FUNC(StopSyntax)(state, first - 3, memory);
677 } else if (digitHistory[1] > 5) {
678 URI_FUNC(StopSyntax)(state, first - 2, memory);
679 } else {
680 URI_FUNC(StopSyntax)(state, first - 1, memory);
681 }
682 return NULL;
683 }
684
685 state->uri->hostText.afterLast = first; /* HOST END */
686
687 /* Copy missing quads right before IPv4 */
688 memcpy(state->uri->hostData.ip6->data + 16 - 4 - 2 * quadsAfterZipperCount,
689 quadsAfterZipper, 2 * quadsAfterZipperCount);
690
691 /* Copy last IPv4 octet */
692 state->uri->hostData.ip6->data[16 - 4 + 3] = uriGetOctetValue(digitHistory, digitCount);
693
694 return first + 1;
695
696 default:
697 URI_FUNC(StopSyntax)(state, first, memory);
698 return NULL;
699 }
700 first++;
701
702 if (first >= afterLast) {
703 URI_FUNC(StopSyntax)(state, afterLast, memory);
704 return NULL;
705 }
706 }
707 } else {
708 /* Eat while no dot in sight */
709 int letterAmong = 0;
710 int walking = 1;
711 do {
712 switch (*first) {
713 case URI_SET_HEX_LETTER_LOWER:
714 letterAmong = 1;
715 if (digitCount == 4) {
716 URI_FUNC(StopSyntax)(state, first, memory);
717 return NULL;
718 }
719 digitHistory[digitCount] = (unsigned char)(15 + *first - _UT('f'));
720 digitCount++;
721 break;
722
723 case URI_SET_HEX_LETTER_UPPER:
724 letterAmong = 1;
725 if (digitCount == 4) {
726 URI_FUNC(StopSyntax)(state, first, memory);
727 return NULL;
728 }
729 digitHistory[digitCount] = (unsigned char)(15 + *first - _UT('F'));
730 digitCount++;
731 break;
732
733 case URI_SET_DIGIT:
734 if (digitCount == 4) {
735 URI_FUNC(StopSyntax)(state, first, memory);
736 return NULL;
737 }
738 digitHistory[digitCount] = (unsigned char)(9 + *first - _UT('9'));
739 digitCount++;
740 break;
741
742 case _UT(':'):
743 {
744 int setZipper = 0;
745
746 if (digitCount > 0) {
747 if (zipperEver) {
748 uriWriteQuadToDoubleByte(digitHistory, digitCount, quadsAfterZipper + 2 * quadsAfterZipperCount);
749 quadsAfterZipperCount++;
750 } else {
751 uriWriteQuadToDoubleByte(digitHistory, digitCount, state->uri->hostData.ip6->data + 2 * quadsDone);
752 }
753 quadsDone++;
754 digitCount = 0;
755 }
756 letterAmong = 0;
757
758 /* Too many quads? */
759 if (quadsDone >= 8 - zipperEver) {
760 URI_FUNC(StopSyntax)(state, first, memory);
761 return NULL;
762 }
763
764 /* "::"? */
765 if (first + 1 >= afterLast) {
766 URI_FUNC(StopSyntax)(state, afterLast, memory);
767 return NULL;
768 }
769 if (first[1] == _UT(':')) {
770 const int resetOffset = 2 * (quadsDone + (digitCount > 0));
771
772 first++;
773 if (zipperEver) {
774 URI_FUNC(StopSyntax)(state, first, memory);
775 return NULL; /* "::.+::" */
776 }
777
778 /* Zero everything after zipper */
779 memset(state->uri->hostData.ip6->data + resetOffset, 0, 16 - resetOffset);
780 setZipper = 1;
781
782 /* ":::+"? */
783 if (first + 1 >= afterLast) {
784 URI_FUNC(StopSyntax)(state, afterLast, memory);
785 return NULL; /* No ']' yet */
786 }
787 if (first[1] == _UT(':')) {
788 URI_FUNC(StopSyntax)(state, first + 1, memory);
789 return NULL; /* ":::+ "*/
790 }
791 }
792
793 if (setZipper) {
794 zipperEver = 1;
795 }
796 }
797 break;
798
799 case _UT('.'):
800 if ((quadsDone > 6) /* NOTE */
801 || (!zipperEver && (quadsDone < 6))
802 || letterAmong
803 || (digitCount == 0)
804 || (digitCount == 4)) {
805 /* Invalid octet before */
806 URI_FUNC(StopSyntax)(state, first, memory);
807 return NULL;
808 } else if ((digitCount > 1)
809 && (digitHistory[0] == 0)) {
810 /* Leading zero */
811 URI_FUNC(StopSyntax)(state, first - digitCount, memory);
812 return NULL;
813 } else if ((digitCount > 2)
814 && (digitHistory[1] == 0)) {
815 /* Leading zero */
816 URI_FUNC(StopSyntax)(state, first - digitCount + 1, memory);
817 return NULL;
818 } else if ((digitCount == 3)
819 && (100 * digitHistory[0]
820 + 10 * digitHistory[1]
821 + digitHistory[2] > 255)) {
822 /* Octet value too large */
823 if (digitHistory[0] > 2) {
824 URI_FUNC(StopSyntax)(state, first - 3, memory);
825 } else if (digitHistory[1] > 5) {
826 URI_FUNC(StopSyntax)(state, first - 2, memory);
827 } else {
828 URI_FUNC(StopSyntax)(state, first - 1, memory);
829 }
830 return NULL;
831 }
832
833 /* Copy first IPv4 octet */
834 state->uri->hostData.ip6->data[16 - 4] = uriGetOctetValue(digitHistory, digitCount);
835 digitCount = 0;
836
837 /* Switch over to IPv4 loop */
838 ip4OctetsDone = 1;
839 walking = 0;
840 break;
841
842 case _UT(']'):
843 /* Too little quads? */
844 if (!zipperEver && !((quadsDone == 7) && (digitCount > 0))) {
845 URI_FUNC(StopSyntax)(state, first, memory);
846 return NULL;
847 }
848
849 if (digitCount > 0) {
850 if (zipperEver) {
851 uriWriteQuadToDoubleByte(digitHistory, digitCount, quadsAfterZipper + 2 * quadsAfterZipperCount);
852 quadsAfterZipperCount++;
853 } else {
854 uriWriteQuadToDoubleByte(digitHistory, digitCount, state->uri->hostData.ip6->data + 2 * quadsDone);
855 }
856 /*
857 quadsDone++;
858 digitCount = 0;
859 */
860 }
861
862 /* Copy missing quads to the end */
863 memcpy(state->uri->hostData.ip6->data + 16 - 2 * quadsAfterZipperCount,
864 quadsAfterZipper, 2 * quadsAfterZipperCount);
865
866 state->uri->hostText.afterLast = first; /* HOST END */
867 return first + 1; /* Fine */
868
869 default:
870 URI_FUNC(StopSyntax)(state, first, memory);
871 return NULL;
872 }
873 first++;
874
875 if (first >= afterLast) {
876 URI_FUNC(StopSyntax)(state, afterLast, memory);
877 return NULL; /* No ']' yet */
878 }
879 } while (walking);
880 }
881 }
882 }
883
884
885
886 /*
887 * [mustBeSegmentNzNc]->[pctEncoded][mustBeSegmentNzNc]
888 * [mustBeSegmentNzNc]->[subDelims][mustBeSegmentNzNc]
889 * [mustBeSegmentNzNc]->[unreserved][mustBeSegmentNzNc]
890 * [mustBeSegmentNzNc]->[uriTail] // can take <NULL>
891 * [mustBeSegmentNzNc]-></>[segment][zeroMoreSlashSegs][uriTail]
892 * [mustBeSegmentNzNc]-><@>[mustBeSegmentNzNc]
893 */
URI_FUNC(ParseMustBeSegmentNzNc)894 static const URI_CHAR * URI_FUNC(ParseMustBeSegmentNzNc)(
895 URI_TYPE(ParserState) * state, const URI_CHAR * first,
896 const URI_CHAR * afterLast, UriMemoryManager * memory) {
897 if (first >= afterLast) {
898 if (!URI_FUNC(PushPathSegment)(state, state->uri->scheme.first, first, memory)) { /* SEGMENT BOTH */
899 URI_FUNC(StopMalloc)(state, memory);
900 return NULL;
901 }
902 state->uri->scheme.first = NULL; /* Not a scheme, reset */
903 return afterLast;
904 }
905
906 switch (*first) {
907 case _UT('%'):
908 {
909 const URI_CHAR * const afterPctEncoded
910 = URI_FUNC(ParsePctEncoded)(state, first, afterLast, memory);
911 if (afterPctEncoded == NULL) {
912 return NULL;
913 }
914 return URI_FUNC(ParseMustBeSegmentNzNc)(state, afterPctEncoded, afterLast, memory);
915 }
916
917 case _UT('@'):
918 case _UT('!'):
919 case _UT('$'):
920 case _UT('&'):
921 case _UT('('):
922 case _UT(')'):
923 case _UT('*'):
924 case _UT(','):
925 case _UT(';'):
926 case _UT('\''):
927 case _UT('+'):
928 case _UT('='):
929 case _UT('-'):
930 case _UT('.'):
931 case _UT('_'):
932 case _UT('~'):
933 case URI_SET_DIGIT:
934 case URI_SET_ALPHA:
935 return URI_FUNC(ParseMustBeSegmentNzNc)(state, first + 1, afterLast, memory);
936
937 case _UT('/'):
938 {
939 const URI_CHAR * afterZeroMoreSlashSegs;
940 const URI_CHAR * afterSegment;
941 if (!URI_FUNC(PushPathSegment)(state, state->uri->scheme.first, first, memory)) { /* SEGMENT BOTH */
942 URI_FUNC(StopMalloc)(state, memory);
943 return NULL;
944 }
945 state->uri->scheme.first = NULL; /* Not a scheme, reset */
946 afterSegment = URI_FUNC(ParseSegment)(state, first + 1, afterLast, memory);
947 if (afterSegment == NULL) {
948 return NULL;
949 }
950 if (!URI_FUNC(PushPathSegment)(state, first + 1, afterSegment, memory)) { /* SEGMENT BOTH */
951 URI_FUNC(StopMalloc)(state, memory);
952 return NULL;
953 }
954 afterZeroMoreSlashSegs
955 = URI_FUNC(ParseZeroMoreSlashSegs)(state, afterSegment, afterLast, memory);
956 if (afterZeroMoreSlashSegs == NULL) {
957 return NULL;
958 }
959 return URI_FUNC(ParseUriTail)(state, afterZeroMoreSlashSegs, afterLast, memory);
960 }
961
962 default:
963 if (!URI_FUNC(PushPathSegment)(state, state->uri->scheme.first, first, memory)) { /* SEGMENT BOTH */
964 URI_FUNC(StopMalloc)(state, memory);
965 return NULL;
966 }
967 state->uri->scheme.first = NULL; /* Not a scheme, reset */
968 return URI_FUNC(ParseUriTail)(state, first, afterLast, memory);
969 }
970 }
971
972
973
974 /*
975 * [ownHost]-><[>[ipLit2][authorityTwo]
976 * [ownHost]->[ownHost2] // can take <NULL>
977 */
URI_FUNC(ParseOwnHost)978 static URI_INLINE const URI_CHAR * URI_FUNC(ParseOwnHost)(
979 URI_TYPE(ParserState) * state, const URI_CHAR * first,
980 const URI_CHAR * afterLast, UriMemoryManager * memory) {
981 if (first >= afterLast) {
982 state->uri->hostText.afterLast = afterLast; /* HOST END */
983 return afterLast;
984 }
985
986 switch (*first) {
987 case _UT('['):
988 {
989 const URI_CHAR * const afterIpLit2
990 = URI_FUNC(ParseIpLit2)(state, first + 1, afterLast, memory);
991 if (afterIpLit2 == NULL) {
992 return NULL;
993 }
994 state->uri->hostText.first = first + 1; /* HOST BEGIN */
995 return URI_FUNC(ParseAuthorityTwo)(state, afterIpLit2, afterLast);
996 }
997
998 default:
999 return URI_FUNC(ParseOwnHost2)(state, first, afterLast, memory);
1000 }
1001 }
1002
1003
1004
URI_FUNC(OnExitOwnHost2)1005 static URI_INLINE UriBool URI_FUNC(OnExitOwnHost2)(
1006 URI_TYPE(ParserState) * state, const URI_CHAR * first,
1007 UriMemoryManager * memory) {
1008 state->uri->hostText.afterLast = first; /* HOST END */
1009
1010 /* Valid IPv4 or just a regname? */
1011 state->uri->hostData.ip4 = memory->malloc(memory, 1 * sizeof(UriIp4)); /* Freed when stopping on parse error */
1012 if (state->uri->hostData.ip4 == NULL) {
1013 return URI_FALSE; /* Raises malloc error */
1014 }
1015 if (URI_FUNC(ParseIpFourAddress)(state->uri->hostData.ip4->data,
1016 state->uri->hostText.first, state->uri->hostText.afterLast)) {
1017 /* Not IPv4 */
1018 memory->free(memory, state->uri->hostData.ip4);
1019 state->uri->hostData.ip4 = NULL;
1020 }
1021 return URI_TRUE; /* Success */
1022 }
1023
1024
1025
1026 /*
1027 * [ownHost2]->[authorityTwo] // can take <NULL>
1028 * [ownHost2]->[pctSubUnres][ownHost2]
1029 */
URI_FUNC(ParseOwnHost2)1030 static const URI_CHAR * URI_FUNC(ParseOwnHost2)(
1031 URI_TYPE(ParserState) * state, const URI_CHAR * first,
1032 const URI_CHAR * afterLast, UriMemoryManager * memory) {
1033 if (first >= afterLast) {
1034 if (!URI_FUNC(OnExitOwnHost2)(state, first, memory)) {
1035 URI_FUNC(StopMalloc)(state, memory);
1036 return NULL;
1037 }
1038 return afterLast;
1039 }
1040
1041 switch (*first) {
1042 case _UT('!'):
1043 case _UT('$'):
1044 case _UT('%'):
1045 case _UT('&'):
1046 case _UT('('):
1047 case _UT(')'):
1048 case _UT('-'):
1049 case _UT('*'):
1050 case _UT(','):
1051 case _UT('.'):
1052 case _UT(';'):
1053 case _UT('\''):
1054 case _UT('_'):
1055 case _UT('~'):
1056 case _UT('+'):
1057 case _UT('='):
1058 case URI_SET_DIGIT:
1059 case URI_SET_ALPHA:
1060 {
1061 const URI_CHAR * const afterPctSubUnres
1062 = URI_FUNC(ParsePctSubUnres)(state, first, afterLast, memory);
1063 if (afterPctSubUnres == NULL) {
1064 return NULL;
1065 }
1066 return URI_FUNC(ParseOwnHost2)(state, afterPctSubUnres, afterLast, memory);
1067 }
1068
1069 default:
1070 if (!URI_FUNC(OnExitOwnHost2)(state, first, memory)) {
1071 URI_FUNC(StopMalloc)(state, memory);
1072 return NULL;
1073 }
1074 return URI_FUNC(ParseAuthorityTwo)(state, first, afterLast);
1075 }
1076 }
1077
1078
1079
URI_FUNC(OnExitOwnHostUserInfo)1080 static URI_INLINE UriBool URI_FUNC(OnExitOwnHostUserInfo)(
1081 URI_TYPE(ParserState) * state, const URI_CHAR * first,
1082 UriMemoryManager * memory) {
1083 state->uri->hostText.first = state->uri->userInfo.first; /* Host instead of userInfo, update */
1084 state->uri->userInfo.first = NULL; /* Not a userInfo, reset */
1085 state->uri->hostText.afterLast = first; /* HOST END */
1086
1087 /* Valid IPv4 or just a regname? */
1088 state->uri->hostData.ip4 = memory->malloc(memory, 1 * sizeof(UriIp4)); /* Freed when stopping on parse error */
1089 if (state->uri->hostData.ip4 == NULL) {
1090 return URI_FALSE; /* Raises malloc error */
1091 }
1092 if (URI_FUNC(ParseIpFourAddress)(state->uri->hostData.ip4->data,
1093 state->uri->hostText.first, state->uri->hostText.afterLast)) {
1094 /* Not IPv4 */
1095 memory->free(memory, state->uri->hostData.ip4);
1096 state->uri->hostData.ip4 = NULL;
1097 }
1098 return URI_TRUE; /* Success */
1099 }
1100
1101
1102
1103 /*
1104 * [ownHostUserInfo]->[ownHostUserInfoNz]
1105 * [ownHostUserInfo]-><NULL>
1106 */
URI_FUNC(ParseOwnHostUserInfo)1107 static URI_INLINE const URI_CHAR * URI_FUNC(ParseOwnHostUserInfo)(
1108 URI_TYPE(ParserState) * state, const URI_CHAR * first,
1109 const URI_CHAR * afterLast, UriMemoryManager * memory) {
1110 if (first >= afterLast) {
1111 if (!URI_FUNC(OnExitOwnHostUserInfo)(state, first, memory)) {
1112 URI_FUNC(StopMalloc)(state, memory);
1113 return NULL;
1114 }
1115 return afterLast;
1116 }
1117
1118 switch (*first) {
1119 case _UT('!'):
1120 case _UT('$'):
1121 case _UT('%'):
1122 case _UT('&'):
1123 case _UT('('):
1124 case _UT(')'):
1125 case _UT('-'):
1126 case _UT('*'):
1127 case _UT(','):
1128 case _UT('.'):
1129 case _UT(':'):
1130 case _UT(';'):
1131 case _UT('@'):
1132 case _UT('\''):
1133 case _UT('_'):
1134 case _UT('~'):
1135 case _UT('+'):
1136 case _UT('='):
1137 case URI_SET_DIGIT:
1138 case URI_SET_ALPHA:
1139 return URI_FUNC(ParseOwnHostUserInfoNz)(state, first, afterLast, memory);
1140
1141 default:
1142 if (!URI_FUNC(OnExitOwnHostUserInfo)(state, first, memory)) {
1143 URI_FUNC(StopMalloc)(state, memory);
1144 return NULL;
1145 }
1146 return first;
1147 }
1148 }
1149
1150
1151
1152 /*
1153 * [ownHostUserInfoNz]->[pctSubUnres][ownHostUserInfo]
1154 * [ownHostUserInfoNz]-><:>[ownPortUserInfo]
1155 * [ownHostUserInfoNz]-><@>[ownHost]
1156 */
URI_FUNC(ParseOwnHostUserInfoNz)1157 static const URI_CHAR * URI_FUNC(ParseOwnHostUserInfoNz)(
1158 URI_TYPE(ParserState) * state, const URI_CHAR * first,
1159 const URI_CHAR * afterLast, UriMemoryManager * memory) {
1160 if (first >= afterLast) {
1161 URI_FUNC(StopSyntax)(state, afterLast, memory);
1162 return NULL;
1163 }
1164
1165 switch (*first) {
1166 case _UT('!'):
1167 case _UT('$'):
1168 case _UT('%'):
1169 case _UT('&'):
1170 case _UT('('):
1171 case _UT(')'):
1172 case _UT('-'):
1173 case _UT('*'):
1174 case _UT(','):
1175 case _UT('.'):
1176 case _UT(';'):
1177 case _UT('\''):
1178 case _UT('_'):
1179 case _UT('~'):
1180 case _UT('+'):
1181 case _UT('='):
1182 case URI_SET_DIGIT:
1183 case URI_SET_ALPHA:
1184 {
1185 const URI_CHAR * const afterPctSubUnres
1186 = URI_FUNC(ParsePctSubUnres)(state, first, afterLast, memory);
1187 if (afterPctSubUnres == NULL) {
1188 return NULL;
1189 }
1190 return URI_FUNC(ParseOwnHostUserInfo)(state, afterPctSubUnres, afterLast, memory);
1191 }
1192
1193 case _UT(':'):
1194 state->uri->hostText.afterLast = first; /* HOST END */
1195 state->uri->portText.first = first + 1; /* PORT BEGIN */
1196 return URI_FUNC(ParseOwnPortUserInfo)(state, first + 1, afterLast, memory);
1197
1198 case _UT('@'):
1199 state->uri->userInfo.afterLast = first; /* USERINFO END */
1200 state->uri->hostText.first = first + 1; /* HOST BEGIN */
1201 return URI_FUNC(ParseOwnHost)(state, first + 1, afterLast, memory);
1202
1203 default:
1204 URI_FUNC(StopSyntax)(state, first, memory);
1205 return NULL;
1206 }
1207 }
1208
1209
1210
URI_FUNC(OnExitOwnPortUserInfo)1211 static URI_INLINE UriBool URI_FUNC(OnExitOwnPortUserInfo)(
1212 URI_TYPE(ParserState) * state, const URI_CHAR * first,
1213 UriMemoryManager * memory) {
1214 state->uri->hostText.first = state->uri->userInfo.first; /* Host instead of userInfo, update */
1215 state->uri->userInfo.first = NULL; /* Not a userInfo, reset */
1216 state->uri->portText.afterLast = first; /* PORT END */
1217
1218 /* Valid IPv4 or just a regname? */
1219 state->uri->hostData.ip4 = memory->malloc(memory, 1 * sizeof(UriIp4)); /* Freed when stopping on parse error */
1220 if (state->uri->hostData.ip4 == NULL) {
1221 return URI_FALSE; /* Raises malloc error */
1222 }
1223 if (URI_FUNC(ParseIpFourAddress)(state->uri->hostData.ip4->data,
1224 state->uri->hostText.first, state->uri->hostText.afterLast)) {
1225 /* Not IPv4 */
1226 memory->free(memory, state->uri->hostData.ip4);
1227 state->uri->hostData.ip4 = NULL;
1228 }
1229 return URI_TRUE; /* Success */
1230 }
1231
1232
1233
1234 /*
1235 * [ownPortUserInfo]->[ALPHA][ownUserInfo]
1236 * [ownPortUserInfo]->[DIGIT][ownPortUserInfo]
1237 * [ownPortUserInfo]-><.>[ownUserInfo]
1238 * [ownPortUserInfo]-><_>[ownUserInfo]
1239 * [ownPortUserInfo]-><~>[ownUserInfo]
1240 * [ownPortUserInfo]-><->[ownUserInfo]
1241 * [ownPortUserInfo]->[subDelims][ownUserInfo]
1242 * [ownPortUserInfo]->[pctEncoded][ownUserInfo]
1243 * [ownPortUserInfo]-><:>[ownUserInfo]
1244 * [ownPortUserInfo]-><@>[ownHost]
1245 * [ownPortUserInfo]-><NULL>
1246 */
URI_FUNC(ParseOwnPortUserInfo)1247 static const URI_CHAR * URI_FUNC(ParseOwnPortUserInfo)(
1248 URI_TYPE(ParserState) * state, const URI_CHAR * first,
1249 const URI_CHAR * afterLast, UriMemoryManager * memory) {
1250 if (first >= afterLast) {
1251 if (!URI_FUNC(OnExitOwnPortUserInfo)(state, first, memory)) {
1252 URI_FUNC(StopMalloc)(state, memory);
1253 return NULL;
1254 }
1255 return afterLast;
1256 }
1257
1258 switch (*first) {
1259 /* begin sub-delims */
1260 case _UT('!'):
1261 case _UT('$'):
1262 case _UT('&'):
1263 case _UT('\''):
1264 case _UT('('):
1265 case _UT(')'):
1266 case _UT('*'):
1267 case _UT('+'):
1268 case _UT(','):
1269 case _UT(';'):
1270 case _UT('='):
1271 /* end sub-delims */
1272 /* begin unreserved (except alpha and digit) */
1273 case _UT('-'):
1274 case _UT('.'):
1275 case _UT('_'):
1276 case _UT('~'):
1277 /* end unreserved (except alpha and digit) */
1278 case _UT(':'):
1279 case URI_SET_ALPHA:
1280 state->uri->hostText.afterLast = NULL; /* Not a host, reset */
1281 state->uri->portText.first = NULL; /* Not a port, reset */
1282 return URI_FUNC(ParseOwnUserInfo)(state, first + 1, afterLast, memory);
1283
1284 case URI_SET_DIGIT:
1285 return URI_FUNC(ParseOwnPortUserInfo)(state, first + 1, afterLast, memory);
1286
1287 case _UT('%'):
1288 state->uri->portText.first = NULL; /* Not a port, reset */
1289 {
1290 const URI_CHAR * const afterPct
1291 = URI_FUNC(ParsePctEncoded)(state, first, afterLast, memory);
1292 if (afterPct == NULL) {
1293 return NULL;
1294 }
1295 return URI_FUNC(ParseOwnUserInfo)(state, afterPct, afterLast, memory);
1296 }
1297
1298 case _UT('@'):
1299 state->uri->hostText.afterLast = NULL; /* Not a host, reset */
1300 state->uri->portText.first = NULL; /* Not a port, reset */
1301 state->uri->userInfo.afterLast = first; /* USERINFO END */
1302 state->uri->hostText.first = first + 1; /* HOST BEGIN */
1303 return URI_FUNC(ParseOwnHost)(state, first + 1, afterLast, memory);
1304
1305 default:
1306 if (!URI_FUNC(OnExitOwnPortUserInfo)(state, first, memory)) {
1307 URI_FUNC(StopMalloc)(state, memory);
1308 return NULL;
1309 }
1310 return first;
1311 }
1312 }
1313
1314
1315
1316 /*
1317 * [ownUserInfo]->[pctSubUnres][ownUserInfo]
1318 * [ownUserInfo]-><:>[ownUserInfo]
1319 * [ownUserInfo]-><@>[ownHost]
1320 */
URI_FUNC(ParseOwnUserInfo)1321 static const URI_CHAR * URI_FUNC(ParseOwnUserInfo)(
1322 URI_TYPE(ParserState) * state, const URI_CHAR * first,
1323 const URI_CHAR * afterLast, UriMemoryManager * memory) {
1324 if (first >= afterLast) {
1325 URI_FUNC(StopSyntax)(state, afterLast, memory);
1326 return NULL;
1327 }
1328
1329 switch (*first) {
1330 case _UT('!'):
1331 case _UT('$'):
1332 case _UT('%'):
1333 case _UT('&'):
1334 case _UT('('):
1335 case _UT(')'):
1336 case _UT('-'):
1337 case _UT('*'):
1338 case _UT(','):
1339 case _UT('.'):
1340 case _UT(';'):
1341 case _UT('\''):
1342 case _UT('_'):
1343 case _UT('~'):
1344 case _UT('+'):
1345 case _UT('='):
1346 case URI_SET_DIGIT:
1347 case URI_SET_ALPHA:
1348 {
1349 const URI_CHAR * const afterPctSubUnres
1350 = URI_FUNC(ParsePctSubUnres)(state, first, afterLast, memory);
1351 if (afterPctSubUnres == NULL) {
1352 return NULL;
1353 }
1354 return URI_FUNC(ParseOwnUserInfo)(state, afterPctSubUnres, afterLast, memory);
1355 }
1356
1357 case _UT(':'):
1358 return URI_FUNC(ParseOwnUserInfo)(state, first + 1, afterLast, memory);
1359
1360 case _UT('@'):
1361 /* SURE */
1362 state->uri->userInfo.afterLast = first; /* USERINFO END */
1363 state->uri->hostText.first = first + 1; /* HOST BEGIN */
1364 return URI_FUNC(ParseOwnHost)(state, first + 1, afterLast, memory);
1365
1366 default:
1367 URI_FUNC(StopSyntax)(state, first, memory);
1368 return NULL;
1369 }
1370 }
1371
1372
1373
URI_FUNC(OnExitPartHelperTwo)1374 static URI_INLINE void URI_FUNC(OnExitPartHelperTwo)(URI_TYPE(ParserState) * state) {
1375 state->uri->absolutePath = URI_TRUE;
1376 }
1377
1378
1379
1380 /*
1381 * [partHelperTwo]->[pathAbsNoLeadSlash] // can take <NULL>
1382 * [partHelperTwo]-></>[authority][pathAbsEmpty]
1383 */
URI_FUNC(ParsePartHelperTwo)1384 static URI_INLINE const URI_CHAR * URI_FUNC(ParsePartHelperTwo)(
1385 URI_TYPE(ParserState) * state, const URI_CHAR * first,
1386 const URI_CHAR * afterLast, UriMemoryManager * memory) {
1387 if (first >= afterLast) {
1388 URI_FUNC(OnExitPartHelperTwo)(state);
1389 return afterLast;
1390 }
1391
1392 switch (*first) {
1393 case _UT('/'):
1394 {
1395 const URI_CHAR * const afterAuthority
1396 = URI_FUNC(ParseAuthority)(state, first + 1, afterLast, memory);
1397 const URI_CHAR * afterPathAbsEmpty;
1398 if (afterAuthority == NULL) {
1399 return NULL;
1400 }
1401 afterPathAbsEmpty = URI_FUNC(ParsePathAbsEmpty)(state, afterAuthority, afterLast, memory);
1402
1403 URI_FUNC(FixEmptyTrailSegment)(state->uri, memory);
1404
1405 return afterPathAbsEmpty;
1406 }
1407
1408 default:
1409 URI_FUNC(OnExitPartHelperTwo)(state);
1410 return URI_FUNC(ParsePathAbsNoLeadSlash)(state, first, afterLast, memory);
1411 }
1412 }
1413
1414
1415
1416 /*
1417 * [pathAbsEmpty]-></>[segment][pathAbsEmpty]
1418 * [pathAbsEmpty]-><NULL>
1419 */
URI_FUNC(ParsePathAbsEmpty)1420 static const URI_CHAR * URI_FUNC(ParsePathAbsEmpty)(
1421 URI_TYPE(ParserState) * state, const URI_CHAR * first,
1422 const URI_CHAR * afterLast, UriMemoryManager * memory) {
1423 if (first >= afterLast) {
1424 return afterLast;
1425 }
1426
1427 switch (*first) {
1428 case _UT('/'):
1429 {
1430 const URI_CHAR * const afterSegment
1431 = URI_FUNC(ParseSegment)(state, first + 1, afterLast, memory);
1432 if (afterSegment == NULL) {
1433 return NULL;
1434 }
1435 if (!URI_FUNC(PushPathSegment)(state, first + 1, afterSegment, memory)) { /* SEGMENT BOTH */
1436 URI_FUNC(StopMalloc)(state, memory);
1437 return NULL;
1438 }
1439 return URI_FUNC(ParsePathAbsEmpty)(state, afterSegment, afterLast, memory);
1440 }
1441
1442 default:
1443 return first;
1444 }
1445 }
1446
1447
1448
1449 /*
1450 * [pathAbsNoLeadSlash]->[segmentNz][zeroMoreSlashSegs]
1451 * [pathAbsNoLeadSlash]-><NULL>
1452 */
URI_FUNC(ParsePathAbsNoLeadSlash)1453 static URI_INLINE const URI_CHAR * URI_FUNC(ParsePathAbsNoLeadSlash)(
1454 URI_TYPE(ParserState) * state, const URI_CHAR * first,
1455 const URI_CHAR * afterLast, UriMemoryManager * memory) {
1456 if (first >= afterLast) {
1457 return afterLast;
1458 }
1459
1460 switch (*first) {
1461 case _UT('!'):
1462 case _UT('$'):
1463 case _UT('%'):
1464 case _UT('&'):
1465 case _UT('('):
1466 case _UT(')'):
1467 case _UT('-'):
1468 case _UT('*'):
1469 case _UT(','):
1470 case _UT('.'):
1471 case _UT(':'):
1472 case _UT(';'):
1473 case _UT('@'):
1474 case _UT('\''):
1475 case _UT('_'):
1476 case _UT('~'):
1477 case _UT('+'):
1478 case _UT('='):
1479 case URI_SET_DIGIT:
1480 case URI_SET_ALPHA:
1481 {
1482 const URI_CHAR * const afterSegmentNz
1483 = URI_FUNC(ParseSegmentNz)(state, first, afterLast, memory);
1484 if (afterSegmentNz == NULL) {
1485 return NULL;
1486 }
1487 if (!URI_FUNC(PushPathSegment)(state, first, afterSegmentNz, memory)) { /* SEGMENT BOTH */
1488 URI_FUNC(StopMalloc)(state, memory);
1489 return NULL;
1490 }
1491 return URI_FUNC(ParseZeroMoreSlashSegs)(state, afterSegmentNz, afterLast, memory);
1492 }
1493
1494 default:
1495 return first;
1496 }
1497 }
1498
1499
1500
1501 /*
1502 * [pathRootless]->[segmentNz][zeroMoreSlashSegs]
1503 */
URI_FUNC(ParsePathRootless)1504 static URI_INLINE const URI_CHAR * URI_FUNC(ParsePathRootless)(
1505 URI_TYPE(ParserState) * state, const URI_CHAR * first,
1506 const URI_CHAR * afterLast, UriMemoryManager * memory) {
1507 const URI_CHAR * const afterSegmentNz
1508 = URI_FUNC(ParseSegmentNz)(state, first, afterLast, memory);
1509 if (afterSegmentNz == NULL) {
1510 return NULL;
1511 } else {
1512 if (!URI_FUNC(PushPathSegment)(state, first, afterSegmentNz, memory)) { /* SEGMENT BOTH */
1513 URI_FUNC(StopMalloc)(state, memory);
1514 return NULL;
1515 }
1516 }
1517 return URI_FUNC(ParseZeroMoreSlashSegs)(state, afterSegmentNz, afterLast, memory);
1518 }
1519
1520
1521
1522 /*
1523 * [pchar]->[pctEncoded]
1524 * [pchar]->[subDelims]
1525 * [pchar]->[unreserved]
1526 * [pchar]-><:>
1527 * [pchar]-><@>
1528 */
URI_FUNC(ParsePchar)1529 static const URI_CHAR * URI_FUNC(ParsePchar)(URI_TYPE(ParserState) * state,
1530 const URI_CHAR * first, const URI_CHAR * afterLast,
1531 UriMemoryManager * memory) {
1532 if (first >= afterLast) {
1533 URI_FUNC(StopSyntax)(state, afterLast, memory);
1534 return NULL;
1535 }
1536
1537 switch (*first) {
1538 case _UT('%'):
1539 return URI_FUNC(ParsePctEncoded)(state, first, afterLast, memory);
1540
1541 case _UT(':'):
1542 case _UT('@'):
1543 case _UT('!'):
1544 case _UT('$'):
1545 case _UT('&'):
1546 case _UT('('):
1547 case _UT(')'):
1548 case _UT('*'):
1549 case _UT(','):
1550 case _UT(';'):
1551 case _UT('\''):
1552 case _UT('+'):
1553 case _UT('='):
1554 case _UT('-'):
1555 case _UT('.'):
1556 case _UT('_'):
1557 case _UT('~'):
1558 case URI_SET_DIGIT:
1559 case URI_SET_ALPHA:
1560 return first + 1;
1561
1562 default:
1563 URI_FUNC(StopSyntax)(state, first, memory);
1564 return NULL;
1565 }
1566 }
1567
1568
1569
1570 /*
1571 * [pctEncoded]-><%>[HEXDIG][HEXDIG]
1572 */
URI_FUNC(ParsePctEncoded)1573 static const URI_CHAR * URI_FUNC(ParsePctEncoded)(
1574 URI_TYPE(ParserState) * state,
1575 const URI_CHAR * first, const URI_CHAR * afterLast,
1576 UriMemoryManager * memory) {
1577 if (first >= afterLast) {
1578 URI_FUNC(StopSyntax)(state, afterLast, memory);
1579 return NULL;
1580 }
1581
1582 /*
1583 First character has already been
1584 checked before entering this rule.
1585
1586 switch (*first) {
1587 case _UT('%'):
1588 */
1589 if (first + 1 >= afterLast) {
1590 URI_FUNC(StopSyntax)(state, afterLast, memory);
1591 return NULL;
1592 }
1593
1594 switch (first[1]) {
1595 case URI_SET_HEXDIG:
1596 if (first + 2 >= afterLast) {
1597 URI_FUNC(StopSyntax)(state, afterLast, memory);
1598 return NULL;
1599 }
1600
1601 switch (first[2]) {
1602 case URI_SET_HEXDIG:
1603 return first + 3;
1604
1605 default:
1606 URI_FUNC(StopSyntax)(state, first + 2, memory);
1607 return NULL;
1608 }
1609
1610 default:
1611 URI_FUNC(StopSyntax)(state, first + 1, memory);
1612 return NULL;
1613 }
1614
1615 /*
1616 default:
1617 URI_FUNC(StopSyntax)(state, first, memory);
1618 return NULL;
1619 }
1620 */
1621 }
1622
1623
1624
1625 /*
1626 * [pctSubUnres]->[pctEncoded]
1627 * [pctSubUnres]->[subDelims]
1628 * [pctSubUnres]->[unreserved]
1629 */
URI_FUNC(ParsePctSubUnres)1630 static const URI_CHAR * URI_FUNC(ParsePctSubUnres)(
1631 URI_TYPE(ParserState) * state,
1632 const URI_CHAR * first, const URI_CHAR * afterLast,
1633 UriMemoryManager * memory) {
1634 if (first >= afterLast) {
1635 URI_FUNC(StopSyntax)(state, afterLast, memory);
1636 return NULL;
1637 }
1638
1639 switch (*first) {
1640 case _UT('%'):
1641 return URI_FUNC(ParsePctEncoded)(state, first, afterLast, memory);
1642
1643 case _UT('!'):
1644 case _UT('$'):
1645 case _UT('&'):
1646 case _UT('('):
1647 case _UT(')'):
1648 case _UT('*'):
1649 case _UT(','):
1650 case _UT(';'):
1651 case _UT('\''):
1652 case _UT('+'):
1653 case _UT('='):
1654 case _UT('-'):
1655 case _UT('.'):
1656 case _UT('_'):
1657 case _UT('~'):
1658 case URI_SET_DIGIT:
1659 case URI_SET_ALPHA:
1660 return first + 1;
1661
1662 default:
1663 URI_FUNC(StopSyntax)(state, first, memory);
1664 return NULL;
1665 }
1666 }
1667
1668
1669
1670 /*
1671 * [port]->[DIGIT][port]
1672 * [port]-><NULL>
1673 */
URI_FUNC(ParsePort)1674 static const URI_CHAR * URI_FUNC(ParsePort)(URI_TYPE(ParserState) * state, const URI_CHAR * first, const URI_CHAR * afterLast) {
1675 if (first >= afterLast) {
1676 return afterLast;
1677 }
1678
1679 switch (*first) {
1680 case URI_SET_DIGIT:
1681 return URI_FUNC(ParsePort)(state, first + 1, afterLast);
1682
1683 default:
1684 return first;
1685 }
1686 }
1687
1688
1689
1690 /*
1691 * [queryFrag]->[pchar][queryFrag]
1692 * [queryFrag]-></>[queryFrag]
1693 * [queryFrag]-><?>[queryFrag]
1694 * [queryFrag]-><NULL>
1695 */
URI_FUNC(ParseQueryFrag)1696 static const URI_CHAR * URI_FUNC(ParseQueryFrag)(URI_TYPE(ParserState) * state,
1697 const URI_CHAR * first, const URI_CHAR * afterLast,
1698 UriMemoryManager * memory) {
1699 if (first >= afterLast) {
1700 return afterLast;
1701 }
1702
1703 switch (*first) {
1704 case _UT('!'):
1705 case _UT('$'):
1706 case _UT('%'):
1707 case _UT('&'):
1708 case _UT('('):
1709 case _UT(')'):
1710 case _UT('-'):
1711 case _UT('*'):
1712 case _UT(','):
1713 case _UT('.'):
1714 case _UT(':'):
1715 case _UT(';'):
1716 case _UT('@'):
1717 case _UT('\''):
1718 case _UT('_'):
1719 case _UT('~'):
1720 case _UT('+'):
1721 case _UT('='):
1722 case URI_SET_DIGIT:
1723 case URI_SET_ALPHA:
1724 {
1725 const URI_CHAR * const afterPchar
1726 = URI_FUNC(ParsePchar)(state, first, afterLast, memory);
1727 if (afterPchar == NULL) {
1728 return NULL;
1729 }
1730 return URI_FUNC(ParseQueryFrag)(state, afterPchar, afterLast, memory);
1731 }
1732
1733 case _UT('/'):
1734 case _UT('?'):
1735 return URI_FUNC(ParseQueryFrag)(state, first + 1, afterLast, memory);
1736
1737 default:
1738 return first;
1739 }
1740 }
1741
1742
1743
1744 /*
1745 * [segment]->[pchar][segment]
1746 * [segment]-><NULL>
1747 */
URI_FUNC(ParseSegment)1748 static const URI_CHAR * URI_FUNC(ParseSegment)(URI_TYPE(ParserState) * state,
1749 const URI_CHAR * first, const URI_CHAR * afterLast,
1750 UriMemoryManager * memory) {
1751 if (first >= afterLast) {
1752 return afterLast;
1753 }
1754
1755 switch (*first) {
1756 case _UT('!'):
1757 case _UT('$'):
1758 case _UT('%'):
1759 case _UT('&'):
1760 case _UT('('):
1761 case _UT(')'):
1762 case _UT('-'):
1763 case _UT('*'):
1764 case _UT(','):
1765 case _UT('.'):
1766 case _UT(':'):
1767 case _UT(';'):
1768 case _UT('@'):
1769 case _UT('\''):
1770 case _UT('_'):
1771 case _UT('~'):
1772 case _UT('+'):
1773 case _UT('='):
1774 case URI_SET_DIGIT:
1775 case URI_SET_ALPHA:
1776 {
1777 const URI_CHAR * const afterPchar
1778 = URI_FUNC(ParsePchar)(state, first, afterLast, memory);
1779 if (afterPchar == NULL) {
1780 return NULL;
1781 }
1782 return URI_FUNC(ParseSegment)(state, afterPchar, afterLast, memory);
1783 }
1784
1785 default:
1786 return first;
1787 }
1788 }
1789
1790
1791
1792 /*
1793 * [segmentNz]->[pchar][segment]
1794 */
URI_FUNC(ParseSegmentNz)1795 static URI_INLINE const URI_CHAR * URI_FUNC(ParseSegmentNz)(
1796 URI_TYPE(ParserState) * state,
1797 const URI_CHAR * first, const URI_CHAR * afterLast,
1798 UriMemoryManager * memory) {
1799 const URI_CHAR * const afterPchar
1800 = URI_FUNC(ParsePchar)(state, first, afterLast, memory);
1801 if (afterPchar == NULL) {
1802 return NULL;
1803 }
1804 return URI_FUNC(ParseSegment)(state, afterPchar, afterLast, memory);
1805 }
1806
1807
1808
URI_FUNC(OnExitSegmentNzNcOrScheme2)1809 static URI_INLINE UriBool URI_FUNC(OnExitSegmentNzNcOrScheme2)(
1810 URI_TYPE(ParserState) * state, const URI_CHAR * first,
1811 UriMemoryManager * memory) {
1812 if (!URI_FUNC(PushPathSegment)(state, state->uri->scheme.first, first, memory)) { /* SEGMENT BOTH */
1813 return URI_FALSE; /* Raises malloc error*/
1814 }
1815 state->uri->scheme.first = NULL; /* Not a scheme, reset */
1816 return URI_TRUE; /* Success */
1817 }
1818
1819
1820
1821 /*
1822 * [segmentNzNcOrScheme2]->[ALPHA][segmentNzNcOrScheme2]
1823 * [segmentNzNcOrScheme2]->[DIGIT][segmentNzNcOrScheme2]
1824 * [segmentNzNcOrScheme2]->[pctEncoded][mustBeSegmentNzNc]
1825 * [segmentNzNcOrScheme2]->[uriTail] // can take <NULL>
1826 * [segmentNzNcOrScheme2]-><!>[mustBeSegmentNzNc]
1827 * [segmentNzNcOrScheme2]-><$>[mustBeSegmentNzNc]
1828 * [segmentNzNcOrScheme2]-><&>[mustBeSegmentNzNc]
1829 * [segmentNzNcOrScheme2]-><(>[mustBeSegmentNzNc]
1830 * [segmentNzNcOrScheme2]-><)>[mustBeSegmentNzNc]
1831 * [segmentNzNcOrScheme2]-><*>[mustBeSegmentNzNc]
1832 * [segmentNzNcOrScheme2]-><,>[mustBeSegmentNzNc]
1833 * [segmentNzNcOrScheme2]-><.>[segmentNzNcOrScheme2]
1834 * [segmentNzNcOrScheme2]-></>[segment][zeroMoreSlashSegs][uriTail]
1835 * [segmentNzNcOrScheme2]-><:>[hierPart][uriTail]
1836 * [segmentNzNcOrScheme2]-><;>[mustBeSegmentNzNc]
1837 * [segmentNzNcOrScheme2]-><@>[mustBeSegmentNzNc]
1838 * [segmentNzNcOrScheme2]-><_>[mustBeSegmentNzNc]
1839 * [segmentNzNcOrScheme2]-><~>[mustBeSegmentNzNc]
1840 * [segmentNzNcOrScheme2]-><+>[segmentNzNcOrScheme2]
1841 * [segmentNzNcOrScheme2]-><=>[mustBeSegmentNzNc]
1842 * [segmentNzNcOrScheme2]-><'>[mustBeSegmentNzNc]
1843 * [segmentNzNcOrScheme2]-><->[segmentNzNcOrScheme2]
1844 */
URI_FUNC(ParseSegmentNzNcOrScheme2)1845 static const URI_CHAR * URI_FUNC(ParseSegmentNzNcOrScheme2)(
1846 URI_TYPE(ParserState) * state, const URI_CHAR * first,
1847 const URI_CHAR * afterLast, UriMemoryManager * memory) {
1848 if (first >= afterLast) {
1849 if (!URI_FUNC(OnExitSegmentNzNcOrScheme2)(state, first, memory)) {
1850 URI_FUNC(StopMalloc)(state, memory);
1851 return NULL;
1852 }
1853 return afterLast;
1854 }
1855
1856 switch (*first) {
1857 case _UT('.'):
1858 case _UT('+'):
1859 case _UT('-'):
1860 case URI_SET_ALPHA:
1861 case URI_SET_DIGIT:
1862 return URI_FUNC(ParseSegmentNzNcOrScheme2)(state, first + 1, afterLast, memory);
1863
1864 case _UT('%'):
1865 {
1866 const URI_CHAR * const afterPctEncoded
1867 = URI_FUNC(ParsePctEncoded)(state, first, afterLast, memory);
1868 if (afterPctEncoded == NULL) {
1869 return NULL;
1870 }
1871 return URI_FUNC(ParseMustBeSegmentNzNc)(state, afterPctEncoded, afterLast, memory);
1872 }
1873
1874 case _UT('!'):
1875 case _UT('$'):
1876 case _UT('&'):
1877 case _UT('('):
1878 case _UT(')'):
1879 case _UT('*'):
1880 case _UT(','):
1881 case _UT(';'):
1882 case _UT('@'):
1883 case _UT('_'):
1884 case _UT('~'):
1885 case _UT('='):
1886 case _UT('\''):
1887 return URI_FUNC(ParseMustBeSegmentNzNc)(state, first + 1, afterLast, memory);
1888
1889 case _UT('/'):
1890 {
1891 const URI_CHAR * afterZeroMoreSlashSegs;
1892 const URI_CHAR * const afterSegment
1893 = URI_FUNC(ParseSegment)(state, first + 1, afterLast, memory);
1894 if (afterSegment == NULL) {
1895 return NULL;
1896 }
1897 if (!URI_FUNC(PushPathSegment)(state, state->uri->scheme.first, first, memory)) { /* SEGMENT BOTH */
1898 URI_FUNC(StopMalloc)(state, memory);
1899 return NULL;
1900 }
1901 state->uri->scheme.first = NULL; /* Not a scheme, reset */
1902 if (!URI_FUNC(PushPathSegment)(state, first + 1, afterSegment, memory)) { /* SEGMENT BOTH */
1903 URI_FUNC(StopMalloc)(state, memory);
1904 return NULL;
1905 }
1906 afterZeroMoreSlashSegs
1907 = URI_FUNC(ParseZeroMoreSlashSegs)(state, afterSegment, afterLast, memory);
1908 if (afterZeroMoreSlashSegs == NULL) {
1909 return NULL;
1910 }
1911 return URI_FUNC(ParseUriTail)(state, afterZeroMoreSlashSegs, afterLast, memory);
1912 }
1913
1914 case _UT(':'):
1915 {
1916 const URI_CHAR * const afterHierPart
1917 = URI_FUNC(ParseHierPart)(state, first + 1, afterLast, memory);
1918 state->uri->scheme.afterLast = first; /* SCHEME END */
1919 if (afterHierPart == NULL) {
1920 return NULL;
1921 }
1922 return URI_FUNC(ParseUriTail)(state, afterHierPart, afterLast, memory);
1923 }
1924
1925 default:
1926 if (!URI_FUNC(OnExitSegmentNzNcOrScheme2)(state, first, memory)) {
1927 URI_FUNC(StopMalloc)(state, memory);
1928 return NULL;
1929 }
1930 return URI_FUNC(ParseUriTail)(state, first, afterLast, memory);
1931 }
1932 }
1933
1934
1935
1936 /*
1937 * [uriReference]->[ALPHA][segmentNzNcOrScheme2]
1938 * [uriReference]->[DIGIT][mustBeSegmentNzNc]
1939 * [uriReference]->[pctEncoded][mustBeSegmentNzNc]
1940 * [uriReference]->[subDelims][mustBeSegmentNzNc]
1941 * [uriReference]->[uriTail] // can take <NULL>
1942 * [uriReference]-><.>[mustBeSegmentNzNc]
1943 * [uriReference]-></>[partHelperTwo][uriTail]
1944 * [uriReference]-><@>[mustBeSegmentNzNc]
1945 * [uriReference]-><_>[mustBeSegmentNzNc]
1946 * [uriReference]-><~>[mustBeSegmentNzNc]
1947 * [uriReference]-><->[mustBeSegmentNzNc]
1948 */
URI_FUNC(ParseUriReference)1949 static const URI_CHAR * URI_FUNC(ParseUriReference)(
1950 URI_TYPE(ParserState) * state, const URI_CHAR * first,
1951 const URI_CHAR * afterLast, UriMemoryManager * memory) {
1952 if (first >= afterLast) {
1953 return afterLast;
1954 }
1955
1956 switch (*first) {
1957 case URI_SET_ALPHA:
1958 state->uri->scheme.first = first; /* SCHEME BEGIN */
1959 return URI_FUNC(ParseSegmentNzNcOrScheme2)(state, first + 1, afterLast, memory);
1960
1961 case URI_SET_DIGIT:
1962 case _UT('!'):
1963 case _UT('$'):
1964 case _UT('&'):
1965 case _UT('('):
1966 case _UT(')'):
1967 case _UT('*'):
1968 case _UT(','):
1969 case _UT(';'):
1970 case _UT('\''):
1971 case _UT('+'):
1972 case _UT('='):
1973 case _UT('.'):
1974 case _UT('_'):
1975 case _UT('~'):
1976 case _UT('-'):
1977 case _UT('@'):
1978 state->uri->scheme.first = first; /* SEGMENT BEGIN, ABUSE SCHEME POINTER */
1979 return URI_FUNC(ParseMustBeSegmentNzNc)(state, first + 1, afterLast, memory);
1980
1981 case _UT('%'):
1982 {
1983 const URI_CHAR * const afterPctEncoded
1984 = URI_FUNC(ParsePctEncoded)(state, first, afterLast, memory);
1985 if (afterPctEncoded == NULL) {
1986 return NULL;
1987 }
1988 state->uri->scheme.first = first; /* SEGMENT BEGIN, ABUSE SCHEME POINTER */
1989 return URI_FUNC(ParseMustBeSegmentNzNc)(state, afterPctEncoded, afterLast, memory);
1990 }
1991
1992 case _UT('/'):
1993 {
1994 const URI_CHAR * const afterPartHelperTwo
1995 = URI_FUNC(ParsePartHelperTwo)(state, first + 1, afterLast, memory);
1996 if (afterPartHelperTwo == NULL) {
1997 return NULL;
1998 }
1999 return URI_FUNC(ParseUriTail)(state, afterPartHelperTwo, afterLast, memory);
2000 }
2001
2002 default:
2003 return URI_FUNC(ParseUriTail)(state, first, afterLast, memory);
2004 }
2005 }
2006
2007
2008
2009 /*
2010 * [uriTail]-><#>[queryFrag]
2011 * [uriTail]-><?>[queryFrag][uriTailTwo]
2012 * [uriTail]-><NULL>
2013 */
URI_FUNC(ParseUriTail)2014 static URI_INLINE const URI_CHAR * URI_FUNC(ParseUriTail)(
2015 URI_TYPE(ParserState) * state,
2016 const URI_CHAR * first, const URI_CHAR * afterLast,
2017 UriMemoryManager * memory) {
2018 if (first >= afterLast) {
2019 return afterLast;
2020 }
2021
2022 switch (*first) {
2023 case _UT('#'):
2024 {
2025 const URI_CHAR * const afterQueryFrag = URI_FUNC(ParseQueryFrag)(state, first + 1, afterLast, memory);
2026 if (afterQueryFrag == NULL) {
2027 return NULL;
2028 }
2029 state->uri->fragment.first = first + 1; /* FRAGMENT BEGIN */
2030 state->uri->fragment.afterLast = afterQueryFrag; /* FRAGMENT END */
2031 return afterQueryFrag;
2032 }
2033
2034 case _UT('?'):
2035 {
2036 const URI_CHAR * const afterQueryFrag
2037 = URI_FUNC(ParseQueryFrag)(state, first + 1, afterLast, memory);
2038 if (afterQueryFrag == NULL) {
2039 return NULL;
2040 }
2041 state->uri->query.first = first + 1; /* QUERY BEGIN */
2042 state->uri->query.afterLast = afterQueryFrag; /* QUERY END */
2043 return URI_FUNC(ParseUriTailTwo)(state, afterQueryFrag, afterLast, memory);
2044 }
2045
2046 default:
2047 return first;
2048 }
2049 }
2050
2051
2052
2053 /*
2054 * [uriTailTwo]-><#>[queryFrag]
2055 * [uriTailTwo]-><NULL>
2056 */
URI_FUNC(ParseUriTailTwo)2057 static URI_INLINE const URI_CHAR * URI_FUNC(ParseUriTailTwo)(
2058 URI_TYPE(ParserState) * state,
2059 const URI_CHAR * first, const URI_CHAR * afterLast,
2060 UriMemoryManager * memory) {
2061 if (first >= afterLast) {
2062 return afterLast;
2063 }
2064
2065 switch (*first) {
2066 case _UT('#'):
2067 {
2068 const URI_CHAR * const afterQueryFrag = URI_FUNC(ParseQueryFrag)(state, first + 1, afterLast, memory);
2069 if (afterQueryFrag == NULL) {
2070 return NULL;
2071 }
2072 state->uri->fragment.first = first + 1; /* FRAGMENT BEGIN */
2073 state->uri->fragment.afterLast = afterQueryFrag; /* FRAGMENT END */
2074 return afterQueryFrag;
2075 }
2076
2077 default:
2078 return first;
2079 }
2080 }
2081
2082
2083
2084 /*
2085 * [zeroMoreSlashSegs]-></>[segment][zeroMoreSlashSegs]
2086 * [zeroMoreSlashSegs]-><NULL>
2087 */
URI_FUNC(ParseZeroMoreSlashSegs)2088 static const URI_CHAR * URI_FUNC(ParseZeroMoreSlashSegs)(
2089 URI_TYPE(ParserState) * state, const URI_CHAR * first,
2090 const URI_CHAR * afterLast, UriMemoryManager * memory) {
2091 if (first >= afterLast) {
2092 return afterLast;
2093 }
2094
2095 switch (*first) {
2096 case _UT('/'):
2097 {
2098 const URI_CHAR * const afterSegment
2099 = URI_FUNC(ParseSegment)(state, first + 1, afterLast, memory);
2100 if (afterSegment == NULL) {
2101 return NULL;
2102 }
2103 if (!URI_FUNC(PushPathSegment)(state, first + 1, afterSegment, memory)) { /* SEGMENT BOTH */
2104 URI_FUNC(StopMalloc)(state, memory);
2105 return NULL;
2106 }
2107 return URI_FUNC(ParseZeroMoreSlashSegs)(state, afterSegment, afterLast, memory);
2108 }
2109
2110 default:
2111 return first;
2112 }
2113 }
2114
2115
2116
URI_FUNC(ResetParserStateExceptUri)2117 static URI_INLINE void URI_FUNC(ResetParserStateExceptUri)(URI_TYPE(ParserState) * state) {
2118 URI_TYPE(Uri) * const uriBackup = state->uri;
2119 memset(state, 0, sizeof(URI_TYPE(ParserState)));
2120 state->uri = uriBackup;
2121 }
2122
2123
2124
URI_FUNC(PushPathSegment)2125 static URI_INLINE UriBool URI_FUNC(PushPathSegment)(
2126 URI_TYPE(ParserState) * state, const URI_CHAR * first,
2127 const URI_CHAR * afterLast, UriMemoryManager * memory) {
2128 URI_TYPE(PathSegment) * segment = memory->calloc(memory, 1, sizeof(URI_TYPE(PathSegment)));
2129 if (segment == NULL) {
2130 return URI_FALSE; /* Raises malloc error */
2131 }
2132 if (first == afterLast) {
2133 segment->text.first = URI_FUNC(SafeToPointTo);
2134 segment->text.afterLast = URI_FUNC(SafeToPointTo);
2135 } else {
2136 segment->text.first = first;
2137 segment->text.afterLast = afterLast;
2138 }
2139
2140 /* First segment ever? */
2141 if (state->uri->pathHead == NULL) {
2142 /* First segment ever, set head and tail */
2143 state->uri->pathHead = segment;
2144 state->uri->pathTail = segment;
2145 } else {
2146 /* Append, update tail */
2147 state->uri->pathTail->next = segment;
2148 state->uri->pathTail = segment;
2149 }
2150
2151 return URI_TRUE; /* Success */
2152 }
2153
2154
2155
URI_FUNC(ParseUriEx)2156 int URI_FUNC(ParseUriEx)(URI_TYPE(ParserState) * state,
2157 const URI_CHAR * first, const URI_CHAR * afterLast) {
2158 return URI_FUNC(ParseUriExMm)(state, first, afterLast, NULL);
2159 }
2160
2161
2162
URI_FUNC(ParseUriExMm)2163 static int URI_FUNC(ParseUriExMm)(URI_TYPE(ParserState) * state,
2164 const URI_CHAR * first, const URI_CHAR * afterLast,
2165 UriMemoryManager * memory) {
2166 const URI_CHAR * afterUriReference;
2167 URI_TYPE(Uri) * uri;
2168
2169 /* Check params */
2170 if ((state == NULL) || (first == NULL) || (afterLast == NULL)) {
2171 return URI_ERROR_NULL;
2172 }
2173 URI_CHECK_MEMORY_MANAGER(memory); /* may return */
2174
2175 uri = state->uri;
2176
2177 /* Init parser */
2178 URI_FUNC(ResetParserStateExceptUri)(state);
2179 URI_FUNC(ResetUri)(uri);
2180
2181 /* Parse */
2182 afterUriReference = URI_FUNC(ParseUriReference)(state, first, afterLast, memory);
2183 if (afterUriReference == NULL) {
2184 /* Waterproof errorPos <= afterLast */
2185 if (state->errorPos && (state->errorPos > afterLast)) {
2186 state->errorPos = afterLast;
2187 }
2188 return state->errorCode;
2189 }
2190 if (afterUriReference != afterLast) {
2191 if (afterUriReference < afterLast) {
2192 URI_FUNC(StopSyntax)(state, afterUriReference, memory);
2193 } else {
2194 URI_FUNC(StopSyntax)(state, afterLast, memory);
2195 }
2196 return state->errorCode;
2197 }
2198 return URI_SUCCESS;
2199 }
2200
2201
2202
URI_FUNC(ParseUri)2203 int URI_FUNC(ParseUri)(URI_TYPE(ParserState) * state, const URI_CHAR * text) {
2204 if ((state == NULL) || (text == NULL)) {
2205 return URI_ERROR_NULL;
2206 }
2207 return URI_FUNC(ParseUriEx)(state, text, text + URI_STRLEN(text));
2208 }
2209
2210
2211
URI_FUNC(ParseSingleUri)2212 int URI_FUNC(ParseSingleUri)(URI_TYPE(Uri) * uri, const URI_CHAR * text,
2213 const URI_CHAR ** errorPos) {
2214 return URI_FUNC(ParseSingleUriEx)(uri, text, NULL, errorPos);
2215 }
2216
2217
2218
URI_FUNC(ParseSingleUriEx)2219 int URI_FUNC(ParseSingleUriEx)(URI_TYPE(Uri) * uri,
2220 const URI_CHAR * first, const URI_CHAR * afterLast,
2221 const URI_CHAR ** errorPos) {
2222 if ((afterLast == NULL) && (first != NULL)) {
2223 afterLast = first + URI_STRLEN(first);
2224 }
2225 return URI_FUNC(ParseSingleUriExMm)(uri, first, afterLast, errorPos, NULL);
2226 }
2227
2228
2229
URI_FUNC(ParseSingleUriExMm)2230 int URI_FUNC(ParseSingleUriExMm)(URI_TYPE(Uri) * uri,
2231 const URI_CHAR * first, const URI_CHAR * afterLast,
2232 const URI_CHAR ** errorPos, UriMemoryManager * memory) {
2233 URI_TYPE(ParserState) state;
2234 int res;
2235
2236 /* Check params */
2237 if ((uri == NULL) || (first == NULL) || (afterLast == NULL)) {
2238 return URI_ERROR_NULL;
2239 }
2240 URI_CHECK_MEMORY_MANAGER(memory); /* may return */
2241
2242 state.uri = uri;
2243
2244 res = URI_FUNC(ParseUriExMm)(&state, first, afterLast, memory);
2245
2246 if (res != URI_SUCCESS) {
2247 if (errorPos != NULL) {
2248 *errorPos = state.errorPos;
2249 }
2250 URI_FUNC(FreeUriMembersMm)(uri, memory);
2251 }
2252
2253 return res;
2254 }
2255
2256
2257
URI_FUNC(FreeUriMembers)2258 void URI_FUNC(FreeUriMembers)(URI_TYPE(Uri) * uri) {
2259 URI_FUNC(FreeUriMembersMm)(uri, NULL);
2260 }
2261
2262
2263
URI_FUNC(FreeUriMembersMm)2264 int URI_FUNC(FreeUriMembersMm)(URI_TYPE(Uri) * uri, UriMemoryManager * memory) {
2265 if (uri == NULL) {
2266 return URI_ERROR_NULL;
2267 }
2268
2269 URI_CHECK_MEMORY_MANAGER(memory); /* may return */
2270
2271 if (uri->owner) {
2272 /* Scheme */
2273 if (uri->scheme.first != NULL) {
2274 if (uri->scheme.first != uri->scheme.afterLast) {
2275 memory->free(memory, (URI_CHAR *)uri->scheme.first);
2276 }
2277 uri->scheme.first = NULL;
2278 uri->scheme.afterLast = NULL;
2279 }
2280
2281 /* User info */
2282 if (uri->userInfo.first != NULL) {
2283 if (uri->userInfo.first != uri->userInfo.afterLast) {
2284 memory->free(memory, (URI_CHAR *)uri->userInfo.first);
2285 }
2286 uri->userInfo.first = NULL;
2287 uri->userInfo.afterLast = NULL;
2288 }
2289
2290 /* Host data - IPvFuture (may affect host text) */
2291 if (uri->hostData.ipFuture.first != NULL) {
2292 /* NOTE: .hostData.ipFuture may hold the very same range pointers
2293 * as .hostText; then we need to prevent freeing memory twice. */
2294 if (uri->hostText.first == uri->hostData.ipFuture.first) {
2295 uri->hostText.first = NULL;
2296 uri->hostText.afterLast = NULL;
2297 }
2298
2299 if (uri->hostData.ipFuture.first != uri->hostData.ipFuture.afterLast) {
2300 memory->free(memory, (URI_CHAR *)uri->hostData.ipFuture.first);
2301 }
2302 uri->hostData.ipFuture.first = NULL;
2303 uri->hostData.ipFuture.afterLast = NULL;
2304 }
2305
2306 /* Host text (after IPvFuture, see above) */
2307 if (uri->hostText.first != NULL) {
2308 if (uri->hostText.first != uri->hostText.afterLast) {
2309 memory->free(memory, (URI_CHAR *)uri->hostText.first);
2310 }
2311 uri->hostText.first = NULL;
2312 uri->hostText.afterLast = NULL;
2313 }
2314 }
2315
2316 /* Host data - IPv4 */
2317 if (uri->hostData.ip4 != NULL) {
2318 memory->free(memory, uri->hostData.ip4);
2319 uri->hostData.ip4 = NULL;
2320 }
2321
2322 /* Host data - IPv6 */
2323 if (uri->hostData.ip6 != NULL) {
2324 memory->free(memory, uri->hostData.ip6);
2325 uri->hostData.ip6 = NULL;
2326 }
2327
2328 /* Port text */
2329 if (uri->owner && (uri->portText.first != NULL)) {
2330 if (uri->portText.first != uri->portText.afterLast) {
2331 memory->free(memory, (URI_CHAR *)uri->portText.first);
2332 }
2333 uri->portText.first = NULL;
2334 uri->portText.afterLast = NULL;
2335 }
2336
2337 /* Path */
2338 if (uri->pathHead != NULL) {
2339 URI_TYPE(PathSegment) * segWalk = uri->pathHead;
2340 while (segWalk != NULL) {
2341 URI_TYPE(PathSegment) * const next = segWalk->next;
2342 if (uri->owner && (segWalk->text.first != NULL)
2343 && (segWalk->text.first < segWalk->text.afterLast)) {
2344 memory->free(memory, (URI_CHAR *)segWalk->text.first);
2345 }
2346 memory->free(memory, segWalk);
2347 segWalk = next;
2348 }
2349 uri->pathHead = NULL;
2350 uri->pathTail = NULL;
2351 }
2352
2353 if (uri->owner) {
2354 /* Query */
2355 if (uri->query.first != NULL) {
2356 if (uri->query.first != uri->query.afterLast) {
2357 memory->free(memory, (URI_CHAR *)uri->query.first);
2358 }
2359 uri->query.first = NULL;
2360 uri->query.afterLast = NULL;
2361 }
2362
2363 /* Fragment */
2364 if (uri->fragment.first != NULL) {
2365 if (uri->fragment.first != uri->fragment.afterLast) {
2366 memory->free(memory, (URI_CHAR *)uri->fragment.first);
2367 }
2368 uri->fragment.first = NULL;
2369 uri->fragment.afterLast = NULL;
2370 }
2371 }
2372
2373 return URI_SUCCESS;
2374 }
2375
2376
2377
URI_FUNC(_TESTING_ONLY_ParseIpSix)2378 UriBool URI_FUNC(_TESTING_ONLY_ParseIpSix)(const URI_CHAR * text) {
2379 UriMemoryManager * const memory = &defaultMemoryManager;
2380 URI_TYPE(Uri) uri;
2381 URI_TYPE(ParserState) parser;
2382 const URI_CHAR * const afterIpSix = text + URI_STRLEN(text);
2383 const URI_CHAR * res;
2384
2385 URI_FUNC(ResetUri)(&uri);
2386 parser.uri = &uri;
2387 URI_FUNC(ResetParserStateExceptUri)(&parser);
2388 parser.uri->hostData.ip6 = memory->malloc(memory, 1 * sizeof(UriIp6));
2389 res = URI_FUNC(ParseIPv6address2)(&parser, text, afterIpSix, memory);
2390 URI_FUNC(FreeUriMembersMm)(&uri, memory);
2391 return res == afterIpSix ? URI_TRUE : URI_FALSE;
2392 }
2393
2394
2395
URI_FUNC(_TESTING_ONLY_ParseIpFour)2396 UriBool URI_FUNC(_TESTING_ONLY_ParseIpFour)(const URI_CHAR * text) {
2397 unsigned char octets[4];
2398 int res = URI_FUNC(ParseIpFourAddress)(octets, text, text + URI_STRLEN(text));
2399 return (res == URI_SUCCESS) ? URI_TRUE : URI_FALSE;
2400 }
2401
2402
2403
2404 #undef URI_SET_DIGIT
2405 #undef URI_SET_HEX_LETTER_UPPER
2406 #undef URI_SET_HEX_LETTER_LOWER
2407 #undef URI_SET_HEXDIG
2408 #undef URI_SET_ALPHA
2409
2410
2411
2412 #endif
2413