xref: /qemu/util/uri.c (revision 7c0dfcf9)
1 /**
2  * uri.c: set of generic URI related routines
3  *
4  * Reference: RFCs 3986, 2732 and 2373
5  *
6  * Copyright (C) 1998-2003 Daniel Veillard.  All Rights Reserved.
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a copy
9  * of this software and associated documentation files (the "Software"), to deal
10  * in the Software without restriction, including without limitation the rights
11  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12  * copies of the Software, and to permit persons to whom the Software is
13  * furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice shall be included in
16  * all copies or substantial portions of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
21  * DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
22  * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24  *
25  * Except as contained in this notice, the name of Daniel Veillard shall not
26  * be used in advertising or otherwise to promote the sale, use or other
27  * dealings in this Software without prior written authorization from him.
28  *
29  * daniel@veillard.com
30  *
31  **
32  *
33  * Copyright (C) 2007, 2009-2010 Red Hat, Inc.
34  *
35  * This library is free software; you can redistribute it and/or
36  * modify it under the terms of the GNU Lesser General Public
37  * License as published by the Free Software Foundation; either
38  * version 2.1 of the License, or (at your option) any later version.
39  *
40  * This library is distributed in the hope that it will be useful,
41  * but WITHOUT ANY WARRANTY; without even the implied warranty of
42  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
43  * Lesser General Public License for more details.
44  *
45  * You should have received a copy of the GNU Lesser General Public
46  * License along with this library. If not, see <https://www.gnu.org/licenses/>.
47  *
48  * Authors:
49  *    Richard W.M. Jones <rjones@redhat.com>
50  *
51  */
52 
53 #include "qemu/osdep.h"
54 #include "qemu/cutils.h"
55 
56 #include "qemu/uri.h"
57 
58 static void uri_clean(URI *uri);
59 
/*
 * Character classes from RFC 2396, used by the legacy handling code.
 *
 * Every IS_xxx(x) macro below takes a character VALUE, except
 * IS_UNWISE(p) which takes a POINTER to the character. All of them may
 * evaluate their argument more than once, so the argument must be free
 * of side effects.
 */

/*
 * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
 *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
 *            "u" | "v" | "w" | "x" | "y" | "z"
 */
#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))

/*
 * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
 *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
 *           "U" | "V" | "W" | "X" | "Y" | "Z"
 */
#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))

/*
 * alpha    = lowalpha | upalpha
 */
#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))

#ifdef IS_DIGIT
#undef IS_DIGIT
#endif
/*
 * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
 */
#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))

/*
 * alphanum = alpha | digit
 */
#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))

/*
 * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
 */
#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||            \
    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||           \
    ((x) == '(') || ((x) == ')'))

/*
 * unwise = "{" | "}" | "|" | "\" | "^" | "[" | "]" | "`"
 *
 * NOTE: unlike the other IS_xxx() macros this one takes a pointer.
 */
#define IS_UNWISE(p)                                                           \
    (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||                  \
     ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||                 \
     ((*(p) == ']')) || ((*(p) == '`')))

/*
 * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
 *            "[" | "]"
 */
#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') ||        \
    ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') ||            \
    ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') ||            \
    ((x) == ']'))

/*
 * unreserved = alphanum | mark
 */
#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))

/*
 * Advance the pointer lvalue @p to the next character: three bytes when
 * it points at '%' (an escape sequence), one byte otherwise. The macro
 * does not check that two more bytes follow the '%'; callers are
 * expected to have validated the escape first. The argument is fully
 * parenthesized so that expressions such as NEXT(*pp) advance the
 * intended object.
 */
#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)
132 
133 /*
134  * Productions from the spec.
135  *
136  *    authority     = server | reg_name
137  *    reg_name      = 1*( unreserved | escaped | "$" | "," |
138  *                        ";" | ":" | "@" | "&" | "=" | "+" )
139  *
140  * path          = [ abs_path | opaque_part ]
141  */
142 
143 /************************************************************************
144  *                                                                      *
145  *                         RFC 3986 parser                              *
146  *                                                                      *
147  ************************************************************************/
148 
/*
 * RFC 3986 character classes. Every ISA_xxx(p) macro takes a POINTER to
 * the character under test and may evaluate it several times, so the
 * argument must be free of side effects.
 */

/* DIGIT */
#define ISA_DIGIT(p) (('0' <= *(p)) && (*(p) <= '9'))

/* ALPHA: an upper- or lower-case ASCII letter */
#define ISA_ALPHA(p)                                                           \
    ((('A' <= *(p)) && (*(p) <= 'Z')) || (('a' <= *(p)) && (*(p) <= 'z')))

/* HEXDIG, both letter cases accepted */
#define ISA_HEXDIG(p)                                                          \
    (ISA_DIGIT(p) || (('A' <= *(p)) && (*(p) <= 'F')) ||                       \
     (('a' <= *(p)) && (*(p) <= 'f')))

/*
 *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
 *                     / "*" / "+" / "," / ";" / "="
 */
#define ISA_SUB_DELIM(p)                                                       \
    ((*(p) == '!') || (*(p) == '$') || (*(p) == '&') || (*(p) == '\'') ||      \
     (*(p) == '(') || (*(p) == ')') || (*(p) == '*') || (*(p) == '+') ||       \
     (*(p) == ',') || (*(p) == ';') || (*(p) == '='))

/*
 *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
 */
#define ISA_UNRESERVED(p)                                                      \
    (ISA_DIGIT(p) || ISA_ALPHA(p) || (*(p) == '-') || (*(p) == '.') ||         \
     (*(p) == '_') || (*(p) == '~'))

/*
 *    pct-encoded   = "%" HEXDIG HEXDIG
 *
 * The tests short-circuit left to right, so the second and third bytes
 * are only read once the preceding byte is known not to be the NUL.
 */
#define ISA_PCT_ENCODED(p)                                                     \
    ((*(p) == '%') && ISA_HEXDIG((p) + 1) && ISA_HEXDIG((p) + 2))

/*
 *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
 */
#define ISA_PCHAR(p)                                                           \
    ((*(p) == ':') || (*(p) == '@') || ISA_UNRESERVED(p) ||                    \
     ISA_SUB_DELIM(p) || ISA_PCT_ENCODED(p))
185 
186 /**
187  * rfc3986_parse_scheme:
188  * @uri:  pointer to an URI structure
189  * @str:  pointer to the string to analyze
190  *
191  * Parse an URI scheme
192  *
193  * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
194  *
195  * Returns 0 or the error code
196  */
197 static int rfc3986_parse_scheme(URI *uri, const char **str)
198 {
199     const char *cur;
200 
201     if (str == NULL) {
202         return -1;
203     }
204 
205     cur = *str;
206     if (!ISA_ALPHA(cur)) {
207         return 2;
208     }
209     cur++;
210     while (ISA_ALPHA(cur) || ISA_DIGIT(cur) || (*cur == '+') || (*cur == '-') ||
211            (*cur == '.')) {
212         cur++;
213     }
214     if (uri != NULL) {
215         g_free(uri->scheme);
216         uri->scheme = g_strndup(*str, cur - *str);
217     }
218     *str = cur;
219     return 0;
220 }
221 
222 /**
223  * rfc3986_parse_fragment:
224  * @uri:  pointer to an URI structure
225  * @str:  pointer to the string to analyze
226  *
227  * Parse the query part of an URI
228  *
229  * fragment      = *( pchar / "/" / "?" )
230  * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
231  *       in the fragment identifier but this is used very broadly for
232  *       xpointer scheme selection, so we are allowing it here to not break
233  *       for example all the DocBook processing chains.
234  *
235  * Returns 0 or the error code
236  */
237 static int rfc3986_parse_fragment(URI *uri, const char **str)
238 {
239     const char *cur;
240 
241     if (str == NULL) {
242         return -1;
243     }
244 
245     cur = *str;
246 
247     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
248            (*cur == '[') || (*cur == ']') ||
249            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) {
250         NEXT(cur);
251     }
252     if (uri != NULL) {
253         g_free(uri->fragment);
254         if (uri->cleanup & 2) {
255             uri->fragment = g_strndup(*str, cur - *str);
256         } else {
257             uri->fragment = g_uri_unescape_segment(*str, cur, NULL);
258         }
259     }
260     *str = cur;
261     return 0;
262 }
263 
264 /**
265  * rfc3986_parse_query:
266  * @uri:  pointer to an URI structure
267  * @str:  pointer to the string to analyze
268  *
269  * Parse the query part of an URI
270  *
271  * query = *uric
272  *
273  * Returns 0 or the error code
274  */
275 static int rfc3986_parse_query(URI *uri, const char **str)
276 {
277     const char *cur;
278 
279     if (str == NULL) {
280         return -1;
281     }
282 
283     cur = *str;
284 
285     while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
286            ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) {
287         NEXT(cur);
288     }
289     if (uri != NULL) {
290         g_free(uri->query);
291         uri->query = g_strndup(*str, cur - *str);
292     }
293     *str = cur;
294     return 0;
295 }
296 
297 /**
298  * rfc3986_parse_port:
299  * @uri:  pointer to an URI structure
300  * @str:  the string to analyze
301  *
302  * Parse a port  part and fills in the appropriate fields
303  * of the @uri structure
304  *
305  * port          = *DIGIT
306  *
307  * Returns 0 or the error code
308  */
309 static int rfc3986_parse_port(URI *uri, const char **str)
310 {
311     const char *cur = *str;
312     int port = 0;
313 
314     if (ISA_DIGIT(cur)) {
315         while (ISA_DIGIT(cur)) {
316             port = port * 10 + (*cur - '0');
317             if (port > 65535) {
318                 return 1;
319             }
320             cur++;
321         }
322         if (uri) {
323             uri->port = port;
324         }
325         *str = cur;
326         return 0;
327     }
328     return 1;
329 }
330 
331 /**
332  * rfc3986_parse_user_info:
333  * @uri:  pointer to an URI structure
334  * @str:  the string to analyze
335  *
336  * Parse a user information part and fill in the appropriate fields
337  * of the @uri structure
338  *
339  * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
340  *
341  * Returns 0 or the error code
342  */
343 static int rfc3986_parse_user_info(URI *uri, const char **str)
344 {
345     const char *cur;
346 
347     cur = *str;
348     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur) ||
349            (*cur == ':')) {
350         NEXT(cur);
351     }
352     if (*cur == '@') {
353         if (uri != NULL) {
354             g_free(uri->user);
355             if (uri->cleanup & 2) {
356                 uri->user = g_strndup(*str, cur - *str);
357             } else {
358                 uri->user = g_uri_unescape_segment(*str, cur, NULL);
359             }
360         }
361         *str = cur;
362         return 0;
363     }
364     return 1;
365 }
366 
/**
 * rfc3986_parse_dec_octet:
 * @str:  in/out cursor into the string being analyzed
 *
 *    dec-octet     = DIGIT                 ; 0-9
 *                  / %x31-39 DIGIT         ; 10-99
 *                  / "1" 2DIGIT            ; 100-199
 *                  / "2" %x30-34 DIGIT     ; 200-249
 *                  / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet. Leading zeros ("01") and three-digit values above
 * 255 are rejected. When more than three digits follow a valid
 * three-digit octet, only the first three are consumed.
 *
 * Returns 0 if found and skipped (*@str is advanced), 1 otherwise
 * (*@str is left unchanged)
 */
static int rfc3986_parse_dec_octet(const char **str)
{
    const char *cur = *str;
    int len;

    if (cur[0] < '0' || cur[0] > '9') {
        return 1;
    }

    /*
     * cur[1] is only read once cur[0] is known to be a digit, and cur[2]
     * only once cur[1] is known to be a digit, so the NUL terminator is
     * never passed.
     */
    if (cur[1] < '0' || cur[1] > '9') {
        len = 1;                                    /* 0-9 */
    } else if (cur[0] == '0') {
        return 1;                                   /* leading zero */
    } else if (cur[2] < '0' || cur[2] > '9') {
        len = 2;                                    /* 10-99 */
    } else if (cur[0] == '1') {
        len = 3;                                    /* 100-199 */
    } else if (cur[0] == '2' && cur[1] <= '4') {
        len = 3;                                    /* 200-249 */
    } else if (cur[0] == '2' && cur[1] == '5' && cur[2] <= '5') {
        len = 3;                                    /* 250-255 */
    } else {
        return 1;                                   /* 256 and above */
    }

    *str = cur + len;
    return 0;
}
406 /**
407  * rfc3986_parse_host:
408  * @uri:  pointer to an URI structure
409  * @str:  the string to analyze
410  *
411  * Parse an host part and fills in the appropriate fields
412  * of the @uri structure
413  *
414  * host          = IP-literal / IPv4address / reg-name
415  * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
416  * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
417  * reg-name      = *( unreserved / pct-encoded / sub-delims )
418  *
419  * Returns 0 or the error code
420  */
421 static int rfc3986_parse_host(URI *uri, const char **str)
422 {
423     const char *cur = *str;
424     const char *host;
425 
426     host = cur;
427     /*
428      * IPv6 and future addressing scheme are enclosed between brackets
429      */
430     if (*cur == '[') {
431         cur++;
432         while ((*cur != ']') && (*cur != 0)) {
433             cur++;
434         }
435         if (*cur != ']') {
436             return 1;
437         }
438         cur++;
439         goto found;
440     }
441     /*
442      * try to parse an IPv4
443      */
444     if (ISA_DIGIT(cur)) {
445         if (rfc3986_parse_dec_octet(&cur) != 0) {
446             goto not_ipv4;
447         }
448         if (*cur != '.') {
449             goto not_ipv4;
450         }
451         cur++;
452         if (rfc3986_parse_dec_octet(&cur) != 0) {
453             goto not_ipv4;
454         }
455         if (*cur != '.') {
456             goto not_ipv4;
457         }
458         if (rfc3986_parse_dec_octet(&cur) != 0) {
459             goto not_ipv4;
460         }
461         if (*cur != '.') {
462             goto not_ipv4;
463         }
464         if (rfc3986_parse_dec_octet(&cur) != 0) {
465             goto not_ipv4;
466         }
467         goto found;
468     not_ipv4:
469         cur = *str;
470     }
471     /*
472      * then this should be a hostname which can be empty
473      */
474     while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur)) {
475         NEXT(cur);
476     }
477 found:
478     if (uri != NULL) {
479         g_free(uri->authority);
480         uri->authority = NULL;
481         g_free(uri->server);
482         if (cur != host) {
483             if (uri->cleanup & 2) {
484                 uri->server = g_strndup(host, cur - host);
485             } else {
486                 uri->server = g_uri_unescape_segment(host, cur, NULL);
487             }
488         } else {
489             uri->server = NULL;
490         }
491     }
492     *str = cur;
493     return 0;
494 }
495 
496 /**
497  * rfc3986_parse_authority:
498  * @uri:  pointer to an URI structure
499  * @str:  the string to analyze
500  *
501  * Parse an authority part and fills in the appropriate fields
502  * of the @uri structure
503  *
504  * authority     = [ userinfo "@" ] host [ ":" port ]
505  *
506  * Returns 0 or the error code
507  */
508 static int rfc3986_parse_authority(URI *uri, const char **str)
509 {
510     const char *cur;
511     int ret;
512 
513     cur = *str;
514     /*
515      * try to parse a userinfo and check for the trailing @
516      */
517     ret = rfc3986_parse_user_info(uri, &cur);
518     if ((ret != 0) || (*cur != '@')) {
519         cur = *str;
520     } else {
521         cur++;
522     }
523     ret = rfc3986_parse_host(uri, &cur);
524     if (ret != 0) {
525         return ret;
526     }
527     if (*cur == ':') {
528         cur++;
529         ret = rfc3986_parse_port(uri, &cur);
530         if (ret != 0) {
531             return ret;
532         }
533     }
534     *str = cur;
535     return 0;
536 }
537 
/**
 * rfc3986_parse_segment:
 * @str:  in/out cursor into the string being analyzed
 * @forbid: an optional forbidden character (0 for none)
 * @empty: non-zero to allow an empty segment
 *
 * Skip a path segment, stopping early at @forbid
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Returns 0 on success, 1 if the first character is not a pchar and
 * @empty is zero. *@str is only advanced when at least one character
 * was consumed.
 */
static int rfc3986_parse_segment(const char **str, char forbid, int empty)
{
    const char *p = *str;

    if (!ISA_PCHAR(p)) {
        return empty ? 0 : 1;
    }

    for (; ISA_PCHAR(p) && (*p != forbid); NEXT(p)) {
        /* nothing: the cursor is advanced by the loop header */
    }

    *str = p;
    return 0;
}
571 
572 /**
573  * rfc3986_parse_path_ab_empty:
574  * @uri:  pointer to an URI structure
575  * @str:  the string to analyze
576  *
577  * Parse an path absolute or empty and fills in the appropriate fields
578  * of the @uri structure
579  *
580  * path-abempty  = *( "/" segment )
581  *
582  * Returns 0 or the error code
583  */
584 static int rfc3986_parse_path_ab_empty(URI *uri, const char **str)
585 {
586     const char *cur;
587     int ret;
588 
589     cur = *str;
590 
591     while (*cur == '/') {
592         cur++;
593         ret = rfc3986_parse_segment(&cur, 0, 1);
594         if (ret != 0) {
595             return ret;
596         }
597     }
598     if (uri != NULL) {
599         g_free(uri->path);
600         if (*str != cur) {
601             if (uri->cleanup & 2) {
602                 uri->path = g_strndup(*str, cur - *str);
603             } else {
604                 uri->path = g_uri_unescape_segment(*str, cur, NULL);
605             }
606         } else {
607             uri->path = NULL;
608         }
609     }
610     *str = cur;
611     return 0;
612 }
613 
614 /**
615  * rfc3986_parse_path_absolute:
616  * @uri:  pointer to an URI structure
617  * @str:  the string to analyze
618  *
619  * Parse an path absolute and fills in the appropriate fields
620  * of the @uri structure
621  *
622  * path-absolute = "/" [ segment-nz *( "/" segment ) ]
623  *
624  * Returns 0 or the error code
625  */
626 static int rfc3986_parse_path_absolute(URI *uri, const char **str)
627 {
628     const char *cur;
629     int ret;
630 
631     cur = *str;
632 
633     if (*cur != '/') {
634         return 1;
635     }
636     cur++;
637     ret = rfc3986_parse_segment(&cur, 0, 0);
638     if (ret == 0) {
639         while (*cur == '/') {
640             cur++;
641             ret = rfc3986_parse_segment(&cur, 0, 1);
642             if (ret != 0) {
643                 return ret;
644             }
645         }
646     }
647     if (uri != NULL) {
648         g_free(uri->path);
649         if (cur != *str) {
650             if (uri->cleanup & 2) {
651                 uri->path = g_strndup(*str, cur - *str);
652             } else {
653                 uri->path = g_uri_unescape_segment(*str, cur, NULL);
654             }
655         } else {
656             uri->path = NULL;
657         }
658     }
659     *str = cur;
660     return 0;
661 }
662 
663 /**
664  * rfc3986_parse_path_rootless:
665  * @uri:  pointer to an URI structure
666  * @str:  the string to analyze
667  *
668  * Parse an path without root and fills in the appropriate fields
669  * of the @uri structure
670  *
671  * path-rootless = segment-nz *( "/" segment )
672  *
673  * Returns 0 or the error code
674  */
675 static int rfc3986_parse_path_rootless(URI *uri, const char **str)
676 {
677     const char *cur;
678     int ret;
679 
680     cur = *str;
681 
682     ret = rfc3986_parse_segment(&cur, 0, 0);
683     if (ret != 0) {
684         return ret;
685     }
686     while (*cur == '/') {
687         cur++;
688         ret = rfc3986_parse_segment(&cur, 0, 1);
689         if (ret != 0) {
690             return ret;
691         }
692     }
693     if (uri != NULL) {
694         g_free(uri->path);
695         if (cur != *str) {
696             if (uri->cleanup & 2) {
697                 uri->path = g_strndup(*str, cur - *str);
698             } else {
699                 uri->path = g_uri_unescape_segment(*str, cur, NULL);
700             }
701         } else {
702             uri->path = NULL;
703         }
704     }
705     *str = cur;
706     return 0;
707 }
708 
709 /**
710  * rfc3986_parse_path_no_scheme:
711  * @uri:  pointer to an URI structure
712  * @str:  the string to analyze
713  *
714  * Parse an path which is not a scheme and fills in the appropriate fields
715  * of the @uri structure
716  *
717  * path-noscheme = segment-nz-nc *( "/" segment )
718  *
719  * Returns 0 or the error code
720  */
721 static int rfc3986_parse_path_no_scheme(URI *uri, const char **str)
722 {
723     const char *cur;
724     int ret;
725 
726     cur = *str;
727 
728     ret = rfc3986_parse_segment(&cur, ':', 0);
729     if (ret != 0) {
730         return ret;
731     }
732     while (*cur == '/') {
733         cur++;
734         ret = rfc3986_parse_segment(&cur, 0, 1);
735         if (ret != 0) {
736             return ret;
737         }
738     }
739     if (uri != NULL) {
740         g_free(uri->path);
741         if (cur != *str) {
742             if (uri->cleanup & 2) {
743                 uri->path = g_strndup(*str, cur - *str);
744             } else {
745                 uri->path = g_uri_unescape_segment(*str, cur, NULL);
746             }
747         } else {
748             uri->path = NULL;
749         }
750     }
751     *str = cur;
752     return 0;
753 }
754 
755 /**
756  * rfc3986_parse_hier_part:
757  * @uri:  pointer to an URI structure
758  * @str:  the string to analyze
759  *
760  * Parse an hierarchical part and fills in the appropriate fields
761  * of the @uri structure
762  *
763  * hier-part     = "//" authority path-abempty
764  *                / path-absolute
765  *                / path-rootless
766  *                / path-empty
767  *
768  * Returns 0 or the error code
769  */
770 static int rfc3986_parse_hier_part(URI *uri, const char **str)
771 {
772     const char *cur;
773     int ret;
774 
775     cur = *str;
776 
777     if ((*cur == '/') && (*(cur + 1) == '/')) {
778         cur += 2;
779         ret = rfc3986_parse_authority(uri, &cur);
780         if (ret != 0) {
781             return ret;
782         }
783         ret = rfc3986_parse_path_ab_empty(uri, &cur);
784         if (ret != 0) {
785             return ret;
786         }
787         *str = cur;
788         return 0;
789     } else if (*cur == '/') {
790         ret = rfc3986_parse_path_absolute(uri, &cur);
791         if (ret != 0) {
792             return ret;
793         }
794     } else if (ISA_PCHAR(cur)) {
795         ret = rfc3986_parse_path_rootless(uri, &cur);
796         if (ret != 0) {
797             return ret;
798         }
799     } else {
800         /* path-empty is effectively empty */
801         if (uri != NULL) {
802             g_free(uri->path);
803             uri->path = NULL;
804         }
805     }
806     *str = cur;
807     return 0;
808 }
809 
810 /**
811  * rfc3986_parse_relative_ref:
812  * @uri:  pointer to an URI structure
813  * @str:  the string to analyze
814  *
815  * Parse an URI string and fills in the appropriate fields
816  * of the @uri structure
817  *
818  * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
819  * relative-part = "//" authority path-abempty
820  *               / path-absolute
821  *               / path-noscheme
822  *               / path-empty
823  *
824  * Returns 0 or the error code
825  */
826 static int rfc3986_parse_relative_ref(URI *uri, const char *str)
827 {
828     int ret;
829 
830     if ((*str == '/') && (*(str + 1) == '/')) {
831         str += 2;
832         ret = rfc3986_parse_authority(uri, &str);
833         if (ret != 0) {
834             return ret;
835         }
836         ret = rfc3986_parse_path_ab_empty(uri, &str);
837         if (ret != 0) {
838             return ret;
839         }
840     } else if (*str == '/') {
841         ret = rfc3986_parse_path_absolute(uri, &str);
842         if (ret != 0) {
843             return ret;
844         }
845     } else if (ISA_PCHAR(str)) {
846         ret = rfc3986_parse_path_no_scheme(uri, &str);
847         if (ret != 0) {
848             return ret;
849         }
850     } else {
851         /* path-empty is effectively empty */
852         if (uri != NULL) {
853             g_free(uri->path);
854             uri->path = NULL;
855         }
856     }
857 
858     if (*str == '?') {
859         str++;
860         ret = rfc3986_parse_query(uri, &str);
861         if (ret != 0) {
862             return ret;
863         }
864     }
865     if (*str == '#') {
866         str++;
867         ret = rfc3986_parse_fragment(uri, &str);
868         if (ret != 0) {
869             return ret;
870         }
871     }
872     if (*str != 0) {
873         uri_clean(uri);
874         return 1;
875     }
876     return 0;
877 }
878 
879 /**
880  * rfc3986_parse:
881  * @uri:  pointer to an URI structure
882  * @str:  the string to analyze
883  *
884  * Parse an URI string and fills in the appropriate fields
885  * of the @uri structure
886  *
887  * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
888  *
889  * Returns 0 or the error code
890  */
891 static int rfc3986_parse(URI *uri, const char *str)
892 {
893     int ret;
894 
895     ret = rfc3986_parse_scheme(uri, &str);
896     if (ret != 0) {
897         return ret;
898     }
899     if (*str != ':') {
900         return 1;
901     }
902     str++;
903     ret = rfc3986_parse_hier_part(uri, &str);
904     if (ret != 0) {
905         return ret;
906     }
907     if (*str == '?') {
908         str++;
909         ret = rfc3986_parse_query(uri, &str);
910         if (ret != 0) {
911             return ret;
912         }
913     }
914     if (*str == '#') {
915         str++;
916         ret = rfc3986_parse_fragment(uri, &str);
917         if (ret != 0) {
918             return ret;
919         }
920     }
921     if (*str != 0) {
922         uri_clean(uri);
923         return 1;
924     }
925     return 0;
926 }
927 
928 /**
929  * rfc3986_parse_uri_reference:
930  * @uri:  pointer to an URI structure
931  * @str:  the string to analyze
932  *
933  * Parse an URI reference string and fills in the appropriate fields
934  * of the @uri structure
935  *
936  * URI-reference = URI / relative-ref
937  *
938  * Returns 0 or the error code
939  */
940 static int rfc3986_parse_uri_reference(URI *uri, const char *str)
941 {
942     int ret;
943 
944     if (str == NULL) {
945         return -1;
946     }
947     uri_clean(uri);
948 
949     /*
950      * Try first to parse absolute refs, then fallback to relative if
951      * it fails.
952      */
953     ret = rfc3986_parse(uri, str);
954     if (ret != 0) {
955         uri_clean(uri);
956         ret = rfc3986_parse_relative_ref(uri, str);
957         if (ret != 0) {
958             uri_clean(uri);
959             return ret;
960         }
961     }
962     return 0;
963 }
964 
965 /**
966  * uri_parse:
967  * @str:  the URI string to analyze
968  *
969  * Parse an URI based on RFC 3986
970  *
971  * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
972  *
973  * Returns a newly built URI or NULL in case of error
974  */
975 URI *uri_parse(const char *str)
976 {
977     URI *uri;
978     int ret;
979 
980     if (str == NULL) {
981         return NULL;
982     }
983     uri = uri_new();
984     ret = rfc3986_parse_uri_reference(uri, str);
985     if (ret) {
986         uri_free(uri);
987         return NULL;
988     }
989     return uri;
990 }
991 
992 /**
993  * uri_parse_into:
994  * @uri:  pointer to an URI structure
995  * @str:  the string to analyze
996  *
997  * Parse an URI reference string based on RFC 3986 and fills in the
998  * appropriate fields of the @uri structure
999  *
1000  * URI-reference = URI / relative-ref
1001  *
1002  * Returns 0 or the error code
1003  */
1004 int uri_parse_into(URI *uri, const char *str)
1005 {
1006     return rfc3986_parse_uri_reference(uri, str);
1007 }
1008 
1009 /**
1010  * uri_parse_raw:
1011  * @str:  the URI string to analyze
1012  * @raw:  if 1 unescaping of URI pieces are disabled
1013  *
1014  * Parse an URI but allows to keep intact the original fragments.
1015  *
1016  * URI-reference = URI / relative-ref
1017  *
1018  * Returns a newly built URI or NULL in case of error
1019  */
1020 URI *uri_parse_raw(const char *str, int raw)
1021 {
1022     URI *uri;
1023     int ret;
1024 
1025     if (str == NULL) {
1026         return NULL;
1027     }
1028     uri = uri_new();
1029     if (raw) {
1030         uri->cleanup |= 2;
1031     }
1032     ret = uri_parse_into(uri, str);
1033     if (ret) {
1034         uri_free(uri);
1035         return NULL;
1036     }
1037     return uri;
1038 }
1039 
1040 /************************************************************************
1041  *                                                                      *
1042  *                    Generic URI structure functions                   *
1043  *                                                                      *
1044  ************************************************************************/
1045 
1046 /**
1047  * uri_new:
1048  *
1049  * Simply creates an empty URI
1050  *
1051  * Returns the new structure or NULL in case of error
1052  */
1053 URI *uri_new(void)
1054 {
1055     return g_new0(URI, 1);
1056 }
1057 
1058 /**
1059  * realloc2n:
1060  *
1061  * Function to handle properly a reallocation when saving an URI
1062  * Also imposes some limit on the length of an URI string output
1063  */
1064 static char *realloc2n(char *ret, int *max)
1065 {
1066     char *temp;
1067     int tmp;
1068 
1069     tmp = *max * 2;
1070     temp = g_realloc(ret, (tmp + 1));
1071     *max = tmp;
1072     return temp;
1073 }
1074 
1075 /**
1076  * uri_to_string:
1077  * @uri:  pointer to an URI
1078  *
1079  * Save the URI as an escaped string
1080  *
1081  * Returns a new string (to be deallocated by caller)
1082  */
1083 char *uri_to_string(URI *uri)
1084 {
1085     char *ret = NULL;
1086     char *temp;
1087     const char *p;
1088     int len;
1089     int max;
1090 
1091     if (uri == NULL) {
1092         return NULL;
1093     }
1094 
1095     max = 80;
1096     ret = g_malloc(max + 1);
1097     len = 0;
1098 
1099     if (uri->scheme != NULL) {
1100         p = uri->scheme;
1101         while (*p != 0) {
1102             if (len >= max) {
1103                 temp = realloc2n(ret, &max);
1104                 ret = temp;
1105             }
1106             ret[len++] = *p++;
1107         }
1108         if (len >= max) {
1109             temp = realloc2n(ret, &max);
1110             ret = temp;
1111         }
1112         ret[len++] = ':';
1113     }
1114     if (uri->opaque != NULL) {
1115         p = uri->opaque;
1116         while (*p != 0) {
1117             if (len + 3 >= max) {
1118                 temp = realloc2n(ret, &max);
1119                 ret = temp;
1120             }
1121             if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p))) {
1122                 ret[len++] = *p++;
1123             } else {
1124                 int val = *(unsigned char *)p++;
1125                 int hi = val / 0x10, lo = val % 0x10;
1126                 ret[len++] = '%';
1127                 ret[len++] = hi + (hi > 9 ? 'A' - 10 : '0');
1128                 ret[len++] = lo + (lo > 9 ? 'A' - 10 : '0');
1129             }
1130         }
1131     } else {
1132         if (uri->server != NULL) {
1133             if (len + 3 >= max) {
1134                 temp = realloc2n(ret, &max);
1135                 ret = temp;
1136             }
1137             ret[len++] = '/';
1138             ret[len++] = '/';
1139             if (uri->user != NULL) {
1140                 p = uri->user;
1141                 while (*p != 0) {
1142                     if (len + 3 >= max) {
1143                         temp = realloc2n(ret, &max);
1144                         ret = temp;
1145                     }
1146                     if ((IS_UNRESERVED(*(p))) || ((*(p) == ';')) ||
1147                         ((*(p) == ':')) || ((*(p) == '&')) || ((*(p) == '=')) ||
1148                         ((*(p) == '+')) || ((*(p) == '$')) || ((*(p) == ','))) {
1149                         ret[len++] = *p++;
1150                     } else {
1151                         int val = *(unsigned char *)p++;
1152                         int hi = val / 0x10, lo = val % 0x10;
1153                         ret[len++] = '%';
1154                         ret[len++] = hi + (hi > 9 ? 'A' - 10 : '0');
1155                         ret[len++] = lo + (lo > 9 ? 'A' - 10 : '0');
1156                     }
1157                 }
1158                 if (len + 3 >= max) {
1159                     temp = realloc2n(ret, &max);
1160                     ret = temp;
1161                 }
1162                 ret[len++] = '@';
1163             }
1164             p = uri->server;
1165             while (*p != 0) {
1166                 if (len >= max) {
1167                     temp = realloc2n(ret, &max);
1168                     ret = temp;
1169                 }
1170                 ret[len++] = *p++;
1171             }
1172             if (uri->port > 0) {
1173                 if (len + 10 >= max) {
1174                     temp = realloc2n(ret, &max);
1175                     ret = temp;
1176                 }
1177                 len += snprintf(&ret[len], max - len, ":%d", uri->port);
1178             }
1179         } else if (uri->authority != NULL) {
1180             if (len + 3 >= max) {
1181                 temp = realloc2n(ret, &max);
1182                 ret = temp;
1183             }
1184             ret[len++] = '/';
1185             ret[len++] = '/';
1186             p = uri->authority;
1187             while (*p != 0) {
1188                 if (len + 3 >= max) {
1189                     temp = realloc2n(ret, &max);
1190                     ret = temp;
1191                 }
1192                 if ((IS_UNRESERVED(*(p))) || ((*(p) == '$')) ||
1193                     ((*(p) == ',')) || ((*(p) == ';')) || ((*(p) == ':')) ||
1194                     ((*(p) == '@')) || ((*(p) == '&')) || ((*(p) == '=')) ||
1195                     ((*(p) == '+'))) {
1196                     ret[len++] = *p++;
1197                 } else {
1198                     int val = *(unsigned char *)p++;
1199                     int hi = val / 0x10, lo = val % 0x10;
1200                     ret[len++] = '%';
1201                     ret[len++] = hi + (hi > 9 ? 'A' - 10 : '0');
1202                     ret[len++] = lo + (lo > 9 ? 'A' - 10 : '0');
1203                 }
1204             }
1205         } else if (uri->scheme != NULL) {
1206             if (len + 3 >= max) {
1207                 temp = realloc2n(ret, &max);
1208                 ret = temp;
1209             }
1210             ret[len++] = '/';
1211             ret[len++] = '/';
1212         }
1213         if (uri->path != NULL) {
1214             p = uri->path;
1215             /*
1216              * the colon in file:///d: should not be escaped or
1217              * Windows accesses fail later.
1218              */
1219             if ((uri->scheme != NULL) && (p[0] == '/') &&
1220                 (((p[1] >= 'a') && (p[1] <= 'z')) ||
1221                  ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
1222                 (p[2] == ':') && (!strcmp(uri->scheme, "file"))) {
1223                 if (len + 3 >= max) {
1224                     temp = realloc2n(ret, &max);
1225                     ret = temp;
1226                 }
1227                 ret[len++] = *p++;
1228                 ret[len++] = *p++;
1229                 ret[len++] = *p++;
1230             }
1231             while (*p != 0) {
1232                 if (len + 3 >= max) {
1233                     temp = realloc2n(ret, &max);
1234                     ret = temp;
1235                 }
1236                 if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
1237                     ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
1238                     ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
1239                     ((*(p) == ','))) {
1240                     ret[len++] = *p++;
1241                 } else {
1242                     int val = *(unsigned char *)p++;
1243                     int hi = val / 0x10, lo = val % 0x10;
1244                     ret[len++] = '%';
1245                     ret[len++] = hi + (hi > 9 ? 'A' - 10 : '0');
1246                     ret[len++] = lo + (lo > 9 ? 'A' - 10 : '0');
1247                 }
1248             }
1249         }
1250         if (uri->query != NULL) {
1251             if (len + 1 >= max) {
1252                 temp = realloc2n(ret, &max);
1253                 ret = temp;
1254             }
1255             ret[len++] = '?';
1256             p = uri->query;
1257             while (*p != 0) {
1258                 if (len + 1 >= max) {
1259                     temp = realloc2n(ret, &max);
1260                     ret = temp;
1261                 }
1262                 ret[len++] = *p++;
1263             }
1264         }
1265     }
1266     if (uri->fragment != NULL) {
1267         if (len + 3 >= max) {
1268             temp = realloc2n(ret, &max);
1269             ret = temp;
1270         }
1271         ret[len++] = '#';
1272         p = uri->fragment;
1273         while (*p != 0) {
1274             if (len + 3 >= max) {
1275                 temp = realloc2n(ret, &max);
1276                 ret = temp;
1277             }
1278             if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) {
1279                 ret[len++] = *p++;
1280             } else {
1281                 int val = *(unsigned char *)p++;
1282                 int hi = val / 0x10, lo = val % 0x10;
1283                 ret[len++] = '%';
1284                 ret[len++] = hi + (hi > 9 ? 'A' - 10 : '0');
1285                 ret[len++] = lo + (lo > 9 ? 'A' - 10 : '0');
1286             }
1287         }
1288     }
1289     if (len >= max) {
1290         temp = realloc2n(ret, &max);
1291         ret = temp;
1292     }
1293     ret[len] = 0;
1294     return ret;
1295 }
1296 
1297 /**
1298  * uri_clean:
1299  * @uri:  pointer to an URI
1300  *
1301  * Make sure the URI struct is free of content
1302  */
1303 static void uri_clean(URI *uri)
1304 {
1305     if (uri == NULL) {
1306         return;
1307     }
1308 
1309     g_free(uri->scheme);
1310     uri->scheme = NULL;
1311     g_free(uri->server);
1312     uri->server = NULL;
1313     g_free(uri->user);
1314     uri->user = NULL;
1315     g_free(uri->path);
1316     uri->path = NULL;
1317     g_free(uri->fragment);
1318     uri->fragment = NULL;
1319     g_free(uri->opaque);
1320     uri->opaque = NULL;
1321     g_free(uri->authority);
1322     uri->authority = NULL;
1323     g_free(uri->query);
1324     uri->query = NULL;
1325 }
1326 
1327 /**
1328  * uri_free:
1329  * @uri:  pointer to an URI, NULL is ignored
1330  *
1331  * Free up the URI struct
1332  */
1333 void uri_free(URI *uri)
1334 {
1335     uri_clean(uri);
1336     g_free(uri);
1337 }
1338 
1339 /************************************************************************
1340  *                                                                      *
1341  *                           Public functions                           *
1342  *                                                                      *
1343  ************************************************************************/
1344 
1345 /*
1346  * Utility functions to help parse and assemble query strings.
1347  */
1348 
1349 struct QueryParams *query_params_new(int init_alloc)
1350 {
1351     struct QueryParams *ps;
1352 
1353     if (init_alloc <= 0) {
1354         init_alloc = 1;
1355     }
1356 
1357     ps = g_new(QueryParams, 1);
1358     ps->n = 0;
1359     ps->alloc = init_alloc;
1360     ps->p = g_new(QueryParam, ps->alloc);
1361 
1362     return ps;
1363 }
1364 
1365 /* Ensure there is space to store at least one more parameter
1366  * at the end of the set.
1367  */
1368 static int query_params_append(struct QueryParams *ps, const char *name,
1369                                const char *value)
1370 {
1371     if (ps->n >= ps->alloc) {
1372         ps->p = g_renew(QueryParam, ps->p, ps->alloc * 2);
1373         ps->alloc *= 2;
1374     }
1375 
1376     ps->p[ps->n].name = g_strdup(name);
1377     ps->p[ps->n].value = g_strdup(value);
1378     ps->p[ps->n].ignore = 0;
1379     ps->n++;
1380 
1381     return 0;
1382 }
1383 
1384 void query_params_free(struct QueryParams *ps)
1385 {
1386     int i;
1387 
1388     for (i = 0; i < ps->n; ++i) {
1389         g_free(ps->p[i].name);
1390         g_free(ps->p[i].value);
1391     }
1392     g_free(ps->p);
1393     g_free(ps);
1394 }
1395 
1396 struct QueryParams *query_params_parse(const char *query)
1397 {
1398     struct QueryParams *ps;
1399     const char *end, *eq;
1400 
1401     ps = query_params_new(0);
1402     if (!query || query[0] == '\0') {
1403         return ps;
1404     }
1405 
1406     while (*query) {
1407         char *name = NULL, *value = NULL;
1408 
1409         /* Find the next separator, or end of the string. */
1410         end = strchr(query, '&');
1411         if (!end) {
1412             end = qemu_strchrnul(query, ';');
1413         }
1414 
1415         /* Find the first '=' character between here and end. */
1416         eq = strchr(query, '=');
1417         if (eq && eq >= end) {
1418             eq = NULL;
1419         }
1420 
1421         /* Empty section (eg. "&&"). */
1422         if (end == query) {
1423             goto next;
1424         }
1425 
1426         /* If there is no '=' character, then we have just "name"
1427          * and consistent with CGI.pm we assume value is "".
1428          */
1429         else if (!eq) {
1430             name = g_uri_unescape_segment(query, end, NULL);
1431             value = NULL;
1432         }
1433         /* Or if we have "name=" here (works around annoying
1434          * problem when calling uri_string_unescape with len = 0).
1435          */
1436         else if (eq + 1 == end) {
1437             name = g_uri_unescape_segment(query, eq, NULL);
1438             value = g_new0(char, 1);
1439         }
1440         /* If the '=' character is at the beginning then we have
1441          * "=value" and consistent with CGI.pm we _ignore_ this.
1442          */
1443         else if (query == eq) {
1444             goto next;
1445         }
1446 
1447         /* Otherwise it's "name=value". */
1448         else {
1449             name = g_uri_unescape_segment(query, eq, NULL);
1450             value = g_uri_unescape_segment(eq + 1, end, NULL);
1451         }
1452 
1453         /* Append to the parameter set. */
1454         query_params_append(ps, name, value);
1455         g_free(name);
1456         g_free(value);
1457 
1458     next:
1459         query = end;
1460         if (*query) {
1461             query++; /* skip '&' separator */
1462         }
1463     }
1464 
1465     return ps;
1466 }
1467